{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.687870982192855, "eval_steps": 500, "global_step": 48000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.599731212901781e-05, "grad_norm": 1.5208414793014526, "learning_rate": 5e-09, "loss": 0.5181, "step": 1 }, { "epoch": 0.00011199462425803562, "grad_norm": 1.3712694644927979, "learning_rate": 1e-08, "loss": 0.376, "step": 2 }, { "epoch": 0.0001679919363870534, "grad_norm": 1.4184719324111938, "learning_rate": 1.5e-08, "loss": 0.4098, "step": 3 }, { "epoch": 0.00022398924851607123, "grad_norm": 1.4291776418685913, "learning_rate": 2e-08, "loss": 0.4771, "step": 4 }, { "epoch": 0.00027998656064508903, "grad_norm": 1.239039659500122, "learning_rate": 2.5000000000000002e-08, "loss": 0.5122, "step": 5 }, { "epoch": 0.0003359838727741068, "grad_norm": 1.3924657106399536, "learning_rate": 3e-08, "loss": 0.5332, "step": 6 }, { "epoch": 0.00039198118490312467, "grad_norm": 1.0782504081726074, "learning_rate": 3.5e-08, "loss": 0.3749, "step": 7 }, { "epoch": 0.00044797849703214247, "grad_norm": 1.5758692026138306, "learning_rate": 4e-08, "loss": 0.5218, "step": 8 }, { "epoch": 0.0005039758091611603, "grad_norm": 1.3339658975601196, "learning_rate": 4.5e-08, "loss": 0.5131, "step": 9 }, { "epoch": 0.0005599731212901781, "grad_norm": 1.5984278917312622, "learning_rate": 5.0000000000000004e-08, "loss": 0.5228, "step": 10 }, { "epoch": 0.0006159704334191959, "grad_norm": 1.2421988248825073, "learning_rate": 5.5e-08, "loss": 0.3985, "step": 11 }, { "epoch": 0.0006719677455482136, "grad_norm": 1.3071264028549194, "learning_rate": 6e-08, "loss": 0.4958, "step": 12 }, { "epoch": 0.0007279650576772315, "grad_norm": 1.7665575742721558, "learning_rate": 6.5e-08, "loss": 0.5927, "step": 13 }, { "epoch": 0.0007839623698062493, "grad_norm": 1.494536280632019, "learning_rate": 7e-08, "loss": 0.4472, "step": 14 }, { "epoch": 0.0008399596819352671, 
"grad_norm": 1.1757087707519531, "learning_rate": 7.500000000000001e-08, "loss": 0.4116, "step": 15 }, { "epoch": 0.0008959569940642849, "grad_norm": 1.6829992532730103, "learning_rate": 8e-08, "loss": 0.5333, "step": 16 }, { "epoch": 0.0009519543061933027, "grad_norm": 1.836796522140503, "learning_rate": 8.5e-08, "loss": 0.4205, "step": 17 }, { "epoch": 0.0010079516183223206, "grad_norm": 1.6840550899505615, "learning_rate": 9e-08, "loss": 0.4523, "step": 18 }, { "epoch": 0.0010639489304513383, "grad_norm": 1.5171109437942505, "learning_rate": 9.5e-08, "loss": 0.5163, "step": 19 }, { "epoch": 0.0011199462425803561, "grad_norm": 1.4950929880142212, "learning_rate": 1.0000000000000001e-07, "loss": 0.4597, "step": 20 }, { "epoch": 0.001175943554709374, "grad_norm": 1.510266900062561, "learning_rate": 1.05e-07, "loss": 0.4991, "step": 21 }, { "epoch": 0.0012319408668383918, "grad_norm": 1.2690305709838867, "learning_rate": 1.1e-07, "loss": 0.4515, "step": 22 }, { "epoch": 0.0012879381789674097, "grad_norm": 1.4038795232772827, "learning_rate": 1.15e-07, "loss": 0.4929, "step": 23 }, { "epoch": 0.0013439354910964273, "grad_norm": 1.4907742738723755, "learning_rate": 1.2e-07, "loss": 0.766, "step": 24 }, { "epoch": 0.0013999328032254451, "grad_norm": 1.5836879014968872, "learning_rate": 1.2500000000000002e-07, "loss": 0.5249, "step": 25 }, { "epoch": 0.001455930115354463, "grad_norm": 2.3851399421691895, "learning_rate": 1.3e-07, "loss": 0.5666, "step": 26 }, { "epoch": 0.0015119274274834808, "grad_norm": 1.2829978466033936, "learning_rate": 1.35e-07, "loss": 0.4299, "step": 27 }, { "epoch": 0.0015679247396124987, "grad_norm": 1.6352183818817139, "learning_rate": 1.4e-07, "loss": 0.5148, "step": 28 }, { "epoch": 0.0016239220517415163, "grad_norm": 1.3571499586105347, "learning_rate": 1.45e-07, "loss": 0.4276, "step": 29 }, { "epoch": 0.0016799193638705342, "grad_norm": 1.4235525131225586, "learning_rate": 1.5000000000000002e-07, "loss": 0.6501, "step": 30 }, { "epoch": 
0.001735916675999552, "grad_norm": 1.5542964935302734, "learning_rate": 1.55e-07, "loss": 0.504, "step": 31 }, { "epoch": 0.0017919139881285699, "grad_norm": 1.2780183553695679, "learning_rate": 1.6e-07, "loss": 0.4262, "step": 32 }, { "epoch": 0.0018479113002575877, "grad_norm": 1.2457411289215088, "learning_rate": 1.65e-07, "loss": 0.4542, "step": 33 }, { "epoch": 0.0019039086123866053, "grad_norm": 1.409238576889038, "learning_rate": 1.7e-07, "loss": 0.5514, "step": 34 }, { "epoch": 0.0019599059245156234, "grad_norm": 1.564186453819275, "learning_rate": 1.7500000000000002e-07, "loss": 0.4696, "step": 35 }, { "epoch": 0.0020159032366446413, "grad_norm": 1.1453927755355835, "learning_rate": 1.8e-07, "loss": 0.4193, "step": 36 }, { "epoch": 0.0020719005487736587, "grad_norm": 1.3826923370361328, "learning_rate": 1.8500000000000003e-07, "loss": 0.457, "step": 37 }, { "epoch": 0.0021278978609026765, "grad_norm": 1.4963278770446777, "learning_rate": 1.9e-07, "loss": 0.6355, "step": 38 }, { "epoch": 0.0021838951730316944, "grad_norm": 2.089097261428833, "learning_rate": 1.95e-07, "loss": 0.4094, "step": 39 }, { "epoch": 0.0022398924851607122, "grad_norm": 1.8463817834854126, "learning_rate": 2.0000000000000002e-07, "loss": 0.5716, "step": 40 }, { "epoch": 0.00229588979728973, "grad_norm": 1.464522123336792, "learning_rate": 2.0500000000000002e-07, "loss": 0.519, "step": 41 }, { "epoch": 0.002351887109418748, "grad_norm": 1.4796329736709595, "learning_rate": 2.1e-07, "loss": 0.5112, "step": 42 }, { "epoch": 0.0024078844215477658, "grad_norm": 1.595494270324707, "learning_rate": 2.15e-07, "loss": 0.6506, "step": 43 }, { "epoch": 0.0024638817336767836, "grad_norm": 1.8938987255096436, "learning_rate": 2.2e-07, "loss": 0.4963, "step": 44 }, { "epoch": 0.0025198790458058015, "grad_norm": 1.4932397603988647, "learning_rate": 2.25e-07, "loss": 0.4818, "step": 45 }, { "epoch": 0.0025758763579348193, "grad_norm": 1.2402743101119995, "learning_rate": 2.3e-07, "loss": 0.4129, 
"step": 46 }, { "epoch": 0.0026318736700638367, "grad_norm": 1.3202379941940308, "learning_rate": 2.3500000000000003e-07, "loss": 0.4353, "step": 47 }, { "epoch": 0.0026878709821928546, "grad_norm": 1.5665568113327026, "learning_rate": 2.4e-07, "loss": 0.6673, "step": 48 }, { "epoch": 0.0027438682943218724, "grad_norm": 1.557573676109314, "learning_rate": 2.45e-07, "loss": 0.4768, "step": 49 }, { "epoch": 0.0027998656064508903, "grad_norm": 1.5255318880081177, "learning_rate": 2.5000000000000004e-07, "loss": 0.528, "step": 50 }, { "epoch": 0.002855862918579908, "grad_norm": 1.3243638277053833, "learning_rate": 2.5500000000000005e-07, "loss": 0.5488, "step": 51 }, { "epoch": 0.002911860230708926, "grad_norm": 1.2211617231369019, "learning_rate": 2.6e-07, "loss": 0.44, "step": 52 }, { "epoch": 0.002967857542837944, "grad_norm": 2.0557689666748047, "learning_rate": 2.65e-07, "loss": 0.5321, "step": 53 }, { "epoch": 0.0030238548549669617, "grad_norm": 1.4214329719543457, "learning_rate": 2.7e-07, "loss": 0.3911, "step": 54 }, { "epoch": 0.0030798521670959795, "grad_norm": 1.35947847366333, "learning_rate": 2.75e-07, "loss": 0.4427, "step": 55 }, { "epoch": 0.0031358494792249974, "grad_norm": 1.4284133911132812, "learning_rate": 2.8e-07, "loss": 0.4277, "step": 56 }, { "epoch": 0.0031918467913540152, "grad_norm": 1.4069606065750122, "learning_rate": 2.85e-07, "loss": 0.4372, "step": 57 }, { "epoch": 0.0032478441034830326, "grad_norm": 2.8065545558929443, "learning_rate": 2.9e-07, "loss": 0.4607, "step": 58 }, { "epoch": 0.0033038414156120505, "grad_norm": 1.6844050884246826, "learning_rate": 2.95e-07, "loss": 0.6309, "step": 59 }, { "epoch": 0.0033598387277410683, "grad_norm": 1.4104372262954712, "learning_rate": 3.0000000000000004e-07, "loss": 0.4987, "step": 60 }, { "epoch": 0.003415836039870086, "grad_norm": 1.5404714345932007, "learning_rate": 3.0500000000000004e-07, "loss": 0.5942, "step": 61 }, { "epoch": 0.003471833351999104, "grad_norm": 1.5271331071853638, 
"learning_rate": 3.1e-07, "loss": 0.4563, "step": 62 }, { "epoch": 0.003527830664128122, "grad_norm": 1.4416390657424927, "learning_rate": 3.15e-07, "loss": 0.5131, "step": 63 }, { "epoch": 0.0035838279762571397, "grad_norm": 1.416885256767273, "learning_rate": 3.2e-07, "loss": 0.4983, "step": 64 }, { "epoch": 0.0036398252883861576, "grad_norm": 1.6866464614868164, "learning_rate": 3.25e-07, "loss": 0.4553, "step": 65 }, { "epoch": 0.0036958226005151754, "grad_norm": 1.4955333471298218, "learning_rate": 3.3e-07, "loss": 0.4709, "step": 66 }, { "epoch": 0.0037518199126441933, "grad_norm": 1.3900035619735718, "learning_rate": 3.35e-07, "loss": 0.5626, "step": 67 }, { "epoch": 0.0038078172247732107, "grad_norm": 1.3770145177841187, "learning_rate": 3.4e-07, "loss": 0.5259, "step": 68 }, { "epoch": 0.0038638145369022285, "grad_norm": 1.2875261306762695, "learning_rate": 3.4500000000000003e-07, "loss": 0.4388, "step": 69 }, { "epoch": 0.003919811849031247, "grad_norm": 1.3735551834106445, "learning_rate": 3.5000000000000004e-07, "loss": 0.5785, "step": 70 }, { "epoch": 0.003975809161160264, "grad_norm": 1.9509772062301636, "learning_rate": 3.5500000000000004e-07, "loss": 0.5238, "step": 71 }, { "epoch": 0.0040318064732892825, "grad_norm": 1.8565374612808228, "learning_rate": 3.6e-07, "loss": 0.7029, "step": 72 }, { "epoch": 0.0040878037854183, "grad_norm": 1.4239002466201782, "learning_rate": 3.65e-07, "loss": 0.4697, "step": 73 }, { "epoch": 0.004143801097547317, "grad_norm": 1.467603087425232, "learning_rate": 3.7000000000000006e-07, "loss": 0.4178, "step": 74 }, { "epoch": 0.004199798409676336, "grad_norm": 1.1795387268066406, "learning_rate": 3.75e-07, "loss": 0.4297, "step": 75 }, { "epoch": 0.004255795721805353, "grad_norm": 1.1643917560577393, "learning_rate": 3.8e-07, "loss": 0.4823, "step": 76 }, { "epoch": 0.004311793033934371, "grad_norm": 1.4003437757492065, "learning_rate": 3.85e-07, "loss": 0.4341, "step": 77 }, { "epoch": 0.004367790346063389, 
"grad_norm": 1.246448278427124, "learning_rate": 3.9e-07, "loss": 0.4935, "step": 78 }, { "epoch": 0.004423787658192407, "grad_norm": 1.449789047241211, "learning_rate": 3.950000000000001e-07, "loss": 0.4214, "step": 79 }, { "epoch": 0.0044797849703214244, "grad_norm": 1.5086668729782104, "learning_rate": 4.0000000000000003e-07, "loss": 0.5524, "step": 80 }, { "epoch": 0.004535782282450443, "grad_norm": 1.5325603485107422, "learning_rate": 4.05e-07, "loss": 0.4823, "step": 81 }, { "epoch": 0.00459177959457946, "grad_norm": 1.292855143547058, "learning_rate": 4.1000000000000004e-07, "loss": 0.5164, "step": 82 }, { "epoch": 0.0046477769067084776, "grad_norm": 1.3947184085845947, "learning_rate": 4.1500000000000005e-07, "loss": 0.5706, "step": 83 }, { "epoch": 0.004703774218837496, "grad_norm": 2.1091814041137695, "learning_rate": 4.2e-07, "loss": 0.5663, "step": 84 }, { "epoch": 0.004759771530966513, "grad_norm": 1.1866250038146973, "learning_rate": 4.2500000000000006e-07, "loss": 0.4289, "step": 85 }, { "epoch": 0.0048157688430955315, "grad_norm": 1.5813519954681396, "learning_rate": 4.3e-07, "loss": 0.8407, "step": 86 }, { "epoch": 0.004871766155224549, "grad_norm": 1.4272483587265015, "learning_rate": 4.3499999999999996e-07, "loss": 0.5547, "step": 87 }, { "epoch": 0.004927763467353567, "grad_norm": 1.3648320436477661, "learning_rate": 4.4e-07, "loss": 0.6105, "step": 88 }, { "epoch": 0.004983760779482585, "grad_norm": 1.4899494647979736, "learning_rate": 4.4500000000000003e-07, "loss": 0.5103, "step": 89 }, { "epoch": 0.005039758091611603, "grad_norm": 1.5137723684310913, "learning_rate": 4.5e-07, "loss": 0.6131, "step": 90 }, { "epoch": 0.00509575540374062, "grad_norm": 2.122572660446167, "learning_rate": 4.5500000000000004e-07, "loss": 0.4913, "step": 91 }, { "epoch": 0.005151752715869639, "grad_norm": 1.4125502109527588, "learning_rate": 4.6e-07, "loss": 0.4826, "step": 92 }, { "epoch": 0.005207750027998656, "grad_norm": 1.275382399559021, "learning_rate": 
4.65e-07, "loss": 0.4789, "step": 93 }, { "epoch": 0.0052637473401276735, "grad_norm": 1.5469411611557007, "learning_rate": 4.7000000000000005e-07, "loss": 0.5324, "step": 94 }, { "epoch": 0.005319744652256692, "grad_norm": 1.503063440322876, "learning_rate": 4.75e-07, "loss": 0.5175, "step": 95 }, { "epoch": 0.005375741964385709, "grad_norm": 1.4756922721862793, "learning_rate": 4.8e-07, "loss": 0.6135, "step": 96 }, { "epoch": 0.0054317392765147274, "grad_norm": 1.2535488605499268, "learning_rate": 4.85e-07, "loss": 0.4542, "step": 97 }, { "epoch": 0.005487736588643745, "grad_norm": 1.3292878866195679, "learning_rate": 4.9e-07, "loss": 0.6138, "step": 98 }, { "epoch": 0.005543733900772763, "grad_norm": 1.4198826551437378, "learning_rate": 4.95e-07, "loss": 0.5016, "step": 99 }, { "epoch": 0.0055997312129017806, "grad_norm": 1.4598007202148438, "learning_rate": 5.000000000000001e-07, "loss": 0.4207, "step": 100 }, { "epoch": 0.005655728525030799, "grad_norm": 1.2995636463165283, "learning_rate": 5.05e-07, "loss": 0.6009, "step": 101 }, { "epoch": 0.005711725837159816, "grad_norm": 1.3568147420883179, "learning_rate": 5.100000000000001e-07, "loss": 0.405, "step": 102 }, { "epoch": 0.0057677231492888345, "grad_norm": 1.1702243089675903, "learning_rate": 5.15e-07, "loss": 0.3981, "step": 103 }, { "epoch": 0.005823720461417852, "grad_norm": 1.4444057941436768, "learning_rate": 5.2e-07, "loss": 0.4031, "step": 104 }, { "epoch": 0.005879717773546869, "grad_norm": 1.5215481519699097, "learning_rate": 5.250000000000001e-07, "loss": 0.6405, "step": 105 }, { "epoch": 0.005935715085675888, "grad_norm": 1.5095003843307495, "learning_rate": 5.3e-07, "loss": 0.6124, "step": 106 }, { "epoch": 0.005991712397804905, "grad_norm": 1.3150241374969482, "learning_rate": 5.35e-07, "loss": 0.4757, "step": 107 }, { "epoch": 0.006047709709933923, "grad_norm": 1.1489843130111694, "learning_rate": 5.4e-07, "loss": 0.4294, "step": 108 }, { "epoch": 0.006103707022062941, "grad_norm": 
1.4761924743652344, "learning_rate": 5.450000000000001e-07, "loss": 0.5482, "step": 109 }, { "epoch": 0.006159704334191959, "grad_norm": 1.4262828826904297, "learning_rate": 5.5e-07, "loss": 0.4507, "step": 110 }, { "epoch": 0.0062157016463209765, "grad_norm": 1.3081737756729126, "learning_rate": 5.550000000000001e-07, "loss": 0.4249, "step": 111 }, { "epoch": 0.006271698958449995, "grad_norm": 2.10310435295105, "learning_rate": 5.6e-07, "loss": 0.5236, "step": 112 }, { "epoch": 0.006327696270579012, "grad_norm": 1.2382475137710571, "learning_rate": 5.65e-07, "loss": 0.3968, "step": 113 }, { "epoch": 0.0063836935827080304, "grad_norm": 2.0119569301605225, "learning_rate": 5.7e-07, "loss": 0.6051, "step": 114 }, { "epoch": 0.006439690894837048, "grad_norm": 1.5267064571380615, "learning_rate": 5.75e-07, "loss": 0.5475, "step": 115 }, { "epoch": 0.006495688206966065, "grad_norm": 2.7379636764526367, "learning_rate": 5.8e-07, "loss": 0.3968, "step": 116 }, { "epoch": 0.0065516855190950836, "grad_norm": 1.4146085977554321, "learning_rate": 5.85e-07, "loss": 0.6878, "step": 117 }, { "epoch": 0.006607682831224101, "grad_norm": 1.2528458833694458, "learning_rate": 5.9e-07, "loss": 0.4599, "step": 118 }, { "epoch": 0.006663680143353119, "grad_norm": 1.3054895401000977, "learning_rate": 5.95e-07, "loss": 0.4442, "step": 119 }, { "epoch": 0.006719677455482137, "grad_norm": 1.1482446193695068, "learning_rate": 6.000000000000001e-07, "loss": 0.4932, "step": 120 }, { "epoch": 0.006775674767611155, "grad_norm": 1.2546861171722412, "learning_rate": 6.05e-07, "loss": 0.4436, "step": 121 }, { "epoch": 0.006831672079740172, "grad_norm": 1.4483879804611206, "learning_rate": 6.100000000000001e-07, "loss": 0.5461, "step": 122 }, { "epoch": 0.006887669391869191, "grad_norm": 1.6083807945251465, "learning_rate": 6.15e-07, "loss": 0.5924, "step": 123 }, { "epoch": 0.006943666703998208, "grad_norm": 1.2077231407165527, "learning_rate": 6.2e-07, "loss": 0.4581, "step": 124 }, { "epoch": 
0.0069996640161272255, "grad_norm": 1.2149949073791504, "learning_rate": 6.25e-07, "loss": 0.4393, "step": 125 }, { "epoch": 0.007055661328256244, "grad_norm": 1.4178581237792969, "learning_rate": 6.3e-07, "loss": 0.422, "step": 126 }, { "epoch": 0.007111658640385261, "grad_norm": 1.2476677894592285, "learning_rate": 6.35e-07, "loss": 0.496, "step": 127 }, { "epoch": 0.0071676559525142795, "grad_norm": 1.5778127908706665, "learning_rate": 6.4e-07, "loss": 0.6051, "step": 128 }, { "epoch": 0.007223653264643297, "grad_norm": 1.2792600393295288, "learning_rate": 6.450000000000001e-07, "loss": 0.467, "step": 129 }, { "epoch": 0.007279650576772315, "grad_norm": 2.7617485523223877, "learning_rate": 6.5e-07, "loss": 0.486, "step": 130 }, { "epoch": 0.007335647888901333, "grad_norm": 1.6663296222686768, "learning_rate": 6.550000000000001e-07, "loss": 0.5529, "step": 131 }, { "epoch": 0.007391645201030351, "grad_norm": 1.3751574754714966, "learning_rate": 6.6e-07, "loss": 0.5267, "step": 132 }, { "epoch": 0.007447642513159368, "grad_norm": 1.3827310800552368, "learning_rate": 6.65e-07, "loss": 0.4651, "step": 133 }, { "epoch": 0.0075036398252883866, "grad_norm": 1.3962993621826172, "learning_rate": 6.7e-07, "loss": 0.5695, "step": 134 }, { "epoch": 0.007559637137417404, "grad_norm": 1.3722511529922485, "learning_rate": 6.75e-07, "loss": 0.482, "step": 135 }, { "epoch": 0.007615634449546421, "grad_norm": 1.6061739921569824, "learning_rate": 6.8e-07, "loss": 0.4326, "step": 136 }, { "epoch": 0.00767163176167544, "grad_norm": 1.2789545059204102, "learning_rate": 6.85e-07, "loss": 0.3909, "step": 137 }, { "epoch": 0.007727629073804457, "grad_norm": 1.573917269706726, "learning_rate": 6.900000000000001e-07, "loss": 0.5517, "step": 138 }, { "epoch": 0.007783626385933475, "grad_norm": null, "learning_rate": 6.900000000000001e-07, "loss": 0.5032, "step": 139 }, { "epoch": 0.007839623698062494, "grad_norm": 1.4517173767089844, "learning_rate": 6.95e-07, "loss": 0.5369, "step": 
140 }, { "epoch": 0.007895621010191511, "grad_norm": 1.7139356136322021, "learning_rate": 7.000000000000001e-07, "loss": 0.5134, "step": 141 }, { "epoch": 0.007951618322320528, "grad_norm": 2.1147677898406982, "learning_rate": 7.05e-07, "loss": 0.6408, "step": 142 }, { "epoch": 0.008007615634449546, "grad_norm": 1.1497327089309692, "learning_rate": 7.100000000000001e-07, "loss": 0.4417, "step": 143 }, { "epoch": 0.008063612946578565, "grad_norm": 1.518349051475525, "learning_rate": 7.15e-07, "loss": 0.5329, "step": 144 }, { "epoch": 0.008119610258707582, "grad_norm": 1.3756242990493774, "learning_rate": 7.2e-07, "loss": 0.4445, "step": 145 }, { "epoch": 0.0081756075708366, "grad_norm": 1.3819228410720825, "learning_rate": 7.25e-07, "loss": 0.4424, "step": 146 }, { "epoch": 0.008231604882965617, "grad_norm": 1.4747849702835083, "learning_rate": 7.3e-07, "loss": 0.5919, "step": 147 }, { "epoch": 0.008287602195094635, "grad_norm": 1.4428592920303345, "learning_rate": 7.350000000000001e-07, "loss": 0.6117, "step": 148 }, { "epoch": 0.008343599507223654, "grad_norm": 1.2393968105316162, "learning_rate": 7.400000000000001e-07, "loss": 0.4504, "step": 149 }, { "epoch": 0.008399596819352671, "grad_norm": 1.219472885131836, "learning_rate": 7.450000000000001e-07, "loss": 0.4756, "step": 150 }, { "epoch": 0.008455594131481689, "grad_norm": 1.123125433921814, "learning_rate": 7.5e-07, "loss": 0.4298, "step": 151 }, { "epoch": 0.008511591443610706, "grad_norm": 1.373967170715332, "learning_rate": 7.550000000000001e-07, "loss": 0.4997, "step": 152 }, { "epoch": 0.008567588755739725, "grad_norm": 1.5358420610427856, "learning_rate": 7.6e-07, "loss": 0.6462, "step": 153 }, { "epoch": 0.008623586067868743, "grad_norm": 2.188631296157837, "learning_rate": 7.65e-07, "loss": 0.4562, "step": 154 }, { "epoch": 0.00867958337999776, "grad_norm": 1.5309580564498901, "learning_rate": 7.7e-07, "loss": 0.4204, "step": 155 }, { "epoch": 0.008735580692126777, "grad_norm": 1.2409296035766602, 
"learning_rate": 7.75e-07, "loss": 0.4228, "step": 156 }, { "epoch": 0.008791578004255795, "grad_norm": null, "learning_rate": 7.75e-07, "loss": 0.5002, "step": 157 }, { "epoch": 0.008847575316384814, "grad_norm": 1.2380938529968262, "learning_rate": 7.8e-07, "loss": 0.4157, "step": 158 }, { "epoch": 0.008903572628513831, "grad_norm": 1.5206270217895508, "learning_rate": 7.85e-07, "loss": 0.5231, "step": 159 }, { "epoch": 0.008959569940642849, "grad_norm": 1.4741681814193726, "learning_rate": 7.900000000000002e-07, "loss": 0.4099, "step": 160 }, { "epoch": 0.009015567252771866, "grad_norm": 1.3287562131881714, "learning_rate": 7.950000000000001e-07, "loss": 0.3698, "step": 161 }, { "epoch": 0.009071564564900885, "grad_norm": 1.2945431470870972, "learning_rate": 8.000000000000001e-07, "loss": 0.4772, "step": 162 }, { "epoch": 0.009127561877029903, "grad_norm": 1.1704779863357544, "learning_rate": 8.05e-07, "loss": 0.5405, "step": 163 }, { "epoch": 0.00918355918915892, "grad_norm": 1.4360202550888062, "learning_rate": 8.1e-07, "loss": 0.6144, "step": 164 }, { "epoch": 0.009239556501287938, "grad_norm": 1.7820653915405273, "learning_rate": 8.149999999999999e-07, "loss": 0.6294, "step": 165 }, { "epoch": 0.009295553813416955, "grad_norm": 1.488885760307312, "learning_rate": 8.200000000000001e-07, "loss": 0.5041, "step": 166 }, { "epoch": 0.009351551125545974, "grad_norm": 1.0918800830841064, "learning_rate": 8.25e-07, "loss": 0.3615, "step": 167 }, { "epoch": 0.009407548437674992, "grad_norm": 1.3130441904067993, "learning_rate": 8.300000000000001e-07, "loss": 0.4489, "step": 168 }, { "epoch": 0.009463545749804009, "grad_norm": 1.2139885425567627, "learning_rate": 8.35e-07, "loss": 0.4585, "step": 169 }, { "epoch": 0.009519543061933027, "grad_norm": 1.5473610162734985, "learning_rate": 8.4e-07, "loss": 0.5744, "step": 170 }, { "epoch": 0.009575540374062046, "grad_norm": 1.2141534090042114, "learning_rate": 8.45e-07, "loss": 0.4876, "step": 171 }, { "epoch": 
0.009631537686191063, "grad_norm": 1.2391549348831177, "learning_rate": 8.500000000000001e-07, "loss": 0.3436, "step": 172 }, { "epoch": 0.00968753499832008, "grad_norm": 1.445334792137146, "learning_rate": 8.550000000000001e-07, "loss": 0.4885, "step": 173 }, { "epoch": 0.009743532310449098, "grad_norm": 1.298776626586914, "learning_rate": 8.6e-07, "loss": 0.4514, "step": 174 }, { "epoch": 0.009799529622578117, "grad_norm": 1.5566740036010742, "learning_rate": 8.65e-07, "loss": 0.4906, "step": 175 }, { "epoch": 0.009855526934707134, "grad_norm": 1.181283712387085, "learning_rate": 8.699999999999999e-07, "loss": 0.5238, "step": 176 }, { "epoch": 0.009911524246836152, "grad_norm": 1.2867265939712524, "learning_rate": 8.750000000000001e-07, "loss": 0.4365, "step": 177 }, { "epoch": 0.00996752155896517, "grad_norm": 1.2805672883987427, "learning_rate": 8.8e-07, "loss": 0.4703, "step": 178 }, { "epoch": 0.010023518871094187, "grad_norm": 1.4181572198867798, "learning_rate": 8.850000000000001e-07, "loss": 0.545, "step": 179 }, { "epoch": 0.010079516183223206, "grad_norm": 1.273695945739746, "learning_rate": 8.900000000000001e-07, "loss": 0.4459, "step": 180 }, { "epoch": 0.010135513495352223, "grad_norm": 1.3415385484695435, "learning_rate": 8.95e-07, "loss": 0.5312, "step": 181 }, { "epoch": 0.01019151080748124, "grad_norm": 1.2442187070846558, "learning_rate": 9e-07, "loss": 0.4855, "step": 182 }, { "epoch": 0.010247508119610258, "grad_norm": 1.607743740081787, "learning_rate": 9.050000000000001e-07, "loss": 0.636, "step": 183 }, { "epoch": 0.010303505431739277, "grad_norm": 1.3131659030914307, "learning_rate": 9.100000000000001e-07, "loss": 0.4415, "step": 184 }, { "epoch": 0.010359502743868295, "grad_norm": 1.1934609413146973, "learning_rate": 9.15e-07, "loss": 0.4209, "step": 185 }, { "epoch": 0.010415500055997312, "grad_norm": 1.2915111780166626, "learning_rate": 9.2e-07, "loss": 0.4771, "step": 186 }, { "epoch": 0.01047149736812633, "grad_norm": 
1.5296729803085327, "learning_rate": 9.25e-07, "loss": 0.6426, "step": 187 }, { "epoch": 0.010527494680255347, "grad_norm": 2.22491717338562, "learning_rate": 9.3e-07, "loss": 0.5904, "step": 188 }, { "epoch": 0.010583491992384366, "grad_norm": 1.2684471607208252, "learning_rate": 9.350000000000002e-07, "loss": 0.5164, "step": 189 }, { "epoch": 0.010639489304513383, "grad_norm": 1.7580739259719849, "learning_rate": 9.400000000000001e-07, "loss": 0.471, "step": 190 }, { "epoch": 0.010695486616642401, "grad_norm": 1.3328614234924316, "learning_rate": 9.450000000000001e-07, "loss": 0.5199, "step": 191 }, { "epoch": 0.010751483928771418, "grad_norm": 10.069008827209473, "learning_rate": 9.5e-07, "loss": 0.3754, "step": 192 }, { "epoch": 0.010807481240900437, "grad_norm": 1.2109394073486328, "learning_rate": 9.55e-07, "loss": 0.5417, "step": 193 }, { "epoch": 0.010863478553029455, "grad_norm": 1.279045581817627, "learning_rate": 9.6e-07, "loss": 0.6722, "step": 194 }, { "epoch": 0.010919475865158472, "grad_norm": 1.482125997543335, "learning_rate": 9.65e-07, "loss": 0.4102, "step": 195 }, { "epoch": 0.01097547317728749, "grad_norm": 1.2239766120910645, "learning_rate": 9.7e-07, "loss": 0.4908, "step": 196 }, { "epoch": 0.011031470489416509, "grad_norm": 1.3760238885879517, "learning_rate": 9.75e-07, "loss": 0.4594, "step": 197 }, { "epoch": 0.011087467801545526, "grad_norm": 1.3297518491744995, "learning_rate": 9.8e-07, "loss": 0.5052, "step": 198 }, { "epoch": 0.011143465113674544, "grad_norm": 1.2966132164001465, "learning_rate": 9.849999999999999e-07, "loss": 0.4952, "step": 199 }, { "epoch": 0.011199462425803561, "grad_norm": 1.4916179180145264, "learning_rate": 9.9e-07, "loss": 0.4624, "step": 200 }, { "epoch": 0.011255459737932579, "grad_norm": 1.8758313655853271, "learning_rate": 9.95e-07, "loss": 0.6089, "step": 201 }, { "epoch": 0.011311457050061598, "grad_norm": 1.1678751707077026, "learning_rate": 1.0000000000000002e-06, "loss": 0.4251, "step": 202 }, { 
"epoch": 0.011367454362190615, "grad_norm": 1.1295413970947266, "learning_rate": 1.0050000000000001e-06, "loss": 0.394, "step": 203 }, { "epoch": 0.011423451674319633, "grad_norm": 1.2641093730926514, "learning_rate": 1.01e-06, "loss": 0.4225, "step": 204 }, { "epoch": 0.01147944898644865, "grad_norm": 1.326363205909729, "learning_rate": 1.015e-06, "loss": 0.5714, "step": 205 }, { "epoch": 0.011535446298577669, "grad_norm": 0.9996131062507629, "learning_rate": 1.0200000000000002e-06, "loss": 0.3477, "step": 206 }, { "epoch": 0.011591443610706686, "grad_norm": 1.3686496019363403, "learning_rate": 1.0250000000000001e-06, "loss": 0.4609, "step": 207 }, { "epoch": 0.011647440922835704, "grad_norm": 1.1996912956237793, "learning_rate": 1.03e-06, "loss": 0.5012, "step": 208 }, { "epoch": 0.011703438234964721, "grad_norm": 1.2017878293991089, "learning_rate": 1.035e-06, "loss": 0.4679, "step": 209 }, { "epoch": 0.011759435547093739, "grad_norm": 1.3492447137832642, "learning_rate": 1.04e-06, "loss": 0.6228, "step": 210 }, { "epoch": 0.011815432859222758, "grad_norm": 1.0122253894805908, "learning_rate": 1.045e-06, "loss": 0.4258, "step": 211 }, { "epoch": 0.011871430171351775, "grad_norm": 1.3060179948806763, "learning_rate": 1.0500000000000001e-06, "loss": 0.4753, "step": 212 }, { "epoch": 0.011927427483480793, "grad_norm": 1.1057965755462646, "learning_rate": 1.055e-06, "loss": 0.372, "step": 213 }, { "epoch": 0.01198342479560981, "grad_norm": 1.383988857269287, "learning_rate": 1.06e-06, "loss": 0.6251, "step": 214 }, { "epoch": 0.01203942210773883, "grad_norm": 1.1000547409057617, "learning_rate": 1.065e-06, "loss": 0.3334, "step": 215 }, { "epoch": 0.012095419419867847, "grad_norm": 1.307716727256775, "learning_rate": 1.07e-06, "loss": 0.4021, "step": 216 }, { "epoch": 0.012151416731996864, "grad_norm": 1.3594478368759155, "learning_rate": 1.0749999999999999e-06, "loss": 0.4034, "step": 217 }, { "epoch": 0.012207414044125882, "grad_norm": 1.0731086730957031, 
"learning_rate": 1.08e-06, "loss": 0.4315, "step": 218 }, { "epoch": 0.012263411356254899, "grad_norm": 1.0504783391952515, "learning_rate": 1.085e-06, "loss": 0.3754, "step": 219 }, { "epoch": 0.012319408668383918, "grad_norm": 1.1919739246368408, "learning_rate": 1.0900000000000002e-06, "loss": 0.4962, "step": 220 }, { "epoch": 0.012375405980512936, "grad_norm": 1.6576035022735596, "learning_rate": 1.095e-06, "loss": 0.6879, "step": 221 }, { "epoch": 0.012431403292641953, "grad_norm": 1.518970251083374, "learning_rate": 1.1e-06, "loss": 0.6722, "step": 222 }, { "epoch": 0.01248740060477097, "grad_norm": 1.672376036643982, "learning_rate": 1.1050000000000002e-06, "loss": 0.5444, "step": 223 }, { "epoch": 0.01254339791689999, "grad_norm": 1.0940489768981934, "learning_rate": 1.1100000000000002e-06, "loss": 0.3958, "step": 224 }, { "epoch": 0.012599395229029007, "grad_norm": 1.4551310539245605, "learning_rate": 1.1150000000000001e-06, "loss": 0.5054, "step": 225 }, { "epoch": 0.012655392541158024, "grad_norm": 1.3679637908935547, "learning_rate": 1.12e-06, "loss": 0.4151, "step": 226 }, { "epoch": 0.012711389853287042, "grad_norm": 1.242140293121338, "learning_rate": 1.125e-06, "loss": 0.401, "step": 227 }, { "epoch": 0.012767387165416061, "grad_norm": 1.0123270750045776, "learning_rate": 1.13e-06, "loss": 0.4147, "step": 228 }, { "epoch": 0.012823384477545078, "grad_norm": 1.3932843208312988, "learning_rate": 1.1350000000000001e-06, "loss": 0.4361, "step": 229 }, { "epoch": 0.012879381789674096, "grad_norm": 1.0555158853530884, "learning_rate": 1.14e-06, "loss": 0.4174, "step": 230 }, { "epoch": 0.012935379101803113, "grad_norm": 1.380471110343933, "learning_rate": 1.145e-06, "loss": 0.4259, "step": 231 }, { "epoch": 0.01299137641393213, "grad_norm": 1.2852363586425781, "learning_rate": 1.15e-06, "loss": 0.4641, "step": 232 }, { "epoch": 0.01304737372606115, "grad_norm": 1.1543710231781006, "learning_rate": 1.155e-06, "loss": 0.5133, "step": 233 }, { "epoch": 
0.013103371038190167, "grad_norm": 1.174673318862915, "learning_rate": 1.16e-06, "loss": 0.3946, "step": 234 }, { "epoch": 0.013159368350319185, "grad_norm": 1.1139841079711914, "learning_rate": 1.165e-06, "loss": 0.4283, "step": 235 }, { "epoch": 0.013215365662448202, "grad_norm": 1.6161155700683594, "learning_rate": 1.17e-06, "loss": 0.5241, "step": 236 }, { "epoch": 0.013271362974577221, "grad_norm": 1.313589334487915, "learning_rate": 1.175e-06, "loss": 0.5173, "step": 237 }, { "epoch": 0.013327360286706239, "grad_norm": 1.4072622060775757, "learning_rate": 1.18e-06, "loss": 0.7632, "step": 238 }, { "epoch": 0.013383357598835256, "grad_norm": 1.611053228378296, "learning_rate": 1.185e-06, "loss": 0.526, "step": 239 }, { "epoch": 0.013439354910964273, "grad_norm": 1.2626253366470337, "learning_rate": 1.19e-06, "loss": 0.6914, "step": 240 }, { "epoch": 0.01349535222309329, "grad_norm": 1.1688967943191528, "learning_rate": 1.1950000000000002e-06, "loss": 0.5033, "step": 241 }, { "epoch": 0.01355134953522231, "grad_norm": 1.1805068254470825, "learning_rate": 1.2000000000000002e-06, "loss": 0.5048, "step": 242 }, { "epoch": 0.013607346847351327, "grad_norm": 1.3243757486343384, "learning_rate": 1.2050000000000001e-06, "loss": 0.5549, "step": 243 }, { "epoch": 0.013663344159480345, "grad_norm": 1.4326049089431763, "learning_rate": 1.21e-06, "loss": 0.733, "step": 244 }, { "epoch": 0.013719341471609362, "grad_norm": 1.2837655544281006, "learning_rate": 1.215e-06, "loss": 0.5065, "step": 245 }, { "epoch": 0.013775338783738381, "grad_norm": 1.687691330909729, "learning_rate": 1.2200000000000002e-06, "loss": 0.605, "step": 246 }, { "epoch": 0.013831336095867399, "grad_norm": 1.4184305667877197, "learning_rate": 1.2250000000000001e-06, "loss": 0.4447, "step": 247 }, { "epoch": 0.013887333407996416, "grad_norm": 1.5195060968399048, "learning_rate": 1.23e-06, "loss": 0.4551, "step": 248 }, { "epoch": 0.013943330720125434, "grad_norm": 1.5335530042648315, "learning_rate": 
1.235e-06, "loss": 0.5239, "step": 249 }, { "epoch": 0.013999328032254451, "grad_norm": 1.2263363599777222, "learning_rate": 1.24e-06, "loss": 0.4089, "step": 250 }, { "epoch": 0.01405532534438347, "grad_norm": 1.3511929512023926, "learning_rate": 1.245e-06, "loss": 0.5311, "step": 251 }, { "epoch": 0.014111322656512488, "grad_norm": 1.4361425638198853, "learning_rate": 1.25e-06, "loss": 0.5113, "step": 252 }, { "epoch": 0.014167319968641505, "grad_norm": 1.3185176849365234, "learning_rate": 1.255e-06, "loss": 0.5712, "step": 253 }, { "epoch": 0.014223317280770522, "grad_norm": 1.7490395307540894, "learning_rate": 1.26e-06, "loss": 0.4938, "step": 254 }, { "epoch": 0.014279314592899542, "grad_norm": 1.2518575191497803, "learning_rate": 1.265e-06, "loss": 0.4094, "step": 255 }, { "epoch": 0.014335311905028559, "grad_norm": 1.9215528964996338, "learning_rate": 1.27e-06, "loss": 0.5999, "step": 256 }, { "epoch": 0.014391309217157576, "grad_norm": 1.1665399074554443, "learning_rate": 1.275e-06, "loss": 0.4608, "step": 257 }, { "epoch": 0.014447306529286594, "grad_norm": 1.461685299873352, "learning_rate": 1.28e-06, "loss": 0.593, "step": 258 }, { "epoch": 0.014503303841415613, "grad_norm": 1.0450584888458252, "learning_rate": 1.2850000000000002e-06, "loss": 0.3811, "step": 259 }, { "epoch": 0.01455930115354463, "grad_norm": 1.2816405296325684, "learning_rate": 1.2900000000000001e-06, "loss": 0.4941, "step": 260 }, { "epoch": 0.014615298465673648, "grad_norm": 1.2557902336120605, "learning_rate": 1.295e-06, "loss": 0.459, "step": 261 }, { "epoch": 0.014671295777802665, "grad_norm": 1.1175843477249146, "learning_rate": 1.3e-06, "loss": 0.4936, "step": 262 }, { "epoch": 0.014727293089931683, "grad_norm": 1.479242205619812, "learning_rate": 1.3050000000000002e-06, "loss": 0.4825, "step": 263 }, { "epoch": 0.014783290402060702, "grad_norm": 1.5817683935165405, "learning_rate": 1.3100000000000002e-06, "loss": 0.3805, "step": 264 }, { "epoch": 0.01483928771418972, 
"grad_norm": 1.5696752071380615, "learning_rate": 1.3150000000000001e-06, "loss": 0.6506, "step": 265 }, { "epoch": 0.014895285026318737, "grad_norm": 1.4629154205322266, "learning_rate": 1.32e-06, "loss": 0.4585, "step": 266 }, { "epoch": 0.014951282338447754, "grad_norm": 1.5436128377914429, "learning_rate": 1.325e-06, "loss": 0.5556, "step": 267 }, { "epoch": 0.015007279650576773, "grad_norm": 1.4215129613876343, "learning_rate": 1.33e-06, "loss": 0.4365, "step": 268 }, { "epoch": 0.01506327696270579, "grad_norm": 1.3057754039764404, "learning_rate": 1.3350000000000001e-06, "loss": 0.4787, "step": 269 }, { "epoch": 0.015119274274834808, "grad_norm": 1.1739662885665894, "learning_rate": 1.34e-06, "loss": 0.5333, "step": 270 }, { "epoch": 0.015175271586963825, "grad_norm": 1.51952064037323, "learning_rate": 1.345e-06, "loss": 0.3872, "step": 271 }, { "epoch": 0.015231268899092843, "grad_norm": 1.7925301790237427, "learning_rate": 1.35e-06, "loss": 0.5211, "step": 272 }, { "epoch": 0.015287266211221862, "grad_norm": 1.2592473030090332, "learning_rate": 1.355e-06, "loss": 0.4538, "step": 273 }, { "epoch": 0.01534326352335088, "grad_norm": 1.247963547706604, "learning_rate": 1.36e-06, "loss": 0.4779, "step": 274 }, { "epoch": 0.015399260835479897, "grad_norm": 1.4560402631759644, "learning_rate": 1.365e-06, "loss": 0.4341, "step": 275 }, { "epoch": 0.015455258147608914, "grad_norm": 1.4166802167892456, "learning_rate": 1.37e-06, "loss": 0.5152, "step": 276 }, { "epoch": 0.015511255459737933, "grad_norm": 1.4385263919830322, "learning_rate": 1.3750000000000002e-06, "loss": 0.4547, "step": 277 }, { "epoch": 0.01556725277186695, "grad_norm": 1.5150035619735718, "learning_rate": 1.3800000000000001e-06, "loss": 0.5362, "step": 278 }, { "epoch": 0.015623250083995968, "grad_norm": 3.9275166988372803, "learning_rate": 1.385e-06, "loss": 0.5216, "step": 279 }, { "epoch": 0.015679247396124987, "grad_norm": 1.3215997219085693, "learning_rate": 1.39e-06, "loss": 0.6029, "step": 
280 }, { "epoch": 0.015735244708254003, "grad_norm": 1.7647961378097534, "learning_rate": 1.3950000000000002e-06, "loss": 0.4077, "step": 281 }, { "epoch": 0.015791242020383022, "grad_norm": 1.5197356939315796, "learning_rate": 1.4000000000000001e-06, "loss": 0.6688, "step": 282 }, { "epoch": 0.015847239332512038, "grad_norm": 1.4369826316833496, "learning_rate": 1.405e-06, "loss": 0.4598, "step": 283 }, { "epoch": 0.015903236644641057, "grad_norm": 1.3526933193206787, "learning_rate": 1.41e-06, "loss": 0.5358, "step": 284 }, { "epoch": 0.015959233956770076, "grad_norm": 1.2903668880462646, "learning_rate": 1.415e-06, "loss": 0.3934, "step": 285 }, { "epoch": 0.016015231268899092, "grad_norm": 1.2543387413024902, "learning_rate": 1.4200000000000002e-06, "loss": 0.496, "step": 286 }, { "epoch": 0.01607122858102811, "grad_norm": 1.3359887599945068, "learning_rate": 1.4250000000000001e-06, "loss": 0.5165, "step": 287 }, { "epoch": 0.01612722589315713, "grad_norm": 1.1500910520553589, "learning_rate": 1.43e-06, "loss": 0.3095, "step": 288 }, { "epoch": 0.016183223205286146, "grad_norm": 1.53554368019104, "learning_rate": 1.435e-06, "loss": 0.6023, "step": 289 }, { "epoch": 0.016239220517415165, "grad_norm": 1.2718192338943481, "learning_rate": 1.44e-06, "loss": 0.4301, "step": 290 }, { "epoch": 0.01629521782954418, "grad_norm": 1.4737380743026733, "learning_rate": 1.445e-06, "loss": 0.4529, "step": 291 }, { "epoch": 0.0163512151416732, "grad_norm": 1.2656103372573853, "learning_rate": 1.45e-06, "loss": 0.5206, "step": 292 }, { "epoch": 0.01640721245380222, "grad_norm": 1.2045220136642456, "learning_rate": 1.455e-06, "loss": 0.4806, "step": 293 }, { "epoch": 0.016463209765931235, "grad_norm": 1.7034382820129395, "learning_rate": 1.46e-06, "loss": 0.4972, "step": 294 }, { "epoch": 0.016519207078060254, "grad_norm": 1.1198574304580688, "learning_rate": 1.465e-06, "loss": 0.5209, "step": 295 }, { "epoch": 0.01657520439018927, "grad_norm": 1.133908748626709, 
"learning_rate": 1.4700000000000001e-06, "loss": 0.5018, "step": 296 }, { "epoch": 0.01663120170231829, "grad_norm": 1.2515907287597656, "learning_rate": 1.475e-06, "loss": 0.5826, "step": 297 }, { "epoch": 0.016687199014447308, "grad_norm": 1.297999382019043, "learning_rate": 1.4800000000000002e-06, "loss": 0.4961, "step": 298 }, { "epoch": 0.016743196326576323, "grad_norm": 1.4551876783370972, "learning_rate": 1.4850000000000002e-06, "loss": 0.5899, "step": 299 }, { "epoch": 0.016799193638705343, "grad_norm": 1.346625566482544, "learning_rate": 1.4900000000000001e-06, "loss": 0.5737, "step": 300 }, { "epoch": 0.016855190950834358, "grad_norm": 1.3084245920181274, "learning_rate": 1.495e-06, "loss": 0.5083, "step": 301 }, { "epoch": 0.016911188262963377, "grad_norm": 1.501230001449585, "learning_rate": 1.5e-06, "loss": 0.5419, "step": 302 }, { "epoch": 0.016967185575092397, "grad_norm": 1.3253297805786133, "learning_rate": 1.505e-06, "loss": 0.5299, "step": 303 }, { "epoch": 0.017023182887221412, "grad_norm": 1.3683420419692993, "learning_rate": 1.5100000000000002e-06, "loss": 0.5981, "step": 304 }, { "epoch": 0.01707918019935043, "grad_norm": 1.3841209411621094, "learning_rate": 1.5150000000000001e-06, "loss": 0.4693, "step": 305 }, { "epoch": 0.01713517751147945, "grad_norm": 1.3428679704666138, "learning_rate": 1.52e-06, "loss": 0.445, "step": 306 }, { "epoch": 0.017191174823608466, "grad_norm": 1.248568058013916, "learning_rate": 1.525e-06, "loss": 0.3911, "step": 307 }, { "epoch": 0.017247172135737485, "grad_norm": 1.3845674991607666, "learning_rate": 1.53e-06, "loss": 0.546, "step": 308 }, { "epoch": 0.0173031694478665, "grad_norm": 1.465867519378662, "learning_rate": 1.5350000000000001e-06, "loss": 0.6043, "step": 309 }, { "epoch": 0.01735916675999552, "grad_norm": 1.1764181852340698, "learning_rate": 1.54e-06, "loss": 0.377, "step": 310 }, { "epoch": 0.01741516407212454, "grad_norm": 1.1894500255584717, "learning_rate": 1.545e-06, "loss": 0.4326, "step": 
311 }, { "epoch": 0.017471161384253555, "grad_norm": 1.270615577697754, "learning_rate": 1.55e-06, "loss": 0.448, "step": 312 }, { "epoch": 0.017527158696382574, "grad_norm": 1.9738068580627441, "learning_rate": 1.555e-06, "loss": 0.5594, "step": 313 }, { "epoch": 0.01758315600851159, "grad_norm": 1.4984630346298218, "learning_rate": 1.56e-06, "loss": 0.5236, "step": 314 }, { "epoch": 0.01763915332064061, "grad_norm": 1.8723793029785156, "learning_rate": 1.565e-06, "loss": 0.6548, "step": 315 }, { "epoch": 0.017695150632769628, "grad_norm": 1.298327922821045, "learning_rate": 1.57e-06, "loss": 0.4244, "step": 316 }, { "epoch": 0.017751147944898644, "grad_norm": 1.0812171697616577, "learning_rate": 1.5750000000000002e-06, "loss": 0.4079, "step": 317 }, { "epoch": 0.017807145257027663, "grad_norm": 1.3327000141143799, "learning_rate": 1.5800000000000003e-06, "loss": 0.6322, "step": 318 }, { "epoch": 0.017863142569156682, "grad_norm": 1.8076618909835815, "learning_rate": 1.585e-06, "loss": 0.409, "step": 319 }, { "epoch": 0.017919139881285698, "grad_norm": 1.5142481327056885, "learning_rate": 1.5900000000000002e-06, "loss": 0.5574, "step": 320 }, { "epoch": 0.017975137193414717, "grad_norm": 1.940686821937561, "learning_rate": 1.595e-06, "loss": 0.533, "step": 321 }, { "epoch": 0.018031134505543733, "grad_norm": 1.33735990524292, "learning_rate": 1.6000000000000001e-06, "loss": 0.4811, "step": 322 }, { "epoch": 0.018087131817672752, "grad_norm": 1.296083927154541, "learning_rate": 1.6049999999999999e-06, "loss": 0.4361, "step": 323 }, { "epoch": 0.01814312912980177, "grad_norm": 1.1967061758041382, "learning_rate": 1.61e-06, "loss": 0.4532, "step": 324 }, { "epoch": 0.018199126441930787, "grad_norm": 1.6449378728866577, "learning_rate": 1.6150000000000002e-06, "loss": 0.7581, "step": 325 }, { "epoch": 0.018255123754059806, "grad_norm": 1.3829392194747925, "learning_rate": 1.62e-06, "loss": 0.5744, "step": 326 }, { "epoch": 0.01831112106618882, "grad_norm": 
1.4353978633880615, "learning_rate": 1.6250000000000001e-06, "loss": 0.5701, "step": 327 }, { "epoch": 0.01836711837831784, "grad_norm": 1.405627965927124, "learning_rate": 1.6299999999999999e-06, "loss": 0.4762, "step": 328 }, { "epoch": 0.01842311569044686, "grad_norm": 1.2175483703613281, "learning_rate": 1.635e-06, "loss": 0.4396, "step": 329 }, { "epoch": 0.018479113002575875, "grad_norm": 1.2880176305770874, "learning_rate": 1.6400000000000002e-06, "loss": 0.3867, "step": 330 }, { "epoch": 0.018535110314704895, "grad_norm": 1.3141285181045532, "learning_rate": 1.645e-06, "loss": 0.4472, "step": 331 }, { "epoch": 0.01859110762683391, "grad_norm": 1.2125033140182495, "learning_rate": 1.65e-06, "loss": 0.5351, "step": 332 }, { "epoch": 0.01864710493896293, "grad_norm": 1.4135538339614868, "learning_rate": 1.655e-06, "loss": 0.6807, "step": 333 }, { "epoch": 0.01870310225109195, "grad_norm": 1.2327980995178223, "learning_rate": 1.6600000000000002e-06, "loss": 0.4119, "step": 334 }, { "epoch": 0.018759099563220964, "grad_norm": 1.4258291721343994, "learning_rate": 1.6650000000000002e-06, "loss": 0.6192, "step": 335 }, { "epoch": 0.018815096875349983, "grad_norm": 1.2788861989974976, "learning_rate": 1.67e-06, "loss": 0.5347, "step": 336 }, { "epoch": 0.018871094187479003, "grad_norm": 1.8292855024337769, "learning_rate": 1.6750000000000003e-06, "loss": 0.5791, "step": 337 }, { "epoch": 0.018927091499608018, "grad_norm": 1.2971700429916382, "learning_rate": 1.68e-06, "loss": 0.7015, "step": 338 }, { "epoch": 0.018983088811737037, "grad_norm": 1.2348295450210571, "learning_rate": 1.6850000000000002e-06, "loss": 0.3874, "step": 339 }, { "epoch": 0.019039086123866053, "grad_norm": 1.1175373792648315, "learning_rate": 1.69e-06, "loss": 0.3748, "step": 340 }, { "epoch": 0.019095083435995072, "grad_norm": 1.308707594871521, "learning_rate": 1.695e-06, "loss": 0.6144, "step": 341 }, { "epoch": 0.01915108074812409, "grad_norm": 1.1951675415039062, "learning_rate": 
1.7000000000000002e-06, "loss": 0.4266, "step": 342 }, { "epoch": 0.019207078060253107, "grad_norm": 1.2016880512237549, "learning_rate": 1.705e-06, "loss": 0.457, "step": 343 }, { "epoch": 0.019263075372382126, "grad_norm": 1.9292396306991577, "learning_rate": 1.7100000000000001e-06, "loss": 0.508, "step": 344 }, { "epoch": 0.019319072684511142, "grad_norm": 1.3462649583816528, "learning_rate": 1.7149999999999999e-06, "loss": 0.5321, "step": 345 }, { "epoch": 0.01937506999664016, "grad_norm": 2.1647658348083496, "learning_rate": 1.72e-06, "loss": 0.6202, "step": 346 }, { "epoch": 0.01943106730876918, "grad_norm": 1.140655755996704, "learning_rate": 1.7250000000000002e-06, "loss": 0.486, "step": 347 }, { "epoch": 0.019487064620898196, "grad_norm": 1.2482906579971313, "learning_rate": 1.73e-06, "loss": 0.446, "step": 348 }, { "epoch": 0.019543061933027215, "grad_norm": 1.4118690490722656, "learning_rate": 1.7350000000000001e-06, "loss": 0.7418, "step": 349 }, { "epoch": 0.019599059245156234, "grad_norm": 1.7058286666870117, "learning_rate": 1.7399999999999999e-06, "loss": 0.4212, "step": 350 }, { "epoch": 0.01965505655728525, "grad_norm": 1.3479478359222412, "learning_rate": 1.745e-06, "loss": 0.4631, "step": 351 }, { "epoch": 0.01971105386941427, "grad_norm": 1.2280378341674805, "learning_rate": 1.7500000000000002e-06, "loss": 0.6453, "step": 352 }, { "epoch": 0.019767051181543285, "grad_norm": 1.2790160179138184, "learning_rate": 1.7550000000000001e-06, "loss": 0.4572, "step": 353 }, { "epoch": 0.019823048493672304, "grad_norm": 1.3132696151733398, "learning_rate": 1.76e-06, "loss": 0.612, "step": 354 }, { "epoch": 0.019879045805801323, "grad_norm": 1.4868576526641846, "learning_rate": 1.765e-06, "loss": 0.5342, "step": 355 }, { "epoch": 0.01993504311793034, "grad_norm": 1.3354017734527588, "learning_rate": 1.7700000000000002e-06, "loss": 0.5269, "step": 356 }, { "epoch": 0.019991040430059358, "grad_norm": 1.2551268339157104, "learning_rate": 1.775e-06, "loss": 
0.4749, "step": 357 }, { "epoch": 0.020047037742188373, "grad_norm": 1.1117607355117798, "learning_rate": 1.7800000000000001e-06, "loss": 0.3936, "step": 358 }, { "epoch": 0.020103035054317393, "grad_norm": 1.2144349813461304, "learning_rate": 1.7850000000000003e-06, "loss": 0.5228, "step": 359 }, { "epoch": 0.020159032366446412, "grad_norm": 1.3747631311416626, "learning_rate": 1.79e-06, "loss": 0.3782, "step": 360 }, { "epoch": 0.020215029678575427, "grad_norm": 1.2662698030471802, "learning_rate": 1.7950000000000002e-06, "loss": 0.4512, "step": 361 }, { "epoch": 0.020271026990704447, "grad_norm": 1.4247366189956665, "learning_rate": 1.8e-06, "loss": 0.5507, "step": 362 }, { "epoch": 0.020327024302833462, "grad_norm": 1.3011784553527832, "learning_rate": 1.805e-06, "loss": 0.4161, "step": 363 }, { "epoch": 0.02038302161496248, "grad_norm": 1.6431331634521484, "learning_rate": 1.8100000000000002e-06, "loss": 0.4019, "step": 364 }, { "epoch": 0.0204390189270915, "grad_norm": 1.3516236543655396, "learning_rate": 1.815e-06, "loss": 0.5224, "step": 365 }, { "epoch": 0.020495016239220516, "grad_norm": 1.4772320985794067, "learning_rate": 1.8200000000000002e-06, "loss": 0.4568, "step": 366 }, { "epoch": 0.020551013551349535, "grad_norm": 1.2806977033615112, "learning_rate": 1.8249999999999999e-06, "loss": 0.3767, "step": 367 }, { "epoch": 0.020607010863478555, "grad_norm": 1.4820971488952637, "learning_rate": 1.83e-06, "loss": 0.5307, "step": 368 }, { "epoch": 0.02066300817560757, "grad_norm": 1.237572193145752, "learning_rate": 1.8350000000000002e-06, "loss": 0.4643, "step": 369 }, { "epoch": 0.02071900548773659, "grad_norm": 1.4231642484664917, "learning_rate": 1.84e-06, "loss": 0.5078, "step": 370 }, { "epoch": 0.020775002799865605, "grad_norm": 1.2499034404754639, "learning_rate": 1.8450000000000001e-06, "loss": 0.5165, "step": 371 }, { "epoch": 0.020831000111994624, "grad_norm": 1.2820899486541748, "learning_rate": 1.85e-06, "loss": 0.5586, "step": 372 }, { 
"epoch": 0.020886997424123643, "grad_norm": 1.3260475397109985, "learning_rate": 1.8550000000000002e-06, "loss": 0.3488, "step": 373 }, { "epoch": 0.02094299473625266, "grad_norm": 2.5342912673950195, "learning_rate": 1.86e-06, "loss": 0.7005, "step": 374 }, { "epoch": 0.020998992048381678, "grad_norm": 1.2824656963348389, "learning_rate": 1.8650000000000001e-06, "loss": 0.4665, "step": 375 }, { "epoch": 0.021054989360510694, "grad_norm": 1.4731974601745605, "learning_rate": 1.8700000000000003e-06, "loss": 0.5446, "step": 376 }, { "epoch": 0.021110986672639713, "grad_norm": 1.3010905981063843, "learning_rate": 1.875e-06, "loss": 0.5301, "step": 377 }, { "epoch": 0.021166983984768732, "grad_norm": 1.2400481700897217, "learning_rate": 1.8800000000000002e-06, "loss": 0.6083, "step": 378 }, { "epoch": 0.021222981296897748, "grad_norm": 1.2115167379379272, "learning_rate": 1.885e-06, "loss": 0.4465, "step": 379 }, { "epoch": 0.021278978609026767, "grad_norm": 1.2475694417953491, "learning_rate": 1.8900000000000001e-06, "loss": 0.5166, "step": 380 }, { "epoch": 0.021334975921155786, "grad_norm": 1.2450370788574219, "learning_rate": 1.8950000000000003e-06, "loss": 0.4768, "step": 381 }, { "epoch": 0.021390973233284802, "grad_norm": 1.3486833572387695, "learning_rate": 1.9e-06, "loss": 0.63, "step": 382 }, { "epoch": 0.02144697054541382, "grad_norm": 1.5615993738174438, "learning_rate": 1.9050000000000002e-06, "loss": 0.5499, "step": 383 }, { "epoch": 0.021502967857542837, "grad_norm": 1.0564674139022827, "learning_rate": 1.91e-06, "loss": 0.4291, "step": 384 }, { "epoch": 0.021558965169671856, "grad_norm": 1.1341657638549805, "learning_rate": 1.9150000000000003e-06, "loss": 0.4337, "step": 385 }, { "epoch": 0.021614962481800875, "grad_norm": 1.2850117683410645, "learning_rate": 1.92e-06, "loss": 0.3848, "step": 386 }, { "epoch": 0.02167095979392989, "grad_norm": 1.4669718742370605, "learning_rate": 1.925e-06, "loss": 0.4459, "step": 387 }, { "epoch": 0.02172695710605891, 
"grad_norm": 1.3067212104797363, "learning_rate": 1.93e-06, "loss": 0.4179, "step": 388 }, { "epoch": 0.021782954418187925, "grad_norm": 1.338586688041687, "learning_rate": 1.935e-06, "loss": 0.468, "step": 389 }, { "epoch": 0.021838951730316945, "grad_norm": 1.3039277791976929, "learning_rate": 1.94e-06, "loss": 0.4632, "step": 390 }, { "epoch": 0.021894949042445964, "grad_norm": 1.2103965282440186, "learning_rate": 1.945e-06, "loss": 0.4425, "step": 391 }, { "epoch": 0.02195094635457498, "grad_norm": 1.1756149530410767, "learning_rate": 1.95e-06, "loss": 0.5389, "step": 392 }, { "epoch": 0.022006943666704, "grad_norm": 1.6295759677886963, "learning_rate": 1.9550000000000003e-06, "loss": 0.5089, "step": 393 }, { "epoch": 0.022062940978833018, "grad_norm": 1.2471020221710205, "learning_rate": 1.96e-06, "loss": 0.4591, "step": 394 }, { "epoch": 0.022118938290962033, "grad_norm": 1.276649832725525, "learning_rate": 1.9650000000000002e-06, "loss": 0.4814, "step": 395 }, { "epoch": 0.022174935603091053, "grad_norm": 1.3960410356521606, "learning_rate": 1.9699999999999998e-06, "loss": 0.3976, "step": 396 }, { "epoch": 0.022230932915220068, "grad_norm": 1.5828909873962402, "learning_rate": 1.975e-06, "loss": 0.4629, "step": 397 }, { "epoch": 0.022286930227349087, "grad_norm": 1.1500638723373413, "learning_rate": 1.98e-06, "loss": 0.4718, "step": 398 }, { "epoch": 0.022342927539478107, "grad_norm": 1.217605471611023, "learning_rate": 1.985e-06, "loss": 0.5962, "step": 399 }, { "epoch": 0.022398924851607122, "grad_norm": 1.4575655460357666, "learning_rate": 1.99e-06, "loss": 0.5309, "step": 400 }, { "epoch": 0.02245492216373614, "grad_norm": 1.4009506702423096, "learning_rate": 1.995e-06, "loss": 0.5567, "step": 401 }, { "epoch": 0.022510919475865157, "grad_norm": 1.2377105951309204, "learning_rate": 2.0000000000000003e-06, "loss": 0.4204, "step": 402 }, { "epoch": 0.022566916787994176, "grad_norm": 1.214349389076233, "learning_rate": 2.005e-06, "loss": 0.5886, "step": 403 
}, { "epoch": 0.022622914100123195, "grad_norm": 1.275333046913147, "learning_rate": 2.0100000000000002e-06, "loss": 0.4612, "step": 404 }, { "epoch": 0.02267891141225221, "grad_norm": 1.3972492218017578, "learning_rate": 2.015e-06, "loss": 0.6052, "step": 405 }, { "epoch": 0.02273490872438123, "grad_norm": 1.0658378601074219, "learning_rate": 2.02e-06, "loss": 0.3883, "step": 406 }, { "epoch": 0.022790906036510246, "grad_norm": 1.2928189039230347, "learning_rate": 2.025e-06, "loss": 0.6167, "step": 407 }, { "epoch": 0.022846903348639265, "grad_norm": 1.3358932733535767, "learning_rate": 2.03e-06, "loss": 0.3844, "step": 408 }, { "epoch": 0.022902900660768284, "grad_norm": 1.479500412940979, "learning_rate": 2.035e-06, "loss": 0.4249, "step": 409 }, { "epoch": 0.0229588979728973, "grad_norm": 1.2579445838928223, "learning_rate": 2.0400000000000004e-06, "loss": 0.4825, "step": 410 }, { "epoch": 0.02301489528502632, "grad_norm": 1.2056101560592651, "learning_rate": 2.045e-06, "loss": 0.4273, "step": 411 }, { "epoch": 0.023070892597155338, "grad_norm": 1.3829147815704346, "learning_rate": 2.0500000000000003e-06, "loss": 0.6324, "step": 412 }, { "epoch": 0.023126889909284354, "grad_norm": 1.4959462881088257, "learning_rate": 2.055e-06, "loss": 0.5117, "step": 413 }, { "epoch": 0.023182887221413373, "grad_norm": 1.593402624130249, "learning_rate": 2.06e-06, "loss": 0.573, "step": 414 }, { "epoch": 0.02323888453354239, "grad_norm": 1.2119688987731934, "learning_rate": 2.065e-06, "loss": 0.4263, "step": 415 }, { "epoch": 0.023294881845671408, "grad_norm": 1.2972121238708496, "learning_rate": 2.07e-06, "loss": 0.4812, "step": 416 }, { "epoch": 0.023350879157800427, "grad_norm": 1.3286548852920532, "learning_rate": 2.075e-06, "loss": 0.4082, "step": 417 }, { "epoch": 0.023406876469929443, "grad_norm": 1.6062296628952026, "learning_rate": 2.08e-06, "loss": 0.5704, "step": 418 }, { "epoch": 0.023462873782058462, "grad_norm": 1.2025117874145508, "learning_rate": 2.085e-06, 
"loss": 0.4505, "step": 419 }, { "epoch": 0.023518871094187477, "grad_norm": 1.1478592157363892, "learning_rate": 2.09e-06, "loss": 0.4226, "step": 420 }, { "epoch": 0.023574868406316497, "grad_norm": 1.5359517335891724, "learning_rate": 2.0950000000000003e-06, "loss": 0.5153, "step": 421 }, { "epoch": 0.023630865718445516, "grad_norm": 1.400523066520691, "learning_rate": 2.1000000000000002e-06, "loss": 0.5782, "step": 422 }, { "epoch": 0.02368686303057453, "grad_norm": 1.2018365859985352, "learning_rate": 2.105e-06, "loss": 0.4398, "step": 423 }, { "epoch": 0.02374286034270355, "grad_norm": 1.286759853363037, "learning_rate": 2.11e-06, "loss": 0.4881, "step": 424 }, { "epoch": 0.02379885765483257, "grad_norm": 1.2115166187286377, "learning_rate": 2.115e-06, "loss": 0.5765, "step": 425 }, { "epoch": 0.023854854966961585, "grad_norm": 1.3774797916412354, "learning_rate": 2.12e-06, "loss": 0.4356, "step": 426 }, { "epoch": 0.023910852279090605, "grad_norm": 1.211519479751587, "learning_rate": 2.1250000000000004e-06, "loss": 0.5455, "step": 427 }, { "epoch": 0.02396684959121962, "grad_norm": 1.1984440088272095, "learning_rate": 2.13e-06, "loss": 0.4587, "step": 428 }, { "epoch": 0.02402284690334864, "grad_norm": 1.484640121459961, "learning_rate": 2.1350000000000003e-06, "loss": 0.4316, "step": 429 }, { "epoch": 0.02407884421547766, "grad_norm": 1.4276765584945679, "learning_rate": 2.14e-06, "loss": 0.5961, "step": 430 }, { "epoch": 0.024134841527606674, "grad_norm": 0.9489923715591431, "learning_rate": 2.1450000000000002e-06, "loss": 0.3301, "step": 431 }, { "epoch": 0.024190838839735693, "grad_norm": 1.2276958227157593, "learning_rate": 2.1499999999999997e-06, "loss": 0.3866, "step": 432 }, { "epoch": 0.02424683615186471, "grad_norm": 1.308908462524414, "learning_rate": 2.155e-06, "loss": 0.4123, "step": 433 }, { "epoch": 0.024302833463993728, "grad_norm": 1.2939643859863281, "learning_rate": 2.16e-06, "loss": 0.4278, "step": 434 }, { "epoch": 0.024358830776122747, 
"grad_norm": 1.3001083135604858, "learning_rate": 2.165e-06, "loss": 0.657, "step": 435 }, { "epoch": 0.024414828088251763, "grad_norm": 1.6597774028778076, "learning_rate": 2.17e-06, "loss": 0.6464, "step": 436 }, { "epoch": 0.024470825400380782, "grad_norm": 1.2446776628494263, "learning_rate": 2.175e-06, "loss": 0.4831, "step": 437 }, { "epoch": 0.024526822712509798, "grad_norm": 0.962810218334198, "learning_rate": 2.1800000000000003e-06, "loss": 0.3997, "step": 438 }, { "epoch": 0.024582820024638817, "grad_norm": 1.5243562459945679, "learning_rate": 2.1850000000000003e-06, "loss": 0.3864, "step": 439 }, { "epoch": 0.024638817336767836, "grad_norm": 1.2688652276992798, "learning_rate": 2.19e-06, "loss": 0.4964, "step": 440 }, { "epoch": 0.024694814648896852, "grad_norm": 1.4606717824935913, "learning_rate": 2.195e-06, "loss": 0.6258, "step": 441 }, { "epoch": 0.02475081196102587, "grad_norm": 1.2517849206924438, "learning_rate": 2.2e-06, "loss": 0.4499, "step": 442 }, { "epoch": 0.02480680927315489, "grad_norm": 1.210240125656128, "learning_rate": 2.205e-06, "loss": 0.5199, "step": 443 }, { "epoch": 0.024862806585283906, "grad_norm": 1.3706921339035034, "learning_rate": 2.2100000000000004e-06, "loss": 0.4387, "step": 444 }, { "epoch": 0.024918803897412925, "grad_norm": 1.144190788269043, "learning_rate": 2.215e-06, "loss": 0.3526, "step": 445 }, { "epoch": 0.02497480120954194, "grad_norm": 1.22667396068573, "learning_rate": 2.2200000000000003e-06, "loss": 0.5217, "step": 446 }, { "epoch": 0.02503079852167096, "grad_norm": 1.5109798908233643, "learning_rate": 2.225e-06, "loss": 0.4878, "step": 447 }, { "epoch": 0.02508679583379998, "grad_norm": 1.2600774765014648, "learning_rate": 2.2300000000000002e-06, "loss": 0.4519, "step": 448 }, { "epoch": 0.025142793145928995, "grad_norm": 1.29214346408844, "learning_rate": 2.2349999999999998e-06, "loss": 0.459, "step": 449 }, { "epoch": 0.025198790458058014, "grad_norm": 1.1631250381469727, "learning_rate": 2.24e-06, 
"loss": 0.4691, "step": 450 }, { "epoch": 0.02525478777018703, "grad_norm": 1.379044771194458, "learning_rate": 2.245e-06, "loss": 0.524, "step": 451 }, { "epoch": 0.02531078508231605, "grad_norm": 1.169918417930603, "learning_rate": 2.25e-06, "loss": 0.4829, "step": 452 }, { "epoch": 0.025366782394445068, "grad_norm": 1.133783221244812, "learning_rate": 2.255e-06, "loss": 0.536, "step": 453 }, { "epoch": 0.025422779706574083, "grad_norm": 1.431279182434082, "learning_rate": 2.26e-06, "loss": 0.5463, "step": 454 }, { "epoch": 0.025478777018703103, "grad_norm": 1.282882809638977, "learning_rate": 2.265e-06, "loss": 0.4981, "step": 455 }, { "epoch": 0.025534774330832122, "grad_norm": 1.1804708242416382, "learning_rate": 2.2700000000000003e-06, "loss": 0.419, "step": 456 }, { "epoch": 0.025590771642961137, "grad_norm": 1.298541784286499, "learning_rate": 2.2750000000000002e-06, "loss": 0.4705, "step": 457 }, { "epoch": 0.025646768955090157, "grad_norm": 1.1998291015625, "learning_rate": 2.28e-06, "loss": 0.3948, "step": 458 }, { "epoch": 0.025702766267219172, "grad_norm": 1.3842189311981201, "learning_rate": 2.285e-06, "loss": 0.4904, "step": 459 }, { "epoch": 0.02575876357934819, "grad_norm": 1.264269232749939, "learning_rate": 2.29e-06, "loss": 0.4574, "step": 460 }, { "epoch": 0.02581476089147721, "grad_norm": 1.2546792030334473, "learning_rate": 2.2950000000000005e-06, "loss": 0.3574, "step": 461 }, { "epoch": 0.025870758203606226, "grad_norm": 1.0641919374465942, "learning_rate": 2.3e-06, "loss": 0.5158, "step": 462 }, { "epoch": 0.025926755515735245, "grad_norm": 1.35035240650177, "learning_rate": 2.3050000000000004e-06, "loss": 0.5019, "step": 463 }, { "epoch": 0.02598275282786426, "grad_norm": 1.248283863067627, "learning_rate": 2.31e-06, "loss": 0.4133, "step": 464 }, { "epoch": 0.02603875013999328, "grad_norm": 1.6036901473999023, "learning_rate": 2.3150000000000003e-06, "loss": 0.6371, "step": 465 }, { "epoch": 0.0260947474521223, "grad_norm": 
1.4229565858840942, "learning_rate": 2.32e-06, "loss": 0.4239, "step": 466 }, { "epoch": 0.026150744764251315, "grad_norm": 1.245241403579712, "learning_rate": 2.325e-06, "loss": 0.4052, "step": 467 }, { "epoch": 0.026206742076380334, "grad_norm": 1.4726648330688477, "learning_rate": 2.33e-06, "loss": 0.5263, "step": 468 }, { "epoch": 0.02626273938850935, "grad_norm": 1.1756625175476074, "learning_rate": 2.335e-06, "loss": 0.4515, "step": 469 }, { "epoch": 0.02631873670063837, "grad_norm": 1.3073421716690063, "learning_rate": 2.34e-06, "loss": 0.6833, "step": 470 }, { "epoch": 0.026374734012767388, "grad_norm": 1.1485313177108765, "learning_rate": 2.345e-06, "loss": 0.3888, "step": 471 }, { "epoch": 0.026430731324896404, "grad_norm": 1.120802402496338, "learning_rate": 2.35e-06, "loss": 0.3704, "step": 472 }, { "epoch": 0.026486728637025423, "grad_norm": 1.3014438152313232, "learning_rate": 2.3550000000000003e-06, "loss": 0.6238, "step": 473 }, { "epoch": 0.026542725949154442, "grad_norm": 1.2936598062515259, "learning_rate": 2.36e-06, "loss": 0.5, "step": 474 }, { "epoch": 0.026598723261283458, "grad_norm": 1.2880456447601318, "learning_rate": 2.3650000000000002e-06, "loss": 0.41, "step": 475 }, { "epoch": 0.026654720573412477, "grad_norm": 1.2736504077911377, "learning_rate": 2.37e-06, "loss": 0.4159, "step": 476 }, { "epoch": 0.026710717885541493, "grad_norm": 1.3126493692398071, "learning_rate": 2.375e-06, "loss": 0.3981, "step": 477 }, { "epoch": 0.026766715197670512, "grad_norm": 1.1644043922424316, "learning_rate": 2.38e-06, "loss": 0.4143, "step": 478 }, { "epoch": 0.02682271250979953, "grad_norm": 1.2235571146011353, "learning_rate": 2.385e-06, "loss": 0.4354, "step": 479 }, { "epoch": 0.026878709821928547, "grad_norm": 1.503503680229187, "learning_rate": 2.3900000000000004e-06, "loss": 0.4693, "step": 480 }, { "epoch": 0.026934707134057566, "grad_norm": 1.1442761421203613, "learning_rate": 2.395e-06, "loss": 0.4132, "step": 481 }, { "epoch": 
0.02699070444618658, "grad_norm": 1.382543921470642, "learning_rate": 2.4000000000000003e-06, "loss": 0.4063, "step": 482 }, { "epoch": 0.0270467017583156, "grad_norm": 1.2548270225524902, "learning_rate": 2.405e-06, "loss": 0.3863, "step": 483 }, { "epoch": 0.02710269907044462, "grad_norm": 1.2861868143081665, "learning_rate": 2.4100000000000002e-06, "loss": 0.4733, "step": 484 }, { "epoch": 0.027158696382573635, "grad_norm": 1.3512135744094849, "learning_rate": 2.415e-06, "loss": 0.4445, "step": 485 }, { "epoch": 0.027214693694702655, "grad_norm": 16.69654655456543, "learning_rate": 2.42e-06, "loss": 0.5016, "step": 486 }, { "epoch": 0.027270691006831674, "grad_norm": 1.4881073236465454, "learning_rate": 2.425e-06, "loss": 0.5744, "step": 487 }, { "epoch": 0.02732668831896069, "grad_norm": 1.1898785829544067, "learning_rate": 2.43e-06, "loss": 0.526, "step": 488 }, { "epoch": 0.02738268563108971, "grad_norm": 1.2967259883880615, "learning_rate": 2.435e-06, "loss": 0.5289, "step": 489 }, { "epoch": 0.027438682943218724, "grad_norm": 1.49234139919281, "learning_rate": 2.4400000000000004e-06, "loss": 0.6374, "step": 490 }, { "epoch": 0.027494680255347743, "grad_norm": 1.1433918476104736, "learning_rate": 2.445e-06, "loss": 0.3643, "step": 491 }, { "epoch": 0.027550677567476763, "grad_norm": 1.375954508781433, "learning_rate": 2.4500000000000003e-06, "loss": 0.655, "step": 492 }, { "epoch": 0.02760667487960578, "grad_norm": 1.0267375707626343, "learning_rate": 2.4550000000000002e-06, "loss": 0.5066, "step": 493 }, { "epoch": 0.027662672191734797, "grad_norm": 1.6544849872589111, "learning_rate": 2.46e-06, "loss": 0.4229, "step": 494 }, { "epoch": 0.027718669503863813, "grad_norm": 1.0966876745224, "learning_rate": 2.465e-06, "loss": 0.446, "step": 495 }, { "epoch": 0.027774666815992832, "grad_norm": 1.3126972913742065, "learning_rate": 2.47e-06, "loss": 0.4629, "step": 496 }, { "epoch": 0.02783066412812185, "grad_norm": 1.245058298110962, "learning_rate": 
2.4750000000000004e-06, "loss": 0.618, "step": 497 }, { "epoch": 0.027886661440250867, "grad_norm": 1.700477957725525, "learning_rate": 2.48e-06, "loss": 0.5081, "step": 498 }, { "epoch": 0.027942658752379886, "grad_norm": 1.3252248764038086, "learning_rate": 2.4850000000000003e-06, "loss": 0.4537, "step": 499 }, { "epoch": 0.027998656064508902, "grad_norm": 1.6630816459655762, "learning_rate": 2.49e-06, "loss": 0.3542, "step": 500 }, { "epoch": 0.02805465337663792, "grad_norm": 1.546900749206543, "learning_rate": 2.4950000000000003e-06, "loss": 0.6161, "step": 501 }, { "epoch": 0.02811065068876694, "grad_norm": 1.234569787979126, "learning_rate": 2.5e-06, "loss": 0.436, "step": 502 }, { "epoch": 0.028166648000895956, "grad_norm": 1.4984447956085205, "learning_rate": 2.505e-06, "loss": 0.4401, "step": 503 }, { "epoch": 0.028222645313024975, "grad_norm": 1.3634111881256104, "learning_rate": 2.51e-06, "loss": 0.4091, "step": 504 }, { "epoch": 0.028278642625153994, "grad_norm": 1.5010645389556885, "learning_rate": 2.515e-06, "loss": 0.4773, "step": 505 }, { "epoch": 0.02833463993728301, "grad_norm": 1.1970478296279907, "learning_rate": 2.52e-06, "loss": 0.4239, "step": 506 }, { "epoch": 0.02839063724941203, "grad_norm": 1.2129427194595337, "learning_rate": 2.5250000000000004e-06, "loss": 0.444, "step": 507 }, { "epoch": 0.028446634561541045, "grad_norm": 1.586907148361206, "learning_rate": 2.53e-06, "loss": 0.5241, "step": 508 }, { "epoch": 0.028502631873670064, "grad_norm": 1.338651418685913, "learning_rate": 2.5350000000000003e-06, "loss": 0.5268, "step": 509 }, { "epoch": 0.028558629185799083, "grad_norm": 1.1952580213546753, "learning_rate": 2.54e-06, "loss": 0.4454, "step": 510 }, { "epoch": 0.0286146264979281, "grad_norm": 1.2241628170013428, "learning_rate": 2.545e-06, "loss": 0.4019, "step": 511 }, { "epoch": 0.028670623810057118, "grad_norm": 1.2646491527557373, "learning_rate": 2.55e-06, "loss": 0.5136, "step": 512 }, { "epoch": 0.028726621122186134, 
"grad_norm": 1.3502599000930786, "learning_rate": 2.555e-06, "loss": 0.557, "step": 513 }, { "epoch": 0.028782618434315153, "grad_norm": 1.107664704322815, "learning_rate": 2.56e-06, "loss": 0.3778, "step": 514 }, { "epoch": 0.028838615746444172, "grad_norm": 1.457395076751709, "learning_rate": 2.565e-06, "loss": 0.7625, "step": 515 }, { "epoch": 0.028894613058573188, "grad_norm": 1.2146413326263428, "learning_rate": 2.5700000000000004e-06, "loss": 0.4772, "step": 516 }, { "epoch": 0.028950610370702207, "grad_norm": 1.2828574180603027, "learning_rate": 2.575e-06, "loss": 0.4514, "step": 517 }, { "epoch": 0.029006607682831226, "grad_norm": 1.3281506299972534, "learning_rate": 2.5800000000000003e-06, "loss": 0.5961, "step": 518 }, { "epoch": 0.02906260499496024, "grad_norm": 1.1464653015136719, "learning_rate": 2.5850000000000002e-06, "loss": 0.3494, "step": 519 }, { "epoch": 0.02911860230708926, "grad_norm": 1.368131399154663, "learning_rate": 2.59e-06, "loss": 0.5817, "step": 520 }, { "epoch": 0.029174599619218276, "grad_norm": 1.3296947479248047, "learning_rate": 2.595e-06, "loss": 0.615, "step": 521 }, { "epoch": 0.029230596931347295, "grad_norm": 1.1147074699401855, "learning_rate": 2.6e-06, "loss": 0.437, "step": 522 }, { "epoch": 0.029286594243476315, "grad_norm": 1.301010012626648, "learning_rate": 2.605e-06, "loss": 0.4163, "step": 523 }, { "epoch": 0.02934259155560533, "grad_norm": 1.564185380935669, "learning_rate": 2.6100000000000004e-06, "loss": 0.6379, "step": 524 }, { "epoch": 0.02939858886773435, "grad_norm": 1.2892937660217285, "learning_rate": 2.615e-06, "loss": 0.5075, "step": 525 }, { "epoch": 0.029454586179863365, "grad_norm": 1.5146722793579102, "learning_rate": 2.6200000000000003e-06, "loss": 0.5486, "step": 526 }, { "epoch": 0.029510583491992384, "grad_norm": 1.3844255208969116, "learning_rate": 2.625e-06, "loss": 0.5322, "step": 527 }, { "epoch": 0.029566580804121403, "grad_norm": 1.4252188205718994, "learning_rate": 2.6300000000000002e-06, 
"loss": 0.696, "step": 528 }, { "epoch": 0.02962257811625042, "grad_norm": 1.1514140367507935, "learning_rate": 2.6349999999999998e-06, "loss": 0.3972, "step": 529 }, { "epoch": 0.02967857542837944, "grad_norm": 1.384243369102478, "learning_rate": 2.64e-06, "loss": 0.4538, "step": 530 }, { "epoch": 0.029734572740508454, "grad_norm": 1.5517767667770386, "learning_rate": 2.645e-06, "loss": 0.7234, "step": 531 }, { "epoch": 0.029790570052637473, "grad_norm": 1.220450520515442, "learning_rate": 2.65e-06, "loss": 0.4006, "step": 532 }, { "epoch": 0.029846567364766492, "grad_norm": 1.5837041139602661, "learning_rate": 2.655e-06, "loss": 0.5086, "step": 533 }, { "epoch": 0.029902564676895508, "grad_norm": 2.34853196144104, "learning_rate": 2.66e-06, "loss": 0.5333, "step": 534 }, { "epoch": 0.029958561989024527, "grad_norm": 1.433725118637085, "learning_rate": 2.6650000000000003e-06, "loss": 0.6396, "step": 535 }, { "epoch": 0.030014559301153546, "grad_norm": 1.336133599281311, "learning_rate": 2.6700000000000003e-06, "loss": 0.5378, "step": 536 }, { "epoch": 0.030070556613282562, "grad_norm": 1.3267064094543457, "learning_rate": 2.6750000000000002e-06, "loss": 0.6231, "step": 537 }, { "epoch": 0.03012655392541158, "grad_norm": 1.4553563594818115, "learning_rate": 2.68e-06, "loss": 0.5325, "step": 538 }, { "epoch": 0.030182551237540597, "grad_norm": 1.1272870302200317, "learning_rate": 2.685e-06, "loss": 0.4525, "step": 539 }, { "epoch": 0.030238548549669616, "grad_norm": 1.8186594247817993, "learning_rate": 2.69e-06, "loss": 0.4983, "step": 540 }, { "epoch": 0.030294545861798635, "grad_norm": 1.2522361278533936, "learning_rate": 2.6950000000000005e-06, "loss": 0.5709, "step": 541 }, { "epoch": 0.03035054317392765, "grad_norm": 1.3136264085769653, "learning_rate": 2.7e-06, "loss": 0.4878, "step": 542 }, { "epoch": 0.03040654048605667, "grad_norm": 1.222240686416626, "learning_rate": 2.7050000000000004e-06, "loss": 0.4881, "step": 543 }, { "epoch": 0.030462537798185686, 
"grad_norm": 1.3070313930511475, "learning_rate": 2.71e-06, "loss": 0.4469, "step": 544 }, { "epoch": 0.030518535110314705, "grad_norm": 1.2443084716796875, "learning_rate": 2.7150000000000003e-06, "loss": 0.5433, "step": 545 }, { "epoch": 0.030574532422443724, "grad_norm": 1.4681586027145386, "learning_rate": 2.72e-06, "loss": 0.5622, "step": 546 }, { "epoch": 0.03063052973457274, "grad_norm": 1.0752031803131104, "learning_rate": 2.725e-06, "loss": 0.3916, "step": 547 }, { "epoch": 0.03068652704670176, "grad_norm": 1.338870644569397, "learning_rate": 2.73e-06, "loss": 0.4114, "step": 548 }, { "epoch": 0.030742524358830778, "grad_norm": 1.2860264778137207, "learning_rate": 2.735e-06, "loss": 0.4996, "step": 549 }, { "epoch": 0.030798521670959794, "grad_norm": 10.62662124633789, "learning_rate": 2.74e-06, "loss": 0.474, "step": 550 }, { "epoch": 0.030854518983088813, "grad_norm": 1.2164727449417114, "learning_rate": 2.745e-06, "loss": 0.5298, "step": 551 }, { "epoch": 0.03091051629521783, "grad_norm": 1.0920971632003784, "learning_rate": 2.7500000000000004e-06, "loss": 0.4692, "step": 552 }, { "epoch": 0.030966513607346847, "grad_norm": 1.2918709516525269, "learning_rate": 2.7550000000000003e-06, "loss": 0.3756, "step": 553 }, { "epoch": 0.031022510919475867, "grad_norm": 1.6300508975982666, "learning_rate": 2.7600000000000003e-06, "loss": 0.4695, "step": 554 }, { "epoch": 0.031078508231604882, "grad_norm": 1.2380396127700806, "learning_rate": 2.765e-06, "loss": 0.5224, "step": 555 }, { "epoch": 0.0311345055437339, "grad_norm": 1.748967170715332, "learning_rate": 2.77e-06, "loss": 0.6471, "step": 556 }, { "epoch": 0.031190502855862917, "grad_norm": 1.3673230409622192, "learning_rate": 2.775e-06, "loss": 0.576, "step": 557 }, { "epoch": 0.031246500167991936, "grad_norm": 1.1652144193649292, "learning_rate": 2.78e-06, "loss": 0.4436, "step": 558 }, { "epoch": 0.031302497480120955, "grad_norm": 1.121286153793335, "learning_rate": 2.785e-06, "loss": 0.4163, "step": 559 
}, { "epoch": 0.031358494792249975, "grad_norm": 1.2735199928283691, "learning_rate": 2.7900000000000004e-06, "loss": 0.4716, "step": 560 }, { "epoch": 0.03141449210437899, "grad_norm": 1.420231819152832, "learning_rate": 2.795e-06, "loss": 0.618, "step": 561 }, { "epoch": 0.031470489416508006, "grad_norm": 1.2376223802566528, "learning_rate": 2.8000000000000003e-06, "loss": 0.3637, "step": 562 }, { "epoch": 0.031526486728637025, "grad_norm": 1.3908500671386719, "learning_rate": 2.805e-06, "loss": 0.6301, "step": 563 }, { "epoch": 0.031582484040766044, "grad_norm": 1.2173155546188354, "learning_rate": 2.81e-06, "loss": 0.4616, "step": 564 }, { "epoch": 0.03163848135289506, "grad_norm": 1.752326488494873, "learning_rate": 2.815e-06, "loss": 0.403, "step": 565 }, { "epoch": 0.031694478665024076, "grad_norm": 1.6780214309692383, "learning_rate": 2.82e-06, "loss": 0.5124, "step": 566 }, { "epoch": 0.031750475977153095, "grad_norm": 1.2604048252105713, "learning_rate": 2.825e-06, "loss": 0.4851, "step": 567 }, { "epoch": 0.031806473289282114, "grad_norm": 1.31491219997406, "learning_rate": 2.83e-06, "loss": 0.4711, "step": 568 }, { "epoch": 0.03186247060141113, "grad_norm": 4.062485218048096, "learning_rate": 2.835e-06, "loss": 0.5685, "step": 569 }, { "epoch": 0.03191846791354015, "grad_norm": 1.2817109823226929, "learning_rate": 2.8400000000000003e-06, "loss": 0.427, "step": 570 }, { "epoch": 0.031974465225669164, "grad_norm": 1.2257922887802124, "learning_rate": 2.8450000000000003e-06, "loss": 0.485, "step": 571 }, { "epoch": 0.032030462537798184, "grad_norm": 1.1841542720794678, "learning_rate": 2.8500000000000002e-06, "loss": 0.4409, "step": 572 }, { "epoch": 0.0320864598499272, "grad_norm": 1.4808175563812256, "learning_rate": 2.855e-06, "loss": 0.4201, "step": 573 }, { "epoch": 0.03214245716205622, "grad_norm": 1.2412053346633911, "learning_rate": 2.86e-06, "loss": 0.4371, "step": 574 }, { "epoch": 0.03219845447418524, "grad_norm": 1.184926986694336, 
"learning_rate": 2.865e-06, "loss": 0.5311, "step": 575 }, { "epoch": 0.03225445178631426, "grad_norm": 1.2667714357376099, "learning_rate": 2.87e-06, "loss": 0.4015, "step": 576 }, { "epoch": 0.03231044909844327, "grad_norm": 1.4195163249969482, "learning_rate": 2.8750000000000004e-06, "loss": 0.4566, "step": 577 }, { "epoch": 0.03236644641057229, "grad_norm": 1.3507108688354492, "learning_rate": 2.88e-06, "loss": 0.5713, "step": 578 }, { "epoch": 0.03242244372270131, "grad_norm": 1.4129518270492554, "learning_rate": 2.8850000000000003e-06, "loss": 0.5334, "step": 579 }, { "epoch": 0.03247844103483033, "grad_norm": 1.3056820631027222, "learning_rate": 2.89e-06, "loss": 0.4025, "step": 580 }, { "epoch": 0.03253443834695935, "grad_norm": 1.4413847923278809, "learning_rate": 2.8950000000000002e-06, "loss": 0.5258, "step": 581 }, { "epoch": 0.03259043565908836, "grad_norm": 1.270606279373169, "learning_rate": 2.9e-06, "loss": 0.4603, "step": 582 }, { "epoch": 0.03264643297121738, "grad_norm": 1.3087241649627686, "learning_rate": 2.905e-06, "loss": 0.5114, "step": 583 }, { "epoch": 0.0327024302833464, "grad_norm": 1.163053274154663, "learning_rate": 2.91e-06, "loss": 0.4819, "step": 584 }, { "epoch": 0.03275842759547542, "grad_norm": 1.5739001035690308, "learning_rate": 2.915e-06, "loss": 0.6085, "step": 585 }, { "epoch": 0.03281442490760444, "grad_norm": 1.167637825012207, "learning_rate": 2.92e-06, "loss": 0.4609, "step": 586 }, { "epoch": 0.03287042221973345, "grad_norm": 1.22612464427948, "learning_rate": 2.9250000000000004e-06, "loss": 0.5268, "step": 587 }, { "epoch": 0.03292641953186247, "grad_norm": 1.5374791622161865, "learning_rate": 2.93e-06, "loss": 0.5035, "step": 588 }, { "epoch": 0.03298241684399149, "grad_norm": 1.3516424894332886, "learning_rate": 2.9350000000000003e-06, "loss": 0.4111, "step": 589 }, { "epoch": 0.03303841415612051, "grad_norm": 1.4715403318405151, "learning_rate": 2.9400000000000002e-06, "loss": 0.371, "step": 590 }, { "epoch": 
0.03309441146824953, "grad_norm": 1.4850103855133057, "learning_rate": 2.945e-06, "loss": 0.4527, "step": 591 }, { "epoch": 0.03315040878037854, "grad_norm": 1.4048868417739868, "learning_rate": 2.95e-06, "loss": 0.458, "step": 592 }, { "epoch": 0.03320640609250756, "grad_norm": 1.741652011871338, "learning_rate": 2.955e-06, "loss": 0.4839, "step": 593 }, { "epoch": 0.03326240340463658, "grad_norm": 1.2577733993530273, "learning_rate": 2.9600000000000005e-06, "loss": 0.5411, "step": 594 }, { "epoch": 0.033318400716765596, "grad_norm": 1.3856233358383179, "learning_rate": 2.965e-06, "loss": 0.5322, "step": 595 }, { "epoch": 0.033374398028894615, "grad_norm": 1.447217345237732, "learning_rate": 2.9700000000000004e-06, "loss": 0.6493, "step": 596 }, { "epoch": 0.03343039534102363, "grad_norm": 1.4229252338409424, "learning_rate": 2.975e-06, "loss": 0.3921, "step": 597 }, { "epoch": 0.03348639265315265, "grad_norm": 11.00639820098877, "learning_rate": 2.9800000000000003e-06, "loss": 0.3584, "step": 598 }, { "epoch": 0.033542389965281666, "grad_norm": 1.2174385786056519, "learning_rate": 2.9850000000000002e-06, "loss": 0.3824, "step": 599 }, { "epoch": 0.033598387277410685, "grad_norm": 1.1561325788497925, "learning_rate": 2.99e-06, "loss": 0.4915, "step": 600 }, { "epoch": 0.033654384589539704, "grad_norm": 1.4047815799713135, "learning_rate": 2.995e-06, "loss": 0.5279, "step": 601 }, { "epoch": 0.033710381901668716, "grad_norm": 4.640164852142334, "learning_rate": 3e-06, "loss": 0.4194, "step": 602 }, { "epoch": 0.033766379213797736, "grad_norm": 1.154270052909851, "learning_rate": 3.005e-06, "loss": 0.4438, "step": 603 }, { "epoch": 0.033822376525926755, "grad_norm": 1.1494734287261963, "learning_rate": 3.01e-06, "loss": 0.4764, "step": 604 }, { "epoch": 0.033878373838055774, "grad_norm": 1.1763715744018555, "learning_rate": 3.015e-06, "loss": 0.5438, "step": 605 }, { "epoch": 0.03393437115018479, "grad_norm": 1.0460625886917114, "learning_rate": 
3.0200000000000003e-06, "loss": 0.4528, "step": 606 }, { "epoch": 0.03399036846231381, "grad_norm": 5.100392818450928, "learning_rate": 3.0250000000000003e-06, "loss": 0.4503, "step": 607 }, { "epoch": 0.034046365774442824, "grad_norm": 1.0152342319488525, "learning_rate": 3.0300000000000002e-06, "loss": 0.4112, "step": 608 }, { "epoch": 0.034102363086571844, "grad_norm": 1.1221469640731812, "learning_rate": 3.035e-06, "loss": 0.4216, "step": 609 }, { "epoch": 0.03415836039870086, "grad_norm": 2.1545469760894775, "learning_rate": 3.04e-06, "loss": 0.5182, "step": 610 }, { "epoch": 0.03421435771082988, "grad_norm": 1.3261489868164062, "learning_rate": 3.0450000000000005e-06, "loss": 0.7225, "step": 611 }, { "epoch": 0.0342703550229589, "grad_norm": 1.443620204925537, "learning_rate": 3.05e-06, "loss": 0.6039, "step": 612 }, { "epoch": 0.03432635233508791, "grad_norm": 1.3014883995056152, "learning_rate": 3.0550000000000004e-06, "loss": 0.5219, "step": 613 }, { "epoch": 0.03438234964721693, "grad_norm": 2.6879069805145264, "learning_rate": 3.06e-06, "loss": 0.501, "step": 614 }, { "epoch": 0.03443834695934595, "grad_norm": 1.3093355894088745, "learning_rate": 3.0650000000000003e-06, "loss": 0.4093, "step": 615 }, { "epoch": 0.03449434427147497, "grad_norm": 1.1495851278305054, "learning_rate": 3.0700000000000003e-06, "loss": 0.388, "step": 616 }, { "epoch": 0.03455034158360399, "grad_norm": 1.318150281906128, "learning_rate": 3.075e-06, "loss": 0.4241, "step": 617 }, { "epoch": 0.034606338895733, "grad_norm": 1.5912282466888428, "learning_rate": 3.08e-06, "loss": 0.5434, "step": 618 }, { "epoch": 0.03466233620786202, "grad_norm": 1.113492488861084, "learning_rate": 3.085e-06, "loss": 0.5174, "step": 619 }, { "epoch": 0.03471833351999104, "grad_norm": 1.3931667804718018, "learning_rate": 3.09e-06, "loss": 0.3684, "step": 620 }, { "epoch": 0.03477433083212006, "grad_norm": 1.3992459774017334, "learning_rate": 3.095e-06, "loss": 0.472, "step": 621 }, { "epoch": 
0.03483032814424908, "grad_norm": 1.2402055263519287, "learning_rate": 3.1e-06, "loss": 0.3491, "step": 622 }, { "epoch": 0.03488632545637809, "grad_norm": 1.3450154066085815, "learning_rate": 3.1050000000000003e-06, "loss": 0.5987, "step": 623 }, { "epoch": 0.03494232276850711, "grad_norm": 1.214292287826538, "learning_rate": 3.11e-06, "loss": 0.4254, "step": 624 }, { "epoch": 0.03499832008063613, "grad_norm": 1.2098424434661865, "learning_rate": 3.1150000000000002e-06, "loss": 0.3846, "step": 625 }, { "epoch": 0.03505431739276515, "grad_norm": 1.3445996046066284, "learning_rate": 3.12e-06, "loss": 0.5421, "step": 626 }, { "epoch": 0.03511031470489417, "grad_norm": 1.2277252674102783, "learning_rate": 3.125e-06, "loss": 0.6699, "step": 627 }, { "epoch": 0.03516631201702318, "grad_norm": 1.179675817489624, "learning_rate": 3.13e-06, "loss": 0.3626, "step": 628 }, { "epoch": 0.0352223093291522, "grad_norm": 12.111026763916016, "learning_rate": 3.1350000000000005e-06, "loss": 0.3788, "step": 629 }, { "epoch": 0.03527830664128122, "grad_norm": 1.2397090196609497, "learning_rate": 3.14e-06, "loss": 0.4611, "step": 630 }, { "epoch": 0.03533430395341024, "grad_norm": 1.2845584154129028, "learning_rate": 3.145e-06, "loss": 0.4117, "step": 631 }, { "epoch": 0.035390301265539256, "grad_norm": 1.0865263938903809, "learning_rate": 3.1500000000000003e-06, "loss": 0.3984, "step": 632 }, { "epoch": 0.03544629857766827, "grad_norm": 1.6106657981872559, "learning_rate": 3.1550000000000003e-06, "loss": 0.5957, "step": 633 }, { "epoch": 0.03550229588979729, "grad_norm": 1.0239503383636475, "learning_rate": 3.1600000000000007e-06, "loss": 0.4252, "step": 634 }, { "epoch": 0.03555829320192631, "grad_norm": 1.4274102449417114, "learning_rate": 3.1649999999999998e-06, "loss": 0.5052, "step": 635 }, { "epoch": 0.035614290514055326, "grad_norm": 1.3243954181671143, "learning_rate": 3.17e-06, "loss": 0.5587, "step": 636 }, { "epoch": 0.035670287826184345, "grad_norm": 1.0345747470855713, 
"learning_rate": 3.175e-06, "loss": 0.3972, "step": 637 }, { "epoch": 0.035726285138313364, "grad_norm": 1.2989063262939453, "learning_rate": 3.1800000000000005e-06, "loss": 0.536, "step": 638 }, { "epoch": 0.035782282450442376, "grad_norm": 1.3336299657821655, "learning_rate": 3.1850000000000004e-06, "loss": 0.4359, "step": 639 }, { "epoch": 0.035838279762571396, "grad_norm": 1.2301009893417358, "learning_rate": 3.19e-06, "loss": 0.4369, "step": 640 }, { "epoch": 0.035894277074700415, "grad_norm": 1.0255048274993896, "learning_rate": 3.195e-06, "loss": 0.4257, "step": 641 }, { "epoch": 0.035950274386829434, "grad_norm": 1.262315273284912, "learning_rate": 3.2000000000000003e-06, "loss": 0.409, "step": 642 }, { "epoch": 0.03600627169895845, "grad_norm": 1.1630877256393433, "learning_rate": 3.2050000000000002e-06, "loss": 0.3283, "step": 643 }, { "epoch": 0.036062269011087465, "grad_norm": 1.3334583044052124, "learning_rate": 3.2099999999999998e-06, "loss": 0.4425, "step": 644 }, { "epoch": 0.036118266323216484, "grad_norm": 1.649351954460144, "learning_rate": 3.215e-06, "loss": 0.5322, "step": 645 }, { "epoch": 0.036174263635345504, "grad_norm": 1.298487901687622, "learning_rate": 3.22e-06, "loss": 0.4998, "step": 646 }, { "epoch": 0.03623026094747452, "grad_norm": 1.2624866962432861, "learning_rate": 3.225e-06, "loss": 0.3944, "step": 647 }, { "epoch": 0.03628625825960354, "grad_norm": 1.4487106800079346, "learning_rate": 3.2300000000000004e-06, "loss": 0.4767, "step": 648 }, { "epoch": 0.036342255571732554, "grad_norm": 1.2317790985107422, "learning_rate": 3.235e-06, "loss": 0.4973, "step": 649 }, { "epoch": 0.03639825288386157, "grad_norm": 1.002759337425232, "learning_rate": 3.24e-06, "loss": 0.3591, "step": 650 }, { "epoch": 0.03645425019599059, "grad_norm": 1.2259923219680786, "learning_rate": 3.2450000000000003e-06, "loss": 0.3969, "step": 651 }, { "epoch": 0.03651024750811961, "grad_norm": 1.3137469291687012, "learning_rate": 3.2500000000000002e-06, "loss": 
0.4246, "step": 652 }, { "epoch": 0.03656624482024863, "grad_norm": 1.1844868659973145, "learning_rate": 3.2550000000000006e-06, "loss": 0.4375, "step": 653 }, { "epoch": 0.03662224213237764, "grad_norm": 1.4780162572860718, "learning_rate": 3.2599999999999997e-06, "loss": 0.5578, "step": 654 }, { "epoch": 0.03667823944450666, "grad_norm": 1.3112592697143555, "learning_rate": 3.265e-06, "loss": 0.354, "step": 655 }, { "epoch": 0.03673423675663568, "grad_norm": 1.0690807104110718, "learning_rate": 3.27e-06, "loss": 0.4815, "step": 656 }, { "epoch": 0.0367902340687647, "grad_norm": 1.209903359413147, "learning_rate": 3.2750000000000004e-06, "loss": 0.3635, "step": 657 }, { "epoch": 0.03684623138089372, "grad_norm": 1.1705541610717773, "learning_rate": 3.2800000000000004e-06, "loss": 0.5315, "step": 658 }, { "epoch": 0.03690222869302273, "grad_norm": 1.2070939540863037, "learning_rate": 3.285e-06, "loss": 0.3196, "step": 659 }, { "epoch": 0.03695822600515175, "grad_norm": 1.1473920345306396, "learning_rate": 3.29e-06, "loss": 0.4252, "step": 660 }, { "epoch": 0.03701422331728077, "grad_norm": 1.2592459917068481, "learning_rate": 3.2950000000000002e-06, "loss": 0.4727, "step": 661 }, { "epoch": 0.03707022062940979, "grad_norm": 2.0471949577331543, "learning_rate": 3.3e-06, "loss": 0.6152, "step": 662 }, { "epoch": 0.03712621794153881, "grad_norm": 1.3031131029129028, "learning_rate": 3.3050000000000005e-06, "loss": 0.7303, "step": 663 }, { "epoch": 0.03718221525366782, "grad_norm": 1.2334582805633545, "learning_rate": 3.31e-06, "loss": 0.5149, "step": 664 }, { "epoch": 0.03723821256579684, "grad_norm": 1.376380205154419, "learning_rate": 3.315e-06, "loss": 0.4672, "step": 665 }, { "epoch": 0.03729420987792586, "grad_norm": 1.150626301765442, "learning_rate": 3.3200000000000004e-06, "loss": 0.506, "step": 666 }, { "epoch": 0.03735020719005488, "grad_norm": 1.3962228298187256, "learning_rate": 3.3250000000000004e-06, "loss": 0.6496, "step": 667 }, { "epoch": 
0.0374062045021839, "grad_norm": 1.3976243734359741, "learning_rate": 3.3300000000000003e-06, "loss": 0.5132, "step": 668 }, { "epoch": 0.037462201814312916, "grad_norm": 1.2252672910690308, "learning_rate": 3.335e-06, "loss": 0.3773, "step": 669 }, { "epoch": 0.03751819912644193, "grad_norm": 1.3803831338882446, "learning_rate": 3.34e-06, "loss": 0.4245, "step": 670 }, { "epoch": 0.03757419643857095, "grad_norm": 1.45225989818573, "learning_rate": 3.345e-06, "loss": 0.7511, "step": 671 }, { "epoch": 0.03763019375069997, "grad_norm": 1.3100789785385132, "learning_rate": 3.3500000000000005e-06, "loss": 0.4476, "step": 672 }, { "epoch": 0.037686191062828986, "grad_norm": 1.2988783121109009, "learning_rate": 3.3550000000000005e-06, "loss": 0.5123, "step": 673 }, { "epoch": 0.037742188374958005, "grad_norm": 1.1171373128890991, "learning_rate": 3.36e-06, "loss": 0.4446, "step": 674 }, { "epoch": 0.03779818568708702, "grad_norm": 1.5306018590927124, "learning_rate": 3.365e-06, "loss": 0.4832, "step": 675 }, { "epoch": 0.037854182999216036, "grad_norm": 1.277156114578247, "learning_rate": 3.3700000000000003e-06, "loss": 0.4448, "step": 676 }, { "epoch": 0.037910180311345056, "grad_norm": 1.3082318305969238, "learning_rate": 3.3750000000000003e-06, "loss": 0.4844, "step": 677 }, { "epoch": 0.037966177623474075, "grad_norm": 1.2136180400848389, "learning_rate": 3.38e-06, "loss": 0.5297, "step": 678 }, { "epoch": 0.038022174935603094, "grad_norm": 1.3228617906570435, "learning_rate": 3.3849999999999998e-06, "loss": 0.4339, "step": 679 }, { "epoch": 0.038078172247732106, "grad_norm": 1.2094823122024536, "learning_rate": 3.39e-06, "loss": 0.4614, "step": 680 }, { "epoch": 0.038134169559861125, "grad_norm": 1.2327871322631836, "learning_rate": 3.395e-06, "loss": 0.4556, "step": 681 }, { "epoch": 0.038190166871990144, "grad_norm": 1.4336551427841187, "learning_rate": 3.4000000000000005e-06, "loss": 0.5025, "step": 682 }, { "epoch": 0.038246164184119164, "grad_norm": 
1.1964848041534424, "learning_rate": 3.405e-06, "loss": 0.3432, "step": 683 }, { "epoch": 0.03830216149624818, "grad_norm": 1.4089807271957397, "learning_rate": 3.41e-06, "loss": 0.5415, "step": 684 }, { "epoch": 0.038358158808377195, "grad_norm": 1.3728796243667603, "learning_rate": 3.4150000000000003e-06, "loss": 0.4414, "step": 685 }, { "epoch": 0.038414156120506214, "grad_norm": 1.1719013452529907, "learning_rate": 3.4200000000000003e-06, "loss": 0.4603, "step": 686 }, { "epoch": 0.03847015343263523, "grad_norm": 1.244240164756775, "learning_rate": 3.4250000000000002e-06, "loss": 0.5535, "step": 687 }, { "epoch": 0.03852615074476425, "grad_norm": 1.2287418842315674, "learning_rate": 3.4299999999999998e-06, "loss": 0.4683, "step": 688 }, { "epoch": 0.03858214805689327, "grad_norm": 1.5462092161178589, "learning_rate": 3.435e-06, "loss": 0.5485, "step": 689 }, { "epoch": 0.038638145369022284, "grad_norm": 1.2673622369766235, "learning_rate": 3.44e-06, "loss": 0.6002, "step": 690 }, { "epoch": 0.0386941426811513, "grad_norm": 1.442499041557312, "learning_rate": 3.4450000000000005e-06, "loss": 0.6001, "step": 691 }, { "epoch": 0.03875013999328032, "grad_norm": 1.2574551105499268, "learning_rate": 3.4500000000000004e-06, "loss": 0.4971, "step": 692 }, { "epoch": 0.03880613730540934, "grad_norm": 1.633691668510437, "learning_rate": 3.455e-06, "loss": 0.6757, "step": 693 }, { "epoch": 0.03886213461753836, "grad_norm": 1.447800636291504, "learning_rate": 3.46e-06, "loss": 0.506, "step": 694 }, { "epoch": 0.03891813192966737, "grad_norm": 1.336168646812439, "learning_rate": 3.4650000000000003e-06, "loss": 0.4252, "step": 695 }, { "epoch": 0.03897412924179639, "grad_norm": 1.0794219970703125, "learning_rate": 3.4700000000000002e-06, "loss": 0.4782, "step": 696 }, { "epoch": 0.03903012655392541, "grad_norm": 1.2878397703170776, "learning_rate": 3.4750000000000006e-06, "loss": 0.6454, "step": 697 }, { "epoch": 0.03908612386605443, "grad_norm": 1.3281662464141846, 
"learning_rate": 3.4799999999999997e-06, "loss": 0.4986, "step": 698 }, { "epoch": 0.03914212117818345, "grad_norm": 1.8202744722366333, "learning_rate": 3.485e-06, "loss": 0.6661, "step": 699 }, { "epoch": 0.03919811849031247, "grad_norm": 1.1826694011688232, "learning_rate": 3.49e-06, "loss": 0.4021, "step": 700 }, { "epoch": 0.03925411580244148, "grad_norm": 2.1669232845306396, "learning_rate": 3.4950000000000004e-06, "loss": 0.4714, "step": 701 }, { "epoch": 0.0393101131145705, "grad_norm": 1.3901138305664062, "learning_rate": 3.5000000000000004e-06, "loss": 0.5407, "step": 702 }, { "epoch": 0.03936611042669952, "grad_norm": 1.0436351299285889, "learning_rate": 3.505e-06, "loss": 0.3554, "step": 703 }, { "epoch": 0.03942210773882854, "grad_norm": 1.0835562944412231, "learning_rate": 3.5100000000000003e-06, "loss": 0.4768, "step": 704 }, { "epoch": 0.03947810505095756, "grad_norm": 1.1899420022964478, "learning_rate": 3.5150000000000002e-06, "loss": 0.539, "step": 705 }, { "epoch": 0.03953410236308657, "grad_norm": 1.0693700313568115, "learning_rate": 3.52e-06, "loss": 0.4778, "step": 706 }, { "epoch": 0.03959009967521559, "grad_norm": 1.595470666885376, "learning_rate": 3.5249999999999997e-06, "loss": 0.5446, "step": 707 }, { "epoch": 0.03964609698734461, "grad_norm": 1.35414719581604, "learning_rate": 3.53e-06, "loss": 0.5125, "step": 708 }, { "epoch": 0.03970209429947363, "grad_norm": 1.272335410118103, "learning_rate": 3.535e-06, "loss": 0.4132, "step": 709 }, { "epoch": 0.039758091611602646, "grad_norm": 3.9252395629882812, "learning_rate": 3.5400000000000004e-06, "loss": 0.488, "step": 710 }, { "epoch": 0.03981408892373166, "grad_norm": 1.3578699827194214, "learning_rate": 3.5450000000000004e-06, "loss": 0.6098, "step": 711 }, { "epoch": 0.03987008623586068, "grad_norm": 1.3184001445770264, "learning_rate": 3.55e-06, "loss": 0.3705, "step": 712 }, { "epoch": 0.039926083547989696, "grad_norm": 1.26827871799469, "learning_rate": 3.555e-06, "loss": 0.4091, 
"step": 713 }, { "epoch": 0.039982080860118716, "grad_norm": 1.2401190996170044, "learning_rate": 3.5600000000000002e-06, "loss": 0.5061, "step": 714 }, { "epoch": 0.040038078172247735, "grad_norm": 1.6993461847305298, "learning_rate": 3.565e-06, "loss": 0.5484, "step": 715 }, { "epoch": 0.04009407548437675, "grad_norm": 1.1220078468322754, "learning_rate": 3.5700000000000005e-06, "loss": 0.5662, "step": 716 }, { "epoch": 0.040150072796505766, "grad_norm": 1.306207299232483, "learning_rate": 3.575e-06, "loss": 0.4847, "step": 717 }, { "epoch": 0.040206070108634785, "grad_norm": 1.3487942218780518, "learning_rate": 3.58e-06, "loss": 0.428, "step": 718 }, { "epoch": 0.040262067420763804, "grad_norm": 1.2893807888031006, "learning_rate": 3.585e-06, "loss": 0.439, "step": 719 }, { "epoch": 0.040318064732892823, "grad_norm": 1.2305127382278442, "learning_rate": 3.5900000000000004e-06, "loss": 0.4885, "step": 720 }, { "epoch": 0.040374062045021836, "grad_norm": 1.2074695825576782, "learning_rate": 3.5950000000000003e-06, "loss": 0.4735, "step": 721 }, { "epoch": 0.040430059357150855, "grad_norm": 1.2309809923171997, "learning_rate": 3.6e-06, "loss": 0.4212, "step": 722 }, { "epoch": 0.040486056669279874, "grad_norm": 1.4647796154022217, "learning_rate": 3.6050000000000002e-06, "loss": 0.4403, "step": 723 }, { "epoch": 0.04054205398140889, "grad_norm": 2.394265651702881, "learning_rate": 3.61e-06, "loss": 0.5968, "step": 724 }, { "epoch": 0.04059805129353791, "grad_norm": 1.4066598415374756, "learning_rate": 3.6150000000000005e-06, "loss": 0.4851, "step": 725 }, { "epoch": 0.040654048605666925, "grad_norm": 1.1987708806991577, "learning_rate": 3.6200000000000005e-06, "loss": 0.4466, "step": 726 }, { "epoch": 0.040710045917795944, "grad_norm": 1.0690430402755737, "learning_rate": 3.625e-06, "loss": 0.5593, "step": 727 }, { "epoch": 0.04076604322992496, "grad_norm": 1.1197233200073242, "learning_rate": 3.63e-06, "loss": 0.5191, "step": 728 }, { "epoch": 0.04082204054205398, 
"grad_norm": 1.168726921081543, "learning_rate": 3.6350000000000003e-06, "loss": 0.3816, "step": 729 }, { "epoch": 0.040878037854183, "grad_norm": 1.2563281059265137, "learning_rate": 3.6400000000000003e-06, "loss": 0.4761, "step": 730 }, { "epoch": 0.04093403516631202, "grad_norm": 1.3992208242416382, "learning_rate": 3.6450000000000007e-06, "loss": 0.5434, "step": 731 }, { "epoch": 0.04099003247844103, "grad_norm": 0.9997865557670593, "learning_rate": 3.6499999999999998e-06, "loss": 0.4953, "step": 732 }, { "epoch": 0.04104602979057005, "grad_norm": 1.3411892652511597, "learning_rate": 3.655e-06, "loss": 0.4478, "step": 733 }, { "epoch": 0.04110202710269907, "grad_norm": 6.3067450523376465, "learning_rate": 3.66e-06, "loss": 0.4111, "step": 734 }, { "epoch": 0.04115802441482809, "grad_norm": 1.3528656959533691, "learning_rate": 3.6650000000000005e-06, "loss": 0.522, "step": 735 }, { "epoch": 0.04121402172695711, "grad_norm": 1.1481785774230957, "learning_rate": 3.6700000000000004e-06, "loss": 0.4935, "step": 736 }, { "epoch": 0.04127001903908612, "grad_norm": 1.2303099632263184, "learning_rate": 3.675e-06, "loss": 0.4304, "step": 737 }, { "epoch": 0.04132601635121514, "grad_norm": 1.5268962383270264, "learning_rate": 3.68e-06, "loss": 0.4484, "step": 738 }, { "epoch": 0.04138201366334416, "grad_norm": 1.2627424001693726, "learning_rate": 3.6850000000000003e-06, "loss": 0.489, "step": 739 }, { "epoch": 0.04143801097547318, "grad_norm": 1.397262692451477, "learning_rate": 3.6900000000000002e-06, "loss": 0.5543, "step": 740 }, { "epoch": 0.0414940082876022, "grad_norm": 1.2485219240188599, "learning_rate": 3.6949999999999998e-06, "loss": 0.4055, "step": 741 }, { "epoch": 0.04155000559973121, "grad_norm": 1.1135332584381104, "learning_rate": 3.7e-06, "loss": 0.3795, "step": 742 }, { "epoch": 0.04160600291186023, "grad_norm": 1.4126280546188354, "learning_rate": 3.705e-06, "loss": 0.6212, "step": 743 }, { "epoch": 0.04166200022398925, "grad_norm": 1.4079738855361938, 
"learning_rate": 3.7100000000000005e-06, "loss": 0.7047, "step": 744 }, { "epoch": 0.04171799753611827, "grad_norm": 1.4724724292755127, "learning_rate": 3.7150000000000004e-06, "loss": 0.4877, "step": 745 }, { "epoch": 0.04177399484824729, "grad_norm": 1.661381721496582, "learning_rate": 3.72e-06, "loss": 0.4769, "step": 746 }, { "epoch": 0.0418299921603763, "grad_norm": 1.7881790399551392, "learning_rate": 3.725e-06, "loss": 0.563, "step": 747 }, { "epoch": 0.04188598947250532, "grad_norm": 1.3039181232452393, "learning_rate": 3.7300000000000003e-06, "loss": 0.5058, "step": 748 }, { "epoch": 0.04194198678463434, "grad_norm": 1.1592111587524414, "learning_rate": 3.7350000000000002e-06, "loss": 0.4011, "step": 749 }, { "epoch": 0.041997984096763356, "grad_norm": 1.2834731340408325, "learning_rate": 3.7400000000000006e-06, "loss": 0.4299, "step": 750 }, { "epoch": 0.042053981408892376, "grad_norm": 1.2477951049804688, "learning_rate": 3.7449999999999997e-06, "loss": 0.4805, "step": 751 }, { "epoch": 0.04210997872102139, "grad_norm": 1.5107780694961548, "learning_rate": 3.75e-06, "loss": 0.4794, "step": 752 }, { "epoch": 0.04216597603315041, "grad_norm": 1.288833737373352, "learning_rate": 3.755e-06, "loss": 0.5569, "step": 753 }, { "epoch": 0.042221973345279426, "grad_norm": 1.2187424898147583, "learning_rate": 3.7600000000000004e-06, "loss": 0.3778, "step": 754 }, { "epoch": 0.042277970657408445, "grad_norm": 1.2177382707595825, "learning_rate": 3.7650000000000004e-06, "loss": 0.4573, "step": 755 }, { "epoch": 0.042333967969537464, "grad_norm": 1.2576156854629517, "learning_rate": 3.77e-06, "loss": 0.443, "step": 756 }, { "epoch": 0.042389965281666483, "grad_norm": 1.2027933597564697, "learning_rate": 3.775e-06, "loss": 0.5188, "step": 757 }, { "epoch": 0.042445962593795496, "grad_norm": 1.3134757280349731, "learning_rate": 3.7800000000000002e-06, "loss": 0.5296, "step": 758 }, { "epoch": 0.042501959905924515, "grad_norm": 1.1032532453536987, "learning_rate": 
3.785e-06, "loss": 0.4721, "step": 759 }, { "epoch": 0.042557957218053534, "grad_norm": 1.1537388563156128, "learning_rate": 3.7900000000000006e-06, "loss": 0.3652, "step": 760 }, { "epoch": 0.04261395453018255, "grad_norm": 1.1453958749771118, "learning_rate": 3.795e-06, "loss": 0.3934, "step": 761 }, { "epoch": 0.04266995184231157, "grad_norm": 1.3031656742095947, "learning_rate": 3.8e-06, "loss": 0.4986, "step": 762 }, { "epoch": 0.042725949154440584, "grad_norm": 1.8433433771133423, "learning_rate": 3.8050000000000004e-06, "loss": 0.615, "step": 763 }, { "epoch": 0.042781946466569604, "grad_norm": 1.5257457494735718, "learning_rate": 3.8100000000000004e-06, "loss": 0.5185, "step": 764 }, { "epoch": 0.04283794377869862, "grad_norm": 1.3897265195846558, "learning_rate": 3.815000000000001e-06, "loss": 0.5186, "step": 765 }, { "epoch": 0.04289394109082764, "grad_norm": 1.1871623992919922, "learning_rate": 3.82e-06, "loss": 0.4082, "step": 766 }, { "epoch": 0.04294993840295666, "grad_norm": 2.2760584354400635, "learning_rate": 3.825e-06, "loss": 0.8009, "step": 767 }, { "epoch": 0.04300593571508567, "grad_norm": 1.3228431940078735, "learning_rate": 3.830000000000001e-06, "loss": 0.4165, "step": 768 }, { "epoch": 0.04306193302721469, "grad_norm": 1.8395787477493286, "learning_rate": 3.8350000000000006e-06, "loss": 0.6306, "step": 769 }, { "epoch": 0.04311793033934371, "grad_norm": 1.3083232641220093, "learning_rate": 3.84e-06, "loss": 0.393, "step": 770 }, { "epoch": 0.04317392765147273, "grad_norm": 1.3711607456207275, "learning_rate": 3.845e-06, "loss": 0.5634, "step": 771 }, { "epoch": 0.04322992496360175, "grad_norm": 1.2909704446792603, "learning_rate": 3.85e-06, "loss": 0.4414, "step": 772 }, { "epoch": 0.04328592227573076, "grad_norm": 1.2462844848632812, "learning_rate": 3.855e-06, "loss": 0.4788, "step": 773 }, { "epoch": 0.04334191958785978, "grad_norm": 1.2240833044052124, "learning_rate": 3.86e-06, "loss": 0.5835, "step": 774 }, { "epoch": 
0.0433979168999888, "grad_norm": 1.207229733467102, "learning_rate": 3.865e-06, "loss": 0.4087, "step": 775 }, { "epoch": 0.04345391421211782, "grad_norm": 1.5298811197280884, "learning_rate": 3.87e-06, "loss": 0.5086, "step": 776 }, { "epoch": 0.04350991152424684, "grad_norm": 1.5431842803955078, "learning_rate": 3.875e-06, "loss": 0.5255, "step": 777 }, { "epoch": 0.04356590883637585, "grad_norm": 1.2514537572860718, "learning_rate": 3.88e-06, "loss": 0.4644, "step": 778 }, { "epoch": 0.04362190614850487, "grad_norm": 1.1557564735412598, "learning_rate": 3.885e-06, "loss": 0.4376, "step": 779 }, { "epoch": 0.04367790346063389, "grad_norm": 1.2817264795303345, "learning_rate": 3.89e-06, "loss": 0.5082, "step": 780 }, { "epoch": 0.04373390077276291, "grad_norm": 1.2424750328063965, "learning_rate": 3.895e-06, "loss": 0.506, "step": 781 }, { "epoch": 0.04378989808489193, "grad_norm": 1.014085054397583, "learning_rate": 3.9e-06, "loss": 0.5267, "step": 782 }, { "epoch": 0.04384589539702094, "grad_norm": 1.3631815910339355, "learning_rate": 3.905000000000001e-06, "loss": 0.4526, "step": 783 }, { "epoch": 0.04390189270914996, "grad_norm": 1.462369441986084, "learning_rate": 3.910000000000001e-06, "loss": 0.3593, "step": 784 }, { "epoch": 0.04395789002127898, "grad_norm": 1.1282445192337036, "learning_rate": 3.915e-06, "loss": 0.4777, "step": 785 }, { "epoch": 0.044013887333408, "grad_norm": 1.2128945589065552, "learning_rate": 3.92e-06, "loss": 0.3664, "step": 786 }, { "epoch": 0.044069884645537016, "grad_norm": 1.27531099319458, "learning_rate": 3.9250000000000005e-06, "loss": 0.5199, "step": 787 }, { "epoch": 0.044125881957666035, "grad_norm": 1.6103917360305786, "learning_rate": 3.9300000000000005e-06, "loss": 0.7764, "step": 788 }, { "epoch": 0.04418187926979505, "grad_norm": 1.2535672187805176, "learning_rate": 3.9350000000000004e-06, "loss": 0.5684, "step": 789 }, { "epoch": 0.04423787658192407, "grad_norm": 1.2681461572647095, "learning_rate": 
3.9399999999999995e-06, "loss": 0.4236, "step": 790 }, { "epoch": 0.044293873894053086, "grad_norm": 1.3095355033874512, "learning_rate": 3.945e-06, "loss": 0.5838, "step": 791 }, { "epoch": 0.044349871206182105, "grad_norm": 1.3122279644012451, "learning_rate": 3.95e-06, "loss": 0.4812, "step": 792 }, { "epoch": 0.044405868518311124, "grad_norm": 1.420548915863037, "learning_rate": 3.955e-06, "loss": 0.512, "step": 793 }, { "epoch": 0.044461865830440137, "grad_norm": 1.1349029541015625, "learning_rate": 3.96e-06, "loss": 0.3778, "step": 794 }, { "epoch": 0.044517863142569156, "grad_norm": 1.3369938135147095, "learning_rate": 3.965e-06, "loss": 0.4454, "step": 795 }, { "epoch": 0.044573860454698175, "grad_norm": 1.3689488172531128, "learning_rate": 3.97e-06, "loss": 0.4819, "step": 796 }, { "epoch": 0.044629857766827194, "grad_norm": 6.419103145599365, "learning_rate": 3.975e-06, "loss": 0.5101, "step": 797 }, { "epoch": 0.04468585507895621, "grad_norm": 1.4806280136108398, "learning_rate": 3.98e-06, "loss": 0.6097, "step": 798 }, { "epoch": 0.044741852391085225, "grad_norm": 1.527851939201355, "learning_rate": 3.985e-06, "loss": 0.6501, "step": 799 }, { "epoch": 0.044797849703214244, "grad_norm": 1.3300751447677612, "learning_rate": 3.99e-06, "loss": 0.4639, "step": 800 }, { "epoch": 0.044853847015343264, "grad_norm": 1.3300977945327759, "learning_rate": 3.995e-06, "loss": 0.4535, "step": 801 }, { "epoch": 0.04490984432747228, "grad_norm": 1.2579883337020874, "learning_rate": 4.000000000000001e-06, "loss": 0.5186, "step": 802 }, { "epoch": 0.0449658416396013, "grad_norm": 1.0359623432159424, "learning_rate": 4.005000000000001e-06, "loss": 0.4602, "step": 803 }, { "epoch": 0.045021838951730314, "grad_norm": 1.2562466859817505, "learning_rate": 4.01e-06, "loss": 0.6528, "step": 804 }, { "epoch": 0.04507783626385933, "grad_norm": 1.155367374420166, "learning_rate": 4.015e-06, "loss": 0.4138, "step": 805 }, { "epoch": 0.04513383357598835, "grad_norm": 
1.1500927209854126, "learning_rate": 4.0200000000000005e-06, "loss": 0.4477, "step": 806 }, { "epoch": 0.04518983088811737, "grad_norm": 1.2277973890304565, "learning_rate": 4.0250000000000004e-06, "loss": 0.4754, "step": 807 }, { "epoch": 0.04524582820024639, "grad_norm": 1.1716548204421997, "learning_rate": 4.03e-06, "loss": 0.3876, "step": 808 }, { "epoch": 0.0453018255123754, "grad_norm": 1.2428687810897827, "learning_rate": 4.0349999999999995e-06, "loss": 0.4257, "step": 809 }, { "epoch": 0.04535782282450442, "grad_norm": 1.1642229557037354, "learning_rate": 4.04e-06, "loss": 0.4966, "step": 810 }, { "epoch": 0.04541382013663344, "grad_norm": 1.3695844411849976, "learning_rate": 4.045e-06, "loss": 0.4443, "step": 811 }, { "epoch": 0.04546981744876246, "grad_norm": 1.215888500213623, "learning_rate": 4.05e-06, "loss": 0.4461, "step": 812 }, { "epoch": 0.04552581476089148, "grad_norm": 1.8594948053359985, "learning_rate": 4.055e-06, "loss": 0.5258, "step": 813 }, { "epoch": 0.04558181207302049, "grad_norm": 1.3100166320800781, "learning_rate": 4.06e-06, "loss": 0.4898, "step": 814 }, { "epoch": 0.04563780938514951, "grad_norm": 1.0542774200439453, "learning_rate": 4.065e-06, "loss": 0.3433, "step": 815 }, { "epoch": 0.04569380669727853, "grad_norm": 1.1762754917144775, "learning_rate": 4.07e-06, "loss": 0.4285, "step": 816 }, { "epoch": 0.04574980400940755, "grad_norm": 1.7509896755218506, "learning_rate": 4.075e-06, "loss": 0.7001, "step": 817 }, { "epoch": 0.04580580132153657, "grad_norm": 2.022645950317383, "learning_rate": 4.080000000000001e-06, "loss": 0.4217, "step": 818 }, { "epoch": 0.04586179863366559, "grad_norm": 1.2015148401260376, "learning_rate": 4.085e-06, "loss": 0.4548, "step": 819 }, { "epoch": 0.0459177959457946, "grad_norm": 1.2890264987945557, "learning_rate": 4.09e-06, "loss": 0.4291, "step": 820 }, { "epoch": 0.04597379325792362, "grad_norm": 1.2706619501113892, "learning_rate": 4.095000000000001e-06, "loss": 0.4252, "step": 821 }, { 
"epoch": 0.04602979057005264, "grad_norm": 1.3321216106414795, "learning_rate": 4.1000000000000006e-06, "loss": 0.4343, "step": 822 }, { "epoch": 0.04608578788218166, "grad_norm": 1.4125432968139648, "learning_rate": 4.1050000000000005e-06, "loss": 0.6235, "step": 823 }, { "epoch": 0.046141785194310676, "grad_norm": 1.304635763168335, "learning_rate": 4.11e-06, "loss": 0.6007, "step": 824 }, { "epoch": 0.04619778250643969, "grad_norm": 1.1754714250564575, "learning_rate": 4.115e-06, "loss": 0.4203, "step": 825 }, { "epoch": 0.04625377981856871, "grad_norm": 1.2974853515625, "learning_rate": 4.12e-06, "loss": 0.6425, "step": 826 }, { "epoch": 0.04630977713069773, "grad_norm": 1.3489078283309937, "learning_rate": 4.125e-06, "loss": 0.5505, "step": 827 }, { "epoch": 0.046365774442826746, "grad_norm": 1.4762914180755615, "learning_rate": 4.13e-06, "loss": 0.4373, "step": 828 }, { "epoch": 0.046421771754955765, "grad_norm": 1.038651943206787, "learning_rate": 4.135e-06, "loss": 0.4102, "step": 829 }, { "epoch": 0.04647776906708478, "grad_norm": 1.2074174880981445, "learning_rate": 4.14e-06, "loss": 0.557, "step": 830 }, { "epoch": 0.046533766379213796, "grad_norm": 1.2075752019882202, "learning_rate": 4.145e-06, "loss": 0.4114, "step": 831 }, { "epoch": 0.046589763691342816, "grad_norm": 1.296036720275879, "learning_rate": 4.15e-06, "loss": 0.5151, "step": 832 }, { "epoch": 0.046645761003471835, "grad_norm": 1.5932459831237793, "learning_rate": 4.155e-06, "loss": 0.6072, "step": 833 }, { "epoch": 0.046701758315600854, "grad_norm": 1.1555140018463135, "learning_rate": 4.16e-06, "loss": 0.4633, "step": 834 }, { "epoch": 0.046757755627729866, "grad_norm": 1.7015200853347778, "learning_rate": 4.165e-06, "loss": 0.5737, "step": 835 }, { "epoch": 0.046813752939858885, "grad_norm": 1.2817109823226929, "learning_rate": 4.17e-06, "loss": 0.6031, "step": 836 }, { "epoch": 0.046869750251987904, "grad_norm": 1.0388692617416382, "learning_rate": 4.175000000000001e-06, "loss": 
0.3362, "step": 837 }, { "epoch": 0.046925747564116924, "grad_norm": 1.156796932220459, "learning_rate": 4.18e-06, "loss": 0.4485, "step": 838 }, { "epoch": 0.04698174487624594, "grad_norm": 1.2893918752670288, "learning_rate": 4.185e-06, "loss": 0.5426, "step": 839 }, { "epoch": 0.047037742188374955, "grad_norm": 1.2383168935775757, "learning_rate": 4.1900000000000005e-06, "loss": 0.4145, "step": 840 }, { "epoch": 0.047093739500503974, "grad_norm": 1.4720534086227417, "learning_rate": 4.1950000000000005e-06, "loss": 0.5713, "step": 841 }, { "epoch": 0.04714973681263299, "grad_norm": 5.215790748596191, "learning_rate": 4.2000000000000004e-06, "loss": 0.4335, "step": 842 }, { "epoch": 0.04720573412476201, "grad_norm": 1.6487631797790527, "learning_rate": 4.2049999999999996e-06, "loss": 0.466, "step": 843 }, { "epoch": 0.04726173143689103, "grad_norm": 1.4052884578704834, "learning_rate": 4.21e-06, "loss": 0.5244, "step": 844 }, { "epoch": 0.047317728749020044, "grad_norm": 1.196442723274231, "learning_rate": 4.215e-06, "loss": 0.5315, "step": 845 }, { "epoch": 0.04737372606114906, "grad_norm": 1.341271996498108, "learning_rate": 4.22e-06, "loss": 0.5769, "step": 846 }, { "epoch": 0.04742972337327808, "grad_norm": 1.5218042135238647, "learning_rate": 4.225e-06, "loss": 0.5436, "step": 847 }, { "epoch": 0.0474857206854071, "grad_norm": 1.12349271774292, "learning_rate": 4.23e-06, "loss": 0.4701, "step": 848 }, { "epoch": 0.04754171799753612, "grad_norm": 1.3710609674453735, "learning_rate": 4.235e-06, "loss": 0.5771, "step": 849 }, { "epoch": 0.04759771530966514, "grad_norm": 1.0842112302780151, "learning_rate": 4.24e-06, "loss": 0.3687, "step": 850 }, { "epoch": 0.04765371262179415, "grad_norm": 1.6881595849990845, "learning_rate": 4.245e-06, "loss": 0.5157, "step": 851 }, { "epoch": 0.04770970993392317, "grad_norm": 5.3729047775268555, "learning_rate": 4.250000000000001e-06, "loss": 0.5579, "step": 852 }, { "epoch": 0.04776570724605219, "grad_norm": 
1.4211618900299072, "learning_rate": 4.255e-06, "loss": 0.288, "step": 853 }, { "epoch": 0.04782170455818121, "grad_norm": 1.1432451009750366, "learning_rate": 4.26e-06, "loss": 0.6627, "step": 854 }, { "epoch": 0.04787770187031023, "grad_norm": 1.4285887479782104, "learning_rate": 4.265e-06, "loss": 0.5766, "step": 855 }, { "epoch": 0.04793369918243924, "grad_norm": 1.2693086862564087, "learning_rate": 4.270000000000001e-06, "loss": 0.5203, "step": 856 }, { "epoch": 0.04798969649456826, "grad_norm": 1.0911688804626465, "learning_rate": 4.2750000000000006e-06, "loss": 0.373, "step": 857 }, { "epoch": 0.04804569380669728, "grad_norm": 1.2234435081481934, "learning_rate": 4.28e-06, "loss": 0.4711, "step": 858 }, { "epoch": 0.0481016911188263, "grad_norm": 1.1317834854125977, "learning_rate": 4.2850000000000005e-06, "loss": 0.3715, "step": 859 }, { "epoch": 0.04815768843095532, "grad_norm": 1.4064030647277832, "learning_rate": 4.2900000000000004e-06, "loss": 0.4611, "step": 860 }, { "epoch": 0.04821368574308433, "grad_norm": 1.5491019487380981, "learning_rate": 4.295e-06, "loss": 0.4954, "step": 861 }, { "epoch": 0.04826968305521335, "grad_norm": 1.1596893072128296, "learning_rate": 4.2999999999999995e-06, "loss": 0.4147, "step": 862 }, { "epoch": 0.04832568036734237, "grad_norm": 1.39393150806427, "learning_rate": 4.305e-06, "loss": 0.4504, "step": 863 }, { "epoch": 0.04838167767947139, "grad_norm": 0.9863626956939697, "learning_rate": 4.31e-06, "loss": 0.4018, "step": 864 }, { "epoch": 0.048437674991600406, "grad_norm": 1.2500860691070557, "learning_rate": 4.315e-06, "loss": 0.3177, "step": 865 }, { "epoch": 0.04849367230372942, "grad_norm": 1.2027941942214966, "learning_rate": 4.32e-06, "loss": 0.5849, "step": 866 }, { "epoch": 0.04854966961585844, "grad_norm": 1.3106149435043335, "learning_rate": 4.325e-06, "loss": 0.3912, "step": 867 }, { "epoch": 0.048605666927987456, "grad_norm": 2.14656400680542, "learning_rate": 4.33e-06, "loss": 0.5411, "step": 868 }, { 
"epoch": 0.048661664240116476, "grad_norm": 1.165299415588379, "learning_rate": 4.335e-06, "loss": 0.3608, "step": 869 }, { "epoch": 0.048717661552245495, "grad_norm": 1.5042529106140137, "learning_rate": 4.34e-06, "loss": 0.3919, "step": 870 }, { "epoch": 0.04877365886437451, "grad_norm": 1.0934454202651978, "learning_rate": 4.345000000000001e-06, "loss": 0.3231, "step": 871 }, { "epoch": 0.048829656176503526, "grad_norm": 1.1957106590270996, "learning_rate": 4.35e-06, "loss": 0.5768, "step": 872 }, { "epoch": 0.048885653488632545, "grad_norm": 1.435402750968933, "learning_rate": 4.355e-06, "loss": 0.5095, "step": 873 }, { "epoch": 0.048941650800761564, "grad_norm": 1.366929531097412, "learning_rate": 4.360000000000001e-06, "loss": 0.4974, "step": 874 }, { "epoch": 0.048997648112890584, "grad_norm": 1.3479841947555542, "learning_rate": 4.3650000000000006e-06, "loss": 0.4395, "step": 875 }, { "epoch": 0.049053645425019596, "grad_norm": 1.4555615186691284, "learning_rate": 4.3700000000000005e-06, "loss": 0.7476, "step": 876 }, { "epoch": 0.049109642737148615, "grad_norm": 1.3223801851272583, "learning_rate": 4.375e-06, "loss": 0.4592, "step": 877 }, { "epoch": 0.049165640049277634, "grad_norm": 1.015773057937622, "learning_rate": 4.38e-06, "loss": 0.3394, "step": 878 }, { "epoch": 0.04922163736140665, "grad_norm": 1.224155068397522, "learning_rate": 4.385e-06, "loss": 0.4811, "step": 879 }, { "epoch": 0.04927763467353567, "grad_norm": 1.174251675605774, "learning_rate": 4.39e-06, "loss": 0.3923, "step": 880 }, { "epoch": 0.04933363198566469, "grad_norm": 1.5335127115249634, "learning_rate": 4.395e-06, "loss": 0.531, "step": 881 }, { "epoch": 0.049389629297793704, "grad_norm": 1.2524313926696777, "learning_rate": 4.4e-06, "loss": 0.5577, "step": 882 }, { "epoch": 0.04944562660992272, "grad_norm": 1.1989326477050781, "learning_rate": 4.405e-06, "loss": 0.5136, "step": 883 }, { "epoch": 0.04950162392205174, "grad_norm": 1.1970378160476685, "learning_rate": 4.41e-06, 
"loss": 0.3874, "step": 884 }, { "epoch": 0.04955762123418076, "grad_norm": 1.2088544368743896, "learning_rate": 4.415e-06, "loss": 0.4222, "step": 885 }, { "epoch": 0.04961361854630978, "grad_norm": 1.228928565979004, "learning_rate": 4.420000000000001e-06, "loss": 0.4155, "step": 886 }, { "epoch": 0.04966961585843879, "grad_norm": 1.324442744255066, "learning_rate": 4.425e-06, "loss": 0.4051, "step": 887 }, { "epoch": 0.04972561317056781, "grad_norm": 1.2726175785064697, "learning_rate": 4.43e-06, "loss": 0.3889, "step": 888 }, { "epoch": 0.04978161048269683, "grad_norm": 1.198268175125122, "learning_rate": 4.435e-06, "loss": 0.4054, "step": 889 }, { "epoch": 0.04983760779482585, "grad_norm": 1.3631871938705444, "learning_rate": 4.440000000000001e-06, "loss": 0.5341, "step": 890 }, { "epoch": 0.04989360510695487, "grad_norm": 1.1524522304534912, "learning_rate": 4.445000000000001e-06, "loss": 0.584, "step": 891 }, { "epoch": 0.04994960241908388, "grad_norm": 1.4735164642333984, "learning_rate": 4.45e-06, "loss": 0.5243, "step": 892 }, { "epoch": 0.0500055997312129, "grad_norm": 1.1556566953659058, "learning_rate": 4.4550000000000005e-06, "loss": 0.3543, "step": 893 }, { "epoch": 0.05006159704334192, "grad_norm": 1.2251462936401367, "learning_rate": 4.4600000000000005e-06, "loss": 0.4228, "step": 894 }, { "epoch": 0.05011759435547094, "grad_norm": 1.2216525077819824, "learning_rate": 4.4650000000000004e-06, "loss": 0.5462, "step": 895 }, { "epoch": 0.05017359166759996, "grad_norm": 3.547142744064331, "learning_rate": 4.4699999999999996e-06, "loss": 0.7819, "step": 896 }, { "epoch": 0.05022958897972897, "grad_norm": 1.2285093069076538, "learning_rate": 4.475e-06, "loss": 0.6265, "step": 897 }, { "epoch": 0.05028558629185799, "grad_norm": 1.074223518371582, "learning_rate": 4.48e-06, "loss": 0.3546, "step": 898 }, { "epoch": 0.05034158360398701, "grad_norm": 1.1250219345092773, "learning_rate": 4.485e-06, "loss": 0.42, "step": 899 }, { "epoch": 0.05039758091611603, 
"grad_norm": 1.5750139951705933, "learning_rate": 4.49e-06, "loss": 0.5449, "step": 900 }, { "epoch": 0.05045357822824505, "grad_norm": 1.213463306427002, "learning_rate": 4.495e-06, "loss": 0.4779, "step": 901 }, { "epoch": 0.05050957554037406, "grad_norm": 1.3777439594268799, "learning_rate": 4.5e-06, "loss": 0.4854, "step": 902 }, { "epoch": 0.05056557285250308, "grad_norm": 1.180437445640564, "learning_rate": 4.505e-06, "loss": 0.5171, "step": 903 }, { "epoch": 0.0506215701646321, "grad_norm": 1.2369575500488281, "learning_rate": 4.51e-06, "loss": 0.5179, "step": 904 }, { "epoch": 0.050677567476761116, "grad_norm": 1.1850531101226807, "learning_rate": 4.515000000000001e-06, "loss": 0.4336, "step": 905 }, { "epoch": 0.050733564788890136, "grad_norm": 1.65920889377594, "learning_rate": 4.52e-06, "loss": 0.4392, "step": 906 }, { "epoch": 0.05078956210101915, "grad_norm": 1.313531517982483, "learning_rate": 4.525e-06, "loss": 0.4201, "step": 907 }, { "epoch": 0.05084555941314817, "grad_norm": 1.7814490795135498, "learning_rate": 4.53e-06, "loss": 0.424, "step": 908 }, { "epoch": 0.050901556725277186, "grad_norm": 1.093064785003662, "learning_rate": 4.535000000000001e-06, "loss": 0.4134, "step": 909 }, { "epoch": 0.050957554037406205, "grad_norm": 1.2600054740905762, "learning_rate": 4.540000000000001e-06, "loss": 0.4557, "step": 910 }, { "epoch": 0.051013551349535224, "grad_norm": 1.131227970123291, "learning_rate": 4.545e-06, "loss": 0.4015, "step": 911 }, { "epoch": 0.051069548661664244, "grad_norm": 1.0976067781448364, "learning_rate": 4.5500000000000005e-06, "loss": 0.4971, "step": 912 }, { "epoch": 0.051125545973793256, "grad_norm": 1.1147174835205078, "learning_rate": 4.5550000000000004e-06, "loss": 0.3695, "step": 913 }, { "epoch": 0.051181543285922275, "grad_norm": 1.4934289455413818, "learning_rate": 4.56e-06, "loss": 0.6644, "step": 914 }, { "epoch": 0.051237540598051294, "grad_norm": 1.0716238021850586, "learning_rate": 4.565e-06, "loss": 0.3123, "step": 
915 }, { "epoch": 0.05129353791018031, "grad_norm": 1.3327045440673828, "learning_rate": 4.57e-06, "loss": 0.5055, "step": 916 }, { "epoch": 0.05134953522230933, "grad_norm": 1.1724621057510376, "learning_rate": 4.575e-06, "loss": 0.5311, "step": 917 }, { "epoch": 0.051405532534438345, "grad_norm": 1.1683303117752075, "learning_rate": 4.58e-06, "loss": 0.5039, "step": 918 }, { "epoch": 0.051461529846567364, "grad_norm": 1.2588891983032227, "learning_rate": 4.585e-06, "loss": 0.5484, "step": 919 }, { "epoch": 0.05151752715869638, "grad_norm": 1.3223018646240234, "learning_rate": 4.590000000000001e-06, "loss": 0.5563, "step": 920 }, { "epoch": 0.0515735244708254, "grad_norm": 1.3173508644104004, "learning_rate": 4.595e-06, "loss": 0.466, "step": 921 }, { "epoch": 0.05162952178295442, "grad_norm": 1.736454963684082, "learning_rate": 4.6e-06, "loss": 0.5395, "step": 922 }, { "epoch": 0.05168551909508343, "grad_norm": 1.2006267309188843, "learning_rate": 4.605e-06, "loss": 0.5065, "step": 923 }, { "epoch": 0.05174151640721245, "grad_norm": 1.3227877616882324, "learning_rate": 4.610000000000001e-06, "loss": 0.4938, "step": 924 }, { "epoch": 0.05179751371934147, "grad_norm": 1.1532106399536133, "learning_rate": 4.615e-06, "loss": 0.6579, "step": 925 }, { "epoch": 0.05185351103147049, "grad_norm": 1.1137820482254028, "learning_rate": 4.62e-06, "loss": 0.4266, "step": 926 }, { "epoch": 0.05190950834359951, "grad_norm": 1.6235989332199097, "learning_rate": 4.625e-06, "loss": 0.4395, "step": 927 }, { "epoch": 0.05196550565572852, "grad_norm": 1.167560338973999, "learning_rate": 4.6300000000000006e-06, "loss": 0.3794, "step": 928 }, { "epoch": 0.05202150296785754, "grad_norm": 1.2593872547149658, "learning_rate": 4.6350000000000005e-06, "loss": 0.4781, "step": 929 }, { "epoch": 0.05207750027998656, "grad_norm": 1.4556719064712524, "learning_rate": 4.64e-06, "loss": 0.5134, "step": 930 }, { "epoch": 0.05213349759211558, "grad_norm": 1.1882364749908447, "learning_rate": 
4.645e-06, "loss": 0.4005, "step": 931 }, { "epoch": 0.0521894949042446, "grad_norm": 1.14823579788208, "learning_rate": 4.65e-06, "loss": 0.4556, "step": 932 }, { "epoch": 0.05224549221637361, "grad_norm": 1.373916506767273, "learning_rate": 4.655e-06, "loss": 0.4299, "step": 933 }, { "epoch": 0.05230148952850263, "grad_norm": 1.1341737508773804, "learning_rate": 4.66e-06, "loss": 0.4239, "step": 934 }, { "epoch": 0.05235748684063165, "grad_norm": 1.2210662364959717, "learning_rate": 4.665e-06, "loss": 0.5539, "step": 935 }, { "epoch": 0.05241348415276067, "grad_norm": 1.1791493892669678, "learning_rate": 4.67e-06, "loss": 0.4105, "step": 936 }, { "epoch": 0.05246948146488969, "grad_norm": 1.2142149209976196, "learning_rate": 4.675e-06, "loss": 0.4108, "step": 937 }, { "epoch": 0.0525254787770187, "grad_norm": 1.3496472835540771, "learning_rate": 4.68e-06, "loss": 0.396, "step": 938 }, { "epoch": 0.05258147608914772, "grad_norm": 1.1289809942245483, "learning_rate": 4.685000000000001e-06, "loss": 0.3934, "step": 939 }, { "epoch": 0.05263747340127674, "grad_norm": 1.210899829864502, "learning_rate": 4.69e-06, "loss": 0.4609, "step": 940 }, { "epoch": 0.05269347071340576, "grad_norm": 1.090699553489685, "learning_rate": 4.695e-06, "loss": 0.5039, "step": 941 }, { "epoch": 0.052749468025534776, "grad_norm": 1.3170802593231201, "learning_rate": 4.7e-06, "loss": 0.498, "step": 942 }, { "epoch": 0.052805465337663796, "grad_norm": 1.2018908262252808, "learning_rate": 4.705000000000001e-06, "loss": 0.393, "step": 943 }, { "epoch": 0.05286146264979281, "grad_norm": 1.3873302936553955, "learning_rate": 4.710000000000001e-06, "loss": 0.478, "step": 944 }, { "epoch": 0.05291745996192183, "grad_norm": 1.3100378513336182, "learning_rate": 4.715e-06, "loss": 0.4197, "step": 945 }, { "epoch": 0.052973457274050846, "grad_norm": 1.6671725511550903, "learning_rate": 4.72e-06, "loss": 0.6221, "step": 946 }, { "epoch": 0.053029454586179865, "grad_norm": 1.6387559175491333, 
"learning_rate": 4.7250000000000005e-06, "loss": 0.5495, "step": 947 }, { "epoch": 0.053085451898308884, "grad_norm": 1.2692862749099731, "learning_rate": 4.7300000000000005e-06, "loss": 0.4587, "step": 948 }, { "epoch": 0.0531414492104379, "grad_norm": 1.299742341041565, "learning_rate": 4.735e-06, "loss": 0.4027, "step": 949 }, { "epoch": 0.053197446522566916, "grad_norm": 1.3480663299560547, "learning_rate": 4.74e-06, "loss": 0.4369, "step": 950 }, { "epoch": 0.053253443834695935, "grad_norm": 1.2166800498962402, "learning_rate": 4.745e-06, "loss": 0.5011, "step": 951 }, { "epoch": 0.053309441146824954, "grad_norm": 1.2832262516021729, "learning_rate": 4.75e-06, "loss": 0.402, "step": 952 }, { "epoch": 0.05336543845895397, "grad_norm": 1.4792896509170532, "learning_rate": 4.755e-06, "loss": 0.5747, "step": 953 }, { "epoch": 0.053421435771082985, "grad_norm": 1.568776249885559, "learning_rate": 4.76e-06, "loss": 0.4724, "step": 954 }, { "epoch": 0.053477433083212005, "grad_norm": 1.1982817649841309, "learning_rate": 4.765e-06, "loss": 0.5061, "step": 955 }, { "epoch": 0.053533430395341024, "grad_norm": 1.3185970783233643, "learning_rate": 4.77e-06, "loss": 0.5286, "step": 956 }, { "epoch": 0.05358942770747004, "grad_norm": 1.147926688194275, "learning_rate": 4.775e-06, "loss": 0.3612, "step": 957 }, { "epoch": 0.05364542501959906, "grad_norm": 1.26372230052948, "learning_rate": 4.780000000000001e-06, "loss": 0.5493, "step": 958 }, { "epoch": 0.053701422331728074, "grad_norm": 1.3605045080184937, "learning_rate": 4.785e-06, "loss": 0.6715, "step": 959 }, { "epoch": 0.05375741964385709, "grad_norm": 1.3014461994171143, "learning_rate": 4.79e-06, "loss": 0.4488, "step": 960 }, { "epoch": 0.05381341695598611, "grad_norm": 1.37398362159729, "learning_rate": 4.795e-06, "loss": 0.4222, "step": 961 }, { "epoch": 0.05386941426811513, "grad_norm": 1.230643391609192, "learning_rate": 4.800000000000001e-06, "loss": 0.4864, "step": 962 }, { "epoch": 0.05392541158024415, 
"grad_norm": 1.095284104347229, "learning_rate": 4.805000000000001e-06, "loss": 0.5956, "step": 963 }, { "epoch": 0.05398140889237316, "grad_norm": 1.1946420669555664, "learning_rate": 4.81e-06, "loss": 0.4388, "step": 964 }, { "epoch": 0.05403740620450218, "grad_norm": 1.0814542770385742, "learning_rate": 4.8150000000000005e-06, "loss": 0.4001, "step": 965 }, { "epoch": 0.0540934035166312, "grad_norm": 1.1272335052490234, "learning_rate": 4.8200000000000004e-06, "loss": 0.4352, "step": 966 }, { "epoch": 0.05414940082876022, "grad_norm": 1.2352651357650757, "learning_rate": 4.825e-06, "loss": 0.4212, "step": 967 }, { "epoch": 0.05420539814088924, "grad_norm": 1.3018606901168823, "learning_rate": 4.83e-06, "loss": 0.429, "step": 968 }, { "epoch": 0.05426139545301825, "grad_norm": 1.3863848447799683, "learning_rate": 4.835e-06, "loss": 0.5049, "step": 969 }, { "epoch": 0.05431739276514727, "grad_norm": 1.1036901473999023, "learning_rate": 4.84e-06, "loss": 0.4213, "step": 970 }, { "epoch": 0.05437339007727629, "grad_norm": 1.420832633972168, "learning_rate": 4.845e-06, "loss": 0.42, "step": 971 }, { "epoch": 0.05442938738940531, "grad_norm": 1.2023344039916992, "learning_rate": 4.85e-06, "loss": 0.5156, "step": 972 }, { "epoch": 0.05448538470153433, "grad_norm": 1.184357762336731, "learning_rate": 4.855e-06, "loss": 0.4431, "step": 973 }, { "epoch": 0.05454138201366335, "grad_norm": 1.2339346408843994, "learning_rate": 4.86e-06, "loss": 0.4456, "step": 974 }, { "epoch": 0.05459737932579236, "grad_norm": 1.1052863597869873, "learning_rate": 4.865e-06, "loss": 0.3762, "step": 975 }, { "epoch": 0.05465337663792138, "grad_norm": 1.15488600730896, "learning_rate": 4.87e-06, "loss": 0.4208, "step": 976 }, { "epoch": 0.0547093739500504, "grad_norm": 1.5858561992645264, "learning_rate": 4.875000000000001e-06, "loss": 0.4824, "step": 977 }, { "epoch": 0.05476537126217942, "grad_norm": 1.2476688623428345, "learning_rate": 4.880000000000001e-06, "loss": 0.6081, "step": 978 }, { 
"epoch": 0.054821368574308436, "grad_norm": 0.9850094318389893, "learning_rate": 4.885e-06, "loss": 0.4135, "step": 979 }, { "epoch": 0.05487736588643745, "grad_norm": 1.3759442567825317, "learning_rate": 4.89e-06, "loss": 0.5223, "step": 980 }, { "epoch": 0.05493336319856647, "grad_norm": 1.2599742412567139, "learning_rate": 4.8950000000000006e-06, "loss": 0.5832, "step": 981 }, { "epoch": 0.05498936051069549, "grad_norm": 2.0648694038391113, "learning_rate": 4.9000000000000005e-06, "loss": 0.8202, "step": 982 }, { "epoch": 0.055045357822824506, "grad_norm": 1.2609869241714478, "learning_rate": 4.9050000000000005e-06, "loss": 0.4878, "step": 983 }, { "epoch": 0.055101355134953525, "grad_norm": 1.1653976440429688, "learning_rate": 4.9100000000000004e-06, "loss": 0.4716, "step": 984 }, { "epoch": 0.05515735244708254, "grad_norm": 1.4410059452056885, "learning_rate": 4.915e-06, "loss": 0.5038, "step": 985 }, { "epoch": 0.05521334975921156, "grad_norm": 1.3019509315490723, "learning_rate": 4.92e-06, "loss": 0.417, "step": 986 }, { "epoch": 0.055269347071340576, "grad_norm": 1.4015498161315918, "learning_rate": 4.925e-06, "loss": 0.6574, "step": 987 }, { "epoch": 0.055325344383469595, "grad_norm": 1.9603328704833984, "learning_rate": 4.93e-06, "loss": 0.5267, "step": 988 }, { "epoch": 0.055381341695598614, "grad_norm": 1.3965656757354736, "learning_rate": 4.935e-06, "loss": 0.4231, "step": 989 }, { "epoch": 0.055437339007727626, "grad_norm": 1.1022210121154785, "learning_rate": 4.94e-06, "loss": 0.4136, "step": 990 }, { "epoch": 0.055493336319856645, "grad_norm": 1.0762064456939697, "learning_rate": 4.945e-06, "loss": 0.4043, "step": 991 }, { "epoch": 0.055549333631985665, "grad_norm": 1.375928282737732, "learning_rate": 4.950000000000001e-06, "loss": 0.4881, "step": 992 }, { "epoch": 0.055605330944114684, "grad_norm": 1.385292649269104, "learning_rate": 4.955e-06, "loss": 0.4528, "step": 993 }, { "epoch": 0.0556613282562437, "grad_norm": 1.17954683303833, 
"learning_rate": 4.96e-06, "loss": 0.4497, "step": 994 }, { "epoch": 0.055717325568372715, "grad_norm": 1.412698745727539, "learning_rate": 4.965e-06, "loss": 0.4283, "step": 995 }, { "epoch": 0.055773322880501734, "grad_norm": 1.191758155822754, "learning_rate": 4.970000000000001e-06, "loss": 0.3965, "step": 996 }, { "epoch": 0.05582932019263075, "grad_norm": 6.146461486816406, "learning_rate": 4.975000000000001e-06, "loss": 0.5264, "step": 997 }, { "epoch": 0.05588531750475977, "grad_norm": 1.2692173719406128, "learning_rate": 4.98e-06, "loss": 0.4529, "step": 998 }, { "epoch": 0.05594131481688879, "grad_norm": 1.5443811416625977, "learning_rate": 4.985e-06, "loss": 0.4728, "step": 999 }, { "epoch": 0.055997312129017804, "grad_norm": 1.2640265226364136, "learning_rate": 4.9900000000000005e-06, "loss": 0.5565, "step": 1000 }, { "epoch": 0.05605330944114682, "grad_norm": 1.2944355010986328, "learning_rate": 4.9950000000000005e-06, "loss": 0.5138, "step": 1001 }, { "epoch": 0.05610930675327584, "grad_norm": 1.1413021087646484, "learning_rate": 5e-06, "loss": 0.4785, "step": 1002 }, { "epoch": 0.05616530406540486, "grad_norm": 1.3369749784469604, "learning_rate": 5.005e-06, "loss": 0.4315, "step": 1003 }, { "epoch": 0.05622130137753388, "grad_norm": 1.085232138633728, "learning_rate": 5.01e-06, "loss": 0.3798, "step": 1004 }, { "epoch": 0.0562772986896629, "grad_norm": 1.620424747467041, "learning_rate": 5.015e-06, "loss": 0.4397, "step": 1005 }, { "epoch": 0.05633329600179191, "grad_norm": 1.3571701049804688, "learning_rate": 5.02e-06, "loss": 0.5498, "step": 1006 }, { "epoch": 0.05638929331392093, "grad_norm": 1.3521087169647217, "learning_rate": 5.025e-06, "loss": 0.3857, "step": 1007 }, { "epoch": 0.05644529062604995, "grad_norm": 1.1394175291061401, "learning_rate": 5.03e-06, "loss": 0.5012, "step": 1008 }, { "epoch": 0.05650128793817897, "grad_norm": 1.5971336364746094, "learning_rate": 5.035e-06, "loss": 0.3897, "step": 1009 }, { "epoch": 0.05655728525030799, 
"grad_norm": 1.226791262626648, "learning_rate": 5.04e-06, "loss": 0.4669, "step": 1010 }, { "epoch": 0.056613282562437, "grad_norm": 1.140771746635437, "learning_rate": 5.045000000000001e-06, "loss": 0.4582, "step": 1011 }, { "epoch": 0.05666927987456602, "grad_norm": 1.2240629196166992, "learning_rate": 5.050000000000001e-06, "loss": 0.5498, "step": 1012 }, { "epoch": 0.05672527718669504, "grad_norm": 1.2744507789611816, "learning_rate": 5.055e-06, "loss": 0.5201, "step": 1013 }, { "epoch": 0.05678127449882406, "grad_norm": 1.2416425943374634, "learning_rate": 5.06e-06, "loss": 0.455, "step": 1014 }, { "epoch": 0.05683727181095308, "grad_norm": 1.136209487915039, "learning_rate": 5.065000000000001e-06, "loss": 0.4787, "step": 1015 }, { "epoch": 0.05689326912308209, "grad_norm": 1.156378149986267, "learning_rate": 5.070000000000001e-06, "loss": 0.4328, "step": 1016 }, { "epoch": 0.05694926643521111, "grad_norm": 1.2695443630218506, "learning_rate": 5.0750000000000005e-06, "loss": 0.4819, "step": 1017 }, { "epoch": 0.05700526374734013, "grad_norm": 1.191765308380127, "learning_rate": 5.08e-06, "loss": 0.3793, "step": 1018 }, { "epoch": 0.05706126105946915, "grad_norm": 1.2647852897644043, "learning_rate": 5.0850000000000004e-06, "loss": 0.3896, "step": 1019 }, { "epoch": 0.057117258371598166, "grad_norm": 1.2763571739196777, "learning_rate": 5.09e-06, "loss": 0.6245, "step": 1020 }, { "epoch": 0.05717325568372718, "grad_norm": 1.239418864250183, "learning_rate": 5.095e-06, "loss": 0.4452, "step": 1021 }, { "epoch": 0.0572292529958562, "grad_norm": 1.3589504957199097, "learning_rate": 5.1e-06, "loss": 0.514, "step": 1022 }, { "epoch": 0.05728525030798522, "grad_norm": 1.1839314699172974, "learning_rate": 5.105e-06, "loss": 0.4385, "step": 1023 }, { "epoch": 0.057341247620114236, "grad_norm": 1.2520166635513306, "learning_rate": 5.11e-06, "loss": 0.5166, "step": 1024 }, { "epoch": 0.057397244932243255, "grad_norm": 1.7305189371109009, "learning_rate": 5.115e-06, 
"loss": 0.4474, "step": 1025 }, { "epoch": 0.05745324224437227, "grad_norm": 1.2060898542404175, "learning_rate": 5.12e-06, "loss": 0.509, "step": 1026 }, { "epoch": 0.057509239556501286, "grad_norm": 1.473137617111206, "learning_rate": 5.125e-06, "loss": 0.4312, "step": 1027 }, { "epoch": 0.057565236868630305, "grad_norm": 1.0966969728469849, "learning_rate": 5.13e-06, "loss": 0.4443, "step": 1028 }, { "epoch": 0.057621234180759325, "grad_norm": 1.2823864221572876, "learning_rate": 5.135e-06, "loss": 0.533, "step": 1029 }, { "epoch": 0.057677231492888344, "grad_norm": 1.2967363595962524, "learning_rate": 5.140000000000001e-06, "loss": 0.4279, "step": 1030 }, { "epoch": 0.057733228805017356, "grad_norm": 1.336930751800537, "learning_rate": 5.145000000000001e-06, "loss": 0.5224, "step": 1031 }, { "epoch": 0.057789226117146375, "grad_norm": 1.5669735670089722, "learning_rate": 5.15e-06, "loss": 0.5194, "step": 1032 }, { "epoch": 0.057845223429275394, "grad_norm": 1.1038568019866943, "learning_rate": 5.155e-06, "loss": 0.4, "step": 1033 }, { "epoch": 0.05790122074140441, "grad_norm": 1.100191354751587, "learning_rate": 5.1600000000000006e-06, "loss": 0.4871, "step": 1034 }, { "epoch": 0.05795721805353343, "grad_norm": 1.2605558633804321, "learning_rate": 5.1650000000000005e-06, "loss": 0.4253, "step": 1035 }, { "epoch": 0.05801321536566245, "grad_norm": 1.4248777627944946, "learning_rate": 5.1700000000000005e-06, "loss": 0.3997, "step": 1036 }, { "epoch": 0.058069212677791464, "grad_norm": 1.2265772819519043, "learning_rate": 5.175e-06, "loss": 0.4736, "step": 1037 }, { "epoch": 0.05812520998992048, "grad_norm": 1.3616083860397339, "learning_rate": 5.18e-06, "loss": 0.4686, "step": 1038 }, { "epoch": 0.0581812073020495, "grad_norm": 1.1219232082366943, "learning_rate": 5.185e-06, "loss": 0.3539, "step": 1039 }, { "epoch": 0.05823720461417852, "grad_norm": 1.4267109632492065, "learning_rate": 5.19e-06, "loss": 0.5069, "step": 1040 }, { "epoch": 0.05829320192630754, 
"grad_norm": 1.2390908002853394, "learning_rate": 5.195e-06, "loss": 0.5129, "step": 1041 }, { "epoch": 0.05834919923843655, "grad_norm": 1.2516577243804932, "learning_rate": 5.2e-06, "loss": 0.5162, "step": 1042 }, { "epoch": 0.05840519655056557, "grad_norm": 1.876985788345337, "learning_rate": 5.205e-06, "loss": 0.3696, "step": 1043 }, { "epoch": 0.05846119386269459, "grad_norm": 1.3517183065414429, "learning_rate": 5.21e-06, "loss": 0.5062, "step": 1044 }, { "epoch": 0.05851719117482361, "grad_norm": 1.160207748413086, "learning_rate": 5.215e-06, "loss": 0.4457, "step": 1045 }, { "epoch": 0.05857318848695263, "grad_norm": 1.3615148067474365, "learning_rate": 5.220000000000001e-06, "loss": 0.5174, "step": 1046 }, { "epoch": 0.05862918579908164, "grad_norm": 1.04086434841156, "learning_rate": 5.225e-06, "loss": 0.4562, "step": 1047 }, { "epoch": 0.05868518311121066, "grad_norm": 1.3939971923828125, "learning_rate": 5.23e-06, "loss": 0.4531, "step": 1048 }, { "epoch": 0.05874118042333968, "grad_norm": 1.3616175651550293, "learning_rate": 5.235000000000001e-06, "loss": 0.478, "step": 1049 }, { "epoch": 0.0587971777354687, "grad_norm": 1.2056399583816528, "learning_rate": 5.240000000000001e-06, "loss": 0.5392, "step": 1050 }, { "epoch": 0.05885317504759772, "grad_norm": 1.2128673791885376, "learning_rate": 5.245e-06, "loss": 0.3881, "step": 1051 }, { "epoch": 0.05890917235972673, "grad_norm": 1.4392180442810059, "learning_rate": 5.25e-06, "loss": 0.4251, "step": 1052 }, { "epoch": 0.05896516967185575, "grad_norm": 1.199028730392456, "learning_rate": 5.2550000000000005e-06, "loss": 0.3948, "step": 1053 }, { "epoch": 0.05902116698398477, "grad_norm": 1.0078922510147095, "learning_rate": 5.2600000000000005e-06, "loss": 0.3426, "step": 1054 }, { "epoch": 0.05907716429611379, "grad_norm": 1.1445984840393066, "learning_rate": 5.265e-06, "loss": 0.3774, "step": 1055 }, { "epoch": 0.05913316160824281, "grad_norm": 1.3585809469223022, "learning_rate": 5.2699999999999995e-06, 
"loss": 0.6656, "step": 1056 }, { "epoch": 0.05918915892037182, "grad_norm": 1.065910816192627, "learning_rate": 5.275e-06, "loss": 0.4658, "step": 1057 }, { "epoch": 0.05924515623250084, "grad_norm": 1.2044261693954468, "learning_rate": 5.28e-06, "loss": 0.5031, "step": 1058 }, { "epoch": 0.05930115354462986, "grad_norm": 1.6254397630691528, "learning_rate": 5.285e-06, "loss": 0.4542, "step": 1059 }, { "epoch": 0.05935715085675888, "grad_norm": 1.1306949853897095, "learning_rate": 5.29e-06, "loss": 0.4671, "step": 1060 }, { "epoch": 0.059413148168887896, "grad_norm": 1.2099978923797607, "learning_rate": 5.295e-06, "loss": 0.4892, "step": 1061 }, { "epoch": 0.05946914548101691, "grad_norm": 1.401309609413147, "learning_rate": 5.3e-06, "loss": 0.5684, "step": 1062 }, { "epoch": 0.05952514279314593, "grad_norm": 1.8093961477279663, "learning_rate": 5.305e-06, "loss": 0.4251, "step": 1063 }, { "epoch": 0.059581140105274946, "grad_norm": 1.5952280759811401, "learning_rate": 5.31e-06, "loss": 0.5887, "step": 1064 }, { "epoch": 0.059637137417403965, "grad_norm": 1.223723292350769, "learning_rate": 5.315000000000001e-06, "loss": 0.3764, "step": 1065 }, { "epoch": 0.059693134729532984, "grad_norm": 1.25835120677948, "learning_rate": 5.32e-06, "loss": 0.4321, "step": 1066 }, { "epoch": 0.059749132041662004, "grad_norm": 1.1497814655303955, "learning_rate": 5.325e-06, "loss": 0.4415, "step": 1067 }, { "epoch": 0.059805129353791016, "grad_norm": 1.0686076879501343, "learning_rate": 5.330000000000001e-06, "loss": 0.3571, "step": 1068 }, { "epoch": 0.059861126665920035, "grad_norm": 1.2597800493240356, "learning_rate": 5.335000000000001e-06, "loss": 0.4686, "step": 1069 }, { "epoch": 0.059917123978049054, "grad_norm": 1.2790300846099854, "learning_rate": 5.3400000000000005e-06, "loss": 0.4832, "step": 1070 }, { "epoch": 0.05997312129017807, "grad_norm": 1.1553374528884888, "learning_rate": 5.345e-06, "loss": 0.5143, "step": 1071 }, { "epoch": 0.06002911860230709, "grad_norm": 
1.1005035638809204, "learning_rate": 5.3500000000000004e-06, "loss": 0.4896, "step": 1072 }, { "epoch": 0.060085115914436105, "grad_norm": 1.3469942808151245, "learning_rate": 5.355e-06, "loss": 0.4591, "step": 1073 }, { "epoch": 0.060141113226565124, "grad_norm": 1.2604517936706543, "learning_rate": 5.36e-06, "loss": 0.5521, "step": 1074 }, { "epoch": 0.06019711053869414, "grad_norm": 1.243295431137085, "learning_rate": 5.365e-06, "loss": 0.6024, "step": 1075 }, { "epoch": 0.06025310785082316, "grad_norm": 1.3653740882873535, "learning_rate": 5.37e-06, "loss": 0.4207, "step": 1076 }, { "epoch": 0.06030910516295218, "grad_norm": 1.1624484062194824, "learning_rate": 5.375e-06, "loss": 0.4241, "step": 1077 }, { "epoch": 0.060365102475081193, "grad_norm": 1.260948657989502, "learning_rate": 5.38e-06, "loss": 0.4985, "step": 1078 }, { "epoch": 0.06042109978721021, "grad_norm": 1.099338173866272, "learning_rate": 5.385e-06, "loss": 0.4063, "step": 1079 }, { "epoch": 0.06047709709933923, "grad_norm": 1.4310381412506104, "learning_rate": 5.390000000000001e-06, "loss": 0.5739, "step": 1080 }, { "epoch": 0.06053309441146825, "grad_norm": 1.2879993915557861, "learning_rate": 5.395e-06, "loss": 0.4956, "step": 1081 }, { "epoch": 0.06058909172359727, "grad_norm": 1.1941678524017334, "learning_rate": 5.4e-06, "loss": 0.3758, "step": 1082 }, { "epoch": 0.06064508903572628, "grad_norm": 1.1191668510437012, "learning_rate": 5.405e-06, "loss": 0.5986, "step": 1083 }, { "epoch": 0.0607010863478553, "grad_norm": 1.4496634006500244, "learning_rate": 5.410000000000001e-06, "loss": 0.4977, "step": 1084 }, { "epoch": 0.06075708365998432, "grad_norm": 1.4725685119628906, "learning_rate": 5.415e-06, "loss": 0.5158, "step": 1085 }, { "epoch": 0.06081308097211334, "grad_norm": 1.0966806411743164, "learning_rate": 5.42e-06, "loss": 0.4611, "step": 1086 }, { "epoch": 0.06086907828424236, "grad_norm": 1.06184720993042, "learning_rate": 5.4250000000000006e-06, "loss": 0.4319, "step": 1087 }, { 
"epoch": 0.06092507559637137, "grad_norm": 1.874305248260498, "learning_rate": 5.4300000000000005e-06, "loss": 0.5097, "step": 1088 }, { "epoch": 0.06098107290850039, "grad_norm": 2.8478968143463135, "learning_rate": 5.4350000000000005e-06, "loss": 0.3575, "step": 1089 }, { "epoch": 0.06103707022062941, "grad_norm": 1.3469611406326294, "learning_rate": 5.44e-06, "loss": 0.428, "step": 1090 }, { "epoch": 0.06109306753275843, "grad_norm": 1.0713883638381958, "learning_rate": 5.445e-06, "loss": 0.3892, "step": 1091 }, { "epoch": 0.06114906484488745, "grad_norm": 1.3371243476867676, "learning_rate": 5.45e-06, "loss": 0.577, "step": 1092 }, { "epoch": 0.06120506215701646, "grad_norm": 1.6126753091812134, "learning_rate": 5.455e-06, "loss": 0.4554, "step": 1093 }, { "epoch": 0.06126105946914548, "grad_norm": 1.1165636777877808, "learning_rate": 5.46e-06, "loss": 0.358, "step": 1094 }, { "epoch": 0.0613170567812745, "grad_norm": 1.371865153312683, "learning_rate": 5.465e-06, "loss": 0.4045, "step": 1095 }, { "epoch": 0.06137305409340352, "grad_norm": 1.3662270307540894, "learning_rate": 5.47e-06, "loss": 0.3902, "step": 1096 }, { "epoch": 0.061429051405532537, "grad_norm": 1.5756611824035645, "learning_rate": 5.475e-06, "loss": 0.8939, "step": 1097 }, { "epoch": 0.061485048717661556, "grad_norm": 1.336142897605896, "learning_rate": 5.48e-06, "loss": 0.549, "step": 1098 }, { "epoch": 0.06154104602979057, "grad_norm": 1.1127017736434937, "learning_rate": 5.485000000000001e-06, "loss": 0.462, "step": 1099 }, { "epoch": 0.06159704334191959, "grad_norm": 1.4991849660873413, "learning_rate": 5.49e-06, "loss": 0.4478, "step": 1100 }, { "epoch": 0.061653040654048606, "grad_norm": 1.2078429460525513, "learning_rate": 5.495e-06, "loss": 0.3859, "step": 1101 }, { "epoch": 0.061709037966177625, "grad_norm": 1.0736446380615234, "learning_rate": 5.500000000000001e-06, "loss": 0.3938, "step": 1102 }, { "epoch": 0.061765035278306644, "grad_norm": 1.1046690940856934, "learning_rate": 
5.505000000000001e-06, "loss": 0.335, "step": 1103 }, { "epoch": 0.06182103259043566, "grad_norm": 1.3569049835205078, "learning_rate": 5.510000000000001e-06, "loss": 0.3924, "step": 1104 }, { "epoch": 0.061877029902564676, "grad_norm": 2.3091189861297607, "learning_rate": 5.515e-06, "loss": 0.5528, "step": 1105 }, { "epoch": 0.061933027214693695, "grad_norm": 1.2597846984863281, "learning_rate": 5.5200000000000005e-06, "loss": 0.4925, "step": 1106 }, { "epoch": 0.061989024526822714, "grad_norm": 1.195900321006775, "learning_rate": 5.5250000000000005e-06, "loss": 0.5321, "step": 1107 }, { "epoch": 0.06204502183895173, "grad_norm": 1.2691329717636108, "learning_rate": 5.53e-06, "loss": 0.4351, "step": 1108 }, { "epoch": 0.062101019151080745, "grad_norm": 1.3498635292053223, "learning_rate": 5.535e-06, "loss": 0.4116, "step": 1109 }, { "epoch": 0.062157016463209765, "grad_norm": 1.189966082572937, "learning_rate": 5.54e-06, "loss": 0.4857, "step": 1110 }, { "epoch": 0.062213013775338784, "grad_norm": 1.262734293937683, "learning_rate": 5.545e-06, "loss": 0.6124, "step": 1111 }, { "epoch": 0.0622690110874678, "grad_norm": 1.9112929105758667, "learning_rate": 5.55e-06, "loss": 0.4752, "step": 1112 }, { "epoch": 0.06232500839959682, "grad_norm": 1.508002519607544, "learning_rate": 5.555e-06, "loss": 0.4645, "step": 1113 }, { "epoch": 0.062381005711725834, "grad_norm": 1.417464017868042, "learning_rate": 5.56e-06, "loss": 0.449, "step": 1114 }, { "epoch": 0.06243700302385485, "grad_norm": 1.1880978345870972, "learning_rate": 5.565e-06, "loss": 0.4766, "step": 1115 }, { "epoch": 0.06249300033598387, "grad_norm": 1.3594896793365479, "learning_rate": 5.57e-06, "loss": 0.4782, "step": 1116 }, { "epoch": 0.06254899764811289, "grad_norm": 1.158437967300415, "learning_rate": 5.575e-06, "loss": 0.5021, "step": 1117 }, { "epoch": 0.06260499496024191, "grad_norm": 1.074735164642334, "learning_rate": 5.580000000000001e-06, "loss": 0.4648, "step": 1118 }, { "epoch": 
0.06266099227237093, "grad_norm": 1.4043564796447754, "learning_rate": 5.585e-06, "loss": 0.5121, "step": 1119 }, { "epoch": 0.06271698958449995, "grad_norm": 1.1679229736328125, "learning_rate": 5.59e-06, "loss": 0.3579, "step": 1120 }, { "epoch": 0.06277298689662897, "grad_norm": 1.1285709142684937, "learning_rate": 5.595000000000001e-06, "loss": 0.2937, "step": 1121 }, { "epoch": 0.06282898420875797, "grad_norm": 1.2264790534973145, "learning_rate": 5.600000000000001e-06, "loss": 0.5157, "step": 1122 }, { "epoch": 0.06288498152088699, "grad_norm": 1.1236133575439453, "learning_rate": 5.6050000000000005e-06, "loss": 0.4314, "step": 1123 }, { "epoch": 0.06294097883301601, "grad_norm": 1.0496784448623657, "learning_rate": 5.61e-06, "loss": 0.3789, "step": 1124 }, { "epoch": 0.06299697614514503, "grad_norm": 1.0616122484207153, "learning_rate": 5.6150000000000005e-06, "loss": 0.4315, "step": 1125 }, { "epoch": 0.06305297345727405, "grad_norm": 1.397759199142456, "learning_rate": 5.62e-06, "loss": 0.7762, "step": 1126 }, { "epoch": 0.06310897076940307, "grad_norm": 1.163167119026184, "learning_rate": 5.625e-06, "loss": 0.4815, "step": 1127 }, { "epoch": 0.06316496808153209, "grad_norm": 1.3902819156646729, "learning_rate": 5.63e-06, "loss": 0.5662, "step": 1128 }, { "epoch": 0.06322096539366111, "grad_norm": 1.2462685108184814, "learning_rate": 5.635e-06, "loss": 0.458, "step": 1129 }, { "epoch": 0.06327696270579013, "grad_norm": 1.4905333518981934, "learning_rate": 5.64e-06, "loss": 0.4204, "step": 1130 }, { "epoch": 0.06333296001791915, "grad_norm": 1.259095549583435, "learning_rate": 5.645e-06, "loss": 0.4723, "step": 1131 }, { "epoch": 0.06338895733004815, "grad_norm": 1.1537303924560547, "learning_rate": 5.65e-06, "loss": 0.4517, "step": 1132 }, { "epoch": 0.06344495464217717, "grad_norm": 1.472391963005066, "learning_rate": 5.655000000000001e-06, "loss": 0.451, "step": 1133 }, { "epoch": 0.06350095195430619, "grad_norm": 1.1070297956466675, "learning_rate": 
5.66e-06, "loss": 0.4294, "step": 1134 }, { "epoch": 0.06355694926643521, "grad_norm": 1.4566556215286255, "learning_rate": 5.665e-06, "loss": 0.6163, "step": 1135 }, { "epoch": 0.06361294657856423, "grad_norm": 1.146721363067627, "learning_rate": 5.67e-06, "loss": 0.3609, "step": 1136 }, { "epoch": 0.06366894389069325, "grad_norm": 1.228152871131897, "learning_rate": 5.675000000000001e-06, "loss": 0.5157, "step": 1137 }, { "epoch": 0.06372494120282227, "grad_norm": 1.262759804725647, "learning_rate": 5.680000000000001e-06, "loss": 0.5129, "step": 1138 }, { "epoch": 0.06378093851495129, "grad_norm": 1.3428313732147217, "learning_rate": 5.685e-06, "loss": 0.4894, "step": 1139 }, { "epoch": 0.0638369358270803, "grad_norm": 1.4424748420715332, "learning_rate": 5.690000000000001e-06, "loss": 0.4817, "step": 1140 }, { "epoch": 0.06389293313920932, "grad_norm": 1.4872840642929077, "learning_rate": 5.6950000000000005e-06, "loss": 0.4142, "step": 1141 }, { "epoch": 0.06394893045133833, "grad_norm": 1.3789427280426025, "learning_rate": 5.7000000000000005e-06, "loss": 0.4699, "step": 1142 }, { "epoch": 0.06400492776346735, "grad_norm": 1.0845141410827637, "learning_rate": 5.705e-06, "loss": 0.4109, "step": 1143 }, { "epoch": 0.06406092507559637, "grad_norm": 1.1307657957077026, "learning_rate": 5.71e-06, "loss": 0.4811, "step": 1144 }, { "epoch": 0.06411692238772539, "grad_norm": 1.0611789226531982, "learning_rate": 5.715e-06, "loss": 0.4281, "step": 1145 }, { "epoch": 0.0641729196998544, "grad_norm": 1.1467761993408203, "learning_rate": 5.72e-06, "loss": 0.3959, "step": 1146 }, { "epoch": 0.06422891701198342, "grad_norm": 1.3303271532058716, "learning_rate": 5.725e-06, "loss": 0.4611, "step": 1147 }, { "epoch": 0.06428491432411244, "grad_norm": 1.106090784072876, "learning_rate": 5.73e-06, "loss": 0.451, "step": 1148 }, { "epoch": 0.06434091163624146, "grad_norm": 1.0233514308929443, "learning_rate": 5.735e-06, "loss": 0.4053, "step": 1149 }, { "epoch": 0.06439690894837048, 
"grad_norm": 0.9405644536018372, "learning_rate": 5.74e-06, "loss": 0.3488, "step": 1150 }, { "epoch": 0.0644529062604995, "grad_norm": 1.0472259521484375, "learning_rate": 5.745e-06, "loss": 0.4616, "step": 1151 }, { "epoch": 0.06450890357262852, "grad_norm": 1.146802544593811, "learning_rate": 5.750000000000001e-06, "loss": 0.4724, "step": 1152 }, { "epoch": 0.06456490088475753, "grad_norm": 1.3439722061157227, "learning_rate": 5.755e-06, "loss": 0.5802, "step": 1153 }, { "epoch": 0.06462089819688654, "grad_norm": 1.4531556367874146, "learning_rate": 5.76e-06, "loss": 0.5034, "step": 1154 }, { "epoch": 0.06467689550901556, "grad_norm": 1.0436081886291504, "learning_rate": 5.765e-06, "loss": 0.3639, "step": 1155 }, { "epoch": 0.06473289282114458, "grad_norm": 1.1610552072525024, "learning_rate": 5.770000000000001e-06, "loss": 0.4778, "step": 1156 }, { "epoch": 0.0647888901332736, "grad_norm": 1.209583044052124, "learning_rate": 5.775000000000001e-06, "loss": 0.4406, "step": 1157 }, { "epoch": 0.06484488744540262, "grad_norm": 1.3594990968704224, "learning_rate": 5.78e-06, "loss": 0.5006, "step": 1158 }, { "epoch": 0.06490088475753164, "grad_norm": 1.2716442346572876, "learning_rate": 5.7850000000000005e-06, "loss": 0.5065, "step": 1159 }, { "epoch": 0.06495688206966066, "grad_norm": 1.2829830646514893, "learning_rate": 5.7900000000000005e-06, "loss": 0.6007, "step": 1160 }, { "epoch": 0.06501287938178968, "grad_norm": 1.3459588289260864, "learning_rate": 5.795e-06, "loss": 0.5943, "step": 1161 }, { "epoch": 0.0650688766939187, "grad_norm": 1.4075367450714111, "learning_rate": 5.8e-06, "loss": 0.5364, "step": 1162 }, { "epoch": 0.0651248740060477, "grad_norm": 1.419149398803711, "learning_rate": 5.805e-06, "loss": 0.4434, "step": 1163 }, { "epoch": 0.06518087131817672, "grad_norm": 1.4249473810195923, "learning_rate": 5.81e-06, "loss": 0.5376, "step": 1164 }, { "epoch": 0.06523686863030574, "grad_norm": 1.5048781633377075, "learning_rate": 5.815e-06, "loss": 
0.5568, "step": 1165 }, { "epoch": 0.06529286594243476, "grad_norm": 1.059531807899475, "learning_rate": 5.82e-06, "loss": 0.3689, "step": 1166 }, { "epoch": 0.06534886325456378, "grad_norm": 1.1879932880401611, "learning_rate": 5.825000000000001e-06, "loss": 0.4556, "step": 1167 }, { "epoch": 0.0654048605666928, "grad_norm": 2.4681849479675293, "learning_rate": 5.83e-06, "loss": 0.4627, "step": 1168 }, { "epoch": 0.06546085787882182, "grad_norm": 1.6035898923873901, "learning_rate": 5.835e-06, "loss": 0.4652, "step": 1169 }, { "epoch": 0.06551685519095084, "grad_norm": 1.147926688194275, "learning_rate": 5.84e-06, "loss": 0.3389, "step": 1170 }, { "epoch": 0.06557285250307986, "grad_norm": 1.3321911096572876, "learning_rate": 5.845000000000001e-06, "loss": 0.5156, "step": 1171 }, { "epoch": 0.06562884981520888, "grad_norm": 1.3565380573272705, "learning_rate": 5.850000000000001e-06, "loss": 0.4251, "step": 1172 }, { "epoch": 0.06568484712733788, "grad_norm": 1.0869241952896118, "learning_rate": 5.855e-06, "loss": 0.4787, "step": 1173 }, { "epoch": 0.0657408444394669, "grad_norm": 1.4895256757736206, "learning_rate": 5.86e-06, "loss": 0.6179, "step": 1174 }, { "epoch": 0.06579684175159592, "grad_norm": 1.591769814491272, "learning_rate": 5.865000000000001e-06, "loss": 0.5253, "step": 1175 }, { "epoch": 0.06585283906372494, "grad_norm": 1.162840485572815, "learning_rate": 5.8700000000000005e-06, "loss": 0.4676, "step": 1176 }, { "epoch": 0.06590883637585396, "grad_norm": 1.1410831212997437, "learning_rate": 5.875e-06, "loss": 0.3855, "step": 1177 }, { "epoch": 0.06596483368798298, "grad_norm": 1.4543075561523438, "learning_rate": 5.8800000000000005e-06, "loss": 0.4941, "step": 1178 }, { "epoch": 0.066020831000112, "grad_norm": 1.4013811349868774, "learning_rate": 5.885e-06, "loss": 0.476, "step": 1179 }, { "epoch": 0.06607682831224101, "grad_norm": 2.4917755126953125, "learning_rate": 5.89e-06, "loss": 0.3922, "step": 1180 }, { "epoch": 0.06613282562437003, 
"grad_norm": 1.086756944656372, "learning_rate": 5.895e-06, "loss": 0.4143, "step": 1181 }, { "epoch": 0.06618882293649905, "grad_norm": 1.2095462083816528, "learning_rate": 5.9e-06, "loss": 0.4772, "step": 1182 }, { "epoch": 0.06624482024862807, "grad_norm": 1.4608217477798462, "learning_rate": 5.905e-06, "loss": 0.4348, "step": 1183 }, { "epoch": 0.06630081756075708, "grad_norm": 1.523728609085083, "learning_rate": 5.91e-06, "loss": 0.6067, "step": 1184 }, { "epoch": 0.0663568148728861, "grad_norm": 1.1437195539474487, "learning_rate": 5.915e-06, "loss": 0.3993, "step": 1185 }, { "epoch": 0.06641281218501512, "grad_norm": 1.146959662437439, "learning_rate": 5.920000000000001e-06, "loss": 0.474, "step": 1186 }, { "epoch": 0.06646880949714414, "grad_norm": 1.1326645612716675, "learning_rate": 5.925e-06, "loss": 0.3891, "step": 1187 }, { "epoch": 0.06652480680927315, "grad_norm": 1.0731227397918701, "learning_rate": 5.93e-06, "loss": 0.3448, "step": 1188 }, { "epoch": 0.06658080412140217, "grad_norm": 1.418973684310913, "learning_rate": 5.935e-06, "loss": 0.4704, "step": 1189 }, { "epoch": 0.06663680143353119, "grad_norm": 1.1930632591247559, "learning_rate": 5.940000000000001e-06, "loss": 0.3829, "step": 1190 }, { "epoch": 0.06669279874566021, "grad_norm": 1.1238776445388794, "learning_rate": 5.945000000000001e-06, "loss": 0.3387, "step": 1191 }, { "epoch": 0.06674879605778923, "grad_norm": 1.3016777038574219, "learning_rate": 5.95e-06, "loss": 0.5368, "step": 1192 }, { "epoch": 0.06680479336991825, "grad_norm": 1.2945044040679932, "learning_rate": 5.955000000000001e-06, "loss": 0.4609, "step": 1193 }, { "epoch": 0.06686079068204726, "grad_norm": 1.4390751123428345, "learning_rate": 5.9600000000000005e-06, "loss": 0.476, "step": 1194 }, { "epoch": 0.06691678799417627, "grad_norm": 1.038458228111267, "learning_rate": 5.9650000000000005e-06, "loss": 0.3525, "step": 1195 }, { "epoch": 0.0669727853063053, "grad_norm": 1.1566381454467773, "learning_rate": 
5.9700000000000004e-06, "loss": 0.4043, "step": 1196 }, { "epoch": 0.06702878261843431, "grad_norm": 1.3829346895217896, "learning_rate": 5.975e-06, "loss": 0.3849, "step": 1197 }, { "epoch": 0.06708477993056333, "grad_norm": 1.5022857189178467, "learning_rate": 5.98e-06, "loss": 0.5313, "step": 1198 }, { "epoch": 0.06714077724269235, "grad_norm": 1.3618924617767334, "learning_rate": 5.985e-06, "loss": 0.4925, "step": 1199 }, { "epoch": 0.06719677455482137, "grad_norm": 1.1289489269256592, "learning_rate": 5.99e-06, "loss": 0.4691, "step": 1200 }, { "epoch": 0.06725277186695039, "grad_norm": 1.4186307191848755, "learning_rate": 5.995e-06, "loss": 0.5976, "step": 1201 }, { "epoch": 0.06730876917907941, "grad_norm": 1.2453892230987549, "learning_rate": 6e-06, "loss": 0.7152, "step": 1202 }, { "epoch": 0.06736476649120843, "grad_norm": 1.31748628616333, "learning_rate": 6.005e-06, "loss": 0.6554, "step": 1203 }, { "epoch": 0.06742076380333743, "grad_norm": 1.271621584892273, "learning_rate": 6.01e-06, "loss": 0.5174, "step": 1204 }, { "epoch": 0.06747676111546645, "grad_norm": 1.4273734092712402, "learning_rate": 6.015000000000001e-06, "loss": 0.5568, "step": 1205 }, { "epoch": 0.06753275842759547, "grad_norm": 1.1785576343536377, "learning_rate": 6.02e-06, "loss": 0.4569, "step": 1206 }, { "epoch": 0.06758875573972449, "grad_norm": 1.4180866479873657, "learning_rate": 6.025e-06, "loss": 0.5308, "step": 1207 }, { "epoch": 0.06764475305185351, "grad_norm": 1.1484123468399048, "learning_rate": 6.03e-06, "loss": 0.3796, "step": 1208 }, { "epoch": 0.06770075036398253, "grad_norm": 1.2170040607452393, "learning_rate": 6.035000000000001e-06, "loss": 0.4532, "step": 1209 }, { "epoch": 0.06775674767611155, "grad_norm": 1.2905791997909546, "learning_rate": 6.040000000000001e-06, "loss": 0.4595, "step": 1210 }, { "epoch": 0.06781274498824057, "grad_norm": 1.2938952445983887, "learning_rate": 6.045e-06, "loss": 0.4142, "step": 1211 }, { "epoch": 0.06786874230036959, "grad_norm": 
1.3278106451034546, "learning_rate": 6.0500000000000005e-06, "loss": 0.4678, "step": 1212 }, { "epoch": 0.0679247396124986, "grad_norm": 1.311219334602356, "learning_rate": 6.0550000000000005e-06, "loss": 0.3523, "step": 1213 }, { "epoch": 0.06798073692462762, "grad_norm": 1.160986304283142, "learning_rate": 6.0600000000000004e-06, "loss": 0.4355, "step": 1214 }, { "epoch": 0.06803673423675663, "grad_norm": 1.3731441497802734, "learning_rate": 6.065e-06, "loss": 0.5311, "step": 1215 }, { "epoch": 0.06809273154888565, "grad_norm": 1.191652536392212, "learning_rate": 6.07e-06, "loss": 0.4536, "step": 1216 }, { "epoch": 0.06814872886101467, "grad_norm": 1.0599191188812256, "learning_rate": 6.075e-06, "loss": 0.4172, "step": 1217 }, { "epoch": 0.06820472617314369, "grad_norm": 1.1530780792236328, "learning_rate": 6.08e-06, "loss": 0.4636, "step": 1218 }, { "epoch": 0.0682607234852727, "grad_norm": 1.4733500480651855, "learning_rate": 6.085e-06, "loss": 0.6163, "step": 1219 }, { "epoch": 0.06831672079740173, "grad_norm": 1.335119605064392, "learning_rate": 6.090000000000001e-06, "loss": 0.5694, "step": 1220 }, { "epoch": 0.06837271810953074, "grad_norm": 1.1457488536834717, "learning_rate": 6.095e-06, "loss": 0.4272, "step": 1221 }, { "epoch": 0.06842871542165976, "grad_norm": 1.125653862953186, "learning_rate": 6.1e-06, "loss": 0.3819, "step": 1222 }, { "epoch": 0.06848471273378878, "grad_norm": 1.4815268516540527, "learning_rate": 6.105e-06, "loss": 0.6059, "step": 1223 }, { "epoch": 0.0685407100459178, "grad_norm": 1.072304606437683, "learning_rate": 6.110000000000001e-06, "loss": 0.5435, "step": 1224 }, { "epoch": 0.06859670735804681, "grad_norm": 1.2699904441833496, "learning_rate": 6.115000000000001e-06, "loss": 0.4013, "step": 1225 }, { "epoch": 0.06865270467017583, "grad_norm": 1.4592626094818115, "learning_rate": 6.12e-06, "loss": 0.4995, "step": 1226 }, { "epoch": 0.06870870198230485, "grad_norm": 1.1897928714752197, "learning_rate": 6.125e-06, "loss": 0.3772, 
"step": 1227 }, { "epoch": 0.06876469929443386, "grad_norm": 1.142614483833313, "learning_rate": 6.130000000000001e-06, "loss": 0.3635, "step": 1228 }, { "epoch": 0.06882069660656288, "grad_norm": 1.2323251962661743, "learning_rate": 6.1350000000000006e-06, "loss": 0.4454, "step": 1229 }, { "epoch": 0.0688766939186919, "grad_norm": 1.40288507938385, "learning_rate": 6.1400000000000005e-06, "loss": 0.5085, "step": 1230 }, { "epoch": 0.06893269123082092, "grad_norm": 1.1993368864059448, "learning_rate": 6.1450000000000005e-06, "loss": 0.3992, "step": 1231 }, { "epoch": 0.06898868854294994, "grad_norm": 1.6297578811645508, "learning_rate": 6.15e-06, "loss": 0.5697, "step": 1232 }, { "epoch": 0.06904468585507896, "grad_norm": 1.4021323919296265, "learning_rate": 6.155e-06, "loss": 0.3727, "step": 1233 }, { "epoch": 0.06910068316720798, "grad_norm": 1.0993765592575073, "learning_rate": 6.16e-06, "loss": 0.6237, "step": 1234 }, { "epoch": 0.06915668047933698, "grad_norm": 1.3789044618606567, "learning_rate": 6.165e-06, "loss": 0.5462, "step": 1235 }, { "epoch": 0.069212677791466, "grad_norm": 1.5529453754425049, "learning_rate": 6.17e-06, "loss": 0.5599, "step": 1236 }, { "epoch": 0.06926867510359502, "grad_norm": 1.2780702114105225, "learning_rate": 6.175e-06, "loss": 0.6287, "step": 1237 }, { "epoch": 0.06932467241572404, "grad_norm": 0.9809923768043518, "learning_rate": 6.18e-06, "loss": 0.3876, "step": 1238 }, { "epoch": 0.06938066972785306, "grad_norm": 1.0424203872680664, "learning_rate": 6.185000000000001e-06, "loss": 0.3569, "step": 1239 }, { "epoch": 0.06943666703998208, "grad_norm": 1.1957716941833496, "learning_rate": 6.19e-06, "loss": 0.5428, "step": 1240 }, { "epoch": 0.0694926643521111, "grad_norm": 1.3562878370285034, "learning_rate": 6.195e-06, "loss": 0.4914, "step": 1241 }, { "epoch": 0.06954866166424012, "grad_norm": 1.4439308643341064, "learning_rate": 6.2e-06, "loss": 0.5092, "step": 1242 }, { "epoch": 0.06960465897636914, "grad_norm": 
1.3014452457427979, "learning_rate": 6.205000000000001e-06, "loss": 0.4253, "step": 1243 }, { "epoch": 0.06966065628849816, "grad_norm": 1.09708571434021, "learning_rate": 6.210000000000001e-06, "loss": 0.4735, "step": 1244 }, { "epoch": 0.06971665360062718, "grad_norm": 1.2579667568206787, "learning_rate": 6.215e-06, "loss": 0.3417, "step": 1245 }, { "epoch": 0.06977265091275618, "grad_norm": 1.1711158752441406, "learning_rate": 6.22e-06, "loss": 0.5578, "step": 1246 }, { "epoch": 0.0698286482248852, "grad_norm": 1.3848479986190796, "learning_rate": 6.2250000000000005e-06, "loss": 0.3832, "step": 1247 }, { "epoch": 0.06988464553701422, "grad_norm": 1.2990936040878296, "learning_rate": 6.2300000000000005e-06, "loss": 0.5252, "step": 1248 }, { "epoch": 0.06994064284914324, "grad_norm": 1.3192658424377441, "learning_rate": 6.2350000000000004e-06, "loss": 0.6234, "step": 1249 }, { "epoch": 0.06999664016127226, "grad_norm": 1.1719008684158325, "learning_rate": 6.24e-06, "loss": 0.4339, "step": 1250 }, { "epoch": 0.07005263747340128, "grad_norm": 1.21258544921875, "learning_rate": 6.245e-06, "loss": 0.4835, "step": 1251 }, { "epoch": 0.0701086347855303, "grad_norm": 1.2766406536102295, "learning_rate": 6.25e-06, "loss": 0.5146, "step": 1252 }, { "epoch": 0.07016463209765932, "grad_norm": 1.305820107460022, "learning_rate": 6.254999999999999e-06, "loss": 0.3805, "step": 1253 }, { "epoch": 0.07022062940978833, "grad_norm": 1.2711191177368164, "learning_rate": 6.26e-06, "loss": 0.4137, "step": 1254 }, { "epoch": 0.07027662672191735, "grad_norm": 1.3470959663391113, "learning_rate": 6.265e-06, "loss": 0.4417, "step": 1255 }, { "epoch": 0.07033262403404636, "grad_norm": 0.9316703677177429, "learning_rate": 6.270000000000001e-06, "loss": 0.3569, "step": 1256 }, { "epoch": 0.07038862134617538, "grad_norm": 1.3561193943023682, "learning_rate": 6.275e-06, "loss": 0.5043, "step": 1257 }, { "epoch": 0.0704446186583044, "grad_norm": 1.0972591638565063, "learning_rate": 6.28e-06, 
"loss": 0.4823, "step": 1258 }, { "epoch": 0.07050061597043342, "grad_norm": 1.1062606573104858, "learning_rate": 6.285000000000001e-06, "loss": 0.3591, "step": 1259 }, { "epoch": 0.07055661328256244, "grad_norm": 1.186181902885437, "learning_rate": 6.29e-06, "loss": 0.6183, "step": 1260 }, { "epoch": 0.07061261059469146, "grad_norm": 1.2585527896881104, "learning_rate": 6.295000000000001e-06, "loss": 0.5375, "step": 1261 }, { "epoch": 0.07066860790682047, "grad_norm": 1.2941310405731201, "learning_rate": 6.300000000000001e-06, "loss": 0.4806, "step": 1262 }, { "epoch": 0.0707246052189495, "grad_norm": 1.3581117391586304, "learning_rate": 6.305e-06, "loss": 0.5579, "step": 1263 }, { "epoch": 0.07078060253107851, "grad_norm": 1.5740511417388916, "learning_rate": 6.3100000000000006e-06, "loss": 0.5084, "step": 1264 }, { "epoch": 0.07083659984320753, "grad_norm": 1.3985000848770142, "learning_rate": 6.315e-06, "loss": 0.4166, "step": 1265 }, { "epoch": 0.07089259715533654, "grad_norm": 1.2136940956115723, "learning_rate": 6.320000000000001e-06, "loss": 0.4584, "step": 1266 }, { "epoch": 0.07094859446746556, "grad_norm": 1.1227129697799683, "learning_rate": 6.3250000000000004e-06, "loss": 0.388, "step": 1267 }, { "epoch": 0.07100459177959458, "grad_norm": 1.2944831848144531, "learning_rate": 6.3299999999999995e-06, "loss": 0.5666, "step": 1268 }, { "epoch": 0.0710605890917236, "grad_norm": 1.0914361476898193, "learning_rate": 6.335e-06, "loss": 0.3782, "step": 1269 }, { "epoch": 0.07111658640385261, "grad_norm": 1.227116346359253, "learning_rate": 6.34e-06, "loss": 0.5257, "step": 1270 }, { "epoch": 0.07117258371598163, "grad_norm": 1.1540474891662598, "learning_rate": 6.345000000000001e-06, "loss": 0.3348, "step": 1271 }, { "epoch": 0.07122858102811065, "grad_norm": 1.340309977531433, "learning_rate": 6.35e-06, "loss": 0.465, "step": 1272 }, { "epoch": 0.07128457834023967, "grad_norm": 1.0309977531433105, "learning_rate": 6.355e-06, "loss": 0.4303, "step": 1273 }, { 
"epoch": 0.07134057565236869, "grad_norm": 1.1087217330932617, "learning_rate": 6.360000000000001e-06, "loss": 0.3617, "step": 1274 }, { "epoch": 0.07139657296449771, "grad_norm": 1.0575168132781982, "learning_rate": 6.365e-06, "loss": 0.4675, "step": 1275 }, { "epoch": 0.07145257027662673, "grad_norm": 3.266204833984375, "learning_rate": 6.370000000000001e-06, "loss": 0.5266, "step": 1276 }, { "epoch": 0.07150856758875573, "grad_norm": 1.163043737411499, "learning_rate": 6.375000000000001e-06, "loss": 0.3054, "step": 1277 }, { "epoch": 0.07156456490088475, "grad_norm": 1.5021004676818848, "learning_rate": 6.38e-06, "loss": 0.6384, "step": 1278 }, { "epoch": 0.07162056221301377, "grad_norm": 1.5890547037124634, "learning_rate": 6.385000000000001e-06, "loss": 0.6123, "step": 1279 }, { "epoch": 0.07167655952514279, "grad_norm": 1.2791961431503296, "learning_rate": 6.39e-06, "loss": 0.6111, "step": 1280 }, { "epoch": 0.07173255683727181, "grad_norm": 1.395918846130371, "learning_rate": 6.395000000000001e-06, "loss": 0.3987, "step": 1281 }, { "epoch": 0.07178855414940083, "grad_norm": 1.292610764503479, "learning_rate": 6.4000000000000006e-06, "loss": 0.4165, "step": 1282 }, { "epoch": 0.07184455146152985, "grad_norm": 1.6642959117889404, "learning_rate": 6.405e-06, "loss": 0.4285, "step": 1283 }, { "epoch": 0.07190054877365887, "grad_norm": 1.4356571435928345, "learning_rate": 6.4100000000000005e-06, "loss": 0.4188, "step": 1284 }, { "epoch": 0.07195654608578789, "grad_norm": 1.263612985610962, "learning_rate": 6.415e-06, "loss": 0.4058, "step": 1285 }, { "epoch": 0.0720125433979169, "grad_norm": 1.3215962648391724, "learning_rate": 6.4199999999999995e-06, "loss": 0.5013, "step": 1286 }, { "epoch": 0.07206854071004591, "grad_norm": 1.2451215982437134, "learning_rate": 6.425e-06, "loss": 0.5058, "step": 1287 }, { "epoch": 0.07212453802217493, "grad_norm": 1.1814186573028564, "learning_rate": 6.43e-06, "loss": 0.3944, "step": 1288 }, { "epoch": 0.07218053533430395, 
"grad_norm": 1.1279220581054688, "learning_rate": 6.435000000000001e-06, "loss": 0.4033, "step": 1289 }, { "epoch": 0.07223653264643297, "grad_norm": 1.166107416152954, "learning_rate": 6.44e-06, "loss": 0.4462, "step": 1290 }, { "epoch": 0.07229252995856199, "grad_norm": 1.1797864437103271, "learning_rate": 6.444999999999999e-06, "loss": 0.3905, "step": 1291 }, { "epoch": 0.07234852727069101, "grad_norm": 1.0709209442138672, "learning_rate": 6.45e-06, "loss": 0.3772, "step": 1292 }, { "epoch": 0.07240452458282003, "grad_norm": 1.115615963935852, "learning_rate": 6.455e-06, "loss": 0.3869, "step": 1293 }, { "epoch": 0.07246052189494905, "grad_norm": 1.7949514389038086, "learning_rate": 6.460000000000001e-06, "loss": 0.5933, "step": 1294 }, { "epoch": 0.07251651920707806, "grad_norm": 1.6825324296951294, "learning_rate": 6.465e-06, "loss": 0.5273, "step": 1295 }, { "epoch": 0.07257251651920708, "grad_norm": 1.1637367010116577, "learning_rate": 6.47e-06, "loss": 0.4111, "step": 1296 }, { "epoch": 0.07262851383133609, "grad_norm": 1.2039698362350464, "learning_rate": 6.475000000000001e-06, "loss": 0.5101, "step": 1297 }, { "epoch": 0.07268451114346511, "grad_norm": 1.3971374034881592, "learning_rate": 6.48e-06, "loss": 0.4956, "step": 1298 }, { "epoch": 0.07274050845559413, "grad_norm": 1.3009939193725586, "learning_rate": 6.485000000000001e-06, "loss": 0.371, "step": 1299 }, { "epoch": 0.07279650576772315, "grad_norm": 1.4169080257415771, "learning_rate": 6.4900000000000005e-06, "loss": 0.7171, "step": 1300 }, { "epoch": 0.07285250307985217, "grad_norm": 2.2091469764709473, "learning_rate": 6.495e-06, "loss": 0.565, "step": 1301 }, { "epoch": 0.07290850039198118, "grad_norm": 1.427626371383667, "learning_rate": 6.5000000000000004e-06, "loss": 0.39, "step": 1302 }, { "epoch": 0.0729644977041102, "grad_norm": 1.2434399127960205, "learning_rate": 6.505e-06, "loss": 0.4394, "step": 1303 }, { "epoch": 0.07302049501623922, "grad_norm": 1.6202256679534912, "learning_rate": 
6.510000000000001e-06, "loss": 0.5287, "step": 1304 }, { "epoch": 0.07307649232836824, "grad_norm": 2.5532824993133545, "learning_rate": 6.515e-06, "loss": 0.577, "step": 1305 }, { "epoch": 0.07313248964049726, "grad_norm": 1.2064380645751953, "learning_rate": 6.519999999999999e-06, "loss": 0.6086, "step": 1306 }, { "epoch": 0.07318848695262628, "grad_norm": 1.3083627223968506, "learning_rate": 6.525e-06, "loss": 0.4166, "step": 1307 }, { "epoch": 0.07324448426475529, "grad_norm": 1.352712631225586, "learning_rate": 6.53e-06, "loss": 0.4438, "step": 1308 }, { "epoch": 0.0733004815768843, "grad_norm": 1.2328424453735352, "learning_rate": 6.535000000000001e-06, "loss": 0.4751, "step": 1309 }, { "epoch": 0.07335647888901332, "grad_norm": 1.2588063478469849, "learning_rate": 6.54e-06, "loss": 0.5114, "step": 1310 }, { "epoch": 0.07341247620114234, "grad_norm": 1.082762360572815, "learning_rate": 6.545e-06, "loss": 0.4373, "step": 1311 }, { "epoch": 0.07346847351327136, "grad_norm": 1.2910832166671753, "learning_rate": 6.550000000000001e-06, "loss": 0.4089, "step": 1312 }, { "epoch": 0.07352447082540038, "grad_norm": 1.1955432891845703, "learning_rate": 6.555e-06, "loss": 0.41, "step": 1313 }, { "epoch": 0.0735804681375294, "grad_norm": 1.530601143836975, "learning_rate": 6.560000000000001e-06, "loss": 0.7009, "step": 1314 }, { "epoch": 0.07363646544965842, "grad_norm": 1.209916353225708, "learning_rate": 6.565000000000001e-06, "loss": 0.6077, "step": 1315 }, { "epoch": 0.07369246276178744, "grad_norm": 1.1398082971572876, "learning_rate": 6.57e-06, "loss": 0.4088, "step": 1316 }, { "epoch": 0.07374846007391646, "grad_norm": 1.4278721809387207, "learning_rate": 6.5750000000000006e-06, "loss": 0.5536, "step": 1317 }, { "epoch": 0.07380445738604546, "grad_norm": 1.253266453742981, "learning_rate": 6.58e-06, "loss": 0.4177, "step": 1318 }, { "epoch": 0.07386045469817448, "grad_norm": 1.234006404876709, "learning_rate": 6.5850000000000005e-06, "loss": 0.3958, "step": 1319 
}, { "epoch": 0.0739164520103035, "grad_norm": 1.3969980478286743, "learning_rate": 6.5900000000000004e-06, "loss": 0.5297, "step": 1320 }, { "epoch": 0.07397244932243252, "grad_norm": 1.5530579090118408, "learning_rate": 6.5949999999999995e-06, "loss": 0.5444, "step": 1321 }, { "epoch": 0.07402844663456154, "grad_norm": 1.1098066568374634, "learning_rate": 6.6e-06, "loss": 0.4511, "step": 1322 }, { "epoch": 0.07408444394669056, "grad_norm": 1.1944104433059692, "learning_rate": 6.605e-06, "loss": 0.3559, "step": 1323 }, { "epoch": 0.07414044125881958, "grad_norm": 1.2156531810760498, "learning_rate": 6.610000000000001e-06, "loss": 0.4142, "step": 1324 }, { "epoch": 0.0741964385709486, "grad_norm": 1.2404552698135376, "learning_rate": 6.615e-06, "loss": 0.4893, "step": 1325 }, { "epoch": 0.07425243588307762, "grad_norm": 1.3357646465301514, "learning_rate": 6.62e-06, "loss": 0.4867, "step": 1326 }, { "epoch": 0.07430843319520664, "grad_norm": 1.24103844165802, "learning_rate": 6.625000000000001e-06, "loss": 0.563, "step": 1327 }, { "epoch": 0.07436443050733564, "grad_norm": 0.9767637848854065, "learning_rate": 6.63e-06, "loss": 0.4116, "step": 1328 }, { "epoch": 0.07442042781946466, "grad_norm": 1.257356882095337, "learning_rate": 6.635000000000001e-06, "loss": 0.487, "step": 1329 }, { "epoch": 0.07447642513159368, "grad_norm": 1.5664671659469604, "learning_rate": 6.640000000000001e-06, "loss": 0.4498, "step": 1330 }, { "epoch": 0.0745324224437227, "grad_norm": 1.2948386669158936, "learning_rate": 6.645e-06, "loss": 0.4589, "step": 1331 }, { "epoch": 0.07458841975585172, "grad_norm": 1.3933333158493042, "learning_rate": 6.650000000000001e-06, "loss": 0.5161, "step": 1332 }, { "epoch": 0.07464441706798074, "grad_norm": 1.3152962923049927, "learning_rate": 6.655e-06, "loss": 0.539, "step": 1333 }, { "epoch": 0.07470041438010976, "grad_norm": 1.059032678604126, "learning_rate": 6.660000000000001e-06, "loss": 0.5141, "step": 1334 }, { "epoch": 0.07475641169223878, 
"grad_norm": 1.3351902961730957, "learning_rate": 6.6650000000000006e-06, "loss": 0.4759, "step": 1335 }, { "epoch": 0.0748124090043678, "grad_norm": 1.3436874151229858, "learning_rate": 6.67e-06, "loss": 0.5057, "step": 1336 }, { "epoch": 0.07486840631649681, "grad_norm": 1.3562463521957397, "learning_rate": 6.6750000000000005e-06, "loss": 0.419, "step": 1337 }, { "epoch": 0.07492440362862583, "grad_norm": 1.434891939163208, "learning_rate": 6.68e-06, "loss": 0.4996, "step": 1338 }, { "epoch": 0.07498040094075484, "grad_norm": 1.249218463897705, "learning_rate": 6.685000000000001e-06, "loss": 0.3987, "step": 1339 }, { "epoch": 0.07503639825288386, "grad_norm": 1.236860990524292, "learning_rate": 6.69e-06, "loss": 0.5033, "step": 1340 }, { "epoch": 0.07509239556501288, "grad_norm": 1.1586852073669434, "learning_rate": 6.695e-06, "loss": 0.4026, "step": 1341 }, { "epoch": 0.0751483928771419, "grad_norm": 1.1438720226287842, "learning_rate": 6.700000000000001e-06, "loss": 0.411, "step": 1342 }, { "epoch": 0.07520439018927091, "grad_norm": 1.0562424659729004, "learning_rate": 6.705e-06, "loss": 0.346, "step": 1343 }, { "epoch": 0.07526038750139993, "grad_norm": 1.7857998609542847, "learning_rate": 6.710000000000001e-06, "loss": 0.3514, "step": 1344 }, { "epoch": 0.07531638481352895, "grad_norm": 1.415966510772705, "learning_rate": 6.715e-06, "loss": 0.4802, "step": 1345 }, { "epoch": 0.07537238212565797, "grad_norm": 1.238795280456543, "learning_rate": 6.72e-06, "loss": 0.4715, "step": 1346 }, { "epoch": 0.07542837943778699, "grad_norm": 1.2985560894012451, "learning_rate": 6.725000000000001e-06, "loss": 0.4027, "step": 1347 }, { "epoch": 0.07548437674991601, "grad_norm": 1.053097128868103, "learning_rate": 6.73e-06, "loss": 0.3804, "step": 1348 }, { "epoch": 0.07554037406204502, "grad_norm": 1.1594445705413818, "learning_rate": 6.735e-06, "loss": 0.4952, "step": 1349 }, { "epoch": 0.07559637137417403, "grad_norm": 1.3159594535827637, "learning_rate": 
6.740000000000001e-06, "loss": 0.3801, "step": 1350 }, { "epoch": 0.07565236868630305, "grad_norm": 1.2488571405410767, "learning_rate": 6.745e-06, "loss": 0.5075, "step": 1351 }, { "epoch": 0.07570836599843207, "grad_norm": 1.1797436475753784, "learning_rate": 6.750000000000001e-06, "loss": 0.4398, "step": 1352 }, { "epoch": 0.07576436331056109, "grad_norm": 1.1828269958496094, "learning_rate": 6.7550000000000005e-06, "loss": 0.5035, "step": 1353 }, { "epoch": 0.07582036062269011, "grad_norm": 1.5088539123535156, "learning_rate": 6.76e-06, "loss": 0.4993, "step": 1354 }, { "epoch": 0.07587635793481913, "grad_norm": 1.2271095514297485, "learning_rate": 6.7650000000000005e-06, "loss": 0.4532, "step": 1355 }, { "epoch": 0.07593235524694815, "grad_norm": 1.0573952198028564, "learning_rate": 6.7699999999999996e-06, "loss": 0.3829, "step": 1356 }, { "epoch": 0.07598835255907717, "grad_norm": 1.1782804727554321, "learning_rate": 6.775000000000001e-06, "loss": 0.4338, "step": 1357 }, { "epoch": 0.07604434987120619, "grad_norm": 1.1794538497924805, "learning_rate": 6.78e-06, "loss": 0.4669, "step": 1358 }, { "epoch": 0.07610034718333519, "grad_norm": 1.2195823192596436, "learning_rate": 6.784999999999999e-06, "loss": 0.614, "step": 1359 }, { "epoch": 0.07615634449546421, "grad_norm": 1.3329391479492188, "learning_rate": 6.79e-06, "loss": 0.4395, "step": 1360 }, { "epoch": 0.07621234180759323, "grad_norm": 1.1854500770568848, "learning_rate": 6.795e-06, "loss": 0.5083, "step": 1361 }, { "epoch": 0.07626833911972225, "grad_norm": 1.340144395828247, "learning_rate": 6.800000000000001e-06, "loss": 0.4426, "step": 1362 }, { "epoch": 0.07632433643185127, "grad_norm": 1.565130352973938, "learning_rate": 6.805e-06, "loss": 0.4845, "step": 1363 }, { "epoch": 0.07638033374398029, "grad_norm": 2.765289306640625, "learning_rate": 6.81e-06, "loss": 0.5165, "step": 1364 }, { "epoch": 0.07643633105610931, "grad_norm": 1.2596156597137451, "learning_rate": 6.815000000000001e-06, "loss": 
0.619, "step": 1365 }, { "epoch": 0.07649232836823833, "grad_norm": 1.4560434818267822, "learning_rate": 6.82e-06, "loss": 0.4165, "step": 1366 }, { "epoch": 0.07654832568036735, "grad_norm": 1.0805444717407227, "learning_rate": 6.825000000000001e-06, "loss": 0.4847, "step": 1367 }, { "epoch": 0.07660432299249637, "grad_norm": 1.2908236980438232, "learning_rate": 6.830000000000001e-06, "loss": 0.4831, "step": 1368 }, { "epoch": 0.07666032030462538, "grad_norm": 1.1790556907653809, "learning_rate": 6.835e-06, "loss": 0.4685, "step": 1369 }, { "epoch": 0.07671631761675439, "grad_norm": 1.2401238679885864, "learning_rate": 6.840000000000001e-06, "loss": 0.4726, "step": 1370 }, { "epoch": 0.07677231492888341, "grad_norm": 1.1133661270141602, "learning_rate": 6.845e-06, "loss": 0.3749, "step": 1371 }, { "epoch": 0.07682831224101243, "grad_norm": 1.0683152675628662, "learning_rate": 6.8500000000000005e-06, "loss": 0.3595, "step": 1372 }, { "epoch": 0.07688430955314145, "grad_norm": 1.19899582862854, "learning_rate": 6.8550000000000004e-06, "loss": 0.4688, "step": 1373 }, { "epoch": 0.07694030686527047, "grad_norm": 1.1337486505508423, "learning_rate": 6.8599999999999995e-06, "loss": 0.454, "step": 1374 }, { "epoch": 0.07699630417739949, "grad_norm": 1.4259730577468872, "learning_rate": 6.865e-06, "loss": 0.5977, "step": 1375 }, { "epoch": 0.0770523014895285, "grad_norm": 0.9646322727203369, "learning_rate": 6.87e-06, "loss": 0.4321, "step": 1376 }, { "epoch": 0.07710829880165752, "grad_norm": 1.3179805278778076, "learning_rate": 6.875000000000001e-06, "loss": 0.4031, "step": 1377 }, { "epoch": 0.07716429611378654, "grad_norm": 1.463568925857544, "learning_rate": 6.88e-06, "loss": 0.5436, "step": 1378 }, { "epoch": 0.07722029342591556, "grad_norm": 1.2384649515151978, "learning_rate": 6.885e-06, "loss": 0.4111, "step": 1379 }, { "epoch": 0.07727629073804457, "grad_norm": 1.4996534585952759, "learning_rate": 6.890000000000001e-06, "loss": 0.4076, "step": 1380 }, { "epoch": 
0.07733228805017359, "grad_norm": 1.232329249382019, "learning_rate": 6.895e-06, "loss": 0.4954, "step": 1381 }, { "epoch": 0.0773882853623026, "grad_norm": 1.4218552112579346, "learning_rate": 6.900000000000001e-06, "loss": 0.5603, "step": 1382 }, { "epoch": 0.07744428267443162, "grad_norm": 1.2759547233581543, "learning_rate": 6.905e-06, "loss": 0.5696, "step": 1383 }, { "epoch": 0.07750027998656064, "grad_norm": 1.9820356369018555, "learning_rate": 6.91e-06, "loss": 0.3646, "step": 1384 }, { "epoch": 0.07755627729868966, "grad_norm": 1.2480307817459106, "learning_rate": 6.915000000000001e-06, "loss": 0.4816, "step": 1385 }, { "epoch": 0.07761227461081868, "grad_norm": 1.3854848146438599, "learning_rate": 6.92e-06, "loss": 0.4986, "step": 1386 }, { "epoch": 0.0776682719229477, "grad_norm": 1.5433881282806396, "learning_rate": 6.925000000000001e-06, "loss": 0.4568, "step": 1387 }, { "epoch": 0.07772426923507672, "grad_norm": 1.2369163036346436, "learning_rate": 6.9300000000000006e-06, "loss": 0.5573, "step": 1388 }, { "epoch": 0.07778026654720574, "grad_norm": 1.256779432296753, "learning_rate": 6.935e-06, "loss": 0.436, "step": 1389 }, { "epoch": 0.07783626385933475, "grad_norm": 1.3208945989608765, "learning_rate": 6.9400000000000005e-06, "loss": 0.5535, "step": 1390 }, { "epoch": 0.07789226117146376, "grad_norm": 1.238890290260315, "learning_rate": 6.945e-06, "loss": 0.4379, "step": 1391 }, { "epoch": 0.07794825848359278, "grad_norm": 1.3645505905151367, "learning_rate": 6.950000000000001e-06, "loss": 0.6061, "step": 1392 }, { "epoch": 0.0780042557957218, "grad_norm": 1.1675653457641602, "learning_rate": 6.955e-06, "loss": 0.3521, "step": 1393 }, { "epoch": 0.07806025310785082, "grad_norm": 1.2153840065002441, "learning_rate": 6.9599999999999994e-06, "loss": 0.4654, "step": 1394 }, { "epoch": 0.07811625041997984, "grad_norm": 1.3639699220657349, "learning_rate": 6.965000000000001e-06, "loss": 0.497, "step": 1395 }, { "epoch": 0.07817224773210886, "grad_norm": 
1.2933604717254639, "learning_rate": 6.97e-06, "loss": 0.6267, "step": 1396 }, { "epoch": 0.07822824504423788, "grad_norm": 1.2269926071166992, "learning_rate": 6.975000000000001e-06, "loss": 0.4726, "step": 1397 }, { "epoch": 0.0782842423563669, "grad_norm": 1.5721561908721924, "learning_rate": 6.98e-06, "loss": 0.4109, "step": 1398 }, { "epoch": 0.07834023966849592, "grad_norm": 1.0359388589859009, "learning_rate": 6.985e-06, "loss": 0.4389, "step": 1399 }, { "epoch": 0.07839623698062494, "grad_norm": 4.911037445068359, "learning_rate": 6.990000000000001e-06, "loss": 0.45, "step": 1400 }, { "epoch": 0.07845223429275394, "grad_norm": 1.179355263710022, "learning_rate": 6.995e-06, "loss": 0.4637, "step": 1401 }, { "epoch": 0.07850823160488296, "grad_norm": 1.384724736213684, "learning_rate": 7.000000000000001e-06, "loss": 0.5596, "step": 1402 }, { "epoch": 0.07856422891701198, "grad_norm": 1.3643035888671875, "learning_rate": 7.005000000000001e-06, "loss": 0.5343, "step": 1403 }, { "epoch": 0.078620226229141, "grad_norm": 1.1445242166519165, "learning_rate": 7.01e-06, "loss": 0.5881, "step": 1404 }, { "epoch": 0.07867622354127002, "grad_norm": 1.1189980506896973, "learning_rate": 7.015000000000001e-06, "loss": 0.379, "step": 1405 }, { "epoch": 0.07873222085339904, "grad_norm": 1.3632959127426147, "learning_rate": 7.0200000000000006e-06, "loss": 0.4533, "step": 1406 }, { "epoch": 0.07878821816552806, "grad_norm": 1.0132023096084595, "learning_rate": 7.025000000000001e-06, "loss": 0.4028, "step": 1407 }, { "epoch": 0.07884421547765708, "grad_norm": 1.574580192565918, "learning_rate": 7.0300000000000005e-06, "loss": 0.7049, "step": 1408 }, { "epoch": 0.0789002127897861, "grad_norm": 1.207859754562378, "learning_rate": 7.0349999999999996e-06, "loss": 0.3993, "step": 1409 }, { "epoch": 0.07895621010191511, "grad_norm": 1.1706397533416748, "learning_rate": 7.04e-06, "loss": 0.3902, "step": 1410 }, { "epoch": 0.07901220741404412, "grad_norm": 1.36715829372406, 
"learning_rate": 7.045e-06, "loss": 0.5922, "step": 1411 }, { "epoch": 0.07906820472617314, "grad_norm": 3.39699649810791, "learning_rate": 7.049999999999999e-06, "loss": 0.4428, "step": 1412 }, { "epoch": 0.07912420203830216, "grad_norm": 1.1224071979522705, "learning_rate": 7.055e-06, "loss": 0.4317, "step": 1413 }, { "epoch": 0.07918019935043118, "grad_norm": 1.2464991807937622, "learning_rate": 7.06e-06, "loss": 0.4403, "step": 1414 }, { "epoch": 0.0792361966625602, "grad_norm": 1.354768991470337, "learning_rate": 7.065000000000001e-06, "loss": 0.4857, "step": 1415 }, { "epoch": 0.07929219397468922, "grad_norm": 1.5229969024658203, "learning_rate": 7.07e-06, "loss": 0.5088, "step": 1416 }, { "epoch": 0.07934819128681823, "grad_norm": 1.3952661752700806, "learning_rate": 7.075e-06, "loss": 0.6225, "step": 1417 }, { "epoch": 0.07940418859894725, "grad_norm": 1.2597805261611938, "learning_rate": 7.080000000000001e-06, "loss": 0.3716, "step": 1418 }, { "epoch": 0.07946018591107627, "grad_norm": 1.479173183441162, "learning_rate": 7.085e-06, "loss": 0.6558, "step": 1419 }, { "epoch": 0.07951618322320529, "grad_norm": 1.175714135169983, "learning_rate": 7.090000000000001e-06, "loss": 0.6377, "step": 1420 }, { "epoch": 0.0795721805353343, "grad_norm": 1.1564451456069946, "learning_rate": 7.095000000000001e-06, "loss": 0.5012, "step": 1421 }, { "epoch": 0.07962817784746332, "grad_norm": 1.0812937021255493, "learning_rate": 7.1e-06, "loss": 0.4637, "step": 1422 }, { "epoch": 0.07968417515959234, "grad_norm": 1.143661379814148, "learning_rate": 7.105000000000001e-06, "loss": 0.3334, "step": 1423 }, { "epoch": 0.07974017247172135, "grad_norm": 1.3311735391616821, "learning_rate": 7.11e-06, "loss": 0.4989, "step": 1424 }, { "epoch": 0.07979616978385037, "grad_norm": 1.2069625854492188, "learning_rate": 7.1150000000000005e-06, "loss": 0.3392, "step": 1425 }, { "epoch": 0.07985216709597939, "grad_norm": 1.180067539215088, "learning_rate": 7.1200000000000004e-06, "loss": 
0.3681, "step": 1426 }, { "epoch": 0.07990816440810841, "grad_norm": 1.2177414894104004, "learning_rate": 7.1249999999999995e-06, "loss": 0.4858, "step": 1427 }, { "epoch": 0.07996416172023743, "grad_norm": 1.3053133487701416, "learning_rate": 7.13e-06, "loss": 0.4826, "step": 1428 }, { "epoch": 0.08002015903236645, "grad_norm": 1.080035924911499, "learning_rate": 7.135e-06, "loss": 0.3143, "step": 1429 }, { "epoch": 0.08007615634449547, "grad_norm": 1.509613275527954, "learning_rate": 7.140000000000001e-06, "loss": 0.4389, "step": 1430 }, { "epoch": 0.08013215365662449, "grad_norm": 1.1446751356124878, "learning_rate": 7.145e-06, "loss": 0.4379, "step": 1431 }, { "epoch": 0.0801881509687535, "grad_norm": 1.4199235439300537, "learning_rate": 7.15e-06, "loss": 0.4777, "step": 1432 }, { "epoch": 0.08024414828088251, "grad_norm": 1.5133382081985474, "learning_rate": 7.155000000000001e-06, "loss": 0.6484, "step": 1433 }, { "epoch": 0.08030014559301153, "grad_norm": 1.3345826864242554, "learning_rate": 7.16e-06, "loss": 0.4601, "step": 1434 }, { "epoch": 0.08035614290514055, "grad_norm": 1.524214267730713, "learning_rate": 7.165000000000001e-06, "loss": 0.4004, "step": 1435 }, { "epoch": 0.08041214021726957, "grad_norm": 1.2183082103729248, "learning_rate": 7.17e-06, "loss": 0.4514, "step": 1436 }, { "epoch": 0.08046813752939859, "grad_norm": 1.2108083963394165, "learning_rate": 7.175e-06, "loss": 0.4932, "step": 1437 }, { "epoch": 0.08052413484152761, "grad_norm": 1.309736728668213, "learning_rate": 7.180000000000001e-06, "loss": 0.3938, "step": 1438 }, { "epoch": 0.08058013215365663, "grad_norm": 0.9972428679466248, "learning_rate": 7.185e-06, "loss": 0.322, "step": 1439 }, { "epoch": 0.08063612946578565, "grad_norm": 1.2740402221679688, "learning_rate": 7.190000000000001e-06, "loss": 0.531, "step": 1440 }, { "epoch": 0.08069212677791467, "grad_norm": 1.4080537557601929, "learning_rate": 7.1950000000000006e-06, "loss": 0.5996, "step": 1441 }, { "epoch": 
0.08074812409004367, "grad_norm": 1.7083243131637573, "learning_rate": 7.2e-06, "loss": 0.6026, "step": 1442 }, { "epoch": 0.08080412140217269, "grad_norm": 1.209126591682434, "learning_rate": 7.2050000000000005e-06, "loss": 0.4386, "step": 1443 }, { "epoch": 0.08086011871430171, "grad_norm": 1.3964226245880127, "learning_rate": 7.2100000000000004e-06, "loss": 0.4701, "step": 1444 }, { "epoch": 0.08091611602643073, "grad_norm": 1.3449779748916626, "learning_rate": 7.215000000000001e-06, "loss": 0.4253, "step": 1445 }, { "epoch": 0.08097211333855975, "grad_norm": 1.313974380493164, "learning_rate": 7.22e-06, "loss": 0.4444, "step": 1446 }, { "epoch": 0.08102811065068877, "grad_norm": 1.2853071689605713, "learning_rate": 7.2249999999999994e-06, "loss": 0.5527, "step": 1447 }, { "epoch": 0.08108410796281779, "grad_norm": 1.1168649196624756, "learning_rate": 7.230000000000001e-06, "loss": 0.4293, "step": 1448 }, { "epoch": 0.0811401052749468, "grad_norm": 1.1112141609191895, "learning_rate": 7.235e-06, "loss": 0.5183, "step": 1449 }, { "epoch": 0.08119610258707582, "grad_norm": 1.6637715101242065, "learning_rate": 7.240000000000001e-06, "loss": 0.4883, "step": 1450 }, { "epoch": 0.08125209989920484, "grad_norm": 1.3402663469314575, "learning_rate": 7.245e-06, "loss": 0.3671, "step": 1451 }, { "epoch": 0.08130809721133385, "grad_norm": 1.248765230178833, "learning_rate": 7.25e-06, "loss": 0.3786, "step": 1452 }, { "epoch": 0.08136409452346287, "grad_norm": 1.4092042446136475, "learning_rate": 7.255000000000001e-06, "loss": 0.4625, "step": 1453 }, { "epoch": 0.08142009183559189, "grad_norm": 1.480547547340393, "learning_rate": 7.26e-06, "loss": 0.6167, "step": 1454 }, { "epoch": 0.0814760891477209, "grad_norm": 1.3180210590362549, "learning_rate": 7.265000000000001e-06, "loss": 0.4995, "step": 1455 }, { "epoch": 0.08153208645984993, "grad_norm": 1.1281007528305054, "learning_rate": 7.270000000000001e-06, "loss": 0.4142, "step": 1456 }, { "epoch": 0.08158808377197894, 
"grad_norm": 1.519881248474121, "learning_rate": 7.275e-06, "loss": 0.5344, "step": 1457 }, { "epoch": 0.08164408108410796, "grad_norm": 1.4132120609283447, "learning_rate": 7.280000000000001e-06, "loss": 0.5994, "step": 1458 }, { "epoch": 0.08170007839623698, "grad_norm": 1.0918807983398438, "learning_rate": 7.2850000000000006e-06, "loss": 0.3358, "step": 1459 }, { "epoch": 0.081756075708366, "grad_norm": 1.292568325996399, "learning_rate": 7.290000000000001e-06, "loss": 0.4402, "step": 1460 }, { "epoch": 0.08181207302049502, "grad_norm": 1.1813576221466064, "learning_rate": 7.2950000000000005e-06, "loss": 0.4083, "step": 1461 }, { "epoch": 0.08186807033262404, "grad_norm": 1.139785647392273, "learning_rate": 7.2999999999999996e-06, "loss": 0.5056, "step": 1462 }, { "epoch": 0.08192406764475305, "grad_norm": 1.500008225440979, "learning_rate": 7.305e-06, "loss": 0.4989, "step": 1463 }, { "epoch": 0.08198006495688206, "grad_norm": 1.4263228178024292, "learning_rate": 7.31e-06, "loss": 0.4714, "step": 1464 }, { "epoch": 0.08203606226901108, "grad_norm": 1.2919131517410278, "learning_rate": 7.315000000000001e-06, "loss": 0.4517, "step": 1465 }, { "epoch": 0.0820920595811401, "grad_norm": 1.3762054443359375, "learning_rate": 7.32e-06, "loss": 0.4877, "step": 1466 }, { "epoch": 0.08214805689326912, "grad_norm": 1.3548452854156494, "learning_rate": 7.325e-06, "loss": 0.4628, "step": 1467 }, { "epoch": 0.08220405420539814, "grad_norm": 1.3240776062011719, "learning_rate": 7.330000000000001e-06, "loss": 0.5224, "step": 1468 }, { "epoch": 0.08226005151752716, "grad_norm": 1.2442258596420288, "learning_rate": 7.335e-06, "loss": 0.4429, "step": 1469 }, { "epoch": 0.08231604882965618, "grad_norm": 1.2015458345413208, "learning_rate": 7.340000000000001e-06, "loss": 0.4401, "step": 1470 }, { "epoch": 0.0823720461417852, "grad_norm": 1.0796226263046265, "learning_rate": 7.345000000000001e-06, "loss": 0.3873, "step": 1471 }, { "epoch": 0.08242804345391422, "grad_norm": 
1.2062454223632812, "learning_rate": 7.35e-06, "loss": 0.4338, "step": 1472 }, { "epoch": 0.08248404076604322, "grad_norm": 1.2619911432266235, "learning_rate": 7.355000000000001e-06, "loss": 0.4712, "step": 1473 }, { "epoch": 0.08254003807817224, "grad_norm": 1.6455378532409668, "learning_rate": 7.36e-06, "loss": 0.4193, "step": 1474 }, { "epoch": 0.08259603539030126, "grad_norm": 1.209715485572815, "learning_rate": 7.365e-06, "loss": 0.5695, "step": 1475 }, { "epoch": 0.08265203270243028, "grad_norm": 1.3279260396957397, "learning_rate": 7.370000000000001e-06, "loss": 0.5192, "step": 1476 }, { "epoch": 0.0827080300145593, "grad_norm": 1.1530653238296509, "learning_rate": 7.375e-06, "loss": 0.3153, "step": 1477 }, { "epoch": 0.08276402732668832, "grad_norm": 1.9557344913482666, "learning_rate": 7.3800000000000005e-06, "loss": 0.6641, "step": 1478 }, { "epoch": 0.08282002463881734, "grad_norm": 1.6863083839416504, "learning_rate": 7.3850000000000004e-06, "loss": 0.566, "step": 1479 }, { "epoch": 0.08287602195094636, "grad_norm": 1.282794713973999, "learning_rate": 7.3899999999999995e-06, "loss": 0.5705, "step": 1480 }, { "epoch": 0.08293201926307538, "grad_norm": 1.4731370210647583, "learning_rate": 7.395e-06, "loss": 0.5086, "step": 1481 }, { "epoch": 0.0829880165752044, "grad_norm": 1.4147073030471802, "learning_rate": 7.4e-06, "loss": 0.5286, "step": 1482 }, { "epoch": 0.0830440138873334, "grad_norm": 1.2918366193771362, "learning_rate": 7.405000000000001e-06, "loss": 0.3988, "step": 1483 }, { "epoch": 0.08310001119946242, "grad_norm": 0.9884164929389954, "learning_rate": 7.41e-06, "loss": 0.4509, "step": 1484 }, { "epoch": 0.08315600851159144, "grad_norm": 1.1498831510543823, "learning_rate": 7.414999999999999e-06, "loss": 0.3887, "step": 1485 }, { "epoch": 0.08321200582372046, "grad_norm": 1.2837716341018677, "learning_rate": 7.420000000000001e-06, "loss": 0.52, "step": 1486 }, { "epoch": 0.08326800313584948, "grad_norm": 1.303924560546875, "learning_rate": 
7.425e-06, "loss": 0.5503, "step": 1487 }, { "epoch": 0.0833240004479785, "grad_norm": 1.507111668586731, "learning_rate": 7.430000000000001e-06, "loss": 0.6404, "step": 1488 }, { "epoch": 0.08337999776010752, "grad_norm": 1.2963229417800903, "learning_rate": 7.435e-06, "loss": 0.5778, "step": 1489 }, { "epoch": 0.08343599507223654, "grad_norm": 1.512855052947998, "learning_rate": 7.44e-06, "loss": 0.5745, "step": 1490 }, { "epoch": 0.08349199238436555, "grad_norm": 1.5575251579284668, "learning_rate": 7.445000000000001e-06, "loss": 0.4233, "step": 1491 }, { "epoch": 0.08354798969649457, "grad_norm": 1.0272166728973389, "learning_rate": 7.45e-06, "loss": 0.3971, "step": 1492 }, { "epoch": 0.08360398700862359, "grad_norm": 1.458645224571228, "learning_rate": 7.455000000000001e-06, "loss": 0.5315, "step": 1493 }, { "epoch": 0.0836599843207526, "grad_norm": 1.2564878463745117, "learning_rate": 7.4600000000000006e-06, "loss": 0.498, "step": 1494 }, { "epoch": 0.08371598163288162, "grad_norm": 1.3385682106018066, "learning_rate": 7.465e-06, "loss": 0.6363, "step": 1495 }, { "epoch": 0.08377197894501064, "grad_norm": 1.3317980766296387, "learning_rate": 7.4700000000000005e-06, "loss": 0.4346, "step": 1496 }, { "epoch": 0.08382797625713966, "grad_norm": 1.1899913549423218, "learning_rate": 7.4750000000000004e-06, "loss": 0.5111, "step": 1497 }, { "epoch": 0.08388397356926867, "grad_norm": 1.3336282968521118, "learning_rate": 7.480000000000001e-06, "loss": 0.5527, "step": 1498 }, { "epoch": 0.0839399708813977, "grad_norm": 1.2331520318984985, "learning_rate": 7.485e-06, "loss": 0.4771, "step": 1499 }, { "epoch": 0.08399596819352671, "grad_norm": 1.0488063097000122, "learning_rate": 7.4899999999999994e-06, "loss": 0.4086, "step": 1500 }, { "epoch": 0.08405196550565573, "grad_norm": 1.606632947921753, "learning_rate": 7.495e-06, "loss": 0.5363, "step": 1501 }, { "epoch": 0.08410796281778475, "grad_norm": 1.2820321321487427, "learning_rate": 7.5e-06, "loss": 0.4214, "step": 
1502 }, { "epoch": 0.08416396012991377, "grad_norm": 1.3019225597381592, "learning_rate": 7.505000000000001e-06, "loss": 0.5302, "step": 1503 }, { "epoch": 0.08421995744204278, "grad_norm": 1.2436977624893188, "learning_rate": 7.51e-06, "loss": 0.555, "step": 1504 }, { "epoch": 0.0842759547541718, "grad_norm": 1.3431636095046997, "learning_rate": 7.515e-06, "loss": 0.4918, "step": 1505 }, { "epoch": 0.08433195206630081, "grad_norm": 1.1182070970535278, "learning_rate": 7.520000000000001e-06, "loss": 0.4375, "step": 1506 }, { "epoch": 0.08438794937842983, "grad_norm": 1.193709135055542, "learning_rate": 7.525e-06, "loss": 0.3783, "step": 1507 }, { "epoch": 0.08444394669055885, "grad_norm": 1.1308382749557495, "learning_rate": 7.530000000000001e-06, "loss": 0.4268, "step": 1508 }, { "epoch": 0.08449994400268787, "grad_norm": 0.9366526007652283, "learning_rate": 7.535000000000001e-06, "loss": 0.4236, "step": 1509 }, { "epoch": 0.08455594131481689, "grad_norm": 1.1923974752426147, "learning_rate": 7.54e-06, "loss": 0.3839, "step": 1510 }, { "epoch": 0.08461193862694591, "grad_norm": 1.1978589296340942, "learning_rate": 7.545000000000001e-06, "loss": 0.3851, "step": 1511 }, { "epoch": 0.08466793593907493, "grad_norm": 1.1501808166503906, "learning_rate": 7.55e-06, "loss": 0.4263, "step": 1512 }, { "epoch": 0.08472393325120395, "grad_norm": 1.415138840675354, "learning_rate": 7.555000000000001e-06, "loss": 0.5748, "step": 1513 }, { "epoch": 0.08477993056333297, "grad_norm": 1.2347288131713867, "learning_rate": 7.5600000000000005e-06, "loss": 0.4694, "step": 1514 }, { "epoch": 0.08483592787546197, "grad_norm": 1.1840616464614868, "learning_rate": 7.5649999999999996e-06, "loss": 0.5046, "step": 1515 }, { "epoch": 0.08489192518759099, "grad_norm": 1.046579122543335, "learning_rate": 7.57e-06, "loss": 0.4277, "step": 1516 }, { "epoch": 0.08494792249972001, "grad_norm": 1.0324609279632568, "learning_rate": 7.575e-06, "loss": 0.3577, "step": 1517 }, { "epoch": 
0.08500391981184903, "grad_norm": 1.2944605350494385, "learning_rate": 7.580000000000001e-06, "loss": 0.6079, "step": 1518 }, { "epoch": 0.08505991712397805, "grad_norm": 1.3902816772460938, "learning_rate": 7.585e-06, "loss": 0.4648, "step": 1519 }, { "epoch": 0.08511591443610707, "grad_norm": 1.3388081789016724, "learning_rate": 7.59e-06, "loss": 0.4821, "step": 1520 }, { "epoch": 0.08517191174823609, "grad_norm": 1.249360203742981, "learning_rate": 7.595000000000001e-06, "loss": 0.4765, "step": 1521 }, { "epoch": 0.0852279090603651, "grad_norm": 1.153415560722351, "learning_rate": 7.6e-06, "loss": 0.3804, "step": 1522 }, { "epoch": 0.08528390637249413, "grad_norm": 1.3876703977584839, "learning_rate": 7.605000000000001e-06, "loss": 0.6177, "step": 1523 }, { "epoch": 0.08533990368462314, "grad_norm": 1.1727885007858276, "learning_rate": 7.610000000000001e-06, "loss": 0.4695, "step": 1524 }, { "epoch": 0.08539590099675215, "grad_norm": 1.174000859260559, "learning_rate": 7.615e-06, "loss": 0.4969, "step": 1525 }, { "epoch": 0.08545189830888117, "grad_norm": 1.5841765403747559, "learning_rate": 7.620000000000001e-06, "loss": 0.3613, "step": 1526 }, { "epoch": 0.08550789562101019, "grad_norm": 1.0650402307510376, "learning_rate": 7.625e-06, "loss": 0.4859, "step": 1527 }, { "epoch": 0.08556389293313921, "grad_norm": 1.0996922254562378, "learning_rate": 7.630000000000001e-06, "loss": 0.4277, "step": 1528 }, { "epoch": 0.08561989024526823, "grad_norm": 1.2042404413223267, "learning_rate": 7.635e-06, "loss": 0.4736, "step": 1529 }, { "epoch": 0.08567588755739725, "grad_norm": 1.3029634952545166, "learning_rate": 7.64e-06, "loss": 0.4859, "step": 1530 }, { "epoch": 0.08573188486952626, "grad_norm": 1.5617334842681885, "learning_rate": 7.645e-06, "loss": 0.5699, "step": 1531 }, { "epoch": 0.08578788218165528, "grad_norm": 1.1617907285690308, "learning_rate": 7.65e-06, "loss": 0.4353, "step": 1532 }, { "epoch": 0.0858438794937843, "grad_norm": 1.383780837059021, 
"learning_rate": 7.655e-06, "loss": 0.5618, "step": 1533 }, { "epoch": 0.08589987680591332, "grad_norm": 1.1526728868484497, "learning_rate": 7.660000000000001e-06, "loss": 0.4117, "step": 1534 }, { "epoch": 0.08595587411804233, "grad_norm": 1.1477508544921875, "learning_rate": 7.665e-06, "loss": 0.3695, "step": 1535 }, { "epoch": 0.08601187143017135, "grad_norm": 1.674328327178955, "learning_rate": 7.670000000000001e-06, "loss": 0.4579, "step": 1536 }, { "epoch": 0.08606786874230037, "grad_norm": 1.103920817375183, "learning_rate": 7.675e-06, "loss": 0.4256, "step": 1537 }, { "epoch": 0.08612386605442938, "grad_norm": 1.1993048191070557, "learning_rate": 7.68e-06, "loss": 0.4485, "step": 1538 }, { "epoch": 0.0861798633665584, "grad_norm": 1.079773187637329, "learning_rate": 7.685e-06, "loss": 0.3587, "step": 1539 }, { "epoch": 0.08623586067868742, "grad_norm": 1.1587178707122803, "learning_rate": 7.69e-06, "loss": 0.4912, "step": 1540 }, { "epoch": 0.08629185799081644, "grad_norm": 1.2015721797943115, "learning_rate": 7.695e-06, "loss": 0.5274, "step": 1541 }, { "epoch": 0.08634785530294546, "grad_norm": 1.0844711065292358, "learning_rate": 7.7e-06, "loss": 0.3939, "step": 1542 }, { "epoch": 0.08640385261507448, "grad_norm": 1.1569340229034424, "learning_rate": 7.705e-06, "loss": 0.5162, "step": 1543 }, { "epoch": 0.0864598499272035, "grad_norm": 1.127315640449524, "learning_rate": 7.71e-06, "loss": 0.4135, "step": 1544 }, { "epoch": 0.08651584723933252, "grad_norm": 1.184778094291687, "learning_rate": 7.715e-06, "loss": 0.477, "step": 1545 }, { "epoch": 0.08657184455146152, "grad_norm": 1.3671724796295166, "learning_rate": 7.72e-06, "loss": 0.4705, "step": 1546 }, { "epoch": 0.08662784186359054, "grad_norm": 1.2034422159194946, "learning_rate": 7.725e-06, "loss": 0.4117, "step": 1547 }, { "epoch": 0.08668383917571956, "grad_norm": 2.615835428237915, "learning_rate": 7.73e-06, "loss": 0.4523, "step": 1548 }, { "epoch": 0.08673983648784858, "grad_norm": 
1.4498186111450195, "learning_rate": 7.735000000000001e-06, "loss": 0.4638, "step": 1549 }, { "epoch": 0.0867958337999776, "grad_norm": 1.169029951095581, "learning_rate": 7.74e-06, "loss": 0.3688, "step": 1550 }, { "epoch": 0.08685183111210662, "grad_norm": 1.2343640327453613, "learning_rate": 7.745000000000001e-06, "loss": 0.476, "step": 1551 }, { "epoch": 0.08690782842423564, "grad_norm": 1.3974204063415527, "learning_rate": 7.75e-06, "loss": 0.422, "step": 1552 }, { "epoch": 0.08696382573636466, "grad_norm": 1.21696937084198, "learning_rate": 7.755e-06, "loss": 0.4368, "step": 1553 }, { "epoch": 0.08701982304849368, "grad_norm": 1.798683524131775, "learning_rate": 7.76e-06, "loss": 0.4386, "step": 1554 }, { "epoch": 0.0870758203606227, "grad_norm": 1.2375297546386719, "learning_rate": 7.765e-06, "loss": 0.6109, "step": 1555 }, { "epoch": 0.0871318176727517, "grad_norm": 1.1786304712295532, "learning_rate": 7.77e-06, "loss": 0.4902, "step": 1556 }, { "epoch": 0.08718781498488072, "grad_norm": 1.4570947885513306, "learning_rate": 7.775000000000001e-06, "loss": 0.7328, "step": 1557 }, { "epoch": 0.08724381229700974, "grad_norm": 1.12009859085083, "learning_rate": 7.78e-06, "loss": 0.4181, "step": 1558 }, { "epoch": 0.08729980960913876, "grad_norm": 1.328429102897644, "learning_rate": 7.785000000000001e-06, "loss": 0.7427, "step": 1559 }, { "epoch": 0.08735580692126778, "grad_norm": 1.4115002155303955, "learning_rate": 7.79e-06, "loss": 0.4477, "step": 1560 }, { "epoch": 0.0874118042333968, "grad_norm": 1.53958261013031, "learning_rate": 7.795e-06, "loss": 0.5461, "step": 1561 }, { "epoch": 0.08746780154552582, "grad_norm": 2.6513330936431885, "learning_rate": 7.8e-06, "loss": 0.5128, "step": 1562 }, { "epoch": 0.08752379885765484, "grad_norm": 1.325663685798645, "learning_rate": 7.805e-06, "loss": 0.5435, "step": 1563 }, { "epoch": 0.08757979616978386, "grad_norm": 0.9753627777099609, "learning_rate": 7.810000000000001e-06, "loss": 0.3439, "step": 1564 }, { 
"epoch": 0.08763579348191287, "grad_norm": 1.3109617233276367, "learning_rate": 7.815e-06, "loss": 0.4922, "step": 1565 }, { "epoch": 0.08769179079404188, "grad_norm": 1.272173523902893, "learning_rate": 7.820000000000001e-06, "loss": 0.4498, "step": 1566 }, { "epoch": 0.0877477881061709, "grad_norm": 1.77103853225708, "learning_rate": 7.825e-06, "loss": 0.7537, "step": 1567 }, { "epoch": 0.08780378541829992, "grad_norm": 1.1595690250396729, "learning_rate": 7.83e-06, "loss": 0.4236, "step": 1568 }, { "epoch": 0.08785978273042894, "grad_norm": 1.303318738937378, "learning_rate": 7.835e-06, "loss": 0.4757, "step": 1569 }, { "epoch": 0.08791578004255796, "grad_norm": 1.357015609741211, "learning_rate": 7.84e-06, "loss": 0.659, "step": 1570 }, { "epoch": 0.08797177735468698, "grad_norm": 1.5197786092758179, "learning_rate": 7.845e-06, "loss": 0.5869, "step": 1571 }, { "epoch": 0.088027774666816, "grad_norm": 1.1078104972839355, "learning_rate": 7.850000000000001e-06, "loss": 0.3845, "step": 1572 }, { "epoch": 0.08808377197894501, "grad_norm": 1.304028868675232, "learning_rate": 7.855e-06, "loss": 0.4724, "step": 1573 }, { "epoch": 0.08813976929107403, "grad_norm": 1.3804655075073242, "learning_rate": 7.860000000000001e-06, "loss": 0.4343, "step": 1574 }, { "epoch": 0.08819576660320305, "grad_norm": 1.1329188346862793, "learning_rate": 7.865e-06, "loss": 0.4534, "step": 1575 }, { "epoch": 0.08825176391533207, "grad_norm": 1.2403630018234253, "learning_rate": 7.870000000000001e-06, "loss": 0.4721, "step": 1576 }, { "epoch": 0.08830776122746108, "grad_norm": 1.1998299360275269, "learning_rate": 7.875e-06, "loss": 0.6336, "step": 1577 }, { "epoch": 0.0883637585395901, "grad_norm": 1.285543441772461, "learning_rate": 7.879999999999999e-06, "loss": 0.5604, "step": 1578 }, { "epoch": 0.08841975585171911, "grad_norm": 1.1370717287063599, "learning_rate": 7.885e-06, "loss": 0.3956, "step": 1579 }, { "epoch": 0.08847575316384813, "grad_norm": 1.230810284614563, "learning_rate": 
7.89e-06, "loss": 0.5079, "step": 1580 }, { "epoch": 0.08853175047597715, "grad_norm": 2.2659618854522705, "learning_rate": 7.895000000000001e-06, "loss": 0.5141, "step": 1581 }, { "epoch": 0.08858774778810617, "grad_norm": 1.1236450672149658, "learning_rate": 7.9e-06, "loss": 0.3948, "step": 1582 }, { "epoch": 0.08864374510023519, "grad_norm": 1.294722557067871, "learning_rate": 7.905e-06, "loss": 0.444, "step": 1583 }, { "epoch": 0.08869974241236421, "grad_norm": 1.1199108362197876, "learning_rate": 7.91e-06, "loss": 0.5846, "step": 1584 }, { "epoch": 0.08875573972449323, "grad_norm": 1.7658997774124146, "learning_rate": 7.915e-06, "loss": 0.649, "step": 1585 }, { "epoch": 0.08881173703662225, "grad_norm": 1.241074800491333, "learning_rate": 7.92e-06, "loss": 0.4549, "step": 1586 }, { "epoch": 0.08886773434875125, "grad_norm": 1.1961077451705933, "learning_rate": 7.925000000000001e-06, "loss": 0.4492, "step": 1587 }, { "epoch": 0.08892373166088027, "grad_norm": 1.2907549142837524, "learning_rate": 7.93e-06, "loss": 0.3696, "step": 1588 }, { "epoch": 0.08897972897300929, "grad_norm": 1.470672845840454, "learning_rate": 7.935000000000001e-06, "loss": 0.4875, "step": 1589 }, { "epoch": 0.08903572628513831, "grad_norm": 1.1282093524932861, "learning_rate": 7.94e-06, "loss": 0.348, "step": 1590 }, { "epoch": 0.08909172359726733, "grad_norm": 1.446938395500183, "learning_rate": 7.945000000000001e-06, "loss": 0.4021, "step": 1591 }, { "epoch": 0.08914772090939635, "grad_norm": 1.3912911415100098, "learning_rate": 7.95e-06, "loss": 0.6479, "step": 1592 }, { "epoch": 0.08920371822152537, "grad_norm": 1.2886724472045898, "learning_rate": 7.955e-06, "loss": 0.4321, "step": 1593 }, { "epoch": 0.08925971553365439, "grad_norm": 1.1775058507919312, "learning_rate": 7.96e-06, "loss": 0.5141, "step": 1594 }, { "epoch": 0.08931571284578341, "grad_norm": 1.33034348487854, "learning_rate": 7.965e-06, "loss": 0.4683, "step": 1595 }, { "epoch": 0.08937171015791243, "grad_norm": 
1.1250934600830078, "learning_rate": 7.97e-06, "loss": 0.5636, "step": 1596 }, { "epoch": 0.08942770747004143, "grad_norm": 1.177208423614502, "learning_rate": 7.975e-06, "loss": 0.4972, "step": 1597 }, { "epoch": 0.08948370478217045, "grad_norm": 1.2766159772872925, "learning_rate": 7.98e-06, "loss": 0.4352, "step": 1598 }, { "epoch": 0.08953970209429947, "grad_norm": 1.3314900398254395, "learning_rate": 7.985e-06, "loss": 0.5088, "step": 1599 }, { "epoch": 0.08959569940642849, "grad_norm": 1.1538054943084717, "learning_rate": 7.99e-06, "loss": 0.4706, "step": 1600 }, { "epoch": 0.08965169671855751, "grad_norm": 1.2058525085449219, "learning_rate": 7.995e-06, "loss": 0.4178, "step": 1601 }, { "epoch": 0.08970769403068653, "grad_norm": 1.1523019075393677, "learning_rate": 8.000000000000001e-06, "loss": 0.5341, "step": 1602 }, { "epoch": 0.08976369134281555, "grad_norm": 1.2555052042007446, "learning_rate": 8.005e-06, "loss": 0.4499, "step": 1603 }, { "epoch": 0.08981968865494457, "grad_norm": 1.2619866132736206, "learning_rate": 8.010000000000001e-06, "loss": 0.4104, "step": 1604 }, { "epoch": 0.08987568596707358, "grad_norm": 1.5733071565628052, "learning_rate": 8.015e-06, "loss": 0.4929, "step": 1605 }, { "epoch": 0.0899316832792026, "grad_norm": 1.2147235870361328, "learning_rate": 8.02e-06, "loss": 0.5144, "step": 1606 }, { "epoch": 0.08998768059133162, "grad_norm": 1.2371256351470947, "learning_rate": 8.025e-06, "loss": 0.4795, "step": 1607 }, { "epoch": 0.09004367790346063, "grad_norm": 1.2636479139328003, "learning_rate": 8.03e-06, "loss": 0.4319, "step": 1608 }, { "epoch": 0.09009967521558965, "grad_norm": 1.4651589393615723, "learning_rate": 8.035e-06, "loss": 0.6121, "step": 1609 }, { "epoch": 0.09015567252771867, "grad_norm": 1.4587225914001465, "learning_rate": 8.040000000000001e-06, "loss": 0.3772, "step": 1610 }, { "epoch": 0.09021166983984769, "grad_norm": 1.1905932426452637, "learning_rate": 8.045e-06, "loss": 0.464, "step": 1611 }, { "epoch": 
0.0902676671519767, "grad_norm": 1.5001569986343384, "learning_rate": 8.050000000000001e-06, "loss": 0.4785, "step": 1612 }, { "epoch": 0.09032366446410572, "grad_norm": 1.0893656015396118, "learning_rate": 8.055e-06, "loss": 0.4889, "step": 1613 }, { "epoch": 0.09037966177623474, "grad_norm": 1.2985970973968506, "learning_rate": 8.06e-06, "loss": 0.4723, "step": 1614 }, { "epoch": 0.09043565908836376, "grad_norm": 1.1527200937271118, "learning_rate": 8.065e-06, "loss": 0.4792, "step": 1615 }, { "epoch": 0.09049165640049278, "grad_norm": 1.253238558769226, "learning_rate": 8.069999999999999e-06, "loss": 0.4372, "step": 1616 }, { "epoch": 0.0905476537126218, "grad_norm": 1.2246673107147217, "learning_rate": 8.075000000000001e-06, "loss": 0.4444, "step": 1617 }, { "epoch": 0.0906036510247508, "grad_norm": 0.9724764823913574, "learning_rate": 8.08e-06, "loss": 0.426, "step": 1618 }, { "epoch": 0.09065964833687983, "grad_norm": 1.9928661584854126, "learning_rate": 8.085000000000001e-06, "loss": 0.5301, "step": 1619 }, { "epoch": 0.09071564564900884, "grad_norm": 1.286141276359558, "learning_rate": 8.09e-06, "loss": 0.5638, "step": 1620 }, { "epoch": 0.09077164296113786, "grad_norm": 1.0990967750549316, "learning_rate": 8.095e-06, "loss": 0.421, "step": 1621 }, { "epoch": 0.09082764027326688, "grad_norm": 1.2366746664047241, "learning_rate": 8.1e-06, "loss": 0.4869, "step": 1622 }, { "epoch": 0.0908836375853959, "grad_norm": 1.4013638496398926, "learning_rate": 8.105e-06, "loss": 0.4185, "step": 1623 }, { "epoch": 0.09093963489752492, "grad_norm": 1.1708909273147583, "learning_rate": 8.11e-06, "loss": 0.4643, "step": 1624 }, { "epoch": 0.09099563220965394, "grad_norm": 1.323397159576416, "learning_rate": 8.115000000000001e-06, "loss": 0.4741, "step": 1625 }, { "epoch": 0.09105162952178296, "grad_norm": 1.1295795440673828, "learning_rate": 8.12e-06, "loss": 0.4907, "step": 1626 }, { "epoch": 0.09110762683391198, "grad_norm": 1.4290260076522827, "learning_rate": 
8.125000000000001e-06, "loss": 0.3986, "step": 1627 }, { "epoch": 0.09116362414604098, "grad_norm": 1.2899596691131592, "learning_rate": 8.13e-06, "loss": 0.4426, "step": 1628 }, { "epoch": 0.09121962145817, "grad_norm": 1.6422119140625, "learning_rate": 8.135000000000001e-06, "loss": 0.5782, "step": 1629 }, { "epoch": 0.09127561877029902, "grad_norm": 1.2225149869918823, "learning_rate": 8.14e-06, "loss": 0.4683, "step": 1630 }, { "epoch": 0.09133161608242804, "grad_norm": 1.1593647003173828, "learning_rate": 8.144999999999999e-06, "loss": 0.445, "step": 1631 }, { "epoch": 0.09138761339455706, "grad_norm": 1.1714057922363281, "learning_rate": 8.15e-06, "loss": 0.4355, "step": 1632 }, { "epoch": 0.09144361070668608, "grad_norm": 1.0047953128814697, "learning_rate": 8.155e-06, "loss": 0.4127, "step": 1633 }, { "epoch": 0.0914996080188151, "grad_norm": 1.2258692979812622, "learning_rate": 8.160000000000001e-06, "loss": 0.5346, "step": 1634 }, { "epoch": 0.09155560533094412, "grad_norm": 1.2532273530960083, "learning_rate": 8.165e-06, "loss": 0.4842, "step": 1635 }, { "epoch": 0.09161160264307314, "grad_norm": 1.0819934606552124, "learning_rate": 8.17e-06, "loss": 0.495, "step": 1636 }, { "epoch": 0.09166759995520216, "grad_norm": 1.4493647813796997, "learning_rate": 8.175e-06, "loss": 0.5362, "step": 1637 }, { "epoch": 0.09172359726733118, "grad_norm": 1.7464864253997803, "learning_rate": 8.18e-06, "loss": 0.4703, "step": 1638 }, { "epoch": 0.09177959457946018, "grad_norm": 1.2716431617736816, "learning_rate": 8.185e-06, "loss": 0.5344, "step": 1639 }, { "epoch": 0.0918355918915892, "grad_norm": 1.320268154144287, "learning_rate": 8.190000000000001e-06, "loss": 0.3806, "step": 1640 }, { "epoch": 0.09189158920371822, "grad_norm": 1.4997364282608032, "learning_rate": 8.195e-06, "loss": 0.4835, "step": 1641 }, { "epoch": 0.09194758651584724, "grad_norm": 1.2244665622711182, "learning_rate": 8.200000000000001e-06, "loss": 0.4452, "step": 1642 }, { "epoch": 
0.09200358382797626, "grad_norm": 1.1460756063461304, "learning_rate": 8.205e-06, "loss": 0.4294, "step": 1643 }, { "epoch": 0.09205958114010528, "grad_norm": 1.1829346418380737, "learning_rate": 8.210000000000001e-06, "loss": 0.4801, "step": 1644 }, { "epoch": 0.0921155784522343, "grad_norm": 1.3419283628463745, "learning_rate": 8.215e-06, "loss": 0.5275, "step": 1645 }, { "epoch": 0.09217157576436331, "grad_norm": 1.430458426475525, "learning_rate": 8.22e-06, "loss": 0.5378, "step": 1646 }, { "epoch": 0.09222757307649233, "grad_norm": 1.3106675148010254, "learning_rate": 8.225e-06, "loss": 0.4741, "step": 1647 }, { "epoch": 0.09228357038862135, "grad_norm": 1.2035247087478638, "learning_rate": 8.23e-06, "loss": 0.5643, "step": 1648 }, { "epoch": 0.09233956770075036, "grad_norm": 1.4303395748138428, "learning_rate": 8.235000000000002e-06, "loss": 0.4945, "step": 1649 }, { "epoch": 0.09239556501287938, "grad_norm": 1.3392964601516724, "learning_rate": 8.24e-06, "loss": 0.3851, "step": 1650 }, { "epoch": 0.0924515623250084, "grad_norm": 1.2980291843414307, "learning_rate": 8.245e-06, "loss": 0.5338, "step": 1651 }, { "epoch": 0.09250755963713742, "grad_norm": 1.2684897184371948, "learning_rate": 8.25e-06, "loss": 0.4215, "step": 1652 }, { "epoch": 0.09256355694926643, "grad_norm": 1.3237746953964233, "learning_rate": 8.255e-06, "loss": 0.5577, "step": 1653 }, { "epoch": 0.09261955426139545, "grad_norm": 1.4197481870651245, "learning_rate": 8.26e-06, "loss": 0.4287, "step": 1654 }, { "epoch": 0.09267555157352447, "grad_norm": 1.2387175559997559, "learning_rate": 8.265000000000001e-06, "loss": 0.5273, "step": 1655 }, { "epoch": 0.09273154888565349, "grad_norm": 20.208898544311523, "learning_rate": 8.27e-06, "loss": 0.4626, "step": 1656 }, { "epoch": 0.09278754619778251, "grad_norm": 1.0856859683990479, "learning_rate": 8.275000000000001e-06, "loss": 0.4981, "step": 1657 }, { "epoch": 0.09284354350991153, "grad_norm": 1.3090143203735352, "learning_rate": 8.28e-06, 
"loss": 0.5757, "step": 1658 }, { "epoch": 0.09289954082204054, "grad_norm": 1.2446850538253784, "learning_rate": 8.285e-06, "loss": 0.4142, "step": 1659 }, { "epoch": 0.09295553813416955, "grad_norm": 1.3336029052734375, "learning_rate": 8.29e-06, "loss": 0.395, "step": 1660 }, { "epoch": 0.09301153544629857, "grad_norm": 1.2740191221237183, "learning_rate": 8.295e-06, "loss": 0.4564, "step": 1661 }, { "epoch": 0.09306753275842759, "grad_norm": 1.667121410369873, "learning_rate": 8.3e-06, "loss": 0.4583, "step": 1662 }, { "epoch": 0.09312353007055661, "grad_norm": 1.074404001235962, "learning_rate": 8.305000000000001e-06, "loss": 0.4812, "step": 1663 }, { "epoch": 0.09317952738268563, "grad_norm": 1.130149245262146, "learning_rate": 8.31e-06, "loss": 0.4388, "step": 1664 }, { "epoch": 0.09323552469481465, "grad_norm": 1.5548359155654907, "learning_rate": 8.315000000000001e-06, "loss": 0.4344, "step": 1665 }, { "epoch": 0.09329152200694367, "grad_norm": 2.0083305835723877, "learning_rate": 8.32e-06, "loss": 0.4691, "step": 1666 }, { "epoch": 0.09334751931907269, "grad_norm": 1.2877039909362793, "learning_rate": 8.325e-06, "loss": 0.6106, "step": 1667 }, { "epoch": 0.09340351663120171, "grad_norm": 1.2804948091506958, "learning_rate": 8.33e-06, "loss": 0.4049, "step": 1668 }, { "epoch": 0.09345951394333073, "grad_norm": 1.088975429534912, "learning_rate": 8.334999999999999e-06, "loss": 0.3796, "step": 1669 }, { "epoch": 0.09351551125545973, "grad_norm": 1.1175159215927124, "learning_rate": 8.34e-06, "loss": 0.4282, "step": 1670 }, { "epoch": 0.09357150856758875, "grad_norm": 1.0835729837417603, "learning_rate": 8.345e-06, "loss": 0.372, "step": 1671 }, { "epoch": 0.09362750587971777, "grad_norm": 1.7679165601730347, "learning_rate": 8.350000000000001e-06, "loss": 0.9209, "step": 1672 }, { "epoch": 0.09368350319184679, "grad_norm": 1.1253948211669922, "learning_rate": 8.355e-06, "loss": 0.4427, "step": 1673 }, { "epoch": 0.09373950050397581, "grad_norm": 
1.2487722635269165, "learning_rate": 8.36e-06, "loss": 0.4141, "step": 1674 }, { "epoch": 0.09379549781610483, "grad_norm": 1.62444269657135, "learning_rate": 8.365e-06, "loss": 0.3576, "step": 1675 }, { "epoch": 0.09385149512823385, "grad_norm": 1.2208722829818726, "learning_rate": 8.37e-06, "loss": 0.4182, "step": 1676 }, { "epoch": 0.09390749244036287, "grad_norm": 1.2999380826950073, "learning_rate": 8.375e-06, "loss": 0.4973, "step": 1677 }, { "epoch": 0.09396348975249189, "grad_norm": 1.1313492059707642, "learning_rate": 8.380000000000001e-06, "loss": 0.5136, "step": 1678 }, { "epoch": 0.0940194870646209, "grad_norm": 1.0610212087631226, "learning_rate": 8.385e-06, "loss": 0.3811, "step": 1679 }, { "epoch": 0.09407548437674991, "grad_norm": 1.2607392072677612, "learning_rate": 8.390000000000001e-06, "loss": 0.4823, "step": 1680 }, { "epoch": 0.09413148168887893, "grad_norm": 1.3894352912902832, "learning_rate": 8.395e-06, "loss": 0.5465, "step": 1681 }, { "epoch": 0.09418747900100795, "grad_norm": 1.0513899326324463, "learning_rate": 8.400000000000001e-06, "loss": 0.4264, "step": 1682 }, { "epoch": 0.09424347631313697, "grad_norm": 0.9940479397773743, "learning_rate": 8.405e-06, "loss": 0.3418, "step": 1683 }, { "epoch": 0.09429947362526599, "grad_norm": 1.2676581144332886, "learning_rate": 8.409999999999999e-06, "loss": 0.4437, "step": 1684 }, { "epoch": 0.094355470937395, "grad_norm": 1.2914938926696777, "learning_rate": 8.415e-06, "loss": 0.5123, "step": 1685 }, { "epoch": 0.09441146824952402, "grad_norm": 1.2333265542984009, "learning_rate": 8.42e-06, "loss": 0.5305, "step": 1686 }, { "epoch": 0.09446746556165304, "grad_norm": 1.0795105695724487, "learning_rate": 8.425000000000001e-06, "loss": 0.441, "step": 1687 }, { "epoch": 0.09452346287378206, "grad_norm": 1.2224481105804443, "learning_rate": 8.43e-06, "loss": 0.3985, "step": 1688 }, { "epoch": 0.09457946018591108, "grad_norm": 1.626680612564087, "learning_rate": 8.435e-06, "loss": 0.5514, "step": 
1689 }, { "epoch": 0.09463545749804009, "grad_norm": 1.2383500337600708, "learning_rate": 8.44e-06, "loss": 0.4813, "step": 1690 }, { "epoch": 0.0946914548101691, "grad_norm": 1.3884074687957764, "learning_rate": 8.445e-06, "loss": 0.4207, "step": 1691 }, { "epoch": 0.09474745212229813, "grad_norm": 1.1471067667007446, "learning_rate": 8.45e-06, "loss": 0.4253, "step": 1692 }, { "epoch": 0.09480344943442715, "grad_norm": 1.3975670337677002, "learning_rate": 8.455000000000001e-06, "loss": 0.5008, "step": 1693 }, { "epoch": 0.09485944674655616, "grad_norm": 1.1151567697525024, "learning_rate": 8.46e-06, "loss": 0.3075, "step": 1694 }, { "epoch": 0.09491544405868518, "grad_norm": 0.992013692855835, "learning_rate": 8.465000000000001e-06, "loss": 0.348, "step": 1695 }, { "epoch": 0.0949714413708142, "grad_norm": 1.1896450519561768, "learning_rate": 8.47e-06, "loss": 0.5221, "step": 1696 }, { "epoch": 0.09502743868294322, "grad_norm": 1.4490216970443726, "learning_rate": 8.475000000000001e-06, "loss": 0.4325, "step": 1697 }, { "epoch": 0.09508343599507224, "grad_norm": 1.1228501796722412, "learning_rate": 8.48e-06, "loss": 0.4076, "step": 1698 }, { "epoch": 0.09513943330720126, "grad_norm": 1.3249083757400513, "learning_rate": 8.485e-06, "loss": 0.456, "step": 1699 }, { "epoch": 0.09519543061933028, "grad_norm": 1.3075578212738037, "learning_rate": 8.49e-06, "loss": 0.5684, "step": 1700 }, { "epoch": 0.09525142793145928, "grad_norm": 1.3035677671432495, "learning_rate": 8.495e-06, "loss": 0.5176, "step": 1701 }, { "epoch": 0.0953074252435883, "grad_norm": 1.1941081285476685, "learning_rate": 8.500000000000002e-06, "loss": 0.4353, "step": 1702 }, { "epoch": 0.09536342255571732, "grad_norm": 1.3645747900009155, "learning_rate": 8.505e-06, "loss": 0.4457, "step": 1703 }, { "epoch": 0.09541941986784634, "grad_norm": 1.3234845399856567, "learning_rate": 8.51e-06, "loss": 0.5028, "step": 1704 }, { "epoch": 0.09547541717997536, "grad_norm": 1.5293045043945312, "learning_rate": 
8.515e-06, "loss": 0.6054, "step": 1705 }, { "epoch": 0.09553141449210438, "grad_norm": 1.2742122411727905, "learning_rate": 8.52e-06, "loss": 0.3425, "step": 1706 }, { "epoch": 0.0955874118042334, "grad_norm": 1.2540256977081299, "learning_rate": 8.525e-06, "loss": 0.4417, "step": 1707 }, { "epoch": 0.09564340911636242, "grad_norm": 1.1341816186904907, "learning_rate": 8.53e-06, "loss": 0.4395, "step": 1708 }, { "epoch": 0.09569940642849144, "grad_norm": 1.4354275465011597, "learning_rate": 8.535e-06, "loss": 0.5021, "step": 1709 }, { "epoch": 0.09575540374062046, "grad_norm": 1.21577787399292, "learning_rate": 8.540000000000001e-06, "loss": 0.4162, "step": 1710 }, { "epoch": 0.09581140105274946, "grad_norm": 1.385469913482666, "learning_rate": 8.545e-06, "loss": 0.3978, "step": 1711 }, { "epoch": 0.09586739836487848, "grad_norm": 1.0504804849624634, "learning_rate": 8.550000000000001e-06, "loss": 0.46, "step": 1712 }, { "epoch": 0.0959233956770075, "grad_norm": 1.2518787384033203, "learning_rate": 8.555e-06, "loss": 0.4717, "step": 1713 }, { "epoch": 0.09597939298913652, "grad_norm": 1.2391563653945923, "learning_rate": 8.56e-06, "loss": 0.3796, "step": 1714 }, { "epoch": 0.09603539030126554, "grad_norm": 1.352744460105896, "learning_rate": 8.565e-06, "loss": 0.4809, "step": 1715 }, { "epoch": 0.09609138761339456, "grad_norm": 1.2199307680130005, "learning_rate": 8.570000000000001e-06, "loss": 0.5013, "step": 1716 }, { "epoch": 0.09614738492552358, "grad_norm": 2.9449145793914795, "learning_rate": 8.575000000000002e-06, "loss": 0.4527, "step": 1717 }, { "epoch": 0.0962033822376526, "grad_norm": 1.0867520570755005, "learning_rate": 8.580000000000001e-06, "loss": 0.5052, "step": 1718 }, { "epoch": 0.09625937954978162, "grad_norm": 1.5014264583587646, "learning_rate": 8.585e-06, "loss": 0.4327, "step": 1719 }, { "epoch": 0.09631537686191063, "grad_norm": 1.358459711074829, "learning_rate": 8.59e-06, "loss": 0.4432, "step": 1720 }, { "epoch": 0.09637137417403964, 
"grad_norm": 1.1366547346115112, "learning_rate": 8.595e-06, "loss": 0.4062, "step": 1721 }, { "epoch": 0.09642737148616866, "grad_norm": 1.6252096891403198, "learning_rate": 8.599999999999999e-06, "loss": 0.4598, "step": 1722 }, { "epoch": 0.09648336879829768, "grad_norm": 1.2072232961654663, "learning_rate": 8.605e-06, "loss": 0.49, "step": 1723 }, { "epoch": 0.0965393661104267, "grad_norm": 1.2367745637893677, "learning_rate": 8.61e-06, "loss": 0.4035, "step": 1724 }, { "epoch": 0.09659536342255572, "grad_norm": 1.3461731672286987, "learning_rate": 8.615000000000001e-06, "loss": 0.4942, "step": 1725 }, { "epoch": 0.09665136073468474, "grad_norm": 1.1359554529190063, "learning_rate": 8.62e-06, "loss": 0.4236, "step": 1726 }, { "epoch": 0.09670735804681375, "grad_norm": 1.090421438217163, "learning_rate": 8.625e-06, "loss": 0.4333, "step": 1727 }, { "epoch": 0.09676335535894277, "grad_norm": 5.683356285095215, "learning_rate": 8.63e-06, "loss": 0.6335, "step": 1728 }, { "epoch": 0.09681935267107179, "grad_norm": 1.271315336227417, "learning_rate": 8.635e-06, "loss": 0.4435, "step": 1729 }, { "epoch": 0.09687534998320081, "grad_norm": 1.2029268741607666, "learning_rate": 8.64e-06, "loss": 0.3945, "step": 1730 }, { "epoch": 0.09693134729532983, "grad_norm": 1.723079800605774, "learning_rate": 8.645000000000001e-06, "loss": 0.4809, "step": 1731 }, { "epoch": 0.09698734460745884, "grad_norm": 1.122281551361084, "learning_rate": 8.65e-06, "loss": 0.4269, "step": 1732 }, { "epoch": 0.09704334191958786, "grad_norm": 1.2079497575759888, "learning_rate": 8.655000000000001e-06, "loss": 0.4232, "step": 1733 }, { "epoch": 0.09709933923171687, "grad_norm": 1.2280787229537964, "learning_rate": 8.66e-06, "loss": 0.3697, "step": 1734 }, { "epoch": 0.0971553365438459, "grad_norm": 1.0389596223831177, "learning_rate": 8.665000000000001e-06, "loss": 0.4221, "step": 1735 }, { "epoch": 0.09721133385597491, "grad_norm": 1.22295343875885, "learning_rate": 8.67e-06, "loss": 0.4129, 
"step": 1736 }, { "epoch": 0.09726733116810393, "grad_norm": 1.3335331678390503, "learning_rate": 8.674999999999999e-06, "loss": 0.4507, "step": 1737 }, { "epoch": 0.09732332848023295, "grad_norm": 1.3346177339553833, "learning_rate": 8.68e-06, "loss": 0.3771, "step": 1738 }, { "epoch": 0.09737932579236197, "grad_norm": 1.0787492990493774, "learning_rate": 8.685e-06, "loss": 0.4032, "step": 1739 }, { "epoch": 0.09743532310449099, "grad_norm": 1.352504849433899, "learning_rate": 8.690000000000002e-06, "loss": 0.4517, "step": 1740 }, { "epoch": 0.09749132041662001, "grad_norm": 1.0957409143447876, "learning_rate": 8.695e-06, "loss": 0.3548, "step": 1741 }, { "epoch": 0.09754731772874901, "grad_norm": 1.1434050798416138, "learning_rate": 8.7e-06, "loss": 0.4702, "step": 1742 }, { "epoch": 0.09760331504087803, "grad_norm": 1.2777423858642578, "learning_rate": 8.705e-06, "loss": 0.3896, "step": 1743 }, { "epoch": 0.09765931235300705, "grad_norm": 1.4034701585769653, "learning_rate": 8.71e-06, "loss": 0.4506, "step": 1744 }, { "epoch": 0.09771530966513607, "grad_norm": 1.207825779914856, "learning_rate": 8.715e-06, "loss": 0.5022, "step": 1745 }, { "epoch": 0.09777130697726509, "grad_norm": 1.3076139688491821, "learning_rate": 8.720000000000001e-06, "loss": 0.3915, "step": 1746 }, { "epoch": 0.09782730428939411, "grad_norm": 1.2862523794174194, "learning_rate": 8.725e-06, "loss": 0.5892, "step": 1747 }, { "epoch": 0.09788330160152313, "grad_norm": 1.442941665649414, "learning_rate": 8.730000000000001e-06, "loss": 0.4577, "step": 1748 }, { "epoch": 0.09793929891365215, "grad_norm": 1.251220941543579, "learning_rate": 8.735e-06, "loss": 0.4834, "step": 1749 }, { "epoch": 0.09799529622578117, "grad_norm": 1.1590315103530884, "learning_rate": 8.740000000000001e-06, "loss": 0.5074, "step": 1750 }, { "epoch": 0.09805129353791019, "grad_norm": 1.3057984113693237, "learning_rate": 8.745e-06, "loss": 0.4082, "step": 1751 }, { "epoch": 0.09810729085003919, "grad_norm": 
1.29001784324646, "learning_rate": 8.75e-06, "loss": 0.4628, "step": 1752 }, { "epoch": 0.09816328816216821, "grad_norm": 1.0873615741729736, "learning_rate": 8.755e-06, "loss": 0.3739, "step": 1753 }, { "epoch": 0.09821928547429723, "grad_norm": 1.2104002237319946, "learning_rate": 8.76e-06, "loss": 0.4093, "step": 1754 }, { "epoch": 0.09827528278642625, "grad_norm": 1.1916160583496094, "learning_rate": 8.765000000000002e-06, "loss": 0.5169, "step": 1755 }, { "epoch": 0.09833128009855527, "grad_norm": 1.1350438594818115, "learning_rate": 8.77e-06, "loss": 0.4636, "step": 1756 }, { "epoch": 0.09838727741068429, "grad_norm": 1.4811171293258667, "learning_rate": 8.775e-06, "loss": 0.5471, "step": 1757 }, { "epoch": 0.0984432747228133, "grad_norm": 1.5752403736114502, "learning_rate": 8.78e-06, "loss": 0.5765, "step": 1758 }, { "epoch": 0.09849927203494233, "grad_norm": 2.1089720726013184, "learning_rate": 8.785e-06, "loss": 0.4019, "step": 1759 }, { "epoch": 0.09855526934707134, "grad_norm": 1.8355612754821777, "learning_rate": 8.79e-06, "loss": 0.7225, "step": 1760 }, { "epoch": 0.09861126665920036, "grad_norm": 1.0810399055480957, "learning_rate": 8.795e-06, "loss": 0.3344, "step": 1761 }, { "epoch": 0.09866726397132938, "grad_norm": 1.1854368448257446, "learning_rate": 8.8e-06, "loss": 0.3407, "step": 1762 }, { "epoch": 0.09872326128345839, "grad_norm": 1.2420506477355957, "learning_rate": 8.805000000000001e-06, "loss": 0.415, "step": 1763 }, { "epoch": 0.09877925859558741, "grad_norm": 1.075506329536438, "learning_rate": 8.81e-06, "loss": 0.4851, "step": 1764 }, { "epoch": 0.09883525590771643, "grad_norm": 1.1143125295639038, "learning_rate": 8.815000000000001e-06, "loss": 0.4585, "step": 1765 }, { "epoch": 0.09889125321984545, "grad_norm": 1.1370161771774292, "learning_rate": 8.82e-06, "loss": 0.3993, "step": 1766 }, { "epoch": 0.09894725053197446, "grad_norm": 1.1423094272613525, "learning_rate": 8.825e-06, "loss": 0.3881, "step": 1767 }, { "epoch": 
0.09900324784410348, "grad_norm": 1.2995854616165161, "learning_rate": 8.83e-06, "loss": 0.4175, "step": 1768 }, { "epoch": 0.0990592451562325, "grad_norm": 1.3148881196975708, "learning_rate": 8.835000000000001e-06, "loss": 0.6037, "step": 1769 }, { "epoch": 0.09911524246836152, "grad_norm": 1.084141731262207, "learning_rate": 8.840000000000002e-06, "loss": 0.3905, "step": 1770 }, { "epoch": 0.09917123978049054, "grad_norm": 1.2053523063659668, "learning_rate": 8.845000000000001e-06, "loss": 0.3216, "step": 1771 }, { "epoch": 0.09922723709261956, "grad_norm": 1.2783927917480469, "learning_rate": 8.85e-06, "loss": 0.4215, "step": 1772 }, { "epoch": 0.09928323440474857, "grad_norm": 1.163581132888794, "learning_rate": 8.855e-06, "loss": 0.5082, "step": 1773 }, { "epoch": 0.09933923171687759, "grad_norm": 1.0954021215438843, "learning_rate": 8.86e-06, "loss": 0.372, "step": 1774 }, { "epoch": 0.0993952290290066, "grad_norm": 1.236472487449646, "learning_rate": 8.865e-06, "loss": 0.3974, "step": 1775 }, { "epoch": 0.09945122634113562, "grad_norm": 1.2931112051010132, "learning_rate": 8.87e-06, "loss": 0.43, "step": 1776 }, { "epoch": 0.09950722365326464, "grad_norm": 1.1061400175094604, "learning_rate": 8.875e-06, "loss": 0.4575, "step": 1777 }, { "epoch": 0.09956322096539366, "grad_norm": 1.1908855438232422, "learning_rate": 8.880000000000001e-06, "loss": 0.4363, "step": 1778 }, { "epoch": 0.09961921827752268, "grad_norm": 1.2525837421417236, "learning_rate": 8.885e-06, "loss": 0.5029, "step": 1779 }, { "epoch": 0.0996752155896517, "grad_norm": 1.2395910024642944, "learning_rate": 8.890000000000001e-06, "loss": 0.4891, "step": 1780 }, { "epoch": 0.09973121290178072, "grad_norm": 1.4050637483596802, "learning_rate": 8.895e-06, "loss": 0.5142, "step": 1781 }, { "epoch": 0.09978721021390974, "grad_norm": 1.4121829271316528, "learning_rate": 8.9e-06, "loss": 0.6583, "step": 1782 }, { "epoch": 0.09984320752603874, "grad_norm": 1.1940996646881104, "learning_rate": 
8.905e-06, "loss": 0.4022, "step": 1783 }, { "epoch": 0.09989920483816776, "grad_norm": 1.557766318321228, "learning_rate": 8.910000000000001e-06, "loss": 0.6801, "step": 1784 }, { "epoch": 0.09995520215029678, "grad_norm": 1.3310229778289795, "learning_rate": 8.915e-06, "loss": 0.3957, "step": 1785 }, { "epoch": 0.1000111994624258, "grad_norm": 1.4797648191452026, "learning_rate": 8.920000000000001e-06, "loss": 0.5639, "step": 1786 }, { "epoch": 0.10006719677455482, "grad_norm": 0.9999483227729797, "learning_rate": 8.925e-06, "loss": 0.4347, "step": 1787 }, { "epoch": 0.10012319408668384, "grad_norm": 1.196457862854004, "learning_rate": 8.930000000000001e-06, "loss": 0.5074, "step": 1788 }, { "epoch": 0.10017919139881286, "grad_norm": 1.206497073173523, "learning_rate": 8.935e-06, "loss": 0.2889, "step": 1789 }, { "epoch": 0.10023518871094188, "grad_norm": 1.3581793308258057, "learning_rate": 8.939999999999999e-06, "loss": 0.3974, "step": 1790 }, { "epoch": 0.1002911860230709, "grad_norm": 1.3118083477020264, "learning_rate": 8.945e-06, "loss": 0.5182, "step": 1791 }, { "epoch": 0.10034718333519992, "grad_norm": 1.631662368774414, "learning_rate": 8.95e-06, "loss": 0.4934, "step": 1792 }, { "epoch": 0.10040318064732894, "grad_norm": 1.0491358041763306, "learning_rate": 8.955000000000002e-06, "loss": 0.4696, "step": 1793 }, { "epoch": 0.10045917795945794, "grad_norm": 1.2039273977279663, "learning_rate": 8.96e-06, "loss": 0.5036, "step": 1794 }, { "epoch": 0.10051517527158696, "grad_norm": 1.1414377689361572, "learning_rate": 8.965e-06, "loss": 0.5919, "step": 1795 }, { "epoch": 0.10057117258371598, "grad_norm": 1.2074891328811646, "learning_rate": 8.97e-06, "loss": 0.5084, "step": 1796 }, { "epoch": 0.100627169895845, "grad_norm": 1.3807848691940308, "learning_rate": 8.975e-06, "loss": 0.373, "step": 1797 }, { "epoch": 0.10068316720797402, "grad_norm": 1.1689538955688477, "learning_rate": 8.98e-06, "loss": 0.4797, "step": 1798 }, { "epoch": 0.10073916452010304, 
"grad_norm": 1.133975625038147, "learning_rate": 8.985e-06, "loss": 0.4301, "step": 1799 }, { "epoch": 0.10079516183223206, "grad_norm": 1.4214868545532227, "learning_rate": 8.99e-06, "loss": 0.4385, "step": 1800 }, { "epoch": 0.10085115914436107, "grad_norm": 1.0573962926864624, "learning_rate": 8.995000000000001e-06, "loss": 0.3093, "step": 1801 }, { "epoch": 0.1009071564564901, "grad_norm": 1.1686052083969116, "learning_rate": 9e-06, "loss": 0.4314, "step": 1802 }, { "epoch": 0.10096315376861911, "grad_norm": 1.1324526071548462, "learning_rate": 9.005000000000001e-06, "loss": 0.4602, "step": 1803 }, { "epoch": 0.10101915108074812, "grad_norm": 1.3244366645812988, "learning_rate": 9.01e-06, "loss": 0.4623, "step": 1804 }, { "epoch": 0.10107514839287714, "grad_norm": 1.2081764936447144, "learning_rate": 9.015e-06, "loss": 0.6177, "step": 1805 }, { "epoch": 0.10113114570500616, "grad_norm": 1.2845309972763062, "learning_rate": 9.02e-06, "loss": 0.4678, "step": 1806 }, { "epoch": 0.10118714301713518, "grad_norm": 1.27948796749115, "learning_rate": 9.025e-06, "loss": 0.4194, "step": 1807 }, { "epoch": 0.1012431403292642, "grad_norm": 1.3915820121765137, "learning_rate": 9.030000000000002e-06, "loss": 0.4707, "step": 1808 }, { "epoch": 0.10129913764139321, "grad_norm": 1.4097074270248413, "learning_rate": 9.035e-06, "loss": 0.5651, "step": 1809 }, { "epoch": 0.10135513495352223, "grad_norm": 1.1681487560272217, "learning_rate": 9.04e-06, "loss": 0.4335, "step": 1810 }, { "epoch": 0.10141113226565125, "grad_norm": 1.217890977859497, "learning_rate": 9.045e-06, "loss": 0.522, "step": 1811 }, { "epoch": 0.10146712957778027, "grad_norm": 1.242827296257019, "learning_rate": 9.05e-06, "loss": 0.5948, "step": 1812 }, { "epoch": 0.10152312688990929, "grad_norm": 1.3299055099487305, "learning_rate": 9.055e-06, "loss": 0.656, "step": 1813 }, { "epoch": 0.1015791242020383, "grad_norm": 1.1744288206100464, "learning_rate": 9.06e-06, "loss": 0.4626, "step": 1814 }, { "epoch": 
0.10163512151416731, "grad_norm": 1.298693299293518, "learning_rate": 9.065e-06, "loss": 0.5209, "step": 1815 }, { "epoch": 0.10169111882629633, "grad_norm": 1.4027745723724365, "learning_rate": 9.070000000000001e-06, "loss": 0.4836, "step": 1816 }, { "epoch": 0.10174711613842535, "grad_norm": 1.2797088623046875, "learning_rate": 9.075e-06, "loss": 0.5703, "step": 1817 }, { "epoch": 0.10180311345055437, "grad_norm": 1.4140253067016602, "learning_rate": 9.080000000000001e-06, "loss": 0.5811, "step": 1818 }, { "epoch": 0.10185911076268339, "grad_norm": 1.6313427686691284, "learning_rate": 9.085e-06, "loss": 0.5481, "step": 1819 }, { "epoch": 0.10191510807481241, "grad_norm": 1.1847879886627197, "learning_rate": 9.09e-06, "loss": 0.4082, "step": 1820 }, { "epoch": 0.10197110538694143, "grad_norm": 1.5288350582122803, "learning_rate": 9.095e-06, "loss": 0.4765, "step": 1821 }, { "epoch": 0.10202710269907045, "grad_norm": 1.3034580945968628, "learning_rate": 9.100000000000001e-06, "loss": 0.3948, "step": 1822 }, { "epoch": 0.10208310001119947, "grad_norm": 1.2626841068267822, "learning_rate": 9.105000000000002e-06, "loss": 0.4911, "step": 1823 }, { "epoch": 0.10213909732332849, "grad_norm": 1.275468111038208, "learning_rate": 9.110000000000001e-06, "loss": 0.4396, "step": 1824 }, { "epoch": 0.10219509463545749, "grad_norm": 1.1918511390686035, "learning_rate": 9.115e-06, "loss": 0.4379, "step": 1825 }, { "epoch": 0.10225109194758651, "grad_norm": 1.9813170433044434, "learning_rate": 9.12e-06, "loss": 0.7549, "step": 1826 }, { "epoch": 0.10230708925971553, "grad_norm": 1.3841813802719116, "learning_rate": 9.125e-06, "loss": 0.4712, "step": 1827 }, { "epoch": 0.10236308657184455, "grad_norm": 1.1758801937103271, "learning_rate": 9.13e-06, "loss": 0.5183, "step": 1828 }, { "epoch": 0.10241908388397357, "grad_norm": 1.1382752656936646, "learning_rate": 9.135e-06, "loss": 0.3649, "step": 1829 }, { "epoch": 0.10247508119610259, "grad_norm": 1.2009247541427612, 
"learning_rate": 9.14e-06, "loss": 0.4377, "step": 1830 }, { "epoch": 0.10253107850823161, "grad_norm": 1.2643797397613525, "learning_rate": 9.145000000000001e-06, "loss": 0.4512, "step": 1831 }, { "epoch": 0.10258707582036063, "grad_norm": 1.1410797834396362, "learning_rate": 9.15e-06, "loss": 0.4931, "step": 1832 }, { "epoch": 0.10264307313248965, "grad_norm": 1.101846694946289, "learning_rate": 9.155000000000001e-06, "loss": 0.5352, "step": 1833 }, { "epoch": 0.10269907044461866, "grad_norm": 1.570682168006897, "learning_rate": 9.16e-06, "loss": 0.4423, "step": 1834 }, { "epoch": 0.10275506775674767, "grad_norm": 1.3165384531021118, "learning_rate": 9.165e-06, "loss": 0.4251, "step": 1835 }, { "epoch": 0.10281106506887669, "grad_norm": 1.0519487857818604, "learning_rate": 9.17e-06, "loss": 0.4528, "step": 1836 }, { "epoch": 0.10286706238100571, "grad_norm": 1.3688673973083496, "learning_rate": 9.175000000000001e-06, "loss": 0.4195, "step": 1837 }, { "epoch": 0.10292305969313473, "grad_norm": 1.401411533355713, "learning_rate": 9.180000000000002e-06, "loss": 0.55, "step": 1838 }, { "epoch": 0.10297905700526375, "grad_norm": 1.0831941366195679, "learning_rate": 9.185000000000001e-06, "loss": 0.3569, "step": 1839 }, { "epoch": 0.10303505431739277, "grad_norm": 1.2040417194366455, "learning_rate": 9.19e-06, "loss": 0.4522, "step": 1840 }, { "epoch": 0.10309105162952178, "grad_norm": 1.1933237314224243, "learning_rate": 9.195000000000001e-06, "loss": 0.6391, "step": 1841 }, { "epoch": 0.1031470489416508, "grad_norm": 1.2009390592575073, "learning_rate": 9.2e-06, "loss": 0.5783, "step": 1842 }, { "epoch": 0.10320304625377982, "grad_norm": 1.155548095703125, "learning_rate": 9.205e-06, "loss": 0.3664, "step": 1843 }, { "epoch": 0.10325904356590884, "grad_norm": 1.0223307609558105, "learning_rate": 9.21e-06, "loss": 0.3759, "step": 1844 }, { "epoch": 0.10331504087803785, "grad_norm": 1.281187891960144, "learning_rate": 9.215e-06, "loss": 0.43, "step": 1845 }, { "epoch": 
0.10337103819016687, "grad_norm": 1.2086399793624878, "learning_rate": 9.220000000000002e-06, "loss": 0.3932, "step": 1846 }, { "epoch": 0.10342703550229589, "grad_norm": 1.6127561330795288, "learning_rate": 9.225e-06, "loss": 0.5527, "step": 1847 }, { "epoch": 0.1034830328144249, "grad_norm": 1.1773858070373535, "learning_rate": 9.23e-06, "loss": 0.4325, "step": 1848 }, { "epoch": 0.10353903012655392, "grad_norm": 1.2475886344909668, "learning_rate": 9.235e-06, "loss": 0.4141, "step": 1849 }, { "epoch": 0.10359502743868294, "grad_norm": 1.3503400087356567, "learning_rate": 9.24e-06, "loss": 0.5307, "step": 1850 }, { "epoch": 0.10365102475081196, "grad_norm": 1.1808948516845703, "learning_rate": 9.245e-06, "loss": 0.5951, "step": 1851 }, { "epoch": 0.10370702206294098, "grad_norm": 1.2310532331466675, "learning_rate": 9.25e-06, "loss": 0.5327, "step": 1852 }, { "epoch": 0.10376301937507, "grad_norm": 1.1280689239501953, "learning_rate": 9.255e-06, "loss": 0.4972, "step": 1853 }, { "epoch": 0.10381901668719902, "grad_norm": 1.2415132522583008, "learning_rate": 9.260000000000001e-06, "loss": 0.5418, "step": 1854 }, { "epoch": 0.10387501399932804, "grad_norm": 1.0911405086517334, "learning_rate": 9.265e-06, "loss": 0.402, "step": 1855 }, { "epoch": 0.10393101131145704, "grad_norm": 1.1728242635726929, "learning_rate": 9.270000000000001e-06, "loss": 0.3286, "step": 1856 }, { "epoch": 0.10398700862358606, "grad_norm": 1.2124569416046143, "learning_rate": 9.275e-06, "loss": 0.4761, "step": 1857 }, { "epoch": 0.10404300593571508, "grad_norm": 1.1276865005493164, "learning_rate": 9.28e-06, "loss": 0.3177, "step": 1858 }, { "epoch": 0.1040990032478441, "grad_norm": 1.0694518089294434, "learning_rate": 9.285e-06, "loss": 0.2832, "step": 1859 }, { "epoch": 0.10415500055997312, "grad_norm": 1.132310390472412, "learning_rate": 9.29e-06, "loss": 0.4409, "step": 1860 }, { "epoch": 0.10421099787210214, "grad_norm": 1.23562490940094, "learning_rate": 9.295000000000002e-06, "loss": 
0.4543, "step": 1861 }, { "epoch": 0.10426699518423116, "grad_norm": 1.2050089836120605, "learning_rate": 9.3e-06, "loss": 0.4191, "step": 1862 }, { "epoch": 0.10432299249636018, "grad_norm": 1.4085443019866943, "learning_rate": 9.305e-06, "loss": 0.4678, "step": 1863 }, { "epoch": 0.1043789898084892, "grad_norm": 1.2895890474319458, "learning_rate": 9.31e-06, "loss": 0.5659, "step": 1864 }, { "epoch": 0.10443498712061822, "grad_norm": 1.4111061096191406, "learning_rate": 9.315e-06, "loss": 0.3764, "step": 1865 }, { "epoch": 0.10449098443274722, "grad_norm": 1.1714211702346802, "learning_rate": 9.32e-06, "loss": 0.4289, "step": 1866 }, { "epoch": 0.10454698174487624, "grad_norm": 1.271815299987793, "learning_rate": 9.325e-06, "loss": 0.4254, "step": 1867 }, { "epoch": 0.10460297905700526, "grad_norm": 1.2217464447021484, "learning_rate": 9.33e-06, "loss": 0.4228, "step": 1868 }, { "epoch": 0.10465897636913428, "grad_norm": 1.1268996000289917, "learning_rate": 9.335000000000001e-06, "loss": 0.4438, "step": 1869 }, { "epoch": 0.1047149736812633, "grad_norm": 1.1250314712524414, "learning_rate": 9.34e-06, "loss": 0.3935, "step": 1870 }, { "epoch": 0.10477097099339232, "grad_norm": 1.4297596216201782, "learning_rate": 9.345000000000001e-06, "loss": 0.5209, "step": 1871 }, { "epoch": 0.10482696830552134, "grad_norm": 1.139918565750122, "learning_rate": 9.35e-06, "loss": 0.4825, "step": 1872 }, { "epoch": 0.10488296561765036, "grad_norm": 1.2917416095733643, "learning_rate": 9.355e-06, "loss": 0.5089, "step": 1873 }, { "epoch": 0.10493896292977938, "grad_norm": 1.4938721656799316, "learning_rate": 9.36e-06, "loss": 0.5283, "step": 1874 }, { "epoch": 0.1049949602419084, "grad_norm": 1.5548522472381592, "learning_rate": 9.365000000000001e-06, "loss": 0.4841, "step": 1875 }, { "epoch": 0.1050509575540374, "grad_norm": 1.1434475183486938, "learning_rate": 9.370000000000002e-06, "loss": 0.4877, "step": 1876 }, { "epoch": 0.10510695486616642, "grad_norm": 2.284370183944702, 
"learning_rate": 9.375000000000001e-06, "loss": 0.4799, "step": 1877 }, { "epoch": 0.10516295217829544, "grad_norm": 1.4365899562835693, "learning_rate": 9.38e-06, "loss": 0.5354, "step": 1878 }, { "epoch": 0.10521894949042446, "grad_norm": 1.1886478662490845, "learning_rate": 9.385e-06, "loss": 0.4485, "step": 1879 }, { "epoch": 0.10527494680255348, "grad_norm": 1.3608007431030273, "learning_rate": 9.39e-06, "loss": 0.4481, "step": 1880 }, { "epoch": 0.1053309441146825, "grad_norm": 1.356736421585083, "learning_rate": 9.395e-06, "loss": 0.5918, "step": 1881 }, { "epoch": 0.10538694142681151, "grad_norm": 1.3387682437896729, "learning_rate": 9.4e-06, "loss": 0.5101, "step": 1882 }, { "epoch": 0.10544293873894053, "grad_norm": 1.227960467338562, "learning_rate": 9.405e-06, "loss": 0.4004, "step": 1883 }, { "epoch": 0.10549893605106955, "grad_norm": 1.068472146987915, "learning_rate": 9.410000000000001e-06, "loss": 0.4885, "step": 1884 }, { "epoch": 0.10555493336319857, "grad_norm": 1.46451735496521, "learning_rate": 9.415e-06, "loss": 0.4849, "step": 1885 }, { "epoch": 0.10561093067532759, "grad_norm": 1.1203140020370483, "learning_rate": 9.420000000000001e-06, "loss": 0.3321, "step": 1886 }, { "epoch": 0.1056669279874566, "grad_norm": 1.3179823160171509, "learning_rate": 9.425e-06, "loss": 0.3947, "step": 1887 }, { "epoch": 0.10572292529958562, "grad_norm": 1.2362351417541504, "learning_rate": 9.43e-06, "loss": 0.4712, "step": 1888 }, { "epoch": 0.10577892261171463, "grad_norm": 1.349965214729309, "learning_rate": 9.435e-06, "loss": 0.4748, "step": 1889 }, { "epoch": 0.10583491992384365, "grad_norm": 1.158205509185791, "learning_rate": 9.44e-06, "loss": 0.3502, "step": 1890 }, { "epoch": 0.10589091723597267, "grad_norm": 1.2987258434295654, "learning_rate": 9.445000000000002e-06, "loss": 0.4718, "step": 1891 }, { "epoch": 0.10594691454810169, "grad_norm": 1.2764886617660522, "learning_rate": 9.450000000000001e-06, "loss": 0.4185, "step": 1892 }, { "epoch": 
0.10600291186023071, "grad_norm": 1.5539253950119019, "learning_rate": 9.455e-06, "loss": 0.4658, "step": 1893 }, { "epoch": 0.10605890917235973, "grad_norm": 1.2473183870315552, "learning_rate": 9.460000000000001e-06, "loss": 0.3696, "step": 1894 }, { "epoch": 0.10611490648448875, "grad_norm": 1.3843179941177368, "learning_rate": 9.465e-06, "loss": 0.4765, "step": 1895 }, { "epoch": 0.10617090379661777, "grad_norm": 1.3900247812271118, "learning_rate": 9.47e-06, "loss": 0.606, "step": 1896 }, { "epoch": 0.10622690110874677, "grad_norm": 1.1595515012741089, "learning_rate": 9.475e-06, "loss": 0.4623, "step": 1897 }, { "epoch": 0.1062828984208758, "grad_norm": 2.569775104522705, "learning_rate": 9.48e-06, "loss": 0.3973, "step": 1898 }, { "epoch": 0.10633889573300481, "grad_norm": 1.197704792022705, "learning_rate": 9.485000000000002e-06, "loss": 0.4226, "step": 1899 }, { "epoch": 0.10639489304513383, "grad_norm": 2.009443521499634, "learning_rate": 9.49e-06, "loss": 0.5467, "step": 1900 }, { "epoch": 0.10645089035726285, "grad_norm": 1.1895710229873657, "learning_rate": 9.495000000000001e-06, "loss": 0.4143, "step": 1901 }, { "epoch": 0.10650688766939187, "grad_norm": 1.5191071033477783, "learning_rate": 9.5e-06, "loss": 0.4761, "step": 1902 }, { "epoch": 0.10656288498152089, "grad_norm": 1.3583176136016846, "learning_rate": 9.505e-06, "loss": 0.5819, "step": 1903 }, { "epoch": 0.10661888229364991, "grad_norm": 1.112642526626587, "learning_rate": 9.51e-06, "loss": 0.4729, "step": 1904 }, { "epoch": 0.10667487960577893, "grad_norm": 1.0252685546875, "learning_rate": 9.515e-06, "loss": 0.4811, "step": 1905 }, { "epoch": 0.10673087691790795, "grad_norm": 1.255096673965454, "learning_rate": 9.52e-06, "loss": 0.441, "step": 1906 }, { "epoch": 0.10678687423003695, "grad_norm": 1.3634824752807617, "learning_rate": 9.525000000000001e-06, "loss": 0.4749, "step": 1907 }, { "epoch": 0.10684287154216597, "grad_norm": 1.1139448881149292, "learning_rate": 9.53e-06, "loss": 
0.3083, "step": 1908 }, { "epoch": 0.10689886885429499, "grad_norm": 1.3352259397506714, "learning_rate": 9.535000000000001e-06, "loss": 0.3185, "step": 1909 }, { "epoch": 0.10695486616642401, "grad_norm": 1.2440032958984375, "learning_rate": 9.54e-06, "loss": 0.474, "step": 1910 }, { "epoch": 0.10701086347855303, "grad_norm": 1.1798661947250366, "learning_rate": 9.545e-06, "loss": 0.4714, "step": 1911 }, { "epoch": 0.10706686079068205, "grad_norm": 1.2166504859924316, "learning_rate": 9.55e-06, "loss": 0.5616, "step": 1912 }, { "epoch": 0.10712285810281107, "grad_norm": 1.2180215120315552, "learning_rate": 9.555e-06, "loss": 0.4465, "step": 1913 }, { "epoch": 0.10717885541494009, "grad_norm": 1.3125149011611938, "learning_rate": 9.560000000000002e-06, "loss": 0.5186, "step": 1914 }, { "epoch": 0.1072348527270691, "grad_norm": 1.1238898038864136, "learning_rate": 9.565e-06, "loss": 0.3985, "step": 1915 }, { "epoch": 0.10729085003919812, "grad_norm": 1.2942389249801636, "learning_rate": 9.57e-06, "loss": 0.4492, "step": 1916 }, { "epoch": 0.10734684735132714, "grad_norm": 1.153488039970398, "learning_rate": 9.575e-06, "loss": 0.5233, "step": 1917 }, { "epoch": 0.10740284466345615, "grad_norm": 1.1370173692703247, "learning_rate": 9.58e-06, "loss": 0.4421, "step": 1918 }, { "epoch": 0.10745884197558517, "grad_norm": 1.3307583332061768, "learning_rate": 9.585e-06, "loss": 0.4855, "step": 1919 }, { "epoch": 0.10751483928771419, "grad_norm": 1.103548526763916, "learning_rate": 9.59e-06, "loss": 0.39, "step": 1920 }, { "epoch": 0.1075708365998432, "grad_norm": 1.2443008422851562, "learning_rate": 9.595e-06, "loss": 0.5393, "step": 1921 }, { "epoch": 0.10762683391197223, "grad_norm": 1.0949368476867676, "learning_rate": 9.600000000000001e-06, "loss": 0.3842, "step": 1922 }, { "epoch": 0.10768283122410124, "grad_norm": 1.3167270421981812, "learning_rate": 9.605e-06, "loss": 0.4478, "step": 1923 }, { "epoch": 0.10773882853623026, "grad_norm": 1.3839845657348633, 
"learning_rate": 9.610000000000001e-06, "loss": 0.3765, "step": 1924 }, { "epoch": 0.10779482584835928, "grad_norm": 1.3814969062805176, "learning_rate": 9.615e-06, "loss": 0.5044, "step": 1925 }, { "epoch": 0.1078508231604883, "grad_norm": 1.2561522722244263, "learning_rate": 9.62e-06, "loss": 0.4028, "step": 1926 }, { "epoch": 0.10790682047261732, "grad_norm": 1.1426435708999634, "learning_rate": 9.625e-06, "loss": 0.3741, "step": 1927 }, { "epoch": 0.10796281778474633, "grad_norm": 1.2376185655593872, "learning_rate": 9.630000000000001e-06, "loss": 0.639, "step": 1928 }, { "epoch": 0.10801881509687535, "grad_norm": 1.2048944234848022, "learning_rate": 9.635000000000002e-06, "loss": 0.4114, "step": 1929 }, { "epoch": 0.10807481240900436, "grad_norm": 1.0896660089492798, "learning_rate": 9.640000000000001e-06, "loss": 0.4479, "step": 1930 }, { "epoch": 0.10813080972113338, "grad_norm": 1.411189079284668, "learning_rate": 9.645e-06, "loss": 0.4999, "step": 1931 }, { "epoch": 0.1081868070332624, "grad_norm": 1.2659348249435425, "learning_rate": 9.65e-06, "loss": 0.5425, "step": 1932 }, { "epoch": 0.10824280434539142, "grad_norm": 3.3288135528564453, "learning_rate": 9.655e-06, "loss": 0.3585, "step": 1933 }, { "epoch": 0.10829880165752044, "grad_norm": 1.1483445167541504, "learning_rate": 9.66e-06, "loss": 0.4003, "step": 1934 }, { "epoch": 0.10835479896964946, "grad_norm": 1.2844260931015015, "learning_rate": 9.665e-06, "loss": 0.5146, "step": 1935 }, { "epoch": 0.10841079628177848, "grad_norm": 1.4117387533187866, "learning_rate": 9.67e-06, "loss": 0.5105, "step": 1936 }, { "epoch": 0.1084667935939075, "grad_norm": 1.4861122369766235, "learning_rate": 9.675000000000001e-06, "loss": 0.5692, "step": 1937 }, { "epoch": 0.1085227909060365, "grad_norm": 1.534714937210083, "learning_rate": 9.68e-06, "loss": 0.4468, "step": 1938 }, { "epoch": 0.10857878821816552, "grad_norm": 1.1656982898712158, "learning_rate": 9.685000000000001e-06, "loss": 0.4292, "step": 1939 }, { 
"epoch": 0.10863478553029454, "grad_norm": 1.238632321357727, "learning_rate": 9.69e-06, "loss": 0.4331, "step": 1940 }, { "epoch": 0.10869078284242356, "grad_norm": 1.5374995470046997, "learning_rate": 9.695e-06, "loss": 0.6631, "step": 1941 }, { "epoch": 0.10874678015455258, "grad_norm": 1.3231433629989624, "learning_rate": 9.7e-06, "loss": 0.3684, "step": 1942 }, { "epoch": 0.1088027774666816, "grad_norm": 1.2549045085906982, "learning_rate": 9.705e-06, "loss": 0.3888, "step": 1943 }, { "epoch": 0.10885877477881062, "grad_norm": 0.9933781623840332, "learning_rate": 9.71e-06, "loss": 0.4307, "step": 1944 }, { "epoch": 0.10891477209093964, "grad_norm": 1.3064879179000854, "learning_rate": 9.715000000000001e-06, "loss": 0.495, "step": 1945 }, { "epoch": 0.10897076940306866, "grad_norm": 1.2886368036270142, "learning_rate": 9.72e-06, "loss": 0.4827, "step": 1946 }, { "epoch": 0.10902676671519768, "grad_norm": 1.11863112449646, "learning_rate": 9.725000000000001e-06, "loss": 0.39, "step": 1947 }, { "epoch": 0.1090827640273267, "grad_norm": 1.2331268787384033, "learning_rate": 9.73e-06, "loss": 0.4058, "step": 1948 }, { "epoch": 0.1091387613394557, "grad_norm": 4.416565895080566, "learning_rate": 9.735e-06, "loss": 0.5548, "step": 1949 }, { "epoch": 0.10919475865158472, "grad_norm": 1.183388113975525, "learning_rate": 9.74e-06, "loss": 0.4693, "step": 1950 }, { "epoch": 0.10925075596371374, "grad_norm": 1.4820784330368042, "learning_rate": 9.745e-06, "loss": 0.4546, "step": 1951 }, { "epoch": 0.10930675327584276, "grad_norm": 1.2141631841659546, "learning_rate": 9.750000000000002e-06, "loss": 0.4229, "step": 1952 }, { "epoch": 0.10936275058797178, "grad_norm": 1.192245602607727, "learning_rate": 9.755e-06, "loss": 0.4096, "step": 1953 }, { "epoch": 0.1094187479001008, "grad_norm": 1.2988064289093018, "learning_rate": 9.760000000000001e-06, "loss": 0.5635, "step": 1954 }, { "epoch": 0.10947474521222982, "grad_norm": 1.2981138229370117, "learning_rate": 9.765e-06, 
"loss": 0.4759, "step": 1955 }, { "epoch": 0.10953074252435883, "grad_norm": 1.0608023405075073, "learning_rate": 9.77e-06, "loss": 0.4461, "step": 1956 }, { "epoch": 0.10958673983648785, "grad_norm": 1.3462153673171997, "learning_rate": 9.775e-06, "loss": 0.4433, "step": 1957 }, { "epoch": 0.10964273714861687, "grad_norm": 1.4526351690292358, "learning_rate": 9.78e-06, "loss": 0.4635, "step": 1958 }, { "epoch": 0.10969873446074588, "grad_norm": 1.1988767385482788, "learning_rate": 9.785e-06, "loss": 0.4356, "step": 1959 }, { "epoch": 0.1097547317728749, "grad_norm": 1.0474293231964111, "learning_rate": 9.790000000000001e-06, "loss": 0.3663, "step": 1960 }, { "epoch": 0.10981072908500392, "grad_norm": 1.190447211265564, "learning_rate": 9.795e-06, "loss": 0.459, "step": 1961 }, { "epoch": 0.10986672639713294, "grad_norm": 1.1437945365905762, "learning_rate": 9.800000000000001e-06, "loss": 0.4235, "step": 1962 }, { "epoch": 0.10992272370926195, "grad_norm": 1.2026954889297485, "learning_rate": 9.805e-06, "loss": 0.5103, "step": 1963 }, { "epoch": 0.10997872102139097, "grad_norm": 1.231103539466858, "learning_rate": 9.810000000000001e-06, "loss": 0.5811, "step": 1964 }, { "epoch": 0.11003471833351999, "grad_norm": 1.1028512716293335, "learning_rate": 9.815e-06, "loss": 0.381, "step": 1965 }, { "epoch": 0.11009071564564901, "grad_norm": 1.0779800415039062, "learning_rate": 9.820000000000001e-06, "loss": 0.3742, "step": 1966 }, { "epoch": 0.11014671295777803, "grad_norm": 1.243393063545227, "learning_rate": 9.825000000000002e-06, "loss": 0.4914, "step": 1967 }, { "epoch": 0.11020271026990705, "grad_norm": 1.29071044921875, "learning_rate": 9.83e-06, "loss": 0.4967, "step": 1968 }, { "epoch": 0.11025870758203606, "grad_norm": 1.126316785812378, "learning_rate": 9.835000000000002e-06, "loss": 0.4539, "step": 1969 }, { "epoch": 0.11031470489416507, "grad_norm": 1.5042952299118042, "learning_rate": 9.84e-06, "loss": 0.6822, "step": 1970 }, { "epoch": 0.1103707022062941, 
"grad_norm": 1.3173643350601196, "learning_rate": 9.845e-06, "loss": 0.4644, "step": 1971 }, { "epoch": 0.11042669951842311, "grad_norm": 1.1041350364685059, "learning_rate": 9.85e-06, "loss": 0.3427, "step": 1972 }, { "epoch": 0.11048269683055213, "grad_norm": 1.0766938924789429, "learning_rate": 9.855e-06, "loss": 0.4033, "step": 1973 }, { "epoch": 0.11053869414268115, "grad_norm": 1.1862468719482422, "learning_rate": 9.86e-06, "loss": 0.477, "step": 1974 }, { "epoch": 0.11059469145481017, "grad_norm": 1.4422067403793335, "learning_rate": 9.865000000000001e-06, "loss": 0.5174, "step": 1975 }, { "epoch": 0.11065068876693919, "grad_norm": 1.438631296157837, "learning_rate": 9.87e-06, "loss": 0.4254, "step": 1976 }, { "epoch": 0.11070668607906821, "grad_norm": 1.2510143518447876, "learning_rate": 9.875000000000001e-06, "loss": 0.4414, "step": 1977 }, { "epoch": 0.11076268339119723, "grad_norm": 1.0611276626586914, "learning_rate": 9.88e-06, "loss": 0.3732, "step": 1978 }, { "epoch": 0.11081868070332625, "grad_norm": 1.4915348291397095, "learning_rate": 9.885e-06, "loss": 0.5958, "step": 1979 }, { "epoch": 0.11087467801545525, "grad_norm": 1.314444899559021, "learning_rate": 9.89e-06, "loss": 0.4871, "step": 1980 }, { "epoch": 0.11093067532758427, "grad_norm": 1.2102175951004028, "learning_rate": 9.895e-06, "loss": 0.4774, "step": 1981 }, { "epoch": 0.11098667263971329, "grad_norm": 1.1883585453033447, "learning_rate": 9.900000000000002e-06, "loss": 0.3676, "step": 1982 }, { "epoch": 0.11104266995184231, "grad_norm": 1.1065423488616943, "learning_rate": 9.905000000000001e-06, "loss": 0.3954, "step": 1983 }, { "epoch": 0.11109866726397133, "grad_norm": 1.7444603443145752, "learning_rate": 9.91e-06, "loss": 0.7777, "step": 1984 }, { "epoch": 0.11115466457610035, "grad_norm": 1.5046581029891968, "learning_rate": 9.915e-06, "loss": 0.535, "step": 1985 }, { "epoch": 0.11121066188822937, "grad_norm": 1.9591126441955566, "learning_rate": 9.92e-06, "loss": 0.4007, "step": 
1986 }, { "epoch": 0.11126665920035839, "grad_norm": 1.109308123588562, "learning_rate": 9.925e-06, "loss": 0.4983, "step": 1987 }, { "epoch": 0.1113226565124874, "grad_norm": 1.1649729013442993, "learning_rate": 9.93e-06, "loss": 0.4802, "step": 1988 }, { "epoch": 0.11137865382461642, "grad_norm": 1.3624181747436523, "learning_rate": 9.935e-06, "loss": 0.5429, "step": 1989 }, { "epoch": 0.11143465113674543, "grad_norm": 1.2421095371246338, "learning_rate": 9.940000000000001e-06, "loss": 0.4532, "step": 1990 }, { "epoch": 0.11149064844887445, "grad_norm": 1.2075271606445312, "learning_rate": 9.945e-06, "loss": 0.4258, "step": 1991 }, { "epoch": 0.11154664576100347, "grad_norm": 1.2682304382324219, "learning_rate": 9.950000000000001e-06, "loss": 0.5251, "step": 1992 }, { "epoch": 0.11160264307313249, "grad_norm": 1.2718613147735596, "learning_rate": 9.955e-06, "loss": 0.4165, "step": 1993 }, { "epoch": 0.1116586403852615, "grad_norm": 1.4027260541915894, "learning_rate": 9.96e-06, "loss": 0.6875, "step": 1994 }, { "epoch": 0.11171463769739053, "grad_norm": 1.2574266195297241, "learning_rate": 9.965e-06, "loss": 0.4169, "step": 1995 }, { "epoch": 0.11177063500951955, "grad_norm": 1.2845044136047363, "learning_rate": 9.97e-06, "loss": 0.4741, "step": 1996 }, { "epoch": 0.11182663232164856, "grad_norm": 1.383315086364746, "learning_rate": 9.975e-06, "loss": 0.5904, "step": 1997 }, { "epoch": 0.11188262963377758, "grad_norm": 1.2629704475402832, "learning_rate": 9.980000000000001e-06, "loss": 0.4515, "step": 1998 }, { "epoch": 0.1119386269459066, "grad_norm": 1.3212579488754272, "learning_rate": 9.985e-06, "loss": 0.5825, "step": 1999 }, { "epoch": 0.11199462425803561, "grad_norm": 1.274903655052185, "learning_rate": 9.990000000000001e-06, "loss": 0.5872, "step": 2000 }, { "epoch": 0.11205062157016463, "grad_norm": 1.2747188806533813, "learning_rate": 9.995e-06, "loss": 0.3708, "step": 2001 }, { "epoch": 0.11210661888229365, "grad_norm": 1.472104549407959, 
"learning_rate": 1e-05, "loss": 0.5602, "step": 2002 }, { "epoch": 0.11216261619442267, "grad_norm": 1.256666660308838, "learning_rate": 1.0005e-05, "loss": 0.4248, "step": 2003 }, { "epoch": 0.11221861350655168, "grad_norm": 1.092176079750061, "learning_rate": 1.001e-05, "loss": 0.4048, "step": 2004 }, { "epoch": 0.1122746108186807, "grad_norm": 1.1835278272628784, "learning_rate": 1.0015000000000002e-05, "loss": 0.5522, "step": 2005 }, { "epoch": 0.11233060813080972, "grad_norm": 17.31488800048828, "learning_rate": 1.002e-05, "loss": 0.354, "step": 2006 }, { "epoch": 0.11238660544293874, "grad_norm": 1.2700695991516113, "learning_rate": 1.0025000000000001e-05, "loss": 0.6672, "step": 2007 }, { "epoch": 0.11244260275506776, "grad_norm": 1.2675044536590576, "learning_rate": 1.003e-05, "loss": 0.5727, "step": 2008 }, { "epoch": 0.11249860006719678, "grad_norm": 1.354222059249878, "learning_rate": 1.0035e-05, "loss": 0.553, "step": 2009 }, { "epoch": 0.1125545973793258, "grad_norm": 1.2412958145141602, "learning_rate": 1.004e-05, "loss": 0.3739, "step": 2010 }, { "epoch": 0.1126105946914548, "grad_norm": 1.1372991800308228, "learning_rate": 1.0045e-05, "loss": 0.3767, "step": 2011 }, { "epoch": 0.11266659200358382, "grad_norm": 1.4014700651168823, "learning_rate": 1.005e-05, "loss": 0.611, "step": 2012 }, { "epoch": 0.11272258931571284, "grad_norm": 1.648236632347107, "learning_rate": 1.0055000000000001e-05, "loss": 0.546, "step": 2013 }, { "epoch": 0.11277858662784186, "grad_norm": 1.0424588918685913, "learning_rate": 1.006e-05, "loss": 0.3135, "step": 2014 }, { "epoch": 0.11283458393997088, "grad_norm": 1.1952458620071411, "learning_rate": 1.0065000000000001e-05, "loss": 0.4185, "step": 2015 }, { "epoch": 0.1128905812520999, "grad_norm": 1.2783581018447876, "learning_rate": 1.007e-05, "loss": 0.5532, "step": 2016 }, { "epoch": 0.11294657856422892, "grad_norm": 1.3510452508926392, "learning_rate": 1.0075000000000001e-05, "loss": 0.3781, "step": 2017 }, { "epoch": 
0.11300257587635794, "grad_norm": 1.534055471420288, "learning_rate": 1.008e-05, "loss": 0.5066, "step": 2018 }, { "epoch": 0.11305857318848696, "grad_norm": 1.1314103603363037, "learning_rate": 1.0085e-05, "loss": 0.3968, "step": 2019 }, { "epoch": 0.11311457050061598, "grad_norm": 1.1797354221343994, "learning_rate": 1.0090000000000002e-05, "loss": 0.2986, "step": 2020 }, { "epoch": 0.11317056781274498, "grad_norm": 1.1401501893997192, "learning_rate": 1.0095e-05, "loss": 0.4593, "step": 2021 }, { "epoch": 0.113226565124874, "grad_norm": 1.2980574369430542, "learning_rate": 1.0100000000000002e-05, "loss": 0.6112, "step": 2022 }, { "epoch": 0.11328256243700302, "grad_norm": 1.022275447845459, "learning_rate": 1.0105e-05, "loss": 0.4415, "step": 2023 }, { "epoch": 0.11333855974913204, "grad_norm": 1.181807518005371, "learning_rate": 1.011e-05, "loss": 0.4308, "step": 2024 }, { "epoch": 0.11339455706126106, "grad_norm": 1.1795766353607178, "learning_rate": 1.0115e-05, "loss": 0.4049, "step": 2025 }, { "epoch": 0.11345055437339008, "grad_norm": 1.262852668762207, "learning_rate": 1.012e-05, "loss": 0.4993, "step": 2026 }, { "epoch": 0.1135065516855191, "grad_norm": 1.3481334447860718, "learning_rate": 1.0125e-05, "loss": 0.5144, "step": 2027 }, { "epoch": 0.11356254899764812, "grad_norm": 1.1121824979782104, "learning_rate": 1.0130000000000001e-05, "loss": 0.3884, "step": 2028 }, { "epoch": 0.11361854630977714, "grad_norm": 1.3804322481155396, "learning_rate": 1.0135e-05, "loss": 0.4579, "step": 2029 }, { "epoch": 0.11367454362190615, "grad_norm": 1.271407127380371, "learning_rate": 1.0140000000000001e-05, "loss": 0.6424, "step": 2030 }, { "epoch": 0.11373054093403516, "grad_norm": 1.1223245859146118, "learning_rate": 1.0145e-05, "loss": 0.4934, "step": 2031 }, { "epoch": 0.11378653824616418, "grad_norm": 1.3676866292953491, "learning_rate": 1.0150000000000001e-05, "loss": 0.4716, "step": 2032 }, { "epoch": 0.1138425355582932, "grad_norm": 1.60161292552948, 
"learning_rate": 1.0155e-05, "loss": 0.5407, "step": 2033 }, { "epoch": 0.11389853287042222, "grad_norm": 1.5236585140228271, "learning_rate": 1.016e-05, "loss": 0.4677, "step": 2034 }, { "epoch": 0.11395453018255124, "grad_norm": 1.2690157890319824, "learning_rate": 1.0165e-05, "loss": 0.4658, "step": 2035 }, { "epoch": 0.11401052749468026, "grad_norm": 1.9310686588287354, "learning_rate": 1.0170000000000001e-05, "loss": 0.3959, "step": 2036 }, { "epoch": 0.11406652480680927, "grad_norm": 1.4214739799499512, "learning_rate": 1.0175e-05, "loss": 0.5339, "step": 2037 }, { "epoch": 0.1141225221189383, "grad_norm": 1.6591894626617432, "learning_rate": 1.018e-05, "loss": 0.8932, "step": 2038 }, { "epoch": 0.11417851943106731, "grad_norm": 1.1825146675109863, "learning_rate": 1.0185e-05, "loss": 0.4305, "step": 2039 }, { "epoch": 0.11423451674319633, "grad_norm": 1.151872158050537, "learning_rate": 1.019e-05, "loss": 0.4143, "step": 2040 }, { "epoch": 0.11429051405532535, "grad_norm": 1.2419713735580444, "learning_rate": 1.0195e-05, "loss": 0.6054, "step": 2041 }, { "epoch": 0.11434651136745436, "grad_norm": 2.2976205348968506, "learning_rate": 1.02e-05, "loss": 0.5601, "step": 2042 }, { "epoch": 0.11440250867958338, "grad_norm": 1.0336729288101196, "learning_rate": 1.0205000000000001e-05, "loss": 0.3298, "step": 2043 }, { "epoch": 0.1144585059917124, "grad_norm": 13.27386474609375, "learning_rate": 1.021e-05, "loss": 0.4519, "step": 2044 }, { "epoch": 0.11451450330384141, "grad_norm": 1.9197330474853516, "learning_rate": 1.0215000000000001e-05, "loss": 0.4625, "step": 2045 }, { "epoch": 0.11457050061597043, "grad_norm": 1.1858235597610474, "learning_rate": 1.022e-05, "loss": 0.4263, "step": 2046 }, { "epoch": 0.11462649792809945, "grad_norm": 1.409785509109497, "learning_rate": 1.0225e-05, "loss": 0.5989, "step": 2047 }, { "epoch": 0.11468249524022847, "grad_norm": 1.406874656677246, "learning_rate": 1.023e-05, "loss": 0.4339, "step": 2048 }, { "epoch": 
0.11473849255235749, "grad_norm": 2.0559372901916504, "learning_rate": 1.0235e-05, "loss": 0.4359, "step": 2049 }, { "epoch": 0.11479448986448651, "grad_norm": 1.2511943578720093, "learning_rate": 1.024e-05, "loss": 0.4325, "step": 2050 }, { "epoch": 0.11485048717661553, "grad_norm": 1.1281684637069702, "learning_rate": 1.0245000000000001e-05, "loss": 0.378, "step": 2051 }, { "epoch": 0.11490648448874453, "grad_norm": 1.5144761800765991, "learning_rate": 1.025e-05, "loss": 0.409, "step": 2052 }, { "epoch": 0.11496248180087355, "grad_norm": 1.0558453798294067, "learning_rate": 1.0255000000000001e-05, "loss": 0.3365, "step": 2053 }, { "epoch": 0.11501847911300257, "grad_norm": 1.1261556148529053, "learning_rate": 1.026e-05, "loss": 0.3968, "step": 2054 }, { "epoch": 0.11507447642513159, "grad_norm": 1.3315730094909668, "learning_rate": 1.0265e-05, "loss": 0.5828, "step": 2055 }, { "epoch": 0.11513047373726061, "grad_norm": 1.191011905670166, "learning_rate": 1.027e-05, "loss": 0.5525, "step": 2056 }, { "epoch": 0.11518647104938963, "grad_norm": 1.2746089696884155, "learning_rate": 1.0275e-05, "loss": 0.4807, "step": 2057 }, { "epoch": 0.11524246836151865, "grad_norm": 1.04375159740448, "learning_rate": 1.0280000000000002e-05, "loss": 0.3857, "step": 2058 }, { "epoch": 0.11529846567364767, "grad_norm": 1.2364025115966797, "learning_rate": 1.0285e-05, "loss": 0.4123, "step": 2059 }, { "epoch": 0.11535446298577669, "grad_norm": 1.1593568325042725, "learning_rate": 1.0290000000000001e-05, "loss": 0.3481, "step": 2060 }, { "epoch": 0.1154104602979057, "grad_norm": 1.2940306663513184, "learning_rate": 1.0295e-05, "loss": 0.4622, "step": 2061 }, { "epoch": 0.11546645761003471, "grad_norm": 1.0955636501312256, "learning_rate": 1.03e-05, "loss": 0.3879, "step": 2062 }, { "epoch": 0.11552245492216373, "grad_norm": 1.4514633417129517, "learning_rate": 1.0305e-05, "loss": 0.3795, "step": 2063 }, { "epoch": 0.11557845223429275, "grad_norm": 1.148901104927063, "learning_rate": 
1.031e-05, "loss": 0.4643, "step": 2064 }, { "epoch": 0.11563444954642177, "grad_norm": 1.4800533056259155, "learning_rate": 1.0315e-05, "loss": 0.4183, "step": 2065 }, { "epoch": 0.11569044685855079, "grad_norm": 1.5105128288269043, "learning_rate": 1.0320000000000001e-05, "loss": 0.6919, "step": 2066 }, { "epoch": 0.11574644417067981, "grad_norm": 1.1953701972961426, "learning_rate": 1.0325e-05, "loss": 0.4238, "step": 2067 }, { "epoch": 0.11580244148280883, "grad_norm": 1.1766806840896606, "learning_rate": 1.0330000000000001e-05, "loss": 0.3962, "step": 2068 }, { "epoch": 0.11585843879493785, "grad_norm": 1.2753911018371582, "learning_rate": 1.0335e-05, "loss": 0.5049, "step": 2069 }, { "epoch": 0.11591443610706686, "grad_norm": 1.0954228639602661, "learning_rate": 1.0340000000000001e-05, "loss": 0.4322, "step": 2070 }, { "epoch": 0.11597043341919588, "grad_norm": 1.5553898811340332, "learning_rate": 1.0345e-05, "loss": 0.4716, "step": 2071 }, { "epoch": 0.1160264307313249, "grad_norm": 1.0009772777557373, "learning_rate": 1.035e-05, "loss": 0.3666, "step": 2072 }, { "epoch": 0.11608242804345391, "grad_norm": 1.017662525177002, "learning_rate": 1.0355000000000002e-05, "loss": 0.4066, "step": 2073 }, { "epoch": 0.11613842535558293, "grad_norm": 1.2353122234344482, "learning_rate": 1.036e-05, "loss": 0.5978, "step": 2074 }, { "epoch": 0.11619442266771195, "grad_norm": 1.3603179454803467, "learning_rate": 1.0365000000000002e-05, "loss": 0.5137, "step": 2075 }, { "epoch": 0.11625041997984097, "grad_norm": 1.1561442613601685, "learning_rate": 1.037e-05, "loss": 0.5091, "step": 2076 }, { "epoch": 0.11630641729196999, "grad_norm": 1.3805582523345947, "learning_rate": 1.0375e-05, "loss": 0.3834, "step": 2077 }, { "epoch": 0.116362414604099, "grad_norm": 1.298583745956421, "learning_rate": 1.038e-05, "loss": 0.4891, "step": 2078 }, { "epoch": 0.11641841191622802, "grad_norm": 1.1770060062408447, "learning_rate": 1.0385e-05, "loss": 0.513, "step": 2079 }, { "epoch": 
0.11647440922835704, "grad_norm": 1.1579817533493042, "learning_rate": 1.039e-05, "loss": 0.3686, "step": 2080 }, { "epoch": 0.11653040654048606, "grad_norm": 0.9921775460243225, "learning_rate": 1.0395000000000001e-05, "loss": 0.3811, "step": 2081 }, { "epoch": 0.11658640385261508, "grad_norm": 1.3118376731872559, "learning_rate": 1.04e-05, "loss": 0.4713, "step": 2082 }, { "epoch": 0.11664240116474409, "grad_norm": 1.1961724758148193, "learning_rate": 1.0405000000000001e-05, "loss": 0.507, "step": 2083 }, { "epoch": 0.1166983984768731, "grad_norm": 1.0339444875717163, "learning_rate": 1.041e-05, "loss": 0.4746, "step": 2084 }, { "epoch": 0.11675439578900212, "grad_norm": 1.3444161415100098, "learning_rate": 1.0415000000000001e-05, "loss": 0.5601, "step": 2085 }, { "epoch": 0.11681039310113114, "grad_norm": 1.2207037210464478, "learning_rate": 1.042e-05, "loss": 0.6489, "step": 2086 }, { "epoch": 0.11686639041326016, "grad_norm": 1.1908115148544312, "learning_rate": 1.0425e-05, "loss": 0.4842, "step": 2087 }, { "epoch": 0.11692238772538918, "grad_norm": 1.1665292978286743, "learning_rate": 1.043e-05, "loss": 0.3457, "step": 2088 }, { "epoch": 0.1169783850375182, "grad_norm": 1.1667604446411133, "learning_rate": 1.0435000000000001e-05, "loss": 0.4657, "step": 2089 }, { "epoch": 0.11703438234964722, "grad_norm": 1.1318879127502441, "learning_rate": 1.0440000000000002e-05, "loss": 0.5898, "step": 2090 }, { "epoch": 0.11709037966177624, "grad_norm": 1.0579155683517456, "learning_rate": 1.0445e-05, "loss": 0.3713, "step": 2091 }, { "epoch": 0.11714637697390526, "grad_norm": 1.1594823598861694, "learning_rate": 1.045e-05, "loss": 0.5215, "step": 2092 }, { "epoch": 0.11720237428603426, "grad_norm": 1.0567612648010254, "learning_rate": 1.0455e-05, "loss": 0.3901, "step": 2093 }, { "epoch": 0.11725837159816328, "grad_norm": 1.176743507385254, "learning_rate": 1.046e-05, "loss": 0.522, "step": 2094 }, { "epoch": 0.1173143689102923, "grad_norm": 1.27535879611969, 
"learning_rate": 1.0465e-05, "loss": 0.3966, "step": 2095 }, { "epoch": 0.11737036622242132, "grad_norm": 1.0443687438964844, "learning_rate": 1.0470000000000001e-05, "loss": 0.3516, "step": 2096 }, { "epoch": 0.11742636353455034, "grad_norm": 1.2144715785980225, "learning_rate": 1.0475e-05, "loss": 0.3338, "step": 2097 }, { "epoch": 0.11748236084667936, "grad_norm": 1.1741278171539307, "learning_rate": 1.0480000000000001e-05, "loss": 0.5254, "step": 2098 }, { "epoch": 0.11753835815880838, "grad_norm": 1.267101526260376, "learning_rate": 1.0485e-05, "loss": 0.5502, "step": 2099 }, { "epoch": 0.1175943554709374, "grad_norm": 1.103272557258606, "learning_rate": 1.049e-05, "loss": 0.4883, "step": 2100 }, { "epoch": 0.11765035278306642, "grad_norm": 1.1672580242156982, "learning_rate": 1.0495e-05, "loss": 0.407, "step": 2101 }, { "epoch": 0.11770635009519544, "grad_norm": 1.3263310194015503, "learning_rate": 1.05e-05, "loss": 0.5444, "step": 2102 }, { "epoch": 0.11776234740732446, "grad_norm": 1.1278438568115234, "learning_rate": 1.0505e-05, "loss": 0.4182, "step": 2103 }, { "epoch": 0.11781834471945346, "grad_norm": 1.1948771476745605, "learning_rate": 1.0510000000000001e-05, "loss": 0.3452, "step": 2104 }, { "epoch": 0.11787434203158248, "grad_norm": 1.6579550504684448, "learning_rate": 1.0515e-05, "loss": 0.4885, "step": 2105 }, { "epoch": 0.1179303393437115, "grad_norm": 1.0571820735931396, "learning_rate": 1.0520000000000001e-05, "loss": 0.3004, "step": 2106 }, { "epoch": 0.11798633665584052, "grad_norm": 1.228864073753357, "learning_rate": 1.0525e-05, "loss": 0.4447, "step": 2107 }, { "epoch": 0.11804233396796954, "grad_norm": 1.2009637355804443, "learning_rate": 1.053e-05, "loss": 0.5708, "step": 2108 }, { "epoch": 0.11809833128009856, "grad_norm": 1.3223775625228882, "learning_rate": 1.0535e-05, "loss": 0.4903, "step": 2109 }, { "epoch": 0.11815432859222758, "grad_norm": 1.6229612827301025, "learning_rate": 1.0539999999999999e-05, "loss": 0.4234, "step": 2110 
}, { "epoch": 0.1182103259043566, "grad_norm": 1.176546573638916, "learning_rate": 1.0545000000000002e-05, "loss": 0.4466, "step": 2111 }, { "epoch": 0.11826632321648561, "grad_norm": 1.011500358581543, "learning_rate": 1.055e-05, "loss": 0.3573, "step": 2112 }, { "epoch": 0.11832232052861463, "grad_norm": 1.4040199518203735, "learning_rate": 1.0555000000000001e-05, "loss": 0.5961, "step": 2113 }, { "epoch": 0.11837831784074364, "grad_norm": 1.226555585861206, "learning_rate": 1.056e-05, "loss": 0.386, "step": 2114 }, { "epoch": 0.11843431515287266, "grad_norm": 0.9699138402938843, "learning_rate": 1.0565e-05, "loss": 0.3447, "step": 2115 }, { "epoch": 0.11849031246500168, "grad_norm": 1.2999788522720337, "learning_rate": 1.057e-05, "loss": 0.4002, "step": 2116 }, { "epoch": 0.1185463097771307, "grad_norm": 1.2441914081573486, "learning_rate": 1.0575e-05, "loss": 0.4244, "step": 2117 }, { "epoch": 0.11860230708925971, "grad_norm": 1.3216584920883179, "learning_rate": 1.058e-05, "loss": 0.753, "step": 2118 }, { "epoch": 0.11865830440138873, "grad_norm": 1.2601038217544556, "learning_rate": 1.0585000000000001e-05, "loss": 0.4507, "step": 2119 }, { "epoch": 0.11871430171351775, "grad_norm": 1.233767032623291, "learning_rate": 1.059e-05, "loss": 0.44, "step": 2120 }, { "epoch": 0.11877029902564677, "grad_norm": 1.421810269355774, "learning_rate": 1.0595000000000001e-05, "loss": 0.4378, "step": 2121 }, { "epoch": 0.11882629633777579, "grad_norm": 1.1413922309875488, "learning_rate": 1.06e-05, "loss": 0.4579, "step": 2122 }, { "epoch": 0.11888229364990481, "grad_norm": 1.8250983953475952, "learning_rate": 1.0605000000000001e-05, "loss": 0.4029, "step": 2123 }, { "epoch": 0.11893829096203382, "grad_norm": 1.1074143648147583, "learning_rate": 1.061e-05, "loss": 0.4235, "step": 2124 }, { "epoch": 0.11899428827416283, "grad_norm": 1.3174351453781128, "learning_rate": 1.0615e-05, "loss": 0.4614, "step": 2125 }, { "epoch": 0.11905028558629185, "grad_norm": 1.3979164361953735, 
"learning_rate": 1.062e-05, "loss": 0.5064, "step": 2126 }, { "epoch": 0.11910628289842087, "grad_norm": 1.1409600973129272, "learning_rate": 1.0625e-05, "loss": 0.3579, "step": 2127 }, { "epoch": 0.11916228021054989, "grad_norm": 1.1139148473739624, "learning_rate": 1.0630000000000002e-05, "loss": 0.4408, "step": 2128 }, { "epoch": 0.11921827752267891, "grad_norm": 1.3749010562896729, "learning_rate": 1.0635e-05, "loss": 0.4861, "step": 2129 }, { "epoch": 0.11927427483480793, "grad_norm": 1.289667010307312, "learning_rate": 1.064e-05, "loss": 0.4676, "step": 2130 }, { "epoch": 0.11933027214693695, "grad_norm": 1.2668863534927368, "learning_rate": 1.0645e-05, "loss": 0.6976, "step": 2131 }, { "epoch": 0.11938626945906597, "grad_norm": 1.4886201620101929, "learning_rate": 1.065e-05, "loss": 0.5122, "step": 2132 }, { "epoch": 0.11944226677119499, "grad_norm": 15.780389785766602, "learning_rate": 1.0655e-05, "loss": 0.3455, "step": 2133 }, { "epoch": 0.11949826408332401, "grad_norm": 1.3982264995574951, "learning_rate": 1.0660000000000001e-05, "loss": 0.569, "step": 2134 }, { "epoch": 0.11955426139545301, "grad_norm": 1.2477631568908691, "learning_rate": 1.0665e-05, "loss": 0.4225, "step": 2135 }, { "epoch": 0.11961025870758203, "grad_norm": 1.1074628829956055, "learning_rate": 1.0670000000000001e-05, "loss": 0.3849, "step": 2136 }, { "epoch": 0.11966625601971105, "grad_norm": 1.1565700769424438, "learning_rate": 1.0675e-05, "loss": 0.3819, "step": 2137 }, { "epoch": 0.11972225333184007, "grad_norm": 1.2876352071762085, "learning_rate": 1.0680000000000001e-05, "loss": 0.4623, "step": 2138 }, { "epoch": 0.11977825064396909, "grad_norm": 1.0950559377670288, "learning_rate": 1.0685e-05, "loss": 0.3622, "step": 2139 }, { "epoch": 0.11983424795609811, "grad_norm": 1.1870139837265015, "learning_rate": 1.069e-05, "loss": 0.3958, "step": 2140 }, { "epoch": 0.11989024526822713, "grad_norm": 1.1585975885391235, "learning_rate": 1.0695e-05, "loss": 0.3928, "step": 2141 }, { 
"epoch": 0.11994624258035615, "grad_norm": 1.1703457832336426, "learning_rate": 1.0700000000000001e-05, "loss": 0.3748, "step": 2142 }, { "epoch": 0.12000223989248517, "grad_norm": 1.3534200191497803, "learning_rate": 1.0705000000000002e-05, "loss": 0.4758, "step": 2143 }, { "epoch": 0.12005823720461418, "grad_norm": 1.3202478885650635, "learning_rate": 1.071e-05, "loss": 0.461, "step": 2144 }, { "epoch": 0.12011423451674319, "grad_norm": 1.2206354141235352, "learning_rate": 1.0715e-05, "loss": 0.3884, "step": 2145 }, { "epoch": 0.12017023182887221, "grad_norm": 1.0906764268875122, "learning_rate": 1.072e-05, "loss": 0.431, "step": 2146 }, { "epoch": 0.12022622914100123, "grad_norm": 1.2466380596160889, "learning_rate": 1.0725e-05, "loss": 0.4785, "step": 2147 }, { "epoch": 0.12028222645313025, "grad_norm": 1.2545379400253296, "learning_rate": 1.073e-05, "loss": 0.4105, "step": 2148 }, { "epoch": 0.12033822376525927, "grad_norm": 1.18149733543396, "learning_rate": 1.0735000000000001e-05, "loss": 0.5563, "step": 2149 }, { "epoch": 0.12039422107738829, "grad_norm": 1.1253278255462646, "learning_rate": 1.074e-05, "loss": 0.4171, "step": 2150 }, { "epoch": 0.1204502183895173, "grad_norm": 1.4532430171966553, "learning_rate": 1.0745000000000001e-05, "loss": 0.4619, "step": 2151 }, { "epoch": 0.12050621570164632, "grad_norm": 1.6640323400497437, "learning_rate": 1.075e-05, "loss": 0.494, "step": 2152 }, { "epoch": 0.12056221301377534, "grad_norm": 1.1227400302886963, "learning_rate": 1.0755000000000001e-05, "loss": 0.362, "step": 2153 }, { "epoch": 0.12061821032590436, "grad_norm": 0.9881584048271179, "learning_rate": 1.076e-05, "loss": 0.3443, "step": 2154 }, { "epoch": 0.12067420763803337, "grad_norm": 1.3102385997772217, "learning_rate": 1.0765e-05, "loss": 0.489, "step": 2155 }, { "epoch": 0.12073020495016239, "grad_norm": 1.0849230289459229, "learning_rate": 1.077e-05, "loss": 0.4264, "step": 2156 }, { "epoch": 0.1207862022622914, "grad_norm": 1.1320273876190186, 
"learning_rate": 1.0775000000000001e-05, "loss": 0.4323, "step": 2157 }, { "epoch": 0.12084219957442043, "grad_norm": 1.3121912479400635, "learning_rate": 1.0780000000000002e-05, "loss": 0.5436, "step": 2158 }, { "epoch": 0.12089819688654944, "grad_norm": 1.8350551128387451, "learning_rate": 1.0785000000000001e-05, "loss": 0.4331, "step": 2159 }, { "epoch": 0.12095419419867846, "grad_norm": 1.2972077131271362, "learning_rate": 1.079e-05, "loss": 0.3937, "step": 2160 }, { "epoch": 0.12101019151080748, "grad_norm": 1.2910444736480713, "learning_rate": 1.0795e-05, "loss": 0.4605, "step": 2161 }, { "epoch": 0.1210661888229365, "grad_norm": 1.1541708707809448, "learning_rate": 1.08e-05, "loss": 0.3115, "step": 2162 }, { "epoch": 0.12112218613506552, "grad_norm": 1.3426966667175293, "learning_rate": 1.0804999999999999e-05, "loss": 0.4149, "step": 2163 }, { "epoch": 0.12117818344719454, "grad_norm": 1.162108302116394, "learning_rate": 1.081e-05, "loss": 0.3712, "step": 2164 }, { "epoch": 0.12123418075932356, "grad_norm": 1.4027280807495117, "learning_rate": 1.0815e-05, "loss": 0.4655, "step": 2165 }, { "epoch": 0.12129017807145256, "grad_norm": 1.7777239084243774, "learning_rate": 1.0820000000000001e-05, "loss": 0.5956, "step": 2166 }, { "epoch": 0.12134617538358158, "grad_norm": 1.2932366132736206, "learning_rate": 1.0825e-05, "loss": 0.4147, "step": 2167 }, { "epoch": 0.1214021726957106, "grad_norm": 1.4258068799972534, "learning_rate": 1.083e-05, "loss": 0.5727, "step": 2168 }, { "epoch": 0.12145817000783962, "grad_norm": 1.4387284517288208, "learning_rate": 1.0835e-05, "loss": 0.4624, "step": 2169 }, { "epoch": 0.12151416731996864, "grad_norm": 1.250396490097046, "learning_rate": 1.084e-05, "loss": 0.4046, "step": 2170 }, { "epoch": 0.12157016463209766, "grad_norm": 1.2278938293457031, "learning_rate": 1.0845e-05, "loss": 0.4158, "step": 2171 }, { "epoch": 0.12162616194422668, "grad_norm": 1.2678710222244263, "learning_rate": 1.0850000000000001e-05, "loss": 0.3746, 
"step": 2172 }, { "epoch": 0.1216821592563557, "grad_norm": 1.1719387769699097, "learning_rate": 1.0855e-05, "loss": 0.3382, "step": 2173 }, { "epoch": 0.12173815656848472, "grad_norm": 1.2299120426177979, "learning_rate": 1.0860000000000001e-05, "loss": 0.4751, "step": 2174 }, { "epoch": 0.12179415388061374, "grad_norm": 1.2288856506347656, "learning_rate": 1.0865e-05, "loss": 0.5048, "step": 2175 }, { "epoch": 0.12185015119274274, "grad_norm": 1.4430570602416992, "learning_rate": 1.0870000000000001e-05, "loss": 0.4628, "step": 2176 }, { "epoch": 0.12190614850487176, "grad_norm": 1.255171775817871, "learning_rate": 1.0875e-05, "loss": 0.405, "step": 2177 }, { "epoch": 0.12196214581700078, "grad_norm": 1.0752614736557007, "learning_rate": 1.088e-05, "loss": 0.5331, "step": 2178 }, { "epoch": 0.1220181431291298, "grad_norm": 1.3410518169403076, "learning_rate": 1.0885e-05, "loss": 0.4597, "step": 2179 }, { "epoch": 0.12207414044125882, "grad_norm": 1.2840832471847534, "learning_rate": 1.089e-05, "loss": 0.5856, "step": 2180 }, { "epoch": 0.12213013775338784, "grad_norm": 1.24722158908844, "learning_rate": 1.0895000000000002e-05, "loss": 0.4622, "step": 2181 }, { "epoch": 0.12218613506551686, "grad_norm": 1.6612390279769897, "learning_rate": 1.09e-05, "loss": 0.605, "step": 2182 }, { "epoch": 0.12224213237764588, "grad_norm": 1.183188796043396, "learning_rate": 1.0905e-05, "loss": 0.4119, "step": 2183 }, { "epoch": 0.1222981296897749, "grad_norm": 1.1382944583892822, "learning_rate": 1.091e-05, "loss": 0.4015, "step": 2184 }, { "epoch": 0.12235412700190391, "grad_norm": 1.2495747804641724, "learning_rate": 1.0915e-05, "loss": 0.3978, "step": 2185 }, { "epoch": 0.12241012431403292, "grad_norm": 1.3227219581604004, "learning_rate": 1.092e-05, "loss": 0.4539, "step": 2186 }, { "epoch": 0.12246612162616194, "grad_norm": 1.1677227020263672, "learning_rate": 1.0925000000000001e-05, "loss": 0.35, "step": 2187 }, { "epoch": 0.12252211893829096, "grad_norm": 
1.3256142139434814, "learning_rate": 1.093e-05, "loss": 0.4132, "step": 2188 }, { "epoch": 0.12257811625041998, "grad_norm": 1.0301586389541626, "learning_rate": 1.0935000000000001e-05, "loss": 0.4447, "step": 2189 }, { "epoch": 0.122634113562549, "grad_norm": 1.344541311264038, "learning_rate": 1.094e-05, "loss": 0.5965, "step": 2190 }, { "epoch": 0.12269011087467802, "grad_norm": 1.0082032680511475, "learning_rate": 1.0945000000000001e-05, "loss": 0.3608, "step": 2191 }, { "epoch": 0.12274610818680703, "grad_norm": 1.1168889999389648, "learning_rate": 1.095e-05, "loss": 0.4109, "step": 2192 }, { "epoch": 0.12280210549893605, "grad_norm": 1.1556227207183838, "learning_rate": 1.0955e-05, "loss": 0.357, "step": 2193 }, { "epoch": 0.12285810281106507, "grad_norm": 1.1768267154693604, "learning_rate": 1.096e-05, "loss": 0.527, "step": 2194 }, { "epoch": 0.12291410012319409, "grad_norm": 1.3603123426437378, "learning_rate": 1.0965000000000001e-05, "loss": 0.5147, "step": 2195 }, { "epoch": 0.12297009743532311, "grad_norm": 1.364448070526123, "learning_rate": 1.0970000000000002e-05, "loss": 0.451, "step": 2196 }, { "epoch": 0.12302609474745212, "grad_norm": 1.1654924154281616, "learning_rate": 1.0975e-05, "loss": 0.3761, "step": 2197 }, { "epoch": 0.12308209205958114, "grad_norm": 1.1860575675964355, "learning_rate": 1.098e-05, "loss": 0.4211, "step": 2198 }, { "epoch": 0.12313808937171015, "grad_norm": 1.3204054832458496, "learning_rate": 1.0985e-05, "loss": 0.6236, "step": 2199 }, { "epoch": 0.12319408668383917, "grad_norm": 1.0727225542068481, "learning_rate": 1.099e-05, "loss": 0.4035, "step": 2200 }, { "epoch": 0.1232500839959682, "grad_norm": 1.1432641744613647, "learning_rate": 1.0995e-05, "loss": 0.3848, "step": 2201 }, { "epoch": 0.12330608130809721, "grad_norm": 1.2635337114334106, "learning_rate": 1.1000000000000001e-05, "loss": 0.4212, "step": 2202 }, { "epoch": 0.12336207862022623, "grad_norm": 1.2043837308883667, "learning_rate": 1.1005e-05, "loss": 
0.5291, "step": 2203 }, { "epoch": 0.12341807593235525, "grad_norm": 1.2756997346878052, "learning_rate": 1.1010000000000001e-05, "loss": 0.4819, "step": 2204 }, { "epoch": 0.12347407324448427, "grad_norm": 1.1286869049072266, "learning_rate": 1.1015e-05, "loss": 0.3912, "step": 2205 }, { "epoch": 0.12353007055661329, "grad_norm": 1.1184715032577515, "learning_rate": 1.1020000000000001e-05, "loss": 0.3552, "step": 2206 }, { "epoch": 0.1235860678687423, "grad_norm": 1.805359959602356, "learning_rate": 1.1025e-05, "loss": 0.5879, "step": 2207 }, { "epoch": 0.12364206518087131, "grad_norm": 1.4591259956359863, "learning_rate": 1.103e-05, "loss": 0.6414, "step": 2208 }, { "epoch": 0.12369806249300033, "grad_norm": 1.0697208642959595, "learning_rate": 1.1035e-05, "loss": 0.3618, "step": 2209 }, { "epoch": 0.12375405980512935, "grad_norm": 1.4364018440246582, "learning_rate": 1.1040000000000001e-05, "loss": 0.4088, "step": 2210 }, { "epoch": 0.12381005711725837, "grad_norm": 1.1499199867248535, "learning_rate": 1.1045000000000002e-05, "loss": 0.371, "step": 2211 }, { "epoch": 0.12386605442938739, "grad_norm": 1.1446573734283447, "learning_rate": 1.1050000000000001e-05, "loss": 0.3733, "step": 2212 }, { "epoch": 0.12392205174151641, "grad_norm": 1.2948927879333496, "learning_rate": 1.1055e-05, "loss": 0.5075, "step": 2213 }, { "epoch": 0.12397804905364543, "grad_norm": 1.4152624607086182, "learning_rate": 1.106e-05, "loss": 0.5764, "step": 2214 }, { "epoch": 0.12403404636577445, "grad_norm": 1.479748249053955, "learning_rate": 1.1065e-05, "loss": 0.5262, "step": 2215 }, { "epoch": 0.12409004367790347, "grad_norm": 1.7374500036239624, "learning_rate": 1.107e-05, "loss": 0.5617, "step": 2216 }, { "epoch": 0.12414604099003247, "grad_norm": 1.1393375396728516, "learning_rate": 1.1075e-05, "loss": 0.4754, "step": 2217 }, { "epoch": 0.12420203830216149, "grad_norm": 1.1788643598556519, "learning_rate": 1.108e-05, "loss": 0.4177, "step": 2218 }, { "epoch": 0.12425803561429051, 
"grad_norm": 1.2186473608016968, "learning_rate": 1.1085000000000001e-05, "loss": 0.6167, "step": 2219 }, { "epoch": 0.12431403292641953, "grad_norm": 1.2026934623718262, "learning_rate": 1.109e-05, "loss": 0.3933, "step": 2220 }, { "epoch": 0.12437003023854855, "grad_norm": 1.1073105335235596, "learning_rate": 1.1095e-05, "loss": 0.5107, "step": 2221 }, { "epoch": 0.12442602755067757, "grad_norm": 1.339430570602417, "learning_rate": 1.11e-05, "loss": 0.4354, "step": 2222 }, { "epoch": 0.12448202486280659, "grad_norm": 1.357672095298767, "learning_rate": 1.1105e-05, "loss": 0.4647, "step": 2223 }, { "epoch": 0.1245380221749356, "grad_norm": 1.2059109210968018, "learning_rate": 1.111e-05, "loss": 0.4529, "step": 2224 }, { "epoch": 0.12459401948706463, "grad_norm": 1.2800805568695068, "learning_rate": 1.1115000000000001e-05, "loss": 0.4615, "step": 2225 }, { "epoch": 0.12465001679919364, "grad_norm": 1.1725817918777466, "learning_rate": 1.112e-05, "loss": 0.3642, "step": 2226 }, { "epoch": 0.12470601411132266, "grad_norm": 1.4158438444137573, "learning_rate": 1.1125000000000001e-05, "loss": 0.3824, "step": 2227 }, { "epoch": 0.12476201142345167, "grad_norm": 1.0686370134353638, "learning_rate": 1.113e-05, "loss": 0.4248, "step": 2228 }, { "epoch": 0.12481800873558069, "grad_norm": 1.151619553565979, "learning_rate": 1.1135000000000001e-05, "loss": 0.4378, "step": 2229 }, { "epoch": 0.1248740060477097, "grad_norm": 1.1525603532791138, "learning_rate": 1.114e-05, "loss": 0.3432, "step": 2230 }, { "epoch": 0.12493000335983873, "grad_norm": 1.104702353477478, "learning_rate": 1.1145e-05, "loss": 0.4535, "step": 2231 }, { "epoch": 0.12498600067196775, "grad_norm": 1.0402547121047974, "learning_rate": 1.115e-05, "loss": 0.3269, "step": 2232 }, { "epoch": 0.12504199798409676, "grad_norm": 1.2609214782714844, "learning_rate": 1.1155e-05, "loss": 0.4081, "step": 2233 }, { "epoch": 0.12509799529622578, "grad_norm": 1.3451021909713745, "learning_rate": 1.1160000000000002e-05, 
"loss": 0.5328, "step": 2234 }, { "epoch": 0.1251539926083548, "grad_norm": 1.1217625141143799, "learning_rate": 1.1165e-05, "loss": 0.4598, "step": 2235 }, { "epoch": 0.12520998992048382, "grad_norm": 1.4307420253753662, "learning_rate": 1.117e-05, "loss": 0.4804, "step": 2236 }, { "epoch": 0.12526598723261284, "grad_norm": 1.2126822471618652, "learning_rate": 1.1175e-05, "loss": 0.5237, "step": 2237 }, { "epoch": 0.12532198454474186, "grad_norm": 2.062070846557617, "learning_rate": 1.118e-05, "loss": 0.3978, "step": 2238 }, { "epoch": 0.12537798185687088, "grad_norm": 1.2071523666381836, "learning_rate": 1.1185e-05, "loss": 0.421, "step": 2239 }, { "epoch": 0.1254339791689999, "grad_norm": 1.3621774911880493, "learning_rate": 1.1190000000000001e-05, "loss": 0.4159, "step": 2240 }, { "epoch": 0.12548997648112892, "grad_norm": 1.3251384496688843, "learning_rate": 1.1195e-05, "loss": 0.5032, "step": 2241 }, { "epoch": 0.12554597379325794, "grad_norm": 1.3219131231307983, "learning_rate": 1.1200000000000001e-05, "loss": 0.4851, "step": 2242 }, { "epoch": 0.12560197110538693, "grad_norm": 1.1742477416992188, "learning_rate": 1.1205e-05, "loss": 0.415, "step": 2243 }, { "epoch": 0.12565796841751595, "grad_norm": 1.6238731145858765, "learning_rate": 1.1210000000000001e-05, "loss": 0.5411, "step": 2244 }, { "epoch": 0.12571396572964497, "grad_norm": 1.1181552410125732, "learning_rate": 1.1215e-05, "loss": 0.5741, "step": 2245 }, { "epoch": 0.12576996304177399, "grad_norm": 1.1299830675125122, "learning_rate": 1.122e-05, "loss": 0.3773, "step": 2246 }, { "epoch": 0.125825960353903, "grad_norm": 1.1737785339355469, "learning_rate": 1.1225e-05, "loss": 0.4077, "step": 2247 }, { "epoch": 0.12588195766603202, "grad_norm": 1.0765039920806885, "learning_rate": 1.1230000000000001e-05, "loss": 0.5425, "step": 2248 }, { "epoch": 0.12593795497816104, "grad_norm": 1.0482008457183838, "learning_rate": 1.1235000000000002e-05, "loss": 0.403, "step": 2249 }, { "epoch": 
0.12599395229029006, "grad_norm": 1.0707420110702515, "learning_rate": 1.124e-05, "loss": 0.4714, "step": 2250 }, { "epoch": 0.12604994960241908, "grad_norm": 1.2540937662124634, "learning_rate": 1.1245e-05, "loss": 0.3512, "step": 2251 }, { "epoch": 0.1261059469145481, "grad_norm": 1.2286819219589233, "learning_rate": 1.125e-05, "loss": 0.397, "step": 2252 }, { "epoch": 0.12616194422667712, "grad_norm": 1.150970458984375, "learning_rate": 1.1255e-05, "loss": 0.4503, "step": 2253 }, { "epoch": 0.12621794153880614, "grad_norm": 1.4410957098007202, "learning_rate": 1.126e-05, "loss": 0.5917, "step": 2254 }, { "epoch": 0.12627393885093516, "grad_norm": 1.1711270809173584, "learning_rate": 1.1265e-05, "loss": 0.4428, "step": 2255 }, { "epoch": 0.12632993616306418, "grad_norm": 1.3283476829528809, "learning_rate": 1.127e-05, "loss": 0.5402, "step": 2256 }, { "epoch": 0.1263859334751932, "grad_norm": 1.4431616067886353, "learning_rate": 1.1275000000000001e-05, "loss": 0.5657, "step": 2257 }, { "epoch": 0.12644193078732222, "grad_norm": 1.122452974319458, "learning_rate": 1.128e-05, "loss": 0.4835, "step": 2258 }, { "epoch": 0.12649792809945123, "grad_norm": 1.05092191696167, "learning_rate": 1.1285000000000001e-05, "loss": 0.4517, "step": 2259 }, { "epoch": 0.12655392541158025, "grad_norm": 1.0658460855484009, "learning_rate": 1.129e-05, "loss": 0.4475, "step": 2260 }, { "epoch": 0.12660992272370927, "grad_norm": 1.3967729806900024, "learning_rate": 1.1295e-05, "loss": 0.4501, "step": 2261 }, { "epoch": 0.1266659200358383, "grad_norm": 1.4232945442199707, "learning_rate": 1.13e-05, "loss": 0.5232, "step": 2262 }, { "epoch": 0.1267219173479673, "grad_norm": 1.4301387071609497, "learning_rate": 1.1305000000000001e-05, "loss": 0.6171, "step": 2263 }, { "epoch": 0.1267779146600963, "grad_norm": 1.4325543642044067, "learning_rate": 1.1310000000000002e-05, "loss": 0.5432, "step": 2264 }, { "epoch": 0.12683391197222532, "grad_norm": 1.292751669883728, "learning_rate": 
1.1315000000000001e-05, "loss": 0.5031, "step": 2265 }, { "epoch": 0.12688990928435434, "grad_norm": 1.2938718795776367, "learning_rate": 1.132e-05, "loss": 0.5866, "step": 2266 }, { "epoch": 0.12694590659648336, "grad_norm": 1.285792589187622, "learning_rate": 1.1325e-05, "loss": 0.4874, "step": 2267 }, { "epoch": 0.12700190390861238, "grad_norm": 1.2472320795059204, "learning_rate": 1.133e-05, "loss": 0.5701, "step": 2268 }, { "epoch": 0.1270579012207414, "grad_norm": 1.1968580484390259, "learning_rate": 1.1335e-05, "loss": 0.4281, "step": 2269 }, { "epoch": 0.12711389853287042, "grad_norm": 1.3408206701278687, "learning_rate": 1.134e-05, "loss": 0.4181, "step": 2270 }, { "epoch": 0.12716989584499944, "grad_norm": 1.2121702432632446, "learning_rate": 1.1345e-05, "loss": 0.4036, "step": 2271 }, { "epoch": 0.12722589315712846, "grad_norm": 1.0712432861328125, "learning_rate": 1.1350000000000001e-05, "loss": 0.4192, "step": 2272 }, { "epoch": 0.12728189046925747, "grad_norm": 1.1871991157531738, "learning_rate": 1.1355e-05, "loss": 0.3987, "step": 2273 }, { "epoch": 0.1273378877813865, "grad_norm": 1.0942261219024658, "learning_rate": 1.1360000000000001e-05, "loss": 0.4153, "step": 2274 }, { "epoch": 0.1273938850935155, "grad_norm": 1.2354952096939087, "learning_rate": 1.1365e-05, "loss": 0.5993, "step": 2275 }, { "epoch": 0.12744988240564453, "grad_norm": 1.4431953430175781, "learning_rate": 1.137e-05, "loss": 0.4021, "step": 2276 }, { "epoch": 0.12750587971777355, "grad_norm": 1.1220649480819702, "learning_rate": 1.1375e-05, "loss": 0.4295, "step": 2277 }, { "epoch": 0.12756187702990257, "grad_norm": 1.1681586503982544, "learning_rate": 1.1380000000000001e-05, "loss": 0.4202, "step": 2278 }, { "epoch": 0.1276178743420316, "grad_norm": 1.1978394985198975, "learning_rate": 1.1385000000000002e-05, "loss": 0.5624, "step": 2279 }, { "epoch": 0.1276738716541606, "grad_norm": 1.1839028596878052, "learning_rate": 1.1390000000000001e-05, "loss": 0.4514, "step": 2280 }, { 
"epoch": 0.12772986896628963, "grad_norm": 1.1288633346557617, "learning_rate": 1.1395e-05, "loss": 0.5819, "step": 2281 }, { "epoch": 0.12778586627841865, "grad_norm": 0.9166799783706665, "learning_rate": 1.1400000000000001e-05, "loss": 0.3726, "step": 2282 }, { "epoch": 0.12784186359054767, "grad_norm": 1.317753791809082, "learning_rate": 1.1405e-05, "loss": 0.3707, "step": 2283 }, { "epoch": 0.12789786090267666, "grad_norm": 1.3430582284927368, "learning_rate": 1.141e-05, "loss": 0.5908, "step": 2284 }, { "epoch": 0.12795385821480568, "grad_norm": 1.2576439380645752, "learning_rate": 1.1415e-05, "loss": 0.5255, "step": 2285 }, { "epoch": 0.1280098555269347, "grad_norm": 13.189178466796875, "learning_rate": 1.142e-05, "loss": 0.4606, "step": 2286 }, { "epoch": 0.12806585283906372, "grad_norm": 1.1022242307662964, "learning_rate": 1.1425000000000002e-05, "loss": 0.3685, "step": 2287 }, { "epoch": 0.12812185015119273, "grad_norm": 1.0560503005981445, "learning_rate": 1.143e-05, "loss": 0.4096, "step": 2288 }, { "epoch": 0.12817784746332175, "grad_norm": 1.2018636465072632, "learning_rate": 1.1435e-05, "loss": 0.4187, "step": 2289 }, { "epoch": 0.12823384477545077, "grad_norm": 1.204622745513916, "learning_rate": 1.144e-05, "loss": 0.4063, "step": 2290 }, { "epoch": 0.1282898420875798, "grad_norm": 1.3194224834442139, "learning_rate": 1.1445e-05, "loss": 0.6101, "step": 2291 }, { "epoch": 0.1283458393997088, "grad_norm": 1.1247808933258057, "learning_rate": 1.145e-05, "loss": 0.4507, "step": 2292 }, { "epoch": 0.12840183671183783, "grad_norm": 1.17900550365448, "learning_rate": 1.1455000000000001e-05, "loss": 0.4489, "step": 2293 }, { "epoch": 0.12845783402396685, "grad_norm": 1.0201923847198486, "learning_rate": 1.146e-05, "loss": 0.4107, "step": 2294 }, { "epoch": 0.12851383133609587, "grad_norm": 1.1454983949661255, "learning_rate": 1.1465000000000001e-05, "loss": 0.4719, "step": 2295 }, { "epoch": 0.1285698286482249, "grad_norm": 1.003904104232788, 
"learning_rate": 1.147e-05, "loss": 0.3516, "step": 2296 }, { "epoch": 0.1286258259603539, "grad_norm": 1.0594944953918457, "learning_rate": 1.1475000000000001e-05, "loss": 0.4612, "step": 2297 }, { "epoch": 0.12868182327248293, "grad_norm": 1.2085590362548828, "learning_rate": 1.148e-05, "loss": 0.4192, "step": 2298 }, { "epoch": 0.12873782058461194, "grad_norm": 1.2151544094085693, "learning_rate": 1.1485e-05, "loss": 0.3859, "step": 2299 }, { "epoch": 0.12879381789674096, "grad_norm": 1.1104528903961182, "learning_rate": 1.149e-05, "loss": 0.387, "step": 2300 }, { "epoch": 0.12884981520886998, "grad_norm": 1.1801403760910034, "learning_rate": 1.1495000000000001e-05, "loss": 0.4741, "step": 2301 }, { "epoch": 0.128905812520999, "grad_norm": 1.357081651687622, "learning_rate": 1.1500000000000002e-05, "loss": 0.5217, "step": 2302 }, { "epoch": 0.12896180983312802, "grad_norm": 1.403161883354187, "learning_rate": 1.1505e-05, "loss": 0.5856, "step": 2303 }, { "epoch": 0.12901780714525704, "grad_norm": 1.230513334274292, "learning_rate": 1.151e-05, "loss": 0.4272, "step": 2304 }, { "epoch": 0.12907380445738603, "grad_norm": 1.083959937095642, "learning_rate": 1.1515e-05, "loss": 0.4267, "step": 2305 }, { "epoch": 0.12912980176951505, "grad_norm": 1.5055561065673828, "learning_rate": 1.152e-05, "loss": 0.5365, "step": 2306 }, { "epoch": 0.12918579908164407, "grad_norm": 1.0667353868484497, "learning_rate": 1.1525e-05, "loss": 0.3674, "step": 2307 }, { "epoch": 0.1292417963937731, "grad_norm": 1.5784897804260254, "learning_rate": 1.153e-05, "loss": 0.5412, "step": 2308 }, { "epoch": 0.1292977937059021, "grad_norm": 1.2324721813201904, "learning_rate": 1.1535e-05, "loss": 0.4253, "step": 2309 }, { "epoch": 0.12935379101803113, "grad_norm": 1.1168930530548096, "learning_rate": 1.1540000000000001e-05, "loss": 0.565, "step": 2310 }, { "epoch": 0.12940978833016015, "grad_norm": 4.88847541809082, "learning_rate": 1.1545e-05, "loss": 0.3856, "step": 2311 }, { "epoch": 
0.12946578564228917, "grad_norm": 1.3440314531326294, "learning_rate": 1.1550000000000001e-05, "loss": 0.4282, "step": 2312 }, { "epoch": 0.12952178295441819, "grad_norm": 1.2424455881118774, "learning_rate": 1.1555e-05, "loss": 0.5565, "step": 2313 }, { "epoch": 0.1295777802665472, "grad_norm": 1.1632091999053955, "learning_rate": 1.156e-05, "loss": 0.5432, "step": 2314 }, { "epoch": 0.12963377757867622, "grad_norm": 1.2187398672103882, "learning_rate": 1.1565e-05, "loss": 0.5051, "step": 2315 }, { "epoch": 0.12968977489080524, "grad_norm": 1.359572172164917, "learning_rate": 1.1570000000000001e-05, "loss": 0.4581, "step": 2316 }, { "epoch": 0.12974577220293426, "grad_norm": 1.4037069082260132, "learning_rate": 1.1575000000000002e-05, "loss": 0.5692, "step": 2317 }, { "epoch": 0.12980176951506328, "grad_norm": 1.292794942855835, "learning_rate": 1.1580000000000001e-05, "loss": 0.4582, "step": 2318 }, { "epoch": 0.1298577668271923, "grad_norm": 1.0918874740600586, "learning_rate": 1.1585e-05, "loss": 0.3554, "step": 2319 }, { "epoch": 0.12991376413932132, "grad_norm": 1.0295218229293823, "learning_rate": 1.159e-05, "loss": 0.3185, "step": 2320 }, { "epoch": 0.12996976145145034, "grad_norm": 1.1888872385025024, "learning_rate": 1.1595e-05, "loss": 0.3827, "step": 2321 }, { "epoch": 0.13002575876357936, "grad_norm": 1.218995451927185, "learning_rate": 1.16e-05, "loss": 0.4396, "step": 2322 }, { "epoch": 0.13008175607570838, "grad_norm": 1.046060562133789, "learning_rate": 1.1605e-05, "loss": 0.3439, "step": 2323 }, { "epoch": 0.1301377533878374, "grad_norm": 1.1816738843917847, "learning_rate": 1.161e-05, "loss": 0.4889, "step": 2324 }, { "epoch": 0.13019375069996642, "grad_norm": 1.1769605875015259, "learning_rate": 1.1615000000000001e-05, "loss": 0.5111, "step": 2325 }, { "epoch": 0.1302497480120954, "grad_norm": 11.965676307678223, "learning_rate": 1.162e-05, "loss": 0.4498, "step": 2326 }, { "epoch": 0.13030574532422443, "grad_norm": 1.245835542678833, 
"learning_rate": 1.1625000000000001e-05, "loss": 0.4435, "step": 2327 }, { "epoch": 0.13036174263635344, "grad_norm": 1.261703372001648, "learning_rate": 1.163e-05, "loss": 0.3417, "step": 2328 }, { "epoch": 0.13041773994848246, "grad_norm": 1.4906578063964844, "learning_rate": 1.1635e-05, "loss": 0.4779, "step": 2329 }, { "epoch": 0.13047373726061148, "grad_norm": 1.5169082880020142, "learning_rate": 1.164e-05, "loss": 0.5212, "step": 2330 }, { "epoch": 0.1305297345727405, "grad_norm": 1.599611520767212, "learning_rate": 1.1645000000000001e-05, "loss": 0.4286, "step": 2331 }, { "epoch": 0.13058573188486952, "grad_norm": 1.9279402494430542, "learning_rate": 1.1650000000000002e-05, "loss": 0.466, "step": 2332 }, { "epoch": 0.13064172919699854, "grad_norm": 1.1912360191345215, "learning_rate": 1.1655000000000001e-05, "loss": 0.4664, "step": 2333 }, { "epoch": 0.13069772650912756, "grad_norm": 1.3635510206222534, "learning_rate": 1.166e-05, "loss": 0.4613, "step": 2334 }, { "epoch": 0.13075372382125658, "grad_norm": 1.300800085067749, "learning_rate": 1.1665000000000001e-05, "loss": 0.5828, "step": 2335 }, { "epoch": 0.1308097211333856, "grad_norm": 1.1869381666183472, "learning_rate": 1.167e-05, "loss": 0.3331, "step": 2336 }, { "epoch": 0.13086571844551462, "grad_norm": 1.4955719709396362, "learning_rate": 1.1675000000000001e-05, "loss": 0.5642, "step": 2337 }, { "epoch": 0.13092171575764364, "grad_norm": 1.167914867401123, "learning_rate": 1.168e-05, "loss": 0.4144, "step": 2338 }, { "epoch": 0.13097771306977266, "grad_norm": 1.3202574253082275, "learning_rate": 1.1685e-05, "loss": 0.4789, "step": 2339 }, { "epoch": 0.13103371038190167, "grad_norm": 1.2649132013320923, "learning_rate": 1.1690000000000002e-05, "loss": 0.4761, "step": 2340 }, { "epoch": 0.1310897076940307, "grad_norm": 1.065508484840393, "learning_rate": 1.1695e-05, "loss": 0.4014, "step": 2341 }, { "epoch": 0.1311457050061597, "grad_norm": 0.9914081692695618, "learning_rate": 1.1700000000000001e-05, 
"loss": 0.3879, "step": 2342 }, { "epoch": 0.13120170231828873, "grad_norm": 1.1917837858200073, "learning_rate": 1.1705e-05, "loss": 0.4464, "step": 2343 }, { "epoch": 0.13125769963041775, "grad_norm": 1.1857242584228516, "learning_rate": 1.171e-05, "loss": 0.3995, "step": 2344 }, { "epoch": 0.13131369694254677, "grad_norm": 1.0622613430023193, "learning_rate": 1.1715e-05, "loss": 0.3963, "step": 2345 }, { "epoch": 0.13136969425467576, "grad_norm": 1.1169847249984741, "learning_rate": 1.172e-05, "loss": 0.3781, "step": 2346 }, { "epoch": 0.13142569156680478, "grad_norm": 1.2596418857574463, "learning_rate": 1.1725e-05, "loss": 0.4689, "step": 2347 }, { "epoch": 0.1314816888789338, "grad_norm": 1.2164725065231323, "learning_rate": 1.1730000000000001e-05, "loss": 0.4248, "step": 2348 }, { "epoch": 0.13153768619106282, "grad_norm": 1.1801409721374512, "learning_rate": 1.1735e-05, "loss": 0.4889, "step": 2349 }, { "epoch": 0.13159368350319184, "grad_norm": 0.9698452949523926, "learning_rate": 1.1740000000000001e-05, "loss": 0.4187, "step": 2350 }, { "epoch": 0.13164968081532086, "grad_norm": 1.3726561069488525, "learning_rate": 1.1745e-05, "loss": 0.6082, "step": 2351 }, { "epoch": 0.13170567812744988, "grad_norm": 1.310638666152954, "learning_rate": 1.175e-05, "loss": 0.4059, "step": 2352 }, { "epoch": 0.1317616754395789, "grad_norm": 1.0972820520401, "learning_rate": 1.1755e-05, "loss": 0.5182, "step": 2353 }, { "epoch": 0.13181767275170791, "grad_norm": 1.407310128211975, "learning_rate": 1.1760000000000001e-05, "loss": 0.4274, "step": 2354 }, { "epoch": 0.13187367006383693, "grad_norm": 1.238159418106079, "learning_rate": 1.1765000000000002e-05, "loss": 0.4569, "step": 2355 }, { "epoch": 0.13192966737596595, "grad_norm": 1.4061623811721802, "learning_rate": 1.177e-05, "loss": 0.5208, "step": 2356 }, { "epoch": 0.13198566468809497, "grad_norm": 1.0815753936767578, "learning_rate": 1.1775e-05, "loss": 0.5048, "step": 2357 }, { "epoch": 0.132041662000224, 
"grad_norm": 1.5622210502624512, "learning_rate": 1.178e-05, "loss": 0.4615, "step": 2358 }, { "epoch": 0.132097659312353, "grad_norm": 1.1877309083938599, "learning_rate": 1.1785e-05, "loss": 0.4608, "step": 2359 }, { "epoch": 0.13215365662448203, "grad_norm": 1.7222483158111572, "learning_rate": 1.179e-05, "loss": 0.4566, "step": 2360 }, { "epoch": 0.13220965393661105, "grad_norm": 0.9789606928825378, "learning_rate": 1.1795e-05, "loss": 0.3139, "step": 2361 }, { "epoch": 0.13226565124874007, "grad_norm": 1.3129633665084839, "learning_rate": 1.18e-05, "loss": 0.5787, "step": 2362 }, { "epoch": 0.1323216485608691, "grad_norm": 1.207702398300171, "learning_rate": 1.1805000000000001e-05, "loss": 0.5146, "step": 2363 }, { "epoch": 0.1323776458729981, "grad_norm": 1.3435063362121582, "learning_rate": 1.181e-05, "loss": 0.4662, "step": 2364 }, { "epoch": 0.13243364318512713, "grad_norm": 1.13418710231781, "learning_rate": 1.1815000000000001e-05, "loss": 0.3667, "step": 2365 }, { "epoch": 0.13248964049725614, "grad_norm": 1.2030316591262817, "learning_rate": 1.182e-05, "loss": 0.3939, "step": 2366 }, { "epoch": 0.13254563780938514, "grad_norm": 1.1721278429031372, "learning_rate": 1.1825e-05, "loss": 0.5194, "step": 2367 }, { "epoch": 0.13260163512151416, "grad_norm": 1.1768015623092651, "learning_rate": 1.183e-05, "loss": 0.3973, "step": 2368 }, { "epoch": 0.13265763243364317, "grad_norm": 1.225443720817566, "learning_rate": 1.1835000000000001e-05, "loss": 0.4661, "step": 2369 }, { "epoch": 0.1327136297457722, "grad_norm": 1.093108892440796, "learning_rate": 1.1840000000000002e-05, "loss": 0.3354, "step": 2370 }, { "epoch": 0.1327696270579012, "grad_norm": 1.1921272277832031, "learning_rate": 1.1845000000000001e-05, "loss": 0.4253, "step": 2371 }, { "epoch": 0.13282562437003023, "grad_norm": 1.2796928882598877, "learning_rate": 1.185e-05, "loss": 0.4149, "step": 2372 }, { "epoch": 0.13288162168215925, "grad_norm": 1.2408664226531982, "learning_rate": 1.1855e-05, 
"loss": 0.4325, "step": 2373 }, { "epoch": 0.13293761899428827, "grad_norm": 1.502134919166565, "learning_rate": 1.186e-05, "loss": 0.5105, "step": 2374 }, { "epoch": 0.1329936163064173, "grad_norm": 1.290805459022522, "learning_rate": 1.1865e-05, "loss": 0.5634, "step": 2375 }, { "epoch": 0.1330496136185463, "grad_norm": 1.6113225221633911, "learning_rate": 1.187e-05, "loss": 0.5746, "step": 2376 }, { "epoch": 0.13310561093067533, "grad_norm": 1.0114905834197998, "learning_rate": 1.1875e-05, "loss": 0.4165, "step": 2377 }, { "epoch": 0.13316160824280435, "grad_norm": 1.1047070026397705, "learning_rate": 1.1880000000000001e-05, "loss": 0.4955, "step": 2378 }, { "epoch": 0.13321760555493337, "grad_norm": 1.0724822282791138, "learning_rate": 1.1885e-05, "loss": 0.4241, "step": 2379 }, { "epoch": 0.13327360286706239, "grad_norm": 1.5702725648880005, "learning_rate": 1.1890000000000001e-05, "loss": 0.3546, "step": 2380 }, { "epoch": 0.1333296001791914, "grad_norm": 1.3315016031265259, "learning_rate": 1.1895e-05, "loss": 0.4321, "step": 2381 }, { "epoch": 0.13338559749132042, "grad_norm": 2.0095887184143066, "learning_rate": 1.19e-05, "loss": 0.4651, "step": 2382 }, { "epoch": 0.13344159480344944, "grad_norm": 1.2531713247299194, "learning_rate": 1.1905e-05, "loss": 0.4597, "step": 2383 }, { "epoch": 0.13349759211557846, "grad_norm": 1.2469711303710938, "learning_rate": 1.1910000000000001e-05, "loss": 0.4425, "step": 2384 }, { "epoch": 0.13355358942770748, "grad_norm": 1.7293294668197632, "learning_rate": 1.1915000000000002e-05, "loss": 0.4855, "step": 2385 }, { "epoch": 0.1336095867398365, "grad_norm": 1.332313895225525, "learning_rate": 1.1920000000000001e-05, "loss": 0.3875, "step": 2386 }, { "epoch": 0.13366558405196552, "grad_norm": 1.1652700901031494, "learning_rate": 1.1925e-05, "loss": 0.5291, "step": 2387 }, { "epoch": 0.1337215813640945, "grad_norm": 1.0502643585205078, "learning_rate": 1.1930000000000001e-05, "loss": 0.3097, "step": 2388 }, { "epoch": 
0.13377757867622353, "grad_norm": 1.5209193229675293, "learning_rate": 1.1935e-05, "loss": 0.4903, "step": 2389 }, { "epoch": 0.13383357598835255, "grad_norm": 1.1833019256591797, "learning_rate": 1.1940000000000001e-05, "loss": 0.4808, "step": 2390 }, { "epoch": 0.13388957330048157, "grad_norm": 1.3718583583831787, "learning_rate": 1.1945e-05, "loss": 0.4008, "step": 2391 }, { "epoch": 0.1339455706126106, "grad_norm": 1.2104849815368652, "learning_rate": 1.195e-05, "loss": 0.4653, "step": 2392 }, { "epoch": 0.1340015679247396, "grad_norm": 1.029047966003418, "learning_rate": 1.1955000000000002e-05, "loss": 0.4049, "step": 2393 }, { "epoch": 0.13405756523686863, "grad_norm": 3.4568305015563965, "learning_rate": 1.196e-05, "loss": 0.4246, "step": 2394 }, { "epoch": 0.13411356254899764, "grad_norm": 1.1154710054397583, "learning_rate": 1.1965000000000001e-05, "loss": 0.4391, "step": 2395 }, { "epoch": 0.13416955986112666, "grad_norm": 1.150034785270691, "learning_rate": 1.197e-05, "loss": 0.3921, "step": 2396 }, { "epoch": 0.13422555717325568, "grad_norm": 1.2795981168746948, "learning_rate": 1.1975e-05, "loss": 0.3264, "step": 2397 }, { "epoch": 0.1342815544853847, "grad_norm": 1.226135015487671, "learning_rate": 1.198e-05, "loss": 0.5624, "step": 2398 }, { "epoch": 0.13433755179751372, "grad_norm": 1.088365077972412, "learning_rate": 1.1985e-05, "loss": 0.3313, "step": 2399 }, { "epoch": 0.13439354910964274, "grad_norm": 1.1446475982666016, "learning_rate": 1.199e-05, "loss": 0.4507, "step": 2400 }, { "epoch": 0.13444954642177176, "grad_norm": 1.3302208185195923, "learning_rate": 1.1995000000000001e-05, "loss": 0.4239, "step": 2401 }, { "epoch": 0.13450554373390078, "grad_norm": 1.1120394468307495, "learning_rate": 1.2e-05, "loss": 0.4509, "step": 2402 }, { "epoch": 0.1345615410460298, "grad_norm": 1.155068278312683, "learning_rate": 1.2005000000000001e-05, "loss": 0.434, "step": 2403 }, { "epoch": 0.13461753835815882, "grad_norm": 1.1789742708206177, 
"learning_rate": 1.201e-05, "loss": 0.2487, "step": 2404 }, { "epoch": 0.13467353567028784, "grad_norm": 1.2293139696121216, "learning_rate": 1.2015000000000001e-05, "loss": 0.5815, "step": 2405 }, { "epoch": 0.13472953298241686, "grad_norm": 1.2889957427978516, "learning_rate": 1.202e-05, "loss": 0.6689, "step": 2406 }, { "epoch": 0.13478553029454587, "grad_norm": 1.4162606000900269, "learning_rate": 1.2025000000000001e-05, "loss": 0.4166, "step": 2407 }, { "epoch": 0.13484152760667487, "grad_norm": 1.2048757076263428, "learning_rate": 1.2030000000000002e-05, "loss": 0.4165, "step": 2408 }, { "epoch": 0.13489752491880388, "grad_norm": 1.0300843715667725, "learning_rate": 1.2035e-05, "loss": 0.3927, "step": 2409 }, { "epoch": 0.1349535222309329, "grad_norm": 1.2152204513549805, "learning_rate": 1.204e-05, "loss": 0.507, "step": 2410 }, { "epoch": 0.13500951954306192, "grad_norm": 1.2543202638626099, "learning_rate": 1.2045e-05, "loss": 0.3755, "step": 2411 }, { "epoch": 0.13506551685519094, "grad_norm": 1.1340587139129639, "learning_rate": 1.205e-05, "loss": 0.4536, "step": 2412 }, { "epoch": 0.13512151416731996, "grad_norm": 1.5051394701004028, "learning_rate": 1.2055e-05, "loss": 0.4643, "step": 2413 }, { "epoch": 0.13517751147944898, "grad_norm": 1.0842450857162476, "learning_rate": 1.206e-05, "loss": 0.3299, "step": 2414 }, { "epoch": 0.135233508791578, "grad_norm": 1.1488196849822998, "learning_rate": 1.2065e-05, "loss": 0.445, "step": 2415 }, { "epoch": 0.13528950610370702, "grad_norm": 1.1640756130218506, "learning_rate": 1.2070000000000001e-05, "loss": 0.4106, "step": 2416 }, { "epoch": 0.13534550341583604, "grad_norm": 1.4307230710983276, "learning_rate": 1.2075e-05, "loss": 0.4711, "step": 2417 }, { "epoch": 0.13540150072796506, "grad_norm": 1.7485276460647583, "learning_rate": 1.2080000000000001e-05, "loss": 0.6179, "step": 2418 }, { "epoch": 0.13545749804009408, "grad_norm": 1.2945975065231323, "learning_rate": 1.2085e-05, "loss": 0.5587, "step": 2419 
}, { "epoch": 0.1355134953522231, "grad_norm": 1.1517056226730347, "learning_rate": 1.209e-05, "loss": 0.3638, "step": 2420 }, { "epoch": 0.13556949266435211, "grad_norm": 1.4271259307861328, "learning_rate": 1.2095e-05, "loss": 0.4494, "step": 2421 }, { "epoch": 0.13562548997648113, "grad_norm": 1.8111350536346436, "learning_rate": 1.2100000000000001e-05, "loss": 0.5689, "step": 2422 }, { "epoch": 0.13568148728861015, "grad_norm": 1.0819214582443237, "learning_rate": 1.2105000000000002e-05, "loss": 0.4173, "step": 2423 }, { "epoch": 0.13573748460073917, "grad_norm": 1.2219858169555664, "learning_rate": 1.2110000000000001e-05, "loss": 0.4282, "step": 2424 }, { "epoch": 0.1357934819128682, "grad_norm": 1.6722608804702759, "learning_rate": 1.2115e-05, "loss": 0.5747, "step": 2425 }, { "epoch": 0.1358494792249972, "grad_norm": 1.2269113063812256, "learning_rate": 1.2120000000000001e-05, "loss": 0.3372, "step": 2426 }, { "epoch": 0.13590547653712623, "grad_norm": 1.5652239322662354, "learning_rate": 1.2125e-05, "loss": 0.4925, "step": 2427 }, { "epoch": 0.13596147384925525, "grad_norm": 1.2126359939575195, "learning_rate": 1.213e-05, "loss": 0.3988, "step": 2428 }, { "epoch": 0.13601747116138424, "grad_norm": 1.5335955619812012, "learning_rate": 1.2135e-05, "loss": 0.4273, "step": 2429 }, { "epoch": 0.13607346847351326, "grad_norm": 1.3891562223434448, "learning_rate": 1.214e-05, "loss": 0.4148, "step": 2430 }, { "epoch": 0.13612946578564228, "grad_norm": 1.6771533489227295, "learning_rate": 1.2145000000000001e-05, "loss": 0.5713, "step": 2431 }, { "epoch": 0.1361854630977713, "grad_norm": 1.255646824836731, "learning_rate": 1.215e-05, "loss": 0.4457, "step": 2432 }, { "epoch": 0.13624146040990032, "grad_norm": 1.1224887371063232, "learning_rate": 1.2155000000000001e-05, "loss": 0.3463, "step": 2433 }, { "epoch": 0.13629745772202934, "grad_norm": 1.122126817703247, "learning_rate": 1.216e-05, "loss": 0.4545, "step": 2434 }, { "epoch": 0.13635345503415836, "grad_norm": 
1.541749119758606, "learning_rate": 1.2165e-05, "loss": 0.4453, "step": 2435 }, { "epoch": 0.13640945234628737, "grad_norm": 1.2225465774536133, "learning_rate": 1.217e-05, "loss": 0.5011, "step": 2436 }, { "epoch": 0.1364654496584164, "grad_norm": 1.091254711151123, "learning_rate": 1.2175e-05, "loss": 0.4753, "step": 2437 }, { "epoch": 0.1365214469705454, "grad_norm": 1.122660517692566, "learning_rate": 1.2180000000000002e-05, "loss": 0.438, "step": 2438 }, { "epoch": 0.13657744428267443, "grad_norm": 1.0282984972000122, "learning_rate": 1.2185000000000001e-05, "loss": 0.4516, "step": 2439 }, { "epoch": 0.13663344159480345, "grad_norm": 1.0135828256607056, "learning_rate": 1.219e-05, "loss": 0.4791, "step": 2440 }, { "epoch": 0.13668943890693247, "grad_norm": 1.4245688915252686, "learning_rate": 1.2195000000000001e-05, "loss": 0.462, "step": 2441 }, { "epoch": 0.1367454362190615, "grad_norm": 1.4579600095748901, "learning_rate": 1.22e-05, "loss": 0.5111, "step": 2442 }, { "epoch": 0.1368014335311905, "grad_norm": 1.064908742904663, "learning_rate": 1.2205000000000001e-05, "loss": 0.368, "step": 2443 }, { "epoch": 0.13685743084331953, "grad_norm": 1.4000297784805298, "learning_rate": 1.221e-05, "loss": 0.3009, "step": 2444 }, { "epoch": 0.13691342815544855, "grad_norm": 1.2969307899475098, "learning_rate": 1.2215e-05, "loss": 0.3928, "step": 2445 }, { "epoch": 0.13696942546757757, "grad_norm": 3.748359441757202, "learning_rate": 1.2220000000000002e-05, "loss": 0.3718, "step": 2446 }, { "epoch": 0.13702542277970658, "grad_norm": 1.4786757230758667, "learning_rate": 1.2225e-05, "loss": 0.4797, "step": 2447 }, { "epoch": 0.1370814200918356, "grad_norm": 1.3004447221755981, "learning_rate": 1.2230000000000001e-05, "loss": 0.5356, "step": 2448 }, { "epoch": 0.13713741740396462, "grad_norm": 1.1472911834716797, "learning_rate": 1.2235e-05, "loss": 0.3354, "step": 2449 }, { "epoch": 0.13719341471609361, "grad_norm": 1.274685025215149, "learning_rate": 1.224e-05, "loss": 
0.4278, "step": 2450 }, { "epoch": 0.13724941202822263, "grad_norm": 1.412546157836914, "learning_rate": 1.2245e-05, "loss": 0.5157, "step": 2451 }, { "epoch": 0.13730540934035165, "grad_norm": 1.0137228965759277, "learning_rate": 1.225e-05, "loss": 0.4446, "step": 2452 }, { "epoch": 0.13736140665248067, "grad_norm": 2.3677423000335693, "learning_rate": 1.2255e-05, "loss": 0.5029, "step": 2453 }, { "epoch": 0.1374174039646097, "grad_norm": 1.2410162687301636, "learning_rate": 1.2260000000000001e-05, "loss": 0.4446, "step": 2454 }, { "epoch": 0.1374734012767387, "grad_norm": 1.2138422727584839, "learning_rate": 1.2265e-05, "loss": 0.4604, "step": 2455 }, { "epoch": 0.13752939858886773, "grad_norm": 1.2067878246307373, "learning_rate": 1.2270000000000001e-05, "loss": 0.4757, "step": 2456 }, { "epoch": 0.13758539590099675, "grad_norm": 1.2064911127090454, "learning_rate": 1.2275e-05, "loss": 0.4175, "step": 2457 }, { "epoch": 0.13764139321312577, "grad_norm": 1.3723119497299194, "learning_rate": 1.2280000000000001e-05, "loss": 0.4115, "step": 2458 }, { "epoch": 0.1376973905252548, "grad_norm": 1.29324209690094, "learning_rate": 1.2285e-05, "loss": 0.4133, "step": 2459 }, { "epoch": 0.1377533878373838, "grad_norm": 1.12079656124115, "learning_rate": 1.2290000000000001e-05, "loss": 0.4476, "step": 2460 }, { "epoch": 0.13780938514951283, "grad_norm": 1.2613892555236816, "learning_rate": 1.2295000000000002e-05, "loss": 0.5619, "step": 2461 }, { "epoch": 0.13786538246164184, "grad_norm": 1.4234511852264404, "learning_rate": 1.23e-05, "loss": 0.5854, "step": 2462 }, { "epoch": 0.13792137977377086, "grad_norm": 1.1991769075393677, "learning_rate": 1.2305000000000002e-05, "loss": 0.4518, "step": 2463 }, { "epoch": 0.13797737708589988, "grad_norm": 1.281947135925293, "learning_rate": 1.231e-05, "loss": 0.4044, "step": 2464 }, { "epoch": 0.1380333743980289, "grad_norm": 1.3853332996368408, "learning_rate": 1.2315e-05, "loss": 0.6167, "step": 2465 }, { "epoch": 
0.13808937171015792, "grad_norm": 1.23692786693573, "learning_rate": 1.232e-05, "loss": 0.4404, "step": 2466 }, { "epoch": 0.13814536902228694, "grad_norm": 14.155194282531738, "learning_rate": 1.2325e-05, "loss": 0.7027, "step": 2467 }, { "epoch": 0.13820136633441596, "grad_norm": 1.47590172290802, "learning_rate": 1.233e-05, "loss": 0.3997, "step": 2468 }, { "epoch": 0.13825736364654498, "grad_norm": 1.3184940814971924, "learning_rate": 1.2335000000000001e-05, "loss": 0.3687, "step": 2469 }, { "epoch": 0.13831336095867397, "grad_norm": 1.1062796115875244, "learning_rate": 1.234e-05, "loss": 0.4121, "step": 2470 }, { "epoch": 0.138369358270803, "grad_norm": 1.1457158327102661, "learning_rate": 1.2345000000000001e-05, "loss": 0.5695, "step": 2471 }, { "epoch": 0.138425355582932, "grad_norm": 1.1479376554489136, "learning_rate": 1.235e-05, "loss": 0.5058, "step": 2472 }, { "epoch": 0.13848135289506103, "grad_norm": 1.411997675895691, "learning_rate": 1.2355e-05, "loss": 0.5817, "step": 2473 }, { "epoch": 0.13853735020719005, "grad_norm": 1.0818930864334106, "learning_rate": 1.236e-05, "loss": 0.5058, "step": 2474 }, { "epoch": 0.13859334751931907, "grad_norm": 1.1070023775100708, "learning_rate": 1.2365e-05, "loss": 0.3422, "step": 2475 }, { "epoch": 0.13864934483144808, "grad_norm": 1.2352498769760132, "learning_rate": 1.2370000000000002e-05, "loss": 0.3878, "step": 2476 }, { "epoch": 0.1387053421435771, "grad_norm": 1.0397839546203613, "learning_rate": 1.2375000000000001e-05, "loss": 0.3501, "step": 2477 }, { "epoch": 0.13876133945570612, "grad_norm": 1.1352453231811523, "learning_rate": 1.238e-05, "loss": 0.3688, "step": 2478 }, { "epoch": 0.13881733676783514, "grad_norm": 1.3301494121551514, "learning_rate": 1.2385000000000001e-05, "loss": 0.5199, "step": 2479 }, { "epoch": 0.13887333407996416, "grad_norm": 1.426438570022583, "learning_rate": 1.239e-05, "loss": 0.453, "step": 2480 }, { "epoch": 0.13892933139209318, "grad_norm": 1.164884090423584, 
"learning_rate": 1.2395e-05, "loss": 0.323, "step": 2481 }, { "epoch": 0.1389853287042222, "grad_norm": 1.1007789373397827, "learning_rate": 1.24e-05, "loss": 0.3785, "step": 2482 }, { "epoch": 0.13904132601635122, "grad_norm": 1.163779854774475, "learning_rate": 1.2405e-05, "loss": 0.4914, "step": 2483 }, { "epoch": 0.13909732332848024, "grad_norm": 1.3361643552780151, "learning_rate": 1.2410000000000001e-05, "loss": 0.4419, "step": 2484 }, { "epoch": 0.13915332064060926, "grad_norm": 1.0904892683029175, "learning_rate": 1.2415e-05, "loss": 0.4141, "step": 2485 }, { "epoch": 0.13920931795273828, "grad_norm": 1.2341094017028809, "learning_rate": 1.2420000000000001e-05, "loss": 0.4362, "step": 2486 }, { "epoch": 0.1392653152648673, "grad_norm": 1.0510798692703247, "learning_rate": 1.2425e-05, "loss": 0.415, "step": 2487 }, { "epoch": 0.13932131257699631, "grad_norm": 0.9391849637031555, "learning_rate": 1.243e-05, "loss": 0.4643, "step": 2488 }, { "epoch": 0.13937730988912533, "grad_norm": 1.0622152090072632, "learning_rate": 1.2435e-05, "loss": 0.3353, "step": 2489 }, { "epoch": 0.13943330720125435, "grad_norm": 1.3571791648864746, "learning_rate": 1.244e-05, "loss": 0.4687, "step": 2490 }, { "epoch": 0.13948930451338334, "grad_norm": 1.1573200225830078, "learning_rate": 1.2445e-05, "loss": 0.3999, "step": 2491 }, { "epoch": 0.13954530182551236, "grad_norm": 1.1886022090911865, "learning_rate": 1.2450000000000001e-05, "loss": 0.3521, "step": 2492 }, { "epoch": 0.13960129913764138, "grad_norm": 2.901169538497925, "learning_rate": 1.2455e-05, "loss": 0.4332, "step": 2493 }, { "epoch": 0.1396572964497704, "grad_norm": 2.1086270809173584, "learning_rate": 1.2460000000000001e-05, "loss": 0.4126, "step": 2494 }, { "epoch": 0.13971329376189942, "grad_norm": 1.3888367414474487, "learning_rate": 1.2465e-05, "loss": 0.4817, "step": 2495 }, { "epoch": 0.13976929107402844, "grad_norm": 1.083674669265747, "learning_rate": 1.2470000000000001e-05, "loss": 0.383, "step": 2496 }, { 
"epoch": 0.13982528838615746, "grad_norm": 1.3220771551132202, "learning_rate": 1.2475e-05, "loss": 0.5275, "step": 2497 }, { "epoch": 0.13988128569828648, "grad_norm": 1.1915440559387207, "learning_rate": 1.248e-05, "loss": 0.4635, "step": 2498 }, { "epoch": 0.1399372830104155, "grad_norm": 1.0927143096923828, "learning_rate": 1.2485000000000002e-05, "loss": 0.4208, "step": 2499 }, { "epoch": 0.13999328032254452, "grad_norm": 1.2048420906066895, "learning_rate": 1.249e-05, "loss": 0.4241, "step": 2500 }, { "epoch": 0.14004927763467354, "grad_norm": 1.1832644939422607, "learning_rate": 1.2495000000000001e-05, "loss": 0.5051, "step": 2501 }, { "epoch": 0.14010527494680255, "grad_norm": 1.1782584190368652, "learning_rate": 1.25e-05, "loss": 0.5329, "step": 2502 }, { "epoch": 0.14016127225893157, "grad_norm": 1.2893174886703491, "learning_rate": 1.2505e-05, "loss": 0.4063, "step": 2503 }, { "epoch": 0.1402172695710606, "grad_norm": 1.0172559022903442, "learning_rate": 1.2509999999999999e-05, "loss": 0.3683, "step": 2504 }, { "epoch": 0.1402732668831896, "grad_norm": 1.2625501155853271, "learning_rate": 1.2515000000000001e-05, "loss": 0.3565, "step": 2505 }, { "epoch": 0.14032926419531863, "grad_norm": 1.2588109970092773, "learning_rate": 1.252e-05, "loss": 0.4849, "step": 2506 }, { "epoch": 0.14038526150744765, "grad_norm": 1.3349969387054443, "learning_rate": 1.2525000000000001e-05, "loss": 0.5433, "step": 2507 }, { "epoch": 0.14044125881957667, "grad_norm": 1.1840282678604126, "learning_rate": 1.253e-05, "loss": 0.5035, "step": 2508 }, { "epoch": 0.1404972561317057, "grad_norm": 1.236934781074524, "learning_rate": 1.2535e-05, "loss": 0.3751, "step": 2509 }, { "epoch": 0.1405532534438347, "grad_norm": 1.3336421251296997, "learning_rate": 1.2540000000000002e-05, "loss": 0.5805, "step": 2510 }, { "epoch": 0.14060925075596373, "grad_norm": 1.1406078338623047, "learning_rate": 1.2545000000000001e-05, "loss": 0.4124, "step": 2511 }, { "epoch": 0.14066524806809272, 
"grad_norm": 1.195516586303711, "learning_rate": 1.255e-05, "loss": 0.4189, "step": 2512 }, { "epoch": 0.14072124538022174, "grad_norm": 1.338142991065979, "learning_rate": 1.2555000000000001e-05, "loss": 0.46, "step": 2513 }, { "epoch": 0.14077724269235076, "grad_norm": 1.2000536918640137, "learning_rate": 1.256e-05, "loss": 0.5513, "step": 2514 }, { "epoch": 0.14083324000447978, "grad_norm": 1.066293716430664, "learning_rate": 1.2565000000000003e-05, "loss": 0.3948, "step": 2515 }, { "epoch": 0.1408892373166088, "grad_norm": 1.0766243934631348, "learning_rate": 1.2570000000000002e-05, "loss": 0.5143, "step": 2516 }, { "epoch": 0.14094523462873781, "grad_norm": 1.296629786491394, "learning_rate": 1.2575e-05, "loss": 0.4408, "step": 2517 }, { "epoch": 0.14100123194086683, "grad_norm": 1.11188542842865, "learning_rate": 1.258e-05, "loss": 0.4171, "step": 2518 }, { "epoch": 0.14105722925299585, "grad_norm": 1.1227508783340454, "learning_rate": 1.2584999999999999e-05, "loss": 0.571, "step": 2519 }, { "epoch": 0.14111322656512487, "grad_norm": 1.1131638288497925, "learning_rate": 1.2590000000000001e-05, "loss": 0.5317, "step": 2520 }, { "epoch": 0.1411692238772539, "grad_norm": 1.5281466245651245, "learning_rate": 1.2595e-05, "loss": 0.3774, "step": 2521 }, { "epoch": 0.1412252211893829, "grad_norm": 1.1421079635620117, "learning_rate": 1.2600000000000001e-05, "loss": 0.4091, "step": 2522 }, { "epoch": 0.14128121850151193, "grad_norm": 1.5237642526626587, "learning_rate": 1.2605e-05, "loss": 0.446, "step": 2523 }, { "epoch": 0.14133721581364095, "grad_norm": 1.4897043704986572, "learning_rate": 1.261e-05, "loss": 0.5319, "step": 2524 }, { "epoch": 0.14139321312576997, "grad_norm": 1.6626853942871094, "learning_rate": 1.2615000000000002e-05, "loss": 0.5172, "step": 2525 }, { "epoch": 0.141449210437899, "grad_norm": 1.3545503616333008, "learning_rate": 1.2620000000000001e-05, "loss": 0.5435, "step": 2526 }, { "epoch": 0.141505207750028, "grad_norm": 3.209622383117676, 
"learning_rate": 1.2625e-05, "loss": 0.528, "step": 2527 }, { "epoch": 0.14156120506215703, "grad_norm": 1.1368006467819214, "learning_rate": 1.263e-05, "loss": 0.4018, "step": 2528 }, { "epoch": 0.14161720237428604, "grad_norm": 1.019187092781067, "learning_rate": 1.2635e-05, "loss": 0.329, "step": 2529 }, { "epoch": 0.14167319968641506, "grad_norm": 1.2222051620483398, "learning_rate": 1.2640000000000003e-05, "loss": 0.3975, "step": 2530 }, { "epoch": 0.14172919699854408, "grad_norm": 1.1408919095993042, "learning_rate": 1.2645000000000002e-05, "loss": 0.4006, "step": 2531 }, { "epoch": 0.14178519431067307, "grad_norm": 1.2872763872146606, "learning_rate": 1.2650000000000001e-05, "loss": 0.3789, "step": 2532 }, { "epoch": 0.1418411916228021, "grad_norm": 2.181398868560791, "learning_rate": 1.2655e-05, "loss": 0.3909, "step": 2533 }, { "epoch": 0.1418971889349311, "grad_norm": 1.4929990768432617, "learning_rate": 1.2659999999999999e-05, "loss": 0.4905, "step": 2534 }, { "epoch": 0.14195318624706013, "grad_norm": 1.4117430448532104, "learning_rate": 1.2665000000000002e-05, "loss": 0.4487, "step": 2535 }, { "epoch": 0.14200918355918915, "grad_norm": 1.4963791370391846, "learning_rate": 1.267e-05, "loss": 0.5112, "step": 2536 }, { "epoch": 0.14206518087131817, "grad_norm": 1.105371117591858, "learning_rate": 1.2675000000000001e-05, "loss": 0.3966, "step": 2537 }, { "epoch": 0.1421211781834472, "grad_norm": 1.4377449750900269, "learning_rate": 1.268e-05, "loss": 0.4421, "step": 2538 }, { "epoch": 0.1421771754955762, "grad_norm": 1.1643390655517578, "learning_rate": 1.2685e-05, "loss": 0.3825, "step": 2539 }, { "epoch": 0.14223317280770523, "grad_norm": 1.2283012866973877, "learning_rate": 1.2690000000000002e-05, "loss": 0.4719, "step": 2540 }, { "epoch": 0.14228917011983425, "grad_norm": 1.1322824954986572, "learning_rate": 1.2695000000000001e-05, "loss": 0.3118, "step": 2541 }, { "epoch": 0.14234516743196327, "grad_norm": 1.1129308938980103, "learning_rate": 
1.27e-05, "loss": 0.3436, "step": 2542 }, { "epoch": 0.14240116474409228, "grad_norm": 1.2738077640533447, "learning_rate": 1.2705e-05, "loss": 0.3745, "step": 2543 }, { "epoch": 0.1424571620562213, "grad_norm": 1.1257846355438232, "learning_rate": 1.271e-05, "loss": 0.3746, "step": 2544 }, { "epoch": 0.14251315936835032, "grad_norm": 1.2664601802825928, "learning_rate": 1.2715000000000001e-05, "loss": 0.5354, "step": 2545 }, { "epoch": 0.14256915668047934, "grad_norm": 1.3434889316558838, "learning_rate": 1.2720000000000002e-05, "loss": 0.4078, "step": 2546 }, { "epoch": 0.14262515399260836, "grad_norm": 1.602239727973938, "learning_rate": 1.2725000000000001e-05, "loss": 0.4273, "step": 2547 }, { "epoch": 0.14268115130473738, "grad_norm": 1.0283304452896118, "learning_rate": 1.273e-05, "loss": 0.3713, "step": 2548 }, { "epoch": 0.1427371486168664, "grad_norm": 1.1225849390029907, "learning_rate": 1.2735e-05, "loss": 0.5288, "step": 2549 }, { "epoch": 0.14279314592899542, "grad_norm": 1.2365565299987793, "learning_rate": 1.2740000000000002e-05, "loss": 0.3731, "step": 2550 }, { "epoch": 0.14284914324112444, "grad_norm": 1.0750316381454468, "learning_rate": 1.2745e-05, "loss": 0.3926, "step": 2551 }, { "epoch": 0.14290514055325346, "grad_norm": 1.3363348245620728, "learning_rate": 1.2750000000000002e-05, "loss": 0.4665, "step": 2552 }, { "epoch": 0.14296113786538245, "grad_norm": 1.021571159362793, "learning_rate": 1.2755e-05, "loss": 0.4289, "step": 2553 }, { "epoch": 0.14301713517751147, "grad_norm": 1.1585955619812012, "learning_rate": 1.276e-05, "loss": 0.3102, "step": 2554 }, { "epoch": 0.1430731324896405, "grad_norm": 1.2394684553146362, "learning_rate": 1.2765000000000002e-05, "loss": 0.4, "step": 2555 }, { "epoch": 0.1431291298017695, "grad_norm": 1.216558575630188, "learning_rate": 1.2770000000000001e-05, "loss": 0.4016, "step": 2556 }, { "epoch": 0.14318512711389852, "grad_norm": 1.206903338432312, "learning_rate": 1.2775e-05, "loss": 0.4442, "step": 2557 
}, { "epoch": 0.14324112442602754, "grad_norm": 0.900111198425293, "learning_rate": 1.278e-05, "loss": 0.3385, "step": 2558 }, { "epoch": 0.14329712173815656, "grad_norm": 1.3177870512008667, "learning_rate": 1.2785e-05, "loss": 0.3729, "step": 2559 }, { "epoch": 0.14335311905028558, "grad_norm": 1.273997187614441, "learning_rate": 1.2790000000000001e-05, "loss": 0.3623, "step": 2560 }, { "epoch": 0.1434091163624146, "grad_norm": 1.188696026802063, "learning_rate": 1.2795000000000002e-05, "loss": 0.3905, "step": 2561 }, { "epoch": 0.14346511367454362, "grad_norm": 1.4021480083465576, "learning_rate": 1.2800000000000001e-05, "loss": 0.5325, "step": 2562 }, { "epoch": 0.14352111098667264, "grad_norm": 1.4283920526504517, "learning_rate": 1.2805e-05, "loss": 0.4506, "step": 2563 }, { "epoch": 0.14357710829880166, "grad_norm": 1.5277773141860962, "learning_rate": 1.281e-05, "loss": 0.5579, "step": 2564 }, { "epoch": 0.14363310561093068, "grad_norm": 1.1661988496780396, "learning_rate": 1.2814999999999998e-05, "loss": 0.3988, "step": 2565 }, { "epoch": 0.1436891029230597, "grad_norm": 1.3075827360153198, "learning_rate": 1.2820000000000001e-05, "loss": 0.4105, "step": 2566 }, { "epoch": 0.14374510023518872, "grad_norm": 1.5823769569396973, "learning_rate": 1.2825000000000002e-05, "loss": 0.5679, "step": 2567 }, { "epoch": 0.14380109754731774, "grad_norm": 1.0940037965774536, "learning_rate": 1.283e-05, "loss": 0.3605, "step": 2568 }, { "epoch": 0.14385709485944675, "grad_norm": 1.1388133764266968, "learning_rate": 1.2835e-05, "loss": 0.3079, "step": 2569 }, { "epoch": 0.14391309217157577, "grad_norm": 1.4478360414505005, "learning_rate": 1.2839999999999999e-05, "loss": 0.4968, "step": 2570 }, { "epoch": 0.1439690894837048, "grad_norm": 1.2097203731536865, "learning_rate": 1.2845000000000002e-05, "loss": 0.4297, "step": 2571 }, { "epoch": 0.1440250867958338, "grad_norm": 1.618012547492981, "learning_rate": 1.285e-05, "loss": 0.419, "step": 2572 }, { "epoch": 
0.14408108410796283, "grad_norm": 1.4660046100616455, "learning_rate": 1.2855e-05, "loss": 0.3765, "step": 2573 }, { "epoch": 0.14413708142009182, "grad_norm": 1.2173787355422974, "learning_rate": 1.286e-05, "loss": 0.3228, "step": 2574 }, { "epoch": 0.14419307873222084, "grad_norm": 1.3873306512832642, "learning_rate": 1.2865e-05, "loss": 0.5869, "step": 2575 }, { "epoch": 0.14424907604434986, "grad_norm": 1.5580289363861084, "learning_rate": 1.2870000000000002e-05, "loss": 0.4932, "step": 2576 }, { "epoch": 0.14430507335647888, "grad_norm": 1.7026492357254028, "learning_rate": 1.2875000000000001e-05, "loss": 0.5149, "step": 2577 }, { "epoch": 0.1443610706686079, "grad_norm": 1.3750256299972534, "learning_rate": 1.288e-05, "loss": 0.495, "step": 2578 }, { "epoch": 0.14441706798073692, "grad_norm": 1.2004586458206177, "learning_rate": 1.2885e-05, "loss": 0.4588, "step": 2579 }, { "epoch": 0.14447306529286594, "grad_norm": 1.4534201622009277, "learning_rate": 1.2889999999999999e-05, "loss": 0.5208, "step": 2580 }, { "epoch": 0.14452906260499496, "grad_norm": 1.2705984115600586, "learning_rate": 1.2895000000000001e-05, "loss": 0.3537, "step": 2581 }, { "epoch": 0.14458505991712398, "grad_norm": 1.1209765672683716, "learning_rate": 1.29e-05, "loss": 0.4387, "step": 2582 }, { "epoch": 0.144641057229253, "grad_norm": 1.1241347789764404, "learning_rate": 1.2905000000000001e-05, "loss": 0.3922, "step": 2583 }, { "epoch": 0.14469705454138201, "grad_norm": 1.6147980690002441, "learning_rate": 1.291e-05, "loss": 0.4985, "step": 2584 }, { "epoch": 0.14475305185351103, "grad_norm": 1.542745590209961, "learning_rate": 1.2915e-05, "loss": 0.6905, "step": 2585 }, { "epoch": 0.14480904916564005, "grad_norm": 1.2592793703079224, "learning_rate": 1.2920000000000002e-05, "loss": 0.4949, "step": 2586 }, { "epoch": 0.14486504647776907, "grad_norm": 1.1622910499572754, "learning_rate": 1.2925e-05, "loss": 0.3956, "step": 2587 }, { "epoch": 0.1449210437898981, "grad_norm": 
1.2540090084075928, "learning_rate": 1.293e-05, "loss": 0.5471, "step": 2588 }, { "epoch": 0.1449770411020271, "grad_norm": 1.256731390953064, "learning_rate": 1.2935e-05, "loss": 0.3732, "step": 2589 }, { "epoch": 0.14503303841415613, "grad_norm": 1.5238062143325806, "learning_rate": 1.294e-05, "loss": 0.6349, "step": 2590 }, { "epoch": 0.14508903572628515, "grad_norm": 1.0773160457611084, "learning_rate": 1.2945000000000002e-05, "loss": 0.4166, "step": 2591 }, { "epoch": 0.14514503303841417, "grad_norm": 1.1376806497573853, "learning_rate": 1.2950000000000001e-05, "loss": 0.4292, "step": 2592 }, { "epoch": 0.1452010303505432, "grad_norm": 1.2171424627304077, "learning_rate": 1.2955e-05, "loss": 0.3987, "step": 2593 }, { "epoch": 0.14525702766267218, "grad_norm": 1.1606625318527222, "learning_rate": 1.296e-05, "loss": 0.4594, "step": 2594 }, { "epoch": 0.1453130249748012, "grad_norm": 1.031876802444458, "learning_rate": 1.2964999999999999e-05, "loss": 0.5854, "step": 2595 }, { "epoch": 0.14536902228693022, "grad_norm": 1.1607109308242798, "learning_rate": 1.2970000000000001e-05, "loss": 0.4157, "step": 2596 }, { "epoch": 0.14542501959905924, "grad_norm": 1.0473979711532593, "learning_rate": 1.2975e-05, "loss": 0.426, "step": 2597 }, { "epoch": 0.14548101691118825, "grad_norm": 1.1430045366287231, "learning_rate": 1.2980000000000001e-05, "loss": 0.4142, "step": 2598 }, { "epoch": 0.14553701422331727, "grad_norm": 1.156032919883728, "learning_rate": 1.2985e-05, "loss": 0.4853, "step": 2599 }, { "epoch": 0.1455930115354463, "grad_norm": 1.1408398151397705, "learning_rate": 1.299e-05, "loss": 0.4186, "step": 2600 }, { "epoch": 0.1456490088475753, "grad_norm": 1.059762716293335, "learning_rate": 1.2995000000000002e-05, "loss": 0.4544, "step": 2601 }, { "epoch": 0.14570500615970433, "grad_norm": 1.4854395389556885, "learning_rate": 1.3000000000000001e-05, "loss": 0.7862, "step": 2602 }, { "epoch": 0.14576100347183335, "grad_norm": 1.234014868736267, "learning_rate": 
1.3005e-05, "loss": 0.4272, "step": 2603 }, { "epoch": 0.14581700078396237, "grad_norm": 5.808340072631836, "learning_rate": 1.301e-05, "loss": 0.4147, "step": 2604 }, { "epoch": 0.1458729980960914, "grad_norm": 1.2030842304229736, "learning_rate": 1.3015e-05, "loss": 0.3735, "step": 2605 }, { "epoch": 0.1459289954082204, "grad_norm": 1.3522212505340576, "learning_rate": 1.3020000000000002e-05, "loss": 0.3872, "step": 2606 }, { "epoch": 0.14598499272034943, "grad_norm": 1.1150144338607788, "learning_rate": 1.3025000000000002e-05, "loss": 0.5558, "step": 2607 }, { "epoch": 0.14604099003247845, "grad_norm": 1.0971496105194092, "learning_rate": 1.303e-05, "loss": 0.3065, "step": 2608 }, { "epoch": 0.14609698734460747, "grad_norm": 1.3269139528274536, "learning_rate": 1.3035e-05, "loss": 0.4228, "step": 2609 }, { "epoch": 0.14615298465673648, "grad_norm": 1.054341435432434, "learning_rate": 1.3039999999999999e-05, "loss": 0.3975, "step": 2610 }, { "epoch": 0.1462089819688655, "grad_norm": 1.7736667394638062, "learning_rate": 1.3045000000000001e-05, "loss": 0.4209, "step": 2611 }, { "epoch": 0.14626497928099452, "grad_norm": 1.1408703327178955, "learning_rate": 1.305e-05, "loss": 0.3248, "step": 2612 }, { "epoch": 0.14632097659312354, "grad_norm": 1.2324352264404297, "learning_rate": 1.3055000000000001e-05, "loss": 0.6046, "step": 2613 }, { "epoch": 0.14637697390525256, "grad_norm": 1.1829913854599, "learning_rate": 1.306e-05, "loss": 0.364, "step": 2614 }, { "epoch": 0.14643297121738155, "grad_norm": 1.1477619409561157, "learning_rate": 1.3065e-05, "loss": 0.3573, "step": 2615 }, { "epoch": 0.14648896852951057, "grad_norm": 1.2593798637390137, "learning_rate": 1.3070000000000002e-05, "loss": 0.3189, "step": 2616 }, { "epoch": 0.1465449658416396, "grad_norm": 1.5565099716186523, "learning_rate": 1.3075000000000001e-05, "loss": 0.4649, "step": 2617 }, { "epoch": 0.1466009631537686, "grad_norm": 1.105677604675293, "learning_rate": 1.308e-05, "loss": 0.4293, "step": 2618 
}, { "epoch": 0.14665696046589763, "grad_norm": 1.015621304512024, "learning_rate": 1.3085e-05, "loss": 0.3262, "step": 2619 }, { "epoch": 0.14671295777802665, "grad_norm": 1.252323031425476, "learning_rate": 1.309e-05, "loss": 0.4461, "step": 2620 }, { "epoch": 0.14676895509015567, "grad_norm": 1.135851502418518, "learning_rate": 1.3095000000000003e-05, "loss": 0.4974, "step": 2621 }, { "epoch": 0.1468249524022847, "grad_norm": 1.3516892194747925, "learning_rate": 1.3100000000000002e-05, "loss": 0.5207, "step": 2622 }, { "epoch": 0.1468809497144137, "grad_norm": 1.4236551523208618, "learning_rate": 1.3105e-05, "loss": 0.448, "step": 2623 }, { "epoch": 0.14693694702654272, "grad_norm": 1.2360954284667969, "learning_rate": 1.311e-05, "loss": 0.4699, "step": 2624 }, { "epoch": 0.14699294433867174, "grad_norm": 1.2243598699569702, "learning_rate": 1.3114999999999999e-05, "loss": 0.4643, "step": 2625 }, { "epoch": 0.14704894165080076, "grad_norm": 1.3075956106185913, "learning_rate": 1.3120000000000001e-05, "loss": 0.6034, "step": 2626 }, { "epoch": 0.14710493896292978, "grad_norm": 1.52328360080719, "learning_rate": 1.3125e-05, "loss": 0.4696, "step": 2627 }, { "epoch": 0.1471609362750588, "grad_norm": 1.355431318283081, "learning_rate": 1.3130000000000001e-05, "loss": 0.5116, "step": 2628 }, { "epoch": 0.14721693358718782, "grad_norm": 1.3428106307983398, "learning_rate": 1.3135e-05, "loss": 0.3957, "step": 2629 }, { "epoch": 0.14727293089931684, "grad_norm": 1.2088851928710938, "learning_rate": 1.314e-05, "loss": 0.5184, "step": 2630 }, { "epoch": 0.14732892821144586, "grad_norm": 1.3796359300613403, "learning_rate": 1.3145000000000002e-05, "loss": 0.3684, "step": 2631 }, { "epoch": 0.14738492552357488, "grad_norm": 1.294643759727478, "learning_rate": 1.3150000000000001e-05, "loss": 0.4408, "step": 2632 }, { "epoch": 0.1474409228357039, "grad_norm": 1.2840946912765503, "learning_rate": 1.3155e-05, "loss": 0.4452, "step": 2633 }, { "epoch": 0.14749692014783292, 
"grad_norm": 1.0767340660095215, "learning_rate": 1.316e-05, "loss": 0.3368, "step": 2634 }, { "epoch": 0.14755291745996194, "grad_norm": 1.0889029502868652, "learning_rate": 1.3165e-05, "loss": 0.3129, "step": 2635 }, { "epoch": 0.14760891477209093, "grad_norm": 1.5532374382019043, "learning_rate": 1.3170000000000001e-05, "loss": 0.4134, "step": 2636 }, { "epoch": 0.14766491208421995, "grad_norm": 1.4018138647079468, "learning_rate": 1.3175000000000002e-05, "loss": 0.6566, "step": 2637 }, { "epoch": 0.14772090939634897, "grad_norm": 1.1475685834884644, "learning_rate": 1.3180000000000001e-05, "loss": 0.4866, "step": 2638 }, { "epoch": 0.14777690670847798, "grad_norm": 1.2613779306411743, "learning_rate": 1.3185e-05, "loss": 0.3669, "step": 2639 }, { "epoch": 0.147832904020607, "grad_norm": 1.3189911842346191, "learning_rate": 1.3189999999999999e-05, "loss": 0.4983, "step": 2640 }, { "epoch": 0.14788890133273602, "grad_norm": 1.0426316261291504, "learning_rate": 1.3195000000000002e-05, "loss": 0.3501, "step": 2641 }, { "epoch": 0.14794489864486504, "grad_norm": 1.1406067609786987, "learning_rate": 1.32e-05, "loss": 0.3735, "step": 2642 }, { "epoch": 0.14800089595699406, "grad_norm": 1.1951979398727417, "learning_rate": 1.3205000000000001e-05, "loss": 0.4212, "step": 2643 }, { "epoch": 0.14805689326912308, "grad_norm": 1.0712980031967163, "learning_rate": 1.321e-05, "loss": 0.3888, "step": 2644 }, { "epoch": 0.1481128905812521, "grad_norm": 1.1973940134048462, "learning_rate": 1.3215e-05, "loss": 0.4139, "step": 2645 }, { "epoch": 0.14816888789338112, "grad_norm": 1.3590928316116333, "learning_rate": 1.3220000000000002e-05, "loss": 0.5349, "step": 2646 }, { "epoch": 0.14822488520551014, "grad_norm": 1.2918621301651, "learning_rate": 1.3225000000000001e-05, "loss": 0.442, "step": 2647 }, { "epoch": 0.14828088251763916, "grad_norm": 1.199952483177185, "learning_rate": 1.323e-05, "loss": 0.5537, "step": 2648 }, { "epoch": 0.14833687982976818, "grad_norm": 
1.1175743341445923, "learning_rate": 1.3235e-05, "loss": 0.4777, "step": 2649 }, { "epoch": 0.1483928771418972, "grad_norm": 1.4558801651000977, "learning_rate": 1.324e-05, "loss": 0.6484, "step": 2650 }, { "epoch": 0.14844887445402621, "grad_norm": 1.170845866203308, "learning_rate": 1.3245000000000001e-05, "loss": 0.4357, "step": 2651 }, { "epoch": 0.14850487176615523, "grad_norm": 1.2923656702041626, "learning_rate": 1.3250000000000002e-05, "loss": 0.5244, "step": 2652 }, { "epoch": 0.14856086907828425, "grad_norm": 1.153900146484375, "learning_rate": 1.3255000000000001e-05, "loss": 0.4252, "step": 2653 }, { "epoch": 0.14861686639041327, "grad_norm": 1.2607039213180542, "learning_rate": 1.326e-05, "loss": 0.4234, "step": 2654 }, { "epoch": 0.1486728637025423, "grad_norm": 1.284332513809204, "learning_rate": 1.3265e-05, "loss": 0.4065, "step": 2655 }, { "epoch": 0.14872886101467128, "grad_norm": 1.0736377239227295, "learning_rate": 1.3270000000000002e-05, "loss": 0.4597, "step": 2656 }, { "epoch": 0.1487848583268003, "grad_norm": 1.2907679080963135, "learning_rate": 1.3275e-05, "loss": 0.4447, "step": 2657 }, { "epoch": 0.14884085563892932, "grad_norm": 1.1318539381027222, "learning_rate": 1.3280000000000002e-05, "loss": 0.3366, "step": 2658 }, { "epoch": 0.14889685295105834, "grad_norm": 1.27273690700531, "learning_rate": 1.3285e-05, "loss": 0.5273, "step": 2659 }, { "epoch": 0.14895285026318736, "grad_norm": 1.3122705221176147, "learning_rate": 1.329e-05, "loss": 0.5537, "step": 2660 }, { "epoch": 0.14900884757531638, "grad_norm": 1.3192979097366333, "learning_rate": 1.3295000000000002e-05, "loss": 0.5078, "step": 2661 }, { "epoch": 0.1490648448874454, "grad_norm": 1.4099100828170776, "learning_rate": 1.3300000000000001e-05, "loss": 0.4531, "step": 2662 }, { "epoch": 0.14912084219957442, "grad_norm": 6.571522235870361, "learning_rate": 1.3305e-05, "loss": 0.4682, "step": 2663 }, { "epoch": 0.14917683951170344, "grad_norm": 1.0490186214447021, "learning_rate": 
1.331e-05, "loss": 0.34, "step": 2664 }, { "epoch": 0.14923283682383245, "grad_norm": 1.4035292863845825, "learning_rate": 1.3315e-05, "loss": 0.4918, "step": 2665 }, { "epoch": 0.14928883413596147, "grad_norm": 1.2334212064743042, "learning_rate": 1.3320000000000001e-05, "loss": 0.543, "step": 2666 }, { "epoch": 0.1493448314480905, "grad_norm": 1.3419967889785767, "learning_rate": 1.3325000000000002e-05, "loss": 0.3962, "step": 2667 }, { "epoch": 0.1494008287602195, "grad_norm": 1.2629648447036743, "learning_rate": 1.3330000000000001e-05, "loss": 0.5449, "step": 2668 }, { "epoch": 0.14945682607234853, "grad_norm": 1.0612081289291382, "learning_rate": 1.3335e-05, "loss": 0.3091, "step": 2669 }, { "epoch": 0.14951282338447755, "grad_norm": 1.045214056968689, "learning_rate": 1.334e-05, "loss": 0.3962, "step": 2670 }, { "epoch": 0.14956882069660657, "grad_norm": 1.0915015935897827, "learning_rate": 1.3345000000000002e-05, "loss": 0.4691, "step": 2671 }, { "epoch": 0.1496248180087356, "grad_norm": 1.229937195777893, "learning_rate": 1.3350000000000001e-05, "loss": 0.3836, "step": 2672 }, { "epoch": 0.1496808153208646, "grad_norm": 1.303233027458191, "learning_rate": 1.3355e-05, "loss": 0.4157, "step": 2673 }, { "epoch": 0.14973681263299363, "grad_norm": 1.1198750734329224, "learning_rate": 1.336e-05, "loss": 0.4337, "step": 2674 }, { "epoch": 0.14979280994512265, "grad_norm": 1.3870460987091064, "learning_rate": 1.3365e-05, "loss": 0.5762, "step": 2675 }, { "epoch": 0.14984880725725166, "grad_norm": 1.2103842496871948, "learning_rate": 1.3370000000000002e-05, "loss": 0.4496, "step": 2676 }, { "epoch": 0.14990480456938066, "grad_norm": 0.9885027408599854, "learning_rate": 1.3375000000000002e-05, "loss": 0.3836, "step": 2677 }, { "epoch": 0.14996080188150968, "grad_norm": 1.1948119401931763, "learning_rate": 1.338e-05, "loss": 0.441, "step": 2678 }, { "epoch": 0.1500167991936387, "grad_norm": 1.1679539680480957, "learning_rate": 1.3385e-05, "loss": 0.4136, "step": 2679 
}, { "epoch": 0.1500727965057677, "grad_norm": 1.3134140968322754, "learning_rate": 1.339e-05, "loss": 0.7125, "step": 2680 }, { "epoch": 0.15012879381789673, "grad_norm": 1.2312588691711426, "learning_rate": 1.3395000000000001e-05, "loss": 0.3682, "step": 2681 }, { "epoch": 0.15018479113002575, "grad_norm": 1.028341293334961, "learning_rate": 1.3400000000000002e-05, "loss": 0.2944, "step": 2682 }, { "epoch": 0.15024078844215477, "grad_norm": 1.3490902185440063, "learning_rate": 1.3405000000000001e-05, "loss": 0.4722, "step": 2683 }, { "epoch": 0.1502967857542838, "grad_norm": 1.3073066473007202, "learning_rate": 1.341e-05, "loss": 0.4111, "step": 2684 }, { "epoch": 0.1503527830664128, "grad_norm": 1.1631131172180176, "learning_rate": 1.3415e-05, "loss": 0.4215, "step": 2685 }, { "epoch": 0.15040878037854183, "grad_norm": 1.4284385442733765, "learning_rate": 1.3420000000000002e-05, "loss": 0.5716, "step": 2686 }, { "epoch": 0.15046477769067085, "grad_norm": 1.2655913829803467, "learning_rate": 1.3425000000000001e-05, "loss": 0.353, "step": 2687 }, { "epoch": 0.15052077500279987, "grad_norm": 1.0791429281234741, "learning_rate": 1.343e-05, "loss": 0.4433, "step": 2688 }, { "epoch": 0.1505767723149289, "grad_norm": 1.2483781576156616, "learning_rate": 1.3435000000000001e-05, "loss": 0.4055, "step": 2689 }, { "epoch": 0.1506327696270579, "grad_norm": 1.4182034730911255, "learning_rate": 1.344e-05, "loss": 0.4274, "step": 2690 }, { "epoch": 0.15068876693918692, "grad_norm": 1.1914207935333252, "learning_rate": 1.3445e-05, "loss": 0.3799, "step": 2691 }, { "epoch": 0.15074476425131594, "grad_norm": 1.1741597652435303, "learning_rate": 1.3450000000000002e-05, "loss": 0.4406, "step": 2692 }, { "epoch": 0.15080076156344496, "grad_norm": 1.1351898908615112, "learning_rate": 1.3455e-05, "loss": 0.3233, "step": 2693 }, { "epoch": 0.15085675887557398, "grad_norm": 1.2189592123031616, "learning_rate": 1.346e-05, "loss": 0.4002, "step": 2694 }, { "epoch": 0.150912756187703, 
"grad_norm": 1.1392837762832642, "learning_rate": 1.3465e-05, "loss": 0.3374, "step": 2695 }, { "epoch": 0.15096875349983202, "grad_norm": 1.401028037071228, "learning_rate": 1.347e-05, "loss": 0.4197, "step": 2696 }, { "epoch": 0.15102475081196104, "grad_norm": 1.1850882768630981, "learning_rate": 1.3475000000000002e-05, "loss": 0.4155, "step": 2697 }, { "epoch": 0.15108074812409003, "grad_norm": 1.4200987815856934, "learning_rate": 1.3480000000000001e-05, "loss": 0.381, "step": 2698 }, { "epoch": 0.15113674543621905, "grad_norm": 1.0592139959335327, "learning_rate": 1.3485e-05, "loss": 0.3618, "step": 2699 }, { "epoch": 0.15119274274834807, "grad_norm": 1.2360087633132935, "learning_rate": 1.349e-05, "loss": 0.3661, "step": 2700 }, { "epoch": 0.1512487400604771, "grad_norm": 1.242908239364624, "learning_rate": 1.3494999999999999e-05, "loss": 0.514, "step": 2701 }, { "epoch": 0.1513047373726061, "grad_norm": 1.3020732402801514, "learning_rate": 1.3500000000000001e-05, "loss": 0.4683, "step": 2702 }, { "epoch": 0.15136073468473513, "grad_norm": 1.5411075353622437, "learning_rate": 1.3505e-05, "loss": 0.421, "step": 2703 }, { "epoch": 0.15141673199686415, "grad_norm": 1.2832776308059692, "learning_rate": 1.3510000000000001e-05, "loss": 0.4048, "step": 2704 }, { "epoch": 0.15147272930899316, "grad_norm": 1.2762134075164795, "learning_rate": 1.3515e-05, "loss": 0.5117, "step": 2705 }, { "epoch": 0.15152872662112218, "grad_norm": 1.3318418264389038, "learning_rate": 1.352e-05, "loss": 0.4159, "step": 2706 }, { "epoch": 0.1515847239332512, "grad_norm": 1.4038443565368652, "learning_rate": 1.3525000000000002e-05, "loss": 0.5075, "step": 2707 }, { "epoch": 0.15164072124538022, "grad_norm": 1.3062500953674316, "learning_rate": 1.3530000000000001e-05, "loss": 0.6217, "step": 2708 }, { "epoch": 0.15169671855750924, "grad_norm": 1.1369431018829346, "learning_rate": 1.3535e-05, "loss": 0.4208, "step": 2709 }, { "epoch": 0.15175271586963826, "grad_norm": 1.464113473892212, 
"learning_rate": 1.3539999999999999e-05, "loss": 0.4137, "step": 2710 }, { "epoch": 0.15180871318176728, "grad_norm": 1.2051522731781006, "learning_rate": 1.3545e-05, "loss": 0.5511, "step": 2711 }, { "epoch": 0.1518647104938963, "grad_norm": 1.232174038887024, "learning_rate": 1.3550000000000002e-05, "loss": 0.4952, "step": 2712 }, { "epoch": 0.15192070780602532, "grad_norm": 1.2316820621490479, "learning_rate": 1.3555000000000002e-05, "loss": 0.4071, "step": 2713 }, { "epoch": 0.15197670511815434, "grad_norm": 1.3428975343704224, "learning_rate": 1.356e-05, "loss": 0.486, "step": 2714 }, { "epoch": 0.15203270243028336, "grad_norm": 1.1460320949554443, "learning_rate": 1.3565e-05, "loss": 0.4263, "step": 2715 }, { "epoch": 0.15208869974241238, "grad_norm": 1.3443206548690796, "learning_rate": 1.3569999999999999e-05, "loss": 0.4956, "step": 2716 }, { "epoch": 0.1521446970545414, "grad_norm": 1.0955899953842163, "learning_rate": 1.3575000000000001e-05, "loss": 0.3872, "step": 2717 }, { "epoch": 0.15220069436667039, "grad_norm": 1.1686948537826538, "learning_rate": 1.358e-05, "loss": 0.487, "step": 2718 }, { "epoch": 0.1522566916787994, "grad_norm": 1.1902086734771729, "learning_rate": 1.3585000000000001e-05, "loss": 0.4301, "step": 2719 }, { "epoch": 0.15231268899092842, "grad_norm": 1.2101625204086304, "learning_rate": 1.359e-05, "loss": 0.4894, "step": 2720 }, { "epoch": 0.15236868630305744, "grad_norm": 1.1747902631759644, "learning_rate": 1.3595e-05, "loss": 0.3868, "step": 2721 }, { "epoch": 0.15242468361518646, "grad_norm": 1.752328634262085, "learning_rate": 1.3600000000000002e-05, "loss": 0.5366, "step": 2722 }, { "epoch": 0.15248068092731548, "grad_norm": 1.3041211366653442, "learning_rate": 1.3605000000000001e-05, "loss": 0.4857, "step": 2723 }, { "epoch": 0.1525366782394445, "grad_norm": 1.0765955448150635, "learning_rate": 1.361e-05, "loss": 0.345, "step": 2724 }, { "epoch": 0.15259267555157352, "grad_norm": 1.4420080184936523, "learning_rate": 
1.3615e-05, "loss": 0.5749, "step": 2725 }, { "epoch": 0.15264867286370254, "grad_norm": 1.1885454654693604, "learning_rate": 1.362e-05, "loss": 0.5055, "step": 2726 }, { "epoch": 0.15270467017583156, "grad_norm": 1.2062809467315674, "learning_rate": 1.3625e-05, "loss": 0.4599, "step": 2727 }, { "epoch": 0.15276066748796058, "grad_norm": 1.3495934009552002, "learning_rate": 1.3630000000000002e-05, "loss": 0.4824, "step": 2728 }, { "epoch": 0.1528166648000896, "grad_norm": 1.6765187978744507, "learning_rate": 1.3635e-05, "loss": 0.5063, "step": 2729 }, { "epoch": 0.15287266211221862, "grad_norm": 1.3617475032806396, "learning_rate": 1.364e-05, "loss": 0.475, "step": 2730 }, { "epoch": 0.15292865942434763, "grad_norm": 1.0638370513916016, "learning_rate": 1.3644999999999999e-05, "loss": 0.3662, "step": 2731 }, { "epoch": 0.15298465673647665, "grad_norm": 1.3291374444961548, "learning_rate": 1.3650000000000001e-05, "loss": 0.4763, "step": 2732 }, { "epoch": 0.15304065404860567, "grad_norm": 0.9607083797454834, "learning_rate": 1.3655e-05, "loss": 0.3983, "step": 2733 }, { "epoch": 0.1530966513607347, "grad_norm": 1.1476843357086182, "learning_rate": 1.3660000000000001e-05, "loss": 0.5913, "step": 2734 }, { "epoch": 0.1531526486728637, "grad_norm": 1.1318851709365845, "learning_rate": 1.3665e-05, "loss": 0.4258, "step": 2735 }, { "epoch": 0.15320864598499273, "grad_norm": 1.1070514917373657, "learning_rate": 1.367e-05, "loss": 0.4925, "step": 2736 }, { "epoch": 0.15326464329712175, "grad_norm": 1.3675343990325928, "learning_rate": 1.3675000000000002e-05, "loss": 0.5905, "step": 2737 }, { "epoch": 0.15332064060925077, "grad_norm": 1.3040297031402588, "learning_rate": 1.3680000000000001e-05, "loss": 0.4395, "step": 2738 }, { "epoch": 0.15337663792137976, "grad_norm": 1.1907511949539185, "learning_rate": 1.3685e-05, "loss": 0.3728, "step": 2739 }, { "epoch": 0.15343263523350878, "grad_norm": 1.1401329040527344, "learning_rate": 1.369e-05, "loss": 0.446, "step": 2740 }, { 
"epoch": 0.1534886325456378, "grad_norm": 1.0914613008499146, "learning_rate": 1.3695e-05, "loss": 0.3913, "step": 2741 }, { "epoch": 0.15354462985776682, "grad_norm": 1.1461143493652344, "learning_rate": 1.3700000000000001e-05, "loss": 0.4044, "step": 2742 }, { "epoch": 0.15360062716989584, "grad_norm": 1.229974627494812, "learning_rate": 1.3705000000000002e-05, "loss": 0.5203, "step": 2743 }, { "epoch": 0.15365662448202486, "grad_norm": 1.1887822151184082, "learning_rate": 1.3710000000000001e-05, "loss": 0.3773, "step": 2744 }, { "epoch": 0.15371262179415388, "grad_norm": 1.0333051681518555, "learning_rate": 1.3715e-05, "loss": 0.4415, "step": 2745 }, { "epoch": 0.1537686191062829, "grad_norm": 1.1656396389007568, "learning_rate": 1.3719999999999999e-05, "loss": 0.4399, "step": 2746 }, { "epoch": 0.1538246164184119, "grad_norm": 1.2455742359161377, "learning_rate": 1.3725000000000002e-05, "loss": 0.4364, "step": 2747 }, { "epoch": 0.15388061373054093, "grad_norm": 1.0791863203048706, "learning_rate": 1.373e-05, "loss": 0.3751, "step": 2748 }, { "epoch": 0.15393661104266995, "grad_norm": 1.1460646390914917, "learning_rate": 1.3735000000000001e-05, "loss": 0.4815, "step": 2749 }, { "epoch": 0.15399260835479897, "grad_norm": 1.4752322435379028, "learning_rate": 1.374e-05, "loss": 0.6131, "step": 2750 }, { "epoch": 0.154048605666928, "grad_norm": 1.1885502338409424, "learning_rate": 1.3745e-05, "loss": 0.5409, "step": 2751 }, { "epoch": 0.154104602979057, "grad_norm": 1.6608283519744873, "learning_rate": 1.3750000000000002e-05, "loss": 0.4526, "step": 2752 }, { "epoch": 0.15416060029118603, "grad_norm": 1.4488664865493774, "learning_rate": 1.3755000000000001e-05, "loss": 0.3633, "step": 2753 }, { "epoch": 0.15421659760331505, "grad_norm": 1.2881131172180176, "learning_rate": 1.376e-05, "loss": 0.5018, "step": 2754 }, { "epoch": 0.15427259491544407, "grad_norm": 1.3838692903518677, "learning_rate": 1.3765e-05, "loss": 0.5169, "step": 2755 }, { "epoch": 
0.15432859222757309, "grad_norm": 1.2695553302764893, "learning_rate": 1.377e-05, "loss": 0.5031, "step": 2756 }, { "epoch": 0.1543845895397021, "grad_norm": 1.3294891119003296, "learning_rate": 1.3775000000000001e-05, "loss": 0.5022, "step": 2757 }, { "epoch": 0.15444058685183112, "grad_norm": 1.100566029548645, "learning_rate": 1.3780000000000002e-05, "loss": 0.4378, "step": 2758 }, { "epoch": 0.15449658416396014, "grad_norm": 1.1493943929672241, "learning_rate": 1.3785000000000001e-05, "loss": 0.5951, "step": 2759 }, { "epoch": 0.15455258147608913, "grad_norm": 1.3388559818267822, "learning_rate": 1.379e-05, "loss": 0.4385, "step": 2760 }, { "epoch": 0.15460857878821815, "grad_norm": 1.2471098899841309, "learning_rate": 1.3795e-05, "loss": 0.3845, "step": 2761 }, { "epoch": 0.15466457610034717, "grad_norm": 1.0742604732513428, "learning_rate": 1.3800000000000002e-05, "loss": 0.343, "step": 2762 }, { "epoch": 0.1547205734124762, "grad_norm": 1.1153868436813354, "learning_rate": 1.3805e-05, "loss": 0.3902, "step": 2763 }, { "epoch": 0.1547765707246052, "grad_norm": 1.2439930438995361, "learning_rate": 1.381e-05, "loss": 0.4284, "step": 2764 }, { "epoch": 0.15483256803673423, "grad_norm": 2.0175135135650635, "learning_rate": 1.3815e-05, "loss": 0.3932, "step": 2765 }, { "epoch": 0.15488856534886325, "grad_norm": 0.9710941910743713, "learning_rate": 1.382e-05, "loss": 0.2777, "step": 2766 }, { "epoch": 0.15494456266099227, "grad_norm": 1.301509976387024, "learning_rate": 1.3825000000000002e-05, "loss": 0.418, "step": 2767 }, { "epoch": 0.1550005599731213, "grad_norm": 1.266956090927124, "learning_rate": 1.3830000000000001e-05, "loss": 0.4642, "step": 2768 }, { "epoch": 0.1550565572852503, "grad_norm": 1.3388291597366333, "learning_rate": 1.3835e-05, "loss": 0.5071, "step": 2769 }, { "epoch": 0.15511255459737933, "grad_norm": 1.240614414215088, "learning_rate": 1.384e-05, "loss": 0.4912, "step": 2770 }, { "epoch": 0.15516855190950835, "grad_norm": 1.2789167165756226, 
"learning_rate": 1.3845e-05, "loss": 0.4411, "step": 2771 }, { "epoch": 0.15522454922163736, "grad_norm": 1.1685004234313965, "learning_rate": 1.3850000000000001e-05, "loss": 0.4822, "step": 2772 }, { "epoch": 0.15528054653376638, "grad_norm": 1.5487463474273682, "learning_rate": 1.3855000000000002e-05, "loss": 0.4153, "step": 2773 }, { "epoch": 0.1553365438458954, "grad_norm": 1.0213351249694824, "learning_rate": 1.3860000000000001e-05, "loss": 0.4142, "step": 2774 }, { "epoch": 0.15539254115802442, "grad_norm": 1.3269951343536377, "learning_rate": 1.3865e-05, "loss": 0.3837, "step": 2775 }, { "epoch": 0.15544853847015344, "grad_norm": 1.3169128894805908, "learning_rate": 1.387e-05, "loss": 0.3842, "step": 2776 }, { "epoch": 0.15550453578228246, "grad_norm": 1.3252760171890259, "learning_rate": 1.3875000000000002e-05, "loss": 0.3117, "step": 2777 }, { "epoch": 0.15556053309441148, "grad_norm": 1.3185728788375854, "learning_rate": 1.3880000000000001e-05, "loss": 0.5022, "step": 2778 }, { "epoch": 0.1556165304065405, "grad_norm": 1.1187517642974854, "learning_rate": 1.3885e-05, "loss": 0.4589, "step": 2779 }, { "epoch": 0.1556725277186695, "grad_norm": 1.306776762008667, "learning_rate": 1.389e-05, "loss": 0.6016, "step": 2780 }, { "epoch": 0.1557285250307985, "grad_norm": 1.2258596420288086, "learning_rate": 1.3895e-05, "loss": 0.506, "step": 2781 }, { "epoch": 0.15578452234292753, "grad_norm": 1.2695173025131226, "learning_rate": 1.3900000000000002e-05, "loss": 0.5451, "step": 2782 }, { "epoch": 0.15584051965505655, "grad_norm": 1.2983871698379517, "learning_rate": 1.3905000000000002e-05, "loss": 0.4632, "step": 2783 }, { "epoch": 0.15589651696718557, "grad_norm": 1.0823630094528198, "learning_rate": 1.391e-05, "loss": 0.3751, "step": 2784 }, { "epoch": 0.15595251427931459, "grad_norm": 1.118971586227417, "learning_rate": 1.3915e-05, "loss": 0.4565, "step": 2785 }, { "epoch": 0.1560085115914436, "grad_norm": 1.221749186515808, "learning_rate": 
1.3919999999999999e-05, "loss": 0.415, "step": 2786 }, { "epoch": 0.15606450890357262, "grad_norm": 1.352028727531433, "learning_rate": 1.3925000000000001e-05, "loss": 0.4915, "step": 2787 }, { "epoch": 0.15612050621570164, "grad_norm": 1.284001111984253, "learning_rate": 1.3930000000000002e-05, "loss": 0.4595, "step": 2788 }, { "epoch": 0.15617650352783066, "grad_norm": 1.2041031122207642, "learning_rate": 1.3935000000000001e-05, "loss": 0.4143, "step": 2789 }, { "epoch": 0.15623250083995968, "grad_norm": 1.3998245000839233, "learning_rate": 1.394e-05, "loss": 0.5409, "step": 2790 }, { "epoch": 0.1562884981520887, "grad_norm": 1.4810839891433716, "learning_rate": 1.3945e-05, "loss": 0.4637, "step": 2791 }, { "epoch": 0.15634449546421772, "grad_norm": 1.242527961730957, "learning_rate": 1.3950000000000002e-05, "loss": 0.3989, "step": 2792 }, { "epoch": 0.15640049277634674, "grad_norm": 1.5063345432281494, "learning_rate": 1.3955000000000001e-05, "loss": 0.4764, "step": 2793 }, { "epoch": 0.15645649008847576, "grad_norm": 1.2918951511383057, "learning_rate": 1.396e-05, "loss": 0.469, "step": 2794 }, { "epoch": 0.15651248740060478, "grad_norm": 1.5124133825302124, "learning_rate": 1.3965000000000001e-05, "loss": 0.6527, "step": 2795 }, { "epoch": 0.1565684847127338, "grad_norm": 1.2914239168167114, "learning_rate": 1.397e-05, "loss": 0.4341, "step": 2796 }, { "epoch": 0.15662448202486282, "grad_norm": 1.5263826847076416, "learning_rate": 1.3975000000000003e-05, "loss": 0.5055, "step": 2797 }, { "epoch": 0.15668047933699183, "grad_norm": 1.289559245109558, "learning_rate": 1.3980000000000002e-05, "loss": 0.4069, "step": 2798 }, { "epoch": 0.15673647664912085, "grad_norm": 1.3386255502700806, "learning_rate": 1.3985e-05, "loss": 0.5132, "step": 2799 }, { "epoch": 0.15679247396124987, "grad_norm": 1.1883652210235596, "learning_rate": 1.399e-05, "loss": 0.5696, "step": 2800 }, { "epoch": 0.15684847127337886, "grad_norm": 1.2485971450805664, "learning_rate": 
1.3994999999999999e-05, "loss": 0.3799, "step": 2801 }, { "epoch": 0.15690446858550788, "grad_norm": 1.091121792793274, "learning_rate": 1.4000000000000001e-05, "loss": 0.3849, "step": 2802 }, { "epoch": 0.1569604658976369, "grad_norm": 1.335418462753296, "learning_rate": 1.4005000000000002e-05, "loss": 0.5873, "step": 2803 }, { "epoch": 0.15701646320976592, "grad_norm": 1.2520933151245117, "learning_rate": 1.4010000000000001e-05, "loss": 0.388, "step": 2804 }, { "epoch": 0.15707246052189494, "grad_norm": 1.3190689086914062, "learning_rate": 1.4015e-05, "loss": 0.5136, "step": 2805 }, { "epoch": 0.15712845783402396, "grad_norm": 1.1924923658370972, "learning_rate": 1.402e-05, "loss": 0.5447, "step": 2806 }, { "epoch": 0.15718445514615298, "grad_norm": 1.170854091644287, "learning_rate": 1.4025000000000002e-05, "loss": 0.5238, "step": 2807 }, { "epoch": 0.157240452458282, "grad_norm": 1.2387036085128784, "learning_rate": 1.4030000000000001e-05, "loss": 0.5482, "step": 2808 }, { "epoch": 0.15729644977041102, "grad_norm": 1.2129141092300415, "learning_rate": 1.4035e-05, "loss": 0.4282, "step": 2809 }, { "epoch": 0.15735244708254004, "grad_norm": 1.2074551582336426, "learning_rate": 1.4040000000000001e-05, "loss": 0.4376, "step": 2810 }, { "epoch": 0.15740844439466906, "grad_norm": 1.2769153118133545, "learning_rate": 1.4045e-05, "loss": 0.5132, "step": 2811 }, { "epoch": 0.15746444170679808, "grad_norm": 1.108242154121399, "learning_rate": 1.4050000000000003e-05, "loss": 0.4453, "step": 2812 }, { "epoch": 0.1575204390189271, "grad_norm": 1.230486512184143, "learning_rate": 1.4055000000000002e-05, "loss": 0.3676, "step": 2813 }, { "epoch": 0.1575764363310561, "grad_norm": 1.2049204111099243, "learning_rate": 1.4060000000000001e-05, "loss": 0.4022, "step": 2814 }, { "epoch": 0.15763243364318513, "grad_norm": 1.43659508228302, "learning_rate": 1.4065e-05, "loss": 0.3815, "step": 2815 }, { "epoch": 0.15768843095531415, "grad_norm": 1.3100559711456299, "learning_rate": 
1.4069999999999999e-05, "loss": 0.4313, "step": 2816 }, { "epoch": 0.15774442826744317, "grad_norm": 1.345204472541809, "learning_rate": 1.4075e-05, "loss": 0.3449, "step": 2817 }, { "epoch": 0.1578004255795722, "grad_norm": 1.1812174320220947, "learning_rate": 1.408e-05, "loss": 0.4178, "step": 2818 }, { "epoch": 0.1578564228917012, "grad_norm": 1.0329145193099976, "learning_rate": 1.4085000000000002e-05, "loss": 0.3366, "step": 2819 }, { "epoch": 0.15791242020383023, "grad_norm": 1.1096454858779907, "learning_rate": 1.409e-05, "loss": 0.4205, "step": 2820 }, { "epoch": 0.15796841751595925, "grad_norm": 1.0906081199645996, "learning_rate": 1.4095e-05, "loss": 0.41, "step": 2821 }, { "epoch": 0.15802441482808824, "grad_norm": 1.1314079761505127, "learning_rate": 1.4099999999999999e-05, "loss": 0.4788, "step": 2822 }, { "epoch": 0.15808041214021726, "grad_norm": 1.343037486076355, "learning_rate": 1.4105000000000001e-05, "loss": 0.4959, "step": 2823 }, { "epoch": 0.15813640945234628, "grad_norm": 1.146909236907959, "learning_rate": 1.411e-05, "loss": 0.4298, "step": 2824 }, { "epoch": 0.1581924067644753, "grad_norm": 1.0961363315582275, "learning_rate": 1.4115000000000001e-05, "loss": 0.3701, "step": 2825 }, { "epoch": 0.15824840407660432, "grad_norm": 1.1320148706436157, "learning_rate": 1.412e-05, "loss": 0.365, "step": 2826 }, { "epoch": 0.15830440138873333, "grad_norm": 1.2287229299545288, "learning_rate": 1.4125e-05, "loss": 0.3599, "step": 2827 }, { "epoch": 0.15836039870086235, "grad_norm": 1.4046834707260132, "learning_rate": 1.4130000000000002e-05, "loss": 0.4867, "step": 2828 }, { "epoch": 0.15841639601299137, "grad_norm": 1.079779863357544, "learning_rate": 1.4135000000000001e-05, "loss": 0.3229, "step": 2829 }, { "epoch": 0.1584723933251204, "grad_norm": 1.482107400894165, "learning_rate": 1.414e-05, "loss": 0.5059, "step": 2830 }, { "epoch": 0.1585283906372494, "grad_norm": 1.0793694257736206, "learning_rate": 1.4145e-05, "loss": 0.3989, "step": 2831 }, 
{ "epoch": 0.15858438794937843, "grad_norm": 1.906036376953125, "learning_rate": 1.415e-05, "loss": 0.4956, "step": 2832 }, { "epoch": 0.15864038526150745, "grad_norm": 1.4872894287109375, "learning_rate": 1.4155000000000001e-05, "loss": 0.4636, "step": 2833 }, { "epoch": 0.15869638257363647, "grad_norm": 1.1432147026062012, "learning_rate": 1.4160000000000002e-05, "loss": 0.4302, "step": 2834 }, { "epoch": 0.1587523798857655, "grad_norm": 1.4246662855148315, "learning_rate": 1.4165e-05, "loss": 0.3449, "step": 2835 }, { "epoch": 0.1588083771978945, "grad_norm": 1.111423134803772, "learning_rate": 1.417e-05, "loss": 0.3431, "step": 2836 }, { "epoch": 0.15886437451002353, "grad_norm": 1.2800912857055664, "learning_rate": 1.4174999999999999e-05, "loss": 0.5825, "step": 2837 }, { "epoch": 0.15892037182215255, "grad_norm": 1.2523735761642456, "learning_rate": 1.4180000000000001e-05, "loss": 0.5138, "step": 2838 }, { "epoch": 0.15897636913428156, "grad_norm": 1.3163225650787354, "learning_rate": 1.4185e-05, "loss": 0.4908, "step": 2839 }, { "epoch": 0.15903236644641058, "grad_norm": 1.0494143962860107, "learning_rate": 1.4190000000000001e-05, "loss": 0.3915, "step": 2840 }, { "epoch": 0.1590883637585396, "grad_norm": 1.2246289253234863, "learning_rate": 1.4195e-05, "loss": 0.4301, "step": 2841 }, { "epoch": 0.1591443610706686, "grad_norm": 1.6092647314071655, "learning_rate": 1.42e-05, "loss": 0.6129, "step": 2842 }, { "epoch": 0.1592003583827976, "grad_norm": 1.4280298948287964, "learning_rate": 1.4205000000000002e-05, "loss": 0.517, "step": 2843 }, { "epoch": 0.15925635569492663, "grad_norm": 1.171018362045288, "learning_rate": 1.4210000000000001e-05, "loss": 0.3967, "step": 2844 }, { "epoch": 0.15931235300705565, "grad_norm": 1.0142732858657837, "learning_rate": 1.4215e-05, "loss": 0.3814, "step": 2845 }, { "epoch": 0.15936835031918467, "grad_norm": 1.1765437126159668, "learning_rate": 1.422e-05, "loss": 0.4179, "step": 2846 }, { "epoch": 0.1594243476313137, 
"grad_norm": 1.0797767639160156, "learning_rate": 1.4225e-05, "loss": 0.3455, "step": 2847 }, { "epoch": 0.1594803449434427, "grad_norm": 1.056195855140686, "learning_rate": 1.4230000000000001e-05, "loss": 0.4499, "step": 2848 }, { "epoch": 0.15953634225557173, "grad_norm": 1.177141785621643, "learning_rate": 1.4235000000000002e-05, "loss": 0.4186, "step": 2849 }, { "epoch": 0.15959233956770075, "grad_norm": 1.1607415676116943, "learning_rate": 1.4240000000000001e-05, "loss": 0.4834, "step": 2850 }, { "epoch": 0.15964833687982977, "grad_norm": 1.601799488067627, "learning_rate": 1.4245e-05, "loss": 0.455, "step": 2851 }, { "epoch": 0.15970433419195879, "grad_norm": 1.3250458240509033, "learning_rate": 1.4249999999999999e-05, "loss": 0.4554, "step": 2852 }, { "epoch": 0.1597603315040878, "grad_norm": 1.2698512077331543, "learning_rate": 1.4255000000000002e-05, "loss": 0.5106, "step": 2853 }, { "epoch": 0.15981632881621682, "grad_norm": 1.3289108276367188, "learning_rate": 1.426e-05, "loss": 0.4777, "step": 2854 }, { "epoch": 0.15987232612834584, "grad_norm": 1.2498283386230469, "learning_rate": 1.4265e-05, "loss": 0.471, "step": 2855 }, { "epoch": 0.15992832344047486, "grad_norm": 1.1666674613952637, "learning_rate": 1.427e-05, "loss": 0.4196, "step": 2856 }, { "epoch": 0.15998432075260388, "grad_norm": 1.1459076404571533, "learning_rate": 1.4275e-05, "loss": 0.4377, "step": 2857 }, { "epoch": 0.1600403180647329, "grad_norm": 1.3304330110549927, "learning_rate": 1.4280000000000002e-05, "loss": 0.6097, "step": 2858 }, { "epoch": 0.16009631537686192, "grad_norm": 1.171502947807312, "learning_rate": 1.4285000000000001e-05, "loss": 0.3562, "step": 2859 }, { "epoch": 0.16015231268899094, "grad_norm": 1.9733786582946777, "learning_rate": 1.429e-05, "loss": 0.4004, "step": 2860 }, { "epoch": 0.16020831000111996, "grad_norm": 1.3946818113327026, "learning_rate": 1.4295e-05, "loss": 0.4732, "step": 2861 }, { "epoch": 0.16026430731324898, "grad_norm": 1.0983641147613525, 
"learning_rate": 1.43e-05, "loss": 0.3928, "step": 2862 }, { "epoch": 0.16032030462537797, "grad_norm": 1.106260895729065, "learning_rate": 1.4305000000000001e-05, "loss": 0.3349, "step": 2863 }, { "epoch": 0.160376301937507, "grad_norm": 1.3615952730178833, "learning_rate": 1.4310000000000002e-05, "loss": 0.4601, "step": 2864 }, { "epoch": 0.160432299249636, "grad_norm": 1.24410879611969, "learning_rate": 1.4315000000000001e-05, "loss": 0.4041, "step": 2865 }, { "epoch": 0.16048829656176503, "grad_norm": 1.5198066234588623, "learning_rate": 1.432e-05, "loss": 0.4549, "step": 2866 }, { "epoch": 0.16054429387389405, "grad_norm": 1.0579211711883545, "learning_rate": 1.4325e-05, "loss": 0.414, "step": 2867 }, { "epoch": 0.16060029118602306, "grad_norm": 1.3212554454803467, "learning_rate": 1.4330000000000002e-05, "loss": 0.6388, "step": 2868 }, { "epoch": 0.16065628849815208, "grad_norm": 1.2160145044326782, "learning_rate": 1.4335e-05, "loss": 0.4994, "step": 2869 }, { "epoch": 0.1607122858102811, "grad_norm": 1.075178623199463, "learning_rate": 1.434e-05, "loss": 0.3752, "step": 2870 }, { "epoch": 0.16076828312241012, "grad_norm": 1.1443240642547607, "learning_rate": 1.4345e-05, "loss": 0.5189, "step": 2871 }, { "epoch": 0.16082428043453914, "grad_norm": 1.1939789056777954, "learning_rate": 1.435e-05, "loss": 0.4313, "step": 2872 }, { "epoch": 0.16088027774666816, "grad_norm": 1.0715328454971313, "learning_rate": 1.4355000000000002e-05, "loss": 0.3825, "step": 2873 }, { "epoch": 0.16093627505879718, "grad_norm": 1.2756158113479614, "learning_rate": 1.4360000000000001e-05, "loss": 0.5611, "step": 2874 }, { "epoch": 0.1609922723709262, "grad_norm": 1.0364662408828735, "learning_rate": 1.4365e-05, "loss": 0.3575, "step": 2875 }, { "epoch": 0.16104826968305522, "grad_norm": 1.26275634765625, "learning_rate": 1.437e-05, "loss": 0.5086, "step": 2876 }, { "epoch": 0.16110426699518424, "grad_norm": 1.209248423576355, "learning_rate": 1.4374999999999999e-05, "loss": 0.4786, 
"step": 2877 }, { "epoch": 0.16116026430731326, "grad_norm": 1.212764859199524, "learning_rate": 1.4380000000000001e-05, "loss": 0.404, "step": 2878 }, { "epoch": 0.16121626161944227, "grad_norm": 1.119140625, "learning_rate": 1.4385000000000002e-05, "loss": 0.3736, "step": 2879 }, { "epoch": 0.1612722589315713, "grad_norm": 2.300510883331299, "learning_rate": 1.4390000000000001e-05, "loss": 0.4957, "step": 2880 }, { "epoch": 0.1613282562437003, "grad_norm": 1.0119082927703857, "learning_rate": 1.4395e-05, "loss": 0.4613, "step": 2881 }, { "epoch": 0.16138425355582933, "grad_norm": 1.2868393659591675, "learning_rate": 1.44e-05, "loss": 0.7153, "step": 2882 }, { "epoch": 0.16144025086795835, "grad_norm": 1.080954909324646, "learning_rate": 1.4405000000000002e-05, "loss": 0.4354, "step": 2883 }, { "epoch": 0.16149624818008734, "grad_norm": 1.0970935821533203, "learning_rate": 1.4410000000000001e-05, "loss": 0.4083, "step": 2884 }, { "epoch": 0.16155224549221636, "grad_norm": 2.6635384559631348, "learning_rate": 1.4415e-05, "loss": 0.5151, "step": 2885 }, { "epoch": 0.16160824280434538, "grad_norm": 1.5100364685058594, "learning_rate": 1.4420000000000001e-05, "loss": 0.414, "step": 2886 }, { "epoch": 0.1616642401164744, "grad_norm": 1.3002201318740845, "learning_rate": 1.4425e-05, "loss": 0.5592, "step": 2887 }, { "epoch": 0.16172023742860342, "grad_norm": 1.2284952402114868, "learning_rate": 1.4430000000000002e-05, "loss": 0.4932, "step": 2888 }, { "epoch": 0.16177623474073244, "grad_norm": 1.1353243589401245, "learning_rate": 1.4435000000000002e-05, "loss": 0.455, "step": 2889 }, { "epoch": 0.16183223205286146, "grad_norm": 1.4194884300231934, "learning_rate": 1.444e-05, "loss": 0.4574, "step": 2890 }, { "epoch": 0.16188822936499048, "grad_norm": 0.9726737141609192, "learning_rate": 1.4445e-05, "loss": 0.4064, "step": 2891 }, { "epoch": 0.1619442266771195, "grad_norm": 1.0060579776763916, "learning_rate": 1.4449999999999999e-05, "loss": 0.3954, "step": 2892 }, { 
"epoch": 0.16200022398924852, "grad_norm": 1.358452320098877, "learning_rate": 1.4455000000000001e-05, "loss": 0.5166, "step": 2893 }, { "epoch": 0.16205622130137753, "grad_norm": 1.0128717422485352, "learning_rate": 1.4460000000000002e-05, "loss": 0.364, "step": 2894 }, { "epoch": 0.16211221861350655, "grad_norm": 1.2332308292388916, "learning_rate": 1.4465000000000001e-05, "loss": 0.3993, "step": 2895 }, { "epoch": 0.16216821592563557, "grad_norm": 1.2258899211883545, "learning_rate": 1.447e-05, "loss": 0.4307, "step": 2896 }, { "epoch": 0.1622242132377646, "grad_norm": 1.2921556234359741, "learning_rate": 1.4475e-05, "loss": 0.4426, "step": 2897 }, { "epoch": 0.1622802105498936, "grad_norm": 1.0930753946304321, "learning_rate": 1.4480000000000002e-05, "loss": 0.3127, "step": 2898 }, { "epoch": 0.16233620786202263, "grad_norm": 1.4140493869781494, "learning_rate": 1.4485000000000001e-05, "loss": 0.4241, "step": 2899 }, { "epoch": 0.16239220517415165, "grad_norm": 1.1093642711639404, "learning_rate": 1.449e-05, "loss": 0.4391, "step": 2900 }, { "epoch": 0.16244820248628067, "grad_norm": 0.9811723232269287, "learning_rate": 1.4495000000000001e-05, "loss": 0.3002, "step": 2901 }, { "epoch": 0.1625041997984097, "grad_norm": 1.0897340774536133, "learning_rate": 1.45e-05, "loss": 0.4447, "step": 2902 }, { "epoch": 0.1625601971105387, "grad_norm": 1.60068941116333, "learning_rate": 1.4505000000000003e-05, "loss": 0.4836, "step": 2903 }, { "epoch": 0.1626161944226677, "grad_norm": 1.0105966329574585, "learning_rate": 1.4510000000000002e-05, "loss": 0.4024, "step": 2904 }, { "epoch": 0.16267219173479672, "grad_norm": 1.241045594215393, "learning_rate": 1.4515e-05, "loss": 0.452, "step": 2905 }, { "epoch": 0.16272818904692574, "grad_norm": 1.3478405475616455, "learning_rate": 1.452e-05, "loss": 0.6681, "step": 2906 }, { "epoch": 0.16278418635905476, "grad_norm": 1.3035074472427368, "learning_rate": 1.4524999999999999e-05, "loss": 0.4031, "step": 2907 }, { "epoch": 
0.16284018367118377, "grad_norm": 1.215703010559082, "learning_rate": 1.4530000000000001e-05, "loss": 0.4691, "step": 2908 }, { "epoch": 0.1628961809833128, "grad_norm": 1.289297103881836, "learning_rate": 1.4535e-05, "loss": 0.3669, "step": 2909 }, { "epoch": 0.1629521782954418, "grad_norm": 1.149066686630249, "learning_rate": 1.4540000000000001e-05, "loss": 0.4264, "step": 2910 }, { "epoch": 0.16300817560757083, "grad_norm": 1.2812926769256592, "learning_rate": 1.4545e-05, "loss": 0.4938, "step": 2911 }, { "epoch": 0.16306417291969985, "grad_norm": 1.0730267763137817, "learning_rate": 1.455e-05, "loss": 0.4693, "step": 2912 }, { "epoch": 0.16312017023182887, "grad_norm": 1.1804014444351196, "learning_rate": 1.4555000000000002e-05, "loss": 0.4372, "step": 2913 }, { "epoch": 0.1631761675439579, "grad_norm": 1.2339531183242798, "learning_rate": 1.4560000000000001e-05, "loss": 0.4266, "step": 2914 }, { "epoch": 0.1632321648560869, "grad_norm": 1.1102869510650635, "learning_rate": 1.4565e-05, "loss": 0.4657, "step": 2915 }, { "epoch": 0.16328816216821593, "grad_norm": 1.4539523124694824, "learning_rate": 1.4570000000000001e-05, "loss": 0.4176, "step": 2916 }, { "epoch": 0.16334415948034495, "grad_norm": 1.2633984088897705, "learning_rate": 1.4575e-05, "loss": 0.4214, "step": 2917 }, { "epoch": 0.16340015679247397, "grad_norm": 1.2158887386322021, "learning_rate": 1.4580000000000003e-05, "loss": 0.4991, "step": 2918 }, { "epoch": 0.16345615410460299, "grad_norm": 1.266831636428833, "learning_rate": 1.4585000000000002e-05, "loss": 0.5787, "step": 2919 }, { "epoch": 0.163512151416732, "grad_norm": 1.1851109266281128, "learning_rate": 1.4590000000000001e-05, "loss": 0.5018, "step": 2920 }, { "epoch": 0.16356814872886102, "grad_norm": 1.096369981765747, "learning_rate": 1.4595e-05, "loss": 0.3453, "step": 2921 }, { "epoch": 0.16362414604099004, "grad_norm": 1.7543258666992188, "learning_rate": 1.4599999999999999e-05, "loss": 0.5218, "step": 2922 }, { "epoch": 
0.16368014335311906, "grad_norm": 1.2711646556854248, "learning_rate": 1.4605000000000002e-05, "loss": 0.3713, "step": 2923 }, { "epoch": 0.16373614066524808, "grad_norm": 1.3825998306274414, "learning_rate": 1.461e-05, "loss": 0.4534, "step": 2924 }, { "epoch": 0.16379213797737707, "grad_norm": 1.39090096950531, "learning_rate": 1.4615000000000002e-05, "loss": 0.6809, "step": 2925 }, { "epoch": 0.1638481352895061, "grad_norm": 1.1341646909713745, "learning_rate": 1.462e-05, "loss": 0.2352, "step": 2926 }, { "epoch": 0.1639041326016351, "grad_norm": 1.1790781021118164, "learning_rate": 1.4625e-05, "loss": 0.4092, "step": 2927 }, { "epoch": 0.16396012991376413, "grad_norm": 1.4217054843902588, "learning_rate": 1.4630000000000002e-05, "loss": 0.5113, "step": 2928 }, { "epoch": 0.16401612722589315, "grad_norm": 1.1839795112609863, "learning_rate": 1.4635000000000001e-05, "loss": 0.4317, "step": 2929 }, { "epoch": 0.16407212453802217, "grad_norm": 1.1309460401535034, "learning_rate": 1.464e-05, "loss": 0.3266, "step": 2930 }, { "epoch": 0.1641281218501512, "grad_norm": 1.3069939613342285, "learning_rate": 1.4645e-05, "loss": 0.5372, "step": 2931 }, { "epoch": 0.1641841191622802, "grad_norm": 1.2711175680160522, "learning_rate": 1.465e-05, "loss": 0.4304, "step": 2932 }, { "epoch": 0.16424011647440923, "grad_norm": 1.1943514347076416, "learning_rate": 1.4655000000000003e-05, "loss": 0.3806, "step": 2933 }, { "epoch": 0.16429611378653824, "grad_norm": 1.2070180177688599, "learning_rate": 1.4660000000000002e-05, "loss": 0.4196, "step": 2934 }, { "epoch": 0.16435211109866726, "grad_norm": 1.223737120628357, "learning_rate": 1.4665000000000001e-05, "loss": 0.4812, "step": 2935 }, { "epoch": 0.16440810841079628, "grad_norm": 1.440280556678772, "learning_rate": 1.467e-05, "loss": 0.4457, "step": 2936 }, { "epoch": 0.1644641057229253, "grad_norm": 1.0822083950042725, "learning_rate": 1.4675e-05, "loss": 0.3355, "step": 2937 }, { "epoch": 0.16452010303505432, "grad_norm": 
1.3040459156036377, "learning_rate": 1.4680000000000002e-05, "loss": 0.4158, "step": 2938 }, { "epoch": 0.16457610034718334, "grad_norm": 1.1827491521835327, "learning_rate": 1.4685000000000001e-05, "loss": 0.4322, "step": 2939 }, { "epoch": 0.16463209765931236, "grad_norm": 1.0448668003082275, "learning_rate": 1.4690000000000002e-05, "loss": 0.3462, "step": 2940 }, { "epoch": 0.16468809497144138, "grad_norm": 1.0867873430252075, "learning_rate": 1.4695e-05, "loss": 0.4471, "step": 2941 }, { "epoch": 0.1647440922835704, "grad_norm": 1.357195496559143, "learning_rate": 1.47e-05, "loss": 0.491, "step": 2942 }, { "epoch": 0.16480008959569942, "grad_norm": 1.3898372650146484, "learning_rate": 1.4704999999999999e-05, "loss": 0.5863, "step": 2943 }, { "epoch": 0.16485608690782844, "grad_norm": 1.168304681777954, "learning_rate": 1.4710000000000001e-05, "loss": 0.3575, "step": 2944 }, { "epoch": 0.16491208421995746, "grad_norm": 1.2201783657073975, "learning_rate": 1.4715e-05, "loss": 0.4379, "step": 2945 }, { "epoch": 0.16496808153208645, "grad_norm": 1.1552715301513672, "learning_rate": 1.472e-05, "loss": 0.4842, "step": 2946 }, { "epoch": 0.16502407884421547, "grad_norm": 1.0281933546066284, "learning_rate": 1.4725e-05, "loss": 0.344, "step": 2947 }, { "epoch": 0.16508007615634449, "grad_norm": 1.1922563314437866, "learning_rate": 1.473e-05, "loss": 0.4426, "step": 2948 }, { "epoch": 0.1651360734684735, "grad_norm": 1.4431538581848145, "learning_rate": 1.4735000000000002e-05, "loss": 0.5686, "step": 2949 }, { "epoch": 0.16519207078060252, "grad_norm": 1.0972483158111572, "learning_rate": 1.4740000000000001e-05, "loss": 0.36, "step": 2950 }, { "epoch": 0.16524806809273154, "grad_norm": 1.3279143571853638, "learning_rate": 1.4745e-05, "loss": 0.4609, "step": 2951 }, { "epoch": 0.16530406540486056, "grad_norm": 1.2672828435897827, "learning_rate": 1.475e-05, "loss": 0.5439, "step": 2952 }, { "epoch": 0.16536006271698958, "grad_norm": 1.2774341106414795, "learning_rate": 
1.4755e-05, "loss": 0.3992, "step": 2953 }, { "epoch": 0.1654160600291186, "grad_norm": 1.1626168489456177, "learning_rate": 1.4760000000000001e-05, "loss": 0.4066, "step": 2954 }, { "epoch": 0.16547205734124762, "grad_norm": 1.351238489151001, "learning_rate": 1.4765000000000002e-05, "loss": 0.4526, "step": 2955 }, { "epoch": 0.16552805465337664, "grad_norm": 1.3523439168930054, "learning_rate": 1.4770000000000001e-05, "loss": 0.2999, "step": 2956 }, { "epoch": 0.16558405196550566, "grad_norm": 1.2778282165527344, "learning_rate": 1.4775e-05, "loss": 0.4493, "step": 2957 }, { "epoch": 0.16564004927763468, "grad_norm": 1.4594963788986206, "learning_rate": 1.4779999999999999e-05, "loss": 0.4177, "step": 2958 }, { "epoch": 0.1656960465897637, "grad_norm": 1.1730430126190186, "learning_rate": 1.4785000000000002e-05, "loss": 0.4972, "step": 2959 }, { "epoch": 0.16575204390189271, "grad_norm": 1.3805487155914307, "learning_rate": 1.479e-05, "loss": 0.4852, "step": 2960 }, { "epoch": 0.16580804121402173, "grad_norm": 1.6004854440689087, "learning_rate": 1.4795e-05, "loss": 0.4528, "step": 2961 }, { "epoch": 0.16586403852615075, "grad_norm": 1.24202299118042, "learning_rate": 1.48e-05, "loss": 0.4216, "step": 2962 }, { "epoch": 0.16592003583827977, "grad_norm": 1.1655060052871704, "learning_rate": 1.4805e-05, "loss": 0.298, "step": 2963 }, { "epoch": 0.1659760331504088, "grad_norm": 1.1057674884796143, "learning_rate": 1.4810000000000002e-05, "loss": 0.3616, "step": 2964 }, { "epoch": 0.1660320304625378, "grad_norm": 1.1026337146759033, "learning_rate": 1.4815000000000001e-05, "loss": 0.4615, "step": 2965 }, { "epoch": 0.1660880277746668, "grad_norm": 1.2011353969573975, "learning_rate": 1.482e-05, "loss": 0.4535, "step": 2966 }, { "epoch": 0.16614402508679582, "grad_norm": 1.1062843799591064, "learning_rate": 1.4825e-05, "loss": 0.3815, "step": 2967 }, { "epoch": 0.16620002239892484, "grad_norm": 1.2528876066207886, "learning_rate": 1.4829999999999999e-05, "loss": 
0.4937, "step": 2968 }, { "epoch": 0.16625601971105386, "grad_norm": 1.081746220588684, "learning_rate": 1.4835000000000001e-05, "loss": 0.4291, "step": 2969 }, { "epoch": 0.16631201702318288, "grad_norm": 1.4553874731063843, "learning_rate": 1.4840000000000002e-05, "loss": 0.5446, "step": 2970 }, { "epoch": 0.1663680143353119, "grad_norm": 1.3763076066970825, "learning_rate": 1.4845000000000001e-05, "loss": 0.4954, "step": 2971 }, { "epoch": 0.16642401164744092, "grad_norm": 1.634247899055481, "learning_rate": 1.485e-05, "loss": 0.5409, "step": 2972 }, { "epoch": 0.16648000895956994, "grad_norm": 1.0740395784378052, "learning_rate": 1.4855e-05, "loss": 0.4845, "step": 2973 }, { "epoch": 0.16653600627169896, "grad_norm": 1.0615090131759644, "learning_rate": 1.4860000000000002e-05, "loss": 0.431, "step": 2974 }, { "epoch": 0.16659200358382797, "grad_norm": 1.1746183633804321, "learning_rate": 1.4865e-05, "loss": 0.4175, "step": 2975 }, { "epoch": 0.166648000895957, "grad_norm": 1.1579493284225464, "learning_rate": 1.487e-05, "loss": 0.5676, "step": 2976 }, { "epoch": 0.166703998208086, "grad_norm": 1.1894301176071167, "learning_rate": 1.4875e-05, "loss": 0.3935, "step": 2977 }, { "epoch": 0.16675999552021503, "grad_norm": 1.4352726936340332, "learning_rate": 1.488e-05, "loss": 0.525, "step": 2978 }, { "epoch": 0.16681599283234405, "grad_norm": 1.2767298221588135, "learning_rate": 1.4885000000000002e-05, "loss": 0.4904, "step": 2979 }, { "epoch": 0.16687199014447307, "grad_norm": 1.2601221799850464, "learning_rate": 1.4890000000000001e-05, "loss": 0.4483, "step": 2980 }, { "epoch": 0.1669279874566021, "grad_norm": 1.524616003036499, "learning_rate": 1.4895e-05, "loss": 0.6168, "step": 2981 }, { "epoch": 0.1669839847687311, "grad_norm": 1.2325519323349, "learning_rate": 1.49e-05, "loss": 0.4345, "step": 2982 }, { "epoch": 0.16703998208086013, "grad_norm": 1.2944282293319702, "learning_rate": 1.4904999999999999e-05, "loss": 0.5398, "step": 2983 }, { "epoch": 
0.16709597939298915, "grad_norm": 1.2927069664001465, "learning_rate": 1.4910000000000001e-05, "loss": 0.6528, "step": 2984 }, { "epoch": 0.16715197670511817, "grad_norm": 1.38869047164917, "learning_rate": 1.4915000000000002e-05, "loss": 0.4316, "step": 2985 }, { "epoch": 0.16720797401724719, "grad_norm": 1.2885438203811646, "learning_rate": 1.4920000000000001e-05, "loss": 0.4888, "step": 2986 }, { "epoch": 0.16726397132937618, "grad_norm": 1.0417410135269165, "learning_rate": 1.4925e-05, "loss": 0.3589, "step": 2987 }, { "epoch": 0.1673199686415052, "grad_norm": 1.3367822170257568, "learning_rate": 1.493e-05, "loss": 0.495, "step": 2988 }, { "epoch": 0.16737596595363421, "grad_norm": 1.2681429386138916, "learning_rate": 1.4935000000000002e-05, "loss": 0.411, "step": 2989 }, { "epoch": 0.16743196326576323, "grad_norm": 1.2044123411178589, "learning_rate": 1.4940000000000001e-05, "loss": 0.4475, "step": 2990 }, { "epoch": 0.16748796057789225, "grad_norm": 1.0016268491744995, "learning_rate": 1.4945e-05, "loss": 0.3603, "step": 2991 }, { "epoch": 0.16754395789002127, "grad_norm": 1.2403982877731323, "learning_rate": 1.4950000000000001e-05, "loss": 0.3746, "step": 2992 }, { "epoch": 0.1675999552021503, "grad_norm": 1.594496250152588, "learning_rate": 1.4955e-05, "loss": 0.625, "step": 2993 }, { "epoch": 0.1676559525142793, "grad_norm": 1.1329015493392944, "learning_rate": 1.4960000000000002e-05, "loss": 0.3695, "step": 2994 }, { "epoch": 0.16771194982640833, "grad_norm": 1.2726589441299438, "learning_rate": 1.4965000000000002e-05, "loss": 0.4215, "step": 2995 }, { "epoch": 0.16776794713853735, "grad_norm": 1.1244759559631348, "learning_rate": 1.497e-05, "loss": 0.4554, "step": 2996 }, { "epoch": 0.16782394445066637, "grad_norm": 1.028041958808899, "learning_rate": 1.4975e-05, "loss": 0.4082, "step": 2997 }, { "epoch": 0.1678799417627954, "grad_norm": 1.3320670127868652, "learning_rate": 1.4979999999999999e-05, "loss": 0.5336, "step": 2998 }, { "epoch": 
0.1679359390749244, "grad_norm": 1.1810829639434814, "learning_rate": 1.4985000000000001e-05, "loss": 0.4698, "step": 2999 }, { "epoch": 0.16799193638705343, "grad_norm": 1.2294703722000122, "learning_rate": 1.499e-05, "loss": 0.5342, "step": 3000 }, { "epoch": 0.16804793369918244, "grad_norm": 13.666152954101562, "learning_rate": 1.4995000000000001e-05, "loss": 0.4374, "step": 3001 }, { "epoch": 0.16810393101131146, "grad_norm": 1.1388001441955566, "learning_rate": 1.5e-05, "loss": 0.4293, "step": 3002 }, { "epoch": 0.16815992832344048, "grad_norm": 1.140563726425171, "learning_rate": 1.5005e-05, "loss": 0.411, "step": 3003 }, { "epoch": 0.1682159256355695, "grad_norm": 1.1854054927825928, "learning_rate": 1.5010000000000002e-05, "loss": 0.3789, "step": 3004 }, { "epoch": 0.16827192294769852, "grad_norm": 1.3162811994552612, "learning_rate": 1.5015000000000001e-05, "loss": 0.5886, "step": 3005 }, { "epoch": 0.16832792025982754, "grad_norm": 1.1015150547027588, "learning_rate": 1.502e-05, "loss": 0.3127, "step": 3006 }, { "epoch": 0.16838391757195656, "grad_norm": 1.1969358921051025, "learning_rate": 1.5025000000000001e-05, "loss": 0.4865, "step": 3007 }, { "epoch": 0.16843991488408555, "grad_norm": 1.3611640930175781, "learning_rate": 1.503e-05, "loss": 0.5478, "step": 3008 }, { "epoch": 0.16849591219621457, "grad_norm": 1.1446624994277954, "learning_rate": 1.5035000000000003e-05, "loss": 0.4432, "step": 3009 }, { "epoch": 0.1685519095083436, "grad_norm": 1.3180516958236694, "learning_rate": 1.5040000000000002e-05, "loss": 0.4095, "step": 3010 }, { "epoch": 0.1686079068204726, "grad_norm": 1.0477768182754517, "learning_rate": 1.5045e-05, "loss": 0.4066, "step": 3011 }, { "epoch": 0.16866390413260163, "grad_norm": 1.051212191581726, "learning_rate": 1.505e-05, "loss": 0.3413, "step": 3012 }, { "epoch": 0.16871990144473065, "grad_norm": 1.187883734703064, "learning_rate": 1.5054999999999999e-05, "loss": 0.3827, "step": 3013 }, { "epoch": 0.16877589875685967, 
"grad_norm": 1.0885032415390015, "learning_rate": 1.5060000000000001e-05, "loss": 0.4144, "step": 3014 }, { "epoch": 0.16883189606898868, "grad_norm": 1.3088181018829346, "learning_rate": 1.5065e-05, "loss": 0.4413, "step": 3015 }, { "epoch": 0.1688878933811177, "grad_norm": 1.3181278705596924, "learning_rate": 1.5070000000000001e-05, "loss": 0.3683, "step": 3016 }, { "epoch": 0.16894389069324672, "grad_norm": 1.3163723945617676, "learning_rate": 1.5075e-05, "loss": 0.4511, "step": 3017 }, { "epoch": 0.16899988800537574, "grad_norm": 1.29971444606781, "learning_rate": 1.508e-05, "loss": 0.4471, "step": 3018 }, { "epoch": 0.16905588531750476, "grad_norm": 1.2885290384292603, "learning_rate": 1.5085000000000002e-05, "loss": 0.4663, "step": 3019 }, { "epoch": 0.16911188262963378, "grad_norm": 1.0854727029800415, "learning_rate": 1.5090000000000001e-05, "loss": 0.4764, "step": 3020 }, { "epoch": 0.1691678799417628, "grad_norm": 1.3393666744232178, "learning_rate": 1.5095e-05, "loss": 0.4239, "step": 3021 }, { "epoch": 0.16922387725389182, "grad_norm": 1.4022681713104248, "learning_rate": 1.51e-05, "loss": 0.5483, "step": 3022 }, { "epoch": 0.16927987456602084, "grad_norm": 1.2516348361968994, "learning_rate": 1.5105e-05, "loss": 0.4618, "step": 3023 }, { "epoch": 0.16933587187814986, "grad_norm": 1.2458224296569824, "learning_rate": 1.5110000000000003e-05, "loss": 0.4085, "step": 3024 }, { "epoch": 0.16939186919027888, "grad_norm": 1.6810840368270874, "learning_rate": 1.5115000000000002e-05, "loss": 0.3476, "step": 3025 }, { "epoch": 0.1694478665024079, "grad_norm": 1.6540249586105347, "learning_rate": 1.5120000000000001e-05, "loss": 0.4504, "step": 3026 }, { "epoch": 0.16950386381453691, "grad_norm": 1.420183777809143, "learning_rate": 1.5125e-05, "loss": 0.5645, "step": 3027 }, { "epoch": 0.16955986112666593, "grad_norm": 1.0648268461227417, "learning_rate": 1.5129999999999999e-05, "loss": 0.3991, "step": 3028 }, { "epoch": 0.16961585843879493, "grad_norm": 
1.2405067682266235, "learning_rate": 1.5135000000000002e-05, "loss": 0.5124, "step": 3029 }, { "epoch": 0.16967185575092394, "grad_norm": 1.7114293575286865, "learning_rate": 1.514e-05, "loss": 0.5756, "step": 3030 }, { "epoch": 0.16972785306305296, "grad_norm": 1.1942071914672852, "learning_rate": 1.5145000000000002e-05, "loss": 0.4559, "step": 3031 }, { "epoch": 0.16978385037518198, "grad_norm": 1.2123992443084717, "learning_rate": 1.515e-05, "loss": 0.3711, "step": 3032 }, { "epoch": 0.169839847687311, "grad_norm": 1.1135144233703613, "learning_rate": 1.5155e-05, "loss": 0.5915, "step": 3033 }, { "epoch": 0.16989584499944002, "grad_norm": 1.4512019157409668, "learning_rate": 1.5160000000000002e-05, "loss": 0.4455, "step": 3034 }, { "epoch": 0.16995184231156904, "grad_norm": 2.1916909217834473, "learning_rate": 1.5165000000000001e-05, "loss": 0.4776, "step": 3035 }, { "epoch": 0.17000783962369806, "grad_norm": 1.0539445877075195, "learning_rate": 1.517e-05, "loss": 0.4514, "step": 3036 }, { "epoch": 0.17006383693582708, "grad_norm": 1.3222472667694092, "learning_rate": 1.5175e-05, "loss": 0.4183, "step": 3037 }, { "epoch": 0.1701198342479561, "grad_norm": 1.3580297231674194, "learning_rate": 1.518e-05, "loss": 0.5505, "step": 3038 }, { "epoch": 0.17017583156008512, "grad_norm": 1.196560025215149, "learning_rate": 1.5185000000000003e-05, "loss": 0.437, "step": 3039 }, { "epoch": 0.17023182887221414, "grad_norm": 1.0886143445968628, "learning_rate": 1.5190000000000002e-05, "loss": 0.332, "step": 3040 }, { "epoch": 0.17028782618434316, "grad_norm": 1.1416614055633545, "learning_rate": 1.5195000000000001e-05, "loss": 0.3313, "step": 3041 }, { "epoch": 0.17034382349647217, "grad_norm": 1.246839165687561, "learning_rate": 1.52e-05, "loss": 0.5246, "step": 3042 }, { "epoch": 0.1703998208086012, "grad_norm": 1.0934566259384155, "learning_rate": 1.5205e-05, "loss": 0.488, "step": 3043 }, { "epoch": 0.1704558181207302, "grad_norm": 1.2822872400283813, "learning_rate": 
1.5210000000000002e-05, "loss": 0.4606, "step": 3044 }, { "epoch": 0.17051181543285923, "grad_norm": 1.2843599319458008, "learning_rate": 1.5215000000000001e-05, "loss": 0.4081, "step": 3045 }, { "epoch": 0.17056781274498825, "grad_norm": 1.3425087928771973, "learning_rate": 1.5220000000000002e-05, "loss": 0.4268, "step": 3046 }, { "epoch": 0.17062381005711727, "grad_norm": 1.0990785360336304, "learning_rate": 1.5225e-05, "loss": 0.3835, "step": 3047 }, { "epoch": 0.1706798073692463, "grad_norm": 1.4088256359100342, "learning_rate": 1.523e-05, "loss": 0.8543, "step": 3048 }, { "epoch": 0.17073580468137528, "grad_norm": 2.091273546218872, "learning_rate": 1.5235000000000002e-05, "loss": 0.4991, "step": 3049 }, { "epoch": 0.1707918019935043, "grad_norm": 1.6073461771011353, "learning_rate": 1.5240000000000001e-05, "loss": 0.3343, "step": 3050 }, { "epoch": 0.17084779930563332, "grad_norm": 1.0276155471801758, "learning_rate": 1.5245e-05, "loss": 0.3591, "step": 3051 }, { "epoch": 0.17090379661776234, "grad_norm": 1.3987497091293335, "learning_rate": 1.525e-05, "loss": 0.7108, "step": 3052 }, { "epoch": 0.17095979392989136, "grad_norm": 1.3775502443313599, "learning_rate": 1.5255e-05, "loss": 0.3814, "step": 3053 }, { "epoch": 0.17101579124202038, "grad_norm": 1.2856470346450806, "learning_rate": 1.5260000000000003e-05, "loss": 0.4444, "step": 3054 }, { "epoch": 0.1710717885541494, "grad_norm": 1.4564297199249268, "learning_rate": 1.5265e-05, "loss": 0.5069, "step": 3055 }, { "epoch": 0.17112778586627841, "grad_norm": 1.328133225440979, "learning_rate": 1.527e-05, "loss": 0.5168, "step": 3056 }, { "epoch": 0.17118378317840743, "grad_norm": 1.12852144241333, "learning_rate": 1.5275000000000002e-05, "loss": 0.4805, "step": 3057 }, { "epoch": 0.17123978049053645, "grad_norm": 1.114654541015625, "learning_rate": 1.528e-05, "loss": 0.4181, "step": 3058 }, { "epoch": 0.17129577780266547, "grad_norm": 1.1266412734985352, "learning_rate": 1.5285000000000004e-05, "loss": 
0.3826, "step": 3059 }, { "epoch": 0.1713517751147945, "grad_norm": 1.3138937950134277, "learning_rate": 1.529e-05, "loss": 0.4112, "step": 3060 }, { "epoch": 0.1714077724269235, "grad_norm": 1.2326487302780151, "learning_rate": 1.5295000000000002e-05, "loss": 0.4183, "step": 3061 }, { "epoch": 0.17146376973905253, "grad_norm": 1.2712304592132568, "learning_rate": 1.53e-05, "loss": 0.5426, "step": 3062 }, { "epoch": 0.17151976705118155, "grad_norm": 2.414008617401123, "learning_rate": 1.5305e-05, "loss": 0.5064, "step": 3063 }, { "epoch": 0.17157576436331057, "grad_norm": 1.1649055480957031, "learning_rate": 1.531e-05, "loss": 0.4934, "step": 3064 }, { "epoch": 0.1716317616754396, "grad_norm": 1.3338874578475952, "learning_rate": 1.5315e-05, "loss": 0.3964, "step": 3065 }, { "epoch": 0.1716877589875686, "grad_norm": 1.398659348487854, "learning_rate": 1.5320000000000002e-05, "loss": 0.4128, "step": 3066 }, { "epoch": 0.17174375629969763, "grad_norm": 1.2956663370132446, "learning_rate": 1.5325e-05, "loss": 0.4504, "step": 3067 }, { "epoch": 0.17179975361182664, "grad_norm": 1.2123852968215942, "learning_rate": 1.533e-05, "loss": 0.3877, "step": 3068 }, { "epoch": 0.17185575092395566, "grad_norm": 1.2996089458465576, "learning_rate": 1.5334999999999998e-05, "loss": 0.3504, "step": 3069 }, { "epoch": 0.17191174823608465, "grad_norm": 1.2874212265014648, "learning_rate": 1.5340000000000002e-05, "loss": 0.4792, "step": 3070 }, { "epoch": 0.17196774554821367, "grad_norm": 1.4098880290985107, "learning_rate": 1.5345e-05, "loss": 0.5226, "step": 3071 }, { "epoch": 0.1720237428603427, "grad_norm": 1.5210481882095337, "learning_rate": 1.535e-05, "loss": 0.6552, "step": 3072 }, { "epoch": 0.1720797401724717, "grad_norm": 1.165636658668518, "learning_rate": 1.5355e-05, "loss": 0.3838, "step": 3073 }, { "epoch": 0.17213573748460073, "grad_norm": 1.2327104806900024, "learning_rate": 1.536e-05, "loss": 0.5711, "step": 3074 }, { "epoch": 0.17219173479672975, "grad_norm": 
1.2012931108474731, "learning_rate": 1.5365000000000003e-05, "loss": 0.3197, "step": 3075 }, { "epoch": 0.17224773210885877, "grad_norm": 1.347421407699585, "learning_rate": 1.537e-05, "loss": 0.4665, "step": 3076 }, { "epoch": 0.1723037294209878, "grad_norm": 1.07391357421875, "learning_rate": 1.5375e-05, "loss": 0.3609, "step": 3077 }, { "epoch": 0.1723597267331168, "grad_norm": 1.2701846361160278, "learning_rate": 1.538e-05, "loss": 0.4394, "step": 3078 }, { "epoch": 0.17241572404524583, "grad_norm": 1.3560216426849365, "learning_rate": 1.5385e-05, "loss": 0.3959, "step": 3079 }, { "epoch": 0.17247172135737485, "grad_norm": 1.2126895189285278, "learning_rate": 1.539e-05, "loss": 0.4248, "step": 3080 }, { "epoch": 0.17252771866950387, "grad_norm": 1.192703127861023, "learning_rate": 1.5395e-05, "loss": 0.4845, "step": 3081 }, { "epoch": 0.17258371598163288, "grad_norm": 1.2717024087905884, "learning_rate": 1.54e-05, "loss": 0.3706, "step": 3082 }, { "epoch": 0.1726397132937619, "grad_norm": 1.38326096534729, "learning_rate": 1.5405e-05, "loss": 0.3618, "step": 3083 }, { "epoch": 0.17269571060589092, "grad_norm": 1.286408543586731, "learning_rate": 1.541e-05, "loss": 0.4486, "step": 3084 }, { "epoch": 0.17275170791801994, "grad_norm": 1.0604596138000488, "learning_rate": 1.5415e-05, "loss": 0.3375, "step": 3085 }, { "epoch": 0.17280770523014896, "grad_norm": 1.3629469871520996, "learning_rate": 1.542e-05, "loss": 0.4036, "step": 3086 }, { "epoch": 0.17286370254227798, "grad_norm": 1.4825447797775269, "learning_rate": 1.5425000000000002e-05, "loss": 0.4507, "step": 3087 }, { "epoch": 0.172919699854407, "grad_norm": 1.2464237213134766, "learning_rate": 1.543e-05, "loss": 0.4137, "step": 3088 }, { "epoch": 0.17297569716653602, "grad_norm": 1.3638365268707275, "learning_rate": 1.5435e-05, "loss": 0.3957, "step": 3089 }, { "epoch": 0.17303169447866504, "grad_norm": 1.085571050643921, "learning_rate": 1.544e-05, "loss": 0.4331, "step": 3090 }, { "epoch": 
0.17308769179079403, "grad_norm": 1.0439016819000244, "learning_rate": 1.5445000000000002e-05, "loss": 0.4425, "step": 3091 }, { "epoch": 0.17314368910292305, "grad_norm": 1.42640221118927, "learning_rate": 1.545e-05, "loss": 0.5665, "step": 3092 }, { "epoch": 0.17319968641505207, "grad_norm": 1.3006322383880615, "learning_rate": 1.5455e-05, "loss": 0.5638, "step": 3093 }, { "epoch": 0.1732556837271811, "grad_norm": 1.4400304555892944, "learning_rate": 1.546e-05, "loss": 0.4891, "step": 3094 }, { "epoch": 0.1733116810393101, "grad_norm": 1.1101337671279907, "learning_rate": 1.5465000000000002e-05, "loss": 0.3336, "step": 3095 }, { "epoch": 0.17336767835143913, "grad_norm": 1.6164960861206055, "learning_rate": 1.5470000000000003e-05, "loss": 0.3791, "step": 3096 }, { "epoch": 0.17342367566356814, "grad_norm": 1.3061712980270386, "learning_rate": 1.5475e-05, "loss": 0.4327, "step": 3097 }, { "epoch": 0.17347967297569716, "grad_norm": 1.2732435464859009, "learning_rate": 1.548e-05, "loss": 0.4474, "step": 3098 }, { "epoch": 0.17353567028782618, "grad_norm": 1.1919748783111572, "learning_rate": 1.5484999999999998e-05, "loss": 0.3302, "step": 3099 }, { "epoch": 0.1735916675999552, "grad_norm": 1.0183470249176025, "learning_rate": 1.5490000000000002e-05, "loss": 0.3668, "step": 3100 }, { "epoch": 0.17364766491208422, "grad_norm": 1.2144869565963745, "learning_rate": 1.5495e-05, "loss": 0.3694, "step": 3101 }, { "epoch": 0.17370366222421324, "grad_norm": 1.2006837129592896, "learning_rate": 1.55e-05, "loss": 0.3806, "step": 3102 }, { "epoch": 0.17375965953634226, "grad_norm": 1.1483407020568848, "learning_rate": 1.5505e-05, "loss": 0.4532, "step": 3103 }, { "epoch": 0.17381565684847128, "grad_norm": 1.499038577079773, "learning_rate": 1.551e-05, "loss": 0.4937, "step": 3104 }, { "epoch": 0.1738716541606003, "grad_norm": 1.1571745872497559, "learning_rate": 1.5515000000000003e-05, "loss": 0.3937, "step": 3105 }, { "epoch": 0.17392765147272932, "grad_norm": 
1.2323365211486816, "learning_rate": 1.552e-05, "loss": 0.407, "step": 3106 }, { "epoch": 0.17398364878485834, "grad_norm": 4.923003673553467, "learning_rate": 1.5525e-05, "loss": 0.3192, "step": 3107 }, { "epoch": 0.17403964609698735, "grad_norm": 1.1060408353805542, "learning_rate": 1.553e-05, "loss": 0.4702, "step": 3108 }, { "epoch": 0.17409564340911637, "grad_norm": 1.3280766010284424, "learning_rate": 1.5535e-05, "loss": 0.6155, "step": 3109 }, { "epoch": 0.1741516407212454, "grad_norm": 1.5248066186904907, "learning_rate": 1.554e-05, "loss": 0.5402, "step": 3110 }, { "epoch": 0.17420763803337438, "grad_norm": 1.3715664148330688, "learning_rate": 1.5545e-05, "loss": 0.4532, "step": 3111 }, { "epoch": 0.1742636353455034, "grad_norm": 1.1363284587860107, "learning_rate": 1.5550000000000002e-05, "loss": 0.5191, "step": 3112 }, { "epoch": 0.17431963265763242, "grad_norm": 1.1898376941680908, "learning_rate": 1.5555e-05, "loss": 0.3865, "step": 3113 }, { "epoch": 0.17437562996976144, "grad_norm": 1.2565094232559204, "learning_rate": 1.556e-05, "loss": 0.762, "step": 3114 }, { "epoch": 0.17443162728189046, "grad_norm": 1.0949702262878418, "learning_rate": 1.5565e-05, "loss": 0.4829, "step": 3115 }, { "epoch": 0.17448762459401948, "grad_norm": 1.1513077020645142, "learning_rate": 1.5570000000000002e-05, "loss": 0.5075, "step": 3116 }, { "epoch": 0.1745436219061485, "grad_norm": 1.1390165090560913, "learning_rate": 1.5575e-05, "loss": 0.4113, "step": 3117 }, { "epoch": 0.17459961921827752, "grad_norm": 1.1877434253692627, "learning_rate": 1.558e-05, "loss": 0.3997, "step": 3118 }, { "epoch": 0.17465561653040654, "grad_norm": 1.2542484998703003, "learning_rate": 1.5585e-05, "loss": 0.3045, "step": 3119 }, { "epoch": 0.17471161384253556, "grad_norm": 1.3228293657302856, "learning_rate": 1.559e-05, "loss": 0.4405, "step": 3120 }, { "epoch": 0.17476761115466458, "grad_norm": 1.401941180229187, "learning_rate": 1.5595000000000002e-05, "loss": 0.4046, "step": 3121 }, { 
"epoch": 0.1748236084667936, "grad_norm": 1.18621826171875, "learning_rate": 1.56e-05, "loss": 0.3908, "step": 3122 }, { "epoch": 0.17487960577892261, "grad_norm": 1.0453542470932007, "learning_rate": 1.5605e-05, "loss": 0.2901, "step": 3123 }, { "epoch": 0.17493560309105163, "grad_norm": 1.1331298351287842, "learning_rate": 1.561e-05, "loss": 0.3826, "step": 3124 }, { "epoch": 0.17499160040318065, "grad_norm": 1.3581092357635498, "learning_rate": 1.5615000000000002e-05, "loss": 0.585, "step": 3125 }, { "epoch": 0.17504759771530967, "grad_norm": 1.1543517112731934, "learning_rate": 1.5620000000000003e-05, "loss": 0.3554, "step": 3126 }, { "epoch": 0.1751035950274387, "grad_norm": 1.265600562095642, "learning_rate": 1.5625e-05, "loss": 0.4692, "step": 3127 }, { "epoch": 0.1751595923395677, "grad_norm": 1.3256622552871704, "learning_rate": 1.563e-05, "loss": 0.4647, "step": 3128 }, { "epoch": 0.17521558965169673, "grad_norm": 1.303101897239685, "learning_rate": 1.5635e-05, "loss": 0.359, "step": 3129 }, { "epoch": 0.17527158696382575, "grad_norm": 1.1615647077560425, "learning_rate": 1.5640000000000003e-05, "loss": 0.5086, "step": 3130 }, { "epoch": 0.17532758427595477, "grad_norm": 1.257786750793457, "learning_rate": 1.5645e-05, "loss": 0.4834, "step": 3131 }, { "epoch": 0.17538358158808376, "grad_norm": 1.3199831247329712, "learning_rate": 1.565e-05, "loss": 0.4673, "step": 3132 }, { "epoch": 0.17543957890021278, "grad_norm": 1.4349507093429565, "learning_rate": 1.5655000000000002e-05, "loss": 0.5471, "step": 3133 }, { "epoch": 0.1754955762123418, "grad_norm": 1.3560909032821655, "learning_rate": 1.566e-05, "loss": 0.4311, "step": 3134 }, { "epoch": 0.17555157352447082, "grad_norm": 1.2484694719314575, "learning_rate": 1.5665000000000003e-05, "loss": 0.4119, "step": 3135 }, { "epoch": 0.17560757083659984, "grad_norm": 1.0213234424591064, "learning_rate": 1.567e-05, "loss": 0.3423, "step": 3136 }, { "epoch": 0.17566356814872885, "grad_norm": 1.153052806854248, 
"learning_rate": 1.5675e-05, "loss": 0.4602, "step": 3137 }, { "epoch": 0.17571956546085787, "grad_norm": 1.1225149631500244, "learning_rate": 1.568e-05, "loss": 0.3676, "step": 3138 }, { "epoch": 0.1757755627729869, "grad_norm": 1.1485469341278076, "learning_rate": 1.5685e-05, "loss": 0.5003, "step": 3139 }, { "epoch": 0.1758315600851159, "grad_norm": 1.3653138875961304, "learning_rate": 1.569e-05, "loss": 0.497, "step": 3140 }, { "epoch": 0.17588755739724493, "grad_norm": 1.5089391469955444, "learning_rate": 1.5695e-05, "loss": 0.3692, "step": 3141 }, { "epoch": 0.17594355470937395, "grad_norm": 1.2435405254364014, "learning_rate": 1.5700000000000002e-05, "loss": 0.4499, "step": 3142 }, { "epoch": 0.17599955202150297, "grad_norm": 1.225533366203308, "learning_rate": 1.5705e-05, "loss": 0.4798, "step": 3143 }, { "epoch": 0.176055549333632, "grad_norm": 1.2281639575958252, "learning_rate": 1.571e-05, "loss": 0.4124, "step": 3144 }, { "epoch": 0.176111546645761, "grad_norm": 1.0934711694717407, "learning_rate": 1.5715e-05, "loss": 0.385, "step": 3145 }, { "epoch": 0.17616754395789003, "grad_norm": 1.3133177757263184, "learning_rate": 1.5720000000000002e-05, "loss": 0.6233, "step": 3146 }, { "epoch": 0.17622354127001905, "grad_norm": 0.9715774059295654, "learning_rate": 1.5725e-05, "loss": 0.3162, "step": 3147 }, { "epoch": 0.17627953858214807, "grad_norm": 1.1965703964233398, "learning_rate": 1.573e-05, "loss": 0.3621, "step": 3148 }, { "epoch": 0.17633553589427708, "grad_norm": 1.2315959930419922, "learning_rate": 1.5735e-05, "loss": 0.4043, "step": 3149 }, { "epoch": 0.1763915332064061, "grad_norm": 0.9554809927940369, "learning_rate": 1.5740000000000002e-05, "loss": 0.3764, "step": 3150 }, { "epoch": 0.17644753051853512, "grad_norm": 1.1647918224334717, "learning_rate": 1.5745000000000003e-05, "loss": 0.6171, "step": 3151 }, { "epoch": 0.17650352783066414, "grad_norm": 1.220215082168579, "learning_rate": 1.575e-05, "loss": 0.5622, "step": 3152 }, { "epoch": 
0.17655952514279313, "grad_norm": 1.1583800315856934, "learning_rate": 1.5755e-05, "loss": 0.3858, "step": 3153 }, { "epoch": 0.17661552245492215, "grad_norm": 1.3775138854980469, "learning_rate": 1.5759999999999998e-05, "loss": 0.5959, "step": 3154 }, { "epoch": 0.17667151976705117, "grad_norm": 1.2710376977920532, "learning_rate": 1.5765000000000002e-05, "loss": 0.4621, "step": 3155 }, { "epoch": 0.1767275170791802, "grad_norm": 1.2439836263656616, "learning_rate": 1.577e-05, "loss": 0.4974, "step": 3156 }, { "epoch": 0.1767835143913092, "grad_norm": 1.2212399244308472, "learning_rate": 1.5775e-05, "loss": 0.5128, "step": 3157 }, { "epoch": 0.17683951170343823, "grad_norm": 1.3426244258880615, "learning_rate": 1.578e-05, "loss": 0.5533, "step": 3158 }, { "epoch": 0.17689550901556725, "grad_norm": 1.1366629600524902, "learning_rate": 1.5785e-05, "loss": 0.4811, "step": 3159 }, { "epoch": 0.17695150632769627, "grad_norm": 1.3456504344940186, "learning_rate": 1.5790000000000003e-05, "loss": 0.4341, "step": 3160 }, { "epoch": 0.1770075036398253, "grad_norm": 1.1240496635437012, "learning_rate": 1.5795e-05, "loss": 0.4233, "step": 3161 }, { "epoch": 0.1770635009519543, "grad_norm": 1.4094661474227905, "learning_rate": 1.58e-05, "loss": 0.5234, "step": 3162 }, { "epoch": 0.17711949826408332, "grad_norm": 1.1591876745224, "learning_rate": 1.5805000000000002e-05, "loss": 0.5352, "step": 3163 }, { "epoch": 0.17717549557621234, "grad_norm": 1.3503307104110718, "learning_rate": 1.581e-05, "loss": 0.5129, "step": 3164 }, { "epoch": 0.17723149288834136, "grad_norm": 1.1911805868148804, "learning_rate": 1.5815000000000004e-05, "loss": 0.4462, "step": 3165 }, { "epoch": 0.17728749020047038, "grad_norm": 1.2154439687728882, "learning_rate": 1.582e-05, "loss": 0.4113, "step": 3166 }, { "epoch": 0.1773434875125994, "grad_norm": 1.424020528793335, "learning_rate": 1.5825000000000002e-05, "loss": 0.4254, "step": 3167 }, { "epoch": 0.17739948482472842, "grad_norm": 
1.2356969118118286, "learning_rate": 1.583e-05, "loss": 0.4238, "step": 3168 }, { "epoch": 0.17745548213685744, "grad_norm": 1.2835613489151, "learning_rate": 1.5835e-05, "loss": 0.4716, "step": 3169 }, { "epoch": 0.17751147944898646, "grad_norm": 1.5604387521743774, "learning_rate": 1.584e-05, "loss": 0.5455, "step": 3170 }, { "epoch": 0.17756747676111548, "grad_norm": 1.066110372543335, "learning_rate": 1.5845e-05, "loss": 0.4306, "step": 3171 }, { "epoch": 0.1776234740732445, "grad_norm": 1.3697227239608765, "learning_rate": 1.5850000000000002e-05, "loss": 0.4028, "step": 3172 }, { "epoch": 0.1776794713853735, "grad_norm": 1.1536370515823364, "learning_rate": 1.5855e-05, "loss": 0.5904, "step": 3173 }, { "epoch": 0.1777354686975025, "grad_norm": 1.0349966287612915, "learning_rate": 1.586e-05, "loss": 0.3471, "step": 3174 }, { "epoch": 0.17779146600963153, "grad_norm": 1.278933048248291, "learning_rate": 1.5865e-05, "loss": 0.4645, "step": 3175 }, { "epoch": 0.17784746332176055, "grad_norm": 1.2004108428955078, "learning_rate": 1.5870000000000002e-05, "loss": 0.5068, "step": 3176 }, { "epoch": 0.17790346063388957, "grad_norm": 1.3695133924484253, "learning_rate": 1.5875e-05, "loss": 0.5353, "step": 3177 }, { "epoch": 0.17795945794601858, "grad_norm": 1.1981608867645264, "learning_rate": 1.588e-05, "loss": 0.3492, "step": 3178 }, { "epoch": 0.1780154552581476, "grad_norm": 1.4457505941390991, "learning_rate": 1.5885e-05, "loss": 0.6995, "step": 3179 }, { "epoch": 0.17807145257027662, "grad_norm": 1.5259352922439575, "learning_rate": 1.5890000000000002e-05, "loss": 0.7483, "step": 3180 }, { "epoch": 0.17812744988240564, "grad_norm": 1.0876466035842896, "learning_rate": 1.5895000000000003e-05, "loss": 0.4789, "step": 3181 }, { "epoch": 0.17818344719453466, "grad_norm": 1.0859925746917725, "learning_rate": 1.59e-05, "loss": 0.4018, "step": 3182 }, { "epoch": 0.17823944450666368, "grad_norm": 1.1666775941848755, "learning_rate": 1.5905e-05, "loss": 0.4115, "step": 
3183 }, { "epoch": 0.1782954418187927, "grad_norm": 1.1463675498962402, "learning_rate": 1.591e-05, "loss": 0.3401, "step": 3184 }, { "epoch": 0.17835143913092172, "grad_norm": 1.154799461364746, "learning_rate": 1.5915000000000003e-05, "loss": 0.3368, "step": 3185 }, { "epoch": 0.17840743644305074, "grad_norm": 1.1332404613494873, "learning_rate": 1.592e-05, "loss": 0.4372, "step": 3186 }, { "epoch": 0.17846343375517976, "grad_norm": 1.2234429121017456, "learning_rate": 1.5925e-05, "loss": 0.4924, "step": 3187 }, { "epoch": 0.17851943106730878, "grad_norm": 1.1957119703292847, "learning_rate": 1.593e-05, "loss": 0.4729, "step": 3188 }, { "epoch": 0.1785754283794378, "grad_norm": 1.2825809717178345, "learning_rate": 1.5935e-05, "loss": 0.4555, "step": 3189 }, { "epoch": 0.17863142569156681, "grad_norm": 3.089897871017456, "learning_rate": 1.594e-05, "loss": 0.4048, "step": 3190 }, { "epoch": 0.17868742300369583, "grad_norm": 1.099593997001648, "learning_rate": 1.5945e-05, "loss": 0.3809, "step": 3191 }, { "epoch": 0.17874342031582485, "grad_norm": 1.1275124549865723, "learning_rate": 1.595e-05, "loss": 0.3926, "step": 3192 }, { "epoch": 0.17879941762795387, "grad_norm": 1.413540005683899, "learning_rate": 1.5955e-05, "loss": 0.4899, "step": 3193 }, { "epoch": 0.17885541494008286, "grad_norm": 1.2551677227020264, "learning_rate": 1.596e-05, "loss": 0.4831, "step": 3194 }, { "epoch": 0.17891141225221188, "grad_norm": 1.3640986680984497, "learning_rate": 1.5965e-05, "loss": 0.6391, "step": 3195 }, { "epoch": 0.1789674095643409, "grad_norm": 1.2376562356948853, "learning_rate": 1.597e-05, "loss": 0.4542, "step": 3196 }, { "epoch": 0.17902340687646992, "grad_norm": 1.3746341466903687, "learning_rate": 1.5975000000000002e-05, "loss": 0.5066, "step": 3197 }, { "epoch": 0.17907940418859894, "grad_norm": 0.9634360074996948, "learning_rate": 1.598e-05, "loss": 0.3555, "step": 3198 }, { "epoch": 0.17913540150072796, "grad_norm": 1.433107852935791, "learning_rate": 1.5985e-05, 
"loss": 0.4511, "step": 3199 }, { "epoch": 0.17919139881285698, "grad_norm": 1.2713160514831543, "learning_rate": 1.599e-05, "loss": 0.3922, "step": 3200 }, { "epoch": 0.179247396124986, "grad_norm": 1.3845282793045044, "learning_rate": 1.5995000000000002e-05, "loss": 0.5921, "step": 3201 }, { "epoch": 0.17930339343711502, "grad_norm": 1.1872634887695312, "learning_rate": 1.6000000000000003e-05, "loss": 0.4316, "step": 3202 }, { "epoch": 0.17935939074924404, "grad_norm": 1.2111296653747559, "learning_rate": 1.6005e-05, "loss": 0.3921, "step": 3203 }, { "epoch": 0.17941538806137305, "grad_norm": 1.0769726037979126, "learning_rate": 1.601e-05, "loss": 0.3636, "step": 3204 }, { "epoch": 0.17947138537350207, "grad_norm": 1.1083983182907104, "learning_rate": 1.6014999999999998e-05, "loss": 0.3965, "step": 3205 }, { "epoch": 0.1795273826856311, "grad_norm": 1.2002310752868652, "learning_rate": 1.6020000000000002e-05, "loss": 0.369, "step": 3206 }, { "epoch": 0.1795833799977601, "grad_norm": 1.1439322233200073, "learning_rate": 1.6025e-05, "loss": 0.369, "step": 3207 }, { "epoch": 0.17963937730988913, "grad_norm": 1.3816206455230713, "learning_rate": 1.603e-05, "loss": 0.5202, "step": 3208 }, { "epoch": 0.17969537462201815, "grad_norm": 1.112925410270691, "learning_rate": 1.6035e-05, "loss": 0.3286, "step": 3209 }, { "epoch": 0.17975137193414717, "grad_norm": 1.315228819847107, "learning_rate": 1.604e-05, "loss": 0.4827, "step": 3210 }, { "epoch": 0.1798073692462762, "grad_norm": 2.721423387527466, "learning_rate": 1.6045000000000003e-05, "loss": 0.5205, "step": 3211 }, { "epoch": 0.1798633665584052, "grad_norm": 1.248304009437561, "learning_rate": 1.605e-05, "loss": 0.4123, "step": 3212 }, { "epoch": 0.17991936387053423, "grad_norm": 1.1161116361618042, "learning_rate": 1.6055e-05, "loss": 0.4418, "step": 3213 }, { "epoch": 0.17997536118266325, "grad_norm": 1.2112042903900146, "learning_rate": 1.606e-05, "loss": 0.5014, "step": 3214 }, { "epoch": 0.18003135849479224, 
"grad_norm": 1.0611644983291626, "learning_rate": 1.6065e-05, "loss": 0.3713, "step": 3215 }, { "epoch": 0.18008735580692126, "grad_norm": 1.357844352722168, "learning_rate": 1.607e-05, "loss": 0.5438, "step": 3216 }, { "epoch": 0.18014335311905028, "grad_norm": 0.9168925285339355, "learning_rate": 1.6075e-05, "loss": 0.3858, "step": 3217 }, { "epoch": 0.1801993504311793, "grad_norm": 1.3100770711898804, "learning_rate": 1.6080000000000002e-05, "loss": 0.5269, "step": 3218 }, { "epoch": 0.18025534774330831, "grad_norm": 1.1514203548431396, "learning_rate": 1.6085e-05, "loss": 0.4493, "step": 3219 }, { "epoch": 0.18031134505543733, "grad_norm": 1.176634669303894, "learning_rate": 1.609e-05, "loss": 0.4187, "step": 3220 }, { "epoch": 0.18036734236756635, "grad_norm": 1.0469194650650024, "learning_rate": 1.6095e-05, "loss": 0.43, "step": 3221 }, { "epoch": 0.18042333967969537, "grad_norm": 1.5243301391601562, "learning_rate": 1.6100000000000002e-05, "loss": 0.4707, "step": 3222 }, { "epoch": 0.1804793369918244, "grad_norm": 1.0046426057815552, "learning_rate": 1.6105e-05, "loss": 0.3883, "step": 3223 }, { "epoch": 0.1805353343039534, "grad_norm": 1.3109506368637085, "learning_rate": 1.611e-05, "loss": 0.5103, "step": 3224 }, { "epoch": 0.18059133161608243, "grad_norm": 1.446347951889038, "learning_rate": 1.6115e-05, "loss": 0.4054, "step": 3225 }, { "epoch": 0.18064732892821145, "grad_norm": 1.2103548049926758, "learning_rate": 1.612e-05, "loss": 0.5018, "step": 3226 }, { "epoch": 0.18070332624034047, "grad_norm": 1.17299222946167, "learning_rate": 1.6125000000000002e-05, "loss": 0.4382, "step": 3227 }, { "epoch": 0.1807593235524695, "grad_norm": 1.3501638174057007, "learning_rate": 1.613e-05, "loss": 0.5293, "step": 3228 }, { "epoch": 0.1808153208645985, "grad_norm": 1.369954228401184, "learning_rate": 1.6135e-05, "loss": 0.4372, "step": 3229 }, { "epoch": 0.18087131817672752, "grad_norm": 1.442979097366333, "learning_rate": 1.6139999999999998e-05, "loss": 0.3399, 
"step": 3230 }, { "epoch": 0.18092731548885654, "grad_norm": 1.3130812644958496, "learning_rate": 1.6145000000000002e-05, "loss": 0.3832, "step": 3231 }, { "epoch": 0.18098331280098556, "grad_norm": 1.3480647802352905, "learning_rate": 1.6150000000000003e-05, "loss": 0.4964, "step": 3232 }, { "epoch": 0.18103931011311458, "grad_norm": 1.1867642402648926, "learning_rate": 1.6155e-05, "loss": 0.3486, "step": 3233 }, { "epoch": 0.1810953074252436, "grad_norm": 1.3922346830368042, "learning_rate": 1.616e-05, "loss": 0.5325, "step": 3234 }, { "epoch": 0.1811513047373726, "grad_norm": 1.1357862949371338, "learning_rate": 1.6165e-05, "loss": 0.637, "step": 3235 }, { "epoch": 0.1812073020495016, "grad_norm": 1.2021393775939941, "learning_rate": 1.6170000000000003e-05, "loss": 0.4567, "step": 3236 }, { "epoch": 0.18126329936163063, "grad_norm": 1.1709843873977661, "learning_rate": 1.6175e-05, "loss": 0.4624, "step": 3237 }, { "epoch": 0.18131929667375965, "grad_norm": 1.1683293581008911, "learning_rate": 1.618e-05, "loss": 0.3118, "step": 3238 }, { "epoch": 0.18137529398588867, "grad_norm": 1.096571922302246, "learning_rate": 1.6185000000000002e-05, "loss": 0.3676, "step": 3239 }, { "epoch": 0.1814312912980177, "grad_norm": 1.2501559257507324, "learning_rate": 1.619e-05, "loss": 0.4443, "step": 3240 }, { "epoch": 0.1814872886101467, "grad_norm": 1.254988670349121, "learning_rate": 1.6195000000000003e-05, "loss": 0.5011, "step": 3241 }, { "epoch": 0.18154328592227573, "grad_norm": 1.2877148389816284, "learning_rate": 1.62e-05, "loss": 0.4825, "step": 3242 }, { "epoch": 0.18159928323440475, "grad_norm": 1.2468076944351196, "learning_rate": 1.6205e-05, "loss": 0.4051, "step": 3243 }, { "epoch": 0.18165528054653376, "grad_norm": 1.257940411567688, "learning_rate": 1.621e-05, "loss": 0.5026, "step": 3244 }, { "epoch": 0.18171127785866278, "grad_norm": 1.0413216352462769, "learning_rate": 1.6215e-05, "loss": 0.3557, "step": 3245 }, { "epoch": 0.1817672751707918, "grad_norm": 
1.2333520650863647, "learning_rate": 1.622e-05, "loss": 0.4615, "step": 3246 }, { "epoch": 0.18182327248292082, "grad_norm": 1.1986799240112305, "learning_rate": 1.6225e-05, "loss": 0.4174, "step": 3247 }, { "epoch": 0.18187926979504984, "grad_norm": 1.9241962432861328, "learning_rate": 1.6230000000000002e-05, "loss": 0.4915, "step": 3248 }, { "epoch": 0.18193526710717886, "grad_norm": 1.2649354934692383, "learning_rate": 1.6235e-05, "loss": 0.654, "step": 3249 }, { "epoch": 0.18199126441930788, "grad_norm": 1.273526906967163, "learning_rate": 1.624e-05, "loss": 0.413, "step": 3250 }, { "epoch": 0.1820472617314369, "grad_norm": 1.438672661781311, "learning_rate": 1.6245e-05, "loss": 0.406, "step": 3251 }, { "epoch": 0.18210325904356592, "grad_norm": 1.3299928903579712, "learning_rate": 1.6250000000000002e-05, "loss": 0.4551, "step": 3252 }, { "epoch": 0.18215925635569494, "grad_norm": 1.1286715269088745, "learning_rate": 1.6255e-05, "loss": 0.437, "step": 3253 }, { "epoch": 0.18221525366782396, "grad_norm": 1.193117618560791, "learning_rate": 1.626e-05, "loss": 0.5152, "step": 3254 }, { "epoch": 0.18227125097995298, "grad_norm": 2.6552906036376953, "learning_rate": 1.6265e-05, "loss": 0.4532, "step": 3255 }, { "epoch": 0.18232724829208197, "grad_norm": 1.1225849390029907, "learning_rate": 1.6270000000000002e-05, "loss": 0.4489, "step": 3256 }, { "epoch": 0.182383245604211, "grad_norm": 1.1070002317428589, "learning_rate": 1.6275000000000003e-05, "loss": 0.2876, "step": 3257 }, { "epoch": 0.18243924291634, "grad_norm": 2.4725143909454346, "learning_rate": 1.628e-05, "loss": 0.3886, "step": 3258 }, { "epoch": 0.18249524022846902, "grad_norm": 1.3732391595840454, "learning_rate": 1.6285e-05, "loss": 0.5453, "step": 3259 }, { "epoch": 0.18255123754059804, "grad_norm": 1.3322089910507202, "learning_rate": 1.6289999999999998e-05, "loss": 0.4839, "step": 3260 }, { "epoch": 0.18260723485272706, "grad_norm": 1.0850098133087158, "learning_rate": 1.6295000000000002e-05, 
"loss": 0.4464, "step": 3261 }, { "epoch": 0.18266323216485608, "grad_norm": 1.1595327854156494, "learning_rate": 1.63e-05, "loss": 0.3385, "step": 3262 }, { "epoch": 0.1827192294769851, "grad_norm": 1.194762945175171, "learning_rate": 1.6305e-05, "loss": 0.4882, "step": 3263 }, { "epoch": 0.18277522678911412, "grad_norm": 1.1758840084075928, "learning_rate": 1.631e-05, "loss": 0.437, "step": 3264 }, { "epoch": 0.18283122410124314, "grad_norm": 1.1860482692718506, "learning_rate": 1.6315e-05, "loss": 0.6276, "step": 3265 }, { "epoch": 0.18288722141337216, "grad_norm": 1.4705352783203125, "learning_rate": 1.6320000000000003e-05, "loss": 0.4491, "step": 3266 }, { "epoch": 0.18294321872550118, "grad_norm": 1.955003023147583, "learning_rate": 1.6325e-05, "loss": 0.5621, "step": 3267 }, { "epoch": 0.1829992160376302, "grad_norm": 3.5246810913085938, "learning_rate": 1.633e-05, "loss": 0.4047, "step": 3268 }, { "epoch": 0.18305521334975922, "grad_norm": 1.4596385955810547, "learning_rate": 1.6335e-05, "loss": 0.4728, "step": 3269 }, { "epoch": 0.18311121066188824, "grad_norm": 1.2304860353469849, "learning_rate": 1.634e-05, "loss": 0.4617, "step": 3270 }, { "epoch": 0.18316720797401725, "grad_norm": 1.2931616306304932, "learning_rate": 1.6345000000000004e-05, "loss": 0.4159, "step": 3271 }, { "epoch": 0.18322320528614627, "grad_norm": 1.710740566253662, "learning_rate": 1.635e-05, "loss": 0.3767, "step": 3272 }, { "epoch": 0.1832792025982753, "grad_norm": 1.0282319784164429, "learning_rate": 1.6355000000000002e-05, "loss": 0.3537, "step": 3273 }, { "epoch": 0.1833351999104043, "grad_norm": 1.1052045822143555, "learning_rate": 1.636e-05, "loss": 0.5242, "step": 3274 }, { "epoch": 0.18339119722253333, "grad_norm": 1.0453317165374756, "learning_rate": 1.6365e-05, "loss": 0.4622, "step": 3275 }, { "epoch": 0.18344719453466235, "grad_norm": 1.3618289232254028, "learning_rate": 1.637e-05, "loss": 0.3347, "step": 3276 }, { "epoch": 0.18350319184679134, "grad_norm": 
1.2078756093978882, "learning_rate": 1.6375e-05, "loss": 0.3666, "step": 3277 }, { "epoch": 0.18355918915892036, "grad_norm": 1.0806200504302979, "learning_rate": 1.6380000000000002e-05, "loss": 0.3669, "step": 3278 }, { "epoch": 0.18361518647104938, "grad_norm": 1.468646764755249, "learning_rate": 1.6385e-05, "loss": 0.4764, "step": 3279 }, { "epoch": 0.1836711837831784, "grad_norm": 1.1237543821334839, "learning_rate": 1.639e-05, "loss": 0.4137, "step": 3280 }, { "epoch": 0.18372718109530742, "grad_norm": 1.1732734441757202, "learning_rate": 1.6395e-05, "loss": 0.4237, "step": 3281 }, { "epoch": 0.18378317840743644, "grad_norm": 1.1597936153411865, "learning_rate": 1.6400000000000002e-05, "loss": 0.3797, "step": 3282 }, { "epoch": 0.18383917571956546, "grad_norm": 2.0686988830566406, "learning_rate": 1.6405e-05, "loss": 0.7229, "step": 3283 }, { "epoch": 0.18389517303169448, "grad_norm": 1.333763599395752, "learning_rate": 1.641e-05, "loss": 0.4357, "step": 3284 }, { "epoch": 0.1839511703438235, "grad_norm": 1.0924245119094849, "learning_rate": 1.6415e-05, "loss": 0.3907, "step": 3285 }, { "epoch": 0.1840071676559525, "grad_norm": 1.6450637578964233, "learning_rate": 1.6420000000000002e-05, "loss": 0.4214, "step": 3286 }, { "epoch": 0.18406316496808153, "grad_norm": 1.07072114944458, "learning_rate": 1.6425000000000003e-05, "loss": 0.4035, "step": 3287 }, { "epoch": 0.18411916228021055, "grad_norm": 1.138118028640747, "learning_rate": 1.643e-05, "loss": 0.3068, "step": 3288 }, { "epoch": 0.18417515959233957, "grad_norm": 1.1400213241577148, "learning_rate": 1.6435e-05, "loss": 0.4134, "step": 3289 }, { "epoch": 0.1842311569044686, "grad_norm": 1.229209303855896, "learning_rate": 1.644e-05, "loss": 0.6559, "step": 3290 }, { "epoch": 0.1842871542165976, "grad_norm": 1.224164605140686, "learning_rate": 1.6445000000000003e-05, "loss": 0.4453, "step": 3291 }, { "epoch": 0.18434315152872663, "grad_norm": 1.1119213104248047, "learning_rate": 1.645e-05, "loss": 0.3873, 
"step": 3292 }, { "epoch": 0.18439914884085565, "grad_norm": 1.228772759437561, "learning_rate": 1.6455e-05, "loss": 0.5373, "step": 3293 }, { "epoch": 0.18445514615298467, "grad_norm": 1.1952736377716064, "learning_rate": 1.646e-05, "loss": 0.4511, "step": 3294 }, { "epoch": 0.1845111434651137, "grad_norm": 1.3519757986068726, "learning_rate": 1.6465e-05, "loss": 0.5203, "step": 3295 }, { "epoch": 0.1845671407772427, "grad_norm": 1.3381385803222656, "learning_rate": 1.6470000000000003e-05, "loss": 0.4271, "step": 3296 }, { "epoch": 0.1846231380893717, "grad_norm": 1.0474534034729004, "learning_rate": 1.6475e-05, "loss": 0.4076, "step": 3297 }, { "epoch": 0.18467913540150072, "grad_norm": 1.3297818899154663, "learning_rate": 1.648e-05, "loss": 0.4738, "step": 3298 }, { "epoch": 0.18473513271362973, "grad_norm": 1.1280972957611084, "learning_rate": 1.6485e-05, "loss": 0.4053, "step": 3299 }, { "epoch": 0.18479113002575875, "grad_norm": 1.257645606994629, "learning_rate": 1.649e-05, "loss": 0.4157, "step": 3300 }, { "epoch": 0.18484712733788777, "grad_norm": 1.3946285247802734, "learning_rate": 1.6495e-05, "loss": 0.4413, "step": 3301 }, { "epoch": 0.1849031246500168, "grad_norm": 1.6009902954101562, "learning_rate": 1.65e-05, "loss": 0.6021, "step": 3302 }, { "epoch": 0.1849591219621458, "grad_norm": 1.2088947296142578, "learning_rate": 1.6505000000000002e-05, "loss": 0.4489, "step": 3303 }, { "epoch": 0.18501511927427483, "grad_norm": 1.2706345319747925, "learning_rate": 1.651e-05, "loss": 0.5796, "step": 3304 }, { "epoch": 0.18507111658640385, "grad_norm": 1.1603660583496094, "learning_rate": 1.6515e-05, "loss": 0.4107, "step": 3305 }, { "epoch": 0.18512711389853287, "grad_norm": 1.2458375692367554, "learning_rate": 1.652e-05, "loss": 0.4275, "step": 3306 }, { "epoch": 0.1851831112106619, "grad_norm": 1.3546783924102783, "learning_rate": 1.6525000000000002e-05, "loss": 0.4559, "step": 3307 }, { "epoch": 0.1852391085227909, "grad_norm": 1.1928722858428955, 
"learning_rate": 1.6530000000000003e-05, "loss": 0.455, "step": 3308 }, { "epoch": 0.18529510583491993, "grad_norm": 0.9799600839614868, "learning_rate": 1.6535e-05, "loss": 0.3385, "step": 3309 }, { "epoch": 0.18535110314704895, "grad_norm": 1.222354769706726, "learning_rate": 1.654e-05, "loss": 0.4966, "step": 3310 }, { "epoch": 0.18540710045917796, "grad_norm": 1.098609209060669, "learning_rate": 1.6545e-05, "loss": 0.3555, "step": 3311 }, { "epoch": 0.18546309777130698, "grad_norm": 1.2803016901016235, "learning_rate": 1.6550000000000002e-05, "loss": 0.4859, "step": 3312 }, { "epoch": 0.185519095083436, "grad_norm": 1.0965261459350586, "learning_rate": 1.6555e-05, "loss": 0.498, "step": 3313 }, { "epoch": 0.18557509239556502, "grad_norm": 1.2349803447723389, "learning_rate": 1.656e-05, "loss": 0.3836, "step": 3314 }, { "epoch": 0.18563108970769404, "grad_norm": 1.229987621307373, "learning_rate": 1.6565e-05, "loss": 0.4132, "step": 3315 }, { "epoch": 0.18568708701982306, "grad_norm": 1.1460660696029663, "learning_rate": 1.657e-05, "loss": 0.4971, "step": 3316 }, { "epoch": 0.18574308433195208, "grad_norm": 1.3114126920700073, "learning_rate": 1.6575000000000003e-05, "loss": 0.6052, "step": 3317 }, { "epoch": 0.18579908164408107, "grad_norm": 1.1734764575958252, "learning_rate": 1.658e-05, "loss": 0.4576, "step": 3318 }, { "epoch": 0.1858550789562101, "grad_norm": 1.2834408283233643, "learning_rate": 1.6585e-05, "loss": 0.4598, "step": 3319 }, { "epoch": 0.1859110762683391, "grad_norm": 1.509110689163208, "learning_rate": 1.659e-05, "loss": 0.4597, "step": 3320 }, { "epoch": 0.18596707358046813, "grad_norm": 1.226487398147583, "learning_rate": 1.6595e-05, "loss": 0.3691, "step": 3321 }, { "epoch": 0.18602307089259715, "grad_norm": 1.135722041130066, "learning_rate": 1.66e-05, "loss": 0.5141, "step": 3322 }, { "epoch": 0.18607906820472617, "grad_norm": 1.3855825662612915, "learning_rate": 1.6605e-05, "loss": 0.4158, "step": 3323 }, { "epoch": 0.18613506551685519, 
"grad_norm": 1.1391615867614746, "learning_rate": 1.6610000000000002e-05, "loss": 0.4244, "step": 3324 }, { "epoch": 0.1861910628289842, "grad_norm": 1.1902307271957397, "learning_rate": 1.6615e-05, "loss": 0.4648, "step": 3325 }, { "epoch": 0.18624706014111322, "grad_norm": 1.1830710172653198, "learning_rate": 1.662e-05, "loss": 0.4394, "step": 3326 }, { "epoch": 0.18630305745324224, "grad_norm": 1.219680905342102, "learning_rate": 1.6625e-05, "loss": 0.3814, "step": 3327 }, { "epoch": 0.18635905476537126, "grad_norm": 1.0361963510513306, "learning_rate": 1.6630000000000002e-05, "loss": 0.3575, "step": 3328 }, { "epoch": 0.18641505207750028, "grad_norm": 1.4810428619384766, "learning_rate": 1.6635e-05, "loss": 0.5595, "step": 3329 }, { "epoch": 0.1864710493896293, "grad_norm": 1.082762598991394, "learning_rate": 1.664e-05, "loss": 0.5685, "step": 3330 }, { "epoch": 0.18652704670175832, "grad_norm": 1.3045333623886108, "learning_rate": 1.6645e-05, "loss": 0.6185, "step": 3331 }, { "epoch": 0.18658304401388734, "grad_norm": 1.1538302898406982, "learning_rate": 1.665e-05, "loss": 0.4312, "step": 3332 }, { "epoch": 0.18663904132601636, "grad_norm": 1.336602807044983, "learning_rate": 1.6655000000000002e-05, "loss": 0.3832, "step": 3333 }, { "epoch": 0.18669503863814538, "grad_norm": 1.2260713577270508, "learning_rate": 1.666e-05, "loss": 0.3917, "step": 3334 }, { "epoch": 0.1867510359502744, "grad_norm": 1.078220248222351, "learning_rate": 1.6665e-05, "loss": 0.4161, "step": 3335 }, { "epoch": 0.18680703326240342, "grad_norm": 1.0990285873413086, "learning_rate": 1.6669999999999998e-05, "loss": 0.4115, "step": 3336 }, { "epoch": 0.18686303057453243, "grad_norm": 1.2535604238510132, "learning_rate": 1.6675000000000002e-05, "loss": 0.4842, "step": 3337 }, { "epoch": 0.18691902788666145, "grad_norm": 1.0767483711242676, "learning_rate": 1.668e-05, "loss": 0.3885, "step": 3338 }, { "epoch": 0.18697502519879045, "grad_norm": 1.1079645156860352, "learning_rate": 1.6685e-05, 
"loss": 0.4516, "step": 3339 }, { "epoch": 0.18703102251091946, "grad_norm": 1.3449302911758423, "learning_rate": 1.669e-05, "loss": 0.4821, "step": 3340 }, { "epoch": 0.18708701982304848, "grad_norm": 1.2095441818237305, "learning_rate": 1.6695e-05, "loss": 0.3834, "step": 3341 }, { "epoch": 0.1871430171351775, "grad_norm": 1.2906239032745361, "learning_rate": 1.6700000000000003e-05, "loss": 0.7066, "step": 3342 }, { "epoch": 0.18719901444730652, "grad_norm": 1.172589659690857, "learning_rate": 1.6705e-05, "loss": 0.4537, "step": 3343 }, { "epoch": 0.18725501175943554, "grad_norm": 1.1754515171051025, "learning_rate": 1.671e-05, "loss": 0.4595, "step": 3344 }, { "epoch": 0.18731100907156456, "grad_norm": 1.052424669265747, "learning_rate": 1.6715000000000002e-05, "loss": 0.3947, "step": 3345 }, { "epoch": 0.18736700638369358, "grad_norm": 1.1512207984924316, "learning_rate": 1.672e-05, "loss": 0.3732, "step": 3346 }, { "epoch": 0.1874230036958226, "grad_norm": 1.1828744411468506, "learning_rate": 1.6725000000000003e-05, "loss": 0.2665, "step": 3347 }, { "epoch": 0.18747900100795162, "grad_norm": 1.670555830001831, "learning_rate": 1.673e-05, "loss": 0.5061, "step": 3348 }, { "epoch": 0.18753499832008064, "grad_norm": 1.185208797454834, "learning_rate": 1.6735e-05, "loss": 0.3772, "step": 3349 }, { "epoch": 0.18759099563220966, "grad_norm": 1.3466157913208008, "learning_rate": 1.674e-05, "loss": 0.5246, "step": 3350 }, { "epoch": 0.18764699294433868, "grad_norm": 1.374879240989685, "learning_rate": 1.6745e-05, "loss": 0.5011, "step": 3351 }, { "epoch": 0.1877029902564677, "grad_norm": 1.1384507417678833, "learning_rate": 1.675e-05, "loss": 0.3486, "step": 3352 }, { "epoch": 0.1877589875685967, "grad_norm": 1.1417534351348877, "learning_rate": 1.6755e-05, "loss": 0.3765, "step": 3353 }, { "epoch": 0.18781498488072573, "grad_norm": 1.1946250200271606, "learning_rate": 1.6760000000000002e-05, "loss": 0.3828, "step": 3354 }, { "epoch": 0.18787098219285475, "grad_norm": 
1.1479038000106812, "learning_rate": 1.6765e-05, "loss": 0.4193, "step": 3355 }, { "epoch": 0.18792697950498377, "grad_norm": 1.2469680309295654, "learning_rate": 1.677e-05, "loss": 0.3967, "step": 3356 }, { "epoch": 0.1879829768171128, "grad_norm": 1.0604054927825928, "learning_rate": 1.6775e-05, "loss": 0.3472, "step": 3357 }, { "epoch": 0.1880389741292418, "grad_norm": 1.4340620040893555, "learning_rate": 1.6780000000000002e-05, "loss": 0.4665, "step": 3358 }, { "epoch": 0.1880949714413708, "grad_norm": 1.5655089616775513, "learning_rate": 1.6785e-05, "loss": 0.4712, "step": 3359 }, { "epoch": 0.18815096875349982, "grad_norm": 1.0947039127349854, "learning_rate": 1.679e-05, "loss": 0.4547, "step": 3360 }, { "epoch": 0.18820696606562884, "grad_norm": 1.078403353691101, "learning_rate": 1.6795e-05, "loss": 0.4219, "step": 3361 }, { "epoch": 0.18826296337775786, "grad_norm": 1.148415446281433, "learning_rate": 1.6800000000000002e-05, "loss": 0.4868, "step": 3362 }, { "epoch": 0.18831896068988688, "grad_norm": 1.633833646774292, "learning_rate": 1.6805000000000003e-05, "loss": 0.5315, "step": 3363 }, { "epoch": 0.1883749580020159, "grad_norm": 1.1244282722473145, "learning_rate": 1.681e-05, "loss": 0.3964, "step": 3364 }, { "epoch": 0.18843095531414492, "grad_norm": 1.420272946357727, "learning_rate": 1.6815e-05, "loss": 0.4208, "step": 3365 }, { "epoch": 0.18848695262627393, "grad_norm": 1.3265632390975952, "learning_rate": 1.6819999999999998e-05, "loss": 0.3931, "step": 3366 }, { "epoch": 0.18854294993840295, "grad_norm": 1.3832519054412842, "learning_rate": 1.6825000000000002e-05, "loss": 0.4967, "step": 3367 }, { "epoch": 0.18859894725053197, "grad_norm": 1.217711329460144, "learning_rate": 1.683e-05, "loss": 0.4278, "step": 3368 }, { "epoch": 0.188654944562661, "grad_norm": 1.2732466459274292, "learning_rate": 1.6835e-05, "loss": 0.5072, "step": 3369 }, { "epoch": 0.18871094187479, "grad_norm": 1.0388083457946777, "learning_rate": 1.684e-05, "loss": 0.438, 
"step": 3370 }, { "epoch": 0.18876693918691903, "grad_norm": 1.2363613843917847, "learning_rate": 1.6845e-05, "loss": 0.4776, "step": 3371 }, { "epoch": 0.18882293649904805, "grad_norm": 1.2334297895431519, "learning_rate": 1.6850000000000003e-05, "loss": 0.4249, "step": 3372 }, { "epoch": 0.18887893381117707, "grad_norm": 1.1314977407455444, "learning_rate": 1.6855e-05, "loss": 0.3967, "step": 3373 }, { "epoch": 0.1889349311233061, "grad_norm": 1.0096185207366943, "learning_rate": 1.686e-05, "loss": 0.3685, "step": 3374 }, { "epoch": 0.1889909284354351, "grad_norm": 1.3609317541122437, "learning_rate": 1.6865e-05, "loss": 0.4632, "step": 3375 }, { "epoch": 0.18904692574756413, "grad_norm": 1.0902884006500244, "learning_rate": 1.687e-05, "loss": 0.3116, "step": 3376 }, { "epoch": 0.18910292305969315, "grad_norm": 1.6312155723571777, "learning_rate": 1.6875000000000004e-05, "loss": 0.4591, "step": 3377 }, { "epoch": 0.18915892037182216, "grad_norm": 1.2797203063964844, "learning_rate": 1.688e-05, "loss": 0.3855, "step": 3378 }, { "epoch": 0.18921491768395118, "grad_norm": 1.2569507360458374, "learning_rate": 1.6885000000000002e-05, "loss": 0.4277, "step": 3379 }, { "epoch": 0.18927091499608018, "grad_norm": 1.1022034883499146, "learning_rate": 1.689e-05, "loss": 0.3475, "step": 3380 }, { "epoch": 0.1893269123082092, "grad_norm": 1.084113359451294, "learning_rate": 1.6895e-05, "loss": 0.415, "step": 3381 }, { "epoch": 0.1893829096203382, "grad_norm": 1.3184095621109009, "learning_rate": 1.69e-05, "loss": 0.3926, "step": 3382 }, { "epoch": 0.18943890693246723, "grad_norm": 1.2320349216461182, "learning_rate": 1.6905e-05, "loss": 0.4394, "step": 3383 }, { "epoch": 0.18949490424459625, "grad_norm": 2.3696770668029785, "learning_rate": 1.6910000000000002e-05, "loss": 0.3514, "step": 3384 }, { "epoch": 0.18955090155672527, "grad_norm": 1.1212445497512817, "learning_rate": 1.6915e-05, "loss": 0.6016, "step": 3385 }, { "epoch": 0.1896068988688543, "grad_norm": 
1.0768115520477295, "learning_rate": 1.692e-05, "loss": 0.4472, "step": 3386 }, { "epoch": 0.1896628961809833, "grad_norm": 1.5229803323745728, "learning_rate": 1.6925e-05, "loss": 0.4502, "step": 3387 }, { "epoch": 0.18971889349311233, "grad_norm": 1.1422045230865479, "learning_rate": 1.6930000000000002e-05, "loss": 0.2921, "step": 3388 }, { "epoch": 0.18977489080524135, "grad_norm": 1.3426090478897095, "learning_rate": 1.6935e-05, "loss": 0.4891, "step": 3389 }, { "epoch": 0.18983088811737037, "grad_norm": 1.250209093093872, "learning_rate": 1.694e-05, "loss": 0.446, "step": 3390 }, { "epoch": 0.18988688542949939, "grad_norm": 1.2611923217773438, "learning_rate": 1.6945e-05, "loss": 0.4895, "step": 3391 }, { "epoch": 0.1899428827416284, "grad_norm": 1.2335516214370728, "learning_rate": 1.6950000000000002e-05, "loss": 0.4183, "step": 3392 }, { "epoch": 0.18999888005375742, "grad_norm": 1.0964800119400024, "learning_rate": 1.6955000000000003e-05, "loss": 0.4134, "step": 3393 }, { "epoch": 0.19005487736588644, "grad_norm": 1.8114066123962402, "learning_rate": 1.696e-05, "loss": 0.5505, "step": 3394 }, { "epoch": 0.19011087467801546, "grad_norm": 1.3944091796875, "learning_rate": 1.6965e-05, "loss": 0.6571, "step": 3395 }, { "epoch": 0.19016687199014448, "grad_norm": 1.537448763847351, "learning_rate": 1.697e-05, "loss": 0.486, "step": 3396 }, { "epoch": 0.1902228693022735, "grad_norm": 1.3557907342910767, "learning_rate": 1.6975000000000003e-05, "loss": 0.4855, "step": 3397 }, { "epoch": 0.19027886661440252, "grad_norm": 1.2067703008651733, "learning_rate": 1.698e-05, "loss": 0.5486, "step": 3398 }, { "epoch": 0.19033486392653154, "grad_norm": 1.1005481481552124, "learning_rate": 1.6985e-05, "loss": 0.4072, "step": 3399 }, { "epoch": 0.19039086123866056, "grad_norm": 1.3335630893707275, "learning_rate": 1.699e-05, "loss": 0.4883, "step": 3400 }, { "epoch": 0.19044685855078955, "grad_norm": 1.3925936222076416, "learning_rate": 1.6995e-05, "loss": 0.4764, "step": 3401 
}, { "epoch": 0.19050285586291857, "grad_norm": 2.9519593715667725, "learning_rate": 1.7000000000000003e-05, "loss": 0.5681, "step": 3402 }, { "epoch": 0.1905588531750476, "grad_norm": 1.0862815380096436, "learning_rate": 1.7005e-05, "loss": 0.3684, "step": 3403 }, { "epoch": 0.1906148504871766, "grad_norm": 1.3254729509353638, "learning_rate": 1.701e-05, "loss": 0.4242, "step": 3404 }, { "epoch": 0.19067084779930563, "grad_norm": 1.2939419746398926, "learning_rate": 1.7015e-05, "loss": 0.6702, "step": 3405 }, { "epoch": 0.19072684511143465, "grad_norm": 1.1743439435958862, "learning_rate": 1.702e-05, "loss": 0.4808, "step": 3406 }, { "epoch": 0.19078284242356366, "grad_norm": 1.2036492824554443, "learning_rate": 1.7025e-05, "loss": 0.3705, "step": 3407 }, { "epoch": 0.19083883973569268, "grad_norm": 1.1415599584579468, "learning_rate": 1.703e-05, "loss": 0.5025, "step": 3408 }, { "epoch": 0.1908948370478217, "grad_norm": 1.1346678733825684, "learning_rate": 1.7035000000000002e-05, "loss": 0.5311, "step": 3409 }, { "epoch": 0.19095083435995072, "grad_norm": 1.3246341943740845, "learning_rate": 1.704e-05, "loss": 0.5128, "step": 3410 }, { "epoch": 0.19100683167207974, "grad_norm": 1.2443675994873047, "learning_rate": 1.7045e-05, "loss": 0.6092, "step": 3411 }, { "epoch": 0.19106282898420876, "grad_norm": 1.1285314559936523, "learning_rate": 1.705e-05, "loss": 0.3824, "step": 3412 }, { "epoch": 0.19111882629633778, "grad_norm": 1.1717637777328491, "learning_rate": 1.7055000000000002e-05, "loss": 0.446, "step": 3413 }, { "epoch": 0.1911748236084668, "grad_norm": 1.0986918210983276, "learning_rate": 1.706e-05, "loss": 0.4009, "step": 3414 }, { "epoch": 0.19123082092059582, "grad_norm": 1.2700912952423096, "learning_rate": 1.7065e-05, "loss": 0.4518, "step": 3415 }, { "epoch": 0.19128681823272484, "grad_norm": 1.0021355152130127, "learning_rate": 1.707e-05, "loss": 0.3094, "step": 3416 }, { "epoch": 0.19134281554485386, "grad_norm": 1.3428599834442139, "learning_rate": 
1.7075e-05, "loss": 0.3574, "step": 3417 }, { "epoch": 0.19139881285698288, "grad_norm": 1.2496545314788818, "learning_rate": 1.7080000000000002e-05, "loss": 0.4016, "step": 3418 }, { "epoch": 0.1914548101691119, "grad_norm": 1.1841208934783936, "learning_rate": 1.7085e-05, "loss": 0.4462, "step": 3419 }, { "epoch": 0.1915108074812409, "grad_norm": 1.4078991413116455, "learning_rate": 1.709e-05, "loss": 0.4315, "step": 3420 }, { "epoch": 0.1915668047933699, "grad_norm": 1.3215162754058838, "learning_rate": 1.7095e-05, "loss": 0.4588, "step": 3421 }, { "epoch": 0.19162280210549892, "grad_norm": 1.2057838439941406, "learning_rate": 1.7100000000000002e-05, "loss": 0.4078, "step": 3422 }, { "epoch": 0.19167879941762794, "grad_norm": 1.2553930282592773, "learning_rate": 1.7105000000000003e-05, "loss": 0.3858, "step": 3423 }, { "epoch": 0.19173479672975696, "grad_norm": 1.4046380519866943, "learning_rate": 1.711e-05, "loss": 0.6379, "step": 3424 }, { "epoch": 0.19179079404188598, "grad_norm": 1.4144824743270874, "learning_rate": 1.7115e-05, "loss": 0.4791, "step": 3425 }, { "epoch": 0.191846791354015, "grad_norm": 1.804913878440857, "learning_rate": 1.712e-05, "loss": 0.5213, "step": 3426 }, { "epoch": 0.19190278866614402, "grad_norm": 1.282642126083374, "learning_rate": 1.7125000000000003e-05, "loss": 0.5367, "step": 3427 }, { "epoch": 0.19195878597827304, "grad_norm": 1.3015738725662231, "learning_rate": 1.713e-05, "loss": 0.4267, "step": 3428 }, { "epoch": 0.19201478329040206, "grad_norm": 1.5885628461837769, "learning_rate": 1.7135e-05, "loss": 0.5507, "step": 3429 }, { "epoch": 0.19207078060253108, "grad_norm": 1.3165570497512817, "learning_rate": 1.7140000000000002e-05, "loss": 0.4275, "step": 3430 }, { "epoch": 0.1921267779146601, "grad_norm": 1.299818992614746, "learning_rate": 1.7145e-05, "loss": 0.5218, "step": 3431 }, { "epoch": 0.19218277522678912, "grad_norm": 1.2481540441513062, "learning_rate": 1.7150000000000004e-05, "loss": 0.4117, "step": 3432 }, { 
"epoch": 0.19223877253891813, "grad_norm": 1.2244377136230469, "learning_rate": 1.7155e-05, "loss": 0.4895, "step": 3433 }, { "epoch": 0.19229476985104715, "grad_norm": 1.1141343116760254, "learning_rate": 1.7160000000000002e-05, "loss": 0.488, "step": 3434 }, { "epoch": 0.19235076716317617, "grad_norm": 1.5054800510406494, "learning_rate": 1.7165e-05, "loss": 0.5853, "step": 3435 }, { "epoch": 0.1924067644753052, "grad_norm": 1.1070597171783447, "learning_rate": 1.717e-05, "loss": 0.4004, "step": 3436 }, { "epoch": 0.1924627617874342, "grad_norm": 1.2049047946929932, "learning_rate": 1.7175e-05, "loss": 0.437, "step": 3437 }, { "epoch": 0.19251875909956323, "grad_norm": 1.857879638671875, "learning_rate": 1.718e-05, "loss": 0.4563, "step": 3438 }, { "epoch": 0.19257475641169225, "grad_norm": 1.1342371702194214, "learning_rate": 1.7185000000000002e-05, "loss": 0.3864, "step": 3439 }, { "epoch": 0.19263075372382127, "grad_norm": 1.1872518062591553, "learning_rate": 1.719e-05, "loss": 0.5765, "step": 3440 }, { "epoch": 0.1926867510359503, "grad_norm": 1.1592533588409424, "learning_rate": 1.7195e-05, "loss": 0.3613, "step": 3441 }, { "epoch": 0.19274274834807928, "grad_norm": 1.2872483730316162, "learning_rate": 1.7199999999999998e-05, "loss": 0.4437, "step": 3442 }, { "epoch": 0.1927987456602083, "grad_norm": NaN, "learning_rate": 1.7199999999999998e-05, "loss": 0.2918, "step": 3443 }, { "epoch": 0.19285474297233732, "grad_norm": 1.2049381732940674, "learning_rate": 1.7205000000000002e-05, "loss": 0.383, "step": 3444 }, { "epoch": 0.19291074028446634, "grad_norm": 1.1656842231750488, "learning_rate": 1.721e-05, "loss": 0.4182, "step": 3445 }, { "epoch": 0.19296673759659536, "grad_norm": 1.127423644065857, "learning_rate": 1.7215e-05, "loss": 0.4357, "step": 3446 }, { "epoch": 0.19302273490872437, "grad_norm": 1.9054218530654907, "learning_rate": 1.722e-05, "loss": 0.3881, "step": 3447 }, { "epoch": 0.1930787322208534, "grad_norm": 1.3814903497695923, "learning_rate": 
1.7225e-05, "loss": 0.6128, "step": 3448 }, { "epoch": 0.1931347295329824, "grad_norm": 1.302217960357666, "learning_rate": 1.7230000000000003e-05, "loss": 0.4546, "step": 3449 }, { "epoch": 0.19319072684511143, "grad_norm": 1.1936508417129517, "learning_rate": 1.7235e-05, "loss": 0.4362, "step": 3450 }, { "epoch": 0.19324672415724045, "grad_norm": 1.2464027404785156, "learning_rate": 1.724e-05, "loss": 0.4663, "step": 3451 }, { "epoch": 0.19330272146936947, "grad_norm": 1.2320516109466553, "learning_rate": 1.7245e-05, "loss": 0.497, "step": 3452 }, { "epoch": 0.1933587187814985, "grad_norm": 1.2153738737106323, "learning_rate": 1.725e-05, "loss": 0.5814, "step": 3453 }, { "epoch": 0.1934147160936275, "grad_norm": 1.3801615238189697, "learning_rate": 1.7255000000000003e-05, "loss": 0.478, "step": 3454 }, { "epoch": 0.19347071340575653, "grad_norm": 1.465854525566101, "learning_rate": 1.726e-05, "loss": 0.4406, "step": 3455 }, { "epoch": 0.19352671071788555, "grad_norm": 2.0683443546295166, "learning_rate": 1.7265e-05, "loss": 0.5539, "step": 3456 }, { "epoch": 0.19358270803001457, "grad_norm": 1.6779159307479858, "learning_rate": 1.727e-05, "loss": 0.5283, "step": 3457 }, { "epoch": 0.19363870534214359, "grad_norm": 1.1865346431732178, "learning_rate": 1.7275e-05, "loss": 0.5156, "step": 3458 }, { "epoch": 0.1936947026542726, "grad_norm": 1.2769807577133179, "learning_rate": 1.728e-05, "loss": 0.5397, "step": 3459 }, { "epoch": 0.19375069996640162, "grad_norm": 1.1956596374511719, "learning_rate": 1.7285e-05, "loss": 0.3929, "step": 3460 }, { "epoch": 0.19380669727853064, "grad_norm": 1.1267244815826416, "learning_rate": 1.7290000000000002e-05, "loss": 0.3968, "step": 3461 }, { "epoch": 0.19386269459065966, "grad_norm": 1.2932826280593872, "learning_rate": 1.7295e-05, "loss": 0.4719, "step": 3462 }, { "epoch": 0.19391869190278865, "grad_norm": 1.1786478757858276, "learning_rate": 1.73e-05, "loss": 0.4781, "step": 3463 }, { "epoch": 0.19397468921491767, "grad_norm": 
1.1676743030548096, "learning_rate": 1.7305e-05, "loss": 0.3853, "step": 3464 }, { "epoch": 0.1940306865270467, "grad_norm": 1.2847338914871216, "learning_rate": 1.7310000000000002e-05, "loss": 0.5865, "step": 3465 }, { "epoch": 0.1940866838391757, "grad_norm": 1.2821464538574219, "learning_rate": 1.7315e-05, "loss": 0.4091, "step": 3466 }, { "epoch": 0.19414268115130473, "grad_norm": 1.1550301313400269, "learning_rate": 1.732e-05, "loss": 0.4035, "step": 3467 }, { "epoch": 0.19419867846343375, "grad_norm": 1.3662973642349243, "learning_rate": 1.7325e-05, "loss": 0.4234, "step": 3468 }, { "epoch": 0.19425467577556277, "grad_norm": 1.301316261291504, "learning_rate": 1.7330000000000002e-05, "loss": 0.4344, "step": 3469 }, { "epoch": 0.1943106730876918, "grad_norm": 1.1564757823944092, "learning_rate": 1.7335000000000003e-05, "loss": 0.5202, "step": 3470 }, { "epoch": 0.1943666703998208, "grad_norm": 1.756524682044983, "learning_rate": 1.734e-05, "loss": 0.5682, "step": 3471 }, { "epoch": 0.19442266771194983, "grad_norm": 1.184188961982727, "learning_rate": 1.7345e-05, "loss": 0.4627, "step": 3472 }, { "epoch": 0.19447866502407885, "grad_norm": 1.332406997680664, "learning_rate": 1.7349999999999998e-05, "loss": 0.4546, "step": 3473 }, { "epoch": 0.19453466233620786, "grad_norm": 1.2890806198120117, "learning_rate": 1.7355000000000002e-05, "loss": 0.4304, "step": 3474 }, { "epoch": 0.19459065964833688, "grad_norm": 1.150179386138916, "learning_rate": 1.736e-05, "loss": 0.4157, "step": 3475 }, { "epoch": 0.1946466569604659, "grad_norm": 1.432388186454773, "learning_rate": 1.7365e-05, "loss": 0.5514, "step": 3476 }, { "epoch": 0.19470265427259492, "grad_norm": 1.2167162895202637, "learning_rate": 1.737e-05, "loss": 0.4673, "step": 3477 }, { "epoch": 0.19475865158472394, "grad_norm": 1.4046660661697388, "learning_rate": 1.7375e-05, "loss": 0.5461, "step": 3478 }, { "epoch": 0.19481464889685296, "grad_norm": 1.2196552753448486, "learning_rate": 1.7380000000000003e-05, 
"loss": 0.425, "step": 3479 }, { "epoch": 0.19487064620898198, "grad_norm": 1.8962860107421875, "learning_rate": 1.7385e-05, "loss": 0.5941, "step": 3480 }, { "epoch": 0.194926643521111, "grad_norm": 1.1011854410171509, "learning_rate": 1.739e-05, "loss": 0.4297, "step": 3481 }, { "epoch": 0.19498264083324002, "grad_norm": 1.023834466934204, "learning_rate": 1.7395e-05, "loss": 0.3723, "step": 3482 }, { "epoch": 0.195038638145369, "grad_norm": 1.4861829280853271, "learning_rate": 1.74e-05, "loss": 0.4788, "step": 3483 }, { "epoch": 0.19509463545749803, "grad_norm": 1.1437004804611206, "learning_rate": 1.7405e-05, "loss": 0.364, "step": 3484 }, { "epoch": 0.19515063276962705, "grad_norm": 1.8231146335601807, "learning_rate": 1.741e-05, "loss": 0.5312, "step": 3485 }, { "epoch": 0.19520663008175607, "grad_norm": 1.5955363512039185, "learning_rate": 1.7415000000000002e-05, "loss": 0.4753, "step": 3486 }, { "epoch": 0.19526262739388509, "grad_norm": 1.3428277969360352, "learning_rate": 1.742e-05, "loss": 0.4872, "step": 3487 }, { "epoch": 0.1953186247060141, "grad_norm": 1.1213557720184326, "learning_rate": 1.7425e-05, "loss": 0.3974, "step": 3488 }, { "epoch": 0.19537462201814312, "grad_norm": 1.1225180625915527, "learning_rate": 1.743e-05, "loss": 0.3424, "step": 3489 }, { "epoch": 0.19543061933027214, "grad_norm": 1.5477772951126099, "learning_rate": 1.7435e-05, "loss": 0.6466, "step": 3490 }, { "epoch": 0.19548661664240116, "grad_norm": 1.1222776174545288, "learning_rate": 1.7440000000000002e-05, "loss": 0.4342, "step": 3491 }, { "epoch": 0.19554261395453018, "grad_norm": 1.058448314666748, "learning_rate": 1.7445e-05, "loss": 0.3581, "step": 3492 }, { "epoch": 0.1955986112666592, "grad_norm": 1.4802191257476807, "learning_rate": 1.745e-05, "loss": 0.4271, "step": 3493 }, { "epoch": 0.19565460857878822, "grad_norm": 1.29079008102417, "learning_rate": 1.7455e-05, "loss": 0.4136, "step": 3494 }, { "epoch": 0.19571060589091724, "grad_norm": 1.099254846572876, 
"learning_rate": 1.7460000000000002e-05, "loss": 0.3975, "step": 3495 }, { "epoch": 0.19576660320304626, "grad_norm": 1.9378408193588257, "learning_rate": 1.7465e-05, "loss": 0.5841, "step": 3496 }, { "epoch": 0.19582260051517528, "grad_norm": 1.5394763946533203, "learning_rate": 1.747e-05, "loss": 0.4734, "step": 3497 }, { "epoch": 0.1958785978273043, "grad_norm": 1.3304824829101562, "learning_rate": 1.7475e-05, "loss": 0.3832, "step": 3498 }, { "epoch": 0.19593459513943332, "grad_norm": 1.273561954498291, "learning_rate": 1.7480000000000002e-05, "loss": 0.5094, "step": 3499 }, { "epoch": 0.19599059245156233, "grad_norm": 1.340970516204834, "learning_rate": 1.7485000000000003e-05, "loss": 0.4204, "step": 3500 }, { "epoch": 0.19604658976369135, "grad_norm": 1.1625409126281738, "learning_rate": 1.749e-05, "loss": 0.4155, "step": 3501 }, { "epoch": 0.19610258707582037, "grad_norm": 0.9949480295181274, "learning_rate": 1.7495e-05, "loss": 0.4649, "step": 3502 }, { "epoch": 0.1961585843879494, "grad_norm": 1.329535961151123, "learning_rate": 1.75e-05, "loss": 0.5627, "step": 3503 }, { "epoch": 0.19621458170007838, "grad_norm": 1.1956413984298706, "learning_rate": 1.7505000000000003e-05, "loss": 0.3473, "step": 3504 }, { "epoch": 0.1962705790122074, "grad_norm": 1.3843954801559448, "learning_rate": 1.751e-05, "loss": 0.5833, "step": 3505 }, { "epoch": 0.19632657632433642, "grad_norm": 1.2624551057815552, "learning_rate": 1.7515e-05, "loss": 0.3751, "step": 3506 }, { "epoch": 0.19638257363646544, "grad_norm": 2.117694854736328, "learning_rate": 1.752e-05, "loss": 0.3899, "step": 3507 }, { "epoch": 0.19643857094859446, "grad_norm": 1.484227180480957, "learning_rate": 1.7525e-05, "loss": 0.4206, "step": 3508 }, { "epoch": 0.19649456826072348, "grad_norm": 1.1634652614593506, "learning_rate": 1.7530000000000003e-05, "loss": 0.3729, "step": 3509 }, { "epoch": 0.1965505655728525, "grad_norm": 1.2138384580612183, "learning_rate": 1.7535e-05, "loss": 0.3716, "step": 3510 }, { 
"epoch": 0.19660656288498152, "grad_norm": 1.0454835891723633, "learning_rate": 1.754e-05, "loss": 0.3648, "step": 3511 }, { "epoch": 0.19666256019711054, "grad_norm": 1.5565184354782104, "learning_rate": 1.7545e-05, "loss": 0.5028, "step": 3512 }, { "epoch": 0.19671855750923956, "grad_norm": 1.8158458471298218, "learning_rate": 1.755e-05, "loss": 0.4712, "step": 3513 }, { "epoch": 0.19677455482136857, "grad_norm": 0.9922124743461609, "learning_rate": 1.7555e-05, "loss": 0.4019, "step": 3514 }, { "epoch": 0.1968305521334976, "grad_norm": 1.0502554178237915, "learning_rate": 1.756e-05, "loss": 0.3467, "step": 3515 }, { "epoch": 0.1968865494456266, "grad_norm": 4.462704181671143, "learning_rate": 1.7565000000000002e-05, "loss": 0.5373, "step": 3516 }, { "epoch": 0.19694254675775563, "grad_norm": 1.2560471296310425, "learning_rate": 1.757e-05, "loss": 0.4816, "step": 3517 }, { "epoch": 0.19699854406988465, "grad_norm": 1.1651909351348877, "learning_rate": 1.7575e-05, "loss": 0.4926, "step": 3518 }, { "epoch": 0.19705454138201367, "grad_norm": 1.382475733757019, "learning_rate": 1.758e-05, "loss": 0.4079, "step": 3519 }, { "epoch": 0.1971105386941427, "grad_norm": 1.362947940826416, "learning_rate": 1.7585000000000002e-05, "loss": 0.6844, "step": 3520 }, { "epoch": 0.1971665360062717, "grad_norm": 1.4244499206542969, "learning_rate": 1.759e-05, "loss": 0.3927, "step": 3521 }, { "epoch": 0.19722253331840073, "grad_norm": 1.4018924236297607, "learning_rate": 1.7595e-05, "loss": 0.4086, "step": 3522 }, { "epoch": 0.19727853063052975, "grad_norm": 1.0669325590133667, "learning_rate": 1.76e-05, "loss": 0.3776, "step": 3523 }, { "epoch": 0.19733452794265877, "grad_norm": 1.1211423873901367, "learning_rate": 1.7605000000000002e-05, "loss": 0.3565, "step": 3524 }, { "epoch": 0.19739052525478776, "grad_norm": 1.3023756742477417, "learning_rate": 1.7610000000000002e-05, "loss": 0.4846, "step": 3525 }, { "epoch": 0.19744652256691678, "grad_norm": 1.5412284135818481, 
"learning_rate": 1.7615e-05, "loss": 0.6275, "step": 3526 }, { "epoch": 0.1975025198790458, "grad_norm": 1.2327158451080322, "learning_rate": 1.762e-05, "loss": 0.3694, "step": 3527 }, { "epoch": 0.19755851719117482, "grad_norm": 1.0797861814498901, "learning_rate": 1.7625e-05, "loss": 0.3528, "step": 3528 }, { "epoch": 0.19761451450330383, "grad_norm": 1.3560792207717896, "learning_rate": 1.7630000000000002e-05, "loss": 0.5039, "step": 3529 }, { "epoch": 0.19767051181543285, "grad_norm": 1.2430589199066162, "learning_rate": 1.7635000000000003e-05, "loss": 0.4804, "step": 3530 }, { "epoch": 0.19772650912756187, "grad_norm": 1.2607671022415161, "learning_rate": 1.764e-05, "loss": 0.4166, "step": 3531 }, { "epoch": 0.1977825064396909, "grad_norm": 1.0509237051010132, "learning_rate": 1.7645e-05, "loss": 0.3564, "step": 3532 }, { "epoch": 0.1978385037518199, "grad_norm": 1.234286904335022, "learning_rate": 1.765e-05, "loss": 0.4267, "step": 3533 }, { "epoch": 0.19789450106394893, "grad_norm": 1.2905102968215942, "learning_rate": 1.7655000000000003e-05, "loss": 0.425, "step": 3534 }, { "epoch": 0.19795049837607795, "grad_norm": 1.1756755113601685, "learning_rate": 1.766e-05, "loss": 0.3897, "step": 3535 }, { "epoch": 0.19800649568820697, "grad_norm": 1.2536176443099976, "learning_rate": 1.7665e-05, "loss": 0.4033, "step": 3536 }, { "epoch": 0.198062493000336, "grad_norm": 1.1823030710220337, "learning_rate": 1.7670000000000002e-05, "loss": 0.4084, "step": 3537 }, { "epoch": 0.198118490312465, "grad_norm": 1.295543909072876, "learning_rate": 1.7675e-05, "loss": 0.3927, "step": 3538 }, { "epoch": 0.19817448762459403, "grad_norm": 1.1278620958328247, "learning_rate": 1.7680000000000004e-05, "loss": 0.4166, "step": 3539 }, { "epoch": 0.19823048493672304, "grad_norm": 1.3221131563186646, "learning_rate": 1.7685e-05, "loss": 0.4792, "step": 3540 }, { "epoch": 0.19828648224885206, "grad_norm": 1.1316249370574951, "learning_rate": 1.7690000000000002e-05, "loss": 0.6107, 
"step": 3541 }, { "epoch": 0.19834247956098108, "grad_norm": 1.281059741973877, "learning_rate": 1.7695e-05, "loss": 0.4603, "step": 3542 }, { "epoch": 0.1983984768731101, "grad_norm": 1.519079327583313, "learning_rate": 1.77e-05, "loss": 0.9461, "step": 3543 }, { "epoch": 0.19845447418523912, "grad_norm": 1.1021186113357544, "learning_rate": 1.7705e-05, "loss": 0.4054, "step": 3544 }, { "epoch": 0.1985104714973681, "grad_norm": 1.2137590646743774, "learning_rate": 1.771e-05, "loss": 0.3485, "step": 3545 }, { "epoch": 0.19856646880949713, "grad_norm": 1.8117284774780273, "learning_rate": 1.7715000000000002e-05, "loss": 0.4094, "step": 3546 }, { "epoch": 0.19862246612162615, "grad_norm": 1.588281273841858, "learning_rate": 1.772e-05, "loss": 0.4485, "step": 3547 }, { "epoch": 0.19867846343375517, "grad_norm": 1.4057315587997437, "learning_rate": 1.7725e-05, "loss": 0.5001, "step": 3548 }, { "epoch": 0.1987344607458842, "grad_norm": 1.0868313312530518, "learning_rate": 1.773e-05, "loss": 0.42, "step": 3549 }, { "epoch": 0.1987904580580132, "grad_norm": 1.1870468854904175, "learning_rate": 1.7735000000000002e-05, "loss": 0.4441, "step": 3550 }, { "epoch": 0.19884645537014223, "grad_norm": 1.0263683795928955, "learning_rate": 1.774e-05, "loss": 0.4931, "step": 3551 }, { "epoch": 0.19890245268227125, "grad_norm": 1.1085573434829712, "learning_rate": 1.7745e-05, "loss": 0.4928, "step": 3552 }, { "epoch": 0.19895844999440027, "grad_norm": 1.0907138586044312, "learning_rate": 1.775e-05, "loss": 0.481, "step": 3553 }, { "epoch": 0.19901444730652929, "grad_norm": 1.4030075073242188, "learning_rate": 1.7755000000000002e-05, "loss": 0.6967, "step": 3554 }, { "epoch": 0.1990704446186583, "grad_norm": 1.2095900774002075, "learning_rate": 1.7760000000000003e-05, "loss": 0.3484, "step": 3555 }, { "epoch": 0.19912644193078732, "grad_norm": 1.1291601657867432, "learning_rate": 1.7765e-05, "loss": 0.3844, "step": 3556 }, { "epoch": 0.19918243924291634, "grad_norm": 1.094610571861267, 
"learning_rate": 1.777e-05, "loss": 0.4705, "step": 3557 }, { "epoch": 0.19923843655504536, "grad_norm": 1.1452468633651733, "learning_rate": 1.7775e-05, "loss": 0.4597, "step": 3558 }, { "epoch": 0.19929443386717438, "grad_norm": 1.232744812965393, "learning_rate": 1.7780000000000003e-05, "loss": 0.5608, "step": 3559 }, { "epoch": 0.1993504311793034, "grad_norm": 1.1906026601791382, "learning_rate": 1.7785e-05, "loss": 0.3942, "step": 3560 }, { "epoch": 0.19940642849143242, "grad_norm": 0.969206690788269, "learning_rate": 1.779e-05, "loss": 0.2845, "step": 3561 }, { "epoch": 0.19946242580356144, "grad_norm": 1.2132002115249634, "learning_rate": 1.7795e-05, "loss": 0.4097, "step": 3562 }, { "epoch": 0.19951842311569046, "grad_norm": 1.3831021785736084, "learning_rate": 1.78e-05, "loss": 0.4446, "step": 3563 }, { "epoch": 0.19957442042781948, "grad_norm": 1.3817284107208252, "learning_rate": 1.7805000000000003e-05, "loss": 0.3767, "step": 3564 }, { "epoch": 0.1996304177399485, "grad_norm": 1.3107645511627197, "learning_rate": 1.781e-05, "loss": 0.4725, "step": 3565 }, { "epoch": 0.1996864150520775, "grad_norm": 1.0882208347320557, "learning_rate": 1.7815e-05, "loss": 0.4093, "step": 3566 }, { "epoch": 0.1997424123642065, "grad_norm": 1.105525255203247, "learning_rate": 1.7820000000000002e-05, "loss": 0.3445, "step": 3567 }, { "epoch": 0.19979840967633553, "grad_norm": 1.329720377922058, "learning_rate": 1.7825e-05, "loss": 0.4393, "step": 3568 }, { "epoch": 0.19985440698846454, "grad_norm": 1.0822575092315674, "learning_rate": 1.783e-05, "loss": 0.3839, "step": 3569 }, { "epoch": 0.19991040430059356, "grad_norm": 1.0894161462783813, "learning_rate": 1.7835e-05, "loss": 0.344, "step": 3570 }, { "epoch": 0.19996640161272258, "grad_norm": 1.1637275218963623, "learning_rate": 1.7840000000000002e-05, "loss": 0.4248, "step": 3571 }, { "epoch": 0.2000223989248516, "grad_norm": 1.4276413917541504, "learning_rate": 1.7845e-05, "loss": 0.4069, "step": 3572 }, { "epoch": 
0.20007839623698062, "grad_norm": 1.1614675521850586, "learning_rate": 1.785e-05, "loss": 0.3726, "step": 3573 }, { "epoch": 0.20013439354910964, "grad_norm": 1.1339586973190308, "learning_rate": 1.7855e-05, "loss": 0.6507, "step": 3574 }, { "epoch": 0.20019039086123866, "grad_norm": 1.5252258777618408, "learning_rate": 1.7860000000000002e-05, "loss": 0.4611, "step": 3575 }, { "epoch": 0.20024638817336768, "grad_norm": 1.1716711521148682, "learning_rate": 1.7865000000000003e-05, "loss": 0.3847, "step": 3576 }, { "epoch": 0.2003023854854967, "grad_norm": 1.0965520143508911, "learning_rate": 1.787e-05, "loss": 0.4186, "step": 3577 }, { "epoch": 0.20035838279762572, "grad_norm": 1.3718756437301636, "learning_rate": 1.7875e-05, "loss": 0.5508, "step": 3578 }, { "epoch": 0.20041438010975474, "grad_norm": 1.2677582502365112, "learning_rate": 1.7879999999999998e-05, "loss": 0.4108, "step": 3579 }, { "epoch": 0.20047037742188376, "grad_norm": 1.3012787103652954, "learning_rate": 1.7885000000000002e-05, "loss": 0.5334, "step": 3580 }, { "epoch": 0.20052637473401277, "grad_norm": 1.152340292930603, "learning_rate": 1.789e-05, "loss": 0.3269, "step": 3581 }, { "epoch": 0.2005823720461418, "grad_norm": 1.3426048755645752, "learning_rate": 1.7895e-05, "loss": 0.5069, "step": 3582 }, { "epoch": 0.2006383693582708, "grad_norm": 1.2388484477996826, "learning_rate": 1.79e-05, "loss": 0.544, "step": 3583 }, { "epoch": 0.20069436667039983, "grad_norm": 1.2543814182281494, "learning_rate": 1.7905e-05, "loss": 0.4034, "step": 3584 }, { "epoch": 0.20075036398252885, "grad_norm": 1.060953140258789, "learning_rate": 1.7910000000000003e-05, "loss": 0.4458, "step": 3585 }, { "epoch": 0.20080636129465787, "grad_norm": 1.3129549026489258, "learning_rate": 1.7915e-05, "loss": 0.5294, "step": 3586 }, { "epoch": 0.20086235860678686, "grad_norm": 1.1931536197662354, "learning_rate": 1.792e-05, "loss": 0.4113, "step": 3587 }, { "epoch": 0.20091835591891588, "grad_norm": 1.254481315612793, 
"learning_rate": 1.7925e-05, "loss": 0.4261, "step": 3588 }, { "epoch": 0.2009743532310449, "grad_norm": 1.1662412881851196, "learning_rate": 1.793e-05, "loss": 0.348, "step": 3589 }, { "epoch": 0.20103035054317392, "grad_norm": 1.0701323747634888, "learning_rate": 1.7935e-05, "loss": 0.4522, "step": 3590 }, { "epoch": 0.20108634785530294, "grad_norm": 1.308609127998352, "learning_rate": 1.794e-05, "loss": 0.4527, "step": 3591 }, { "epoch": 0.20114234516743196, "grad_norm": 1.1434340476989746, "learning_rate": 1.7945000000000002e-05, "loss": 0.3729, "step": 3592 }, { "epoch": 0.20119834247956098, "grad_norm": 1.10639488697052, "learning_rate": 1.795e-05, "loss": 0.402, "step": 3593 }, { "epoch": 0.20125433979169, "grad_norm": 2.03780460357666, "learning_rate": 1.7955e-05, "loss": 0.4185, "step": 3594 }, { "epoch": 0.20131033710381901, "grad_norm": 1.222267508506775, "learning_rate": 1.796e-05, "loss": 0.4905, "step": 3595 }, { "epoch": 0.20136633441594803, "grad_norm": 1.2427198886871338, "learning_rate": 1.7965e-05, "loss": 0.4275, "step": 3596 }, { "epoch": 0.20142233172807705, "grad_norm": 1.4295467138290405, "learning_rate": 1.797e-05, "loss": 0.4698, "step": 3597 }, { "epoch": 0.20147832904020607, "grad_norm": 1.0984424352645874, "learning_rate": 1.7975e-05, "loss": 0.4006, "step": 3598 }, { "epoch": 0.2015343263523351, "grad_norm": 1.083338975906372, "learning_rate": 1.798e-05, "loss": 0.4357, "step": 3599 }, { "epoch": 0.2015903236644641, "grad_norm": 1.1363531351089478, "learning_rate": 1.7985e-05, "loss": 0.4991, "step": 3600 }, { "epoch": 0.20164632097659313, "grad_norm": 1.5071083307266235, "learning_rate": 1.7990000000000002e-05, "loss": 0.4102, "step": 3601 }, { "epoch": 0.20170231828872215, "grad_norm": 1.1894534826278687, "learning_rate": 1.7995e-05, "loss": 0.3997, "step": 3602 }, { "epoch": 0.20175831560085117, "grad_norm": 1.1389291286468506, "learning_rate": 1.8e-05, "loss": 0.5243, "step": 3603 }, { "epoch": 0.2018143129129802, "grad_norm": 
1.225938081741333, "learning_rate": 1.8005e-05, "loss": 0.3694, "step": 3604 }, { "epoch": 0.2018703102251092, "grad_norm": 1.445648431777954, "learning_rate": 1.8010000000000002e-05, "loss": 0.3522, "step": 3605 }, { "epoch": 0.20192630753723823, "grad_norm": 1.427200436592102, "learning_rate": 1.8015000000000003e-05, "loss": 0.5047, "step": 3606 }, { "epoch": 0.20198230484936722, "grad_norm": 1.5686529874801636, "learning_rate": 1.802e-05, "loss": 0.6975, "step": 3607 }, { "epoch": 0.20203830216149624, "grad_norm": 1.1451834440231323, "learning_rate": 1.8025e-05, "loss": 0.4568, "step": 3608 }, { "epoch": 0.20209429947362526, "grad_norm": 1.3459903001785278, "learning_rate": 1.803e-05, "loss": 0.5214, "step": 3609 }, { "epoch": 0.20215029678575427, "grad_norm": 1.2125604152679443, "learning_rate": 1.8035000000000003e-05, "loss": 0.3619, "step": 3610 }, { "epoch": 0.2022062940978833, "grad_norm": 0.9751166105270386, "learning_rate": 1.804e-05, "loss": 0.5279, "step": 3611 }, { "epoch": 0.2022622914100123, "grad_norm": 1.3576555252075195, "learning_rate": 1.8045e-05, "loss": 0.4481, "step": 3612 }, { "epoch": 0.20231828872214133, "grad_norm": 2.4999935626983643, "learning_rate": 1.805e-05, "loss": 0.4593, "step": 3613 }, { "epoch": 0.20237428603427035, "grad_norm": 0.9820563197135925, "learning_rate": 1.8055e-05, "loss": 0.3046, "step": 3614 }, { "epoch": 0.20243028334639937, "grad_norm": 1.426029920578003, "learning_rate": 1.8060000000000003e-05, "loss": 0.4896, "step": 3615 }, { "epoch": 0.2024862806585284, "grad_norm": 1.2235302925109863, "learning_rate": 1.8065e-05, "loss": 0.3901, "step": 3616 }, { "epoch": 0.2025422779706574, "grad_norm": 1.4202797412872314, "learning_rate": 1.807e-05, "loss": 0.4293, "step": 3617 }, { "epoch": 0.20259827528278643, "grad_norm": 1.1697888374328613, "learning_rate": 1.8075e-05, "loss": 0.4151, "step": 3618 }, { "epoch": 0.20265427259491545, "grad_norm": 1.2334222793579102, "learning_rate": 1.808e-05, "loss": 0.4224, "step": 
3619 }, { "epoch": 0.20271026990704447, "grad_norm": 1.3797261714935303, "learning_rate": 1.8085e-05, "loss": 0.488, "step": 3620 }, { "epoch": 0.20276626721917348, "grad_norm": 0.9996871948242188, "learning_rate": 1.809e-05, "loss": 0.3075, "step": 3621 }, { "epoch": 0.2028222645313025, "grad_norm": 1.453730583190918, "learning_rate": 1.8095000000000002e-05, "loss": 0.394, "step": 3622 }, { "epoch": 0.20287826184343152, "grad_norm": 1.36322820186615, "learning_rate": 1.81e-05, "loss": 0.3731, "step": 3623 }, { "epoch": 0.20293425915556054, "grad_norm": 1.352899432182312, "learning_rate": 1.8105e-05, "loss": 0.5815, "step": 3624 }, { "epoch": 0.20299025646768956, "grad_norm": 1.3098249435424805, "learning_rate": 1.811e-05, "loss": 0.5279, "step": 3625 }, { "epoch": 0.20304625377981858, "grad_norm": 1.2320561408996582, "learning_rate": 1.8115000000000002e-05, "loss": 0.4337, "step": 3626 }, { "epoch": 0.2031022510919476, "grad_norm": 1.6283172369003296, "learning_rate": 1.812e-05, "loss": 0.3657, "step": 3627 }, { "epoch": 0.2031582484040766, "grad_norm": 1.1894358396530151, "learning_rate": 1.8125e-05, "loss": 0.4657, "step": 3628 }, { "epoch": 0.2032142457162056, "grad_norm": 1.1958972215652466, "learning_rate": 1.813e-05, "loss": 0.4009, "step": 3629 }, { "epoch": 0.20327024302833463, "grad_norm": 1.127791404724121, "learning_rate": 1.8135000000000002e-05, "loss": 0.4063, "step": 3630 }, { "epoch": 0.20332624034046365, "grad_norm": 1.1458134651184082, "learning_rate": 1.8140000000000003e-05, "loss": 0.4677, "step": 3631 }, { "epoch": 0.20338223765259267, "grad_norm": 1.2265417575836182, "learning_rate": 1.8145e-05, "loss": 0.4175, "step": 3632 }, { "epoch": 0.2034382349647217, "grad_norm": 1.3974417448043823, "learning_rate": 1.815e-05, "loss": 0.4813, "step": 3633 }, { "epoch": 0.2034942322768507, "grad_norm": 1.2176661491394043, "learning_rate": 1.8154999999999998e-05, "loss": 0.4138, "step": 3634 }, { "epoch": 0.20355022958897973, "grad_norm": 
1.1092934608459473, "learning_rate": 1.8160000000000002e-05, "loss": 0.4615, "step": 3635 }, { "epoch": 0.20360622690110874, "grad_norm": 1.218031883239746, "learning_rate": 1.8165000000000003e-05, "loss": 0.5333, "step": 3636 }, { "epoch": 0.20366222421323776, "grad_norm": 1.1778126955032349, "learning_rate": 1.817e-05, "loss": 0.4052, "step": 3637 }, { "epoch": 0.20371822152536678, "grad_norm": 1.1710573434829712, "learning_rate": 1.8175e-05, "loss": 0.3803, "step": 3638 }, { "epoch": 0.2037742188374958, "grad_norm": 1.1477383375167847, "learning_rate": 1.818e-05, "loss": 0.4362, "step": 3639 }, { "epoch": 0.20383021614962482, "grad_norm": 1.2956640720367432, "learning_rate": 1.8185000000000003e-05, "loss": 0.4826, "step": 3640 }, { "epoch": 0.20388621346175384, "grad_norm": 1.3115049600601196, "learning_rate": 1.819e-05, "loss": 0.56, "step": 3641 }, { "epoch": 0.20394221077388286, "grad_norm": 1.4746919870376587, "learning_rate": 1.8195e-05, "loss": 0.4711, "step": 3642 }, { "epoch": 0.20399820808601188, "grad_norm": 1.923878788948059, "learning_rate": 1.8200000000000002e-05, "loss": 0.5686, "step": 3643 }, { "epoch": 0.2040542053981409, "grad_norm": 1.1285382509231567, "learning_rate": 1.8205e-05, "loss": 0.4183, "step": 3644 }, { "epoch": 0.20411020271026992, "grad_norm": 1.5769509077072144, "learning_rate": 1.8210000000000004e-05, "loss": 0.4979, "step": 3645 }, { "epoch": 0.20416620002239894, "grad_norm": 1.2398681640625, "learning_rate": 1.8215e-05, "loss": 0.4363, "step": 3646 }, { "epoch": 0.20422219733452796, "grad_norm": 1.1484941244125366, "learning_rate": 1.8220000000000002e-05, "loss": 0.391, "step": 3647 }, { "epoch": 0.20427819464665697, "grad_norm": 1.3237804174423218, "learning_rate": 1.8225e-05, "loss": 0.6652, "step": 3648 }, { "epoch": 0.20433419195878597, "grad_norm": 1.0015528202056885, "learning_rate": 1.823e-05, "loss": 0.3516, "step": 3649 }, { "epoch": 0.20439018927091498, "grad_norm": 1.793995976448059, "learning_rate": 1.8235e-05, 
"loss": 0.7392, "step": 3650 }, { "epoch": 0.204446186583044, "grad_norm": 1.415885329246521, "learning_rate": 1.824e-05, "loss": 0.3627, "step": 3651 }, { "epoch": 0.20450218389517302, "grad_norm": 1.268105149269104, "learning_rate": 1.8245000000000002e-05, "loss": 0.4041, "step": 3652 }, { "epoch": 0.20455818120730204, "grad_norm": 1.5501044988632202, "learning_rate": 1.825e-05, "loss": 0.4487, "step": 3653 }, { "epoch": 0.20461417851943106, "grad_norm": 2.0684571266174316, "learning_rate": 1.8255e-05, "loss": 0.4629, "step": 3654 }, { "epoch": 0.20467017583156008, "grad_norm": 1.2597365379333496, "learning_rate": 1.826e-05, "loss": 0.4913, "step": 3655 }, { "epoch": 0.2047261731436891, "grad_norm": 0.9715607166290283, "learning_rate": 1.8265000000000002e-05, "loss": 0.5863, "step": 3656 }, { "epoch": 0.20478217045581812, "grad_norm": 1.2507647275924683, "learning_rate": 1.827e-05, "loss": 0.4449, "step": 3657 }, { "epoch": 0.20483816776794714, "grad_norm": 1.1853148937225342, "learning_rate": 1.8275e-05, "loss": 0.4402, "step": 3658 }, { "epoch": 0.20489416508007616, "grad_norm": 1.6511564254760742, "learning_rate": 1.828e-05, "loss": 0.4787, "step": 3659 }, { "epoch": 0.20495016239220518, "grad_norm": 1.2150965929031372, "learning_rate": 1.8285000000000002e-05, "loss": 0.4149, "step": 3660 }, { "epoch": 0.2050061597043342, "grad_norm": 1.2021454572677612, "learning_rate": 1.8290000000000003e-05, "loss": 0.4282, "step": 3661 }, { "epoch": 0.20506215701646321, "grad_norm": 1.0750913619995117, "learning_rate": 1.8295e-05, "loss": 0.411, "step": 3662 }, { "epoch": 0.20511815432859223, "grad_norm": 1.0906314849853516, "learning_rate": 1.83e-05, "loss": 0.4069, "step": 3663 }, { "epoch": 0.20517415164072125, "grad_norm": 1.1319752931594849, "learning_rate": 1.8305e-05, "loss": 0.4575, "step": 3664 }, { "epoch": 0.20523014895285027, "grad_norm": 1.0121747255325317, "learning_rate": 1.8310000000000003e-05, "loss": 0.4482, "step": 3665 }, { "epoch": 0.2052861462649793, 
"grad_norm": 1.1749929189682007, "learning_rate": 1.8315e-05, "loss": 0.3555, "step": 3666 }, { "epoch": 0.2053421435771083, "grad_norm": 1.1696017980575562, "learning_rate": 1.832e-05, "loss": 0.3813, "step": 3667 }, { "epoch": 0.20539814088923733, "grad_norm": 1.187969446182251, "learning_rate": 1.8325e-05, "loss": 0.5685, "step": 3668 }, { "epoch": 0.20545413820136632, "grad_norm": 1.239634394645691, "learning_rate": 1.833e-05, "loss": 0.4809, "step": 3669 }, { "epoch": 0.20551013551349534, "grad_norm": 1.0861446857452393, "learning_rate": 1.8335000000000003e-05, "loss": 0.3965, "step": 3670 }, { "epoch": 0.20556613282562436, "grad_norm": 1.1064029932022095, "learning_rate": 1.834e-05, "loss": 0.5242, "step": 3671 }, { "epoch": 0.20562213013775338, "grad_norm": 1.0597418546676636, "learning_rate": 1.8345e-05, "loss": 0.4466, "step": 3672 }, { "epoch": 0.2056781274498824, "grad_norm": 1.2875657081604004, "learning_rate": 1.8350000000000002e-05, "loss": 0.3883, "step": 3673 }, { "epoch": 0.20573412476201142, "grad_norm": 1.3540278673171997, "learning_rate": 1.8355e-05, "loss": 0.5198, "step": 3674 }, { "epoch": 0.20579012207414044, "grad_norm": 1.4080015420913696, "learning_rate": 1.8360000000000004e-05, "loss": 0.4586, "step": 3675 }, { "epoch": 0.20584611938626945, "grad_norm": 1.2323344945907593, "learning_rate": 1.8365e-05, "loss": 0.5165, "step": 3676 }, { "epoch": 0.20590211669839847, "grad_norm": 1.1104493141174316, "learning_rate": 1.8370000000000002e-05, "loss": 0.3237, "step": 3677 }, { "epoch": 0.2059581140105275, "grad_norm": 1.1640260219573975, "learning_rate": 1.8375e-05, "loss": 0.4496, "step": 3678 }, { "epoch": 0.2060141113226565, "grad_norm": 1.1987717151641846, "learning_rate": 1.838e-05, "loss": 0.383, "step": 3679 }, { "epoch": 0.20607010863478553, "grad_norm": 1.0047123432159424, "learning_rate": 1.8385e-05, "loss": 0.4024, "step": 3680 }, { "epoch": 0.20612610594691455, "grad_norm": 1.3158748149871826, "learning_rate": 
1.8390000000000002e-05, "loss": 0.3559, "step": 3681 }, { "epoch": 0.20618210325904357, "grad_norm": 1.124266505241394, "learning_rate": 1.8395000000000003e-05, "loss": 0.3636, "step": 3682 }, { "epoch": 0.2062381005711726, "grad_norm": 1.1218291521072388, "learning_rate": 1.84e-05, "loss": 0.4406, "step": 3683 }, { "epoch": 0.2062940978833016, "grad_norm": 1.4445165395736694, "learning_rate": 1.8405e-05, "loss": 0.6085, "step": 3684 }, { "epoch": 0.20635009519543063, "grad_norm": 1.0143694877624512, "learning_rate": 1.841e-05, "loss": 0.3806, "step": 3685 }, { "epoch": 0.20640609250755965, "grad_norm": 1.389467477798462, "learning_rate": 1.8415000000000002e-05, "loss": 0.5131, "step": 3686 }, { "epoch": 0.20646208981968867, "grad_norm": 1.1578280925750732, "learning_rate": 1.842e-05, "loss": 0.4549, "step": 3687 }, { "epoch": 0.20651808713181768, "grad_norm": 1.009496808052063, "learning_rate": 1.8425e-05, "loss": 0.3502, "step": 3688 }, { "epoch": 0.2065740844439467, "grad_norm": 1.3251539468765259, "learning_rate": 1.843e-05, "loss": 0.4412, "step": 3689 }, { "epoch": 0.2066300817560757, "grad_norm": 1.3728529214859009, "learning_rate": 1.8435000000000002e-05, "loss": 0.4851, "step": 3690 }, { "epoch": 0.20668607906820471, "grad_norm": 1.215119481086731, "learning_rate": 1.8440000000000003e-05, "loss": 0.4707, "step": 3691 }, { "epoch": 0.20674207638033373, "grad_norm": 1.2378907203674316, "learning_rate": 1.8445e-05, "loss": 0.4372, "step": 3692 }, { "epoch": 0.20679807369246275, "grad_norm": 1.4404093027114868, "learning_rate": 1.845e-05, "loss": 0.4095, "step": 3693 }, { "epoch": 0.20685407100459177, "grad_norm": 1.1341023445129395, "learning_rate": 1.8455e-05, "loss": 0.5088, "step": 3694 }, { "epoch": 0.2069100683167208, "grad_norm": 1.326324701309204, "learning_rate": 1.846e-05, "loss": 0.3672, "step": 3695 }, { "epoch": 0.2069660656288498, "grad_norm": 1.1963605880737305, "learning_rate": 1.8465e-05, "loss": 0.4715, "step": 3696 }, { "epoch": 
0.20702206294097883, "grad_norm": 1.351576566696167, "learning_rate": 1.847e-05, "loss": 0.538, "step": 3697 }, { "epoch": 0.20707806025310785, "grad_norm": 1.4372308254241943, "learning_rate": 1.8475000000000002e-05, "loss": 0.3944, "step": 3698 }, { "epoch": 0.20713405756523687, "grad_norm": 1.6565874814987183, "learning_rate": 1.848e-05, "loss": 0.4954, "step": 3699 }, { "epoch": 0.2071900548773659, "grad_norm": 1.2670085430145264, "learning_rate": 1.8485e-05, "loss": 0.3647, "step": 3700 }, { "epoch": 0.2072460521894949, "grad_norm": 1.227007508277893, "learning_rate": 1.849e-05, "loss": 0.3455, "step": 3701 }, { "epoch": 0.20730204950162393, "grad_norm": 1.4911811351776123, "learning_rate": 1.8495e-05, "loss": 0.5222, "step": 3702 }, { "epoch": 0.20735804681375294, "grad_norm": 1.2862108945846558, "learning_rate": 1.85e-05, "loss": 0.4129, "step": 3703 }, { "epoch": 0.20741404412588196, "grad_norm": 1.2085920572280884, "learning_rate": 1.8505e-05, "loss": 0.4963, "step": 3704 }, { "epoch": 0.20747004143801098, "grad_norm": 1.2827470302581787, "learning_rate": 1.851e-05, "loss": 0.4467, "step": 3705 }, { "epoch": 0.20752603875014, "grad_norm": 1.2292492389678955, "learning_rate": 1.8515e-05, "loss": 0.5528, "step": 3706 }, { "epoch": 0.20758203606226902, "grad_norm": 1.120884895324707, "learning_rate": 1.8520000000000002e-05, "loss": 0.3977, "step": 3707 }, { "epoch": 0.20763803337439804, "grad_norm": 1.2557817697525024, "learning_rate": 1.8525e-05, "loss": 0.3356, "step": 3708 }, { "epoch": 0.20769403068652706, "grad_norm": 1.2214707136154175, "learning_rate": 1.853e-05, "loss": 0.3781, "step": 3709 }, { "epoch": 0.20775002799865608, "grad_norm": 1.42682945728302, "learning_rate": 1.8535e-05, "loss": 0.5651, "step": 3710 }, { "epoch": 0.20780602531078507, "grad_norm": 1.4732400178909302, "learning_rate": 1.8540000000000002e-05, "loss": 0.5905, "step": 3711 }, { "epoch": 0.2078620226229141, "grad_norm": 1.1726034879684448, "learning_rate": 
1.8545000000000003e-05, "loss": 0.5278, "step": 3712 }, { "epoch": 0.2079180199350431, "grad_norm": 1.4470715522766113, "learning_rate": 1.855e-05, "loss": 0.4314, "step": 3713 }, { "epoch": 0.20797401724717213, "grad_norm": 2.5902199745178223, "learning_rate": 1.8555e-05, "loss": 0.3967, "step": 3714 }, { "epoch": 0.20803001455930115, "grad_norm": 1.2021496295928955, "learning_rate": 1.856e-05, "loss": 0.3446, "step": 3715 }, { "epoch": 0.20808601187143017, "grad_norm": 1.0416159629821777, "learning_rate": 1.8565000000000003e-05, "loss": 0.3733, "step": 3716 }, { "epoch": 0.20814200918355918, "grad_norm": 1.1497995853424072, "learning_rate": 1.857e-05, "loss": 0.4278, "step": 3717 }, { "epoch": 0.2081980064956882, "grad_norm": 1.2033779621124268, "learning_rate": 1.8575e-05, "loss": 0.4386, "step": 3718 }, { "epoch": 0.20825400380781722, "grad_norm": 1.0341720581054688, "learning_rate": 1.858e-05, "loss": 0.4024, "step": 3719 }, { "epoch": 0.20831000111994624, "grad_norm": 1.464414358139038, "learning_rate": 1.8585e-05, "loss": 0.3869, "step": 3720 }, { "epoch": 0.20836599843207526, "grad_norm": 1.1235709190368652, "learning_rate": 1.8590000000000003e-05, "loss": 0.5638, "step": 3721 }, { "epoch": 0.20842199574420428, "grad_norm": 1.870194911956787, "learning_rate": 1.8595e-05, "loss": 0.4259, "step": 3722 }, { "epoch": 0.2084779930563333, "grad_norm": 1.689988613128662, "learning_rate": 1.86e-05, "loss": 0.5645, "step": 3723 }, { "epoch": 0.20853399036846232, "grad_norm": 1.531330943107605, "learning_rate": 1.8605e-05, "loss": 0.3632, "step": 3724 }, { "epoch": 0.20858998768059134, "grad_norm": 1.2564024925231934, "learning_rate": 1.861e-05, "loss": 0.3609, "step": 3725 }, { "epoch": 0.20864598499272036, "grad_norm": 1.226952314376831, "learning_rate": 1.8615e-05, "loss": 0.4338, "step": 3726 }, { "epoch": 0.20870198230484938, "grad_norm": 1.4002666473388672, "learning_rate": 1.862e-05, "loss": 0.4459, "step": 3727 }, { "epoch": 0.2087579796169784, "grad_norm": 
1.1037551164627075, "learning_rate": 1.8625000000000002e-05, "loss": 0.4196, "step": 3728 }, { "epoch": 0.20881397692910741, "grad_norm": 1.1568371057510376, "learning_rate": 1.863e-05, "loss": 0.6159, "step": 3729 }, { "epoch": 0.20886997424123643, "grad_norm": 1.2428112030029297, "learning_rate": 1.8635e-05, "loss": 0.5167, "step": 3730 }, { "epoch": 0.20892597155336542, "grad_norm": 1.3571763038635254, "learning_rate": 1.864e-05, "loss": 0.4118, "step": 3731 }, { "epoch": 0.20898196886549444, "grad_norm": 1.3883105516433716, "learning_rate": 1.8645000000000002e-05, "loss": 0.4858, "step": 3732 }, { "epoch": 0.20903796617762346, "grad_norm": 1.0853148698806763, "learning_rate": 1.865e-05, "loss": 0.4217, "step": 3733 }, { "epoch": 0.20909396348975248, "grad_norm": 1.1567766666412354, "learning_rate": 1.8655e-05, "loss": 0.4391, "step": 3734 }, { "epoch": 0.2091499608018815, "grad_norm": 1.1084656715393066, "learning_rate": 1.866e-05, "loss": 0.4222, "step": 3735 }, { "epoch": 0.20920595811401052, "grad_norm": 1.4658235311508179, "learning_rate": 1.8665000000000002e-05, "loss": 0.4633, "step": 3736 }, { "epoch": 0.20926195542613954, "grad_norm": 1.1171648502349854, "learning_rate": 1.8670000000000003e-05, "loss": 0.4406, "step": 3737 }, { "epoch": 0.20931795273826856, "grad_norm": 1.4457217454910278, "learning_rate": 1.8675e-05, "loss": 0.4031, "step": 3738 }, { "epoch": 0.20937395005039758, "grad_norm": 1.199464201927185, "learning_rate": 1.868e-05, "loss": 0.4826, "step": 3739 }, { "epoch": 0.2094299473625266, "grad_norm": 1.1101138591766357, "learning_rate": 1.8684999999999998e-05, "loss": 0.4125, "step": 3740 }, { "epoch": 0.20948594467465562, "grad_norm": 1.6544722318649292, "learning_rate": 1.8690000000000002e-05, "loss": 0.4482, "step": 3741 }, { "epoch": 0.20954194198678464, "grad_norm": 1.0128848552703857, "learning_rate": 1.8695e-05, "loss": 0.3927, "step": 3742 }, { "epoch": 0.20959793929891365, "grad_norm": 1.020386815071106, "learning_rate": 1.87e-05, 
"loss": 0.317, "step": 3743 }, { "epoch": 0.20965393661104267, "grad_norm": 1.2744553089141846, "learning_rate": 1.8705e-05, "loss": 0.4031, "step": 3744 }, { "epoch": 0.2097099339231717, "grad_norm": 1.2086420059204102, "learning_rate": 1.871e-05, "loss": 0.4046, "step": 3745 }, { "epoch": 0.2097659312353007, "grad_norm": 1.2720385789871216, "learning_rate": 1.8715000000000003e-05, "loss": 0.4306, "step": 3746 }, { "epoch": 0.20982192854742973, "grad_norm": 1.245012640953064, "learning_rate": 1.872e-05, "loss": 0.4181, "step": 3747 }, { "epoch": 0.20987792585955875, "grad_norm": 1.219249963760376, "learning_rate": 1.8725e-05, "loss": 0.4987, "step": 3748 }, { "epoch": 0.20993392317168777, "grad_norm": 1.035294532775879, "learning_rate": 1.8730000000000002e-05, "loss": 0.3916, "step": 3749 }, { "epoch": 0.2099899204838168, "grad_norm": 1.2753604650497437, "learning_rate": 1.8735e-05, "loss": 0.4066, "step": 3750 }, { "epoch": 0.2100459177959458, "grad_norm": 2.5616674423217773, "learning_rate": 1.8740000000000004e-05, "loss": 0.5153, "step": 3751 }, { "epoch": 0.2101019151080748, "grad_norm": 1.2599841356277466, "learning_rate": 1.8745e-05, "loss": 0.4677, "step": 3752 }, { "epoch": 0.21015791242020382, "grad_norm": 1.5064753293991089, "learning_rate": 1.8750000000000002e-05, "loss": 0.559, "step": 3753 }, { "epoch": 0.21021390973233284, "grad_norm": 1.3997769355773926, "learning_rate": 1.8755e-05, "loss": 0.4659, "step": 3754 }, { "epoch": 0.21026990704446186, "grad_norm": 1.1182029247283936, "learning_rate": 1.876e-05, "loss": 0.2785, "step": 3755 }, { "epoch": 0.21032590435659088, "grad_norm": 1.0288310050964355, "learning_rate": 1.8765e-05, "loss": 0.3188, "step": 3756 }, { "epoch": 0.2103819016687199, "grad_norm": 1.3543319702148438, "learning_rate": 1.877e-05, "loss": 0.4312, "step": 3757 }, { "epoch": 0.21043789898084891, "grad_norm": 1.339989423751831, "learning_rate": 1.8775000000000002e-05, "loss": 0.5552, "step": 3758 }, { "epoch": 0.21049389629297793, 
"grad_norm": 1.3205184936523438, "learning_rate": 1.878e-05, "loss": 0.6186, "step": 3759 }, { "epoch": 0.21054989360510695, "grad_norm": 1.0515899658203125, "learning_rate": 1.8785e-05, "loss": 0.2768, "step": 3760 }, { "epoch": 0.21060589091723597, "grad_norm": 1.117658257484436, "learning_rate": 1.879e-05, "loss": 0.5759, "step": 3761 }, { "epoch": 0.210661888229365, "grad_norm": 1.1721341609954834, "learning_rate": 1.8795000000000002e-05, "loss": 0.369, "step": 3762 }, { "epoch": 0.210717885541494, "grad_norm": 1.2887459993362427, "learning_rate": 1.88e-05, "loss": 0.4338, "step": 3763 }, { "epoch": 0.21077388285362303, "grad_norm": 1.096023440361023, "learning_rate": 1.8805e-05, "loss": 0.3805, "step": 3764 }, { "epoch": 0.21082988016575205, "grad_norm": 1.596562147140503, "learning_rate": 1.881e-05, "loss": 0.4453, "step": 3765 }, { "epoch": 0.21088587747788107, "grad_norm": 1.3703455924987793, "learning_rate": 1.8815000000000002e-05, "loss": 0.4875, "step": 3766 }, { "epoch": 0.2109418747900101, "grad_norm": 1.202178716659546, "learning_rate": 1.8820000000000003e-05, "loss": 0.4183, "step": 3767 }, { "epoch": 0.2109978721021391, "grad_norm": 1.2643823623657227, "learning_rate": 1.8825e-05, "loss": 0.3607, "step": 3768 }, { "epoch": 0.21105386941426812, "grad_norm": 1.2469091415405273, "learning_rate": 1.883e-05, "loss": 0.5585, "step": 3769 }, { "epoch": 0.21110986672639714, "grad_norm": 1.277901530265808, "learning_rate": 1.8835e-05, "loss": 0.4921, "step": 3770 }, { "epoch": 0.21116586403852616, "grad_norm": 1.6774253845214844, "learning_rate": 1.8840000000000003e-05, "loss": 0.7642, "step": 3771 }, { "epoch": 0.21122186135065518, "grad_norm": 1.413717269897461, "learning_rate": 1.8845e-05, "loss": 0.4913, "step": 3772 }, { "epoch": 0.21127785866278417, "grad_norm": 1.151466965675354, "learning_rate": 1.885e-05, "loss": 0.3912, "step": 3773 }, { "epoch": 0.2113338559749132, "grad_norm": 1.195562720298767, "learning_rate": 1.8855e-05, "loss": 0.3835, 
"step": 3774 }, { "epoch": 0.2113898532870422, "grad_norm": 1.1106895208358765, "learning_rate": 1.886e-05, "loss": 0.4649, "step": 3775 }, { "epoch": 0.21144585059917123, "grad_norm": 1.3207541704177856, "learning_rate": 1.8865000000000003e-05, "loss": 0.5763, "step": 3776 }, { "epoch": 0.21150184791130025, "grad_norm": 1.8818163871765137, "learning_rate": 1.887e-05, "loss": 0.3591, "step": 3777 }, { "epoch": 0.21155784522342927, "grad_norm": 1.256289005279541, "learning_rate": 1.8875e-05, "loss": 0.4754, "step": 3778 }, { "epoch": 0.2116138425355583, "grad_norm": 1.5186047554016113, "learning_rate": 1.888e-05, "loss": 0.6175, "step": 3779 }, { "epoch": 0.2116698398476873, "grad_norm": 1.3236587047576904, "learning_rate": 1.8885e-05, "loss": 0.4407, "step": 3780 }, { "epoch": 0.21172583715981633, "grad_norm": 1.3085664510726929, "learning_rate": 1.8890000000000004e-05, "loss": 0.4197, "step": 3781 }, { "epoch": 0.21178183447194535, "grad_norm": 1.1369835138320923, "learning_rate": 1.8895e-05, "loss": 0.3776, "step": 3782 }, { "epoch": 0.21183783178407437, "grad_norm": 1.1113334894180298, "learning_rate": 1.8900000000000002e-05, "loss": 0.3816, "step": 3783 }, { "epoch": 0.21189382909620338, "grad_norm": 1.2961411476135254, "learning_rate": 1.8905e-05, "loss": 0.4108, "step": 3784 }, { "epoch": 0.2119498264083324, "grad_norm": 1.045507550239563, "learning_rate": 1.891e-05, "loss": 0.3978, "step": 3785 }, { "epoch": 0.21200582372046142, "grad_norm": 1.2163691520690918, "learning_rate": 1.8915e-05, "loss": 0.4174, "step": 3786 }, { "epoch": 0.21206182103259044, "grad_norm": 1.1749398708343506, "learning_rate": 1.8920000000000002e-05, "loss": 0.5375, "step": 3787 }, { "epoch": 0.21211781834471946, "grad_norm": 1.2830992937088013, "learning_rate": 1.8925000000000003e-05, "loss": 0.4793, "step": 3788 }, { "epoch": 0.21217381565684848, "grad_norm": 1.1235780715942383, "learning_rate": 1.893e-05, "loss": 0.4108, "step": 3789 }, { "epoch": 0.2122298129689775, "grad_norm": 
1.4002350568771362, "learning_rate": 1.8935e-05, "loss": 0.6034, "step": 3790 }, { "epoch": 0.21228581028110652, "grad_norm": 1.2956151962280273, "learning_rate": 1.894e-05, "loss": 0.5659, "step": 3791 }, { "epoch": 0.21234180759323554, "grad_norm": 1.1184310913085938, "learning_rate": 1.8945000000000002e-05, "loss": 0.3904, "step": 3792 }, { "epoch": 0.21239780490536453, "grad_norm": 1.0486329793930054, "learning_rate": 1.895e-05, "loss": 0.3441, "step": 3793 }, { "epoch": 0.21245380221749355, "grad_norm": 1.1602412462234497, "learning_rate": 1.8955e-05, "loss": 0.4124, "step": 3794 }, { "epoch": 0.21250979952962257, "grad_norm": 1.1501165628433228, "learning_rate": 1.896e-05, "loss": 0.4024, "step": 3795 }, { "epoch": 0.2125657968417516, "grad_norm": 1.0928531885147095, "learning_rate": 1.8965000000000002e-05, "loss": 0.3906, "step": 3796 }, { "epoch": 0.2126217941538806, "grad_norm": 1.1243789196014404, "learning_rate": 1.8970000000000003e-05, "loss": 0.3966, "step": 3797 }, { "epoch": 0.21267779146600962, "grad_norm": 1.264003872871399, "learning_rate": 1.8975e-05, "loss": 0.5284, "step": 3798 }, { "epoch": 0.21273378877813864, "grad_norm": 1.400215983390808, "learning_rate": 1.898e-05, "loss": 0.4005, "step": 3799 }, { "epoch": 0.21278978609026766, "grad_norm": 1.1315284967422485, "learning_rate": 1.8985e-05, "loss": 0.3573, "step": 3800 }, { "epoch": 0.21284578340239668, "grad_norm": 1.10823392868042, "learning_rate": 1.8990000000000003e-05, "loss": 0.4071, "step": 3801 }, { "epoch": 0.2129017807145257, "grad_norm": 1.2107127904891968, "learning_rate": 1.8995e-05, "loss": 0.4024, "step": 3802 }, { "epoch": 0.21295777802665472, "grad_norm": 1.29421067237854, "learning_rate": 1.9e-05, "loss": 0.4936, "step": 3803 }, { "epoch": 0.21301377533878374, "grad_norm": 2.305823802947998, "learning_rate": 1.9005000000000002e-05, "loss": 0.326, "step": 3804 }, { "epoch": 0.21306977265091276, "grad_norm": 1.4411567449569702, "learning_rate": 1.901e-05, "loss": 0.5825, 
"step": 3805 }, { "epoch": 0.21312576996304178, "grad_norm": 0.961778461933136, "learning_rate": 1.9015000000000003e-05, "loss": 0.3066, "step": 3806 }, { "epoch": 0.2131817672751708, "grad_norm": 1.2145847082138062, "learning_rate": 1.902e-05, "loss": 0.4319, "step": 3807 }, { "epoch": 0.21323776458729982, "grad_norm": 1.2316093444824219, "learning_rate": 1.9025e-05, "loss": 0.3642, "step": 3808 }, { "epoch": 0.21329376189942884, "grad_norm": 1.1941778659820557, "learning_rate": 1.903e-05, "loss": 0.4714, "step": 3809 }, { "epoch": 0.21334975921155785, "grad_norm": 1.402944564819336, "learning_rate": 1.9035e-05, "loss": 0.4408, "step": 3810 }, { "epoch": 0.21340575652368687, "grad_norm": 1.1802160739898682, "learning_rate": 1.904e-05, "loss": 0.3552, "step": 3811 }, { "epoch": 0.2134617538358159, "grad_norm": 1.1805145740509033, "learning_rate": 1.9045e-05, "loss": 0.4566, "step": 3812 }, { "epoch": 0.2135177511479449, "grad_norm": 1.1233844757080078, "learning_rate": 1.9050000000000002e-05, "loss": 0.3404, "step": 3813 }, { "epoch": 0.2135737484600739, "grad_norm": 1.4245500564575195, "learning_rate": 1.9055e-05, "loss": 0.4563, "step": 3814 }, { "epoch": 0.21362974577220292, "grad_norm": 1.065510869026184, "learning_rate": 1.906e-05, "loss": 0.3891, "step": 3815 }, { "epoch": 0.21368574308433194, "grad_norm": 1.2466912269592285, "learning_rate": 1.9064999999999998e-05, "loss": 0.4299, "step": 3816 }, { "epoch": 0.21374174039646096, "grad_norm": 1.3753843307495117, "learning_rate": 1.9070000000000002e-05, "loss": 0.5251, "step": 3817 }, { "epoch": 0.21379773770858998, "grad_norm": 1.0985254049301147, "learning_rate": 1.9075000000000003e-05, "loss": 0.4184, "step": 3818 }, { "epoch": 0.213853735020719, "grad_norm": 1.2996129989624023, "learning_rate": 1.908e-05, "loss": 0.3592, "step": 3819 }, { "epoch": 0.21390973233284802, "grad_norm": 1.3616174459457397, "learning_rate": 1.9085e-05, "loss": 0.4883, "step": 3820 }, { "epoch": 0.21396572964497704, "grad_norm": 
1.2674696445465088, "learning_rate": 1.909e-05, "loss": 0.5054, "step": 3821 }, { "epoch": 0.21402172695710606, "grad_norm": 1.243328332901001, "learning_rate": 1.9095000000000003e-05, "loss": 0.3761, "step": 3822 }, { "epoch": 0.21407772426923508, "grad_norm": 1.2152774333953857, "learning_rate": 1.91e-05, "loss": 0.4544, "step": 3823 }, { "epoch": 0.2141337215813641, "grad_norm": 1.3071707487106323, "learning_rate": 1.9105e-05, "loss": 0.4506, "step": 3824 }, { "epoch": 0.21418971889349311, "grad_norm": 1.2705812454223633, "learning_rate": 1.911e-05, "loss": 0.4259, "step": 3825 }, { "epoch": 0.21424571620562213, "grad_norm": 1.104065179824829, "learning_rate": 1.9115e-05, "loss": 0.4035, "step": 3826 }, { "epoch": 0.21430171351775115, "grad_norm": 1.5568335056304932, "learning_rate": 1.9120000000000003e-05, "loss": 0.3943, "step": 3827 }, { "epoch": 0.21435771082988017, "grad_norm": 1.2089629173278809, "learning_rate": 1.9125e-05, "loss": 0.4169, "step": 3828 }, { "epoch": 0.2144137081420092, "grad_norm": 1.3171229362487793, "learning_rate": 1.913e-05, "loss": 0.4887, "step": 3829 }, { "epoch": 0.2144697054541382, "grad_norm": 1.2432467937469482, "learning_rate": 1.9135e-05, "loss": 0.3612, "step": 3830 }, { "epoch": 0.21452570276626723, "grad_norm": 1.291901707649231, "learning_rate": 1.914e-05, "loss": 0.4334, "step": 3831 }, { "epoch": 0.21458170007839625, "grad_norm": 1.3384559154510498, "learning_rate": 1.9145e-05, "loss": 0.648, "step": 3832 }, { "epoch": 0.21463769739052527, "grad_norm": 1.176081657409668, "learning_rate": 1.915e-05, "loss": 0.4228, "step": 3833 }, { "epoch": 0.2146936947026543, "grad_norm": 1.0742981433868408, "learning_rate": 1.9155000000000002e-05, "loss": 0.386, "step": 3834 }, { "epoch": 0.21474969201478328, "grad_norm": 1.1499167680740356, "learning_rate": 1.916e-05, "loss": 0.457, "step": 3835 }, { "epoch": 0.2148056893269123, "grad_norm": 1.1521497964859009, "learning_rate": 1.9165e-05, "loss": 0.4305, "step": 3836 }, { "epoch": 
0.21486168663904132, "grad_norm": 1.220842957496643, "learning_rate": 1.917e-05, "loss": 0.4883, "step": 3837 }, { "epoch": 0.21491768395117034, "grad_norm": 1.277765154838562, "learning_rate": 1.9175000000000002e-05, "loss": 0.5959, "step": 3838 }, { "epoch": 0.21497368126329935, "grad_norm": 1.2280707359313965, "learning_rate": 1.918e-05, "loss": 0.3969, "step": 3839 }, { "epoch": 0.21502967857542837, "grad_norm": 1.1846415996551514, "learning_rate": 1.9185e-05, "loss": 0.4591, "step": 3840 }, { "epoch": 0.2150856758875574, "grad_norm": 1.2512959241867065, "learning_rate": 1.919e-05, "loss": 0.4162, "step": 3841 }, { "epoch": 0.2151416731996864, "grad_norm": 1.3382731676101685, "learning_rate": 1.9195000000000002e-05, "loss": 0.4439, "step": 3842 }, { "epoch": 0.21519767051181543, "grad_norm": 1.1869760751724243, "learning_rate": 1.9200000000000003e-05, "loss": 0.3983, "step": 3843 }, { "epoch": 0.21525366782394445, "grad_norm": 1.209533929824829, "learning_rate": 1.9205e-05, "loss": 0.4118, "step": 3844 }, { "epoch": 0.21530966513607347, "grad_norm": 1.108127474784851, "learning_rate": 1.921e-05, "loss": 0.4376, "step": 3845 }, { "epoch": 0.2153656624482025, "grad_norm": 1.2668966054916382, "learning_rate": 1.9214999999999998e-05, "loss": 0.4207, "step": 3846 }, { "epoch": 0.2154216597603315, "grad_norm": 1.006713628768921, "learning_rate": 1.9220000000000002e-05, "loss": 0.4193, "step": 3847 }, { "epoch": 0.21547765707246053, "grad_norm": 1.2234450578689575, "learning_rate": 1.9225e-05, "loss": 0.4321, "step": 3848 }, { "epoch": 0.21553365438458955, "grad_norm": 1.158087134361267, "learning_rate": 1.923e-05, "loss": 0.4531, "step": 3849 }, { "epoch": 0.21558965169671856, "grad_norm": 1.0506086349487305, "learning_rate": 1.9235e-05, "loss": 0.3995, "step": 3850 }, { "epoch": 0.21564564900884758, "grad_norm": 1.504860520362854, "learning_rate": 1.924e-05, "loss": 0.4866, "step": 3851 }, { "epoch": 0.2157016463209766, "grad_norm": 1.173839807510376, 
"learning_rate": 1.9245000000000003e-05, "loss": 0.3839, "step": 3852 }, { "epoch": 0.21575764363310562, "grad_norm": 1.140916347503662, "learning_rate": 1.925e-05, "loss": 0.4344, "step": 3853 }, { "epoch": 0.21581364094523464, "grad_norm": 1.2121093273162842, "learning_rate": 1.9255e-05, "loss": 0.4094, "step": 3854 }, { "epoch": 0.21586963825736363, "grad_norm": 0.9509993195533752, "learning_rate": 1.9260000000000002e-05, "loss": 0.4051, "step": 3855 }, { "epoch": 0.21592563556949265, "grad_norm": 1.1066420078277588, "learning_rate": 1.9265e-05, "loss": 0.5323, "step": 3856 }, { "epoch": 0.21598163288162167, "grad_norm": 1.3245099782943726, "learning_rate": 1.9270000000000004e-05, "loss": 0.3931, "step": 3857 }, { "epoch": 0.2160376301937507, "grad_norm": 1.0722681283950806, "learning_rate": 1.9275e-05, "loss": 0.3758, "step": 3858 }, { "epoch": 0.2160936275058797, "grad_norm": 1.0039154291152954, "learning_rate": 1.9280000000000002e-05, "loss": 0.294, "step": 3859 }, { "epoch": 0.21614962481800873, "grad_norm": 1.2737786769866943, "learning_rate": 1.9285e-05, "loss": 0.3626, "step": 3860 }, { "epoch": 0.21620562213013775, "grad_norm": 1.5499736070632935, "learning_rate": 1.929e-05, "loss": 0.4792, "step": 3861 }, { "epoch": 0.21626161944226677, "grad_norm": 1.185977578163147, "learning_rate": 1.9295e-05, "loss": 0.3455, "step": 3862 }, { "epoch": 0.2163176167543958, "grad_norm": 1.3004982471466064, "learning_rate": 1.93e-05, "loss": 0.4465, "step": 3863 }, { "epoch": 0.2163736140665248, "grad_norm": 1.3049736022949219, "learning_rate": 1.9305000000000002e-05, "loss": 0.4994, "step": 3864 }, { "epoch": 0.21642961137865382, "grad_norm": 1.0772696733474731, "learning_rate": 1.931e-05, "loss": 0.3288, "step": 3865 }, { "epoch": 0.21648560869078284, "grad_norm": 1.2598570585250854, "learning_rate": 1.9315e-05, "loss": 0.4872, "step": 3866 }, { "epoch": 0.21654160600291186, "grad_norm": 1.141111135482788, "learning_rate": 1.932e-05, "loss": 0.3861, "step": 3867 }, { 
"epoch": 0.21659760331504088, "grad_norm": 1.1527847051620483, "learning_rate": 1.9325000000000002e-05, "loss": 0.3721, "step": 3868 }, { "epoch": 0.2166536006271699, "grad_norm": 1.2805637121200562, "learning_rate": 1.933e-05, "loss": 0.4316, "step": 3869 }, { "epoch": 0.21670959793929892, "grad_norm": 1.1219383478164673, "learning_rate": 1.9335e-05, "loss": 0.3921, "step": 3870 }, { "epoch": 0.21676559525142794, "grad_norm": 1.264259696006775, "learning_rate": 1.934e-05, "loss": 0.518, "step": 3871 }, { "epoch": 0.21682159256355696, "grad_norm": 1.3242542743682861, "learning_rate": 1.9345000000000002e-05, "loss": 0.7451, "step": 3872 }, { "epoch": 0.21687758987568598, "grad_norm": 1.1558231115341187, "learning_rate": 1.9350000000000003e-05, "loss": 0.3191, "step": 3873 }, { "epoch": 0.216933587187815, "grad_norm": 1.0650345087051392, "learning_rate": 1.9355e-05, "loss": 0.3282, "step": 3874 }, { "epoch": 0.21698958449994402, "grad_norm": 1.3427801132202148, "learning_rate": 1.936e-05, "loss": 0.4338, "step": 3875 }, { "epoch": 0.217045581812073, "grad_norm": 1.2418391704559326, "learning_rate": 1.9365e-05, "loss": 0.3861, "step": 3876 }, { "epoch": 0.21710157912420203, "grad_norm": 1.1236191987991333, "learning_rate": 1.9370000000000003e-05, "loss": 0.3656, "step": 3877 }, { "epoch": 0.21715757643633105, "grad_norm": 1.2333683967590332, "learning_rate": 1.9375e-05, "loss": 0.455, "step": 3878 }, { "epoch": 0.21721357374846006, "grad_norm": 1.2741773128509521, "learning_rate": 1.938e-05, "loss": 0.5378, "step": 3879 }, { "epoch": 0.21726957106058908, "grad_norm": 1.2516076564788818, "learning_rate": 1.9385e-05, "loss": 0.5302, "step": 3880 }, { "epoch": 0.2173255683727181, "grad_norm": 1.4284601211547852, "learning_rate": 1.939e-05, "loss": 0.5134, "step": 3881 }, { "epoch": 0.21738156568484712, "grad_norm": 1.147981882095337, "learning_rate": 1.9395000000000003e-05, "loss": 0.4111, "step": 3882 }, { "epoch": 0.21743756299697614, "grad_norm": 1.3357466459274292, 
"learning_rate": 1.94e-05, "loss": 0.4595, "step": 3883 }, { "epoch": 0.21749356030910516, "grad_norm": 1.2503118515014648, "learning_rate": 1.9405e-05, "loss": 0.4039, "step": 3884 }, { "epoch": 0.21754955762123418, "grad_norm": 1.1886709928512573, "learning_rate": 1.941e-05, "loss": 0.5563, "step": 3885 }, { "epoch": 0.2176055549333632, "grad_norm": 1.2082996368408203, "learning_rate": 1.9415e-05, "loss": 0.386, "step": 3886 }, { "epoch": 0.21766155224549222, "grad_norm": 1.1673897504806519, "learning_rate": 1.942e-05, "loss": 0.4305, "step": 3887 }, { "epoch": 0.21771754955762124, "grad_norm": 1.125820279121399, "learning_rate": 1.9425e-05, "loss": 0.3498, "step": 3888 }, { "epoch": 0.21777354686975026, "grad_norm": 1.2262734174728394, "learning_rate": 1.9430000000000002e-05, "loss": 0.4673, "step": 3889 }, { "epoch": 0.21782954418187928, "grad_norm": 1.3357839584350586, "learning_rate": 1.9435e-05, "loss": 0.3753, "step": 3890 }, { "epoch": 0.2178855414940083, "grad_norm": 1.0969672203063965, "learning_rate": 1.944e-05, "loss": 0.3779, "step": 3891 }, { "epoch": 0.2179415388061373, "grad_norm": 1.1183667182922363, "learning_rate": 1.9445e-05, "loss": 0.5111, "step": 3892 }, { "epoch": 0.21799753611826633, "grad_norm": 1.522701621055603, "learning_rate": 1.9450000000000002e-05, "loss": 0.5003, "step": 3893 }, { "epoch": 0.21805353343039535, "grad_norm": 1.3292375802993774, "learning_rate": 1.9455000000000003e-05, "loss": 0.4663, "step": 3894 }, { "epoch": 0.21810953074252437, "grad_norm": 1.110558032989502, "learning_rate": 1.946e-05, "loss": 0.4689, "step": 3895 }, { "epoch": 0.2181655280546534, "grad_norm": 1.293588638305664, "learning_rate": 1.9465e-05, "loss": 0.3785, "step": 3896 }, { "epoch": 0.21822152536678238, "grad_norm": 1.0624083280563354, "learning_rate": 1.947e-05, "loss": 0.3824, "step": 3897 }, { "epoch": 0.2182775226789114, "grad_norm": 1.5652928352355957, "learning_rate": 1.9475000000000002e-05, "loss": 0.491, "step": 3898 }, { "epoch": 
0.21833351999104042, "grad_norm": 1.1526103019714355, "learning_rate": 1.948e-05, "loss": 0.4683, "step": 3899 }, { "epoch": 0.21838951730316944, "grad_norm": 0.9201048612594604, "learning_rate": 1.9485e-05, "loss": 0.2474, "step": 3900 }, { "epoch": 0.21844551461529846, "grad_norm": 1.3379740715026855, "learning_rate": 1.949e-05, "loss": 0.4053, "step": 3901 }, { "epoch": 0.21850151192742748, "grad_norm": 1.0218881368637085, "learning_rate": 1.9495000000000002e-05, "loss": 0.3582, "step": 3902 }, { "epoch": 0.2185575092395565, "grad_norm": 1.1562334299087524, "learning_rate": 1.9500000000000003e-05, "loss": 0.6028, "step": 3903 }, { "epoch": 0.21861350655168552, "grad_norm": 1.180961012840271, "learning_rate": 1.9505e-05, "loss": 0.5034, "step": 3904 }, { "epoch": 0.21866950386381453, "grad_norm": 1.1061652898788452, "learning_rate": 1.951e-05, "loss": 0.3482, "step": 3905 }, { "epoch": 0.21872550117594355, "grad_norm": 1.178895115852356, "learning_rate": 1.9515e-05, "loss": 0.4866, "step": 3906 }, { "epoch": 0.21878149848807257, "grad_norm": 1.2782617807388306, "learning_rate": 1.9520000000000003e-05, "loss": 0.4267, "step": 3907 }, { "epoch": 0.2188374958002016, "grad_norm": 1.481217622756958, "learning_rate": 1.9525e-05, "loss": 0.4508, "step": 3908 }, { "epoch": 0.2188934931123306, "grad_norm": 1.2187516689300537, "learning_rate": 1.953e-05, "loss": 0.5677, "step": 3909 }, { "epoch": 0.21894949042445963, "grad_norm": 1.1630263328552246, "learning_rate": 1.9535000000000002e-05, "loss": 0.4421, "step": 3910 }, { "epoch": 0.21900548773658865, "grad_norm": 1.4248945713043213, "learning_rate": 1.954e-05, "loss": 0.4959, "step": 3911 }, { "epoch": 0.21906148504871767, "grad_norm": 1.3881953954696655, "learning_rate": 1.9545000000000003e-05, "loss": 0.5092, "step": 3912 }, { "epoch": 0.2191174823608467, "grad_norm": 1.2719542980194092, "learning_rate": 1.955e-05, "loss": 0.5339, "step": 3913 }, { "epoch": 0.2191734796729757, "grad_norm": 1.3698889017105103, 
"learning_rate": 1.9555e-05, "loss": 0.452, "step": 3914 }, { "epoch": 0.21922947698510473, "grad_norm": 1.281928300857544, "learning_rate": 1.956e-05, "loss": 0.4297, "step": 3915 }, { "epoch": 0.21928547429723375, "grad_norm": 1.2747917175292969, "learning_rate": 1.9565e-05, "loss": 0.4263, "step": 3916 }, { "epoch": 0.21934147160936274, "grad_norm": 1.3958020210266113, "learning_rate": 1.957e-05, "loss": 0.4027, "step": 3917 }, { "epoch": 0.21939746892149176, "grad_norm": 1.191336989402771, "learning_rate": 1.9575e-05, "loss": 0.4204, "step": 3918 }, { "epoch": 0.21945346623362078, "grad_norm": 1.225386142730713, "learning_rate": 1.9580000000000002e-05, "loss": 0.4535, "step": 3919 }, { "epoch": 0.2195094635457498, "grad_norm": 1.052686095237732, "learning_rate": 1.9585e-05, "loss": 0.3126, "step": 3920 }, { "epoch": 0.2195654608578788, "grad_norm": 1.013994574546814, "learning_rate": 1.959e-05, "loss": 0.4611, "step": 3921 }, { "epoch": 0.21962145817000783, "grad_norm": 1.1470279693603516, "learning_rate": 1.9595e-05, "loss": 0.3578, "step": 3922 }, { "epoch": 0.21967745548213685, "grad_norm": 1.25358247756958, "learning_rate": 1.9600000000000002e-05, "loss": 0.3754, "step": 3923 }, { "epoch": 0.21973345279426587, "grad_norm": 1.0770961046218872, "learning_rate": 1.9605e-05, "loss": 0.4533, "step": 3924 }, { "epoch": 0.2197894501063949, "grad_norm": 1.4587650299072266, "learning_rate": 1.961e-05, "loss": 0.465, "step": 3925 }, { "epoch": 0.2198454474185239, "grad_norm": 1.3744086027145386, "learning_rate": 1.9615e-05, "loss": 0.5234, "step": 3926 }, { "epoch": 0.21990144473065293, "grad_norm": 5.708791732788086, "learning_rate": 1.9620000000000002e-05, "loss": 0.3558, "step": 3927 }, { "epoch": 0.21995744204278195, "grad_norm": 1.502600908279419, "learning_rate": 1.9625000000000003e-05, "loss": 0.6503, "step": 3928 }, { "epoch": 0.22001343935491097, "grad_norm": 1.393741488456726, "learning_rate": 1.963e-05, "loss": 0.4353, "step": 3929 }, { "epoch": 
0.22006943666703999, "grad_norm": 1.2048612833023071, "learning_rate": 1.9635e-05, "loss": 0.4566, "step": 3930 }, { "epoch": 0.220125433979169, "grad_norm": 1.2519006729125977, "learning_rate": 1.9640000000000002e-05, "loss": 0.6378, "step": 3931 }, { "epoch": 0.22018143129129802, "grad_norm": 1.1331911087036133, "learning_rate": 1.9645000000000002e-05, "loss": 0.3402, "step": 3932 }, { "epoch": 0.22023742860342704, "grad_norm": 1.2613248825073242, "learning_rate": 1.9650000000000003e-05, "loss": 0.4721, "step": 3933 }, { "epoch": 0.22029342591555606, "grad_norm": 1.1872540712356567, "learning_rate": 1.9655e-05, "loss": 0.4616, "step": 3934 }, { "epoch": 0.22034942322768508, "grad_norm": 1.1382859945297241, "learning_rate": 1.966e-05, "loss": 0.3649, "step": 3935 }, { "epoch": 0.2204054205398141, "grad_norm": 1.139190673828125, "learning_rate": 1.9665e-05, "loss": 0.4429, "step": 3936 }, { "epoch": 0.22046141785194312, "grad_norm": 1.1289645433425903, "learning_rate": 1.9670000000000003e-05, "loss": 0.4831, "step": 3937 }, { "epoch": 0.2205174151640721, "grad_norm": 1.1161208152770996, "learning_rate": 1.9675e-05, "loss": 0.516, "step": 3938 }, { "epoch": 0.22057341247620113, "grad_norm": 1.1716969013214111, "learning_rate": 1.968e-05, "loss": 0.3657, "step": 3939 }, { "epoch": 0.22062940978833015, "grad_norm": 1.2149205207824707, "learning_rate": 1.9685000000000002e-05, "loss": 0.5391, "step": 3940 }, { "epoch": 0.22068540710045917, "grad_norm": 1.528893232345581, "learning_rate": 1.969e-05, "loss": 0.4852, "step": 3941 }, { "epoch": 0.2207414044125882, "grad_norm": 1.6157225370407104, "learning_rate": 1.9695e-05, "loss": 0.4268, "step": 3942 }, { "epoch": 0.2207974017247172, "grad_norm": 1.2216227054595947, "learning_rate": 1.97e-05, "loss": 0.4365, "step": 3943 }, { "epoch": 0.22085339903684623, "grad_norm": 1.2056119441986084, "learning_rate": 1.9705000000000002e-05, "loss": 0.3892, "step": 3944 }, { "epoch": 0.22090939634897525, "grad_norm": 
1.2248709201812744, "learning_rate": 1.971e-05, "loss": 0.4097, "step": 3945 }, { "epoch": 0.22096539366110426, "grad_norm": 1.1583555936813354, "learning_rate": 1.9715e-05, "loss": 0.3983, "step": 3946 }, { "epoch": 0.22102139097323328, "grad_norm": 1.0924626588821411, "learning_rate": 1.972e-05, "loss": 0.3504, "step": 3947 }, { "epoch": 0.2210773882853623, "grad_norm": 1.202951431274414, "learning_rate": 1.9725000000000002e-05, "loss": 0.5088, "step": 3948 }, { "epoch": 0.22113338559749132, "grad_norm": 1.0647072792053223, "learning_rate": 1.9730000000000003e-05, "loss": 0.4107, "step": 3949 }, { "epoch": 0.22118938290962034, "grad_norm": 1.2809765338897705, "learning_rate": 1.9735e-05, "loss": 0.4364, "step": 3950 }, { "epoch": 0.22124538022174936, "grad_norm": 1.095226526260376, "learning_rate": 1.974e-05, "loss": 0.4434, "step": 3951 }, { "epoch": 0.22130137753387838, "grad_norm": 1.2039659023284912, "learning_rate": 1.9744999999999998e-05, "loss": 0.4319, "step": 3952 }, { "epoch": 0.2213573748460074, "grad_norm": 1.1134363412857056, "learning_rate": 1.9750000000000002e-05, "loss": 0.3769, "step": 3953 }, { "epoch": 0.22141337215813642, "grad_norm": 1.2964189052581787, "learning_rate": 1.9755e-05, "loss": 0.4453, "step": 3954 }, { "epoch": 0.22146936947026544, "grad_norm": 1.1339832544326782, "learning_rate": 1.976e-05, "loss": 0.4056, "step": 3955 }, { "epoch": 0.22152536678239446, "grad_norm": 1.169204831123352, "learning_rate": 1.9765e-05, "loss": 0.5727, "step": 3956 }, { "epoch": 0.22158136409452348, "grad_norm": 1.220909833908081, "learning_rate": 1.977e-05, "loss": 0.4262, "step": 3957 }, { "epoch": 0.2216373614066525, "grad_norm": 3.000483512878418, "learning_rate": 1.9775000000000003e-05, "loss": 0.5093, "step": 3958 }, { "epoch": 0.22169335871878149, "grad_norm": 1.0850926637649536, "learning_rate": 1.978e-05, "loss": 0.3535, "step": 3959 }, { "epoch": 0.2217493560309105, "grad_norm": 1.3514493703842163, "learning_rate": 1.9785e-05, "loss": 0.5564, 
"step": 3960 }, { "epoch": 0.22180535334303952, "grad_norm": 1.3057911396026611, "learning_rate": 1.979e-05, "loss": 0.4899, "step": 3961 }, { "epoch": 0.22186135065516854, "grad_norm": 1.3611317873001099, "learning_rate": 1.9795e-05, "loss": 0.461, "step": 3962 }, { "epoch": 0.22191734796729756, "grad_norm": 0.9021110534667969, "learning_rate": 1.9800000000000004e-05, "loss": 0.3009, "step": 3963 }, { "epoch": 0.22197334527942658, "grad_norm": 1.1194170713424683, "learning_rate": 1.9805e-05, "loss": 0.4004, "step": 3964 }, { "epoch": 0.2220293425915556, "grad_norm": 1.1084462404251099, "learning_rate": 1.9810000000000002e-05, "loss": 0.4546, "step": 3965 }, { "epoch": 0.22208533990368462, "grad_norm": 1.375201940536499, "learning_rate": 1.9815e-05, "loss": 0.4415, "step": 3966 }, { "epoch": 0.22214133721581364, "grad_norm": 1.2064272165298462, "learning_rate": 1.982e-05, "loss": 0.3728, "step": 3967 }, { "epoch": 0.22219733452794266, "grad_norm": 1.4434298276901245, "learning_rate": 1.9825e-05, "loss": 0.3807, "step": 3968 }, { "epoch": 0.22225333184007168, "grad_norm": 1.2069168090820312, "learning_rate": 1.983e-05, "loss": 0.616, "step": 3969 }, { "epoch": 0.2223093291522007, "grad_norm": 1.4005426168441772, "learning_rate": 1.9835000000000002e-05, "loss": 0.4133, "step": 3970 }, { "epoch": 0.22236532646432972, "grad_norm": 1.2770768404006958, "learning_rate": 1.984e-05, "loss": 0.4056, "step": 3971 }, { "epoch": 0.22242132377645873, "grad_norm": 1.7044603824615479, "learning_rate": 1.9845e-05, "loss": 0.5747, "step": 3972 }, { "epoch": 0.22247732108858775, "grad_norm": 1.3524123430252075, "learning_rate": 1.985e-05, "loss": 0.5833, "step": 3973 }, { "epoch": 0.22253331840071677, "grad_norm": 1.2796204090118408, "learning_rate": 1.9855000000000002e-05, "loss": 0.472, "step": 3974 }, { "epoch": 0.2225893157128458, "grad_norm": 1.0586299896240234, "learning_rate": 1.986e-05, "loss": 0.4362, "step": 3975 }, { "epoch": 0.2226453130249748, "grad_norm": 
1.1387319564819336, "learning_rate": 1.9865e-05, "loss": 0.5358, "step": 3976 }, { "epoch": 0.22270131033710383, "grad_norm": 1.3744721412658691, "learning_rate": 1.987e-05, "loss": 0.5624, "step": 3977 }, { "epoch": 0.22275730764923285, "grad_norm": 1.2997620105743408, "learning_rate": 1.9875000000000002e-05, "loss": 0.4796, "step": 3978 }, { "epoch": 0.22281330496136184, "grad_norm": 1.1262180805206299, "learning_rate": 1.9880000000000003e-05, "loss": 0.5009, "step": 3979 }, { "epoch": 0.22286930227349086, "grad_norm": 1.2399729490280151, "learning_rate": 1.9885e-05, "loss": 0.4173, "step": 3980 }, { "epoch": 0.22292529958561988, "grad_norm": 1.1392662525177002, "learning_rate": 1.989e-05, "loss": 0.5183, "step": 3981 }, { "epoch": 0.2229812968977489, "grad_norm": 1.3595386743545532, "learning_rate": 1.9895e-05, "loss": 0.4768, "step": 3982 }, { "epoch": 0.22303729420987792, "grad_norm": 1.2113465070724487, "learning_rate": 1.9900000000000003e-05, "loss": 0.4527, "step": 3983 }, { "epoch": 0.22309329152200694, "grad_norm": 1.2641816139221191, "learning_rate": 1.9905e-05, "loss": 0.4014, "step": 3984 }, { "epoch": 0.22314928883413596, "grad_norm": 1.269322156906128, "learning_rate": 1.991e-05, "loss": 0.4136, "step": 3985 }, { "epoch": 0.22320528614626498, "grad_norm": 1.7250635623931885, "learning_rate": 1.9915e-05, "loss": 0.5067, "step": 3986 }, { "epoch": 0.223261283458394, "grad_norm": 1.4223933219909668, "learning_rate": 1.992e-05, "loss": 0.3975, "step": 3987 }, { "epoch": 0.223317280770523, "grad_norm": 1.2370339632034302, "learning_rate": 1.9925000000000003e-05, "loss": 0.4593, "step": 3988 }, { "epoch": 0.22337327808265203, "grad_norm": 1.584632396697998, "learning_rate": 1.993e-05, "loss": 0.4526, "step": 3989 }, { "epoch": 0.22342927539478105, "grad_norm": 1.1744674444198608, "learning_rate": 1.9935e-05, "loss": 0.4607, "step": 3990 }, { "epoch": 0.22348527270691007, "grad_norm": 1.1270041465759277, "learning_rate": 1.994e-05, "loss": 0.3616, "step": 
3991 }, { "epoch": 0.2235412700190391, "grad_norm": 1.543769359588623, "learning_rate": 1.9945e-05, "loss": 0.439, "step": 3992 }, { "epoch": 0.2235972673311681, "grad_norm": 1.010480284690857, "learning_rate": 1.995e-05, "loss": 0.3787, "step": 3993 }, { "epoch": 0.22365326464329713, "grad_norm": 1.2416563034057617, "learning_rate": 1.9955e-05, "loss": 0.4614, "step": 3994 }, { "epoch": 0.22370926195542615, "grad_norm": 1.3135097026824951, "learning_rate": 1.9960000000000002e-05, "loss": 0.499, "step": 3995 }, { "epoch": 0.22376525926755517, "grad_norm": 1.3679999113082886, "learning_rate": 1.9965e-05, "loss": 0.3731, "step": 3996 }, { "epoch": 0.22382125657968419, "grad_norm": 1.2174789905548096, "learning_rate": 1.997e-05, "loss": 0.3717, "step": 3997 }, { "epoch": 0.2238772538918132, "grad_norm": 1.1787962913513184, "learning_rate": 1.9975e-05, "loss": 0.4345, "step": 3998 }, { "epoch": 0.22393325120394222, "grad_norm": 1.3835020065307617, "learning_rate": 1.9980000000000002e-05, "loss": 0.4765, "step": 3999 }, { "epoch": 0.22398924851607122, "grad_norm": 1.0391042232513428, "learning_rate": 1.9985000000000003e-05, "loss": 0.4041, "step": 4000 }, { "epoch": 0.22404524582820023, "grad_norm": 1.2916306257247925, "learning_rate": 1.999e-05, "loss": 0.5135, "step": 4001 }, { "epoch": 0.22410124314032925, "grad_norm": 1.3005999326705933, "learning_rate": 1.9995e-05, "loss": 0.3908, "step": 4002 }, { "epoch": 0.22415724045245827, "grad_norm": 1.5253877639770508, "learning_rate": 2e-05, "loss": 0.5192, "step": 4003 }, { "epoch": 0.2242132377645873, "grad_norm": 1.100231647491455, "learning_rate": 2.0005000000000002e-05, "loss": 0.4016, "step": 4004 }, { "epoch": 0.2242692350767163, "grad_norm": 1.447638988494873, "learning_rate": 2.001e-05, "loss": 0.4658, "step": 4005 }, { "epoch": 0.22432523238884533, "grad_norm": 1.2481557130813599, "learning_rate": 2.0015e-05, "loss": 0.4316, "step": 4006 }, { "epoch": 0.22438122970097435, "grad_norm": 1.5969074964523315, 
"learning_rate": 2.002e-05, "loss": 0.5882, "step": 4007 }, { "epoch": 0.22443722701310337, "grad_norm": 1.2705366611480713, "learning_rate": 2.0025000000000002e-05, "loss": 0.5284, "step": 4008 }, { "epoch": 0.2244932243252324, "grad_norm": 1.133697509765625, "learning_rate": 2.0030000000000003e-05, "loss": 0.4072, "step": 4009 }, { "epoch": 0.2245492216373614, "grad_norm": 0.9591661691665649, "learning_rate": 2.0035e-05, "loss": 0.4083, "step": 4010 }, { "epoch": 0.22460521894949043, "grad_norm": 1.754600167274475, "learning_rate": 2.004e-05, "loss": 0.4694, "step": 4011 }, { "epoch": 0.22466121626161945, "grad_norm": 1.204201340675354, "learning_rate": 2.0045e-05, "loss": 0.4599, "step": 4012 }, { "epoch": 0.22471721357374846, "grad_norm": 1.0342845916748047, "learning_rate": 2.0050000000000003e-05, "loss": 0.2756, "step": 4013 }, { "epoch": 0.22477321088587748, "grad_norm": 1.1289149522781372, "learning_rate": 2.0055e-05, "loss": 0.2926, "step": 4014 }, { "epoch": 0.2248292081980065, "grad_norm": 1.4004709720611572, "learning_rate": 2.006e-05, "loss": 0.4329, "step": 4015 }, { "epoch": 0.22488520551013552, "grad_norm": 1.1849826574325562, "learning_rate": 2.0065000000000002e-05, "loss": 0.4633, "step": 4016 }, { "epoch": 0.22494120282226454, "grad_norm": 1.387434482574463, "learning_rate": 2.007e-05, "loss": 0.535, "step": 4017 }, { "epoch": 0.22499720013439356, "grad_norm": 1.1173462867736816, "learning_rate": 2.0075000000000003e-05, "loss": 0.385, "step": 4018 }, { "epoch": 0.22505319744652258, "grad_norm": 1.1251437664031982, "learning_rate": 2.008e-05, "loss": 0.4063, "step": 4019 }, { "epoch": 0.2251091947586516, "grad_norm": 1.127577304840088, "learning_rate": 2.0085e-05, "loss": 0.3449, "step": 4020 }, { "epoch": 0.2251651920707806, "grad_norm": 1.073022723197937, "learning_rate": 2.009e-05, "loss": 0.4331, "step": 4021 }, { "epoch": 0.2252211893829096, "grad_norm": 1.1744555234909058, "learning_rate": 2.0095e-05, "loss": 0.3727, "step": 4022 }, { 
"epoch": 0.22527718669503863, "grad_norm": 1.1534401178359985, "learning_rate": 2.01e-05, "loss": 0.4825, "step": 4023 }, { "epoch": 0.22533318400716765, "grad_norm": 3.915041208267212, "learning_rate": 2.0105e-05, "loss": 0.3737, "step": 4024 }, { "epoch": 0.22538918131929667, "grad_norm": 1.0764929056167603, "learning_rate": 2.0110000000000002e-05, "loss": 0.3697, "step": 4025 }, { "epoch": 0.22544517863142569, "grad_norm": 1.2173855304718018, "learning_rate": 2.0115e-05, "loss": 0.4043, "step": 4026 }, { "epoch": 0.2255011759435547, "grad_norm": 1.309613585472107, "learning_rate": 2.012e-05, "loss": 0.4685, "step": 4027 }, { "epoch": 0.22555717325568372, "grad_norm": 1.1317449808120728, "learning_rate": 2.0125e-05, "loss": 0.5362, "step": 4028 }, { "epoch": 0.22561317056781274, "grad_norm": 1.1389638185501099, "learning_rate": 2.0130000000000002e-05, "loss": 0.336, "step": 4029 }, { "epoch": 0.22566916787994176, "grad_norm": 1.1004079580307007, "learning_rate": 2.0135e-05, "loss": 0.3205, "step": 4030 }, { "epoch": 0.22572516519207078, "grad_norm": 1.545257329940796, "learning_rate": 2.014e-05, "loss": 0.5128, "step": 4031 }, { "epoch": 0.2257811625041998, "grad_norm": 1.1331626176834106, "learning_rate": 2.0145e-05, "loss": 0.4678, "step": 4032 }, { "epoch": 0.22583715981632882, "grad_norm": 1.08872652053833, "learning_rate": 2.0150000000000002e-05, "loss": 0.4114, "step": 4033 }, { "epoch": 0.22589315712845784, "grad_norm": 1.0941650867462158, "learning_rate": 2.0155000000000003e-05, "loss": 0.4096, "step": 4034 }, { "epoch": 0.22594915444058686, "grad_norm": 1.186246633529663, "learning_rate": 2.016e-05, "loss": 0.407, "step": 4035 }, { "epoch": 0.22600515175271588, "grad_norm": 1.2471370697021484, "learning_rate": 2.0165e-05, "loss": 0.4056, "step": 4036 }, { "epoch": 0.2260611490648449, "grad_norm": 1.18431556224823, "learning_rate": 2.017e-05, "loss": 0.3996, "step": 4037 }, { "epoch": 0.22611714637697392, "grad_norm": 1.24180006980896, "learning_rate": 
2.0175000000000003e-05, "loss": 0.4061, "step": 4038 }, { "epoch": 0.22617314368910293, "grad_norm": 1.2733039855957031, "learning_rate": 2.0180000000000003e-05, "loss": 0.4062, "step": 4039 }, { "epoch": 0.22622914100123195, "grad_norm": 1.229092001914978, "learning_rate": 2.0185e-05, "loss": 0.4002, "step": 4040 }, { "epoch": 0.22628513831336095, "grad_norm": 1.216475009918213, "learning_rate": 2.019e-05, "loss": 0.4704, "step": 4041 }, { "epoch": 0.22634113562548996, "grad_norm": 1.1875768899917603, "learning_rate": 2.0195e-05, "loss": 0.3753, "step": 4042 }, { "epoch": 0.22639713293761898, "grad_norm": 1.3118387460708618, "learning_rate": 2.0200000000000003e-05, "loss": 0.3932, "step": 4043 }, { "epoch": 0.226453130249748, "grad_norm": 1.1569195985794067, "learning_rate": 2.0205e-05, "loss": 0.4384, "step": 4044 }, { "epoch": 0.22650912756187702, "grad_norm": 1.3034249544143677, "learning_rate": 2.021e-05, "loss": 0.4687, "step": 4045 }, { "epoch": 0.22656512487400604, "grad_norm": 1.364123821258545, "learning_rate": 2.0215000000000002e-05, "loss": 0.4705, "step": 4046 }, { "epoch": 0.22662112218613506, "grad_norm": 1.1235162019729614, "learning_rate": 2.022e-05, "loss": 0.373, "step": 4047 }, { "epoch": 0.22667711949826408, "grad_norm": 1.688078761100769, "learning_rate": 2.0225000000000004e-05, "loss": 0.4103, "step": 4048 }, { "epoch": 0.2267331168103931, "grad_norm": 1.3599395751953125, "learning_rate": 2.023e-05, "loss": 0.4554, "step": 4049 }, { "epoch": 0.22678911412252212, "grad_norm": 1.1683887243270874, "learning_rate": 2.0235000000000002e-05, "loss": 0.5024, "step": 4050 }, { "epoch": 0.22684511143465114, "grad_norm": 1.1350637674331665, "learning_rate": 2.024e-05, "loss": 0.3886, "step": 4051 }, { "epoch": 0.22690110874678016, "grad_norm": 1.2215251922607422, "learning_rate": 2.0245e-05, "loss": 0.4779, "step": 4052 }, { "epoch": 0.22695710605890917, "grad_norm": 1.2877695560455322, "learning_rate": 2.025e-05, "loss": 0.5126, "step": 4053 }, { 
"epoch": 0.2270131033710382, "grad_norm": 1.397469162940979, "learning_rate": 2.0255000000000002e-05, "loss": 0.5584, "step": 4054 }, { "epoch": 0.2270691006831672, "grad_norm": 1.0369408130645752, "learning_rate": 2.0260000000000003e-05, "loss": 0.3528, "step": 4055 }, { "epoch": 0.22712509799529623, "grad_norm": 1.805389165878296, "learning_rate": 2.0265e-05, "loss": 0.4645, "step": 4056 }, { "epoch": 0.22718109530742525, "grad_norm": 1.299943208694458, "learning_rate": 2.027e-05, "loss": 0.4551, "step": 4057 }, { "epoch": 0.22723709261955427, "grad_norm": 1.1022294759750366, "learning_rate": 2.0275e-05, "loss": 0.3104, "step": 4058 }, { "epoch": 0.2272930899316833, "grad_norm": 1.477035641670227, "learning_rate": 2.0280000000000002e-05, "loss": 0.4018, "step": 4059 }, { "epoch": 0.2273490872438123, "grad_norm": 1.0145372152328491, "learning_rate": 2.0285e-05, "loss": 0.3613, "step": 4060 }, { "epoch": 0.22740508455594133, "grad_norm": 1.1505661010742188, "learning_rate": 2.029e-05, "loss": 0.4782, "step": 4061 }, { "epoch": 0.22746108186807032, "grad_norm": 1.1380447149276733, "learning_rate": 2.0295e-05, "loss": 0.3925, "step": 4062 }, { "epoch": 0.22751707918019934, "grad_norm": 1.3335957527160645, "learning_rate": 2.0300000000000002e-05, "loss": 0.3813, "step": 4063 }, { "epoch": 0.22757307649232836, "grad_norm": 1.0564080476760864, "learning_rate": 2.0305000000000003e-05, "loss": 0.4491, "step": 4064 }, { "epoch": 0.22762907380445738, "grad_norm": 1.1848081350326538, "learning_rate": 2.031e-05, "loss": 0.4345, "step": 4065 }, { "epoch": 0.2276850711165864, "grad_norm": 1.1922739744186401, "learning_rate": 2.0315e-05, "loss": 0.4108, "step": 4066 }, { "epoch": 0.22774106842871542, "grad_norm": 1.2298710346221924, "learning_rate": 2.032e-05, "loss": 0.474, "step": 4067 }, { "epoch": 0.22779706574084443, "grad_norm": 1.4893807172775269, "learning_rate": 2.0325e-05, "loss": 0.567, "step": 4068 }, { "epoch": 0.22785306305297345, "grad_norm": 1.437177062034607, 
"learning_rate": 2.033e-05, "loss": 0.4056, "step": 4069 }, { "epoch": 0.22790906036510247, "grad_norm": 1.269019365310669, "learning_rate": 2.0335e-05, "loss": 0.4033, "step": 4070 }, { "epoch": 0.2279650576772315, "grad_norm": 1.282313585281372, "learning_rate": 2.0340000000000002e-05, "loss": 0.4129, "step": 4071 }, { "epoch": 0.2280210549893605, "grad_norm": 1.614296317100525, "learning_rate": 2.0345e-05, "loss": 0.4413, "step": 4072 }, { "epoch": 0.22807705230148953, "grad_norm": 1.1022065877914429, "learning_rate": 2.035e-05, "loss": 0.2764, "step": 4073 }, { "epoch": 0.22813304961361855, "grad_norm": 1.1078407764434814, "learning_rate": 2.0355e-05, "loss": 0.4086, "step": 4074 }, { "epoch": 0.22818904692574757, "grad_norm": 1.0584485530853271, "learning_rate": 2.036e-05, "loss": 0.4224, "step": 4075 }, { "epoch": 0.2282450442378766, "grad_norm": 11.007890701293945, "learning_rate": 2.0365000000000002e-05, "loss": 0.6179, "step": 4076 }, { "epoch": 0.2283010415500056, "grad_norm": 1.19613778591156, "learning_rate": 2.037e-05, "loss": 0.4075, "step": 4077 }, { "epoch": 0.22835703886213463, "grad_norm": 1.2499363422393799, "learning_rate": 2.0375e-05, "loss": 0.3791, "step": 4078 }, { "epoch": 0.22841303617426365, "grad_norm": 1.310246229171753, "learning_rate": 2.038e-05, "loss": 0.4022, "step": 4079 }, { "epoch": 0.22846903348639266, "grad_norm": 1.4538041353225708, "learning_rate": 2.0385000000000002e-05, "loss": 0.5483, "step": 4080 }, { "epoch": 0.22852503079852168, "grad_norm": 1.1844452619552612, "learning_rate": 2.039e-05, "loss": 0.4425, "step": 4081 }, { "epoch": 0.2285810281106507, "grad_norm": 1.2127856016159058, "learning_rate": 2.0395e-05, "loss": 0.4141, "step": 4082 }, { "epoch": 0.2286370254227797, "grad_norm": 1.1689798831939697, "learning_rate": 2.04e-05, "loss": 0.4201, "step": 4083 }, { "epoch": 0.2286930227349087, "grad_norm": 1.2544913291931152, "learning_rate": 2.0405000000000002e-05, "loss": 0.5006, "step": 4084 }, { "epoch": 
0.22874902004703773, "grad_norm": 1.3648995161056519, "learning_rate": 2.0410000000000003e-05, "loss": 0.386, "step": 4085 }, { "epoch": 0.22880501735916675, "grad_norm": 1.1508864164352417, "learning_rate": 2.0415e-05, "loss": 0.364, "step": 4086 }, { "epoch": 0.22886101467129577, "grad_norm": 1.1088776588439941, "learning_rate": 2.042e-05, "loss": 0.397, "step": 4087 }, { "epoch": 0.2289170119834248, "grad_norm": 1.291202425956726, "learning_rate": 2.0425e-05, "loss": 0.4527, "step": 4088 }, { "epoch": 0.2289730092955538, "grad_norm": 1.4738807678222656, "learning_rate": 2.0430000000000003e-05, "loss": 0.4579, "step": 4089 }, { "epoch": 0.22902900660768283, "grad_norm": 1.0973998308181763, "learning_rate": 2.0435e-05, "loss": 0.2876, "step": 4090 }, { "epoch": 0.22908500391981185, "grad_norm": 1.024002194404602, "learning_rate": 2.044e-05, "loss": 0.4462, "step": 4091 }, { "epoch": 0.22914100123194087, "grad_norm": 1.2808518409729004, "learning_rate": 2.0445e-05, "loss": 0.3622, "step": 4092 }, { "epoch": 0.22919699854406989, "grad_norm": 1.0888512134552002, "learning_rate": 2.045e-05, "loss": 0.3681, "step": 4093 }, { "epoch": 0.2292529958561989, "grad_norm": 0.9593205451965332, "learning_rate": 2.0455000000000003e-05, "loss": 0.3167, "step": 4094 }, { "epoch": 0.22930899316832792, "grad_norm": 1.2199854850769043, "learning_rate": 2.046e-05, "loss": 0.441, "step": 4095 }, { "epoch": 0.22936499048045694, "grad_norm": 1.281033992767334, "learning_rate": 2.0465e-05, "loss": 0.5411, "step": 4096 }, { "epoch": 0.22942098779258596, "grad_norm": 1.123522162437439, "learning_rate": 2.047e-05, "loss": 0.3339, "step": 4097 }, { "epoch": 0.22947698510471498, "grad_norm": 0.994236409664154, "learning_rate": 2.0475e-05, "loss": 0.3565, "step": 4098 }, { "epoch": 0.229532982416844, "grad_norm": 1.1076732873916626, "learning_rate": 2.048e-05, "loss": 0.4364, "step": 4099 }, { "epoch": 0.22958897972897302, "grad_norm": 1.6202208995819092, "learning_rate": 2.0485e-05, "loss": 
0.4889, "step": 4100 }, { "epoch": 0.22964497704110204, "grad_norm": 1.2983157634735107, "learning_rate": 2.0490000000000002e-05, "loss": 0.5233, "step": 4101 }, { "epoch": 0.22970097435323106, "grad_norm": 1.2962462902069092, "learning_rate": 2.0495e-05, "loss": 0.5092, "step": 4102 }, { "epoch": 0.22975697166536005, "grad_norm": 1.4778438806533813, "learning_rate": 2.05e-05, "loss": 0.5732, "step": 4103 }, { "epoch": 0.22981296897748907, "grad_norm": 1.2814544439315796, "learning_rate": 2.0505e-05, "loss": 0.4424, "step": 4104 }, { "epoch": 0.2298689662896181, "grad_norm": 1.4536662101745605, "learning_rate": 2.0510000000000002e-05, "loss": 0.5741, "step": 4105 }, { "epoch": 0.2299249636017471, "grad_norm": 1.1617083549499512, "learning_rate": 2.0515e-05, "loss": 0.4934, "step": 4106 }, { "epoch": 0.22998096091387613, "grad_norm": 1.3995189666748047, "learning_rate": 2.052e-05, "loss": 0.3733, "step": 4107 }, { "epoch": 0.23003695822600514, "grad_norm": 0.9614766836166382, "learning_rate": 2.0525e-05, "loss": 0.3484, "step": 4108 }, { "epoch": 0.23009295553813416, "grad_norm": 1.136983036994934, "learning_rate": 2.053e-05, "loss": 0.3716, "step": 4109 }, { "epoch": 0.23014895285026318, "grad_norm": 1.257845163345337, "learning_rate": 2.0535000000000002e-05, "loss": 0.4114, "step": 4110 }, { "epoch": 0.2302049501623922, "grad_norm": 1.6189639568328857, "learning_rate": 2.054e-05, "loss": 0.6255, "step": 4111 }, { "epoch": 0.23026094747452122, "grad_norm": 1.0244381427764893, "learning_rate": 2.0545e-05, "loss": 0.2547, "step": 4112 }, { "epoch": 0.23031694478665024, "grad_norm": 1.1978487968444824, "learning_rate": 2.055e-05, "loss": 0.3887, "step": 4113 }, { "epoch": 0.23037294209877926, "grad_norm": 1.0918502807617188, "learning_rate": 2.0555000000000002e-05, "loss": 0.3757, "step": 4114 }, { "epoch": 0.23042893941090828, "grad_norm": 1.8906546831130981, "learning_rate": 2.0560000000000003e-05, "loss": 0.4998, "step": 4115 }, { "epoch": 0.2304849367230373, 
"grad_norm": 1.2287076711654663, "learning_rate": 2.0565e-05, "loss": 0.4845, "step": 4116 }, { "epoch": 0.23054093403516632, "grad_norm": 1.193910002708435, "learning_rate": 2.057e-05, "loss": 0.3618, "step": 4117 }, { "epoch": 0.23059693134729534, "grad_norm": 1.2260141372680664, "learning_rate": 2.0575e-05, "loss": 0.414, "step": 4118 }, { "epoch": 0.23065292865942436, "grad_norm": 1.1606512069702148, "learning_rate": 2.0580000000000003e-05, "loss": 0.3471, "step": 4119 }, { "epoch": 0.23070892597155337, "grad_norm": 1.0225188732147217, "learning_rate": 2.0585e-05, "loss": 0.4305, "step": 4120 }, { "epoch": 0.2307649232836824, "grad_norm": 1.2030072212219238, "learning_rate": 2.059e-05, "loss": 0.3368, "step": 4121 }, { "epoch": 0.2308209205958114, "grad_norm": 1.2328826189041138, "learning_rate": 2.0595000000000002e-05, "loss": 0.5293, "step": 4122 }, { "epoch": 0.23087691790794043, "grad_norm": 1.2912899255752563, "learning_rate": 2.06e-05, "loss": 0.5007, "step": 4123 }, { "epoch": 0.23093291522006942, "grad_norm": 1.3425554037094116, "learning_rate": 2.0605000000000003e-05, "loss": 0.5251, "step": 4124 }, { "epoch": 0.23098891253219844, "grad_norm": 1.1544756889343262, "learning_rate": 2.061e-05, "loss": 0.3617, "step": 4125 }, { "epoch": 0.23104490984432746, "grad_norm": 1.1377413272857666, "learning_rate": 2.0615e-05, "loss": 0.3603, "step": 4126 }, { "epoch": 0.23110090715645648, "grad_norm": 1.1988937854766846, "learning_rate": 2.062e-05, "loss": 0.473, "step": 4127 }, { "epoch": 0.2311569044685855, "grad_norm": 1.0015439987182617, "learning_rate": 2.0625e-05, "loss": 0.4049, "step": 4128 }, { "epoch": 0.23121290178071452, "grad_norm": 1.2456467151641846, "learning_rate": 2.063e-05, "loss": 0.403, "step": 4129 }, { "epoch": 0.23126889909284354, "grad_norm": 1.124685287475586, "learning_rate": 2.0635e-05, "loss": 0.405, "step": 4130 }, { "epoch": 0.23132489640497256, "grad_norm": 1.2655459642410278, "learning_rate": 2.0640000000000002e-05, "loss": 0.354, 
"step": 4131 }, { "epoch": 0.23138089371710158, "grad_norm": 1.039529800415039, "learning_rate": 2.0645e-05, "loss": 0.354, "step": 4132 }, { "epoch": 0.2314368910292306, "grad_norm": 1.238182783126831, "learning_rate": 2.065e-05, "loss": 0.3983, "step": 4133 }, { "epoch": 0.23149288834135962, "grad_norm": 1.2212363481521606, "learning_rate": 2.0655e-05, "loss": 0.4486, "step": 4134 }, { "epoch": 0.23154888565348863, "grad_norm": 1.1392161846160889, "learning_rate": 2.0660000000000002e-05, "loss": 0.3777, "step": 4135 }, { "epoch": 0.23160488296561765, "grad_norm": 1.199353575706482, "learning_rate": 2.0665e-05, "loss": 0.2687, "step": 4136 }, { "epoch": 0.23166088027774667, "grad_norm": 1.4285528659820557, "learning_rate": 2.067e-05, "loss": 0.2833, "step": 4137 }, { "epoch": 0.2317168775898757, "grad_norm": 1.139581561088562, "learning_rate": 2.0675e-05, "loss": 0.4413, "step": 4138 }, { "epoch": 0.2317728749020047, "grad_norm": 1.2959959506988525, "learning_rate": 2.0680000000000002e-05, "loss": 0.4027, "step": 4139 }, { "epoch": 0.23182887221413373, "grad_norm": 1.315169095993042, "learning_rate": 2.0685000000000003e-05, "loss": 0.4878, "step": 4140 }, { "epoch": 0.23188486952626275, "grad_norm": 1.1356548070907593, "learning_rate": 2.069e-05, "loss": 0.4531, "step": 4141 }, { "epoch": 0.23194086683839177, "grad_norm": 1.150536060333252, "learning_rate": 2.0695e-05, "loss": 0.4032, "step": 4142 }, { "epoch": 0.2319968641505208, "grad_norm": 1.3828458786010742, "learning_rate": 2.07e-05, "loss": 0.6142, "step": 4143 }, { "epoch": 0.2320528614626498, "grad_norm": 1.0190892219543457, "learning_rate": 2.0705000000000003e-05, "loss": 0.4076, "step": 4144 }, { "epoch": 0.2321088587747788, "grad_norm": 1.242693543434143, "learning_rate": 2.0710000000000003e-05, "loss": 0.4358, "step": 4145 }, { "epoch": 0.23216485608690782, "grad_norm": 1.4580390453338623, "learning_rate": 2.0715e-05, "loss": 0.5017, "step": 4146 }, { "epoch": 0.23222085339903684, "grad_norm": 
1.630743384361267, "learning_rate": 2.072e-05, "loss": 0.5403, "step": 4147 }, { "epoch": 0.23227685071116586, "grad_norm": 1.3127927780151367, "learning_rate": 2.0725e-05, "loss": 0.5772, "step": 4148 }, { "epoch": 0.23233284802329487, "grad_norm": 1.32776939868927, "learning_rate": 2.0730000000000003e-05, "loss": 0.4103, "step": 4149 }, { "epoch": 0.2323888453354239, "grad_norm": 1.1280535459518433, "learning_rate": 2.0735e-05, "loss": 0.3944, "step": 4150 }, { "epoch": 0.2324448426475529, "grad_norm": 1.0871868133544922, "learning_rate": 2.074e-05, "loss": 0.4578, "step": 4151 }, { "epoch": 0.23250083995968193, "grad_norm": 1.211596965789795, "learning_rate": 2.0745000000000002e-05, "loss": 0.3587, "step": 4152 }, { "epoch": 0.23255683727181095, "grad_norm": 1.4957422018051147, "learning_rate": 2.075e-05, "loss": 0.4731, "step": 4153 }, { "epoch": 0.23261283458393997, "grad_norm": 1.413071632385254, "learning_rate": 2.0755000000000004e-05, "loss": 0.5488, "step": 4154 }, { "epoch": 0.232668831896069, "grad_norm": 1.5196342468261719, "learning_rate": 2.076e-05, "loss": 0.4164, "step": 4155 }, { "epoch": 0.232724829208198, "grad_norm": 4.492232799530029, "learning_rate": 2.0765000000000002e-05, "loss": 0.589, "step": 4156 }, { "epoch": 0.23278082652032703, "grad_norm": 1.2077269554138184, "learning_rate": 2.077e-05, "loss": 0.3548, "step": 4157 }, { "epoch": 0.23283682383245605, "grad_norm": 0.9782938957214355, "learning_rate": 2.0775e-05, "loss": 0.301, "step": 4158 }, { "epoch": 0.23289282114458507, "grad_norm": 1.1544853448867798, "learning_rate": 2.078e-05, "loss": 0.428, "step": 4159 }, { "epoch": 0.23294881845671409, "grad_norm": 1.3924914598464966, "learning_rate": 2.0785000000000002e-05, "loss": 0.3908, "step": 4160 }, { "epoch": 0.2330048157688431, "grad_norm": 1.404755711555481, "learning_rate": 2.0790000000000003e-05, "loss": 0.5763, "step": 4161 }, { "epoch": 0.23306081308097212, "grad_norm": 1.3203518390655518, "learning_rate": 2.0795e-05, "loss": 
0.4953, "step": 4162 }, { "epoch": 0.23311681039310114, "grad_norm": 1.2612299919128418, "learning_rate": 2.08e-05, "loss": 0.3418, "step": 4163 }, { "epoch": 0.23317280770523016, "grad_norm": 1.3685733079910278, "learning_rate": 2.0805e-05, "loss": 0.4777, "step": 4164 }, { "epoch": 0.23322880501735915, "grad_norm": 1.4660611152648926, "learning_rate": 2.0810000000000002e-05, "loss": 0.5761, "step": 4165 }, { "epoch": 0.23328480232948817, "grad_norm": 2.6750452518463135, "learning_rate": 2.0815e-05, "loss": 0.3986, "step": 4166 }, { "epoch": 0.2333407996416172, "grad_norm": 1.106176495552063, "learning_rate": 2.082e-05, "loss": 0.4198, "step": 4167 }, { "epoch": 0.2333967969537462, "grad_norm": 1.3173162937164307, "learning_rate": 2.0825e-05, "loss": 0.6655, "step": 4168 }, { "epoch": 0.23345279426587523, "grad_norm": 1.1135119199752808, "learning_rate": 2.0830000000000002e-05, "loss": 0.4669, "step": 4169 }, { "epoch": 0.23350879157800425, "grad_norm": 1.1758356094360352, "learning_rate": 2.0835000000000003e-05, "loss": 0.4082, "step": 4170 }, { "epoch": 0.23356478889013327, "grad_norm": 1.1846275329589844, "learning_rate": 2.084e-05, "loss": 0.4446, "step": 4171 }, { "epoch": 0.2336207862022623, "grad_norm": 1.242761492729187, "learning_rate": 2.0845e-05, "loss": 0.3611, "step": 4172 }, { "epoch": 0.2336767835143913, "grad_norm": 1.046606183052063, "learning_rate": 2.085e-05, "loss": 0.424, "step": 4173 }, { "epoch": 0.23373278082652033, "grad_norm": 3.2558372020721436, "learning_rate": 2.0855000000000003e-05, "loss": 0.4671, "step": 4174 }, { "epoch": 0.23378877813864934, "grad_norm": 1.3819587230682373, "learning_rate": 2.086e-05, "loss": 0.4429, "step": 4175 }, { "epoch": 0.23384477545077836, "grad_norm": 0.9712210297584534, "learning_rate": 2.0865e-05, "loss": 0.3406, "step": 4176 }, { "epoch": 0.23390077276290738, "grad_norm": 1.188122034072876, "learning_rate": 2.0870000000000002e-05, "loss": 0.4788, "step": 4177 }, { "epoch": 0.2339567700750364, 
"grad_norm": 1.0752432346343994, "learning_rate": 2.0875e-05, "loss": 0.471, "step": 4178 }, { "epoch": 0.23401276738716542, "grad_norm": 1.4987022876739502, "learning_rate": 2.0880000000000003e-05, "loss": 0.4276, "step": 4179 }, { "epoch": 0.23406876469929444, "grad_norm": 1.4521417617797852, "learning_rate": 2.0885e-05, "loss": 0.3481, "step": 4180 }, { "epoch": 0.23412476201142346, "grad_norm": 1.2257338762283325, "learning_rate": 2.089e-05, "loss": 0.4603, "step": 4181 }, { "epoch": 0.23418075932355248, "grad_norm": 1.2818821668624878, "learning_rate": 2.0895e-05, "loss": 0.3515, "step": 4182 }, { "epoch": 0.2342367566356815, "grad_norm": 1.5747284889221191, "learning_rate": 2.09e-05, "loss": 0.4984, "step": 4183 }, { "epoch": 0.23429275394781052, "grad_norm": 1.2921171188354492, "learning_rate": 2.0905000000000004e-05, "loss": 0.5154, "step": 4184 }, { "epoch": 0.23434875125993954, "grad_norm": 1.3256133794784546, "learning_rate": 2.091e-05, "loss": 0.4911, "step": 4185 }, { "epoch": 0.23440474857206853, "grad_norm": 1.235826015472412, "learning_rate": 2.0915000000000002e-05, "loss": 0.3948, "step": 4186 }, { "epoch": 0.23446074588419755, "grad_norm": 1.1025710105895996, "learning_rate": 2.092e-05, "loss": 0.3685, "step": 4187 }, { "epoch": 0.23451674319632657, "grad_norm": 1.6832563877105713, "learning_rate": 2.0925e-05, "loss": 0.4591, "step": 4188 }, { "epoch": 0.23457274050845558, "grad_norm": 1.313822627067566, "learning_rate": 2.093e-05, "loss": 0.483, "step": 4189 }, { "epoch": 0.2346287378205846, "grad_norm": 1.299112319946289, "learning_rate": 2.0935000000000002e-05, "loss": 0.5008, "step": 4190 }, { "epoch": 0.23468473513271362, "grad_norm": 1.257529616355896, "learning_rate": 2.0940000000000003e-05, "loss": 0.5007, "step": 4191 }, { "epoch": 0.23474073244484264, "grad_norm": 3.786053419113159, "learning_rate": 2.0945e-05, "loss": 0.4312, "step": 4192 }, { "epoch": 0.23479672975697166, "grad_norm": 1.1573549509048462, "learning_rate": 2.095e-05, 
"loss": 0.3606, "step": 4193 }, { "epoch": 0.23485272706910068, "grad_norm": 1.4084893465042114, "learning_rate": 2.0955e-05, "loss": 0.5302, "step": 4194 }, { "epoch": 0.2349087243812297, "grad_norm": 1.0694756507873535, "learning_rate": 2.0960000000000003e-05, "loss": 0.309, "step": 4195 }, { "epoch": 0.23496472169335872, "grad_norm": 1.0330634117126465, "learning_rate": 2.0965e-05, "loss": 0.3656, "step": 4196 }, { "epoch": 0.23502071900548774, "grad_norm": 1.2291839122772217, "learning_rate": 2.097e-05, "loss": 0.3507, "step": 4197 }, { "epoch": 0.23507671631761676, "grad_norm": 1.1577256917953491, "learning_rate": 2.0975e-05, "loss": 0.3888, "step": 4198 }, { "epoch": 0.23513271362974578, "grad_norm": 1.1568584442138672, "learning_rate": 2.098e-05, "loss": 0.5337, "step": 4199 }, { "epoch": 0.2351887109418748, "grad_norm": 1.1812026500701904, "learning_rate": 2.0985000000000003e-05, "loss": 0.4119, "step": 4200 }, { "epoch": 0.23524470825400381, "grad_norm": 1.2275822162628174, "learning_rate": 2.099e-05, "loss": 0.353, "step": 4201 }, { "epoch": 0.23530070556613283, "grad_norm": 1.1405919790267944, "learning_rate": 2.0995e-05, "loss": 0.4058, "step": 4202 }, { "epoch": 0.23535670287826185, "grad_norm": 1.1904031038284302, "learning_rate": 2.1e-05, "loss": 0.3385, "step": 4203 }, { "epoch": 0.23541270019039087, "grad_norm": 1.063535213470459, "learning_rate": 2.1005e-05, "loss": 0.3502, "step": 4204 }, { "epoch": 0.2354686975025199, "grad_norm": 1.3525657653808594, "learning_rate": 2.101e-05, "loss": 0.5297, "step": 4205 }, { "epoch": 0.2355246948146489, "grad_norm": 1.166908621788025, "learning_rate": 2.1015e-05, "loss": 0.3891, "step": 4206 }, { "epoch": 0.2355806921267779, "grad_norm": 1.3686550855636597, "learning_rate": 2.1020000000000002e-05, "loss": 0.497, "step": 4207 }, { "epoch": 0.23563668943890692, "grad_norm": 1.311750888824463, "learning_rate": 2.1025e-05, "loss": 0.4479, "step": 4208 }, { "epoch": 0.23569268675103594, "grad_norm": 
1.2289323806762695, "learning_rate": 2.103e-05, "loss": 0.4434, "step": 4209 }, { "epoch": 0.23574868406316496, "grad_norm": 1.266406536102295, "learning_rate": 2.1035e-05, "loss": 0.4205, "step": 4210 }, { "epoch": 0.23580468137529398, "grad_norm": 1.1448001861572266, "learning_rate": 2.1040000000000002e-05, "loss": 0.3897, "step": 4211 }, { "epoch": 0.235860678687423, "grad_norm": 1.3236624002456665, "learning_rate": 2.1045e-05, "loss": 0.4088, "step": 4212 }, { "epoch": 0.23591667599955202, "grad_norm": 1.2581989765167236, "learning_rate": 2.105e-05, "loss": 0.379, "step": 4213 }, { "epoch": 0.23597267331168104, "grad_norm": 1.0380505323410034, "learning_rate": 2.1055e-05, "loss": 0.3879, "step": 4214 }, { "epoch": 0.23602867062381006, "grad_norm": 1.1456139087677002, "learning_rate": 2.106e-05, "loss": 0.4532, "step": 4215 }, { "epoch": 0.23608466793593907, "grad_norm": 1.1638354063034058, "learning_rate": 2.1065000000000002e-05, "loss": 0.4703, "step": 4216 }, { "epoch": 0.2361406652480681, "grad_norm": 1.608616590499878, "learning_rate": 2.107e-05, "loss": 0.5295, "step": 4217 }, { "epoch": 0.2361966625601971, "grad_norm": 1.3021283149719238, "learning_rate": 2.1075e-05, "loss": 0.3815, "step": 4218 }, { "epoch": 0.23625265987232613, "grad_norm": 5.347463607788086, "learning_rate": 2.1079999999999998e-05, "loss": 0.3893, "step": 4219 }, { "epoch": 0.23630865718445515, "grad_norm": 1.0997823476791382, "learning_rate": 2.1085000000000002e-05, "loss": 0.3264, "step": 4220 }, { "epoch": 0.23636465449658417, "grad_norm": 1.0885735750198364, "learning_rate": 2.1090000000000003e-05, "loss": 0.3894, "step": 4221 }, { "epoch": 0.2364206518087132, "grad_norm": 1.0570690631866455, "learning_rate": 2.1095e-05, "loss": 0.374, "step": 4222 }, { "epoch": 0.2364766491208422, "grad_norm": 1.38384211063385, "learning_rate": 2.11e-05, "loss": 0.4647, "step": 4223 }, { "epoch": 0.23653264643297123, "grad_norm": 1.2475320100784302, "learning_rate": 2.1105e-05, "loss": 0.5061, 
"step": 4224 }, { "epoch": 0.23658864374510025, "grad_norm": 1.3273953199386597, "learning_rate": 2.1110000000000003e-05, "loss": 0.4165, "step": 4225 }, { "epoch": 0.23664464105722927, "grad_norm": 1.0664968490600586, "learning_rate": 2.1115e-05, "loss": 0.3721, "step": 4226 }, { "epoch": 0.23670063836935826, "grad_norm": 1.255656361579895, "learning_rate": 2.112e-05, "loss": 0.4174, "step": 4227 }, { "epoch": 0.23675663568148728, "grad_norm": 1.2060269117355347, "learning_rate": 2.1125000000000002e-05, "loss": 0.4766, "step": 4228 }, { "epoch": 0.2368126329936163, "grad_norm": 1.2741271257400513, "learning_rate": 2.113e-05, "loss": 0.4044, "step": 4229 }, { "epoch": 0.23686863030574531, "grad_norm": 1.3632087707519531, "learning_rate": 2.1135000000000003e-05, "loss": 0.4712, "step": 4230 }, { "epoch": 0.23692462761787433, "grad_norm": 1.3004961013793945, "learning_rate": 2.114e-05, "loss": 0.4695, "step": 4231 }, { "epoch": 0.23698062493000335, "grad_norm": 1.64434814453125, "learning_rate": 2.1145e-05, "loss": 0.463, "step": 4232 }, { "epoch": 0.23703662224213237, "grad_norm": 1.0939937829971313, "learning_rate": 2.115e-05, "loss": 0.4293, "step": 4233 }, { "epoch": 0.2370926195542614, "grad_norm": 1.2327302694320679, "learning_rate": 2.1155e-05, "loss": 0.4478, "step": 4234 }, { "epoch": 0.2371486168663904, "grad_norm": 1.1831134557724, "learning_rate": 2.116e-05, "loss": 0.4402, "step": 4235 }, { "epoch": 0.23720461417851943, "grad_norm": 1.0770121812820435, "learning_rate": 2.1165e-05, "loss": 0.6058, "step": 4236 }, { "epoch": 0.23726061149064845, "grad_norm": 1.3046811819076538, "learning_rate": 2.1170000000000002e-05, "loss": 0.4307, "step": 4237 }, { "epoch": 0.23731660880277747, "grad_norm": 1.263245701789856, "learning_rate": 2.1175e-05, "loss": 0.4071, "step": 4238 }, { "epoch": 0.2373726061149065, "grad_norm": 1.2050762176513672, "learning_rate": 2.118e-05, "loss": 0.5376, "step": 4239 }, { "epoch": 0.2374286034270355, "grad_norm": 1.1449065208435059, 
"learning_rate": 2.1185e-05, "loss": 0.4948, "step": 4240 }, { "epoch": 0.23748460073916453, "grad_norm": 1.1334929466247559, "learning_rate": 2.1190000000000002e-05, "loss": 0.3879, "step": 4241 }, { "epoch": 0.23754059805129354, "grad_norm": 1.0854896306991577, "learning_rate": 2.1195e-05, "loss": 0.3292, "step": 4242 }, { "epoch": 0.23759659536342256, "grad_norm": 1.4370067119598389, "learning_rate": 2.12e-05, "loss": 0.6236, "step": 4243 }, { "epoch": 0.23765259267555158, "grad_norm": 1.1262603998184204, "learning_rate": 2.1205e-05, "loss": 0.4683, "step": 4244 }, { "epoch": 0.2377085899876806, "grad_norm": 1.2429581880569458, "learning_rate": 2.1210000000000002e-05, "loss": 0.4212, "step": 4245 }, { "epoch": 0.23776458729980962, "grad_norm": 1.2421106100082397, "learning_rate": 2.1215000000000003e-05, "loss": 0.6051, "step": 4246 }, { "epoch": 0.23782058461193864, "grad_norm": 1.3536938428878784, "learning_rate": 2.122e-05, "loss": 0.5184, "step": 4247 }, { "epoch": 0.23787658192406763, "grad_norm": 1.1302820444107056, "learning_rate": 2.1225e-05, "loss": 0.4098, "step": 4248 }, { "epoch": 0.23793257923619665, "grad_norm": 1.2246413230895996, "learning_rate": 2.123e-05, "loss": 0.4006, "step": 4249 }, { "epoch": 0.23798857654832567, "grad_norm": 2.1079390048980713, "learning_rate": 2.1235000000000003e-05, "loss": 0.483, "step": 4250 }, { "epoch": 0.2380445738604547, "grad_norm": 1.4226256608963013, "learning_rate": 2.124e-05, "loss": 0.4082, "step": 4251 }, { "epoch": 0.2381005711725837, "grad_norm": 1.766867995262146, "learning_rate": 2.1245e-05, "loss": 0.3793, "step": 4252 }, { "epoch": 0.23815656848471273, "grad_norm": 1.7468613386154175, "learning_rate": 2.125e-05, "loss": 0.6336, "step": 4253 }, { "epoch": 0.23821256579684175, "grad_norm": 1.261499285697937, "learning_rate": 2.1255e-05, "loss": 0.4244, "step": 4254 }, { "epoch": 0.23826856310897077, "grad_norm": 2.6207706928253174, "learning_rate": 2.1260000000000003e-05, "loss": 0.4324, "step": 4255 }, 
{ "epoch": 0.23832456042109978, "grad_norm": 1.1398931741714478, "learning_rate": 2.1265e-05, "loss": 0.4442, "step": 4256 }, { "epoch": 0.2383805577332288, "grad_norm": 1.4394181966781616, "learning_rate": 2.127e-05, "loss": 0.4277, "step": 4257 }, { "epoch": 0.23843655504535782, "grad_norm": 1.1773593425750732, "learning_rate": 2.1275000000000002e-05, "loss": 0.4644, "step": 4258 }, { "epoch": 0.23849255235748684, "grad_norm": 1.4122238159179688, "learning_rate": 2.128e-05, "loss": 0.5031, "step": 4259 }, { "epoch": 0.23854854966961586, "grad_norm": 1.206392765045166, "learning_rate": 2.1285000000000004e-05, "loss": 0.407, "step": 4260 }, { "epoch": 0.23860454698174488, "grad_norm": 1.189375638961792, "learning_rate": 2.129e-05, "loss": 0.3432, "step": 4261 }, { "epoch": 0.2386605442938739, "grad_norm": 1.1893759965896606, "learning_rate": 2.1295000000000002e-05, "loss": 0.4062, "step": 4262 }, { "epoch": 0.23871654160600292, "grad_norm": 1.5574356317520142, "learning_rate": 2.13e-05, "loss": 0.3862, "step": 4263 }, { "epoch": 0.23877253891813194, "grad_norm": 1.6357659101486206, "learning_rate": 2.1305e-05, "loss": 0.4128, "step": 4264 }, { "epoch": 0.23882853623026096, "grad_norm": 1.0961475372314453, "learning_rate": 2.131e-05, "loss": 0.303, "step": 4265 }, { "epoch": 0.23888453354238998, "grad_norm": 1.287390947341919, "learning_rate": 2.1315000000000002e-05, "loss": 0.5198, "step": 4266 }, { "epoch": 0.238940530854519, "grad_norm": 1.3111846446990967, "learning_rate": 2.1320000000000003e-05, "loss": 0.4619, "step": 4267 }, { "epoch": 0.23899652816664801, "grad_norm": 1.5583844184875488, "learning_rate": 2.1325e-05, "loss": 0.3147, "step": 4268 }, { "epoch": 0.239052525478777, "grad_norm": 1.2882252931594849, "learning_rate": 2.133e-05, "loss": 0.3848, "step": 4269 }, { "epoch": 0.23910852279090603, "grad_norm": 1.083142638206482, "learning_rate": 2.1335e-05, "loss": 0.4959, "step": 4270 }, { "epoch": 0.23916452010303504, "grad_norm": 1.1915721893310547, 
"learning_rate": 2.1340000000000002e-05, "loss": 0.3234, "step": 4271 }, { "epoch": 0.23922051741516406, "grad_norm": 1.1333130598068237, "learning_rate": 2.1345e-05, "loss": 0.371, "step": 4272 }, { "epoch": 0.23927651472729308, "grad_norm": 1.2794235944747925, "learning_rate": 2.135e-05, "loss": 0.415, "step": 4273 }, { "epoch": 0.2393325120394221, "grad_norm": 1.1736987829208374, "learning_rate": 2.1355e-05, "loss": 0.3776, "step": 4274 }, { "epoch": 0.23938850935155112, "grad_norm": 1.090275526046753, "learning_rate": 2.1360000000000002e-05, "loss": 0.433, "step": 4275 }, { "epoch": 0.23944450666368014, "grad_norm": 1.1244940757751465, "learning_rate": 2.1365000000000003e-05, "loss": 0.5467, "step": 4276 }, { "epoch": 0.23950050397580916, "grad_norm": 1.1872576475143433, "learning_rate": 2.137e-05, "loss": 0.4813, "step": 4277 }, { "epoch": 0.23955650128793818, "grad_norm": 1.2008262872695923, "learning_rate": 2.1375e-05, "loss": 0.4253, "step": 4278 }, { "epoch": 0.2396124986000672, "grad_norm": 1.2897939682006836, "learning_rate": 2.138e-05, "loss": 0.415, "step": 4279 }, { "epoch": 0.23966849591219622, "grad_norm": 1.1022006273269653, "learning_rate": 2.1385000000000003e-05, "loss": 0.4679, "step": 4280 }, { "epoch": 0.23972449322432524, "grad_norm": 1.2431297302246094, "learning_rate": 2.139e-05, "loss": 0.3932, "step": 4281 }, { "epoch": 0.23978049053645425, "grad_norm": 1.5248690843582153, "learning_rate": 2.1395e-05, "loss": 0.4623, "step": 4282 }, { "epoch": 0.23983648784858327, "grad_norm": 1.0249652862548828, "learning_rate": 2.1400000000000002e-05, "loss": 0.3644, "step": 4283 }, { "epoch": 0.2398924851607123, "grad_norm": 1.4337689876556396, "learning_rate": 2.1405e-05, "loss": 0.4221, "step": 4284 }, { "epoch": 0.2399484824728413, "grad_norm": 1.283555269241333, "learning_rate": 2.1410000000000003e-05, "loss": 0.453, "step": 4285 }, { "epoch": 0.24000447978497033, "grad_norm": 1.3956892490386963, "learning_rate": 2.1415e-05, "loss": 0.4818, "step": 
4286 }, { "epoch": 0.24006047709709935, "grad_norm": 1.484602451324463, "learning_rate": 2.142e-05, "loss": 0.5885, "step": 4287 }, { "epoch": 0.24011647440922837, "grad_norm": 1.120192050933838, "learning_rate": 2.1425e-05, "loss": 0.366, "step": 4288 }, { "epoch": 0.24017247172135736, "grad_norm": 5.211393356323242, "learning_rate": 2.143e-05, "loss": 0.451, "step": 4289 }, { "epoch": 0.24022846903348638, "grad_norm": 1.0974055528640747, "learning_rate": 2.1435000000000004e-05, "loss": 0.3342, "step": 4290 }, { "epoch": 0.2402844663456154, "grad_norm": 1.5038377046585083, "learning_rate": 2.144e-05, "loss": 0.4989, "step": 4291 }, { "epoch": 0.24034046365774442, "grad_norm": 0.933678150177002, "learning_rate": 2.1445000000000002e-05, "loss": 0.2991, "step": 4292 }, { "epoch": 0.24039646096987344, "grad_norm": 1.149804949760437, "learning_rate": 2.145e-05, "loss": 0.5103, "step": 4293 }, { "epoch": 0.24045245828200246, "grad_norm": 1.0830732583999634, "learning_rate": 2.1455e-05, "loss": 0.4144, "step": 4294 }, { "epoch": 0.24050845559413148, "grad_norm": 1.2888356447219849, "learning_rate": 2.146e-05, "loss": 0.4835, "step": 4295 }, { "epoch": 0.2405644529062605, "grad_norm": 5.33606481552124, "learning_rate": 2.1465000000000002e-05, "loss": 0.5273, "step": 4296 }, { "epoch": 0.24062045021838951, "grad_norm": 1.4628833532333374, "learning_rate": 2.1470000000000003e-05, "loss": 0.4482, "step": 4297 }, { "epoch": 0.24067644753051853, "grad_norm": 1.1477042436599731, "learning_rate": 2.1475e-05, "loss": 0.5156, "step": 4298 }, { "epoch": 0.24073244484264755, "grad_norm": 1.2502121925354004, "learning_rate": 2.148e-05, "loss": 0.3495, "step": 4299 }, { "epoch": 0.24078844215477657, "grad_norm": 1.1966519355773926, "learning_rate": 2.1485000000000002e-05, "loss": 0.4084, "step": 4300 }, { "epoch": 0.2408444394669056, "grad_norm": 1.095430850982666, "learning_rate": 2.1490000000000003e-05, "loss": 0.4155, "step": 4301 }, { "epoch": 0.2409004367790346, "grad_norm": 
1.3317145109176636, "learning_rate": 2.1495e-05, "loss": 0.3883, "step": 4302 }, { "epoch": 0.24095643409116363, "grad_norm": 1.2294490337371826, "learning_rate": 2.15e-05, "loss": 0.5213, "step": 4303 }, { "epoch": 0.24101243140329265, "grad_norm": 1.0221933126449585, "learning_rate": 2.1505e-05, "loss": 0.2962, "step": 4304 }, { "epoch": 0.24106842871542167, "grad_norm": 1.0682564973831177, "learning_rate": 2.1510000000000002e-05, "loss": 0.4275, "step": 4305 }, { "epoch": 0.2411244260275507, "grad_norm": 1.2987785339355469, "learning_rate": 2.1515000000000003e-05, "loss": 0.3958, "step": 4306 }, { "epoch": 0.2411804233396797, "grad_norm": 1.083464503288269, "learning_rate": 2.152e-05, "loss": 0.4409, "step": 4307 }, { "epoch": 0.24123642065180873, "grad_norm": 1.1261757612228394, "learning_rate": 2.1525e-05, "loss": 0.4545, "step": 4308 }, { "epoch": 0.24129241796393774, "grad_norm": 1.1840627193450928, "learning_rate": 2.153e-05, "loss": 0.4417, "step": 4309 }, { "epoch": 0.24134841527606674, "grad_norm": 2.27750301361084, "learning_rate": 2.1535000000000003e-05, "loss": 0.5773, "step": 4310 }, { "epoch": 0.24140441258819575, "grad_norm": 1.263756513595581, "learning_rate": 2.154e-05, "loss": 0.4707, "step": 4311 }, { "epoch": 0.24146040990032477, "grad_norm": 1.3066948652267456, "learning_rate": 2.1545e-05, "loss": 0.4886, "step": 4312 }, { "epoch": 0.2415164072124538, "grad_norm": 1.117456078529358, "learning_rate": 2.1550000000000002e-05, "loss": 0.4034, "step": 4313 }, { "epoch": 0.2415724045245828, "grad_norm": 1.612994909286499, "learning_rate": 2.1555e-05, "loss": 0.3843, "step": 4314 }, { "epoch": 0.24162840183671183, "grad_norm": 1.3376786708831787, "learning_rate": 2.1560000000000004e-05, "loss": 0.5396, "step": 4315 }, { "epoch": 0.24168439914884085, "grad_norm": 1.0367299318313599, "learning_rate": 2.1565e-05, "loss": 0.3505, "step": 4316 }, { "epoch": 0.24174039646096987, "grad_norm": 1.2690515518188477, "learning_rate": 2.1570000000000002e-05, 
"loss": 0.4122, "step": 4317 }, { "epoch": 0.2417963937730989, "grad_norm": 1.1069188117980957, "learning_rate": 2.1575e-05, "loss": 0.4649, "step": 4318 }, { "epoch": 0.2418523910852279, "grad_norm": 1.1699316501617432, "learning_rate": 2.158e-05, "loss": 0.531, "step": 4319 }, { "epoch": 0.24190838839735693, "grad_norm": 1.2313868999481201, "learning_rate": 2.1585e-05, "loss": 0.4042, "step": 4320 }, { "epoch": 0.24196438570948595, "grad_norm": 1.5419354438781738, "learning_rate": 2.159e-05, "loss": 0.5642, "step": 4321 }, { "epoch": 0.24202038302161497, "grad_norm": 1.1893784999847412, "learning_rate": 2.1595000000000002e-05, "loss": 0.3844, "step": 4322 }, { "epoch": 0.24207638033374398, "grad_norm": 1.4173463582992554, "learning_rate": 2.16e-05, "loss": 0.5144, "step": 4323 }, { "epoch": 0.242132377645873, "grad_norm": 0.9899412393569946, "learning_rate": 2.1605e-05, "loss": 0.3475, "step": 4324 }, { "epoch": 0.24218837495800202, "grad_norm": 1.3944251537322998, "learning_rate": 2.1609999999999998e-05, "loss": 0.4864, "step": 4325 }, { "epoch": 0.24224437227013104, "grad_norm": 1.2878979444503784, "learning_rate": 2.1615000000000002e-05, "loss": 0.3878, "step": 4326 }, { "epoch": 0.24230036958226006, "grad_norm": 1.1655343770980835, "learning_rate": 2.162e-05, "loss": 0.284, "step": 4327 }, { "epoch": 0.24235636689438908, "grad_norm": 1.329520583152771, "learning_rate": 2.1625e-05, "loss": 0.518, "step": 4328 }, { "epoch": 0.2424123642065181, "grad_norm": 1.3053680658340454, "learning_rate": 2.163e-05, "loss": 0.5209, "step": 4329 }, { "epoch": 0.24246836151864712, "grad_norm": 1.5850566625595093, "learning_rate": 2.1635e-05, "loss": 0.4758, "step": 4330 }, { "epoch": 0.2425243588307761, "grad_norm": 1.0623127222061157, "learning_rate": 2.1640000000000003e-05, "loss": 0.4205, "step": 4331 }, { "epoch": 0.24258035614290513, "grad_norm": 1.486602783203125, "learning_rate": 2.1645e-05, "loss": 0.5736, "step": 4332 }, { "epoch": 0.24263635345503415, "grad_norm": 
1.1264127492904663, "learning_rate": 2.165e-05, "loss": 0.3344, "step": 4333 }, { "epoch": 0.24269235076716317, "grad_norm": 1.114925503730774, "learning_rate": 2.1655000000000002e-05, "loss": 0.3654, "step": 4334 }, { "epoch": 0.2427483480792922, "grad_norm": 0.9926008582115173, "learning_rate": 2.166e-05, "loss": 0.4218, "step": 4335 }, { "epoch": 0.2428043453914212, "grad_norm": 1.0779087543487549, "learning_rate": 2.1665000000000003e-05, "loss": 0.3215, "step": 4336 }, { "epoch": 0.24286034270355022, "grad_norm": 1.2170718908309937, "learning_rate": 2.167e-05, "loss": 0.6787, "step": 4337 }, { "epoch": 0.24291634001567924, "grad_norm": 1.7566055059432983, "learning_rate": 2.1675e-05, "loss": 0.5167, "step": 4338 }, { "epoch": 0.24297233732780826, "grad_norm": 1.25746488571167, "learning_rate": 2.168e-05, "loss": 0.4718, "step": 4339 }, { "epoch": 0.24302833463993728, "grad_norm": 1.1558181047439575, "learning_rate": 2.1685e-05, "loss": 0.475, "step": 4340 }, { "epoch": 0.2430843319520663, "grad_norm": 9.28864860534668, "learning_rate": 2.169e-05, "loss": 0.4129, "step": 4341 }, { "epoch": 0.24314032926419532, "grad_norm": 1.0949761867523193, "learning_rate": 2.1695e-05, "loss": 0.3915, "step": 4342 }, { "epoch": 0.24319632657632434, "grad_norm": 1.1187132596969604, "learning_rate": 2.1700000000000002e-05, "loss": 0.3921, "step": 4343 }, { "epoch": 0.24325232388845336, "grad_norm": 1.0224883556365967, "learning_rate": 2.1705e-05, "loss": 0.3613, "step": 4344 }, { "epoch": 0.24330832120058238, "grad_norm": 1.2836576700210571, "learning_rate": 2.171e-05, "loss": 0.4491, "step": 4345 }, { "epoch": 0.2433643185127114, "grad_norm": 1.13035249710083, "learning_rate": 2.1715e-05, "loss": 0.3769, "step": 4346 }, { "epoch": 0.24342031582484042, "grad_norm": 1.0483314990997314, "learning_rate": 2.1720000000000002e-05, "loss": 0.4401, "step": 4347 }, { "epoch": 0.24347631313696944, "grad_norm": 1.3753732442855835, "learning_rate": 2.1725e-05, "loss": 0.5379, "step": 4348 
}, { "epoch": 0.24353231044909845, "grad_norm": 4.958712100982666, "learning_rate": 2.173e-05, "loss": 0.3993, "step": 4349 }, { "epoch": 0.24358830776122747, "grad_norm": 1.0264718532562256, "learning_rate": 2.1735e-05, "loss": 0.3764, "step": 4350 }, { "epoch": 0.24364430507335647, "grad_norm": 1.439278483390808, "learning_rate": 2.1740000000000002e-05, "loss": 0.651, "step": 4351 }, { "epoch": 0.24370030238548548, "grad_norm": 1.1682460308074951, "learning_rate": 2.1745000000000003e-05, "loss": 0.3624, "step": 4352 }, { "epoch": 0.2437562996976145, "grad_norm": 1.3046634197235107, "learning_rate": 2.175e-05, "loss": 0.4369, "step": 4353 }, { "epoch": 0.24381229700974352, "grad_norm": 1.123918890953064, "learning_rate": 2.1755e-05, "loss": 0.4421, "step": 4354 }, { "epoch": 0.24386829432187254, "grad_norm": 1.194492220878601, "learning_rate": 2.176e-05, "loss": 0.3583, "step": 4355 }, { "epoch": 0.24392429163400156, "grad_norm": 1.5091285705566406, "learning_rate": 2.1765000000000003e-05, "loss": 0.557, "step": 4356 }, { "epoch": 0.24398028894613058, "grad_norm": 1.6220325231552124, "learning_rate": 2.177e-05, "loss": 0.563, "step": 4357 }, { "epoch": 0.2440362862582596, "grad_norm": 1.1459437608718872, "learning_rate": 2.1775e-05, "loss": 0.4278, "step": 4358 }, { "epoch": 0.24409228357038862, "grad_norm": 1.2954154014587402, "learning_rate": 2.178e-05, "loss": 0.5298, "step": 4359 }, { "epoch": 0.24414828088251764, "grad_norm": 1.2371245622634888, "learning_rate": 2.1785e-05, "loss": 0.343, "step": 4360 }, { "epoch": 0.24420427819464666, "grad_norm": 1.1733187437057495, "learning_rate": 2.1790000000000003e-05, "loss": 0.3858, "step": 4361 }, { "epoch": 0.24426027550677568, "grad_norm": 1.2497729063034058, "learning_rate": 2.1795e-05, "loss": 0.3206, "step": 4362 }, { "epoch": 0.2443162728189047, "grad_norm": 1.3116555213928223, "learning_rate": 2.18e-05, "loss": 0.5136, "step": 4363 }, { "epoch": 0.24437227013103371, "grad_norm": 1.1510710716247559, 
"learning_rate": 2.1805e-05, "loss": 0.4088, "step": 4364 }, { "epoch": 0.24442826744316273, "grad_norm": 1.3727792501449585, "learning_rate": 2.181e-05, "loss": 0.4046, "step": 4365 }, { "epoch": 0.24448426475529175, "grad_norm": 1.2303552627563477, "learning_rate": 2.1815000000000004e-05, "loss": 0.3942, "step": 4366 }, { "epoch": 0.24454026206742077, "grad_norm": 1.3398981094360352, "learning_rate": 2.182e-05, "loss": 0.4133, "step": 4367 }, { "epoch": 0.2445962593795498, "grad_norm": 1.425965428352356, "learning_rate": 2.1825000000000002e-05, "loss": 0.4376, "step": 4368 }, { "epoch": 0.2446522566916788, "grad_norm": 1.1586977243423462, "learning_rate": 2.183e-05, "loss": 0.4468, "step": 4369 }, { "epoch": 0.24470825400380783, "grad_norm": 1.0181491374969482, "learning_rate": 2.1835e-05, "loss": 0.4791, "step": 4370 }, { "epoch": 0.24476425131593685, "grad_norm": 1.1308424472808838, "learning_rate": 2.184e-05, "loss": 0.3961, "step": 4371 }, { "epoch": 0.24482024862806584, "grad_norm": 1.4323550462722778, "learning_rate": 2.1845000000000002e-05, "loss": 0.5017, "step": 4372 }, { "epoch": 0.24487624594019486, "grad_norm": 1.1525380611419678, "learning_rate": 2.1850000000000003e-05, "loss": 0.4749, "step": 4373 }, { "epoch": 0.24493224325232388, "grad_norm": 1.2878707647323608, "learning_rate": 2.1855e-05, "loss": 0.4322, "step": 4374 }, { "epoch": 0.2449882405644529, "grad_norm": 1.210702896118164, "learning_rate": 2.186e-05, "loss": 0.4982, "step": 4375 }, { "epoch": 0.24504423787658192, "grad_norm": 1.2443805932998657, "learning_rate": 2.1865e-05, "loss": 0.3842, "step": 4376 }, { "epoch": 0.24510023518871094, "grad_norm": 1.083103060722351, "learning_rate": 2.1870000000000002e-05, "loss": 0.3987, "step": 4377 }, { "epoch": 0.24515623250083995, "grad_norm": 1.1591500043869019, "learning_rate": 2.1875e-05, "loss": 0.3353, "step": 4378 }, { "epoch": 0.24521222981296897, "grad_norm": 1.045145869255066, "learning_rate": 2.188e-05, "loss": 0.4127, "step": 4379 }, { 
"epoch": 0.245268227125098, "grad_norm": 1.5242241621017456, "learning_rate": 2.1885e-05, "loss": 0.5309, "step": 4380 }, { "epoch": 0.245324224437227, "grad_norm": 1.0925313234329224, "learning_rate": 2.1890000000000002e-05, "loss": 0.456, "step": 4381 }, { "epoch": 0.24538022174935603, "grad_norm": 1.1851149797439575, "learning_rate": 2.1895000000000003e-05, "loss": 0.3898, "step": 4382 }, { "epoch": 0.24543621906148505, "grad_norm": 1.1446529626846313, "learning_rate": 2.19e-05, "loss": 0.357, "step": 4383 }, { "epoch": 0.24549221637361407, "grad_norm": 1.3524234294891357, "learning_rate": 2.1905e-05, "loss": 0.4854, "step": 4384 }, { "epoch": 0.2455482136857431, "grad_norm": 1.198757290840149, "learning_rate": 2.191e-05, "loss": 0.4094, "step": 4385 }, { "epoch": 0.2456042109978721, "grad_norm": 1.1887050867080688, "learning_rate": 2.1915000000000003e-05, "loss": 0.3984, "step": 4386 }, { "epoch": 0.24566020831000113, "grad_norm": 1.2815239429473877, "learning_rate": 2.192e-05, "loss": 0.3801, "step": 4387 }, { "epoch": 0.24571620562213015, "grad_norm": 1.220858097076416, "learning_rate": 2.1925e-05, "loss": 0.429, "step": 4388 }, { "epoch": 0.24577220293425917, "grad_norm": 1.093279480934143, "learning_rate": 2.1930000000000002e-05, "loss": 0.3266, "step": 4389 }, { "epoch": 0.24582820024638818, "grad_norm": 1.432355523109436, "learning_rate": 2.1935e-05, "loss": 0.4068, "step": 4390 }, { "epoch": 0.2458841975585172, "grad_norm": 1.2643381357192993, "learning_rate": 2.1940000000000003e-05, "loss": 0.5241, "step": 4391 }, { "epoch": 0.24594019487064622, "grad_norm": 1.1043672561645508, "learning_rate": 2.1945e-05, "loss": 0.4859, "step": 4392 }, { "epoch": 0.24599619218277521, "grad_norm": 1.317258596420288, "learning_rate": 2.195e-05, "loss": 0.5257, "step": 4393 }, { "epoch": 0.24605218949490423, "grad_norm": 1.2772438526153564, "learning_rate": 2.1955e-05, "loss": 0.4314, "step": 4394 }, { "epoch": 0.24610818680703325, "grad_norm": 3.390718460083008, 
"learning_rate": 2.196e-05, "loss": 0.4308, "step": 4395 }, { "epoch": 0.24616418411916227, "grad_norm": 1.1928256750106812, "learning_rate": 2.1965e-05, "loss": 0.4142, "step": 4396 }, { "epoch": 0.2462201814312913, "grad_norm": 1.1900676488876343, "learning_rate": 2.197e-05, "loss": 0.3713, "step": 4397 }, { "epoch": 0.2462761787434203, "grad_norm": 1.3439291715621948, "learning_rate": 2.1975000000000002e-05, "loss": 0.5114, "step": 4398 }, { "epoch": 0.24633217605554933, "grad_norm": 1.282044768333435, "learning_rate": 2.198e-05, "loss": 0.5487, "step": 4399 }, { "epoch": 0.24638817336767835, "grad_norm": 1.2520606517791748, "learning_rate": 2.1985e-05, "loss": 0.4649, "step": 4400 }, { "epoch": 0.24644417067980737, "grad_norm": 1.1088248491287231, "learning_rate": 2.199e-05, "loss": 0.3805, "step": 4401 }, { "epoch": 0.2465001679919364, "grad_norm": 1.6109001636505127, "learning_rate": 2.1995000000000002e-05, "loss": 0.3532, "step": 4402 }, { "epoch": 0.2465561653040654, "grad_norm": 1.2714914083480835, "learning_rate": 2.2000000000000003e-05, "loss": 0.4865, "step": 4403 }, { "epoch": 0.24661216261619442, "grad_norm": 1.204676628112793, "learning_rate": 2.2005e-05, "loss": 0.5088, "step": 4404 }, { "epoch": 0.24666815992832344, "grad_norm": 1.2741031646728516, "learning_rate": 2.201e-05, "loss": 0.4747, "step": 4405 }, { "epoch": 0.24672415724045246, "grad_norm": 1.3649706840515137, "learning_rate": 2.2015000000000002e-05, "loss": 0.5744, "step": 4406 }, { "epoch": 0.24678015455258148, "grad_norm": 1.3730049133300781, "learning_rate": 2.2020000000000003e-05, "loss": 0.5204, "step": 4407 }, { "epoch": 0.2468361518647105, "grad_norm": 1.2699775695800781, "learning_rate": 2.2025e-05, "loss": 0.4023, "step": 4408 }, { "epoch": 0.24689214917683952, "grad_norm": 1.1627721786499023, "learning_rate": 2.203e-05, "loss": 0.4179, "step": 4409 }, { "epoch": 0.24694814648896854, "grad_norm": 1.1745370626449585, "learning_rate": 2.2035e-05, "loss": 0.4344, "step": 4410 }, { 
"epoch": 0.24700414380109756, "grad_norm": 1.1994861364364624, "learning_rate": 2.2040000000000002e-05, "loss": 0.4245, "step": 4411 }, { "epoch": 0.24706014111322658, "grad_norm": 0.9576858282089233, "learning_rate": 2.2045000000000003e-05, "loss": 0.3195, "step": 4412 }, { "epoch": 0.24711613842535557, "grad_norm": 1.258575201034546, "learning_rate": 2.205e-05, "loss": 0.425, "step": 4413 }, { "epoch": 0.2471721357374846, "grad_norm": 1.6441184282302856, "learning_rate": 2.2055e-05, "loss": 0.511, "step": 4414 }, { "epoch": 0.2472281330496136, "grad_norm": 1.2258238792419434, "learning_rate": 2.206e-05, "loss": 0.391, "step": 4415 }, { "epoch": 0.24728413036174263, "grad_norm": 1.3207824230194092, "learning_rate": 2.2065000000000003e-05, "loss": 0.4906, "step": 4416 }, { "epoch": 0.24734012767387165, "grad_norm": 1.6287943124771118, "learning_rate": 2.207e-05, "loss": 0.6048, "step": 4417 }, { "epoch": 0.24739612498600067, "grad_norm": 1.5828886032104492, "learning_rate": 2.2075e-05, "loss": 0.4398, "step": 4418 }, { "epoch": 0.24745212229812968, "grad_norm": 1.2290335893630981, "learning_rate": 2.2080000000000002e-05, "loss": 0.4713, "step": 4419 }, { "epoch": 0.2475081196102587, "grad_norm": 1.4252212047576904, "learning_rate": 2.2085e-05, "loss": 0.4958, "step": 4420 }, { "epoch": 0.24756411692238772, "grad_norm": 1.0584359169006348, "learning_rate": 2.2090000000000004e-05, "loss": 0.4633, "step": 4421 }, { "epoch": 0.24762011423451674, "grad_norm": 0.9757269024848938, "learning_rate": 2.2095e-05, "loss": 0.3672, "step": 4422 }, { "epoch": 0.24767611154664576, "grad_norm": 1.0999170541763306, "learning_rate": 2.2100000000000002e-05, "loss": 0.4658, "step": 4423 }, { "epoch": 0.24773210885877478, "grad_norm": 1.0702378749847412, "learning_rate": 2.2105e-05, "loss": 0.3745, "step": 4424 }, { "epoch": 0.2477881061709038, "grad_norm": 1.335506796836853, "learning_rate": 2.211e-05, "loss": 0.423, "step": 4425 }, { "epoch": 0.24784410348303282, "grad_norm": 
1.1725655794143677, "learning_rate": 2.2115e-05, "loss": 0.4771, "step": 4426 }, { "epoch": 0.24790010079516184, "grad_norm": 1.1515663862228394, "learning_rate": 2.212e-05, "loss": 0.3655, "step": 4427 }, { "epoch": 0.24795609810729086, "grad_norm": 1.1827774047851562, "learning_rate": 2.2125000000000002e-05, "loss": 0.4368, "step": 4428 }, { "epoch": 0.24801209541941988, "grad_norm": 1.267818570137024, "learning_rate": 2.213e-05, "loss": 0.4739, "step": 4429 }, { "epoch": 0.2480680927315489, "grad_norm": 1.034224510192871, "learning_rate": 2.2135e-05, "loss": 0.3464, "step": 4430 }, { "epoch": 0.24812409004367791, "grad_norm": 1.0499874353408813, "learning_rate": 2.214e-05, "loss": 0.44, "step": 4431 }, { "epoch": 0.24818008735580693, "grad_norm": 1.1445691585540771, "learning_rate": 2.2145000000000002e-05, "loss": 0.3484, "step": 4432 }, { "epoch": 0.24823608466793595, "grad_norm": 1.0334786176681519, "learning_rate": 2.215e-05, "loss": 0.5041, "step": 4433 }, { "epoch": 0.24829208198006494, "grad_norm": 1.2405943870544434, "learning_rate": 2.2155e-05, "loss": 0.4128, "step": 4434 }, { "epoch": 0.24834807929219396, "grad_norm": 1.390884280204773, "learning_rate": 2.216e-05, "loss": 0.5113, "step": 4435 }, { "epoch": 0.24840407660432298, "grad_norm": 1.0858657360076904, "learning_rate": 2.2165000000000002e-05, "loss": 0.363, "step": 4436 }, { "epoch": 0.248460073916452, "grad_norm": 1.241275429725647, "learning_rate": 2.2170000000000003e-05, "loss": 0.3941, "step": 4437 }, { "epoch": 0.24851607122858102, "grad_norm": 1.1823296546936035, "learning_rate": 2.2175e-05, "loss": 0.3404, "step": 4438 }, { "epoch": 0.24857206854071004, "grad_norm": 1.2853224277496338, "learning_rate": 2.218e-05, "loss": 0.3665, "step": 4439 }, { "epoch": 0.24862806585283906, "grad_norm": 1.2950419187545776, "learning_rate": 2.2185000000000002e-05, "loss": 0.4266, "step": 4440 }, { "epoch": 0.24868406316496808, "grad_norm": 1.5352532863616943, "learning_rate": 2.219e-05, "loss": 0.6076, 
"step": 4441 }, { "epoch": 0.2487400604770971, "grad_norm": 1.2307060956954956, "learning_rate": 2.2195000000000003e-05, "loss": 0.4616, "step": 4442 }, { "epoch": 0.24879605778922612, "grad_norm": 1.073006510734558, "learning_rate": 2.22e-05, "loss": 0.3265, "step": 4443 }, { "epoch": 0.24885205510135514, "grad_norm": 1.1062570810317993, "learning_rate": 2.2205000000000002e-05, "loss": 0.3564, "step": 4444 }, { "epoch": 0.24890805241348415, "grad_norm": 1.2196799516677856, "learning_rate": 2.221e-05, "loss": 0.4025, "step": 4445 }, { "epoch": 0.24896404972561317, "grad_norm": 1.1566624641418457, "learning_rate": 2.2215e-05, "loss": 0.3357, "step": 4446 }, { "epoch": 0.2490200470377422, "grad_norm": 1.2082751989364624, "learning_rate": 2.222e-05, "loss": 0.442, "step": 4447 }, { "epoch": 0.2490760443498712, "grad_norm": 1.264952540397644, "learning_rate": 2.2225e-05, "loss": 0.5093, "step": 4448 }, { "epoch": 0.24913204166200023, "grad_norm": 1.0614506006240845, "learning_rate": 2.2230000000000002e-05, "loss": 0.3028, "step": 4449 }, { "epoch": 0.24918803897412925, "grad_norm": 1.1484986543655396, "learning_rate": 2.2235e-05, "loss": 0.4714, "step": 4450 }, { "epoch": 0.24924403628625827, "grad_norm": 1.2769389152526855, "learning_rate": 2.224e-05, "loss": 0.5447, "step": 4451 }, { "epoch": 0.2493000335983873, "grad_norm": 1.1708201169967651, "learning_rate": 2.2245e-05, "loss": 0.3064, "step": 4452 }, { "epoch": 0.2493560309105163, "grad_norm": 1.3028026819229126, "learning_rate": 2.2250000000000002e-05, "loss": 0.4678, "step": 4453 }, { "epoch": 0.24941202822264533, "grad_norm": 1.1337742805480957, "learning_rate": 2.2255e-05, "loss": 0.4317, "step": 4454 }, { "epoch": 0.24946802553477432, "grad_norm": 1.2387633323669434, "learning_rate": 2.226e-05, "loss": 0.4296, "step": 4455 }, { "epoch": 0.24952402284690334, "grad_norm": 1.2744488716125488, "learning_rate": 2.2265e-05, "loss": 0.4337, "step": 4456 }, { "epoch": 0.24958002015903236, "grad_norm": 
1.4655698537826538, "learning_rate": 2.2270000000000002e-05, "loss": 0.3449, "step": 4457 }, { "epoch": 0.24963601747116138, "grad_norm": 1.068646788597107, "learning_rate": 2.2275000000000003e-05, "loss": 0.3799, "step": 4458 }, { "epoch": 0.2496920147832904, "grad_norm": 1.1122599840164185, "learning_rate": 2.228e-05, "loss": 0.4005, "step": 4459 }, { "epoch": 0.2497480120954194, "grad_norm": 1.5223112106323242, "learning_rate": 2.2285e-05, "loss": 0.4872, "step": 4460 }, { "epoch": 0.24980400940754843, "grad_norm": 1.0510022640228271, "learning_rate": 2.229e-05, "loss": 0.3729, "step": 4461 }, { "epoch": 0.24986000671967745, "grad_norm": 1.342260718345642, "learning_rate": 2.2295000000000003e-05, "loss": 0.5689, "step": 4462 }, { "epoch": 0.24991600403180647, "grad_norm": 1.2196329832077026, "learning_rate": 2.23e-05, "loss": 0.4905, "step": 4463 }, { "epoch": 0.2499720013439355, "grad_norm": 1.9157620668411255, "learning_rate": 2.2305e-05, "loss": 0.4368, "step": 4464 }, { "epoch": 0.2500279986560645, "grad_norm": 1.316776990890503, "learning_rate": 2.231e-05, "loss": 0.3954, "step": 4465 }, { "epoch": 0.25008399596819353, "grad_norm": 1.326110601425171, "learning_rate": 2.2315e-05, "loss": 0.4895, "step": 4466 }, { "epoch": 0.25013999328032255, "grad_norm": 1.2966643571853638, "learning_rate": 2.2320000000000003e-05, "loss": 0.451, "step": 4467 }, { "epoch": 0.25019599059245157, "grad_norm": 1.364499807357788, "learning_rate": 2.2325e-05, "loss": 0.4063, "step": 4468 }, { "epoch": 0.2502519879045806, "grad_norm": 1.3014533519744873, "learning_rate": 2.233e-05, "loss": 0.3914, "step": 4469 }, { "epoch": 0.2503079852167096, "grad_norm": 0.955682098865509, "learning_rate": 2.2335e-05, "loss": 0.4266, "step": 4470 }, { "epoch": 0.2503639825288386, "grad_norm": 1.215097188949585, "learning_rate": 2.234e-05, "loss": 0.3631, "step": 4471 }, { "epoch": 0.25041997984096764, "grad_norm": 1.2024786472320557, "learning_rate": 2.2345e-05, "loss": 0.4752, "step": 4472 }, { 
"epoch": 0.25047597715309666, "grad_norm": 1.1775621175765991, "learning_rate": 2.235e-05, "loss": 0.5077, "step": 4473 }, { "epoch": 0.2505319744652257, "grad_norm": 1.1632726192474365, "learning_rate": 2.2355000000000002e-05, "loss": 0.428, "step": 4474 }, { "epoch": 0.2505879717773547, "grad_norm": 1.2444415092468262, "learning_rate": 2.236e-05, "loss": 0.4456, "step": 4475 }, { "epoch": 0.2506439690894837, "grad_norm": 1.2137200832366943, "learning_rate": 2.2365e-05, "loss": 0.3735, "step": 4476 }, { "epoch": 0.25069996640161274, "grad_norm": 1.350601315498352, "learning_rate": 2.237e-05, "loss": 0.4603, "step": 4477 }, { "epoch": 0.25075596371374176, "grad_norm": 1.1135917901992798, "learning_rate": 2.2375000000000002e-05, "loss": 0.4001, "step": 4478 }, { "epoch": 0.2508119610258708, "grad_norm": 1.1618399620056152, "learning_rate": 2.2380000000000003e-05, "loss": 0.4734, "step": 4479 }, { "epoch": 0.2508679583379998, "grad_norm": 1.1615564823150635, "learning_rate": 2.2385e-05, "loss": 0.4212, "step": 4480 }, { "epoch": 0.2509239556501288, "grad_norm": 2.1652941703796387, "learning_rate": 2.239e-05, "loss": 0.424, "step": 4481 }, { "epoch": 0.25097995296225784, "grad_norm": 1.3567856550216675, "learning_rate": 2.2395e-05, "loss": 0.7095, "step": 4482 }, { "epoch": 0.25103595027438685, "grad_norm": 1.3855979442596436, "learning_rate": 2.2400000000000002e-05, "loss": 0.5495, "step": 4483 }, { "epoch": 0.2510919475865159, "grad_norm": 1.2875161170959473, "learning_rate": 2.2405e-05, "loss": 0.3818, "step": 4484 }, { "epoch": 0.2511479448986449, "grad_norm": 1.3533802032470703, "learning_rate": 2.241e-05, "loss": 0.4189, "step": 4485 }, { "epoch": 0.25120394221077386, "grad_norm": 1.444612741470337, "learning_rate": 2.2415e-05, "loss": 0.4597, "step": 4486 }, { "epoch": 0.2512599395229029, "grad_norm": 1.1549077033996582, "learning_rate": 2.2420000000000002e-05, "loss": 0.4765, "step": 4487 }, { "epoch": 0.2513159368350319, "grad_norm": 1.3089532852172852, 
"learning_rate": 2.2425000000000003e-05, "loss": 0.5499, "step": 4488 }, { "epoch": 0.2513719341471609, "grad_norm": 1.3971365690231323, "learning_rate": 2.243e-05, "loss": 0.6296, "step": 4489 }, { "epoch": 0.25142793145928993, "grad_norm": 1.27325439453125, "learning_rate": 2.2435e-05, "loss": 0.4193, "step": 4490 }, { "epoch": 0.25148392877141895, "grad_norm": 1.5754708051681519, "learning_rate": 2.244e-05, "loss": 0.4805, "step": 4491 }, { "epoch": 0.25153992608354797, "grad_norm": 1.695016622543335, "learning_rate": 2.2445000000000003e-05, "loss": 0.5813, "step": 4492 }, { "epoch": 0.251595923395677, "grad_norm": 1.0356920957565308, "learning_rate": 2.245e-05, "loss": 0.4802, "step": 4493 }, { "epoch": 0.251651920707806, "grad_norm": 1.0675708055496216, "learning_rate": 2.2455e-05, "loss": 0.3035, "step": 4494 }, { "epoch": 0.25170791801993503, "grad_norm": 1.0566827058792114, "learning_rate": 2.2460000000000002e-05, "loss": 0.3935, "step": 4495 }, { "epoch": 0.25176391533206405, "grad_norm": 1.114900827407837, "learning_rate": 2.2465e-05, "loss": 0.38, "step": 4496 }, { "epoch": 0.25181991264419307, "grad_norm": 1.1412712335586548, "learning_rate": 2.2470000000000003e-05, "loss": 0.463, "step": 4497 }, { "epoch": 0.2518759099563221, "grad_norm": 1.238297939300537, "learning_rate": 2.2475e-05, "loss": 0.3508, "step": 4498 }, { "epoch": 0.2519319072684511, "grad_norm": 1.077498197555542, "learning_rate": 2.248e-05, "loss": 0.3731, "step": 4499 }, { "epoch": 0.2519879045805801, "grad_norm": 1.1671607494354248, "learning_rate": 2.2485e-05, "loss": 0.2541, "step": 4500 }, { "epoch": 0.25204390189270914, "grad_norm": 1.0954142808914185, "learning_rate": 2.249e-05, "loss": 0.3176, "step": 4501 }, { "epoch": 0.25209989920483816, "grad_norm": 1.1871159076690674, "learning_rate": 2.2495e-05, "loss": 0.3175, "step": 4502 }, { "epoch": 0.2521558965169672, "grad_norm": 1.5153894424438477, "learning_rate": 2.25e-05, "loss": 0.4673, "step": 4503 }, { "epoch": 
0.2522118938290962, "grad_norm": 1.1715959310531616, "learning_rate": 2.2505000000000002e-05, "loss": 0.4213, "step": 4504 }, { "epoch": 0.2522678911412252, "grad_norm": 1.47342050075531, "learning_rate": 2.251e-05, "loss": 0.4968, "step": 4505 }, { "epoch": 0.25232388845335424, "grad_norm": 12.813328742980957, "learning_rate": 2.2515e-05, "loss": 0.4618, "step": 4506 }, { "epoch": 0.25237988576548326, "grad_norm": 1.367955207824707, "learning_rate": 2.252e-05, "loss": 0.4697, "step": 4507 }, { "epoch": 0.2524358830776123, "grad_norm": 1.470633625984192, "learning_rate": 2.2525000000000002e-05, "loss": 0.4447, "step": 4508 }, { "epoch": 0.2524918803897413, "grad_norm": 1.6107702255249023, "learning_rate": 2.253e-05, "loss": 0.5733, "step": 4509 }, { "epoch": 0.2525478777018703, "grad_norm": 1.2405142784118652, "learning_rate": 2.2535e-05, "loss": 0.5126, "step": 4510 }, { "epoch": 0.25260387501399933, "grad_norm": 1.1081312894821167, "learning_rate": 2.254e-05, "loss": 0.326, "step": 4511 }, { "epoch": 0.25265987232612835, "grad_norm": 1.2331016063690186, "learning_rate": 2.2545000000000002e-05, "loss": 0.4817, "step": 4512 }, { "epoch": 0.2527158696382574, "grad_norm": 1.1779156923294067, "learning_rate": 2.2550000000000003e-05, "loss": 0.4658, "step": 4513 }, { "epoch": 0.2527718669503864, "grad_norm": 1.5347821712493896, "learning_rate": 2.2555e-05, "loss": 0.4134, "step": 4514 }, { "epoch": 0.2528278642625154, "grad_norm": 1.191646695137024, "learning_rate": 2.256e-05, "loss": 0.5631, "step": 4515 }, { "epoch": 0.25288386157464443, "grad_norm": 1.340574860572815, "learning_rate": 2.2565e-05, "loss": 0.4469, "step": 4516 }, { "epoch": 0.25293985888677345, "grad_norm": 1.1392805576324463, "learning_rate": 2.2570000000000002e-05, "loss": 0.4925, "step": 4517 }, { "epoch": 0.25299585619890247, "grad_norm": 1.2708933353424072, "learning_rate": 2.2575000000000003e-05, "loss": 0.4406, "step": 4518 }, { "epoch": 0.2530518535110315, "grad_norm": 1.3625980615615845, 
"learning_rate": 2.258e-05, "loss": 0.497, "step": 4519 }, { "epoch": 0.2531078508231605, "grad_norm": 0.9875698685646057, "learning_rate": 2.2585e-05, "loss": 0.3762, "step": 4520 }, { "epoch": 0.2531638481352895, "grad_norm": 1.1133662462234497, "learning_rate": 2.259e-05, "loss": 0.3702, "step": 4521 }, { "epoch": 0.25321984544741855, "grad_norm": 1.538754940032959, "learning_rate": 2.2595000000000003e-05, "loss": 0.5965, "step": 4522 }, { "epoch": 0.25327584275954756, "grad_norm": 1.3976726531982422, "learning_rate": 2.26e-05, "loss": 0.3976, "step": 4523 }, { "epoch": 0.2533318400716766, "grad_norm": 1.1165637969970703, "learning_rate": 2.2605e-05, "loss": 0.389, "step": 4524 }, { "epoch": 0.2533878373838056, "grad_norm": 1.373055338859558, "learning_rate": 2.2610000000000002e-05, "loss": 0.4778, "step": 4525 }, { "epoch": 0.2534438346959346, "grad_norm": 1.2408130168914795, "learning_rate": 2.2615e-05, "loss": 0.6502, "step": 4526 }, { "epoch": 0.2534998320080636, "grad_norm": 0.9629552364349365, "learning_rate": 2.2620000000000004e-05, "loss": 0.4091, "step": 4527 }, { "epoch": 0.2535558293201926, "grad_norm": 1.3871678113937378, "learning_rate": 2.2625e-05, "loss": 0.4843, "step": 4528 }, { "epoch": 0.2536118266323216, "grad_norm": 1.508987545967102, "learning_rate": 2.2630000000000002e-05, "loss": 0.4926, "step": 4529 }, { "epoch": 0.25366782394445064, "grad_norm": 1.3006188869476318, "learning_rate": 2.2635e-05, "loss": 0.4367, "step": 4530 }, { "epoch": 0.25372382125657966, "grad_norm": 1.3513522148132324, "learning_rate": 2.264e-05, "loss": 0.4645, "step": 4531 }, { "epoch": 0.2537798185687087, "grad_norm": 1.414544939994812, "learning_rate": 2.2645e-05, "loss": 0.6809, "step": 4532 }, { "epoch": 0.2538358158808377, "grad_norm": 1.3013323545455933, "learning_rate": 2.265e-05, "loss": 0.5165, "step": 4533 }, { "epoch": 0.2538918131929667, "grad_norm": 1.2263951301574707, "learning_rate": 2.2655000000000002e-05, "loss": 0.5141, "step": 4534 }, { "epoch": 
0.25394781050509574, "grad_norm": 1.914273738861084, "learning_rate": 2.266e-05, "loss": 0.4634, "step": 4535 }, { "epoch": 0.25400380781722476, "grad_norm": 1.4267395734786987, "learning_rate": 2.2665e-05, "loss": 0.4487, "step": 4536 }, { "epoch": 0.2540598051293538, "grad_norm": 1.1767939329147339, "learning_rate": 2.267e-05, "loss": 0.434, "step": 4537 }, { "epoch": 0.2541158024414828, "grad_norm": 1.0303541421890259, "learning_rate": 2.2675000000000002e-05, "loss": 0.3804, "step": 4538 }, { "epoch": 0.2541717997536118, "grad_norm": 1.2321490049362183, "learning_rate": 2.268e-05, "loss": 0.4092, "step": 4539 }, { "epoch": 0.25422779706574083, "grad_norm": 1.1032359600067139, "learning_rate": 2.2685e-05, "loss": 0.4038, "step": 4540 }, { "epoch": 0.25428379437786985, "grad_norm": 1.090529441833496, "learning_rate": 2.269e-05, "loss": 0.4244, "step": 4541 }, { "epoch": 0.2543397916899989, "grad_norm": 1.335461139678955, "learning_rate": 2.2695000000000002e-05, "loss": 0.5703, "step": 4542 }, { "epoch": 0.2543957890021279, "grad_norm": 1.1057159900665283, "learning_rate": 2.2700000000000003e-05, "loss": 0.3951, "step": 4543 }, { "epoch": 0.2544517863142569, "grad_norm": 1.1145004034042358, "learning_rate": 2.2705e-05, "loss": 0.3985, "step": 4544 }, { "epoch": 0.25450778362638593, "grad_norm": 1.1383142471313477, "learning_rate": 2.271e-05, "loss": 0.4392, "step": 4545 }, { "epoch": 0.25456378093851495, "grad_norm": 1.4747370481491089, "learning_rate": 2.2715e-05, "loss": 0.4301, "step": 4546 }, { "epoch": 0.25461977825064397, "grad_norm": 1.2272024154663086, "learning_rate": 2.2720000000000003e-05, "loss": 0.3965, "step": 4547 }, { "epoch": 0.254675775562773, "grad_norm": 1.4545232057571411, "learning_rate": 2.2725000000000003e-05, "loss": 0.4497, "step": 4548 }, { "epoch": 0.254731772874902, "grad_norm": 1.2840518951416016, "learning_rate": 2.273e-05, "loss": 0.3729, "step": 4549 }, { "epoch": 0.254787770187031, "grad_norm": 1.1751893758773804, "learning_rate": 
2.2735000000000002e-05, "loss": 0.3626, "step": 4550 }, { "epoch": 0.25484376749916005, "grad_norm": 1.1711511611938477, "learning_rate": 2.274e-05, "loss": 0.3928, "step": 4551 }, { "epoch": 0.25489976481128906, "grad_norm": 1.4283241033554077, "learning_rate": 2.2745000000000003e-05, "loss": 0.4136, "step": 4552 }, { "epoch": 0.2549557621234181, "grad_norm": 1.297680139541626, "learning_rate": 2.275e-05, "loss": 0.4825, "step": 4553 }, { "epoch": 0.2550117594355471, "grad_norm": 1.3692355155944824, "learning_rate": 2.2755e-05, "loss": 0.4102, "step": 4554 }, { "epoch": 0.2550677567476761, "grad_norm": 1.418385624885559, "learning_rate": 2.2760000000000002e-05, "loss": 0.5126, "step": 4555 }, { "epoch": 0.25512375405980514, "grad_norm": 1.3031096458435059, "learning_rate": 2.2765e-05, "loss": 0.5565, "step": 4556 }, { "epoch": 0.25517975137193416, "grad_norm": 1.1401340961456299, "learning_rate": 2.2770000000000004e-05, "loss": 0.4568, "step": 4557 }, { "epoch": 0.2552357486840632, "grad_norm": 1.2938733100891113, "learning_rate": 2.2775e-05, "loss": 0.4326, "step": 4558 }, { "epoch": 0.2552917459961922, "grad_norm": 1.1616764068603516, "learning_rate": 2.2780000000000002e-05, "loss": 0.36, "step": 4559 }, { "epoch": 0.2553477433083212, "grad_norm": 1.2217586040496826, "learning_rate": 2.2785e-05, "loss": 0.5322, "step": 4560 }, { "epoch": 0.25540374062045024, "grad_norm": 1.309910774230957, "learning_rate": 2.279e-05, "loss": 0.6896, "step": 4561 }, { "epoch": 0.25545973793257926, "grad_norm": 1.2356170415878296, "learning_rate": 2.2795e-05, "loss": 0.4242, "step": 4562 }, { "epoch": 0.2555157352447083, "grad_norm": 1.4275574684143066, "learning_rate": 2.2800000000000002e-05, "loss": 0.5966, "step": 4563 }, { "epoch": 0.2555717325568373, "grad_norm": 1.1685105562210083, "learning_rate": 2.2805000000000003e-05, "loss": 0.3881, "step": 4564 }, { "epoch": 0.2556277298689663, "grad_norm": 1.184205174446106, "learning_rate": 2.281e-05, "loss": 0.5306, "step": 4565 }, 
{ "epoch": 0.25568372718109533, "grad_norm": 1.4231535196304321, "learning_rate": 2.2815e-05, "loss": 0.3465, "step": 4566 }, { "epoch": 0.25573972449322435, "grad_norm": 1.3884516954421997, "learning_rate": 2.282e-05, "loss": 0.4995, "step": 4567 }, { "epoch": 0.2557957218053533, "grad_norm": 1.2389317750930786, "learning_rate": 2.2825000000000003e-05, "loss": 0.3583, "step": 4568 }, { "epoch": 0.25585171911748233, "grad_norm": 1.3554555177688599, "learning_rate": 2.283e-05, "loss": 0.4887, "step": 4569 }, { "epoch": 0.25590771642961135, "grad_norm": 1.494165301322937, "learning_rate": 2.2835e-05, "loss": 0.44, "step": 4570 }, { "epoch": 0.2559637137417404, "grad_norm": 1.0958404541015625, "learning_rate": 2.284e-05, "loss": 0.499, "step": 4571 }, { "epoch": 0.2560197110538694, "grad_norm": 1.5103360414505005, "learning_rate": 2.2845e-05, "loss": 0.5023, "step": 4572 }, { "epoch": 0.2560757083659984, "grad_norm": 1.204907774925232, "learning_rate": 2.2850000000000003e-05, "loss": 0.4267, "step": 4573 }, { "epoch": 0.25613170567812743, "grad_norm": 1.4339368343353271, "learning_rate": 2.2855e-05, "loss": 0.5432, "step": 4574 }, { "epoch": 0.25618770299025645, "grad_norm": 1.1898574829101562, "learning_rate": 2.286e-05, "loss": 0.3848, "step": 4575 }, { "epoch": 0.25624370030238547, "grad_norm": 1.053621530532837, "learning_rate": 2.2865e-05, "loss": 0.3681, "step": 4576 }, { "epoch": 0.2562996976145145, "grad_norm": 1.312021017074585, "learning_rate": 2.287e-05, "loss": 0.464, "step": 4577 }, { "epoch": 0.2563556949266435, "grad_norm": 1.321693778038025, "learning_rate": 2.2875e-05, "loss": 0.4943, "step": 4578 }, { "epoch": 0.2564116922387725, "grad_norm": 1.1725354194641113, "learning_rate": 2.288e-05, "loss": 0.3536, "step": 4579 }, { "epoch": 0.25646768955090155, "grad_norm": 1.213531732559204, "learning_rate": 2.2885000000000002e-05, "loss": 0.3764, "step": 4580 }, { "epoch": 0.25652368686303056, "grad_norm": 1.1212034225463867, "learning_rate": 2.289e-05, 
"loss": 0.5052, "step": 4581 }, { "epoch": 0.2565796841751596, "grad_norm": 1.2362000942230225, "learning_rate": 2.2895e-05, "loss": 0.4205, "step": 4582 }, { "epoch": 0.2566356814872886, "grad_norm": 1.1426849365234375, "learning_rate": 2.29e-05, "loss": 0.4204, "step": 4583 }, { "epoch": 0.2566916787994176, "grad_norm": 1.246262788772583, "learning_rate": 2.2905000000000002e-05, "loss": 0.3154, "step": 4584 }, { "epoch": 0.25674767611154664, "grad_norm": 1.2284398078918457, "learning_rate": 2.2910000000000003e-05, "loss": 0.4645, "step": 4585 }, { "epoch": 0.25680367342367566, "grad_norm": 1.2259362936019897, "learning_rate": 2.2915e-05, "loss": 0.4662, "step": 4586 }, { "epoch": 0.2568596707358047, "grad_norm": 1.1868433952331543, "learning_rate": 2.292e-05, "loss": 0.4249, "step": 4587 }, { "epoch": 0.2569156680479337, "grad_norm": 1.2697373628616333, "learning_rate": 2.2925e-05, "loss": 0.4276, "step": 4588 }, { "epoch": 0.2569716653600627, "grad_norm": 1.3835608959197998, "learning_rate": 2.2930000000000002e-05, "loss": 0.6027, "step": 4589 }, { "epoch": 0.25702766267219174, "grad_norm": 1.2019926309585571, "learning_rate": 2.2935e-05, "loss": 0.4143, "step": 4590 }, { "epoch": 0.25708365998432076, "grad_norm": 1.1837432384490967, "learning_rate": 2.294e-05, "loss": 0.4683, "step": 4591 }, { "epoch": 0.2571396572964498, "grad_norm": 1.3229570388793945, "learning_rate": 2.2945e-05, "loss": 0.6599, "step": 4592 }, { "epoch": 0.2571956546085788, "grad_norm": 1.2265925407409668, "learning_rate": 2.2950000000000002e-05, "loss": 0.4594, "step": 4593 }, { "epoch": 0.2572516519207078, "grad_norm": 1.4257705211639404, "learning_rate": 2.2955000000000003e-05, "loss": 0.3496, "step": 4594 }, { "epoch": 0.25730764923283683, "grad_norm": 1.018993854522705, "learning_rate": 2.296e-05, "loss": 0.4226, "step": 4595 }, { "epoch": 0.25736364654496585, "grad_norm": 1.0288788080215454, "learning_rate": 2.2965e-05, "loss": 0.3671, "step": 4596 }, { "epoch": 0.25741964385709487, 
"grad_norm": 1.3952146768569946, "learning_rate": 2.297e-05, "loss": 0.3726, "step": 4597 }, { "epoch": 0.2574756411692239, "grad_norm": 1.4173377752304077, "learning_rate": 2.2975000000000003e-05, "loss": 0.549, "step": 4598 }, { "epoch": 0.2575316384813529, "grad_norm": 1.2097254991531372, "learning_rate": 2.298e-05, "loss": 0.5329, "step": 4599 }, { "epoch": 0.25758763579348193, "grad_norm": 1.0523312091827393, "learning_rate": 2.2985e-05, "loss": 0.4327, "step": 4600 }, { "epoch": 0.25764363310561095, "grad_norm": 1.171477198600769, "learning_rate": 2.2990000000000002e-05, "loss": 0.3262, "step": 4601 }, { "epoch": 0.25769963041773997, "grad_norm": 1.3108364343643188, "learning_rate": 2.2995e-05, "loss": 0.4516, "step": 4602 }, { "epoch": 0.257755627729869, "grad_norm": 1.0478096008300781, "learning_rate": 2.3000000000000003e-05, "loss": 0.3064, "step": 4603 }, { "epoch": 0.257811625041998, "grad_norm": 1.145826816558838, "learning_rate": 2.3005e-05, "loss": 0.4085, "step": 4604 }, { "epoch": 0.257867622354127, "grad_norm": 1.2956432104110718, "learning_rate": 2.301e-05, "loss": 0.4424, "step": 4605 }, { "epoch": 0.25792361966625604, "grad_norm": 1.2124707698822021, "learning_rate": 2.3015e-05, "loss": 0.4248, "step": 4606 }, { "epoch": 0.25797961697838506, "grad_norm": 1.0245577096939087, "learning_rate": 2.302e-05, "loss": 0.4546, "step": 4607 }, { "epoch": 0.2580356142905141, "grad_norm": 1.367613434791565, "learning_rate": 2.3025e-05, "loss": 0.4073, "step": 4608 }, { "epoch": 0.2580916116026431, "grad_norm": 1.5224664211273193, "learning_rate": 2.303e-05, "loss": 0.4822, "step": 4609 }, { "epoch": 0.25814760891477206, "grad_norm": 1.2754371166229248, "learning_rate": 2.3035000000000002e-05, "loss": 0.5098, "step": 4610 }, { "epoch": 0.2582036062269011, "grad_norm": 1.6209826469421387, "learning_rate": 2.304e-05, "loss": 0.4425, "step": 4611 }, { "epoch": 0.2582596035390301, "grad_norm": 1.2482712268829346, "learning_rate": 2.3045e-05, "loss": 0.5628, 
"step": 4612 }, { "epoch": 0.2583156008511591, "grad_norm": 1.3711938858032227, "learning_rate": 2.305e-05, "loss": 0.4381, "step": 4613 }, { "epoch": 0.25837159816328814, "grad_norm": 1.0197200775146484, "learning_rate": 2.3055000000000002e-05, "loss": 0.3743, "step": 4614 }, { "epoch": 0.25842759547541716, "grad_norm": 1.2958955764770508, "learning_rate": 2.306e-05, "loss": 0.386, "step": 4615 }, { "epoch": 0.2584835927875462, "grad_norm": 1.3102233409881592, "learning_rate": 2.3065e-05, "loss": 0.5122, "step": 4616 }, { "epoch": 0.2585395900996752, "grad_norm": 1.064424991607666, "learning_rate": 2.307e-05, "loss": 0.4189, "step": 4617 }, { "epoch": 0.2585955874118042, "grad_norm": 1.2254605293273926, "learning_rate": 2.3075000000000002e-05, "loss": 0.4455, "step": 4618 }, { "epoch": 0.25865158472393324, "grad_norm": 1.271575927734375, "learning_rate": 2.3080000000000003e-05, "loss": 0.4599, "step": 4619 }, { "epoch": 0.25870758203606226, "grad_norm": 1.0416302680969238, "learning_rate": 2.3085e-05, "loss": 0.4125, "step": 4620 }, { "epoch": 0.2587635793481913, "grad_norm": 1.469797134399414, "learning_rate": 2.309e-05, "loss": 0.4717, "step": 4621 }, { "epoch": 0.2588195766603203, "grad_norm": 1.3082259893417358, "learning_rate": 2.3095e-05, "loss": 0.4036, "step": 4622 }, { "epoch": 0.2588755739724493, "grad_norm": 1.3652042150497437, "learning_rate": 2.3100000000000002e-05, "loss": 0.5413, "step": 4623 }, { "epoch": 0.25893157128457833, "grad_norm": 1.3426355123519897, "learning_rate": 2.3105000000000003e-05, "loss": 0.5378, "step": 4624 }, { "epoch": 0.25898756859670735, "grad_norm": 1.111953854560852, "learning_rate": 2.311e-05, "loss": 0.2973, "step": 4625 }, { "epoch": 0.25904356590883637, "grad_norm": 1.1020268201828003, "learning_rate": 2.3115e-05, "loss": 0.4528, "step": 4626 }, { "epoch": 0.2590995632209654, "grad_norm": 1.0999373197555542, "learning_rate": 2.312e-05, "loss": 0.3916, "step": 4627 }, { "epoch": 0.2591555605330944, "grad_norm": 
1.2418850660324097, "learning_rate": 2.3125000000000003e-05, "loss": 0.483, "step": 4628 }, { "epoch": 0.25921155784522343, "grad_norm": 1.1667850017547607, "learning_rate": 2.313e-05, "loss": 0.4024, "step": 4629 }, { "epoch": 0.25926755515735245, "grad_norm": 1.1083531379699707, "learning_rate": 2.3135e-05, "loss": 0.4499, "step": 4630 }, { "epoch": 0.25932355246948147, "grad_norm": 1.260475516319275, "learning_rate": 2.3140000000000002e-05, "loss": 0.5076, "step": 4631 }, { "epoch": 0.2593795497816105, "grad_norm": 1.1606007814407349, "learning_rate": 2.3145e-05, "loss": 0.4015, "step": 4632 }, { "epoch": 0.2594355470937395, "grad_norm": 1.4891753196716309, "learning_rate": 2.3150000000000004e-05, "loss": 0.5287, "step": 4633 }, { "epoch": 0.2594915444058685, "grad_norm": 1.2093613147735596, "learning_rate": 2.3155e-05, "loss": 0.5178, "step": 4634 }, { "epoch": 0.25954754171799754, "grad_norm": 1.1132348775863647, "learning_rate": 2.3160000000000002e-05, "loss": 0.5762, "step": 4635 }, { "epoch": 0.25960353903012656, "grad_norm": 1.8130476474761963, "learning_rate": 2.3165e-05, "loss": 0.553, "step": 4636 }, { "epoch": 0.2596595363422556, "grad_norm": 1.2123457193374634, "learning_rate": 2.317e-05, "loss": 0.6021, "step": 4637 }, { "epoch": 0.2597155336543846, "grad_norm": 1.457024335861206, "learning_rate": 2.3175e-05, "loss": 0.5136, "step": 4638 }, { "epoch": 0.2597715309665136, "grad_norm": 1.319503664970398, "learning_rate": 2.318e-05, "loss": 0.4162, "step": 4639 }, { "epoch": 0.25982752827864264, "grad_norm": 1.2477689981460571, "learning_rate": 2.3185000000000002e-05, "loss": 0.4707, "step": 4640 }, { "epoch": 0.25988352559077166, "grad_norm": 1.1592180728912354, "learning_rate": 2.319e-05, "loss": 0.2992, "step": 4641 }, { "epoch": 0.2599395229029007, "grad_norm": 1.3189729452133179, "learning_rate": 2.3195e-05, "loss": 0.52, "step": 4642 }, { "epoch": 0.2599955202150297, "grad_norm": 1.237096905708313, "learning_rate": 2.32e-05, "loss": 0.6822, 
"step": 4643 }, { "epoch": 0.2600515175271587, "grad_norm": 1.4841668605804443, "learning_rate": 2.3205000000000002e-05, "loss": 0.4308, "step": 4644 }, { "epoch": 0.26010751483928773, "grad_norm": 1.2012847661972046, "learning_rate": 2.321e-05, "loss": 0.5243, "step": 4645 }, { "epoch": 0.26016351215141675, "grad_norm": 1.1921275854110718, "learning_rate": 2.3215e-05, "loss": 0.4918, "step": 4646 }, { "epoch": 0.2602195094635458, "grad_norm": 1.1845691204071045, "learning_rate": 2.322e-05, "loss": 0.384, "step": 4647 }, { "epoch": 0.2602755067756748, "grad_norm": 0.9477851986885071, "learning_rate": 2.3225000000000002e-05, "loss": 0.3095, "step": 4648 }, { "epoch": 0.2603315040878038, "grad_norm": 1.0472500324249268, "learning_rate": 2.3230000000000003e-05, "loss": 0.349, "step": 4649 }, { "epoch": 0.26038750139993283, "grad_norm": 1.1473208665847778, "learning_rate": 2.3235e-05, "loss": 0.3444, "step": 4650 }, { "epoch": 0.2604434987120618, "grad_norm": 1.0765466690063477, "learning_rate": 2.324e-05, "loss": 0.356, "step": 4651 }, { "epoch": 0.2604994960241908, "grad_norm": 1.3327019214630127, "learning_rate": 2.3245e-05, "loss": 0.4309, "step": 4652 }, { "epoch": 0.26055549333631983, "grad_norm": 1.0411831140518188, "learning_rate": 2.3250000000000003e-05, "loss": 0.4155, "step": 4653 }, { "epoch": 0.26061149064844885, "grad_norm": 1.1315257549285889, "learning_rate": 2.3255e-05, "loss": 0.3979, "step": 4654 }, { "epoch": 0.26066748796057787, "grad_norm": 1.1296149492263794, "learning_rate": 2.326e-05, "loss": 0.3826, "step": 4655 }, { "epoch": 0.2607234852727069, "grad_norm": 1.3033127784729004, "learning_rate": 2.3265000000000002e-05, "loss": 0.4008, "step": 4656 }, { "epoch": 0.2607794825848359, "grad_norm": 1.210599660873413, "learning_rate": 2.327e-05, "loss": 0.5096, "step": 4657 }, { "epoch": 0.26083547989696493, "grad_norm": 1.4044979810714722, "learning_rate": 2.3275000000000003e-05, "loss": 0.4934, "step": 4658 }, { "epoch": 0.26089147720909395, 
"grad_norm": 4.5497307777404785, "learning_rate": 2.328e-05, "loss": 0.3402, "step": 4659 }, { "epoch": 0.26094747452122297, "grad_norm": 1.3142727613449097, "learning_rate": 2.3285e-05, "loss": 0.397, "step": 4660 }, { "epoch": 0.261003471833352, "grad_norm": 1.4795187711715698, "learning_rate": 2.3290000000000002e-05, "loss": 0.4657, "step": 4661 }, { "epoch": 0.261059469145481, "grad_norm": 1.0734316110610962, "learning_rate": 2.3295e-05, "loss": 0.4536, "step": 4662 }, { "epoch": 0.26111546645761, "grad_norm": 1.2687352895736694, "learning_rate": 2.3300000000000004e-05, "loss": 0.4132, "step": 4663 }, { "epoch": 0.26117146376973904, "grad_norm": 1.1907485723495483, "learning_rate": 2.3305e-05, "loss": 0.5572, "step": 4664 }, { "epoch": 0.26122746108186806, "grad_norm": 1.0302767753601074, "learning_rate": 2.3310000000000002e-05, "loss": 0.3827, "step": 4665 }, { "epoch": 0.2612834583939971, "grad_norm": 2.662245750427246, "learning_rate": 2.3315e-05, "loss": 0.3729, "step": 4666 }, { "epoch": 0.2613394557061261, "grad_norm": 1.0700167417526245, "learning_rate": 2.332e-05, "loss": 0.5333, "step": 4667 }, { "epoch": 0.2613954530182551, "grad_norm": 1.3027064800262451, "learning_rate": 2.3325e-05, "loss": 0.449, "step": 4668 }, { "epoch": 0.26145145033038414, "grad_norm": 1.1364097595214844, "learning_rate": 2.3330000000000002e-05, "loss": 0.3691, "step": 4669 }, { "epoch": 0.26150744764251316, "grad_norm": 1.3689920902252197, "learning_rate": 2.3335000000000003e-05, "loss": 0.5063, "step": 4670 }, { "epoch": 0.2615634449546422, "grad_norm": 1.2601542472839355, "learning_rate": 2.334e-05, "loss": 0.48, "step": 4671 }, { "epoch": 0.2616194422667712, "grad_norm": 1.0073635578155518, "learning_rate": 2.3345e-05, "loss": 0.3379, "step": 4672 }, { "epoch": 0.2616754395789002, "grad_norm": 1.1468099355697632, "learning_rate": 2.3350000000000002e-05, "loss": 0.3992, "step": 4673 }, { "epoch": 0.26173143689102923, "grad_norm": 1.1332155466079712, "learning_rate": 
2.3355000000000003e-05, "loss": 0.4549, "step": 4674 }, { "epoch": 0.26178743420315825, "grad_norm": 4.049678325653076, "learning_rate": 2.336e-05, "loss": 0.4229, "step": 4675 }, { "epoch": 0.2618434315152873, "grad_norm": 0.9749440550804138, "learning_rate": 2.3365e-05, "loss": 0.3084, "step": 4676 }, { "epoch": 0.2618994288274163, "grad_norm": 1.0869070291519165, "learning_rate": 2.337e-05, "loss": 0.4879, "step": 4677 }, { "epoch": 0.2619554261395453, "grad_norm": 1.350502848625183, "learning_rate": 2.3375000000000002e-05, "loss": 0.483, "step": 4678 }, { "epoch": 0.26201142345167433, "grad_norm": 1.2524774074554443, "learning_rate": 2.3380000000000003e-05, "loss": 0.4149, "step": 4679 }, { "epoch": 0.26206742076380335, "grad_norm": 1.0579578876495361, "learning_rate": 2.3385e-05, "loss": 0.3859, "step": 4680 }, { "epoch": 0.26212341807593237, "grad_norm": 1.2938880920410156, "learning_rate": 2.339e-05, "loss": 0.4728, "step": 4681 }, { "epoch": 0.2621794153880614, "grad_norm": 1.2401838302612305, "learning_rate": 2.3395e-05, "loss": 0.4062, "step": 4682 }, { "epoch": 0.2622354127001904, "grad_norm": 1.2220298051834106, "learning_rate": 2.3400000000000003e-05, "loss": 0.3831, "step": 4683 }, { "epoch": 0.2622914100123194, "grad_norm": 1.1159085035324097, "learning_rate": 2.3405e-05, "loss": 0.4694, "step": 4684 }, { "epoch": 0.26234740732444845, "grad_norm": 1.4172265529632568, "learning_rate": 2.341e-05, "loss": 0.376, "step": 4685 }, { "epoch": 0.26240340463657746, "grad_norm": 1.4832614660263062, "learning_rate": 2.3415000000000002e-05, "loss": 0.4544, "step": 4686 }, { "epoch": 0.2624594019487065, "grad_norm": 1.2928845882415771, "learning_rate": 2.342e-05, "loss": 0.5758, "step": 4687 }, { "epoch": 0.2625153992608355, "grad_norm": 1.3649362325668335, "learning_rate": 2.3425000000000004e-05, "loss": 0.435, "step": 4688 }, { "epoch": 0.2625713965729645, "grad_norm": 1.32871413230896, "learning_rate": 2.343e-05, "loss": 0.4684, "step": 4689 }, { "epoch": 
0.26262739388509354, "grad_norm": 1.632665753364563, "learning_rate": 2.3435000000000002e-05, "loss": 0.5861, "step": 4690 }, { "epoch": 0.26268339119722256, "grad_norm": 1.1872894763946533, "learning_rate": 2.344e-05, "loss": 0.4616, "step": 4691 }, { "epoch": 0.2627393885093515, "grad_norm": 1.101766586303711, "learning_rate": 2.3445e-05, "loss": 0.3728, "step": 4692 }, { "epoch": 0.26279538582148054, "grad_norm": 1.2853742837905884, "learning_rate": 2.345e-05, "loss": 0.4049, "step": 4693 }, { "epoch": 0.26285138313360956, "grad_norm": 1.1566184759140015, "learning_rate": 2.3455e-05, "loss": 0.3942, "step": 4694 }, { "epoch": 0.2629073804457386, "grad_norm": 1.2635637521743774, "learning_rate": 2.3460000000000002e-05, "loss": 0.5816, "step": 4695 }, { "epoch": 0.2629633777578676, "grad_norm": 1.4154095649719238, "learning_rate": 2.3465e-05, "loss": 0.4768, "step": 4696 }, { "epoch": 0.2630193750699966, "grad_norm": 1.0704032182693481, "learning_rate": 2.347e-05, "loss": 0.3733, "step": 4697 }, { "epoch": 0.26307537238212564, "grad_norm": 1.040293574333191, "learning_rate": 2.3475e-05, "loss": 0.4653, "step": 4698 }, { "epoch": 0.26313136969425466, "grad_norm": 1.2215741872787476, "learning_rate": 2.3480000000000002e-05, "loss": 0.3926, "step": 4699 }, { "epoch": 0.2631873670063837, "grad_norm": 1.3420178890228271, "learning_rate": 2.3485000000000003e-05, "loss": 0.5665, "step": 4700 }, { "epoch": 0.2632433643185127, "grad_norm": 1.307134985923767, "learning_rate": 2.349e-05, "loss": 0.3772, "step": 4701 }, { "epoch": 0.2632993616306417, "grad_norm": 1.4651503562927246, "learning_rate": 2.3495e-05, "loss": 0.5187, "step": 4702 }, { "epoch": 0.26335535894277073, "grad_norm": 1.3955353498458862, "learning_rate": 2.35e-05, "loss": 0.3472, "step": 4703 }, { "epoch": 0.26341135625489975, "grad_norm": 1.3448361158370972, "learning_rate": 2.3505000000000003e-05, "loss": 0.5255, "step": 4704 }, { "epoch": 0.2634673535670288, "grad_norm": 1.1564198732376099, 
"learning_rate": 2.351e-05, "loss": 0.4308, "step": 4705 }, { "epoch": 0.2635233508791578, "grad_norm": 1.274255633354187, "learning_rate": 2.3515e-05, "loss": 0.4997, "step": 4706 }, { "epoch": 0.2635793481912868, "grad_norm": 1.2160266637802124, "learning_rate": 2.3520000000000002e-05, "loss": 0.4688, "step": 4707 }, { "epoch": 0.26363534550341583, "grad_norm": 1.0255680084228516, "learning_rate": 2.3525e-05, "loss": 0.291, "step": 4708 }, { "epoch": 0.26369134281554485, "grad_norm": 1.1800979375839233, "learning_rate": 2.3530000000000003e-05, "loss": 0.4176, "step": 4709 }, { "epoch": 0.26374734012767387, "grad_norm": 1.404802918434143, "learning_rate": 2.3535e-05, "loss": 0.4543, "step": 4710 }, { "epoch": 0.2638033374398029, "grad_norm": 3.526089668273926, "learning_rate": 2.354e-05, "loss": 0.4589, "step": 4711 }, { "epoch": 0.2638593347519319, "grad_norm": 1.169753909111023, "learning_rate": 2.3545e-05, "loss": 0.3912, "step": 4712 }, { "epoch": 0.2639153320640609, "grad_norm": 1.3709254264831543, "learning_rate": 2.355e-05, "loss": 0.5337, "step": 4713 }, { "epoch": 0.26397132937618994, "grad_norm": 1.6002558469772339, "learning_rate": 2.3555e-05, "loss": 0.3886, "step": 4714 }, { "epoch": 0.26402732668831896, "grad_norm": 1.47130286693573, "learning_rate": 2.356e-05, "loss": 0.4143, "step": 4715 }, { "epoch": 0.264083324000448, "grad_norm": 1.2650200128555298, "learning_rate": 2.3565000000000002e-05, "loss": 0.4887, "step": 4716 }, { "epoch": 0.264139321312577, "grad_norm": 1.438278317451477, "learning_rate": 2.357e-05, "loss": 0.5166, "step": 4717 }, { "epoch": 0.264195318624706, "grad_norm": 1.1774498224258423, "learning_rate": 2.3575e-05, "loss": 0.3873, "step": 4718 }, { "epoch": 0.26425131593683504, "grad_norm": 1.2495230436325073, "learning_rate": 2.358e-05, "loss": 0.4097, "step": 4719 }, { "epoch": 0.26430731324896406, "grad_norm": 1.2106897830963135, "learning_rate": 2.3585000000000002e-05, "loss": 0.4468, "step": 4720 }, { "epoch": 
0.2643633105610931, "grad_norm": 1.0755109786987305, "learning_rate": 2.359e-05, "loss": 0.4026, "step": 4721 }, { "epoch": 0.2644193078732221, "grad_norm": 1.3256511688232422, "learning_rate": 2.3595e-05, "loss": 0.4691, "step": 4722 }, { "epoch": 0.2644753051853511, "grad_norm": 1.1157100200653076, "learning_rate": 2.36e-05, "loss": 0.4084, "step": 4723 }, { "epoch": 0.26453130249748014, "grad_norm": 1.227482795715332, "learning_rate": 2.3605000000000002e-05, "loss": 0.463, "step": 4724 }, { "epoch": 0.26458729980960916, "grad_norm": 1.2092044353485107, "learning_rate": 2.3610000000000003e-05, "loss": 0.4472, "step": 4725 }, { "epoch": 0.2646432971217382, "grad_norm": 1.1431828737258911, "learning_rate": 2.3615e-05, "loss": 0.3422, "step": 4726 }, { "epoch": 0.2646992944338672, "grad_norm": 1.1278163194656372, "learning_rate": 2.362e-05, "loss": 0.3907, "step": 4727 }, { "epoch": 0.2647552917459962, "grad_norm": 2.944944381713867, "learning_rate": 2.3624999999999998e-05, "loss": 0.3977, "step": 4728 }, { "epoch": 0.26481128905812523, "grad_norm": 1.0838154554367065, "learning_rate": 2.3630000000000002e-05, "loss": 0.4222, "step": 4729 }, { "epoch": 0.26486728637025425, "grad_norm": 1.3145902156829834, "learning_rate": 2.3635000000000003e-05, "loss": 0.4228, "step": 4730 }, { "epoch": 0.26492328368238327, "grad_norm": 1.237244725227356, "learning_rate": 2.364e-05, "loss": 0.496, "step": 4731 }, { "epoch": 0.2649792809945123, "grad_norm": 1.3676848411560059, "learning_rate": 2.3645e-05, "loss": 0.4149, "step": 4732 }, { "epoch": 0.2650352783066413, "grad_norm": 1.361025333404541, "learning_rate": 2.365e-05, "loss": 0.3989, "step": 4733 }, { "epoch": 0.2650912756187703, "grad_norm": 1.2569867372512817, "learning_rate": 2.3655000000000003e-05, "loss": 0.4032, "step": 4734 }, { "epoch": 0.2651472729308993, "grad_norm": 1.121619701385498, "learning_rate": 2.366e-05, "loss": 0.4134, "step": 4735 }, { "epoch": 0.2652032702430283, "grad_norm": 1.3902980089187622, 
"learning_rate": 2.3665e-05, "loss": 0.4361, "step": 4736 }, { "epoch": 0.26525926755515733, "grad_norm": 1.2956088781356812, "learning_rate": 2.3670000000000002e-05, "loss": 0.5531, "step": 4737 }, { "epoch": 0.26531526486728635, "grad_norm": 1.6822242736816406, "learning_rate": 2.3675e-05, "loss": 0.5115, "step": 4738 }, { "epoch": 0.26537126217941537, "grad_norm": 1.040383219718933, "learning_rate": 2.3680000000000004e-05, "loss": 0.4566, "step": 4739 }, { "epoch": 0.2654272594915444, "grad_norm": 1.2056008577346802, "learning_rate": 2.3685e-05, "loss": 0.3719, "step": 4740 }, { "epoch": 0.2654832568036734, "grad_norm": 1.303937554359436, "learning_rate": 2.3690000000000002e-05, "loss": 0.4788, "step": 4741 }, { "epoch": 0.2655392541158024, "grad_norm": 1.337628960609436, "learning_rate": 2.3695e-05, "loss": 0.45, "step": 4742 }, { "epoch": 0.26559525142793144, "grad_norm": 1.2148423194885254, "learning_rate": 2.37e-05, "loss": 0.448, "step": 4743 }, { "epoch": 0.26565124874006046, "grad_norm": 1.4554351568222046, "learning_rate": 2.3705e-05, "loss": 0.3779, "step": 4744 }, { "epoch": 0.2657072460521895, "grad_norm": 1.3510658740997314, "learning_rate": 2.371e-05, "loss": 0.4273, "step": 4745 }, { "epoch": 0.2657632433643185, "grad_norm": 1.185594081878662, "learning_rate": 2.3715000000000002e-05, "loss": 0.5057, "step": 4746 }, { "epoch": 0.2658192406764475, "grad_norm": 1.1551557779312134, "learning_rate": 2.372e-05, "loss": 0.3967, "step": 4747 }, { "epoch": 0.26587523798857654, "grad_norm": 1.1626313924789429, "learning_rate": 2.3725e-05, "loss": 0.423, "step": 4748 }, { "epoch": 0.26593123530070556, "grad_norm": 1.1862657070159912, "learning_rate": 2.373e-05, "loss": 0.4447, "step": 4749 }, { "epoch": 0.2659872326128346, "grad_norm": 0.9451472163200378, "learning_rate": 2.3735000000000002e-05, "loss": 0.3167, "step": 4750 }, { "epoch": 0.2660432299249636, "grad_norm": 1.0458323955535889, "learning_rate": 2.374e-05, "loss": 0.5032, "step": 4751 }, { "epoch": 
0.2660992272370926, "grad_norm": 1.0098243951797485, "learning_rate": 2.3745e-05, "loss": 0.399, "step": 4752 }, { "epoch": 0.26615522454922164, "grad_norm": 1.2983150482177734, "learning_rate": 2.375e-05, "loss": 0.4968, "step": 4753 }, { "epoch": 0.26621122186135066, "grad_norm": 1.238157868385315, "learning_rate": 2.3755000000000002e-05, "loss": 0.4248, "step": 4754 }, { "epoch": 0.2662672191734797, "grad_norm": 1.2264575958251953, "learning_rate": 2.3760000000000003e-05, "loss": 0.4707, "step": 4755 }, { "epoch": 0.2663232164856087, "grad_norm": 1.0791478157043457, "learning_rate": 2.3765e-05, "loss": 0.3632, "step": 4756 }, { "epoch": 0.2663792137977377, "grad_norm": 1.2311959266662598, "learning_rate": 2.377e-05, "loss": 0.4551, "step": 4757 }, { "epoch": 0.26643521110986673, "grad_norm": 1.0826047658920288, "learning_rate": 2.3775e-05, "loss": 0.3833, "step": 4758 }, { "epoch": 0.26649120842199575, "grad_norm": 1.1972943544387817, "learning_rate": 2.3780000000000003e-05, "loss": 0.4986, "step": 4759 }, { "epoch": 0.26654720573412477, "grad_norm": 1.5173351764678955, "learning_rate": 2.3785e-05, "loss": 0.4581, "step": 4760 }, { "epoch": 0.2666032030462538, "grad_norm": 1.081595540046692, "learning_rate": 2.379e-05, "loss": 0.4176, "step": 4761 }, { "epoch": 0.2666592003583828, "grad_norm": 1.485235333442688, "learning_rate": 2.3795000000000002e-05, "loss": 0.4463, "step": 4762 }, { "epoch": 0.2667151976705118, "grad_norm": 1.043321132659912, "learning_rate": 2.38e-05, "loss": 0.3156, "step": 4763 }, { "epoch": 0.26677119498264085, "grad_norm": 1.2274316549301147, "learning_rate": 2.3805000000000003e-05, "loss": 0.6176, "step": 4764 }, { "epoch": 0.26682719229476987, "grad_norm": 1.2730541229248047, "learning_rate": 2.381e-05, "loss": 0.4126, "step": 4765 }, { "epoch": 0.2668831896068989, "grad_norm": 1.2899019718170166, "learning_rate": 2.3815e-05, "loss": 0.3958, "step": 4766 }, { "epoch": 0.2669391869190279, "grad_norm": 1.1440935134887695, 
"learning_rate": 2.3820000000000002e-05, "loss": 0.3716, "step": 4767 }, { "epoch": 0.2669951842311569, "grad_norm": 1.1878410577774048, "learning_rate": 2.3825e-05, "loss": 0.4676, "step": 4768 }, { "epoch": 0.26705118154328594, "grad_norm": 1.21403169631958, "learning_rate": 2.3830000000000004e-05, "loss": 0.4589, "step": 4769 }, { "epoch": 0.26710717885541496, "grad_norm": 1.1536064147949219, "learning_rate": 2.3835e-05, "loss": 0.4081, "step": 4770 }, { "epoch": 0.267163176167544, "grad_norm": 1.207689642906189, "learning_rate": 2.3840000000000002e-05, "loss": 0.6582, "step": 4771 }, { "epoch": 0.267219173479673, "grad_norm": 1.1608504056930542, "learning_rate": 2.3845e-05, "loss": 0.4031, "step": 4772 }, { "epoch": 0.267275170791802, "grad_norm": 1.3046009540557861, "learning_rate": 2.385e-05, "loss": 0.4721, "step": 4773 }, { "epoch": 0.26733116810393104, "grad_norm": 1.1998666524887085, "learning_rate": 2.3855e-05, "loss": 0.3622, "step": 4774 }, { "epoch": 0.26738716541606, "grad_norm": 6.5772223472595215, "learning_rate": 2.3860000000000002e-05, "loss": 0.4529, "step": 4775 }, { "epoch": 0.267443162728189, "grad_norm": 1.2209452390670776, "learning_rate": 2.3865000000000003e-05, "loss": 0.3514, "step": 4776 }, { "epoch": 0.26749916004031804, "grad_norm": 1.2021710872650146, "learning_rate": 2.387e-05, "loss": 0.4096, "step": 4777 }, { "epoch": 0.26755515735244706, "grad_norm": 1.1438333988189697, "learning_rate": 2.3875e-05, "loss": 0.3843, "step": 4778 }, { "epoch": 0.2676111546645761, "grad_norm": 1.3314814567565918, "learning_rate": 2.3880000000000002e-05, "loss": 0.4809, "step": 4779 }, { "epoch": 0.2676671519767051, "grad_norm": 1.1786727905273438, "learning_rate": 2.3885000000000003e-05, "loss": 0.3968, "step": 4780 }, { "epoch": 0.2677231492888341, "grad_norm": 1.0819017887115479, "learning_rate": 2.389e-05, "loss": 0.3484, "step": 4781 }, { "epoch": 0.26777914660096314, "grad_norm": 1.17559015750885, "learning_rate": 2.3895e-05, "loss": 0.4232, 
"step": 4782 }, { "epoch": 0.26783514391309216, "grad_norm": 1.4684512615203857, "learning_rate": 2.39e-05, "loss": 0.4547, "step": 4783 }, { "epoch": 0.2678911412252212, "grad_norm": 1.2338345050811768, "learning_rate": 2.3905000000000002e-05, "loss": 0.4311, "step": 4784 }, { "epoch": 0.2679471385373502, "grad_norm": 1.3646914958953857, "learning_rate": 2.3910000000000003e-05, "loss": 0.5086, "step": 4785 }, { "epoch": 0.2680031358494792, "grad_norm": 1.2001941204071045, "learning_rate": 2.3915e-05, "loss": 0.385, "step": 4786 }, { "epoch": 0.26805913316160823, "grad_norm": 1.1692379713058472, "learning_rate": 2.392e-05, "loss": 0.6629, "step": 4787 }, { "epoch": 0.26811513047373725, "grad_norm": 1.214898943901062, "learning_rate": 2.3925e-05, "loss": 0.415, "step": 4788 }, { "epoch": 0.26817112778586627, "grad_norm": 1.2813019752502441, "learning_rate": 2.3930000000000003e-05, "loss": 0.4171, "step": 4789 }, { "epoch": 0.2682271250979953, "grad_norm": 1.1769672632217407, "learning_rate": 2.3935e-05, "loss": 0.3184, "step": 4790 }, { "epoch": 0.2682831224101243, "grad_norm": 1.01447594165802, "learning_rate": 2.394e-05, "loss": 0.3295, "step": 4791 }, { "epoch": 0.2683391197222533, "grad_norm": 1.1792190074920654, "learning_rate": 2.3945000000000002e-05, "loss": 0.3586, "step": 4792 }, { "epoch": 0.26839511703438235, "grad_norm": 1.2017855644226074, "learning_rate": 2.395e-05, "loss": 0.3741, "step": 4793 }, { "epoch": 0.26845111434651137, "grad_norm": 1.1356403827667236, "learning_rate": 2.3955000000000004e-05, "loss": 0.5755, "step": 4794 }, { "epoch": 0.2685071116586404, "grad_norm": 1.1810275316238403, "learning_rate": 2.396e-05, "loss": 0.5044, "step": 4795 }, { "epoch": 0.2685631089707694, "grad_norm": 1.492426872253418, "learning_rate": 2.3965000000000002e-05, "loss": 0.6112, "step": 4796 }, { "epoch": 0.2686191062828984, "grad_norm": 1.587020993232727, "learning_rate": 2.397e-05, "loss": 0.4583, "step": 4797 }, { "epoch": 0.26867510359502744, "grad_norm": 
1.438361644744873, "learning_rate": 2.3975e-05, "loss": 0.4967, "step": 4798 }, { "epoch": 0.26873110090715646, "grad_norm": 1.1004462242126465, "learning_rate": 2.398e-05, "loss": 0.5319, "step": 4799 }, { "epoch": 0.2687870982192855, "grad_norm": 1.253659963607788, "learning_rate": 2.3985e-05, "loss": 0.4341, "step": 4800 }, { "epoch": 0.2688430955314145, "grad_norm": 1.118528962135315, "learning_rate": 2.3990000000000002e-05, "loss": 0.3381, "step": 4801 }, { "epoch": 0.2688990928435435, "grad_norm": 1.5777441263198853, "learning_rate": 2.3995e-05, "loss": 0.3385, "step": 4802 }, { "epoch": 0.26895509015567254, "grad_norm": 1.1764894723892212, "learning_rate": 2.4e-05, "loss": 0.4373, "step": 4803 }, { "epoch": 0.26901108746780156, "grad_norm": 1.0975189208984375, "learning_rate": 2.4005e-05, "loss": 0.3497, "step": 4804 }, { "epoch": 0.2690670847799306, "grad_norm": 1.1311278343200684, "learning_rate": 2.4010000000000002e-05, "loss": 0.3729, "step": 4805 }, { "epoch": 0.2691230820920596, "grad_norm": 1.2189937829971313, "learning_rate": 2.4015000000000003e-05, "loss": 0.5284, "step": 4806 }, { "epoch": 0.2691790794041886, "grad_norm": 2.796288251876831, "learning_rate": 2.402e-05, "loss": 0.3669, "step": 4807 }, { "epoch": 0.26923507671631763, "grad_norm": 1.243399977684021, "learning_rate": 2.4025e-05, "loss": 0.4037, "step": 4808 }, { "epoch": 0.26929107402844665, "grad_norm": 1.410094976425171, "learning_rate": 2.4030000000000002e-05, "loss": 0.5426, "step": 4809 }, { "epoch": 0.26934707134057567, "grad_norm": 1.1329196691513062, "learning_rate": 2.4035000000000003e-05, "loss": 0.3994, "step": 4810 }, { "epoch": 0.2694030686527047, "grad_norm": 1.3636788129806519, "learning_rate": 2.404e-05, "loss": 0.4161, "step": 4811 }, { "epoch": 0.2694590659648337, "grad_norm": 1.0871763229370117, "learning_rate": 2.4045e-05, "loss": 0.3582, "step": 4812 }, { "epoch": 0.26951506327696273, "grad_norm": 1.1364787817001343, "learning_rate": 2.4050000000000002e-05, "loss": 
0.4485, "step": 4813 }, { "epoch": 0.26957106058909175, "grad_norm": 1.3407330513000488, "learning_rate": 2.4055000000000003e-05, "loss": 0.4873, "step": 4814 }, { "epoch": 0.26962705790122077, "grad_norm": 1.1668158769607544, "learning_rate": 2.4060000000000003e-05, "loss": 0.6243, "step": 4815 }, { "epoch": 0.26968305521334973, "grad_norm": 1.3728643655776978, "learning_rate": 2.4065e-05, "loss": 0.4324, "step": 4816 }, { "epoch": 0.26973905252547875, "grad_norm": 1.2123281955718994, "learning_rate": 2.407e-05, "loss": 0.4673, "step": 4817 }, { "epoch": 0.26979504983760777, "grad_norm": 1.2647933959960938, "learning_rate": 2.4075e-05, "loss": 0.355, "step": 4818 }, { "epoch": 0.2698510471497368, "grad_norm": 1.1391644477844238, "learning_rate": 2.408e-05, "loss": 0.4842, "step": 4819 }, { "epoch": 0.2699070444618658, "grad_norm": 1.1071332693099976, "learning_rate": 2.4085e-05, "loss": 0.3956, "step": 4820 }, { "epoch": 0.2699630417739948, "grad_norm": 1.0850937366485596, "learning_rate": 2.409e-05, "loss": 0.4921, "step": 4821 }, { "epoch": 0.27001903908612385, "grad_norm": 1.157414197921753, "learning_rate": 2.4095000000000002e-05, "loss": 0.4969, "step": 4822 }, { "epoch": 0.27007503639825287, "grad_norm": 1.525781273841858, "learning_rate": 2.41e-05, "loss": 0.4886, "step": 4823 }, { "epoch": 0.2701310337103819, "grad_norm": 1.0770370960235596, "learning_rate": 2.4105e-05, "loss": 0.4066, "step": 4824 }, { "epoch": 0.2701870310225109, "grad_norm": 1.1031999588012695, "learning_rate": 2.411e-05, "loss": 0.3887, "step": 4825 }, { "epoch": 0.2702430283346399, "grad_norm": 1.0829763412475586, "learning_rate": 2.4115000000000002e-05, "loss": 0.3964, "step": 4826 }, { "epoch": 0.27029902564676894, "grad_norm": 1.165955901145935, "learning_rate": 2.412e-05, "loss": 0.3823, "step": 4827 }, { "epoch": 0.27035502295889796, "grad_norm": 0.955703854560852, "learning_rate": 2.4125e-05, "loss": 0.3086, "step": 4828 }, { "epoch": 0.270411020271027, "grad_norm": 
1.2926037311553955, "learning_rate": 2.413e-05, "loss": 0.4862, "step": 4829 }, { "epoch": 0.270467017583156, "grad_norm": 1.4200623035430908, "learning_rate": 2.4135000000000002e-05, "loss": 0.591, "step": 4830 }, { "epoch": 0.270523014895285, "grad_norm": 1.1878716945648193, "learning_rate": 2.4140000000000003e-05, "loss": 0.3831, "step": 4831 }, { "epoch": 0.27057901220741404, "grad_norm": 1.1649898290634155, "learning_rate": 2.4145e-05, "loss": 0.3933, "step": 4832 }, { "epoch": 0.27063500951954306, "grad_norm": 1.396077275276184, "learning_rate": 2.415e-05, "loss": 0.3464, "step": 4833 }, { "epoch": 0.2706910068316721, "grad_norm": 1.1895679235458374, "learning_rate": 2.4154999999999998e-05, "loss": 0.4139, "step": 4834 }, { "epoch": 0.2707470041438011, "grad_norm": 1.3763668537139893, "learning_rate": 2.4160000000000002e-05, "loss": 0.451, "step": 4835 }, { "epoch": 0.2708030014559301, "grad_norm": 1.2594642639160156, "learning_rate": 2.4165e-05, "loss": 0.3893, "step": 4836 }, { "epoch": 0.27085899876805913, "grad_norm": 1.2071425914764404, "learning_rate": 2.417e-05, "loss": 0.4495, "step": 4837 }, { "epoch": 0.27091499608018815, "grad_norm": 1.3346881866455078, "learning_rate": 2.4175e-05, "loss": 0.5326, "step": 4838 }, { "epoch": 0.27097099339231717, "grad_norm": 1.264471173286438, "learning_rate": 2.418e-05, "loss": 0.5254, "step": 4839 }, { "epoch": 0.2710269907044462, "grad_norm": 1.154289960861206, "learning_rate": 2.4185000000000003e-05, "loss": 0.4498, "step": 4840 }, { "epoch": 0.2710829880165752, "grad_norm": 1.2578608989715576, "learning_rate": 2.419e-05, "loss": 0.3984, "step": 4841 }, { "epoch": 0.27113898532870423, "grad_norm": 1.193702220916748, "learning_rate": 2.4195e-05, "loss": 0.4636, "step": 4842 }, { "epoch": 0.27119498264083325, "grad_norm": 6.047183990478516, "learning_rate": 2.4200000000000002e-05, "loss": 0.4836, "step": 4843 }, { "epoch": 0.27125097995296227, "grad_norm": 1.906936764717102, "learning_rate": 2.4205e-05, "loss": 
0.4673, "step": 4844 }, { "epoch": 0.2713069772650913, "grad_norm": 1.150715708732605, "learning_rate": 2.4210000000000004e-05, "loss": 0.5078, "step": 4845 }, { "epoch": 0.2713629745772203, "grad_norm": 1.1989617347717285, "learning_rate": 2.4215e-05, "loss": 0.479, "step": 4846 }, { "epoch": 0.2714189718893493, "grad_norm": 1.1006189584732056, "learning_rate": 2.4220000000000002e-05, "loss": 0.4624, "step": 4847 }, { "epoch": 0.27147496920147834, "grad_norm": 1.0710850954055786, "learning_rate": 2.4225e-05, "loss": 0.3288, "step": 4848 }, { "epoch": 0.27153096651360736, "grad_norm": 1.242953896522522, "learning_rate": 2.423e-05, "loss": 0.4696, "step": 4849 }, { "epoch": 0.2715869638257364, "grad_norm": 1.2073276042938232, "learning_rate": 2.4235e-05, "loss": 0.4357, "step": 4850 }, { "epoch": 0.2716429611378654, "grad_norm": 1.162598967552185, "learning_rate": 2.4240000000000002e-05, "loss": 0.4673, "step": 4851 }, { "epoch": 0.2716989584499944, "grad_norm": 1.40058171749115, "learning_rate": 2.4245000000000002e-05, "loss": 0.5045, "step": 4852 }, { "epoch": 0.27175495576212344, "grad_norm": 1.334394931793213, "learning_rate": 2.425e-05, "loss": 0.4338, "step": 4853 }, { "epoch": 0.27181095307425246, "grad_norm": 1.1808335781097412, "learning_rate": 2.4255e-05, "loss": 0.5973, "step": 4854 }, { "epoch": 0.2718669503863815, "grad_norm": 1.288703441619873, "learning_rate": 2.426e-05, "loss": 0.4282, "step": 4855 }, { "epoch": 0.2719229476985105, "grad_norm": 1.2336734533309937, "learning_rate": 2.4265000000000002e-05, "loss": 0.6002, "step": 4856 }, { "epoch": 0.2719789450106395, "grad_norm": 1.362411618232727, "learning_rate": 2.427e-05, "loss": 0.4054, "step": 4857 }, { "epoch": 0.2720349423227685, "grad_norm": 1.4240525960922241, "learning_rate": 2.4275e-05, "loss": 0.5253, "step": 4858 }, { "epoch": 0.2720909396348975, "grad_norm": 1.1944591999053955, "learning_rate": 2.428e-05, "loss": 0.481, "step": 4859 }, { "epoch": 0.2721469369470265, "grad_norm": 
1.2200754880905151, "learning_rate": 2.4285000000000002e-05, "loss": 0.281, "step": 4860 }, { "epoch": 0.27220293425915554, "grad_norm": 1.2856251001358032, "learning_rate": 2.4290000000000003e-05, "loss": 0.3931, "step": 4861 }, { "epoch": 0.27225893157128456, "grad_norm": 1.1528245210647583, "learning_rate": 2.4295e-05, "loss": 0.4851, "step": 4862 }, { "epoch": 0.2723149288834136, "grad_norm": 1.2538505792617798, "learning_rate": 2.43e-05, "loss": 0.4604, "step": 4863 }, { "epoch": 0.2723709261955426, "grad_norm": 1.1997997760772705, "learning_rate": 2.4305e-05, "loss": 0.4507, "step": 4864 }, { "epoch": 0.2724269235076716, "grad_norm": 1.1665483713150024, "learning_rate": 2.4310000000000003e-05, "loss": 0.3933, "step": 4865 }, { "epoch": 0.27248292081980063, "grad_norm": 1.152035117149353, "learning_rate": 2.4315e-05, "loss": 0.5699, "step": 4866 }, { "epoch": 0.27253891813192965, "grad_norm": 1.3405358791351318, "learning_rate": 2.432e-05, "loss": 0.525, "step": 4867 }, { "epoch": 0.27259491544405867, "grad_norm": 1.1431670188903809, "learning_rate": 2.4325000000000002e-05, "loss": 0.3511, "step": 4868 }, { "epoch": 0.2726509127561877, "grad_norm": 1.0083286762237549, "learning_rate": 2.433e-05, "loss": 0.3615, "step": 4869 }, { "epoch": 0.2727069100683167, "grad_norm": 1.4361419677734375, "learning_rate": 2.4335000000000003e-05, "loss": 0.3746, "step": 4870 }, { "epoch": 0.27276290738044573, "grad_norm": 1.2798997163772583, "learning_rate": 2.434e-05, "loss": 0.5562, "step": 4871 }, { "epoch": 0.27281890469257475, "grad_norm": 1.5994699001312256, "learning_rate": 2.4345e-05, "loss": 0.4758, "step": 4872 }, { "epoch": 0.27287490200470377, "grad_norm": 1.4659181833267212, "learning_rate": 2.435e-05, "loss": 0.4897, "step": 4873 }, { "epoch": 0.2729308993168328, "grad_norm": 1.0324087142944336, "learning_rate": 2.4355e-05, "loss": 0.3914, "step": 4874 }, { "epoch": 0.2729868966289618, "grad_norm": 1.11372709274292, "learning_rate": 2.4360000000000004e-05, 
"loss": 0.4483, "step": 4875 }, { "epoch": 0.2730428939410908, "grad_norm": 1.0878872871398926, "learning_rate": 2.4365e-05, "loss": 0.4028, "step": 4876 }, { "epoch": 0.27309889125321984, "grad_norm": 1.1511904001235962, "learning_rate": 2.4370000000000002e-05, "loss": 0.2844, "step": 4877 }, { "epoch": 0.27315488856534886, "grad_norm": 1.0558775663375854, "learning_rate": 2.4375e-05, "loss": 0.4916, "step": 4878 }, { "epoch": 0.2732108858774779, "grad_norm": 1.0329701900482178, "learning_rate": 2.438e-05, "loss": 0.3883, "step": 4879 }, { "epoch": 0.2732668831896069, "grad_norm": 1.5741260051727295, "learning_rate": 2.4385e-05, "loss": 0.468, "step": 4880 }, { "epoch": 0.2733228805017359, "grad_norm": 1.1136219501495361, "learning_rate": 2.4390000000000002e-05, "loss": 0.3881, "step": 4881 }, { "epoch": 0.27337887781386494, "grad_norm": 1.2174041271209717, "learning_rate": 2.4395000000000003e-05, "loss": 0.4176, "step": 4882 }, { "epoch": 0.27343487512599396, "grad_norm": 1.137325644493103, "learning_rate": 2.44e-05, "loss": 0.4695, "step": 4883 }, { "epoch": 0.273490872438123, "grad_norm": 1.1314067840576172, "learning_rate": 2.4405e-05, "loss": 0.3367, "step": 4884 }, { "epoch": 0.273546869750252, "grad_norm": 1.1522043943405151, "learning_rate": 2.4410000000000002e-05, "loss": 0.4068, "step": 4885 }, { "epoch": 0.273602867062381, "grad_norm": 1.210898995399475, "learning_rate": 2.4415000000000003e-05, "loss": 0.5382, "step": 4886 }, { "epoch": 0.27365886437451004, "grad_norm": 1.389197826385498, "learning_rate": 2.442e-05, "loss": 0.5014, "step": 4887 }, { "epoch": 0.27371486168663905, "grad_norm": 1.416251540184021, "learning_rate": 2.4425e-05, "loss": 0.5832, "step": 4888 }, { "epoch": 0.2737708589987681, "grad_norm": 1.4145478010177612, "learning_rate": 2.443e-05, "loss": 0.5117, "step": 4889 }, { "epoch": 0.2738268563108971, "grad_norm": 1.226279854774475, "learning_rate": 2.4435000000000002e-05, "loss": 0.4941, "step": 4890 }, { "epoch": 
0.2738828536230261, "grad_norm": 0.9975038766860962, "learning_rate": 2.4440000000000003e-05, "loss": 0.3055, "step": 4891 }, { "epoch": 0.27393885093515513, "grad_norm": 1.38507080078125, "learning_rate": 2.4445e-05, "loss": 0.4014, "step": 4892 }, { "epoch": 0.27399484824728415, "grad_norm": 1.3080692291259766, "learning_rate": 2.445e-05, "loss": 0.502, "step": 4893 }, { "epoch": 0.27405084555941317, "grad_norm": 1.0307468175888062, "learning_rate": 2.4455e-05, "loss": 0.3938, "step": 4894 }, { "epoch": 0.2741068428715422, "grad_norm": 1.2235243320465088, "learning_rate": 2.4460000000000003e-05, "loss": 0.3463, "step": 4895 }, { "epoch": 0.2741628401836712, "grad_norm": 1.427044153213501, "learning_rate": 2.4465e-05, "loss": 0.5066, "step": 4896 }, { "epoch": 0.2742188374958002, "grad_norm": 1.1579972505569458, "learning_rate": 2.447e-05, "loss": 0.4708, "step": 4897 }, { "epoch": 0.27427483480792925, "grad_norm": 1.1244254112243652, "learning_rate": 2.4475000000000002e-05, "loss": 0.3776, "step": 4898 }, { "epoch": 0.2743308321200582, "grad_norm": 1.1676359176635742, "learning_rate": 2.448e-05, "loss": 0.4447, "step": 4899 }, { "epoch": 0.27438682943218723, "grad_norm": 1.186279058456421, "learning_rate": 2.4485000000000004e-05, "loss": 0.5561, "step": 4900 }, { "epoch": 0.27444282674431625, "grad_norm": 1.1873775720596313, "learning_rate": 2.449e-05, "loss": 0.4251, "step": 4901 }, { "epoch": 0.27449882405644527, "grad_norm": 1.4521654844284058, "learning_rate": 2.4495000000000002e-05, "loss": 0.5958, "step": 4902 }, { "epoch": 0.2745548213685743, "grad_norm": 1.1759424209594727, "learning_rate": 2.45e-05, "loss": 0.3257, "step": 4903 }, { "epoch": 0.2746108186807033, "grad_norm": 1.4622273445129395, "learning_rate": 2.4505e-05, "loss": 0.4231, "step": 4904 }, { "epoch": 0.2746668159928323, "grad_norm": 1.1844336986541748, "learning_rate": 2.451e-05, "loss": 0.465, "step": 4905 }, { "epoch": 0.27472281330496134, "grad_norm": 1.2939742803573608, "learning_rate": 
2.4515e-05, "loss": 0.4654, "step": 4906 }, { "epoch": 0.27477881061709036, "grad_norm": 1.2372941970825195, "learning_rate": 2.4520000000000002e-05, "loss": 0.4132, "step": 4907 }, { "epoch": 0.2748348079292194, "grad_norm": 1.1188563108444214, "learning_rate": 2.4525e-05, "loss": 0.37, "step": 4908 }, { "epoch": 0.2748908052413484, "grad_norm": 1.455541729927063, "learning_rate": 2.453e-05, "loss": 0.4644, "step": 4909 }, { "epoch": 0.2749468025534774, "grad_norm": 1.6214908361434937, "learning_rate": 2.4535e-05, "loss": 0.4694, "step": 4910 }, { "epoch": 0.27500279986560644, "grad_norm": 1.17264986038208, "learning_rate": 2.4540000000000002e-05, "loss": 0.3957, "step": 4911 }, { "epoch": 0.27505879717773546, "grad_norm": 1.0448565483093262, "learning_rate": 2.4545000000000003e-05, "loss": 0.3585, "step": 4912 }, { "epoch": 0.2751147944898645, "grad_norm": 1.3260798454284668, "learning_rate": 2.455e-05, "loss": 0.4594, "step": 4913 }, { "epoch": 0.2751707918019935, "grad_norm": 1.7412939071655273, "learning_rate": 2.4555e-05, "loss": 0.4749, "step": 4914 }, { "epoch": 0.2752267891141225, "grad_norm": 1.4049450159072876, "learning_rate": 2.4560000000000002e-05, "loss": 0.4712, "step": 4915 }, { "epoch": 0.27528278642625154, "grad_norm": 1.2056244611740112, "learning_rate": 2.4565000000000003e-05, "loss": 0.4464, "step": 4916 }, { "epoch": 0.27533878373838055, "grad_norm": 1.6052457094192505, "learning_rate": 2.457e-05, "loss": 0.4002, "step": 4917 }, { "epoch": 0.2753947810505096, "grad_norm": 1.1523957252502441, "learning_rate": 2.4575e-05, "loss": 0.425, "step": 4918 }, { "epoch": 0.2754507783626386, "grad_norm": 1.3647980690002441, "learning_rate": 2.4580000000000002e-05, "loss": 0.536, "step": 4919 }, { "epoch": 0.2755067756747676, "grad_norm": 1.340335726737976, "learning_rate": 2.4585000000000003e-05, "loss": 0.4708, "step": 4920 }, { "epoch": 0.27556277298689663, "grad_norm": 1.038291096687317, "learning_rate": 2.4590000000000003e-05, "loss": 0.3694, 
"step": 4921 }, { "epoch": 0.27561877029902565, "grad_norm": 1.2275313138961792, "learning_rate": 2.4595e-05, "loss": 0.5274, "step": 4922 }, { "epoch": 0.27567476761115467, "grad_norm": 1.0066707134246826, "learning_rate": 2.46e-05, "loss": 0.332, "step": 4923 }, { "epoch": 0.2757307649232837, "grad_norm": 1.0508103370666504, "learning_rate": 2.4605e-05, "loss": 0.4163, "step": 4924 }, { "epoch": 0.2757867622354127, "grad_norm": 1.1964588165283203, "learning_rate": 2.4610000000000003e-05, "loss": 0.419, "step": 4925 }, { "epoch": 0.2758427595475417, "grad_norm": 1.2009811401367188, "learning_rate": 2.4615e-05, "loss": 0.4048, "step": 4926 }, { "epoch": 0.27589875685967075, "grad_norm": 1.2661802768707275, "learning_rate": 2.462e-05, "loss": 0.3793, "step": 4927 }, { "epoch": 0.27595475417179977, "grad_norm": 1.1443170309066772, "learning_rate": 2.4625000000000002e-05, "loss": 0.375, "step": 4928 }, { "epoch": 0.2760107514839288, "grad_norm": 1.1995105743408203, "learning_rate": 2.463e-05, "loss": 0.3884, "step": 4929 }, { "epoch": 0.2760667487960578, "grad_norm": 1.284255862236023, "learning_rate": 2.4635000000000004e-05, "loss": 0.3677, "step": 4930 }, { "epoch": 0.2761227461081868, "grad_norm": 1.3781988620758057, "learning_rate": 2.464e-05, "loss": 0.459, "step": 4931 }, { "epoch": 0.27617874342031584, "grad_norm": 1.1450660228729248, "learning_rate": 2.4645000000000002e-05, "loss": 0.3256, "step": 4932 }, { "epoch": 0.27623474073244486, "grad_norm": 1.0806472301483154, "learning_rate": 2.465e-05, "loss": 0.4452, "step": 4933 }, { "epoch": 0.2762907380445739, "grad_norm": 1.2479525804519653, "learning_rate": 2.4655e-05, "loss": 0.3918, "step": 4934 }, { "epoch": 0.2763467353567029, "grad_norm": 1.154831051826477, "learning_rate": 2.466e-05, "loss": 0.3139, "step": 4935 }, { "epoch": 0.2764027326688319, "grad_norm": 1.1486061811447144, "learning_rate": 2.4665000000000002e-05, "loss": 0.3987, "step": 4936 }, { "epoch": 0.27645872998096094, "grad_norm": 
1.382432222366333, "learning_rate": 2.4670000000000003e-05, "loss": 0.3491, "step": 4937 }, { "epoch": 0.27651472729308996, "grad_norm": 1.035082221031189, "learning_rate": 2.4675e-05, "loss": 0.3382, "step": 4938 }, { "epoch": 0.276570724605219, "grad_norm": 1.1332824230194092, "learning_rate": 2.468e-05, "loss": 0.3603, "step": 4939 }, { "epoch": 0.27662672191734794, "grad_norm": 1.4208630323410034, "learning_rate": 2.4685e-05, "loss": 0.4386, "step": 4940 }, { "epoch": 0.27668271922947696, "grad_norm": 1.26140296459198, "learning_rate": 2.4690000000000002e-05, "loss": 0.4126, "step": 4941 }, { "epoch": 0.276738716541606, "grad_norm": 1.047723412513733, "learning_rate": 2.4695e-05, "loss": 0.3948, "step": 4942 }, { "epoch": 0.276794713853735, "grad_norm": 1.1719717979431152, "learning_rate": 2.47e-05, "loss": 0.5086, "step": 4943 }, { "epoch": 0.276850711165864, "grad_norm": 1.5609759092330933, "learning_rate": 2.4705e-05, "loss": 0.6026, "step": 4944 }, { "epoch": 0.27690670847799304, "grad_norm": 1.3194032907485962, "learning_rate": 2.471e-05, "loss": 0.3067, "step": 4945 }, { "epoch": 0.27696270579012205, "grad_norm": 1.4805982112884521, "learning_rate": 2.4715000000000003e-05, "loss": 0.5675, "step": 4946 }, { "epoch": 0.2770187031022511, "grad_norm": 1.563145399093628, "learning_rate": 2.472e-05, "loss": 0.3775, "step": 4947 }, { "epoch": 0.2770747004143801, "grad_norm": 1.1520642042160034, "learning_rate": 2.4725e-05, "loss": 0.4203, "step": 4948 }, { "epoch": 0.2771306977265091, "grad_norm": 1.2293246984481812, "learning_rate": 2.473e-05, "loss": 0.4264, "step": 4949 }, { "epoch": 0.27718669503863813, "grad_norm": 1.1341722011566162, "learning_rate": 2.4735e-05, "loss": 0.3552, "step": 4950 }, { "epoch": 0.27724269235076715, "grad_norm": 1.4974896907806396, "learning_rate": 2.4740000000000004e-05, "loss": 0.4868, "step": 4951 }, { "epoch": 0.27729868966289617, "grad_norm": 1.3844859600067139, "learning_rate": 2.4745e-05, "loss": 0.4703, "step": 4952 }, { 
"epoch": 0.2773546869750252, "grad_norm": 1.133491039276123, "learning_rate": 2.4750000000000002e-05, "loss": 0.3953, "step": 4953 }, { "epoch": 0.2774106842871542, "grad_norm": 1.1602778434753418, "learning_rate": 2.4755e-05, "loss": 0.4219, "step": 4954 }, { "epoch": 0.2774666815992832, "grad_norm": 1.3232240676879883, "learning_rate": 2.476e-05, "loss": 0.3785, "step": 4955 }, { "epoch": 0.27752267891141225, "grad_norm": 1.514744520187378, "learning_rate": 2.4765e-05, "loss": 0.4758, "step": 4956 }, { "epoch": 0.27757867622354127, "grad_norm": 1.1780812740325928, "learning_rate": 2.4770000000000002e-05, "loss": 0.3897, "step": 4957 }, { "epoch": 0.2776346735356703, "grad_norm": 1.2148231267929077, "learning_rate": 2.4775000000000003e-05, "loss": 0.3999, "step": 4958 }, { "epoch": 0.2776906708477993, "grad_norm": 1.3397095203399658, "learning_rate": 2.478e-05, "loss": 0.3621, "step": 4959 }, { "epoch": 0.2777466681599283, "grad_norm": 1.153542160987854, "learning_rate": 2.4785e-05, "loss": 0.3358, "step": 4960 }, { "epoch": 0.27780266547205734, "grad_norm": 1.0039702653884888, "learning_rate": 2.479e-05, "loss": 0.3771, "step": 4961 }, { "epoch": 0.27785866278418636, "grad_norm": 1.048505425453186, "learning_rate": 2.4795000000000002e-05, "loss": 0.4062, "step": 4962 }, { "epoch": 0.2779146600963154, "grad_norm": 1.4633502960205078, "learning_rate": 2.48e-05, "loss": 0.4179, "step": 4963 }, { "epoch": 0.2779706574084444, "grad_norm": 1.3561614751815796, "learning_rate": 2.4805e-05, "loss": 0.395, "step": 4964 }, { "epoch": 0.2780266547205734, "grad_norm": 1.1100013256072998, "learning_rate": 2.481e-05, "loss": 0.4721, "step": 4965 }, { "epoch": 0.27808265203270244, "grad_norm": 1.2962788343429565, "learning_rate": 2.4815000000000002e-05, "loss": 0.5376, "step": 4966 }, { "epoch": 0.27813864934483146, "grad_norm": 1.1644928455352783, "learning_rate": 2.4820000000000003e-05, "loss": 0.4759, "step": 4967 }, { "epoch": 0.2781946466569605, "grad_norm": 
1.102250337600708, "learning_rate": 2.4825e-05, "loss": 0.358, "step": 4968 }, { "epoch": 0.2782506439690895, "grad_norm": 0.9479377269744873, "learning_rate": 2.483e-05, "loss": 0.3354, "step": 4969 }, { "epoch": 0.2783066412812185, "grad_norm": 1.2795408964157104, "learning_rate": 2.4835e-05, "loss": 0.3787, "step": 4970 }, { "epoch": 0.27836263859334753, "grad_norm": 1.070845365524292, "learning_rate": 2.4840000000000003e-05, "loss": 0.4005, "step": 4971 }, { "epoch": 0.27841863590547655, "grad_norm": 1.3072528839111328, "learning_rate": 2.4845e-05, "loss": 0.5137, "step": 4972 }, { "epoch": 0.27847463321760557, "grad_norm": 1.3732484579086304, "learning_rate": 2.485e-05, "loss": 0.5127, "step": 4973 }, { "epoch": 0.2785306305297346, "grad_norm": 1.2259336709976196, "learning_rate": 2.4855000000000002e-05, "loss": 0.4656, "step": 4974 }, { "epoch": 0.2785866278418636, "grad_norm": 1.290574073791504, "learning_rate": 2.486e-05, "loss": 0.6261, "step": 4975 }, { "epoch": 0.27864262515399263, "grad_norm": 1.3530707359313965, "learning_rate": 2.4865000000000003e-05, "loss": 0.649, "step": 4976 }, { "epoch": 0.27869862246612165, "grad_norm": 1.1110451221466064, "learning_rate": 2.487e-05, "loss": 0.4133, "step": 4977 }, { "epoch": 0.27875461977825067, "grad_norm": 1.2195521593093872, "learning_rate": 2.4875e-05, "loss": 0.4227, "step": 4978 }, { "epoch": 0.2788106170903797, "grad_norm": 1.1598562002182007, "learning_rate": 2.488e-05, "loss": 0.4384, "step": 4979 }, { "epoch": 0.2788666144025087, "grad_norm": 1.4165732860565186, "learning_rate": 2.4885e-05, "loss": 0.4252, "step": 4980 }, { "epoch": 0.2789226117146377, "grad_norm": 1.760197639465332, "learning_rate": 2.489e-05, "loss": 0.3759, "step": 4981 }, { "epoch": 0.2789786090267667, "grad_norm": 1.1311376094818115, "learning_rate": 2.4895e-05, "loss": 0.37, "step": 4982 }, { "epoch": 0.2790346063388957, "grad_norm": 1.2808183431625366, "learning_rate": 2.4900000000000002e-05, "loss": 0.4915, "step": 4983 }, { 
"epoch": 0.2790906036510247, "grad_norm": 1.4143792390823364, "learning_rate": 2.4905e-05, "loss": 0.5387, "step": 4984 }, { "epoch": 0.27914660096315375, "grad_norm": 1.1529659032821655, "learning_rate": 2.491e-05, "loss": 0.4424, "step": 4985 }, { "epoch": 0.27920259827528277, "grad_norm": 1.3599112033843994, "learning_rate": 2.4915e-05, "loss": 0.3906, "step": 4986 }, { "epoch": 0.2792585955874118, "grad_norm": 1.1522306203842163, "learning_rate": 2.4920000000000002e-05, "loss": 0.448, "step": 4987 }, { "epoch": 0.2793145928995408, "grad_norm": 1.0313642024993896, "learning_rate": 2.4925000000000003e-05, "loss": 0.4001, "step": 4988 }, { "epoch": 0.2793705902116698, "grad_norm": 1.1960699558258057, "learning_rate": 2.493e-05, "loss": 0.3986, "step": 4989 }, { "epoch": 0.27942658752379884, "grad_norm": 1.370259404182434, "learning_rate": 2.4935e-05, "loss": 0.4686, "step": 4990 }, { "epoch": 0.27948258483592786, "grad_norm": 1.4210662841796875, "learning_rate": 2.4940000000000002e-05, "loss": 0.4857, "step": 4991 }, { "epoch": 0.2795385821480569, "grad_norm": 1.4025802612304688, "learning_rate": 2.4945000000000003e-05, "loss": 0.4234, "step": 4992 }, { "epoch": 0.2795945794601859, "grad_norm": 2.0384538173675537, "learning_rate": 2.495e-05, "loss": 0.4057, "step": 4993 }, { "epoch": 0.2796505767723149, "grad_norm": 1.112663745880127, "learning_rate": 2.4955e-05, "loss": 0.3578, "step": 4994 }, { "epoch": 0.27970657408444394, "grad_norm": 1.6824556589126587, "learning_rate": 2.496e-05, "loss": 0.5093, "step": 4995 }, { "epoch": 0.27976257139657296, "grad_norm": 1.216307282447815, "learning_rate": 2.4965000000000002e-05, "loss": 0.3959, "step": 4996 }, { "epoch": 0.279818568708702, "grad_norm": 1.1118022203445435, "learning_rate": 2.4970000000000003e-05, "loss": 0.4064, "step": 4997 }, { "epoch": 0.279874566020831, "grad_norm": 1.4133844375610352, "learning_rate": 2.4975e-05, "loss": 0.4684, "step": 4998 }, { "epoch": 0.27993056333296, "grad_norm": 
1.2977321147918701, "learning_rate": 2.498e-05, "loss": 0.4066, "step": 4999 }, { "epoch": 0.27998656064508903, "grad_norm": 1.1011120080947876, "learning_rate": 2.4985e-05, "loss": 0.3026, "step": 5000 }, { "epoch": 0.28004255795721805, "grad_norm": 1.4148776531219482, "learning_rate": 2.4990000000000003e-05, "loss": 0.5396, "step": 5001 }, { "epoch": 0.28009855526934707, "grad_norm": 1.0133522748947144, "learning_rate": 2.4995e-05, "loss": 0.3985, "step": 5002 }, { "epoch": 0.2801545525814761, "grad_norm": 1.296737790107727, "learning_rate": 2.5e-05, "loss": 0.4786, "step": 5003 }, { "epoch": 0.2802105498936051, "grad_norm": 1.4818477630615234, "learning_rate": 2.5005000000000002e-05, "loss": 0.5541, "step": 5004 }, { "epoch": 0.28026654720573413, "grad_norm": 1.2512661218643188, "learning_rate": 2.501e-05, "loss": 0.4498, "step": 5005 }, { "epoch": 0.28032254451786315, "grad_norm": 1.0812656879425049, "learning_rate": 2.5015e-05, "loss": 0.3263, "step": 5006 }, { "epoch": 0.28037854182999217, "grad_norm": 1.1355310678482056, "learning_rate": 2.5019999999999998e-05, "loss": 0.4299, "step": 5007 }, { "epoch": 0.2804345391421212, "grad_norm": 1.1510392427444458, "learning_rate": 2.5025e-05, "loss": 0.4134, "step": 5008 }, { "epoch": 0.2804905364542502, "grad_norm": 1.1566981077194214, "learning_rate": 2.5030000000000003e-05, "loss": 0.3365, "step": 5009 }, { "epoch": 0.2805465337663792, "grad_norm": 1.1639187335968018, "learning_rate": 2.5035000000000003e-05, "loss": 0.4559, "step": 5010 }, { "epoch": 0.28060253107850824, "grad_norm": 1.1393630504608154, "learning_rate": 2.504e-05, "loss": 0.3888, "step": 5011 }, { "epoch": 0.28065852839063726, "grad_norm": 1.2431306838989258, "learning_rate": 2.5045e-05, "loss": 0.5282, "step": 5012 }, { "epoch": 0.2807145257027663, "grad_norm": 1.3750158548355103, "learning_rate": 2.5050000000000002e-05, "loss": 0.6099, "step": 5013 }, { "epoch": 0.2807705230148953, "grad_norm": 1.3600974082946777, "learning_rate": 2.5055e-05, 
"loss": 0.3914, "step": 5014 }, { "epoch": 0.2808265203270243, "grad_norm": 1.1310267448425293, "learning_rate": 2.506e-05, "loss": 0.3774, "step": 5015 }, { "epoch": 0.28088251763915334, "grad_norm": 1.2618962526321411, "learning_rate": 2.5064999999999998e-05, "loss": 0.4592, "step": 5016 }, { "epoch": 0.28093851495128236, "grad_norm": 1.211580514907837, "learning_rate": 2.507e-05, "loss": 0.4236, "step": 5017 }, { "epoch": 0.2809945122634114, "grad_norm": 1.16752028465271, "learning_rate": 2.5075e-05, "loss": 0.3997, "step": 5018 }, { "epoch": 0.2810505095755404, "grad_norm": 1.2811846733093262, "learning_rate": 2.5080000000000004e-05, "loss": 0.4685, "step": 5019 }, { "epoch": 0.2811065068876694, "grad_norm": 1.071851372718811, "learning_rate": 2.5085000000000005e-05, "loss": 0.3922, "step": 5020 }, { "epoch": 0.28116250419979844, "grad_norm": 1.2484806776046753, "learning_rate": 2.5090000000000002e-05, "loss": 0.4412, "step": 5021 }, { "epoch": 0.28121850151192745, "grad_norm": 1.1056876182556152, "learning_rate": 2.5095000000000003e-05, "loss": 0.4058, "step": 5022 }, { "epoch": 0.2812744988240564, "grad_norm": 1.1852781772613525, "learning_rate": 2.51e-05, "loss": 0.4035, "step": 5023 }, { "epoch": 0.28133049613618544, "grad_norm": 1.5772156715393066, "learning_rate": 2.5105e-05, "loss": 0.4305, "step": 5024 }, { "epoch": 0.28138649344831446, "grad_norm": 1.1575450897216797, "learning_rate": 2.5110000000000002e-05, "loss": 0.4063, "step": 5025 }, { "epoch": 0.2814424907604435, "grad_norm": 1.2748372554779053, "learning_rate": 2.5115e-05, "loss": 0.4925, "step": 5026 }, { "epoch": 0.2814984880725725, "grad_norm": 1.3930341005325317, "learning_rate": 2.512e-05, "loss": 0.441, "step": 5027 }, { "epoch": 0.2815544853847015, "grad_norm": 1.5599757432937622, "learning_rate": 2.5124999999999997e-05, "loss": 0.4644, "step": 5028 }, { "epoch": 0.28161048269683053, "grad_norm": 1.35392427444458, "learning_rate": 2.5130000000000005e-05, "loss": 0.5732, "step": 5029 }, { 
"epoch": 0.28166648000895955, "grad_norm": 2.7221648693084717, "learning_rate": 2.5135000000000002e-05, "loss": 0.3834, "step": 5030 }, { "epoch": 0.28172247732108857, "grad_norm": 1.168181300163269, "learning_rate": 2.5140000000000003e-05, "loss": 0.4182, "step": 5031 }, { "epoch": 0.2817784746332176, "grad_norm": 1.1578096151351929, "learning_rate": 2.5145e-05, "loss": 0.3981, "step": 5032 }, { "epoch": 0.2818344719453466, "grad_norm": 1.066641926765442, "learning_rate": 2.515e-05, "loss": 0.4481, "step": 5033 }, { "epoch": 0.28189046925747563, "grad_norm": 1.20148766040802, "learning_rate": 2.5155000000000002e-05, "loss": 0.4609, "step": 5034 }, { "epoch": 0.28194646656960465, "grad_norm": 1.1593502759933472, "learning_rate": 2.516e-05, "loss": 0.5019, "step": 5035 }, { "epoch": 0.28200246388173367, "grad_norm": 1.2151565551757812, "learning_rate": 2.5165e-05, "loss": 0.4255, "step": 5036 }, { "epoch": 0.2820584611938627, "grad_norm": 1.204663872718811, "learning_rate": 2.5169999999999998e-05, "loss": 0.4624, "step": 5037 }, { "epoch": 0.2821144585059917, "grad_norm": 1.0738803148269653, "learning_rate": 2.5175e-05, "loss": 0.4004, "step": 5038 }, { "epoch": 0.2821704558181207, "grad_norm": 1.1140012741088867, "learning_rate": 2.5180000000000003e-05, "loss": 0.3801, "step": 5039 }, { "epoch": 0.28222645313024974, "grad_norm": 1.0218623876571655, "learning_rate": 2.5185000000000004e-05, "loss": 0.3478, "step": 5040 }, { "epoch": 0.28228245044237876, "grad_norm": 1.285437822341919, "learning_rate": 2.519e-05, "loss": 0.4935, "step": 5041 }, { "epoch": 0.2823384477545078, "grad_norm": 1.3076173067092896, "learning_rate": 2.5195000000000002e-05, "loss": 0.3867, "step": 5042 }, { "epoch": 0.2823944450666368, "grad_norm": 1.273854374885559, "learning_rate": 2.5200000000000003e-05, "loss": 0.4336, "step": 5043 }, { "epoch": 0.2824504423787658, "grad_norm": 1.2694003582000732, "learning_rate": 2.5205e-05, "loss": 0.4235, "step": 5044 }, { "epoch": 0.28250643969089484, 
"grad_norm": 1.1363548040390015, "learning_rate": 2.521e-05, "loss": 0.3504, "step": 5045 }, { "epoch": 0.28256243700302386, "grad_norm": 1.2498666048049927, "learning_rate": 2.5214999999999998e-05, "loss": 0.4245, "step": 5046 }, { "epoch": 0.2826184343151529, "grad_norm": 1.1381645202636719, "learning_rate": 2.522e-05, "loss": 0.3956, "step": 5047 }, { "epoch": 0.2826744316272819, "grad_norm": 1.3356531858444214, "learning_rate": 2.5225e-05, "loss": 0.4546, "step": 5048 }, { "epoch": 0.2827304289394109, "grad_norm": 1.0721495151519775, "learning_rate": 2.5230000000000004e-05, "loss": 0.5557, "step": 5049 }, { "epoch": 0.28278642625153994, "grad_norm": 1.2902302742004395, "learning_rate": 2.5235e-05, "loss": 0.5123, "step": 5050 }, { "epoch": 0.28284242356366895, "grad_norm": 1.2670661211013794, "learning_rate": 2.5240000000000002e-05, "loss": 0.4056, "step": 5051 }, { "epoch": 0.282898420875798, "grad_norm": 1.1348469257354736, "learning_rate": 2.5245000000000003e-05, "loss": 0.4311, "step": 5052 }, { "epoch": 0.282954418187927, "grad_norm": 1.2487995624542236, "learning_rate": 2.525e-05, "loss": 0.4668, "step": 5053 }, { "epoch": 0.283010415500056, "grad_norm": 1.5302445888519287, "learning_rate": 2.5255e-05, "loss": 0.4978, "step": 5054 }, { "epoch": 0.28306641281218503, "grad_norm": 1.219663143157959, "learning_rate": 2.526e-05, "loss": 0.3641, "step": 5055 }, { "epoch": 0.28312241012431405, "grad_norm": 1.0319263935089111, "learning_rate": 2.5265e-05, "loss": 0.3338, "step": 5056 }, { "epoch": 0.28317840743644307, "grad_norm": 1.1401755809783936, "learning_rate": 2.527e-05, "loss": 0.3796, "step": 5057 }, { "epoch": 0.2832344047485721, "grad_norm": 1.1127512454986572, "learning_rate": 2.5274999999999998e-05, "loss": 0.5296, "step": 5058 }, { "epoch": 0.2832904020607011, "grad_norm": 1.3074079751968384, "learning_rate": 2.5280000000000005e-05, "loss": 0.3188, "step": 5059 }, { "epoch": 0.2833463993728301, "grad_norm": 1.3833627700805664, "learning_rate": 
2.5285000000000003e-05, "loss": 0.5894, "step": 5060 }, { "epoch": 0.28340239668495915, "grad_norm": 1.3308225870132446, "learning_rate": 2.5290000000000004e-05, "loss": 0.4272, "step": 5061 }, { "epoch": 0.28345839399708816, "grad_norm": 1.2617563009262085, "learning_rate": 2.5295e-05, "loss": 0.3797, "step": 5062 }, { "epoch": 0.2835143913092172, "grad_norm": 1.2066013813018799, "learning_rate": 2.5300000000000002e-05, "loss": 0.4103, "step": 5063 }, { "epoch": 0.28357038862134615, "grad_norm": 1.3341902494430542, "learning_rate": 2.5305000000000003e-05, "loss": 0.3874, "step": 5064 }, { "epoch": 0.28362638593347517, "grad_norm": 1.2941293716430664, "learning_rate": 2.531e-05, "loss": 0.3671, "step": 5065 }, { "epoch": 0.2836823832456042, "grad_norm": 0.9018704891204834, "learning_rate": 2.5315e-05, "loss": 0.3604, "step": 5066 }, { "epoch": 0.2837383805577332, "grad_norm": 4.461050510406494, "learning_rate": 2.5319999999999998e-05, "loss": 0.4973, "step": 5067 }, { "epoch": 0.2837943778698622, "grad_norm": 1.8063386678695679, "learning_rate": 2.5325e-05, "loss": 0.4332, "step": 5068 }, { "epoch": 0.28385037518199124, "grad_norm": 1.1380176544189453, "learning_rate": 2.5330000000000003e-05, "loss": 0.3468, "step": 5069 }, { "epoch": 0.28390637249412026, "grad_norm": 1.3340381383895874, "learning_rate": 2.5335000000000004e-05, "loss": 0.4126, "step": 5070 }, { "epoch": 0.2839623698062493, "grad_norm": 1.4871255159378052, "learning_rate": 2.534e-05, "loss": 0.5937, "step": 5071 }, { "epoch": 0.2840183671183783, "grad_norm": 1.2137213945388794, "learning_rate": 2.5345000000000002e-05, "loss": 0.3692, "step": 5072 }, { "epoch": 0.2840743644305073, "grad_norm": 1.7437152862548828, "learning_rate": 2.5350000000000003e-05, "loss": 0.4339, "step": 5073 }, { "epoch": 0.28413036174263634, "grad_norm": 1.2067270278930664, "learning_rate": 2.5355e-05, "loss": 0.4506, "step": 5074 }, { "epoch": 0.28418635905476536, "grad_norm": 1.5555673837661743, "learning_rate": 2.536e-05, 
"loss": 0.4121, "step": 5075 }, { "epoch": 0.2842423563668944, "grad_norm": 1.3813008069992065, "learning_rate": 2.5365e-05, "loss": 0.4734, "step": 5076 }, { "epoch": 0.2842983536790234, "grad_norm": 1.4264832735061646, "learning_rate": 2.537e-05, "loss": 0.4616, "step": 5077 }, { "epoch": 0.2843543509911524, "grad_norm": 1.092544674873352, "learning_rate": 2.5375e-05, "loss": 0.3485, "step": 5078 }, { "epoch": 0.28441034830328144, "grad_norm": 1.1917117834091187, "learning_rate": 2.5380000000000004e-05, "loss": 0.5205, "step": 5079 }, { "epoch": 0.28446634561541045, "grad_norm": 1.355815052986145, "learning_rate": 2.5385000000000002e-05, "loss": 0.4565, "step": 5080 }, { "epoch": 0.2845223429275395, "grad_norm": 1.0870096683502197, "learning_rate": 2.5390000000000003e-05, "loss": 0.4144, "step": 5081 }, { "epoch": 0.2845783402396685, "grad_norm": 1.094519853591919, "learning_rate": 2.5395000000000003e-05, "loss": 0.4035, "step": 5082 }, { "epoch": 0.2846343375517975, "grad_norm": 1.3015124797821045, "learning_rate": 2.54e-05, "loss": 0.4412, "step": 5083 }, { "epoch": 0.28469033486392653, "grad_norm": 1.3095399141311646, "learning_rate": 2.5405e-05, "loss": 0.3885, "step": 5084 }, { "epoch": 0.28474633217605555, "grad_norm": 1.2276149988174438, "learning_rate": 2.541e-05, "loss": 0.4867, "step": 5085 }, { "epoch": 0.28480232948818457, "grad_norm": 1.4077141284942627, "learning_rate": 2.5415e-05, "loss": 0.4127, "step": 5086 }, { "epoch": 0.2848583268003136, "grad_norm": 7.861964702606201, "learning_rate": 2.542e-05, "loss": 0.372, "step": 5087 }, { "epoch": 0.2849143241124426, "grad_norm": 1.4856375455856323, "learning_rate": 2.5424999999999998e-05, "loss": 0.5014, "step": 5088 }, { "epoch": 0.2849703214245716, "grad_norm": 1.215444564819336, "learning_rate": 2.5430000000000002e-05, "loss": 0.4289, "step": 5089 }, { "epoch": 0.28502631873670065, "grad_norm": 1.1757819652557373, "learning_rate": 2.5435000000000003e-05, "loss": 0.4394, "step": 5090 }, { "epoch": 
0.28508231604882966, "grad_norm": 1.2638258934020996, "learning_rate": 2.5440000000000004e-05, "loss": 0.4226, "step": 5091 }, { "epoch": 0.2851383133609587, "grad_norm": 1.574288010597229, "learning_rate": 2.5445e-05, "loss": 0.3677, "step": 5092 }, { "epoch": 0.2851943106730877, "grad_norm": 1.1244778633117676, "learning_rate": 2.5450000000000002e-05, "loss": 0.4044, "step": 5093 }, { "epoch": 0.2852503079852167, "grad_norm": 1.2923802137374878, "learning_rate": 2.5455e-05, "loss": 0.4191, "step": 5094 }, { "epoch": 0.28530630529734574, "grad_norm": 1.1234909296035767, "learning_rate": 2.546e-05, "loss": 0.5451, "step": 5095 }, { "epoch": 0.28536230260947476, "grad_norm": 0.9855756759643555, "learning_rate": 2.5465e-05, "loss": 0.4568, "step": 5096 }, { "epoch": 0.2854182999216038, "grad_norm": 1.175218105316162, "learning_rate": 2.547e-05, "loss": 0.3712, "step": 5097 }, { "epoch": 0.2854742972337328, "grad_norm": 0.9445306658744812, "learning_rate": 2.5475e-05, "loss": 0.3229, "step": 5098 }, { "epoch": 0.2855302945458618, "grad_norm": 1.227358102798462, "learning_rate": 2.5480000000000003e-05, "loss": 0.4326, "step": 5099 }, { "epoch": 0.28558629185799084, "grad_norm": 1.0580247640609741, "learning_rate": 2.5485000000000004e-05, "loss": 0.3549, "step": 5100 }, { "epoch": 0.28564228917011986, "grad_norm": 1.212801218032837, "learning_rate": 2.549e-05, "loss": 0.4072, "step": 5101 }, { "epoch": 0.2856982864822489, "grad_norm": 1.2784329652786255, "learning_rate": 2.5495000000000002e-05, "loss": 0.4814, "step": 5102 }, { "epoch": 0.2857542837943779, "grad_norm": 1.2336822748184204, "learning_rate": 2.5500000000000003e-05, "loss": 0.4123, "step": 5103 }, { "epoch": 0.2858102811065069, "grad_norm": 1.3024848699569702, "learning_rate": 2.5505e-05, "loss": 0.3914, "step": 5104 }, { "epoch": 0.28586627841863593, "grad_norm": 1.5754663944244385, "learning_rate": 2.551e-05, "loss": 0.5698, "step": 5105 }, { "epoch": 0.2859222757307649, "grad_norm": 1.086853265762329, 
"learning_rate": 2.5515e-05, "loss": 0.4066, "step": 5106 }, { "epoch": 0.2859782730428939, "grad_norm": 1.2721822261810303, "learning_rate": 2.552e-05, "loss": 0.4683, "step": 5107 }, { "epoch": 0.28603427035502293, "grad_norm": 1.3510558605194092, "learning_rate": 2.5525e-05, "loss": 0.4937, "step": 5108 }, { "epoch": 0.28609026766715195, "grad_norm": 1.0186558961868286, "learning_rate": 2.5530000000000005e-05, "loss": 0.3634, "step": 5109 }, { "epoch": 0.286146264979281, "grad_norm": 1.2649930715560913, "learning_rate": 2.5535000000000002e-05, "loss": 0.4996, "step": 5110 }, { "epoch": 0.28620226229141, "grad_norm": 1.2046400308609009, "learning_rate": 2.5540000000000003e-05, "loss": 0.4747, "step": 5111 }, { "epoch": 0.286258259603539, "grad_norm": 1.1831685304641724, "learning_rate": 2.5545000000000004e-05, "loss": 0.4545, "step": 5112 }, { "epoch": 0.28631425691566803, "grad_norm": 1.6218205690383911, "learning_rate": 2.555e-05, "loss": 0.6066, "step": 5113 }, { "epoch": 0.28637025422779705, "grad_norm": 1.156431794166565, "learning_rate": 2.5555000000000002e-05, "loss": 0.3627, "step": 5114 }, { "epoch": 0.28642625153992607, "grad_norm": 1.2976090908050537, "learning_rate": 2.556e-05, "loss": 0.4457, "step": 5115 }, { "epoch": 0.2864822488520551, "grad_norm": 1.2460031509399414, "learning_rate": 2.5565e-05, "loss": 0.6375, "step": 5116 }, { "epoch": 0.2865382461641841, "grad_norm": 1.0107266902923584, "learning_rate": 2.557e-05, "loss": 0.3809, "step": 5117 }, { "epoch": 0.2865942434763131, "grad_norm": 1.262149691581726, "learning_rate": 2.5574999999999998e-05, "loss": 0.3705, "step": 5118 }, { "epoch": 0.28665024078844215, "grad_norm": 1.5213390588760376, "learning_rate": 2.5580000000000002e-05, "loss": 0.4707, "step": 5119 }, { "epoch": 0.28670623810057116, "grad_norm": 1.0707709789276123, "learning_rate": 2.5585000000000003e-05, "loss": 0.4222, "step": 5120 }, { "epoch": 0.2867622354127002, "grad_norm": 1.4900261163711548, "learning_rate": 
2.5590000000000004e-05, "loss": 0.4853, "step": 5121 }, { "epoch": 0.2868182327248292, "grad_norm": 1.1637797355651855, "learning_rate": 2.5595e-05, "loss": 0.3548, "step": 5122 }, { "epoch": 0.2868742300369582, "grad_norm": 1.3067561388015747, "learning_rate": 2.5600000000000002e-05, "loss": 0.5567, "step": 5123 }, { "epoch": 0.28693022734908724, "grad_norm": 1.1704126596450806, "learning_rate": 2.5605e-05, "loss": 0.4554, "step": 5124 }, { "epoch": 0.28698622466121626, "grad_norm": 2.44575834274292, "learning_rate": 2.561e-05, "loss": 0.4063, "step": 5125 }, { "epoch": 0.2870422219733453, "grad_norm": 1.180340051651001, "learning_rate": 2.5615e-05, "loss": 0.3816, "step": 5126 }, { "epoch": 0.2870982192854743, "grad_norm": 1.2813947200775146, "learning_rate": 2.562e-05, "loss": 0.5276, "step": 5127 }, { "epoch": 0.2871542165976033, "grad_norm": 1.2095357179641724, "learning_rate": 2.5625e-05, "loss": 0.3904, "step": 5128 }, { "epoch": 0.28721021390973234, "grad_norm": 1.0724081993103027, "learning_rate": 2.5629999999999997e-05, "loss": 0.4676, "step": 5129 }, { "epoch": 0.28726621122186136, "grad_norm": 1.3954498767852783, "learning_rate": 2.5635000000000004e-05, "loss": 0.4394, "step": 5130 }, { "epoch": 0.2873222085339904, "grad_norm": 1.6469731330871582, "learning_rate": 2.5640000000000002e-05, "loss": 0.5405, "step": 5131 }, { "epoch": 0.2873782058461194, "grad_norm": 1.2044121026992798, "learning_rate": 2.5645000000000003e-05, "loss": 0.4052, "step": 5132 }, { "epoch": 0.2874342031582484, "grad_norm": 1.0413527488708496, "learning_rate": 2.5650000000000003e-05, "loss": 0.2997, "step": 5133 }, { "epoch": 0.28749020047037743, "grad_norm": 1.354501485824585, "learning_rate": 2.5655e-05, "loss": 0.3402, "step": 5134 }, { "epoch": 0.28754619778250645, "grad_norm": 1.4380000829696655, "learning_rate": 2.566e-05, "loss": 0.4822, "step": 5135 }, { "epoch": 0.28760219509463547, "grad_norm": 1.2316100597381592, "learning_rate": 2.5665e-05, "loss": 0.5041, "step": 5136 
}, { "epoch": 0.2876581924067645, "grad_norm": 1.3962647914886475, "learning_rate": 2.567e-05, "loss": 0.5794, "step": 5137 }, { "epoch": 0.2877141897188935, "grad_norm": 1.6493854522705078, "learning_rate": 2.5675e-05, "loss": 0.5601, "step": 5138 }, { "epoch": 0.28777018703102253, "grad_norm": 1.103853464126587, "learning_rate": 2.5679999999999998e-05, "loss": 0.617, "step": 5139 }, { "epoch": 0.28782618434315155, "grad_norm": 1.2031441926956177, "learning_rate": 2.5685000000000002e-05, "loss": 0.4776, "step": 5140 }, { "epoch": 0.28788218165528057, "grad_norm": 1.3442846536636353, "learning_rate": 2.5690000000000003e-05, "loss": 0.4194, "step": 5141 }, { "epoch": 0.2879381789674096, "grad_norm": 1.297261118888855, "learning_rate": 2.5695000000000004e-05, "loss": 0.5212, "step": 5142 }, { "epoch": 0.2879941762795386, "grad_norm": 1.2500529289245605, "learning_rate": 2.57e-05, "loss": 0.4298, "step": 5143 }, { "epoch": 0.2880501735916676, "grad_norm": 1.1209262609481812, "learning_rate": 2.5705000000000002e-05, "loss": 0.3535, "step": 5144 }, { "epoch": 0.28810617090379664, "grad_norm": 1.3996305465698242, "learning_rate": 2.571e-05, "loss": 0.4195, "step": 5145 }, { "epoch": 0.28816216821592566, "grad_norm": 1.3168365955352783, "learning_rate": 2.5715e-05, "loss": 0.4505, "step": 5146 }, { "epoch": 0.2882181655280546, "grad_norm": 1.3115452527999878, "learning_rate": 2.572e-05, "loss": 0.4509, "step": 5147 }, { "epoch": 0.28827416284018365, "grad_norm": 1.2151601314544678, "learning_rate": 2.5725e-05, "loss": 0.5359, "step": 5148 }, { "epoch": 0.28833016015231266, "grad_norm": 1.0965923070907593, "learning_rate": 2.573e-05, "loss": 0.4087, "step": 5149 }, { "epoch": 0.2883861574644417, "grad_norm": 1.2941659688949585, "learning_rate": 2.5735000000000003e-05, "loss": 0.4825, "step": 5150 }, { "epoch": 0.2884421547765707, "grad_norm": 1.1040208339691162, "learning_rate": 2.5740000000000004e-05, "loss": 0.4537, "step": 5151 }, { "epoch": 0.2884981520886997, 
"grad_norm": 1.198751449584961, "learning_rate": 2.5745e-05, "loss": 0.4545, "step": 5152 }, { "epoch": 0.28855414940082874, "grad_norm": 1.3784911632537842, "learning_rate": 2.5750000000000002e-05, "loss": 0.4048, "step": 5153 }, { "epoch": 0.28861014671295776, "grad_norm": 1.2006009817123413, "learning_rate": 2.5755e-05, "loss": 0.4406, "step": 5154 }, { "epoch": 0.2886661440250868, "grad_norm": 1.1796938180923462, "learning_rate": 2.576e-05, "loss": 0.4501, "step": 5155 }, { "epoch": 0.2887221413372158, "grad_norm": 1.0965385437011719, "learning_rate": 2.5765e-05, "loss": 0.3665, "step": 5156 }, { "epoch": 0.2887781386493448, "grad_norm": 1.062350869178772, "learning_rate": 2.577e-05, "loss": 0.3916, "step": 5157 }, { "epoch": 0.28883413596147384, "grad_norm": 1.24132239818573, "learning_rate": 2.5775e-05, "loss": 0.3841, "step": 5158 }, { "epoch": 0.28889013327360286, "grad_norm": 1.4969040155410767, "learning_rate": 2.5779999999999997e-05, "loss": 0.5066, "step": 5159 }, { "epoch": 0.2889461305857319, "grad_norm": 1.4512512683868408, "learning_rate": 2.5785000000000005e-05, "loss": 0.4499, "step": 5160 }, { "epoch": 0.2890021278978609, "grad_norm": 1.1753685474395752, "learning_rate": 2.5790000000000002e-05, "loss": 0.3539, "step": 5161 }, { "epoch": 0.2890581252099899, "grad_norm": 1.3325785398483276, "learning_rate": 2.5795000000000003e-05, "loss": 0.5696, "step": 5162 }, { "epoch": 0.28911412252211893, "grad_norm": 1.0597665309906006, "learning_rate": 2.58e-05, "loss": 0.3675, "step": 5163 }, { "epoch": 0.28917011983424795, "grad_norm": 1.442156195640564, "learning_rate": 2.5805e-05, "loss": 0.4321, "step": 5164 }, { "epoch": 0.28922611714637697, "grad_norm": 1.0994163751602173, "learning_rate": 2.5810000000000002e-05, "loss": 0.3789, "step": 5165 }, { "epoch": 0.289282114458506, "grad_norm": 1.2429791688919067, "learning_rate": 2.5815e-05, "loss": 0.4146, "step": 5166 }, { "epoch": 0.289338111770635, "grad_norm": 1.4033571481704712, "learning_rate": 
2.582e-05, "loss": 0.4169, "step": 5167 }, { "epoch": 0.28939410908276403, "grad_norm": 1.0820399522781372, "learning_rate": 2.5824999999999998e-05, "loss": 0.3315, "step": 5168 }, { "epoch": 0.28945010639489305, "grad_norm": 1.3229745626449585, "learning_rate": 2.583e-05, "loss": 0.4174, "step": 5169 }, { "epoch": 0.28950610370702207, "grad_norm": 1.4531428813934326, "learning_rate": 2.5835000000000003e-05, "loss": 0.5334, "step": 5170 }, { "epoch": 0.2895621010191511, "grad_norm": 1.3618649244308472, "learning_rate": 2.5840000000000003e-05, "loss": 0.4907, "step": 5171 }, { "epoch": 0.2896180983312801, "grad_norm": 1.25014066696167, "learning_rate": 2.5845000000000004e-05, "loss": 0.465, "step": 5172 }, { "epoch": 0.2896740956434091, "grad_norm": 1.3573648929595947, "learning_rate": 2.585e-05, "loss": 0.4262, "step": 5173 }, { "epoch": 0.28973009295553814, "grad_norm": 1.3631477355957031, "learning_rate": 2.5855000000000002e-05, "loss": 0.5082, "step": 5174 }, { "epoch": 0.28978609026766716, "grad_norm": 1.305877447128296, "learning_rate": 2.586e-05, "loss": 0.497, "step": 5175 }, { "epoch": 0.2898420875797962, "grad_norm": 1.152976632118225, "learning_rate": 2.5865e-05, "loss": 0.437, "step": 5176 }, { "epoch": 0.2898980848919252, "grad_norm": 1.2623867988586426, "learning_rate": 2.587e-05, "loss": 0.4915, "step": 5177 }, { "epoch": 0.2899540822040542, "grad_norm": 1.0398708581924438, "learning_rate": 2.5875e-05, "loss": 0.435, "step": 5178 }, { "epoch": 0.29001007951618324, "grad_norm": 1.1778053045272827, "learning_rate": 2.588e-05, "loss": 0.524, "step": 5179 }, { "epoch": 0.29006607682831226, "grad_norm": 1.231372356414795, "learning_rate": 2.5885000000000004e-05, "loss": 0.4914, "step": 5180 }, { "epoch": 0.2901220741404413, "grad_norm": 1.2939479351043701, "learning_rate": 2.5890000000000005e-05, "loss": 0.3787, "step": 5181 }, { "epoch": 0.2901780714525703, "grad_norm": 1.2962067127227783, "learning_rate": 2.5895000000000002e-05, "loss": 0.468, "step": 
5182 }, { "epoch": 0.2902340687646993, "grad_norm": 1.5894113779067993, "learning_rate": 2.5900000000000003e-05, "loss": 0.5801, "step": 5183 }, { "epoch": 0.29029006607682833, "grad_norm": 1.2547224760055542, "learning_rate": 2.5905e-05, "loss": 0.4094, "step": 5184 }, { "epoch": 0.29034606338895735, "grad_norm": 2.165175676345825, "learning_rate": 2.591e-05, "loss": 0.4425, "step": 5185 }, { "epoch": 0.2904020607010864, "grad_norm": 1.166042685508728, "learning_rate": 2.5915000000000002e-05, "loss": 0.376, "step": 5186 }, { "epoch": 0.2904580580132154, "grad_norm": 1.2118500471115112, "learning_rate": 2.592e-05, "loss": 0.3256, "step": 5187 }, { "epoch": 0.29051405532534436, "grad_norm": 1.4399436712265015, "learning_rate": 2.5925e-05, "loss": 0.5189, "step": 5188 }, { "epoch": 0.2905700526374734, "grad_norm": 1.0340296030044556, "learning_rate": 2.5929999999999997e-05, "loss": 0.2661, "step": 5189 }, { "epoch": 0.2906260499496024, "grad_norm": 1.224493384361267, "learning_rate": 2.5935000000000005e-05, "loss": 0.4585, "step": 5190 }, { "epoch": 0.2906820472617314, "grad_norm": 1.3201141357421875, "learning_rate": 2.5940000000000002e-05, "loss": 0.4362, "step": 5191 }, { "epoch": 0.29073804457386043, "grad_norm": 1.560363531112671, "learning_rate": 2.5945000000000003e-05, "loss": 0.5274, "step": 5192 }, { "epoch": 0.29079404188598945, "grad_norm": 1.2800922393798828, "learning_rate": 2.595e-05, "loss": 0.3426, "step": 5193 }, { "epoch": 0.29085003919811847, "grad_norm": 1.2105079889297485, "learning_rate": 2.5955e-05, "loss": 0.362, "step": 5194 }, { "epoch": 0.2909060365102475, "grad_norm": 1.2776095867156982, "learning_rate": 2.5960000000000002e-05, "loss": 0.3604, "step": 5195 }, { "epoch": 0.2909620338223765, "grad_norm": 1.4299017190933228, "learning_rate": 2.5965e-05, "loss": 0.4469, "step": 5196 }, { "epoch": 0.29101803113450553, "grad_norm": 1.1238104104995728, "learning_rate": 2.597e-05, "loss": 0.3655, "step": 5197 }, { "epoch": 0.29107402844663455, 
"grad_norm": 1.1724493503570557, "learning_rate": 2.5974999999999998e-05, "loss": 0.4825, "step": 5198 }, { "epoch": 0.29113002575876357, "grad_norm": 1.156067967414856, "learning_rate": 2.598e-05, "loss": 0.4406, "step": 5199 }, { "epoch": 0.2911860230708926, "grad_norm": 1.392245888710022, "learning_rate": 2.5985000000000003e-05, "loss": 0.4914, "step": 5200 }, { "epoch": 0.2912420203830216, "grad_norm": 1.2607609033584595, "learning_rate": 2.5990000000000004e-05, "loss": 0.3389, "step": 5201 }, { "epoch": 0.2912980176951506, "grad_norm": 1.2951900959014893, "learning_rate": 2.5995000000000004e-05, "loss": 0.5232, "step": 5202 }, { "epoch": 0.29135401500727964, "grad_norm": 1.1071255207061768, "learning_rate": 2.6000000000000002e-05, "loss": 0.3389, "step": 5203 }, { "epoch": 0.29141001231940866, "grad_norm": 1.5917093753814697, "learning_rate": 2.6005000000000003e-05, "loss": 0.4165, "step": 5204 }, { "epoch": 0.2914660096315377, "grad_norm": 1.740160346031189, "learning_rate": 2.601e-05, "loss": 0.5188, "step": 5205 }, { "epoch": 0.2915220069436667, "grad_norm": 1.6251461505889893, "learning_rate": 2.6015e-05, "loss": 0.6349, "step": 5206 }, { "epoch": 0.2915780042557957, "grad_norm": 1.1782416105270386, "learning_rate": 2.602e-05, "loss": 0.4572, "step": 5207 }, { "epoch": 0.29163400156792474, "grad_norm": 1.1010757684707642, "learning_rate": 2.6025e-05, "loss": 0.3086, "step": 5208 }, { "epoch": 0.29168999888005376, "grad_norm": 1.2914388179779053, "learning_rate": 2.603e-05, "loss": 0.4165, "step": 5209 }, { "epoch": 0.2917459961921828, "grad_norm": 1.0075737237930298, "learning_rate": 2.6035000000000004e-05, "loss": 0.3249, "step": 5210 }, { "epoch": 0.2918019935043118, "grad_norm": 1.1916017532348633, "learning_rate": 2.6040000000000005e-05, "loss": 0.4307, "step": 5211 }, { "epoch": 0.2918579908164408, "grad_norm": 1.2970917224884033, "learning_rate": 2.6045000000000002e-05, "loss": 0.5413, "step": 5212 }, { "epoch": 0.29191398812856983, "grad_norm": 
1.0869152545928955, "learning_rate": 2.6050000000000003e-05, "loss": 0.3936, "step": 5213 }, { "epoch": 0.29196998544069885, "grad_norm": 1.3564529418945312, "learning_rate": 2.6055e-05, "loss": 0.3797, "step": 5214 }, { "epoch": 0.2920259827528279, "grad_norm": 1.2981070280075073, "learning_rate": 2.606e-05, "loss": 0.4802, "step": 5215 }, { "epoch": 0.2920819800649569, "grad_norm": 1.3548084497451782, "learning_rate": 2.6065000000000002e-05, "loss": 0.3205, "step": 5216 }, { "epoch": 0.2921379773770859, "grad_norm": 1.379685878753662, "learning_rate": 2.607e-05, "loss": 0.4269, "step": 5217 }, { "epoch": 0.29219397468921493, "grad_norm": 1.2204341888427734, "learning_rate": 2.6075e-05, "loss": 0.4196, "step": 5218 }, { "epoch": 0.29224997200134395, "grad_norm": 1.3498417139053345, "learning_rate": 2.6079999999999998e-05, "loss": 0.3891, "step": 5219 }, { "epoch": 0.29230596931347297, "grad_norm": 1.0870332717895508, "learning_rate": 2.6085000000000005e-05, "loss": 0.3441, "step": 5220 }, { "epoch": 0.292361966625602, "grad_norm": 1.2560654878616333, "learning_rate": 2.6090000000000003e-05, "loss": 0.4986, "step": 5221 }, { "epoch": 0.292417963937731, "grad_norm": 1.2160749435424805, "learning_rate": 2.6095000000000003e-05, "loss": 0.5128, "step": 5222 }, { "epoch": 0.29247396124986, "grad_norm": 1.0488581657409668, "learning_rate": 2.61e-05, "loss": 0.334, "step": 5223 }, { "epoch": 0.29252995856198905, "grad_norm": 1.336931824684143, "learning_rate": 2.6105e-05, "loss": 0.4615, "step": 5224 }, { "epoch": 0.29258595587411806, "grad_norm": 1.1971161365509033, "learning_rate": 2.6110000000000002e-05, "loss": 0.5235, "step": 5225 }, { "epoch": 0.2926419531862471, "grad_norm": 1.2192145586013794, "learning_rate": 2.6115e-05, "loss": 0.4185, "step": 5226 }, { "epoch": 0.2926979504983761, "grad_norm": 1.3168421983718872, "learning_rate": 2.612e-05, "loss": 0.4166, "step": 5227 }, { "epoch": 0.2927539478105051, "grad_norm": 1.271488070487976, "learning_rate": 
2.6124999999999998e-05, "loss": 0.5113, "step": 5228 }, { "epoch": 0.29280994512263414, "grad_norm": 1.1534937620162964, "learning_rate": 2.613e-05, "loss": 0.4285, "step": 5229 }, { "epoch": 0.2928659424347631, "grad_norm": 1.4117984771728516, "learning_rate": 2.6135000000000003e-05, "loss": 0.4157, "step": 5230 }, { "epoch": 0.2929219397468921, "grad_norm": 1.0014228820800781, "learning_rate": 2.6140000000000004e-05, "loss": 0.3379, "step": 5231 }, { "epoch": 0.29297793705902114, "grad_norm": 1.0868574380874634, "learning_rate": 2.6145e-05, "loss": 0.4443, "step": 5232 }, { "epoch": 0.29303393437115016, "grad_norm": 1.707353949546814, "learning_rate": 2.6150000000000002e-05, "loss": 0.4213, "step": 5233 }, { "epoch": 0.2930899316832792, "grad_norm": 1.113995909690857, "learning_rate": 2.6155000000000003e-05, "loss": 0.2867, "step": 5234 }, { "epoch": 0.2931459289954082, "grad_norm": 0.9036622047424316, "learning_rate": 2.616e-05, "loss": 0.2777, "step": 5235 }, { "epoch": 0.2932019263075372, "grad_norm": 5.723801136016846, "learning_rate": 2.6165e-05, "loss": 0.4799, "step": 5236 }, { "epoch": 0.29325792361966624, "grad_norm": 1.1289016008377075, "learning_rate": 2.617e-05, "loss": 0.3623, "step": 5237 }, { "epoch": 0.29331392093179526, "grad_norm": 1.2886630296707153, "learning_rate": 2.6175e-05, "loss": 0.3451, "step": 5238 }, { "epoch": 0.2933699182439243, "grad_norm": 1.478747844696045, "learning_rate": 2.618e-05, "loss": 0.4712, "step": 5239 }, { "epoch": 0.2934259155560533, "grad_norm": 1.2470319271087646, "learning_rate": 2.6185000000000004e-05, "loss": 0.4373, "step": 5240 }, { "epoch": 0.2934819128681823, "grad_norm": 1.5660260915756226, "learning_rate": 2.6190000000000005e-05, "loss": 0.5966, "step": 5241 }, { "epoch": 0.29353791018031133, "grad_norm": 1.3090187311172485, "learning_rate": 2.6195000000000002e-05, "loss": 0.4731, "step": 5242 }, { "epoch": 0.29359390749244035, "grad_norm": 1.6193612813949585, "learning_rate": 2.6200000000000003e-05, 
"loss": 0.4805, "step": 5243 }, { "epoch": 0.2936499048045694, "grad_norm": 1.3604973554611206, "learning_rate": 2.6205e-05, "loss": 0.4926, "step": 5244 }, { "epoch": 0.2937059021166984, "grad_norm": 1.1638587713241577, "learning_rate": 2.621e-05, "loss": 0.4017, "step": 5245 }, { "epoch": 0.2937618994288274, "grad_norm": 1.0777242183685303, "learning_rate": 2.6215000000000002e-05, "loss": 0.371, "step": 5246 }, { "epoch": 0.29381789674095643, "grad_norm": 1.2868236303329468, "learning_rate": 2.622e-05, "loss": 0.4956, "step": 5247 }, { "epoch": 0.29387389405308545, "grad_norm": 1.0613162517547607, "learning_rate": 2.6225e-05, "loss": 0.3956, "step": 5248 }, { "epoch": 0.29392989136521447, "grad_norm": 1.9690322875976562, "learning_rate": 2.6229999999999998e-05, "loss": 0.4669, "step": 5249 }, { "epoch": 0.2939858886773435, "grad_norm": 1.1180588006973267, "learning_rate": 2.6235000000000005e-05, "loss": 0.4524, "step": 5250 }, { "epoch": 0.2940418859894725, "grad_norm": 1.1822772026062012, "learning_rate": 2.6240000000000003e-05, "loss": 0.4241, "step": 5251 }, { "epoch": 0.2940978833016015, "grad_norm": 1.2001798152923584, "learning_rate": 2.6245000000000004e-05, "loss": 0.4325, "step": 5252 }, { "epoch": 0.29415388061373055, "grad_norm": 1.5586347579956055, "learning_rate": 2.625e-05, "loss": 0.6026, "step": 5253 }, { "epoch": 0.29420987792585956, "grad_norm": 1.2429914474487305, "learning_rate": 2.6255000000000002e-05, "loss": 0.4933, "step": 5254 }, { "epoch": 0.2942658752379886, "grad_norm": 1.1091340780258179, "learning_rate": 2.6260000000000003e-05, "loss": 0.416, "step": 5255 }, { "epoch": 0.2943218725501176, "grad_norm": 1.7527451515197754, "learning_rate": 2.6265e-05, "loss": 0.656, "step": 5256 }, { "epoch": 0.2943778698622466, "grad_norm": 1.5141398906707764, "learning_rate": 2.627e-05, "loss": 0.6055, "step": 5257 }, { "epoch": 0.29443386717437564, "grad_norm": 1.19267737865448, "learning_rate": 2.6275e-05, "loss": 0.3927, "step": 5258 }, { "epoch": 
0.29448986448650466, "grad_norm": 1.6720216274261475, "learning_rate": 2.628e-05, "loss": 0.5716, "step": 5259 }, { "epoch": 0.2945458617986337, "grad_norm": 1.1752185821533203, "learning_rate": 2.6285e-05, "loss": 0.4485, "step": 5260 }, { "epoch": 0.2946018591107627, "grad_norm": 1.1701414585113525, "learning_rate": 2.6290000000000004e-05, "loss": 0.4355, "step": 5261 }, { "epoch": 0.2946578564228917, "grad_norm": 1.6818978786468506, "learning_rate": 2.6295e-05, "loss": 0.5101, "step": 5262 }, { "epoch": 0.29471385373502074, "grad_norm": 1.0774511098861694, "learning_rate": 2.6300000000000002e-05, "loss": 0.3872, "step": 5263 }, { "epoch": 0.29476985104714976, "grad_norm": 1.1634888648986816, "learning_rate": 2.6305000000000003e-05, "loss": 0.4338, "step": 5264 }, { "epoch": 0.2948258483592788, "grad_norm": 1.1789129972457886, "learning_rate": 2.631e-05, "loss": 0.4044, "step": 5265 }, { "epoch": 0.2948818456714078, "grad_norm": 1.176689624786377, "learning_rate": 2.6315e-05, "loss": 0.4996, "step": 5266 }, { "epoch": 0.2949378429835368, "grad_norm": 1.0302584171295166, "learning_rate": 2.632e-05, "loss": 0.3189, "step": 5267 }, { "epoch": 0.29499384029566583, "grad_norm": 2.2208893299102783, "learning_rate": 2.6325e-05, "loss": 0.6289, "step": 5268 }, { "epoch": 0.29504983760779485, "grad_norm": 1.3514935970306396, "learning_rate": 2.633e-05, "loss": 0.5208, "step": 5269 }, { "epoch": 0.29510583491992387, "grad_norm": 1.4031091928482056, "learning_rate": 2.6334999999999998e-05, "loss": 0.4056, "step": 5270 }, { "epoch": 0.29516183223205283, "grad_norm": 1.3235684633255005, "learning_rate": 2.6340000000000002e-05, "loss": 0.4818, "step": 5271 }, { "epoch": 0.29521782954418185, "grad_norm": 1.2179088592529297, "learning_rate": 2.6345000000000003e-05, "loss": 0.4226, "step": 5272 }, { "epoch": 0.2952738268563109, "grad_norm": 1.4105010032653809, "learning_rate": 2.6350000000000004e-05, "loss": 0.5017, "step": 5273 }, { "epoch": 0.2953298241684399, "grad_norm": 
3.7600419521331787, "learning_rate": 2.6355e-05, "loss": 0.4592, "step": 5274 }, { "epoch": 0.2953858214805689, "grad_norm": 1.1268954277038574, "learning_rate": 2.6360000000000002e-05, "loss": 0.4334, "step": 5275 }, { "epoch": 0.29544181879269793, "grad_norm": 1.2404820919036865, "learning_rate": 2.6365e-05, "loss": 0.3669, "step": 5276 }, { "epoch": 0.29549781610482695, "grad_norm": 1.1679812669754028, "learning_rate": 2.637e-05, "loss": 0.478, "step": 5277 }, { "epoch": 0.29555381341695597, "grad_norm": 1.0536789894104004, "learning_rate": 2.6375e-05, "loss": 0.3539, "step": 5278 }, { "epoch": 0.295609810729085, "grad_norm": 1.0712404251098633, "learning_rate": 2.6379999999999998e-05, "loss": 0.4798, "step": 5279 }, { "epoch": 0.295665808041214, "grad_norm": 1.2229100465774536, "learning_rate": 2.6385e-05, "loss": 0.3869, "step": 5280 }, { "epoch": 0.295721805353343, "grad_norm": 1.2663319110870361, "learning_rate": 2.6390000000000003e-05, "loss": 0.4911, "step": 5281 }, { "epoch": 0.29577780266547204, "grad_norm": 1.721091628074646, "learning_rate": 2.6395000000000004e-05, "loss": 0.5603, "step": 5282 }, { "epoch": 0.29583379997760106, "grad_norm": 1.3844891786575317, "learning_rate": 2.64e-05, "loss": 0.4141, "step": 5283 }, { "epoch": 0.2958897972897301, "grad_norm": 1.3836339712142944, "learning_rate": 2.6405000000000002e-05, "loss": 0.485, "step": 5284 }, { "epoch": 0.2959457946018591, "grad_norm": 1.2061278820037842, "learning_rate": 2.6410000000000003e-05, "loss": 0.3872, "step": 5285 }, { "epoch": 0.2960017919139881, "grad_norm": 1.4442470073699951, "learning_rate": 2.6415e-05, "loss": 0.3686, "step": 5286 }, { "epoch": 0.29605778922611714, "grad_norm": 1.3720992803573608, "learning_rate": 2.642e-05, "loss": 0.5564, "step": 5287 }, { "epoch": 0.29611378653824616, "grad_norm": 1.1148436069488525, "learning_rate": 2.6425e-05, "loss": 0.3856, "step": 5288 }, { "epoch": 0.2961697838503752, "grad_norm": 1.1165697574615479, "learning_rate": 2.643e-05, "loss": 
0.3491, "step": 5289 }, { "epoch": 0.2962257811625042, "grad_norm": 1.0927644968032837, "learning_rate": 2.6435e-05, "loss": 0.3341, "step": 5290 }, { "epoch": 0.2962817784746332, "grad_norm": 1.2208930253982544, "learning_rate": 2.6440000000000004e-05, "loss": 0.4179, "step": 5291 }, { "epoch": 0.29633777578676224, "grad_norm": 1.1353740692138672, "learning_rate": 2.6445000000000002e-05, "loss": 0.3374, "step": 5292 }, { "epoch": 0.29639377309889126, "grad_norm": 1.3508774042129517, "learning_rate": 2.6450000000000003e-05, "loss": 0.4931, "step": 5293 }, { "epoch": 0.2964497704110203, "grad_norm": 1.188261866569519, "learning_rate": 2.6455000000000003e-05, "loss": 0.3865, "step": 5294 }, { "epoch": 0.2965057677231493, "grad_norm": 1.3219794034957886, "learning_rate": 2.646e-05, "loss": 0.4982, "step": 5295 }, { "epoch": 0.2965617650352783, "grad_norm": 1.3908417224884033, "learning_rate": 2.6465e-05, "loss": 0.5614, "step": 5296 }, { "epoch": 0.29661776234740733, "grad_norm": 0.9926604628562927, "learning_rate": 2.647e-05, "loss": 0.3857, "step": 5297 }, { "epoch": 0.29667375965953635, "grad_norm": 1.7230104207992554, "learning_rate": 2.6475e-05, "loss": 0.5869, "step": 5298 }, { "epoch": 0.29672975697166537, "grad_norm": 1.4793049097061157, "learning_rate": 2.648e-05, "loss": 0.4726, "step": 5299 }, { "epoch": 0.2967857542837944, "grad_norm": 1.1072078943252563, "learning_rate": 2.6484999999999998e-05, "loss": 0.3418, "step": 5300 }, { "epoch": 0.2968417515959234, "grad_norm": 1.1886978149414062, "learning_rate": 2.6490000000000002e-05, "loss": 0.395, "step": 5301 }, { "epoch": 0.29689774890805243, "grad_norm": 1.1559021472930908, "learning_rate": 2.6495000000000003e-05, "loss": 0.4542, "step": 5302 }, { "epoch": 0.29695374622018145, "grad_norm": 1.074407935142517, "learning_rate": 2.6500000000000004e-05, "loss": 0.4714, "step": 5303 }, { "epoch": 0.29700974353231047, "grad_norm": 1.2388737201690674, "learning_rate": 2.6505e-05, "loss": 0.4807, "step": 5304 }, { 
"epoch": 0.2970657408444395, "grad_norm": 1.353883981704712, "learning_rate": 2.6510000000000002e-05, "loss": 0.372, "step": 5305 }, { "epoch": 0.2971217381565685, "grad_norm": 0.8923586010932922, "learning_rate": 2.6515e-05, "loss": 0.254, "step": 5306 }, { "epoch": 0.2971777354686975, "grad_norm": 1.645772933959961, "learning_rate": 2.652e-05, "loss": 0.5328, "step": 5307 }, { "epoch": 0.29723373278082654, "grad_norm": 1.2057169675827026, "learning_rate": 2.6525e-05, "loss": 0.4626, "step": 5308 }, { "epoch": 0.29728973009295556, "grad_norm": 1.567265510559082, "learning_rate": 2.653e-05, "loss": 0.6087, "step": 5309 }, { "epoch": 0.2973457274050846, "grad_norm": 1.189358115196228, "learning_rate": 2.6535e-05, "loss": 0.5019, "step": 5310 }, { "epoch": 0.2974017247172136, "grad_norm": 1.2605701684951782, "learning_rate": 2.6540000000000003e-05, "loss": 0.4445, "step": 5311 }, { "epoch": 0.29745772202934256, "grad_norm": 1.3523656129837036, "learning_rate": 2.6545000000000004e-05, "loss": 0.4203, "step": 5312 }, { "epoch": 0.2975137193414716, "grad_norm": 1.2546820640563965, "learning_rate": 2.655e-05, "loss": 0.3697, "step": 5313 }, { "epoch": 0.2975697166536006, "grad_norm": 1.0982375144958496, "learning_rate": 2.6555000000000002e-05, "loss": 0.3868, "step": 5314 }, { "epoch": 0.2976257139657296, "grad_norm": 1.1533113718032837, "learning_rate": 2.6560000000000003e-05, "loss": 0.4109, "step": 5315 }, { "epoch": 0.29768171127785864, "grad_norm": 1.237347960472107, "learning_rate": 2.6565e-05, "loss": 0.4632, "step": 5316 }, { "epoch": 0.29773770858998766, "grad_norm": 1.1130071878433228, "learning_rate": 2.657e-05, "loss": 0.4277, "step": 5317 }, { "epoch": 0.2977937059021167, "grad_norm": 1.2339191436767578, "learning_rate": 2.6575e-05, "loss": 0.3036, "step": 5318 }, { "epoch": 0.2978497032142457, "grad_norm": 1.2246606349945068, "learning_rate": 2.658e-05, "loss": 0.4144, "step": 5319 }, { "epoch": 0.2979057005263747, "grad_norm": 1.2826143503189087, 
"learning_rate": 2.6585e-05, "loss": 0.5132, "step": 5320 }, { "epoch": 0.29796169783850374, "grad_norm": 1.2820706367492676, "learning_rate": 2.6590000000000005e-05, "loss": 0.3742, "step": 5321 }, { "epoch": 0.29801769515063276, "grad_norm": 1.1958434581756592, "learning_rate": 2.6595000000000002e-05, "loss": 0.4186, "step": 5322 }, { "epoch": 0.2980736924627618, "grad_norm": 1.3425551652908325, "learning_rate": 2.6600000000000003e-05, "loss": 0.3936, "step": 5323 }, { "epoch": 0.2981296897748908, "grad_norm": 1.3036679029464722, "learning_rate": 2.6605000000000004e-05, "loss": 0.4485, "step": 5324 }, { "epoch": 0.2981856870870198, "grad_norm": 1.1589664220809937, "learning_rate": 2.661e-05, "loss": 0.384, "step": 5325 }, { "epoch": 0.29824168439914883, "grad_norm": 1.0981476306915283, "learning_rate": 2.6615000000000002e-05, "loss": 0.3953, "step": 5326 }, { "epoch": 0.29829768171127785, "grad_norm": 1.6495417356491089, "learning_rate": 2.662e-05, "loss": 0.4254, "step": 5327 }, { "epoch": 0.29835367902340687, "grad_norm": 2.0696768760681152, "learning_rate": 2.6625e-05, "loss": 0.4943, "step": 5328 }, { "epoch": 0.2984096763355359, "grad_norm": 1.2074174880981445, "learning_rate": 2.663e-05, "loss": 0.3937, "step": 5329 }, { "epoch": 0.2984656736476649, "grad_norm": 1.1499083042144775, "learning_rate": 2.6634999999999998e-05, "loss": 0.3489, "step": 5330 }, { "epoch": 0.2985216709597939, "grad_norm": 1.2360188961029053, "learning_rate": 2.6640000000000002e-05, "loss": 0.4464, "step": 5331 }, { "epoch": 0.29857766827192295, "grad_norm": 1.1654911041259766, "learning_rate": 2.6645000000000003e-05, "loss": 0.4109, "step": 5332 }, { "epoch": 0.29863366558405197, "grad_norm": 1.2289232015609741, "learning_rate": 2.6650000000000004e-05, "loss": 0.3535, "step": 5333 }, { "epoch": 0.298689662896181, "grad_norm": 1.474075436592102, "learning_rate": 2.6655e-05, "loss": 0.5, "step": 5334 }, { "epoch": 0.29874566020831, "grad_norm": 1.3659262657165527, "learning_rate": 
2.6660000000000002e-05, "loss": 0.5725, "step": 5335 }, { "epoch": 0.298801657520439, "grad_norm": 1.4412240982055664, "learning_rate": 2.6665e-05, "loss": 0.4145, "step": 5336 }, { "epoch": 0.29885765483256804, "grad_norm": 1.2272512912750244, "learning_rate": 2.667e-05, "loss": 0.5215, "step": 5337 }, { "epoch": 0.29891365214469706, "grad_norm": 1.1624239683151245, "learning_rate": 2.6675e-05, "loss": 0.545, "step": 5338 }, { "epoch": 0.2989696494568261, "grad_norm": 1.381621241569519, "learning_rate": 2.668e-05, "loss": 0.6109, "step": 5339 }, { "epoch": 0.2990256467689551, "grad_norm": 1.4536820650100708, "learning_rate": 2.6685e-05, "loss": 0.5272, "step": 5340 }, { "epoch": 0.2990816440810841, "grad_norm": 0.9184737801551819, "learning_rate": 2.6690000000000004e-05, "loss": 0.3607, "step": 5341 }, { "epoch": 0.29913764139321314, "grad_norm": 1.2206549644470215, "learning_rate": 2.6695000000000004e-05, "loss": 0.4182, "step": 5342 }, { "epoch": 0.29919363870534216, "grad_norm": 1.0115301609039307, "learning_rate": 2.6700000000000002e-05, "loss": 0.3466, "step": 5343 }, { "epoch": 0.2992496360174712, "grad_norm": 1.1144261360168457, "learning_rate": 2.6705000000000003e-05, "loss": 0.387, "step": 5344 }, { "epoch": 0.2993056333296002, "grad_norm": 1.4079464673995972, "learning_rate": 2.671e-05, "loss": 0.6056, "step": 5345 }, { "epoch": 0.2993616306417292, "grad_norm": 1.191615104675293, "learning_rate": 2.6715e-05, "loss": 0.4494, "step": 5346 }, { "epoch": 0.29941762795385823, "grad_norm": 0.9942251443862915, "learning_rate": 2.672e-05, "loss": 0.3627, "step": 5347 }, { "epoch": 0.29947362526598725, "grad_norm": 1.1981528997421265, "learning_rate": 2.6725e-05, "loss": 0.5028, "step": 5348 }, { "epoch": 0.2995296225781163, "grad_norm": 1.0284557342529297, "learning_rate": 2.673e-05, "loss": 0.4042, "step": 5349 }, { "epoch": 0.2995856198902453, "grad_norm": 1.6800273656845093, "learning_rate": 2.6734999999999997e-05, "loss": 0.5364, "step": 5350 }, { "epoch": 
0.2996416172023743, "grad_norm": 1.4248425960540771, "learning_rate": 2.6740000000000005e-05, "loss": 0.5269, "step": 5351 }, { "epoch": 0.29969761451450333, "grad_norm": 1.2523330450057983, "learning_rate": 2.6745000000000002e-05, "loss": 0.5303, "step": 5352 }, { "epoch": 0.29975361182663235, "grad_norm": 1.0883934497833252, "learning_rate": 2.6750000000000003e-05, "loss": 0.3841, "step": 5353 }, { "epoch": 0.2998096091387613, "grad_norm": 1.0700803995132446, "learning_rate": 2.6755000000000004e-05, "loss": 0.393, "step": 5354 }, { "epoch": 0.29986560645089033, "grad_norm": 1.1892080307006836, "learning_rate": 2.676e-05, "loss": 0.299, "step": 5355 }, { "epoch": 0.29992160376301935, "grad_norm": 0.9703457355499268, "learning_rate": 2.6765000000000002e-05, "loss": 0.3694, "step": 5356 }, { "epoch": 0.29997760107514837, "grad_norm": 4.798460483551025, "learning_rate": 2.677e-05, "loss": 0.4071, "step": 5357 }, { "epoch": 0.3000335983872774, "grad_norm": 1.0188037157058716, "learning_rate": 2.6775e-05, "loss": 0.4911, "step": 5358 }, { "epoch": 0.3000895956994064, "grad_norm": 1.1308927536010742, "learning_rate": 2.678e-05, "loss": 0.4291, "step": 5359 }, { "epoch": 0.3001455930115354, "grad_norm": 1.2467466592788696, "learning_rate": 2.6785e-05, "loss": 0.4322, "step": 5360 }, { "epoch": 0.30020159032366445, "grad_norm": 1.0073515176773071, "learning_rate": 2.6790000000000003e-05, "loss": 0.3015, "step": 5361 }, { "epoch": 0.30025758763579347, "grad_norm": 1.4517663717269897, "learning_rate": 2.6795000000000003e-05, "loss": 0.5408, "step": 5362 }, { "epoch": 0.3003135849479225, "grad_norm": 1.2505501508712769, "learning_rate": 2.6800000000000004e-05, "loss": 0.4191, "step": 5363 }, { "epoch": 0.3003695822600515, "grad_norm": 1.104689359664917, "learning_rate": 2.6805000000000002e-05, "loss": 0.3857, "step": 5364 }, { "epoch": 0.3004255795721805, "grad_norm": 1.179711103439331, "learning_rate": 2.6810000000000003e-05, "loss": 0.5031, "step": 5365 }, { "epoch": 
0.30048157688430954, "grad_norm": 1.175329327583313, "learning_rate": 2.6815e-05, "loss": 0.3959, "step": 5366 }, { "epoch": 0.30053757419643856, "grad_norm": 1.5125898122787476, "learning_rate": 2.682e-05, "loss": 0.345, "step": 5367 }, { "epoch": 0.3005935715085676, "grad_norm": 1.0912848711013794, "learning_rate": 2.6825e-05, "loss": 0.4557, "step": 5368 }, { "epoch": 0.3006495688206966, "grad_norm": 1.1836665868759155, "learning_rate": 2.683e-05, "loss": 0.4663, "step": 5369 }, { "epoch": 0.3007055661328256, "grad_norm": 1.2546424865722656, "learning_rate": 2.6835e-05, "loss": 0.4343, "step": 5370 }, { "epoch": 0.30076156344495464, "grad_norm": 1.251499891281128, "learning_rate": 2.6840000000000004e-05, "loss": 0.5248, "step": 5371 }, { "epoch": 0.30081756075708366, "grad_norm": 1.3481758832931519, "learning_rate": 2.6845000000000005e-05, "loss": 0.3643, "step": 5372 }, { "epoch": 0.3008735580692127, "grad_norm": 1.3178684711456299, "learning_rate": 2.6850000000000002e-05, "loss": 0.4057, "step": 5373 }, { "epoch": 0.3009295553813417, "grad_norm": 2.1568729877471924, "learning_rate": 2.6855000000000003e-05, "loss": 0.4088, "step": 5374 }, { "epoch": 0.3009855526934707, "grad_norm": 1.8466178178787231, "learning_rate": 2.686e-05, "loss": 0.5227, "step": 5375 }, { "epoch": 0.30104155000559973, "grad_norm": 1.2152745723724365, "learning_rate": 2.6865e-05, "loss": 0.5257, "step": 5376 }, { "epoch": 0.30109754731772875, "grad_norm": 1.4119421243667603, "learning_rate": 2.6870000000000002e-05, "loss": 0.3585, "step": 5377 }, { "epoch": 0.3011535446298578, "grad_norm": 1.2167298793792725, "learning_rate": 2.6875e-05, "loss": 0.4351, "step": 5378 }, { "epoch": 0.3012095419419868, "grad_norm": 1.2719478607177734, "learning_rate": 2.688e-05, "loss": 0.4216, "step": 5379 }, { "epoch": 0.3012655392541158, "grad_norm": 1.229483962059021, "learning_rate": 2.6884999999999998e-05, "loss": 0.4044, "step": 5380 }, { "epoch": 0.30132153656624483, "grad_norm": 1.5179332494735718, 
"learning_rate": 2.689e-05, "loss": 0.3877, "step": 5381 }, { "epoch": 0.30137753387837385, "grad_norm": 1.4955464601516724, "learning_rate": 2.6895000000000003e-05, "loss": 0.5172, "step": 5382 }, { "epoch": 0.30143353119050287, "grad_norm": 1.1615431308746338, "learning_rate": 2.6900000000000003e-05, "loss": 0.443, "step": 5383 }, { "epoch": 0.3014895285026319, "grad_norm": 1.117387294769287, "learning_rate": 2.6905e-05, "loss": 0.3921, "step": 5384 }, { "epoch": 0.3015455258147609, "grad_norm": 1.1309614181518555, "learning_rate": 2.691e-05, "loss": 0.4126, "step": 5385 }, { "epoch": 0.3016015231268899, "grad_norm": 1.202384352684021, "learning_rate": 2.6915000000000002e-05, "loss": 0.4628, "step": 5386 }, { "epoch": 0.30165752043901894, "grad_norm": 1.3168935775756836, "learning_rate": 2.692e-05, "loss": 0.4685, "step": 5387 }, { "epoch": 0.30171351775114796, "grad_norm": 1.3338346481323242, "learning_rate": 2.6925e-05, "loss": 0.4951, "step": 5388 }, { "epoch": 0.301769515063277, "grad_norm": 1.2364870309829712, "learning_rate": 2.693e-05, "loss": 0.3585, "step": 5389 }, { "epoch": 0.301825512375406, "grad_norm": 1.512573003768921, "learning_rate": 2.6935e-05, "loss": 0.4172, "step": 5390 }, { "epoch": 0.301881509687535, "grad_norm": 1.2886953353881836, "learning_rate": 2.694e-05, "loss": 0.4066, "step": 5391 }, { "epoch": 0.30193750699966404, "grad_norm": 1.155105471611023, "learning_rate": 2.6945000000000004e-05, "loss": 0.3305, "step": 5392 }, { "epoch": 0.30199350431179306, "grad_norm": 1.0350037813186646, "learning_rate": 2.6950000000000005e-05, "loss": 0.4095, "step": 5393 }, { "epoch": 0.3020495016239221, "grad_norm": 1.0273566246032715, "learning_rate": 2.6955000000000002e-05, "loss": 0.3563, "step": 5394 }, { "epoch": 0.30210549893605104, "grad_norm": 1.1574289798736572, "learning_rate": 2.6960000000000003e-05, "loss": 0.5187, "step": 5395 }, { "epoch": 0.30216149624818006, "grad_norm": 1.1745827198028564, "learning_rate": 2.6965e-05, "loss": 0.3549, 
"step": 5396 }, { "epoch": 0.3022174935603091, "grad_norm": 1.4617410898208618, "learning_rate": 2.697e-05, "loss": 0.5529, "step": 5397 }, { "epoch": 0.3022734908724381, "grad_norm": 2.2632431983947754, "learning_rate": 2.6975000000000002e-05, "loss": 0.5178, "step": 5398 }, { "epoch": 0.3023294881845671, "grad_norm": 1.0546752214431763, "learning_rate": 2.698e-05, "loss": 0.3277, "step": 5399 }, { "epoch": 0.30238548549669614, "grad_norm": 1.3697587251663208, "learning_rate": 2.6985e-05, "loss": 0.4477, "step": 5400 }, { "epoch": 0.30244148280882516, "grad_norm": 1.1841217279434204, "learning_rate": 2.6989999999999997e-05, "loss": 0.6534, "step": 5401 }, { "epoch": 0.3024974801209542, "grad_norm": 1.2722182273864746, "learning_rate": 2.6995000000000005e-05, "loss": 0.3931, "step": 5402 }, { "epoch": 0.3025534774330832, "grad_norm": 1.2848879098892212, "learning_rate": 2.7000000000000002e-05, "loss": 0.3851, "step": 5403 }, { "epoch": 0.3026094747452122, "grad_norm": 1.2802610397338867, "learning_rate": 2.7005000000000003e-05, "loss": 0.5178, "step": 5404 }, { "epoch": 0.30266547205734123, "grad_norm": 1.2036280632019043, "learning_rate": 2.701e-05, "loss": 0.4296, "step": 5405 }, { "epoch": 0.30272146936947025, "grad_norm": 1.3071026802062988, "learning_rate": 2.7015e-05, "loss": 0.5502, "step": 5406 }, { "epoch": 0.30277746668159927, "grad_norm": 1.4859977960586548, "learning_rate": 2.7020000000000002e-05, "loss": 0.5912, "step": 5407 }, { "epoch": 0.3028334639937283, "grad_norm": 1.4547327756881714, "learning_rate": 2.7025e-05, "loss": 0.4606, "step": 5408 }, { "epoch": 0.3028894613058573, "grad_norm": 1.0255621671676636, "learning_rate": 2.703e-05, "loss": 0.4704, "step": 5409 }, { "epoch": 0.30294545861798633, "grad_norm": 29.907569885253906, "learning_rate": 2.7034999999999998e-05, "loss": 0.3666, "step": 5410 }, { "epoch": 0.30300145593011535, "grad_norm": 1.1806755065917969, "learning_rate": 2.704e-05, "loss": 0.443, "step": 5411 }, { "epoch": 
0.30305745324224437, "grad_norm": 1.1206432580947876, "learning_rate": 2.7045000000000003e-05, "loss": 0.3466, "step": 5412 }, { "epoch": 0.3031134505543734, "grad_norm": 1.1510664224624634, "learning_rate": 2.7050000000000004e-05, "loss": 0.5531, "step": 5413 }, { "epoch": 0.3031694478665024, "grad_norm": 1.128298044204712, "learning_rate": 2.7055e-05, "loss": 0.4528, "step": 5414 }, { "epoch": 0.3032254451786314, "grad_norm": 1.5585412979125977, "learning_rate": 2.7060000000000002e-05, "loss": 0.6488, "step": 5415 }, { "epoch": 0.30328144249076044, "grad_norm": 1.4108123779296875, "learning_rate": 2.7065000000000003e-05, "loss": 0.4687, "step": 5416 }, { "epoch": 0.30333743980288946, "grad_norm": 0.9559827446937561, "learning_rate": 2.707e-05, "loss": 0.2918, "step": 5417 }, { "epoch": 0.3033934371150185, "grad_norm": 1.0924289226531982, "learning_rate": 2.7075e-05, "loss": 0.4397, "step": 5418 }, { "epoch": 0.3034494344271475, "grad_norm": 1.3185755014419556, "learning_rate": 2.7079999999999998e-05, "loss": 0.4814, "step": 5419 }, { "epoch": 0.3035054317392765, "grad_norm": 1.189787745475769, "learning_rate": 2.7085e-05, "loss": 0.4277, "step": 5420 }, { "epoch": 0.30356142905140554, "grad_norm": 1.108835220336914, "learning_rate": 2.709e-05, "loss": 0.4128, "step": 5421 }, { "epoch": 0.30361742636353456, "grad_norm": 1.2378427982330322, "learning_rate": 2.7095000000000004e-05, "loss": 0.3862, "step": 5422 }, { "epoch": 0.3036734236756636, "grad_norm": 1.2699310779571533, "learning_rate": 2.7100000000000005e-05, "loss": 0.3449, "step": 5423 }, { "epoch": 0.3037294209877926, "grad_norm": 1.3861756324768066, "learning_rate": 2.7105000000000002e-05, "loss": 0.5577, "step": 5424 }, { "epoch": 0.3037854182999216, "grad_norm": 1.676414966583252, "learning_rate": 2.7110000000000003e-05, "loss": 0.4003, "step": 5425 }, { "epoch": 0.30384141561205064, "grad_norm": 1.1581931114196777, "learning_rate": 2.7115e-05, "loss": 0.4549, "step": 5426 }, { "epoch": 
0.30389741292417966, "grad_norm": 1.4321802854537964, "learning_rate": 2.712e-05, "loss": 0.53, "step": 5427 }, { "epoch": 0.3039534102363087, "grad_norm": 1.5010488033294678, "learning_rate": 2.7125000000000002e-05, "loss": 0.4593, "step": 5428 }, { "epoch": 0.3040094075484377, "grad_norm": 1.455575704574585, "learning_rate": 2.713e-05, "loss": 0.4798, "step": 5429 }, { "epoch": 0.3040654048605667, "grad_norm": 1.265520691871643, "learning_rate": 2.7135e-05, "loss": 0.6806, "step": 5430 }, { "epoch": 0.30412140217269573, "grad_norm": 1.3100696802139282, "learning_rate": 2.7139999999999998e-05, "loss": 0.6165, "step": 5431 }, { "epoch": 0.30417739948482475, "grad_norm": 1.181958556175232, "learning_rate": 2.7145000000000005e-05, "loss": 0.4008, "step": 5432 }, { "epoch": 0.30423339679695377, "grad_norm": 1.2225289344787598, "learning_rate": 2.7150000000000003e-05, "loss": 0.3583, "step": 5433 }, { "epoch": 0.3042893941090828, "grad_norm": 1.6007293462753296, "learning_rate": 2.7155000000000003e-05, "loss": 0.4714, "step": 5434 }, { "epoch": 0.3043453914212118, "grad_norm": 1.3652026653289795, "learning_rate": 2.716e-05, "loss": 0.4413, "step": 5435 }, { "epoch": 0.30440138873334077, "grad_norm": 1.3229869604110718, "learning_rate": 2.7165e-05, "loss": 0.3679, "step": 5436 }, { "epoch": 0.3044573860454698, "grad_norm": 1.6404391527175903, "learning_rate": 2.7170000000000002e-05, "loss": 0.4618, "step": 5437 }, { "epoch": 0.3045133833575988, "grad_norm": 1.1321698427200317, "learning_rate": 2.7175e-05, "loss": 0.4808, "step": 5438 }, { "epoch": 0.30456938066972783, "grad_norm": 1.2999236583709717, "learning_rate": 2.718e-05, "loss": 0.433, "step": 5439 }, { "epoch": 0.30462537798185685, "grad_norm": 1.1787896156311035, "learning_rate": 2.7184999999999998e-05, "loss": 0.5162, "step": 5440 }, { "epoch": 0.30468137529398587, "grad_norm": 1.1056458950042725, "learning_rate": 2.719e-05, "loss": 0.368, "step": 5441 }, { "epoch": 0.3047373726061149, "grad_norm": 
1.2190624475479126, "learning_rate": 2.7195000000000003e-05, "loss": 0.4291, "step": 5442 }, { "epoch": 0.3047933699182439, "grad_norm": 1.6064512729644775, "learning_rate": 2.7200000000000004e-05, "loss": 0.493, "step": 5443 }, { "epoch": 0.3048493672303729, "grad_norm": 1.5153958797454834, "learning_rate": 2.7205e-05, "loss": 0.4283, "step": 5444 }, { "epoch": 0.30490536454250194, "grad_norm": 1.2916572093963623, "learning_rate": 2.7210000000000002e-05, "loss": 0.4106, "step": 5445 }, { "epoch": 0.30496136185463096, "grad_norm": 1.1816558837890625, "learning_rate": 2.7215000000000003e-05, "loss": 0.3662, "step": 5446 }, { "epoch": 0.30501735916676, "grad_norm": 1.0784937143325806, "learning_rate": 2.722e-05, "loss": 0.4712, "step": 5447 }, { "epoch": 0.305073356478889, "grad_norm": 1.6013766527175903, "learning_rate": 2.7225e-05, "loss": 0.5618, "step": 5448 }, { "epoch": 0.305129353791018, "grad_norm": 1.3034296035766602, "learning_rate": 2.723e-05, "loss": 0.5378, "step": 5449 }, { "epoch": 0.30518535110314704, "grad_norm": 1.2664868831634521, "learning_rate": 2.7235e-05, "loss": 0.4298, "step": 5450 }, { "epoch": 0.30524134841527606, "grad_norm": 1.2620458602905273, "learning_rate": 2.724e-05, "loss": 0.3901, "step": 5451 }, { "epoch": 0.3052973457274051, "grad_norm": 1.152858018875122, "learning_rate": 2.7245000000000004e-05, "loss": 0.4303, "step": 5452 }, { "epoch": 0.3053533430395341, "grad_norm": 1.1570677757263184, "learning_rate": 2.725e-05, "loss": 0.4018, "step": 5453 }, { "epoch": 0.3054093403516631, "grad_norm": 1.1236780881881714, "learning_rate": 2.7255000000000002e-05, "loss": 0.3916, "step": 5454 }, { "epoch": 0.30546533766379214, "grad_norm": 1.2584707736968994, "learning_rate": 2.7260000000000003e-05, "loss": 0.4971, "step": 5455 }, { "epoch": 0.30552133497592115, "grad_norm": 1.286197304725647, "learning_rate": 2.7265e-05, "loss": 0.4123, "step": 5456 }, { "epoch": 0.3055773322880502, "grad_norm": 1.1834160089492798, "learning_rate": 
2.727e-05, "loss": 0.4101, "step": 5457 }, { "epoch": 0.3056333296001792, "grad_norm": 0.9636449217796326, "learning_rate": 2.7275e-05, "loss": 0.3525, "step": 5458 }, { "epoch": 0.3056893269123082, "grad_norm": 1.1810790300369263, "learning_rate": 2.728e-05, "loss": 0.4266, "step": 5459 }, { "epoch": 0.30574532422443723, "grad_norm": 1.2433044910430908, "learning_rate": 2.7285e-05, "loss": 0.4085, "step": 5460 }, { "epoch": 0.30580132153656625, "grad_norm": 1.2087873220443726, "learning_rate": 2.7289999999999998e-05, "loss": 0.4045, "step": 5461 }, { "epoch": 0.30585731884869527, "grad_norm": 1.1909713745117188, "learning_rate": 2.7295000000000005e-05, "loss": 0.4767, "step": 5462 }, { "epoch": 0.3059133161608243, "grad_norm": 1.2529692649841309, "learning_rate": 2.7300000000000003e-05, "loss": 0.3872, "step": 5463 }, { "epoch": 0.3059693134729533, "grad_norm": 1.424086093902588, "learning_rate": 2.7305000000000004e-05, "loss": 0.5771, "step": 5464 }, { "epoch": 0.3060253107850823, "grad_norm": 1.120654821395874, "learning_rate": 2.731e-05, "loss": 0.3698, "step": 5465 }, { "epoch": 0.30608130809721135, "grad_norm": 1.3193612098693848, "learning_rate": 2.7315000000000002e-05, "loss": 0.417, "step": 5466 }, { "epoch": 0.30613730540934037, "grad_norm": 1.2496871948242188, "learning_rate": 2.7320000000000003e-05, "loss": 0.4239, "step": 5467 }, { "epoch": 0.3061933027214694, "grad_norm": 1.2125095129013062, "learning_rate": 2.7325e-05, "loss": 0.3446, "step": 5468 }, { "epoch": 0.3062493000335984, "grad_norm": 1.3269721269607544, "learning_rate": 2.733e-05, "loss": 0.4778, "step": 5469 }, { "epoch": 0.3063052973457274, "grad_norm": 1.2976957559585571, "learning_rate": 2.7335e-05, "loss": 0.4254, "step": 5470 }, { "epoch": 0.30636129465785644, "grad_norm": 1.2981770038604736, "learning_rate": 2.734e-05, "loss": 0.4417, "step": 5471 }, { "epoch": 0.30641729196998546, "grad_norm": 1.0654748678207397, "learning_rate": 2.7345000000000003e-05, "loss": 0.3958, "step": 5472 
}, { "epoch": 0.3064732892821145, "grad_norm": 1.086867094039917, "learning_rate": 2.7350000000000004e-05, "loss": 0.4133, "step": 5473 }, { "epoch": 0.3065292865942435, "grad_norm": 1.3270715475082397, "learning_rate": 2.7355e-05, "loss": 0.5076, "step": 5474 }, { "epoch": 0.3065852839063725, "grad_norm": 1.3053569793701172, "learning_rate": 2.7360000000000002e-05, "loss": 0.5096, "step": 5475 }, { "epoch": 0.30664128121850154, "grad_norm": 1.1925352811813354, "learning_rate": 2.7365000000000003e-05, "loss": 0.4149, "step": 5476 }, { "epoch": 0.30669727853063056, "grad_norm": 1.3278213739395142, "learning_rate": 2.737e-05, "loss": 0.4896, "step": 5477 }, { "epoch": 0.3067532758427595, "grad_norm": 1.2239010334014893, "learning_rate": 2.7375e-05, "loss": 0.529, "step": 5478 }, { "epoch": 0.30680927315488854, "grad_norm": 1.0836764574050903, "learning_rate": 2.738e-05, "loss": 0.3727, "step": 5479 }, { "epoch": 0.30686527046701756, "grad_norm": 1.2639875411987305, "learning_rate": 2.7385e-05, "loss": 0.4574, "step": 5480 }, { "epoch": 0.3069212677791466, "grad_norm": 1.3477859497070312, "learning_rate": 2.739e-05, "loss": 0.5604, "step": 5481 }, { "epoch": 0.3069772650912756, "grad_norm": 1.1119790077209473, "learning_rate": 2.7395000000000005e-05, "loss": 0.4352, "step": 5482 }, { "epoch": 0.3070332624034046, "grad_norm": 1.3080546855926514, "learning_rate": 2.7400000000000002e-05, "loss": 0.4564, "step": 5483 }, { "epoch": 0.30708925971553364, "grad_norm": 1.1802984476089478, "learning_rate": 2.7405000000000003e-05, "loss": 0.4341, "step": 5484 }, { "epoch": 0.30714525702766265, "grad_norm": 1.335644006729126, "learning_rate": 2.7410000000000004e-05, "loss": 0.6299, "step": 5485 }, { "epoch": 0.3072012543397917, "grad_norm": 1.1943070888519287, "learning_rate": 2.7415e-05, "loss": 0.4315, "step": 5486 }, { "epoch": 0.3072572516519207, "grad_norm": 1.4698803424835205, "learning_rate": 2.7420000000000002e-05, "loss": 0.4656, "step": 5487 }, { "epoch": 
0.3073132489640497, "grad_norm": 1.4018625020980835, "learning_rate": 2.7425e-05, "loss": 0.3524, "step": 5488 }, { "epoch": 0.30736924627617873, "grad_norm": 1.2805378437042236, "learning_rate": 2.743e-05, "loss": 0.4766, "step": 5489 }, { "epoch": 0.30742524358830775, "grad_norm": 1.030600905418396, "learning_rate": 2.7435e-05, "loss": 0.365, "step": 5490 }, { "epoch": 0.30748124090043677, "grad_norm": 1.0351371765136719, "learning_rate": 2.7439999999999998e-05, "loss": 0.3058, "step": 5491 }, { "epoch": 0.3075372382125658, "grad_norm": 1.1799652576446533, "learning_rate": 2.7445000000000002e-05, "loss": 0.4724, "step": 5492 }, { "epoch": 0.3075932355246948, "grad_norm": 1.1889554262161255, "learning_rate": 2.7450000000000003e-05, "loss": 0.3925, "step": 5493 }, { "epoch": 0.3076492328368238, "grad_norm": 1.3212112188339233, "learning_rate": 2.7455000000000004e-05, "loss": 0.4217, "step": 5494 }, { "epoch": 0.30770523014895285, "grad_norm": 1.0993441343307495, "learning_rate": 2.746e-05, "loss": 0.3788, "step": 5495 }, { "epoch": 0.30776122746108187, "grad_norm": 1.5850281715393066, "learning_rate": 2.7465000000000002e-05, "loss": 0.4674, "step": 5496 }, { "epoch": 0.3078172247732109, "grad_norm": 1.312206506729126, "learning_rate": 2.7470000000000003e-05, "loss": 0.4479, "step": 5497 }, { "epoch": 0.3078732220853399, "grad_norm": 1.1603442430496216, "learning_rate": 2.7475e-05, "loss": 0.4297, "step": 5498 }, { "epoch": 0.3079292193974689, "grad_norm": 1.311500072479248, "learning_rate": 2.748e-05, "loss": 0.4327, "step": 5499 }, { "epoch": 0.30798521670959794, "grad_norm": 1.2915374040603638, "learning_rate": 2.7485e-05, "loss": 0.5015, "step": 5500 }, { "epoch": 0.30804121402172696, "grad_norm": 1.0879619121551514, "learning_rate": 2.749e-05, "loss": 0.3734, "step": 5501 }, { "epoch": 0.308097211333856, "grad_norm": 1.2090144157409668, "learning_rate": 2.7495000000000004e-05, "loss": 0.4948, "step": 5502 }, { "epoch": 0.308153208645985, "grad_norm": 
1.6563881635665894, "learning_rate": 2.7500000000000004e-05, "loss": 0.4062, "step": 5503 }, { "epoch": 0.308209205958114, "grad_norm": 1.4525583982467651, "learning_rate": 2.7505000000000002e-05, "loss": 0.3904, "step": 5504 }, { "epoch": 0.30826520327024304, "grad_norm": 1.367253303527832, "learning_rate": 2.7510000000000003e-05, "loss": 0.3965, "step": 5505 }, { "epoch": 0.30832120058237206, "grad_norm": 1.3779171705245972, "learning_rate": 2.7515000000000003e-05, "loss": 0.4004, "step": 5506 }, { "epoch": 0.3083771978945011, "grad_norm": 1.220894694328308, "learning_rate": 2.752e-05, "loss": 0.4829, "step": 5507 }, { "epoch": 0.3084331952066301, "grad_norm": 1.0637099742889404, "learning_rate": 2.7525e-05, "loss": 0.3627, "step": 5508 }, { "epoch": 0.3084891925187591, "grad_norm": 1.20713210105896, "learning_rate": 2.753e-05, "loss": 0.3328, "step": 5509 }, { "epoch": 0.30854518983088813, "grad_norm": 1.1569844484329224, "learning_rate": 2.7535e-05, "loss": 0.5242, "step": 5510 }, { "epoch": 0.30860118714301715, "grad_norm": 1.340274453163147, "learning_rate": 2.754e-05, "loss": 0.4154, "step": 5511 }, { "epoch": 0.30865718445514617, "grad_norm": 1.2659398317337036, "learning_rate": 2.7544999999999998e-05, "loss": 0.368, "step": 5512 }, { "epoch": 0.3087131817672752, "grad_norm": 0.991502046585083, "learning_rate": 2.7550000000000002e-05, "loss": 0.3658, "step": 5513 }, { "epoch": 0.3087691790794042, "grad_norm": 1.056311845779419, "learning_rate": 2.7555000000000003e-05, "loss": 0.4026, "step": 5514 }, { "epoch": 0.30882517639153323, "grad_norm": 1.4860235452651978, "learning_rate": 2.7560000000000004e-05, "loss": 0.4851, "step": 5515 }, { "epoch": 0.30888117370366225, "grad_norm": 1.1402136087417603, "learning_rate": 2.7565e-05, "loss": 0.315, "step": 5516 }, { "epoch": 0.30893717101579127, "grad_norm": 1.2493200302124023, "learning_rate": 2.7570000000000002e-05, "loss": 0.467, "step": 5517 }, { "epoch": 0.3089931683279203, "grad_norm": 1.1811987161636353, 
"learning_rate": 2.7575e-05, "loss": 0.3523, "step": 5518 }, { "epoch": 0.30904916564004925, "grad_norm": 2.222553253173828, "learning_rate": 2.758e-05, "loss": 0.542, "step": 5519 }, { "epoch": 0.30910516295217827, "grad_norm": 1.4518640041351318, "learning_rate": 2.7585e-05, "loss": 0.4165, "step": 5520 }, { "epoch": 0.3091611602643073, "grad_norm": 1.4288419485092163, "learning_rate": 2.759e-05, "loss": 0.4544, "step": 5521 }, { "epoch": 0.3092171575764363, "grad_norm": 1.4100167751312256, "learning_rate": 2.7595e-05, "loss": 0.5303, "step": 5522 }, { "epoch": 0.3092731548885653, "grad_norm": 1.3606417179107666, "learning_rate": 2.7600000000000003e-05, "loss": 0.4296, "step": 5523 }, { "epoch": 0.30932915220069435, "grad_norm": 1.1433569192886353, "learning_rate": 2.7605000000000004e-05, "loss": 0.389, "step": 5524 }, { "epoch": 0.30938514951282337, "grad_norm": 1.2820134162902832, "learning_rate": 2.761e-05, "loss": 0.5378, "step": 5525 }, { "epoch": 0.3094411468249524, "grad_norm": 1.375495433807373, "learning_rate": 2.7615000000000002e-05, "loss": 0.4937, "step": 5526 }, { "epoch": 0.3094971441370814, "grad_norm": 1.3101216554641724, "learning_rate": 2.762e-05, "loss": 0.4153, "step": 5527 }, { "epoch": 0.3095531414492104, "grad_norm": 1.2939258813858032, "learning_rate": 2.7625e-05, "loss": 0.4385, "step": 5528 }, { "epoch": 0.30960913876133944, "grad_norm": 1.3447812795639038, "learning_rate": 2.763e-05, "loss": 0.5507, "step": 5529 }, { "epoch": 0.30966513607346846, "grad_norm": 1.0491257905960083, "learning_rate": 2.7635e-05, "loss": 0.388, "step": 5530 }, { "epoch": 0.3097211333855975, "grad_norm": 1.0880898237228394, "learning_rate": 2.764e-05, "loss": 0.4203, "step": 5531 }, { "epoch": 0.3097771306977265, "grad_norm": 1.275537371635437, "learning_rate": 2.7644999999999997e-05, "loss": 0.4122, "step": 5532 }, { "epoch": 0.3098331280098555, "grad_norm": 1.2868367433547974, "learning_rate": 2.7650000000000005e-05, "loss": 0.4093, "step": 5533 }, { 
"epoch": 0.30988912532198454, "grad_norm": 1.0064719915390015, "learning_rate": 2.7655000000000002e-05, "loss": 0.4153, "step": 5534 }, { "epoch": 0.30994512263411356, "grad_norm": 1.1891820430755615, "learning_rate": 2.7660000000000003e-05, "loss": 0.4099, "step": 5535 }, { "epoch": 0.3100011199462426, "grad_norm": 1.3737549781799316, "learning_rate": 2.7665000000000004e-05, "loss": 0.5673, "step": 5536 }, { "epoch": 0.3100571172583716, "grad_norm": 1.3942443132400513, "learning_rate": 2.767e-05, "loss": 0.4315, "step": 5537 }, { "epoch": 0.3101131145705006, "grad_norm": 1.105025291442871, "learning_rate": 2.7675000000000002e-05, "loss": 0.4325, "step": 5538 }, { "epoch": 0.31016911188262963, "grad_norm": 1.2726826667785645, "learning_rate": 2.768e-05, "loss": 0.419, "step": 5539 }, { "epoch": 0.31022510919475865, "grad_norm": 1.1925272941589355, "learning_rate": 2.7685e-05, "loss": 0.3958, "step": 5540 }, { "epoch": 0.31028110650688767, "grad_norm": 1.500717043876648, "learning_rate": 2.769e-05, "loss": 0.3668, "step": 5541 }, { "epoch": 0.3103371038190167, "grad_norm": 1.0971508026123047, "learning_rate": 2.7694999999999998e-05, "loss": 0.33, "step": 5542 }, { "epoch": 0.3103931011311457, "grad_norm": 1.1650553941726685, "learning_rate": 2.7700000000000002e-05, "loss": 0.4002, "step": 5543 }, { "epoch": 0.31044909844327473, "grad_norm": 1.1508090496063232, "learning_rate": 2.7705000000000003e-05, "loss": 0.5138, "step": 5544 }, { "epoch": 0.31050509575540375, "grad_norm": 1.3230493068695068, "learning_rate": 2.7710000000000004e-05, "loss": 0.5699, "step": 5545 }, { "epoch": 0.31056109306753277, "grad_norm": 1.0767821073532104, "learning_rate": 2.7715e-05, "loss": 0.4049, "step": 5546 }, { "epoch": 0.3106170903796618, "grad_norm": 1.247046947479248, "learning_rate": 2.7720000000000002e-05, "loss": 0.4443, "step": 5547 }, { "epoch": 0.3106730876917908, "grad_norm": 1.0499744415283203, "learning_rate": 2.7725e-05, "loss": 0.4459, "step": 5548 }, { "epoch": 
0.3107290850039198, "grad_norm": 1.4015235900878906, "learning_rate": 2.773e-05, "loss": 0.5551, "step": 5549 }, { "epoch": 0.31078508231604884, "grad_norm": 1.2208417654037476, "learning_rate": 2.7735e-05, "loss": 0.4329, "step": 5550 }, { "epoch": 0.31084107962817786, "grad_norm": 1.5305595397949219, "learning_rate": 2.774e-05, "loss": 0.4818, "step": 5551 }, { "epoch": 0.3108970769403069, "grad_norm": 1.173004388809204, "learning_rate": 2.7745e-05, "loss": 0.4242, "step": 5552 }, { "epoch": 0.3109530742524359, "grad_norm": 1.2876002788543701, "learning_rate": 2.7750000000000004e-05, "loss": 0.36, "step": 5553 }, { "epoch": 0.3110090715645649, "grad_norm": 1.264246940612793, "learning_rate": 2.7755000000000004e-05, "loss": 0.3828, "step": 5554 }, { "epoch": 0.31106506887669394, "grad_norm": 1.0596487522125244, "learning_rate": 2.7760000000000002e-05, "loss": 0.3565, "step": 5555 }, { "epoch": 0.31112106618882296, "grad_norm": 1.1578136682510376, "learning_rate": 2.7765000000000003e-05, "loss": 0.4483, "step": 5556 }, { "epoch": 0.311177063500952, "grad_norm": 1.1922684907913208, "learning_rate": 2.777e-05, "loss": 0.3374, "step": 5557 }, { "epoch": 0.311233060813081, "grad_norm": 1.4355252981185913, "learning_rate": 2.7775e-05, "loss": 0.5503, "step": 5558 }, { "epoch": 0.31128905812521, "grad_norm": 1.160586953163147, "learning_rate": 2.778e-05, "loss": 0.3731, "step": 5559 }, { "epoch": 0.311345055437339, "grad_norm": 1.2258049249649048, "learning_rate": 2.7785e-05, "loss": 0.3638, "step": 5560 }, { "epoch": 0.311401052749468, "grad_norm": 1.42200767993927, "learning_rate": 2.779e-05, "loss": 0.4598, "step": 5561 }, { "epoch": 0.311457050061597, "grad_norm": 1.2269688844680786, "learning_rate": 2.7794999999999997e-05, "loss": 0.3328, "step": 5562 }, { "epoch": 0.31151304737372604, "grad_norm": 1.2529104948043823, "learning_rate": 2.7800000000000005e-05, "loss": 0.5513, "step": 5563 }, { "epoch": 0.31156904468585506, "grad_norm": 1.2473069429397583, 
"learning_rate": 2.7805000000000002e-05, "loss": 0.5143, "step": 5564 }, { "epoch": 0.3116250419979841, "grad_norm": 1.274416446685791, "learning_rate": 2.7810000000000003e-05, "loss": 0.4876, "step": 5565 }, { "epoch": 0.3116810393101131, "grad_norm": 1.2429513931274414, "learning_rate": 2.7815e-05, "loss": 0.4585, "step": 5566 }, { "epoch": 0.3117370366222421, "grad_norm": 1.2814960479736328, "learning_rate": 2.782e-05, "loss": 0.4002, "step": 5567 }, { "epoch": 0.31179303393437113, "grad_norm": 1.2824534177780151, "learning_rate": 2.7825000000000002e-05, "loss": 0.4641, "step": 5568 }, { "epoch": 0.31184903124650015, "grad_norm": 1.194035291671753, "learning_rate": 2.783e-05, "loss": 0.4814, "step": 5569 }, { "epoch": 0.31190502855862917, "grad_norm": 1.5696885585784912, "learning_rate": 2.7835e-05, "loss": 0.5196, "step": 5570 }, { "epoch": 0.3119610258707582, "grad_norm": 1.2227907180786133, "learning_rate": 2.7839999999999998e-05, "loss": 0.3883, "step": 5571 }, { "epoch": 0.3120170231828872, "grad_norm": 0.9320754408836365, "learning_rate": 2.7845e-05, "loss": 0.3771, "step": 5572 }, { "epoch": 0.31207302049501623, "grad_norm": 1.4009031057357788, "learning_rate": 2.7850000000000003e-05, "loss": 0.3545, "step": 5573 }, { "epoch": 0.31212901780714525, "grad_norm": 1.3426100015640259, "learning_rate": 2.7855000000000004e-05, "loss": 0.4465, "step": 5574 }, { "epoch": 0.31218501511927427, "grad_norm": 1.4786632061004639, "learning_rate": 2.7860000000000004e-05, "loss": 0.4563, "step": 5575 }, { "epoch": 0.3122410124314033, "grad_norm": 1.123327612876892, "learning_rate": 2.7865000000000002e-05, "loss": 0.4662, "step": 5576 }, { "epoch": 0.3122970097435323, "grad_norm": 1.3635311126708984, "learning_rate": 2.7870000000000003e-05, "loss": 0.5339, "step": 5577 }, { "epoch": 0.3123530070556613, "grad_norm": 7.306573390960693, "learning_rate": 2.7875e-05, "loss": 0.4075, "step": 5578 }, { "epoch": 0.31240900436779034, "grad_norm": 1.197943925857544, "learning_rate": 
2.788e-05, "loss": 0.456, "step": 5579 }, { "epoch": 0.31246500167991936, "grad_norm": 1.428166151046753, "learning_rate": 2.7885e-05, "loss": 0.3866, "step": 5580 }, { "epoch": 0.3125209989920484, "grad_norm": 1.0873808860778809, "learning_rate": 2.789e-05, "loss": 0.3884, "step": 5581 }, { "epoch": 0.3125769963041774, "grad_norm": 1.3680000305175781, "learning_rate": 2.7895e-05, "loss": 0.4507, "step": 5582 }, { "epoch": 0.3126329936163064, "grad_norm": 1.1258691549301147, "learning_rate": 2.7900000000000004e-05, "loss": 0.4959, "step": 5583 }, { "epoch": 0.31268899092843544, "grad_norm": 1.0404407978057861, "learning_rate": 2.7905000000000005e-05, "loss": 0.4571, "step": 5584 }, { "epoch": 0.31274498824056446, "grad_norm": 1.3987343311309814, "learning_rate": 2.7910000000000002e-05, "loss": 0.4018, "step": 5585 }, { "epoch": 0.3128009855526935, "grad_norm": 1.401898980140686, "learning_rate": 2.7915000000000003e-05, "loss": 0.3769, "step": 5586 }, { "epoch": 0.3128569828648225, "grad_norm": 1.24025559425354, "learning_rate": 2.792e-05, "loss": 0.401, "step": 5587 }, { "epoch": 0.3129129801769515, "grad_norm": 1.172914743423462, "learning_rate": 2.7925e-05, "loss": 0.4351, "step": 5588 }, { "epoch": 0.31296897748908054, "grad_norm": 1.3653998374938965, "learning_rate": 2.7930000000000002e-05, "loss": 0.4432, "step": 5589 }, { "epoch": 0.31302497480120955, "grad_norm": 1.4646999835968018, "learning_rate": 2.7935e-05, "loss": 0.5552, "step": 5590 }, { "epoch": 0.3130809721133386, "grad_norm": 1.4121801853179932, "learning_rate": 2.794e-05, "loss": 0.391, "step": 5591 }, { "epoch": 0.3131369694254676, "grad_norm": 1.7955048084259033, "learning_rate": 2.7944999999999998e-05, "loss": 0.53, "step": 5592 }, { "epoch": 0.3131929667375966, "grad_norm": 1.044741153717041, "learning_rate": 2.7950000000000005e-05, "loss": 0.401, "step": 5593 }, { "epoch": 0.31324896404972563, "grad_norm": 1.3014229536056519, "learning_rate": 2.7955000000000003e-05, "loss": 0.4103, "step": 
5594 }, { "epoch": 0.31330496136185465, "grad_norm": 1.2207649946212769, "learning_rate": 2.7960000000000003e-05, "loss": 0.4124, "step": 5595 }, { "epoch": 0.31336095867398367, "grad_norm": 1.241531252861023, "learning_rate": 2.7965e-05, "loss": 0.3804, "step": 5596 }, { "epoch": 0.3134169559861127, "grad_norm": 1.4336752891540527, "learning_rate": 2.797e-05, "loss": 0.4088, "step": 5597 }, { "epoch": 0.3134729532982417, "grad_norm": 1.685155987739563, "learning_rate": 2.7975000000000002e-05, "loss": 0.5316, "step": 5598 }, { "epoch": 0.3135289506103707, "grad_norm": 1.1184717416763306, "learning_rate": 2.798e-05, "loss": 0.3776, "step": 5599 }, { "epoch": 0.31358494792249975, "grad_norm": 1.1784001588821411, "learning_rate": 2.7985e-05, "loss": 0.3587, "step": 5600 }, { "epoch": 0.31364094523462877, "grad_norm": 1.4661425352096558, "learning_rate": 2.7989999999999998e-05, "loss": 0.628, "step": 5601 }, { "epoch": 0.31369694254675773, "grad_norm": 1.212763786315918, "learning_rate": 2.7995e-05, "loss": 0.3783, "step": 5602 }, { "epoch": 0.31375293985888675, "grad_norm": 1.6633150577545166, "learning_rate": 2.8000000000000003e-05, "loss": 0.4354, "step": 5603 }, { "epoch": 0.31380893717101577, "grad_norm": 1.237869381904602, "learning_rate": 2.8005000000000004e-05, "loss": 0.4162, "step": 5604 }, { "epoch": 0.3138649344831448, "grad_norm": 1.1660290956497192, "learning_rate": 2.8010000000000005e-05, "loss": 0.3373, "step": 5605 }, { "epoch": 0.3139209317952738, "grad_norm": 1.2090556621551514, "learning_rate": 2.8015000000000002e-05, "loss": 0.5203, "step": 5606 }, { "epoch": 0.3139769291074028, "grad_norm": 1.4198402166366577, "learning_rate": 2.8020000000000003e-05, "loss": 0.4898, "step": 5607 }, { "epoch": 0.31403292641953184, "grad_norm": 1.1494313478469849, "learning_rate": 2.8025e-05, "loss": 0.4086, "step": 5608 }, { "epoch": 0.31408892373166086, "grad_norm": 1.2049280405044556, "learning_rate": 2.803e-05, "loss": 0.3833, "step": 5609 }, { "epoch": 
0.3141449210437899, "grad_norm": 1.3241567611694336, "learning_rate": 2.8035000000000002e-05, "loss": 0.5409, "step": 5610 }, { "epoch": 0.3142009183559189, "grad_norm": 1.1381441354751587, "learning_rate": 2.804e-05, "loss": 0.416, "step": 5611 }, { "epoch": 0.3142569156680479, "grad_norm": 1.4359488487243652, "learning_rate": 2.8045e-05, "loss": 0.4532, "step": 5612 }, { "epoch": 0.31431291298017694, "grad_norm": 1.5256915092468262, "learning_rate": 2.8050000000000004e-05, "loss": 0.5876, "step": 5613 }, { "epoch": 0.31436891029230596, "grad_norm": 1.111158847808838, "learning_rate": 2.8055000000000005e-05, "loss": 0.3589, "step": 5614 }, { "epoch": 0.314424907604435, "grad_norm": 1.328284740447998, "learning_rate": 2.8060000000000002e-05, "loss": 0.4164, "step": 5615 }, { "epoch": 0.314480904916564, "grad_norm": 1.2936842441558838, "learning_rate": 2.8065000000000003e-05, "loss": 0.6145, "step": 5616 }, { "epoch": 0.314536902228693, "grad_norm": 1.5181560516357422, "learning_rate": 2.807e-05, "loss": 0.4223, "step": 5617 }, { "epoch": 0.31459289954082204, "grad_norm": 1.1503472328186035, "learning_rate": 2.8075e-05, "loss": 0.4605, "step": 5618 }, { "epoch": 0.31464889685295105, "grad_norm": 1.248275637626648, "learning_rate": 2.8080000000000002e-05, "loss": 0.3591, "step": 5619 }, { "epoch": 0.3147048941650801, "grad_norm": 1.1627516746520996, "learning_rate": 2.8085e-05, "loss": 0.3392, "step": 5620 }, { "epoch": 0.3147608914772091, "grad_norm": 1.1381075382232666, "learning_rate": 2.809e-05, "loss": 0.4387, "step": 5621 }, { "epoch": 0.3148168887893381, "grad_norm": 1.2067393064498901, "learning_rate": 2.8094999999999998e-05, "loss": 0.4865, "step": 5622 }, { "epoch": 0.31487288610146713, "grad_norm": 1.7616690397262573, "learning_rate": 2.8100000000000005e-05, "loss": 0.4576, "step": 5623 }, { "epoch": 0.31492888341359615, "grad_norm": 1.2296456098556519, "learning_rate": 2.8105000000000003e-05, "loss": 0.403, "step": 5624 }, { "epoch": 0.31498488072572517, 
"grad_norm": 1.0594286918640137, "learning_rate": 2.8110000000000004e-05, "loss": 0.4521, "step": 5625 }, { "epoch": 0.3150408780378542, "grad_norm": 1.0713024139404297, "learning_rate": 2.8115e-05, "loss": 0.3054, "step": 5626 }, { "epoch": 0.3150968753499832, "grad_norm": 1.0795053243637085, "learning_rate": 2.8120000000000002e-05, "loss": 0.3588, "step": 5627 }, { "epoch": 0.3151528726621122, "grad_norm": 1.4612751007080078, "learning_rate": 2.8125000000000003e-05, "loss": 0.5633, "step": 5628 }, { "epoch": 0.31520886997424125, "grad_norm": 1.1460578441619873, "learning_rate": 2.813e-05, "loss": 0.3124, "step": 5629 }, { "epoch": 0.31526486728637027, "grad_norm": 1.154659628868103, "learning_rate": 2.8135e-05, "loss": 0.4332, "step": 5630 }, { "epoch": 0.3153208645984993, "grad_norm": 1.8693031072616577, "learning_rate": 2.8139999999999998e-05, "loss": 0.5789, "step": 5631 }, { "epoch": 0.3153768619106283, "grad_norm": 1.6017730236053467, "learning_rate": 2.8145e-05, "loss": 0.473, "step": 5632 }, { "epoch": 0.3154328592227573, "grad_norm": 4.376621246337891, "learning_rate": 2.815e-05, "loss": 0.4659, "step": 5633 }, { "epoch": 0.31548885653488634, "grad_norm": 1.125675916671753, "learning_rate": 2.8155000000000004e-05, "loss": 0.4019, "step": 5634 }, { "epoch": 0.31554485384701536, "grad_norm": 1.2513413429260254, "learning_rate": 2.816e-05, "loss": 0.4662, "step": 5635 }, { "epoch": 0.3156008511591444, "grad_norm": 0.9438375234603882, "learning_rate": 2.8165000000000002e-05, "loss": 0.3095, "step": 5636 }, { "epoch": 0.3156568484712734, "grad_norm": 1.113909363746643, "learning_rate": 2.8170000000000003e-05, "loss": 0.5185, "step": 5637 }, { "epoch": 0.3157128457834024, "grad_norm": 1.4468607902526855, "learning_rate": 2.8175e-05, "loss": 0.4859, "step": 5638 }, { "epoch": 0.31576884309553144, "grad_norm": 1.0653977394104004, "learning_rate": 2.818e-05, "loss": 0.3885, "step": 5639 }, { "epoch": 0.31582484040766046, "grad_norm": 1.389087438583374, 
"learning_rate": 2.8185e-05, "loss": 0.3641, "step": 5640 }, { "epoch": 0.3158808377197895, "grad_norm": 1.0898454189300537, "learning_rate": 2.819e-05, "loss": 0.3619, "step": 5641 }, { "epoch": 0.3159368350319185, "grad_norm": 1.2313554286956787, "learning_rate": 2.8195e-05, "loss": 0.3261, "step": 5642 }, { "epoch": 0.31599283234404746, "grad_norm": 1.072247862815857, "learning_rate": 2.8199999999999998e-05, "loss": 0.375, "step": 5643 }, { "epoch": 0.3160488296561765, "grad_norm": 1.2600675821304321, "learning_rate": 2.8205000000000005e-05, "loss": 0.3451, "step": 5644 }, { "epoch": 0.3161048269683055, "grad_norm": 1.0342261791229248, "learning_rate": 2.8210000000000003e-05, "loss": 0.3224, "step": 5645 }, { "epoch": 0.3161608242804345, "grad_norm": 1.348740816116333, "learning_rate": 2.8215000000000003e-05, "loss": 0.5904, "step": 5646 }, { "epoch": 0.31621682159256354, "grad_norm": 1.1729305982589722, "learning_rate": 2.822e-05, "loss": 0.3687, "step": 5647 }, { "epoch": 0.31627281890469255, "grad_norm": 1.4918018579483032, "learning_rate": 2.8225e-05, "loss": 0.5176, "step": 5648 }, { "epoch": 0.3163288162168216, "grad_norm": 1.546281337738037, "learning_rate": 2.8230000000000002e-05, "loss": 0.5422, "step": 5649 }, { "epoch": 0.3163848135289506, "grad_norm": 1.0162818431854248, "learning_rate": 2.8235e-05, "loss": 0.4217, "step": 5650 }, { "epoch": 0.3164408108410796, "grad_norm": 1.5955443382263184, "learning_rate": 2.824e-05, "loss": 0.4193, "step": 5651 }, { "epoch": 0.31649680815320863, "grad_norm": 1.1460846662521362, "learning_rate": 2.8244999999999998e-05, "loss": 0.3683, "step": 5652 }, { "epoch": 0.31655280546533765, "grad_norm": 1.2553859949111938, "learning_rate": 2.825e-05, "loss": 0.5352, "step": 5653 }, { "epoch": 0.31660880277746667, "grad_norm": 1.315063714981079, "learning_rate": 2.8255000000000003e-05, "loss": 0.3756, "step": 5654 }, { "epoch": 0.3166648000895957, "grad_norm": 1.206869125366211, "learning_rate": 2.8260000000000004e-05, 
"loss": 0.452, "step": 5655 }, { "epoch": 0.3167207974017247, "grad_norm": 1.225957989692688, "learning_rate": 2.8265e-05, "loss": 0.539, "step": 5656 }, { "epoch": 0.3167767947138537, "grad_norm": 1.1983261108398438, "learning_rate": 2.8270000000000002e-05, "loss": 0.4334, "step": 5657 }, { "epoch": 0.31683279202598275, "grad_norm": 1.1372549533843994, "learning_rate": 2.8275000000000003e-05, "loss": 0.2428, "step": 5658 }, { "epoch": 0.31688878933811176, "grad_norm": 1.3845407962799072, "learning_rate": 2.828e-05, "loss": 0.4571, "step": 5659 }, { "epoch": 0.3169447866502408, "grad_norm": 1.270957350730896, "learning_rate": 2.8285e-05, "loss": 0.3377, "step": 5660 }, { "epoch": 0.3170007839623698, "grad_norm": 1.497532606124878, "learning_rate": 2.829e-05, "loss": 0.3983, "step": 5661 }, { "epoch": 0.3170567812744988, "grad_norm": 1.2636052370071411, "learning_rate": 2.8295e-05, "loss": 0.5258, "step": 5662 }, { "epoch": 0.31711277858662784, "grad_norm": 1.3569202423095703, "learning_rate": 2.83e-05, "loss": 0.5553, "step": 5663 }, { "epoch": 0.31716877589875686, "grad_norm": 1.2245830297470093, "learning_rate": 2.8305000000000004e-05, "loss": 0.3707, "step": 5664 }, { "epoch": 0.3172247732108859, "grad_norm": 1.4354572296142578, "learning_rate": 2.8310000000000002e-05, "loss": 0.4876, "step": 5665 }, { "epoch": 0.3172807705230149, "grad_norm": 1.2246136665344238, "learning_rate": 2.8315000000000002e-05, "loss": 0.5164, "step": 5666 }, { "epoch": 0.3173367678351439, "grad_norm": 1.0793654918670654, "learning_rate": 2.8320000000000003e-05, "loss": 0.3274, "step": 5667 }, { "epoch": 0.31739276514727294, "grad_norm": 1.2143782377243042, "learning_rate": 2.8325e-05, "loss": 0.5446, "step": 5668 }, { "epoch": 0.31744876245940196, "grad_norm": 1.2183564901351929, "learning_rate": 2.833e-05, "loss": 0.495, "step": 5669 }, { "epoch": 0.317504759771531, "grad_norm": 1.2809075117111206, "learning_rate": 2.8335e-05, "loss": 0.487, "step": 5670 }, { "epoch": 
0.31756075708366, "grad_norm": 1.1521151065826416, "learning_rate": 2.834e-05, "loss": 0.4489, "step": 5671 }, { "epoch": 0.317616754395789, "grad_norm": 1.0466850996017456, "learning_rate": 2.8345e-05, "loss": 0.3761, "step": 5672 }, { "epoch": 0.31767275170791803, "grad_norm": 1.2535468339920044, "learning_rate": 2.8349999999999998e-05, "loss": 0.3309, "step": 5673 }, { "epoch": 0.31772874902004705, "grad_norm": 1.3420789241790771, "learning_rate": 2.8355000000000002e-05, "loss": 0.4376, "step": 5674 }, { "epoch": 0.31778474633217607, "grad_norm": 1.7165738344192505, "learning_rate": 2.8360000000000003e-05, "loss": 0.4187, "step": 5675 }, { "epoch": 0.3178407436443051, "grad_norm": 1.2799228429794312, "learning_rate": 2.8365000000000004e-05, "loss": 0.5057, "step": 5676 }, { "epoch": 0.3178967409564341, "grad_norm": 1.2187089920043945, "learning_rate": 2.837e-05, "loss": 0.4693, "step": 5677 }, { "epoch": 0.31795273826856313, "grad_norm": 1.1975899934768677, "learning_rate": 2.8375000000000002e-05, "loss": 0.5128, "step": 5678 }, { "epoch": 0.31800873558069215, "grad_norm": 1.2510820627212524, "learning_rate": 2.8380000000000003e-05, "loss": 0.3987, "step": 5679 }, { "epoch": 0.31806473289282117, "grad_norm": 1.257769227027893, "learning_rate": 2.8385e-05, "loss": 0.4245, "step": 5680 }, { "epoch": 0.3181207302049502, "grad_norm": 1.138027310371399, "learning_rate": 2.839e-05, "loss": 0.4206, "step": 5681 }, { "epoch": 0.3181767275170792, "grad_norm": 1.360088586807251, "learning_rate": 2.8395e-05, "loss": 0.6143, "step": 5682 }, { "epoch": 0.3182327248292082, "grad_norm": 1.3086761236190796, "learning_rate": 2.84e-05, "loss": 0.4096, "step": 5683 }, { "epoch": 0.3182887221413372, "grad_norm": 1.507885217666626, "learning_rate": 2.8405000000000003e-05, "loss": 0.4123, "step": 5684 }, { "epoch": 0.3183447194534662, "grad_norm": 1.4379240274429321, "learning_rate": 2.8410000000000004e-05, "loss": 0.3721, "step": 5685 }, { "epoch": 0.3184007167655952, "grad_norm": 
1.6149991750717163, "learning_rate": 2.8415e-05, "loss": 0.4733, "step": 5686 }, { "epoch": 0.31845671407772425, "grad_norm": 1.240517497062683, "learning_rate": 2.8420000000000002e-05, "loss": 0.4846, "step": 5687 }, { "epoch": 0.31851271138985326, "grad_norm": 1.319047451019287, "learning_rate": 2.8425000000000003e-05, "loss": 0.3765, "step": 5688 }, { "epoch": 0.3185687087019823, "grad_norm": 1.7602545022964478, "learning_rate": 2.843e-05, "loss": 0.476, "step": 5689 }, { "epoch": 0.3186247060141113, "grad_norm": 1.3483355045318604, "learning_rate": 2.8435e-05, "loss": 0.3854, "step": 5690 }, { "epoch": 0.3186807033262403, "grad_norm": 1.6905882358551025, "learning_rate": 2.844e-05, "loss": 0.5992, "step": 5691 }, { "epoch": 0.31873670063836934, "grad_norm": 1.2765854597091675, "learning_rate": 2.8445e-05, "loss": 0.4451, "step": 5692 }, { "epoch": 0.31879269795049836, "grad_norm": 1.7190817594528198, "learning_rate": 2.845e-05, "loss": 0.5721, "step": 5693 }, { "epoch": 0.3188486952626274, "grad_norm": 1.3409048318862915, "learning_rate": 2.8455000000000005e-05, "loss": 0.6477, "step": 5694 }, { "epoch": 0.3189046925747564, "grad_norm": 1.4718717336654663, "learning_rate": 2.8460000000000002e-05, "loss": 0.5517, "step": 5695 }, { "epoch": 0.3189606898868854, "grad_norm": 1.3900972604751587, "learning_rate": 2.8465000000000003e-05, "loss": 0.5744, "step": 5696 }, { "epoch": 0.31901668719901444, "grad_norm": 1.1296007633209229, "learning_rate": 2.8470000000000004e-05, "loss": 0.3525, "step": 5697 }, { "epoch": 0.31907268451114346, "grad_norm": 1.5070109367370605, "learning_rate": 2.8475e-05, "loss": 0.6085, "step": 5698 }, { "epoch": 0.3191286818232725, "grad_norm": 1.1078076362609863, "learning_rate": 2.8480000000000002e-05, "loss": 0.4084, "step": 5699 }, { "epoch": 0.3191846791354015, "grad_norm": 1.2499613761901855, "learning_rate": 2.8485e-05, "loss": 0.3357, "step": 5700 }, { "epoch": 0.3192406764475305, "grad_norm": 1.331945538520813, "learning_rate": 
2.849e-05, "loss": 0.5819, "step": 5701 }, { "epoch": 0.31929667375965953, "grad_norm": 1.3197909593582153, "learning_rate": 2.8495e-05, "loss": 0.3637, "step": 5702 }, { "epoch": 0.31935267107178855, "grad_norm": 1.1270290613174438, "learning_rate": 2.8499999999999998e-05, "loss": 0.4476, "step": 5703 }, { "epoch": 0.31940866838391757, "grad_norm": 3.274280309677124, "learning_rate": 2.8505000000000002e-05, "loss": 0.4659, "step": 5704 }, { "epoch": 0.3194646656960466, "grad_norm": 1.2685467004776, "learning_rate": 2.8510000000000003e-05, "loss": 0.3911, "step": 5705 }, { "epoch": 0.3195206630081756, "grad_norm": 1.19549560546875, "learning_rate": 2.8515000000000004e-05, "loss": 0.4517, "step": 5706 }, { "epoch": 0.31957666032030463, "grad_norm": 1.320735216140747, "learning_rate": 2.852e-05, "loss": 0.4496, "step": 5707 }, { "epoch": 0.31963265763243365, "grad_norm": 1.380415678024292, "learning_rate": 2.8525000000000002e-05, "loss": 0.4642, "step": 5708 }, { "epoch": 0.31968865494456267, "grad_norm": 1.2442623376846313, "learning_rate": 2.853e-05, "loss": 0.465, "step": 5709 }, { "epoch": 0.3197446522566917, "grad_norm": 1.3137507438659668, "learning_rate": 2.8535e-05, "loss": 0.4749, "step": 5710 }, { "epoch": 0.3198006495688207, "grad_norm": 1.3102186918258667, "learning_rate": 2.854e-05, "loss": 0.4644, "step": 5711 }, { "epoch": 0.3198566468809497, "grad_norm": 1.0463544130325317, "learning_rate": 2.8545e-05, "loss": 0.3313, "step": 5712 }, { "epoch": 0.31991264419307874, "grad_norm": 1.3802485466003418, "learning_rate": 2.855e-05, "loss": 0.4341, "step": 5713 }, { "epoch": 0.31996864150520776, "grad_norm": 3.6623709201812744, "learning_rate": 2.8555000000000004e-05, "loss": 0.5099, "step": 5714 }, { "epoch": 0.3200246388173368, "grad_norm": 1.4516994953155518, "learning_rate": 2.8560000000000004e-05, "loss": 0.4278, "step": 5715 }, { "epoch": 0.3200806361294658, "grad_norm": 1.276397466659546, "learning_rate": 2.8565000000000002e-05, "loss": 0.4023, "step": 
5716 }, { "epoch": 0.3201366334415948, "grad_norm": 1.2371009588241577, "learning_rate": 2.8570000000000003e-05, "loss": 0.445, "step": 5717 }, { "epoch": 0.32019263075372384, "grad_norm": 1.18202543258667, "learning_rate": 2.8575000000000003e-05, "loss": 0.5315, "step": 5718 }, { "epoch": 0.32024862806585286, "grad_norm": 1.3691107034683228, "learning_rate": 2.858e-05, "loss": 0.3928, "step": 5719 }, { "epoch": 0.3203046253779819, "grad_norm": 1.5681381225585938, "learning_rate": 2.8585e-05, "loss": 0.4085, "step": 5720 }, { "epoch": 0.3203606226901109, "grad_norm": 1.030142068862915, "learning_rate": 2.859e-05, "loss": 0.3923, "step": 5721 }, { "epoch": 0.3204166200022399, "grad_norm": 1.2838459014892578, "learning_rate": 2.8595e-05, "loss": 0.5295, "step": 5722 }, { "epoch": 0.32047261731436893, "grad_norm": 1.3073571920394897, "learning_rate": 2.86e-05, "loss": 0.4917, "step": 5723 }, { "epoch": 0.32052861462649795, "grad_norm": 1.3806993961334229, "learning_rate": 2.8605000000000005e-05, "loss": 0.5803, "step": 5724 }, { "epoch": 0.320584611938627, "grad_norm": 1.4577785730361938, "learning_rate": 2.8610000000000002e-05, "loss": 0.4319, "step": 5725 }, { "epoch": 0.32064060925075594, "grad_norm": 1.2400891780853271, "learning_rate": 2.8615000000000003e-05, "loss": 0.5681, "step": 5726 }, { "epoch": 0.32069660656288496, "grad_norm": 1.2736188173294067, "learning_rate": 2.8620000000000004e-05, "loss": 0.496, "step": 5727 }, { "epoch": 0.320752603875014, "grad_norm": 1.7031136751174927, "learning_rate": 2.8625e-05, "loss": 0.314, "step": 5728 }, { "epoch": 0.320808601187143, "grad_norm": 1.4622594118118286, "learning_rate": 2.8630000000000002e-05, "loss": 0.4218, "step": 5729 }, { "epoch": 0.320864598499272, "grad_norm": 1.282241702079773, "learning_rate": 2.8635e-05, "loss": 0.5403, "step": 5730 }, { "epoch": 0.32092059581140103, "grad_norm": 1.379214882850647, "learning_rate": 2.864e-05, "loss": 0.3999, "step": 5731 }, { "epoch": 0.32097659312353005, 
"grad_norm": 1.029502272605896, "learning_rate": 2.8645e-05, "loss": 0.3157, "step": 5732 }, { "epoch": 0.32103259043565907, "grad_norm": 1.2138969898223877, "learning_rate": 2.865e-05, "loss": 0.4902, "step": 5733 }, { "epoch": 0.3210885877477881, "grad_norm": 1.1390117406845093, "learning_rate": 2.8655000000000003e-05, "loss": 0.3516, "step": 5734 }, { "epoch": 0.3211445850599171, "grad_norm": 1.102679967880249, "learning_rate": 2.8660000000000003e-05, "loss": 0.4028, "step": 5735 }, { "epoch": 0.32120058237204613, "grad_norm": 1.5025776624679565, "learning_rate": 2.8665000000000004e-05, "loss": 0.5073, "step": 5736 }, { "epoch": 0.32125657968417515, "grad_norm": 2.1668403148651123, "learning_rate": 2.867e-05, "loss": 0.4717, "step": 5737 }, { "epoch": 0.32131257699630417, "grad_norm": 1.0500065088272095, "learning_rate": 2.8675000000000002e-05, "loss": 0.4076, "step": 5738 }, { "epoch": 0.3213685743084332, "grad_norm": 1.29715096950531, "learning_rate": 2.868e-05, "loss": 0.5435, "step": 5739 }, { "epoch": 0.3214245716205622, "grad_norm": 2.245598316192627, "learning_rate": 2.8685e-05, "loss": 0.4601, "step": 5740 }, { "epoch": 0.3214805689326912, "grad_norm": 1.4639257192611694, "learning_rate": 2.869e-05, "loss": 0.3894, "step": 5741 }, { "epoch": 0.32153656624482024, "grad_norm": 1.1997876167297363, "learning_rate": 2.8695e-05, "loss": 0.3474, "step": 5742 }, { "epoch": 0.32159256355694926, "grad_norm": 1.290321707725525, "learning_rate": 2.87e-05, "loss": 0.4773, "step": 5743 }, { "epoch": 0.3216485608690783, "grad_norm": 1.3255780935287476, "learning_rate": 2.8705000000000004e-05, "loss": 0.54, "step": 5744 }, { "epoch": 0.3217045581812073, "grad_norm": 5.353636264801025, "learning_rate": 2.8710000000000005e-05, "loss": 0.4118, "step": 5745 }, { "epoch": 0.3217605554933363, "grad_norm": 1.1815154552459717, "learning_rate": 2.8715000000000002e-05, "loss": 0.327, "step": 5746 }, { "epoch": 0.32181655280546534, "grad_norm": 1.3262768983840942, "learning_rate": 
2.8720000000000003e-05, "loss": 0.4177, "step": 5747 }, { "epoch": 0.32187255011759436, "grad_norm": 1.1953827142715454, "learning_rate": 2.8725e-05, "loss": 0.4005, "step": 5748 }, { "epoch": 0.3219285474297234, "grad_norm": 1.3174980878829956, "learning_rate": 2.873e-05, "loss": 0.4097, "step": 5749 }, { "epoch": 0.3219845447418524, "grad_norm": 1.2677258253097534, "learning_rate": 2.8735000000000002e-05, "loss": 0.3018, "step": 5750 }, { "epoch": 0.3220405420539814, "grad_norm": 1.2160332202911377, "learning_rate": 2.874e-05, "loss": 0.5729, "step": 5751 }, { "epoch": 0.32209653936611043, "grad_norm": 1.1369692087173462, "learning_rate": 2.8745e-05, "loss": 0.387, "step": 5752 }, { "epoch": 0.32215253667823945, "grad_norm": 1.3775033950805664, "learning_rate": 2.8749999999999997e-05, "loss": 0.5746, "step": 5753 }, { "epoch": 0.3222085339903685, "grad_norm": 1.3365129232406616, "learning_rate": 2.8754999999999998e-05, "loss": 0.4738, "step": 5754 }, { "epoch": 0.3222645313024975, "grad_norm": 1.5569610595703125, "learning_rate": 2.8760000000000002e-05, "loss": 0.4895, "step": 5755 }, { "epoch": 0.3223205286146265, "grad_norm": 1.3198693990707397, "learning_rate": 2.8765000000000003e-05, "loss": 0.3842, "step": 5756 }, { "epoch": 0.32237652592675553, "grad_norm": 1.2834845781326294, "learning_rate": 2.8770000000000004e-05, "loss": 0.4862, "step": 5757 }, { "epoch": 0.32243252323888455, "grad_norm": 1.2282882928848267, "learning_rate": 2.8775e-05, "loss": 0.3435, "step": 5758 }, { "epoch": 0.32248852055101357, "grad_norm": 1.120464563369751, "learning_rate": 2.8780000000000002e-05, "loss": 0.3643, "step": 5759 }, { "epoch": 0.3225445178631426, "grad_norm": 1.117025375366211, "learning_rate": 2.8785e-05, "loss": 0.4665, "step": 5760 }, { "epoch": 0.3226005151752716, "grad_norm": 1.0791250467300415, "learning_rate": 2.879e-05, "loss": 0.3406, "step": 5761 }, { "epoch": 0.3226565124874006, "grad_norm": 1.3383996486663818, "learning_rate": 2.8795e-05, "loss": 0.3864, 
"step": 5762 }, { "epoch": 0.32271250979952965, "grad_norm": 1.1708077192306519, "learning_rate": 2.88e-05, "loss": 0.3926, "step": 5763 }, { "epoch": 0.32276850711165866, "grad_norm": 1.342558741569519, "learning_rate": 2.8805e-05, "loss": 0.3435, "step": 5764 }, { "epoch": 0.3228245044237877, "grad_norm": 1.3170289993286133, "learning_rate": 2.8810000000000004e-05, "loss": 0.4149, "step": 5765 }, { "epoch": 0.3228805017359167, "grad_norm": 1.3614591360092163, "learning_rate": 2.8815000000000004e-05, "loss": 0.3713, "step": 5766 }, { "epoch": 0.32293649904804567, "grad_norm": 1.175022840499878, "learning_rate": 2.8820000000000002e-05, "loss": 0.4052, "step": 5767 }, { "epoch": 0.3229924963601747, "grad_norm": 0.9799365997314453, "learning_rate": 2.8825000000000003e-05, "loss": 0.2728, "step": 5768 }, { "epoch": 0.3230484936723037, "grad_norm": 1.7954214811325073, "learning_rate": 2.883e-05, "loss": 0.5044, "step": 5769 }, { "epoch": 0.3231044909844327, "grad_norm": 1.3334827423095703, "learning_rate": 2.8835e-05, "loss": 0.4983, "step": 5770 }, { "epoch": 0.32316048829656174, "grad_norm": 1.1819570064544678, "learning_rate": 2.8840000000000002e-05, "loss": 0.4125, "step": 5771 }, { "epoch": 0.32321648560869076, "grad_norm": 1.1033072471618652, "learning_rate": 2.8845e-05, "loss": 0.3211, "step": 5772 }, { "epoch": 0.3232724829208198, "grad_norm": 1.1614561080932617, "learning_rate": 2.885e-05, "loss": 0.5159, "step": 5773 }, { "epoch": 0.3233284802329488, "grad_norm": 1.4699541330337524, "learning_rate": 2.8854999999999997e-05, "loss": 0.6175, "step": 5774 }, { "epoch": 0.3233844775450778, "grad_norm": 0.9454047083854675, "learning_rate": 2.8860000000000005e-05, "loss": 0.3131, "step": 5775 }, { "epoch": 0.32344047485720684, "grad_norm": 1.3530412912368774, "learning_rate": 2.8865000000000002e-05, "loss": 0.4906, "step": 5776 }, { "epoch": 0.32349647216933586, "grad_norm": 1.1419742107391357, "learning_rate": 2.8870000000000003e-05, "loss": 0.4197, "step": 5777 }, 
{ "epoch": 0.3235524694814649, "grad_norm": 1.2685377597808838, "learning_rate": 2.8875e-05, "loss": 0.4257, "step": 5778 }, { "epoch": 0.3236084667935939, "grad_norm": 1.2069966793060303, "learning_rate": 2.888e-05, "loss": 0.479, "step": 5779 }, { "epoch": 0.3236644641057229, "grad_norm": 1.2854137420654297, "learning_rate": 2.8885000000000002e-05, "loss": 0.2771, "step": 5780 }, { "epoch": 0.32372046141785193, "grad_norm": 1.4132953882217407, "learning_rate": 2.889e-05, "loss": 0.5198, "step": 5781 }, { "epoch": 0.32377645872998095, "grad_norm": 1.051746129989624, "learning_rate": 2.8895e-05, "loss": 0.3799, "step": 5782 }, { "epoch": 0.32383245604211, "grad_norm": 1.1334714889526367, "learning_rate": 2.8899999999999998e-05, "loss": 0.3857, "step": 5783 }, { "epoch": 0.323888453354239, "grad_norm": 1.6783074140548706, "learning_rate": 2.8905e-05, "loss": 0.5421, "step": 5784 }, { "epoch": 0.323944450666368, "grad_norm": 1.5858482122421265, "learning_rate": 2.8910000000000003e-05, "loss": 0.5914, "step": 5785 }, { "epoch": 0.32400044797849703, "grad_norm": 1.5772172212600708, "learning_rate": 2.8915000000000004e-05, "loss": 0.4384, "step": 5786 }, { "epoch": 0.32405644529062605, "grad_norm": 1.386919379234314, "learning_rate": 2.8920000000000004e-05, "loss": 0.5433, "step": 5787 }, { "epoch": 0.32411244260275507, "grad_norm": 1.2350701093673706, "learning_rate": 2.8925000000000002e-05, "loss": 0.2706, "step": 5788 }, { "epoch": 0.3241684399148841, "grad_norm": 1.2195590734481812, "learning_rate": 2.8930000000000003e-05, "loss": 0.4213, "step": 5789 }, { "epoch": 0.3242244372270131, "grad_norm": 1.4639583826065063, "learning_rate": 2.8935e-05, "loss": 0.5474, "step": 5790 }, { "epoch": 0.3242804345391421, "grad_norm": 1.1940836906433105, "learning_rate": 2.894e-05, "loss": 0.489, "step": 5791 }, { "epoch": 0.32433643185127115, "grad_norm": 1.021995186805725, "learning_rate": 2.8945e-05, "loss": 0.3867, "step": 5792 }, { "epoch": 0.32439242916340016, "grad_norm": 
1.2153525352478027, "learning_rate": 2.895e-05, "loss": 0.3555, "step": 5793 }, { "epoch": 0.3244484264755292, "grad_norm": 1.187088966369629, "learning_rate": 2.8955e-05, "loss": 0.4573, "step": 5794 }, { "epoch": 0.3245044237876582, "grad_norm": 6.31917142868042, "learning_rate": 2.8960000000000004e-05, "loss": 0.3281, "step": 5795 }, { "epoch": 0.3245604210997872, "grad_norm": 1.2404654026031494, "learning_rate": 2.8965000000000005e-05, "loss": 0.372, "step": 5796 }, { "epoch": 0.32461641841191624, "grad_norm": 1.165102243423462, "learning_rate": 2.8970000000000002e-05, "loss": 0.3417, "step": 5797 }, { "epoch": 0.32467241572404526, "grad_norm": 1.2347089052200317, "learning_rate": 2.8975000000000003e-05, "loss": 0.4466, "step": 5798 }, { "epoch": 0.3247284130361743, "grad_norm": 1.1590970754623413, "learning_rate": 2.898e-05, "loss": 0.4122, "step": 5799 }, { "epoch": 0.3247844103483033, "grad_norm": 1.2014491558074951, "learning_rate": 2.8985e-05, "loss": 0.5527, "step": 5800 }, { "epoch": 0.3248404076604323, "grad_norm": 1.2441595792770386, "learning_rate": 2.8990000000000002e-05, "loss": 0.6121, "step": 5801 }, { "epoch": 0.32489640497256134, "grad_norm": 0.9242920875549316, "learning_rate": 2.8995e-05, "loss": 0.3175, "step": 5802 }, { "epoch": 0.32495240228469036, "grad_norm": 1.5692812204360962, "learning_rate": 2.9e-05, "loss": 0.6651, "step": 5803 }, { "epoch": 0.3250083995968194, "grad_norm": 1.4126709699630737, "learning_rate": 2.9004999999999998e-05, "loss": 0.3985, "step": 5804 }, { "epoch": 0.3250643969089484, "grad_norm": 1.0389025211334229, "learning_rate": 2.9010000000000005e-05, "loss": 0.3518, "step": 5805 }, { "epoch": 0.3251203942210774, "grad_norm": 1.4933347702026367, "learning_rate": 2.9015000000000003e-05, "loss": 0.4727, "step": 5806 }, { "epoch": 0.32517639153320643, "grad_norm": 1.28818941116333, "learning_rate": 2.9020000000000003e-05, "loss": 0.4388, "step": 5807 }, { "epoch": 0.3252323888453354, "grad_norm": 1.238250494003296, 
"learning_rate": 2.9025e-05, "loss": 0.5254, "step": 5808 }, { "epoch": 0.3252883861574644, "grad_norm": 5.895007610321045, "learning_rate": 2.903e-05, "loss": 0.3471, "step": 5809 }, { "epoch": 0.32534438346959343, "grad_norm": 1.2327629327774048, "learning_rate": 2.9035000000000002e-05, "loss": 0.3286, "step": 5810 }, { "epoch": 0.32540038078172245, "grad_norm": 1.2726235389709473, "learning_rate": 2.904e-05, "loss": 0.474, "step": 5811 }, { "epoch": 0.3254563780938515, "grad_norm": 1.2369911670684814, "learning_rate": 2.9045e-05, "loss": 0.4916, "step": 5812 }, { "epoch": 0.3255123754059805, "grad_norm": 1.1697537899017334, "learning_rate": 2.9049999999999998e-05, "loss": 0.4745, "step": 5813 }, { "epoch": 0.3255683727181095, "grad_norm": 1.4516698122024536, "learning_rate": 2.9055e-05, "loss": 0.4536, "step": 5814 }, { "epoch": 0.32562437003023853, "grad_norm": 1.222223162651062, "learning_rate": 2.9060000000000003e-05, "loss": 0.455, "step": 5815 }, { "epoch": 0.32568036734236755, "grad_norm": 1.2108324766159058, "learning_rate": 2.9065000000000004e-05, "loss": 0.4331, "step": 5816 }, { "epoch": 0.32573636465449657, "grad_norm": 1.1987415552139282, "learning_rate": 2.907e-05, "loss": 0.4094, "step": 5817 }, { "epoch": 0.3257923619666256, "grad_norm": 1.2771337032318115, "learning_rate": 2.9075000000000002e-05, "loss": 0.4081, "step": 5818 }, { "epoch": 0.3258483592787546, "grad_norm": 1.3818776607513428, "learning_rate": 2.9080000000000003e-05, "loss": 0.5237, "step": 5819 }, { "epoch": 0.3259043565908836, "grad_norm": 1.0041522979736328, "learning_rate": 2.9085e-05, "loss": 0.4008, "step": 5820 }, { "epoch": 0.32596035390301265, "grad_norm": 1.1087275743484497, "learning_rate": 2.909e-05, "loss": 0.3765, "step": 5821 }, { "epoch": 0.32601635121514166, "grad_norm": 1.0786428451538086, "learning_rate": 2.9095e-05, "loss": 0.4555, "step": 5822 }, { "epoch": 0.3260723485272707, "grad_norm": 1.2589988708496094, "learning_rate": 2.91e-05, "loss": 0.3989, "step": 
5823 }, { "epoch": 0.3261283458393997, "grad_norm": 1.1303046941757202, "learning_rate": 2.9105e-05, "loss": 0.4542, "step": 5824 }, { "epoch": 0.3261843431515287, "grad_norm": 1.238532543182373, "learning_rate": 2.9110000000000004e-05, "loss": 0.4887, "step": 5825 }, { "epoch": 0.32624034046365774, "grad_norm": 1.1298034191131592, "learning_rate": 2.9115000000000005e-05, "loss": 0.3971, "step": 5826 }, { "epoch": 0.32629633777578676, "grad_norm": 1.25812828540802, "learning_rate": 2.9120000000000002e-05, "loss": 0.3751, "step": 5827 }, { "epoch": 0.3263523350879158, "grad_norm": 2.0425925254821777, "learning_rate": 2.9125000000000003e-05, "loss": 0.6093, "step": 5828 }, { "epoch": 0.3264083324000448, "grad_norm": 1.1871418952941895, "learning_rate": 2.913e-05, "loss": 0.3733, "step": 5829 }, { "epoch": 0.3264643297121738, "grad_norm": 1.1049633026123047, "learning_rate": 2.9135e-05, "loss": 0.3699, "step": 5830 }, { "epoch": 0.32652032702430284, "grad_norm": 1.097481369972229, "learning_rate": 2.9140000000000002e-05, "loss": 0.3666, "step": 5831 }, { "epoch": 0.32657632433643186, "grad_norm": 1.218788743019104, "learning_rate": 2.9145e-05, "loss": 0.4946, "step": 5832 }, { "epoch": 0.3266323216485609, "grad_norm": 1.073500394821167, "learning_rate": 2.915e-05, "loss": 0.4256, "step": 5833 }, { "epoch": 0.3266883189606899, "grad_norm": 1.147639513015747, "learning_rate": 2.9154999999999998e-05, "loss": 0.4234, "step": 5834 }, { "epoch": 0.3267443162728189, "grad_norm": 1.3192564249038696, "learning_rate": 2.9160000000000005e-05, "loss": 0.398, "step": 5835 }, { "epoch": 0.32680031358494793, "grad_norm": 1.2515380382537842, "learning_rate": 2.9165000000000003e-05, "loss": 0.3749, "step": 5836 }, { "epoch": 0.32685631089707695, "grad_norm": 1.0096896886825562, "learning_rate": 2.9170000000000004e-05, "loss": 0.377, "step": 5837 }, { "epoch": 0.32691230820920597, "grad_norm": 1.4385930299758911, "learning_rate": 2.9175e-05, "loss": 0.42, "step": 5838 }, { "epoch": 
0.326968305521335, "grad_norm": 1.1319615840911865, "learning_rate": 2.9180000000000002e-05, "loss": 0.4285, "step": 5839 }, { "epoch": 0.327024302833464, "grad_norm": 1.2909079790115356, "learning_rate": 2.9185000000000003e-05, "loss": 0.3371, "step": 5840 }, { "epoch": 0.32708030014559303, "grad_norm": 1.3051002025604248, "learning_rate": 2.919e-05, "loss": 0.4579, "step": 5841 }, { "epoch": 0.32713629745772205, "grad_norm": 1.2702544927597046, "learning_rate": 2.9195e-05, "loss": 0.4637, "step": 5842 }, { "epoch": 0.32719229476985107, "grad_norm": 1.6683790683746338, "learning_rate": 2.9199999999999998e-05, "loss": 0.3962, "step": 5843 }, { "epoch": 0.3272482920819801, "grad_norm": 1.2359310388565063, "learning_rate": 2.9205e-05, "loss": 0.4835, "step": 5844 }, { "epoch": 0.3273042893941091, "grad_norm": 1.5957162380218506, "learning_rate": 2.9210000000000003e-05, "loss": 0.529, "step": 5845 }, { "epoch": 0.3273602867062381, "grad_norm": 1.5851261615753174, "learning_rate": 2.9215000000000004e-05, "loss": 0.6152, "step": 5846 }, { "epoch": 0.32741628401836714, "grad_norm": 1.1549354791641235, "learning_rate": 2.922e-05, "loss": 0.5002, "step": 5847 }, { "epoch": 0.32747228133049616, "grad_norm": 1.7438483238220215, "learning_rate": 2.9225000000000002e-05, "loss": 0.5194, "step": 5848 }, { "epoch": 0.3275282786426252, "grad_norm": 1.0823826789855957, "learning_rate": 2.9230000000000003e-05, "loss": 0.3454, "step": 5849 }, { "epoch": 0.32758427595475414, "grad_norm": 1.1117078065872192, "learning_rate": 2.9235e-05, "loss": 0.3429, "step": 5850 }, { "epoch": 0.32764027326688316, "grad_norm": 1.3410907983779907, "learning_rate": 2.924e-05, "loss": 0.4798, "step": 5851 }, { "epoch": 0.3276962705790122, "grad_norm": 1.2882479429244995, "learning_rate": 2.9245e-05, "loss": 0.4065, "step": 5852 }, { "epoch": 0.3277522678911412, "grad_norm": 1.0705493688583374, "learning_rate": 2.925e-05, "loss": 0.4158, "step": 5853 }, { "epoch": 0.3278082652032702, "grad_norm": 
1.15053129196167, "learning_rate": 2.9255e-05, "loss": 0.44, "step": 5854 }, { "epoch": 0.32786426251539924, "grad_norm": 1.1476150751113892, "learning_rate": 2.9260000000000004e-05, "loss": 0.4863, "step": 5855 }, { "epoch": 0.32792025982752826, "grad_norm": 1.0913232564926147, "learning_rate": 2.9265000000000002e-05, "loss": 0.3419, "step": 5856 }, { "epoch": 0.3279762571396573, "grad_norm": 1.147476315498352, "learning_rate": 2.9270000000000003e-05, "loss": 0.524, "step": 5857 }, { "epoch": 0.3280322544517863, "grad_norm": 1.0973833799362183, "learning_rate": 2.9275000000000003e-05, "loss": 0.4205, "step": 5858 }, { "epoch": 0.3280882517639153, "grad_norm": 1.4630779027938843, "learning_rate": 2.928e-05, "loss": 0.5707, "step": 5859 }, { "epoch": 0.32814424907604434, "grad_norm": 1.075952172279358, "learning_rate": 2.9285e-05, "loss": 0.276, "step": 5860 }, { "epoch": 0.32820024638817336, "grad_norm": 1.5002340078353882, "learning_rate": 2.929e-05, "loss": 0.4981, "step": 5861 }, { "epoch": 0.3282562437003024, "grad_norm": 0.9377226233482361, "learning_rate": 2.9295e-05, "loss": 0.3876, "step": 5862 }, { "epoch": 0.3283122410124314, "grad_norm": 1.3477425575256348, "learning_rate": 2.93e-05, "loss": 0.4883, "step": 5863 }, { "epoch": 0.3283682383245604, "grad_norm": 1.6481932401657104, "learning_rate": 2.9304999999999998e-05, "loss": 0.5014, "step": 5864 }, { "epoch": 0.32842423563668943, "grad_norm": 1.1377789974212646, "learning_rate": 2.9310000000000006e-05, "loss": 0.3973, "step": 5865 }, { "epoch": 0.32848023294881845, "grad_norm": 1.1514617204666138, "learning_rate": 2.9315000000000003e-05, "loss": 0.4386, "step": 5866 }, { "epoch": 0.32853623026094747, "grad_norm": 1.2926123142242432, "learning_rate": 2.9320000000000004e-05, "loss": 0.4478, "step": 5867 }, { "epoch": 0.3285922275730765, "grad_norm": 1.2897146940231323, "learning_rate": 2.9325e-05, "loss": 0.3578, "step": 5868 }, { "epoch": 0.3286482248852055, "grad_norm": 1.2622413635253906, 
"learning_rate": 2.9330000000000002e-05, "loss": 0.4091, "step": 5869 }, { "epoch": 0.32870422219733453, "grad_norm": 1.2460603713989258, "learning_rate": 2.9335000000000003e-05, "loss": 0.3957, "step": 5870 }, { "epoch": 0.32876021950946355, "grad_norm": 1.0655896663665771, "learning_rate": 2.934e-05, "loss": 0.4109, "step": 5871 }, { "epoch": 0.32881621682159257, "grad_norm": 1.2387356758117676, "learning_rate": 2.9345e-05, "loss": 0.3508, "step": 5872 }, { "epoch": 0.3288722141337216, "grad_norm": 1.2638161182403564, "learning_rate": 2.935e-05, "loss": 0.4697, "step": 5873 }, { "epoch": 0.3289282114458506, "grad_norm": 1.4944034814834595, "learning_rate": 2.9355e-05, "loss": 0.4519, "step": 5874 }, { "epoch": 0.3289842087579796, "grad_norm": 1.0900880098342896, "learning_rate": 2.9360000000000003e-05, "loss": 0.4708, "step": 5875 }, { "epoch": 0.32904020607010864, "grad_norm": 1.1817508935928345, "learning_rate": 2.9365000000000004e-05, "loss": 0.4162, "step": 5876 }, { "epoch": 0.32909620338223766, "grad_norm": 1.2182482481002808, "learning_rate": 2.9370000000000002e-05, "loss": 0.4709, "step": 5877 }, { "epoch": 0.3291522006943667, "grad_norm": 1.1931767463684082, "learning_rate": 2.9375000000000003e-05, "loss": 0.3835, "step": 5878 }, { "epoch": 0.3292081980064957, "grad_norm": 1.245471715927124, "learning_rate": 2.9380000000000003e-05, "loss": 0.4692, "step": 5879 }, { "epoch": 0.3292641953186247, "grad_norm": 1.108049750328064, "learning_rate": 2.9385e-05, "loss": 0.5946, "step": 5880 }, { "epoch": 0.32932019263075374, "grad_norm": 1.0938096046447754, "learning_rate": 2.939e-05, "loss": 0.3256, "step": 5881 }, { "epoch": 0.32937618994288276, "grad_norm": 1.4213799238204956, "learning_rate": 2.9395e-05, "loss": 0.3809, "step": 5882 }, { "epoch": 0.3294321872550118, "grad_norm": 0.97994464635849, "learning_rate": 2.94e-05, "loss": 0.2664, "step": 5883 }, { "epoch": 0.3294881845671408, "grad_norm": 1.332228183746338, "learning_rate": 2.9405e-05, "loss": 
0.4286, "step": 5884 }, { "epoch": 0.3295441818792698, "grad_norm": 1.1238287687301636, "learning_rate": 2.9409999999999998e-05, "loss": 0.3891, "step": 5885 }, { "epoch": 0.32960017919139883, "grad_norm": 1.1983394622802734, "learning_rate": 2.9415000000000002e-05, "loss": 0.4452, "step": 5886 }, { "epoch": 0.32965617650352785, "grad_norm": 1.1007509231567383, "learning_rate": 2.9420000000000003e-05, "loss": 0.3467, "step": 5887 }, { "epoch": 0.3297121738156569, "grad_norm": 1.1530849933624268, "learning_rate": 2.9425000000000004e-05, "loss": 0.3385, "step": 5888 }, { "epoch": 0.3297681711277859, "grad_norm": 1.2139053344726562, "learning_rate": 2.943e-05, "loss": 0.3979, "step": 5889 }, { "epoch": 0.3298241684399149, "grad_norm": 1.1592355966567993, "learning_rate": 2.9435000000000002e-05, "loss": 0.4563, "step": 5890 }, { "epoch": 0.3298801657520439, "grad_norm": 1.191756248474121, "learning_rate": 2.944e-05, "loss": 0.3815, "step": 5891 }, { "epoch": 0.3299361630641729, "grad_norm": 1.5103602409362793, "learning_rate": 2.9445e-05, "loss": 0.5304, "step": 5892 }, { "epoch": 0.3299921603763019, "grad_norm": 1.2559441328048706, "learning_rate": 2.945e-05, "loss": 0.4914, "step": 5893 }, { "epoch": 0.33004815768843093, "grad_norm": 1.2686264514923096, "learning_rate": 2.9455e-05, "loss": 0.4153, "step": 5894 }, { "epoch": 0.33010415500055995, "grad_norm": 1.3157553672790527, "learning_rate": 2.946e-05, "loss": 0.3492, "step": 5895 }, { "epoch": 0.33016015231268897, "grad_norm": 1.3160196542739868, "learning_rate": 2.9465000000000003e-05, "loss": 0.4295, "step": 5896 }, { "epoch": 0.330216149624818, "grad_norm": 1.253178596496582, "learning_rate": 2.9470000000000004e-05, "loss": 0.3636, "step": 5897 }, { "epoch": 0.330272146936947, "grad_norm": 1.1357029676437378, "learning_rate": 2.9475e-05, "loss": 0.3601, "step": 5898 }, { "epoch": 0.33032814424907603, "grad_norm": 1.3248504400253296, "learning_rate": 2.9480000000000002e-05, "loss": 0.5028, "step": 5899 }, { 
"epoch": 0.33038414156120505, "grad_norm": 0.962448239326477, "learning_rate": 2.9485000000000003e-05, "loss": 0.296, "step": 5900 }, { "epoch": 0.33044013887333407, "grad_norm": 1.3095908164978027, "learning_rate": 2.949e-05, "loss": 0.3847, "step": 5901 }, { "epoch": 0.3304961361854631, "grad_norm": 0.9791283011436462, "learning_rate": 2.9495e-05, "loss": 0.2507, "step": 5902 }, { "epoch": 0.3305521334975921, "grad_norm": 1.371442198753357, "learning_rate": 2.95e-05, "loss": 0.3895, "step": 5903 }, { "epoch": 0.3306081308097211, "grad_norm": 1.227273941040039, "learning_rate": 2.9505e-05, "loss": 0.3704, "step": 5904 }, { "epoch": 0.33066412812185014, "grad_norm": 1.2468384504318237, "learning_rate": 2.951e-05, "loss": 0.4539, "step": 5905 }, { "epoch": 0.33072012543397916, "grad_norm": 1.2714205980300903, "learning_rate": 2.9515000000000005e-05, "loss": 0.4572, "step": 5906 }, { "epoch": 0.3307761227461082, "grad_norm": 2.2266745567321777, "learning_rate": 2.9520000000000002e-05, "loss": 0.4953, "step": 5907 }, { "epoch": 0.3308321200582372, "grad_norm": 1.3567014932632446, "learning_rate": 2.9525000000000003e-05, "loss": 0.5034, "step": 5908 }, { "epoch": 0.3308881173703662, "grad_norm": 0.9874616265296936, "learning_rate": 2.9530000000000004e-05, "loss": 0.3476, "step": 5909 }, { "epoch": 0.33094411468249524, "grad_norm": 1.2961267232894897, "learning_rate": 2.9535e-05, "loss": 0.6049, "step": 5910 }, { "epoch": 0.33100011199462426, "grad_norm": 1.4374582767486572, "learning_rate": 2.9540000000000002e-05, "loss": 0.4624, "step": 5911 }, { "epoch": 0.3310561093067533, "grad_norm": 2.4435040950775146, "learning_rate": 2.9545e-05, "loss": 0.5334, "step": 5912 }, { "epoch": 0.3311121066188823, "grad_norm": 1.7158364057540894, "learning_rate": 2.955e-05, "loss": 0.5466, "step": 5913 }, { "epoch": 0.3311681039310113, "grad_norm": 1.1626513004302979, "learning_rate": 2.9555e-05, "loss": 0.3731, "step": 5914 }, { "epoch": 0.33122410124314033, "grad_norm": 
1.1088207960128784, "learning_rate": 2.9559999999999998e-05, "loss": 0.5113, "step": 5915 }, { "epoch": 0.33128009855526935, "grad_norm": 1.0978524684906006, "learning_rate": 2.9565000000000002e-05, "loss": 0.3087, "step": 5916 }, { "epoch": 0.3313360958673984, "grad_norm": 1.5576510429382324, "learning_rate": 2.9570000000000003e-05, "loss": 0.3773, "step": 5917 }, { "epoch": 0.3313920931795274, "grad_norm": 1.8396542072296143, "learning_rate": 2.9575000000000004e-05, "loss": 0.4837, "step": 5918 }, { "epoch": 0.3314480904916564, "grad_norm": 1.2947006225585938, "learning_rate": 2.958e-05, "loss": 0.4933, "step": 5919 }, { "epoch": 0.33150408780378543, "grad_norm": 1.352721095085144, "learning_rate": 2.9585000000000002e-05, "loss": 0.4512, "step": 5920 }, { "epoch": 0.33156008511591445, "grad_norm": 1.2786140441894531, "learning_rate": 2.959e-05, "loss": 0.5311, "step": 5921 }, { "epoch": 0.33161608242804347, "grad_norm": 1.3983354568481445, "learning_rate": 2.9595e-05, "loss": 0.4592, "step": 5922 }, { "epoch": 0.3316720797401725, "grad_norm": 1.2974272966384888, "learning_rate": 2.96e-05, "loss": 0.3691, "step": 5923 }, { "epoch": 0.3317280770523015, "grad_norm": 1.0443476438522339, "learning_rate": 2.9605e-05, "loss": 0.4561, "step": 5924 }, { "epoch": 0.3317840743644305, "grad_norm": 1.1290427446365356, "learning_rate": 2.961e-05, "loss": 0.3246, "step": 5925 }, { "epoch": 0.33184007167655954, "grad_norm": 1.2534984350204468, "learning_rate": 2.9615000000000004e-05, "loss": 0.4785, "step": 5926 }, { "epoch": 0.33189606898868856, "grad_norm": 1.167789340019226, "learning_rate": 2.9620000000000004e-05, "loss": 0.3557, "step": 5927 }, { "epoch": 0.3319520663008176, "grad_norm": 1.1558021306991577, "learning_rate": 2.9625000000000002e-05, "loss": 0.4502, "step": 5928 }, { "epoch": 0.3320080636129466, "grad_norm": 1.1212310791015625, "learning_rate": 2.9630000000000003e-05, "loss": 0.3688, "step": 5929 }, { "epoch": 0.3320640609250756, "grad_norm": 
1.2709614038467407, "learning_rate": 2.9635e-05, "loss": 0.5522, "step": 5930 }, { "epoch": 0.33212005823720464, "grad_norm": 1.2580879926681519, "learning_rate": 2.964e-05, "loss": 0.3608, "step": 5931 }, { "epoch": 0.3321760555493336, "grad_norm": 1.2257146835327148, "learning_rate": 2.9645e-05, "loss": 0.3542, "step": 5932 }, { "epoch": 0.3322320528614626, "grad_norm": 2.1395881175994873, "learning_rate": 2.965e-05, "loss": 0.3611, "step": 5933 }, { "epoch": 0.33228805017359164, "grad_norm": 1.2016584873199463, "learning_rate": 2.9655e-05, "loss": 0.3914, "step": 5934 }, { "epoch": 0.33234404748572066, "grad_norm": 1.1124950647354126, "learning_rate": 2.9659999999999997e-05, "loss": 0.3443, "step": 5935 }, { "epoch": 0.3324000447978497, "grad_norm": 0.9924523234367371, "learning_rate": 2.9665000000000005e-05, "loss": 0.343, "step": 5936 }, { "epoch": 0.3324560421099787, "grad_norm": 1.3250898122787476, "learning_rate": 2.9670000000000002e-05, "loss": 0.5017, "step": 5937 }, { "epoch": 0.3325120394221077, "grad_norm": 1.2025245428085327, "learning_rate": 2.9675000000000003e-05, "loss": 0.4645, "step": 5938 }, { "epoch": 0.33256803673423674, "grad_norm": 1.294891119003296, "learning_rate": 2.9680000000000004e-05, "loss": 0.3881, "step": 5939 }, { "epoch": 0.33262403404636576, "grad_norm": 1.1268936395645142, "learning_rate": 2.9685e-05, "loss": 0.3423, "step": 5940 }, { "epoch": 0.3326800313584948, "grad_norm": 1.1726906299591064, "learning_rate": 2.9690000000000002e-05, "loss": 0.4491, "step": 5941 }, { "epoch": 0.3327360286706238, "grad_norm": 1.1992634534835815, "learning_rate": 2.9695e-05, "loss": 0.527, "step": 5942 }, { "epoch": 0.3327920259827528, "grad_norm": 1.3620399236679077, "learning_rate": 2.97e-05, "loss": 0.4762, "step": 5943 }, { "epoch": 0.33284802329488183, "grad_norm": 1.216871738433838, "learning_rate": 2.9705e-05, "loss": 0.468, "step": 5944 }, { "epoch": 0.33290402060701085, "grad_norm": 1.324371099472046, "learning_rate": 2.971e-05, "loss": 
0.3604, "step": 5945 }, { "epoch": 0.3329600179191399, "grad_norm": 1.2822390794754028, "learning_rate": 2.9715000000000003e-05, "loss": 0.6002, "step": 5946 }, { "epoch": 0.3330160152312689, "grad_norm": 1.1421071290969849, "learning_rate": 2.9720000000000003e-05, "loss": 0.3899, "step": 5947 }, { "epoch": 0.3330720125433979, "grad_norm": 1.2771941423416138, "learning_rate": 2.9725000000000004e-05, "loss": 0.4653, "step": 5948 }, { "epoch": 0.33312800985552693, "grad_norm": 1.1364119052886963, "learning_rate": 2.973e-05, "loss": 0.4113, "step": 5949 }, { "epoch": 0.33318400716765595, "grad_norm": 1.1866883039474487, "learning_rate": 2.9735000000000002e-05, "loss": 0.3082, "step": 5950 }, { "epoch": 0.33324000447978497, "grad_norm": 1.2553757429122925, "learning_rate": 2.974e-05, "loss": 0.4513, "step": 5951 }, { "epoch": 0.333296001791914, "grad_norm": 1.2461782693862915, "learning_rate": 2.9745e-05, "loss": 0.4556, "step": 5952 }, { "epoch": 0.333351999104043, "grad_norm": 1.210750699043274, "learning_rate": 2.975e-05, "loss": 0.4, "step": 5953 }, { "epoch": 0.333407996416172, "grad_norm": 1.0719366073608398, "learning_rate": 2.9755e-05, "loss": 0.315, "step": 5954 }, { "epoch": 0.33346399372830104, "grad_norm": 2.0919089317321777, "learning_rate": 2.976e-05, "loss": 0.4499, "step": 5955 }, { "epoch": 0.33351999104043006, "grad_norm": 1.1930122375488281, "learning_rate": 2.9765000000000004e-05, "loss": 0.4203, "step": 5956 }, { "epoch": 0.3335759883525591, "grad_norm": 1.2995959520339966, "learning_rate": 2.9770000000000005e-05, "loss": 0.4431, "step": 5957 }, { "epoch": 0.3336319856646881, "grad_norm": 1.2213457822799683, "learning_rate": 2.9775000000000002e-05, "loss": 0.4014, "step": 5958 }, { "epoch": 0.3336879829768171, "grad_norm": 1.2256245613098145, "learning_rate": 2.9780000000000003e-05, "loss": 0.3807, "step": 5959 }, { "epoch": 0.33374398028894614, "grad_norm": 1.2050431966781616, "learning_rate": 2.9785e-05, "loss": 0.4637, "step": 5960 }, { "epoch": 
0.33379997760107516, "grad_norm": 1.3042926788330078, "learning_rate": 2.979e-05, "loss": 0.4021, "step": 5961 }, { "epoch": 0.3338559749132042, "grad_norm": 1.3158693313598633, "learning_rate": 2.9795000000000002e-05, "loss": 0.3686, "step": 5962 }, { "epoch": 0.3339119722253332, "grad_norm": 1.1798471212387085, "learning_rate": 2.98e-05, "loss": 0.5074, "step": 5963 }, { "epoch": 0.3339679695374622, "grad_norm": 4.180051326751709, "learning_rate": 2.9805e-05, "loss": 0.4549, "step": 5964 }, { "epoch": 0.33402396684959124, "grad_norm": 1.3571497201919556, "learning_rate": 2.9809999999999997e-05, "loss": 0.5164, "step": 5965 }, { "epoch": 0.33407996416172026, "grad_norm": 1.260483980178833, "learning_rate": 2.9815000000000005e-05, "loss": 0.4295, "step": 5966 }, { "epoch": 0.3341359614738493, "grad_norm": 4.337609767913818, "learning_rate": 2.9820000000000002e-05, "loss": 0.5291, "step": 5967 }, { "epoch": 0.3341919587859783, "grad_norm": 1.2313674688339233, "learning_rate": 2.9825000000000003e-05, "loss": 0.3771, "step": 5968 }, { "epoch": 0.3342479560981073, "grad_norm": 1.0947515964508057, "learning_rate": 2.9830000000000004e-05, "loss": 0.3706, "step": 5969 }, { "epoch": 0.33430395341023633, "grad_norm": 1.1112008094787598, "learning_rate": 2.9835e-05, "loss": 0.3836, "step": 5970 }, { "epoch": 0.33435995072236535, "grad_norm": 1.6684881448745728, "learning_rate": 2.9840000000000002e-05, "loss": 0.777, "step": 5971 }, { "epoch": 0.33441594803449437, "grad_norm": 1.222672939300537, "learning_rate": 2.9845e-05, "loss": 0.3733, "step": 5972 }, { "epoch": 0.3344719453466234, "grad_norm": 1.3642115592956543, "learning_rate": 2.985e-05, "loss": 0.4429, "step": 5973 }, { "epoch": 0.33452794265875235, "grad_norm": 1.3494887351989746, "learning_rate": 2.9855e-05, "loss": 0.5372, "step": 5974 }, { "epoch": 0.33458393997088137, "grad_norm": 1.0940035581588745, "learning_rate": 2.986e-05, "loss": 0.3721, "step": 5975 }, { "epoch": 0.3346399372830104, "grad_norm": 
1.239007830619812, "learning_rate": 2.9865000000000003e-05, "loss": 0.4012, "step": 5976 }, { "epoch": 0.3346959345951394, "grad_norm": 1.5419095754623413, "learning_rate": 2.9870000000000004e-05, "loss": 0.4955, "step": 5977 }, { "epoch": 0.33475193190726843, "grad_norm": 1.0845848321914673, "learning_rate": 2.9875000000000004e-05, "loss": 0.4384, "step": 5978 }, { "epoch": 0.33480792921939745, "grad_norm": 1.198300838470459, "learning_rate": 2.9880000000000002e-05, "loss": 0.4088, "step": 5979 }, { "epoch": 0.33486392653152647, "grad_norm": 1.2984102964401245, "learning_rate": 2.9885000000000003e-05, "loss": 0.5256, "step": 5980 }, { "epoch": 0.3349199238436555, "grad_norm": 1.0907014608383179, "learning_rate": 2.989e-05, "loss": 0.317, "step": 5981 }, { "epoch": 0.3349759211557845, "grad_norm": 1.1750301122665405, "learning_rate": 2.9895e-05, "loss": 0.3831, "step": 5982 }, { "epoch": 0.3350319184679135, "grad_norm": 0.9335674047470093, "learning_rate": 2.9900000000000002e-05, "loss": 0.3426, "step": 5983 }, { "epoch": 0.33508791578004254, "grad_norm": 1.2949601411819458, "learning_rate": 2.9905e-05, "loss": 0.5453, "step": 5984 }, { "epoch": 0.33514391309217156, "grad_norm": 1.1657267808914185, "learning_rate": 2.991e-05, "loss": 0.4275, "step": 5985 }, { "epoch": 0.3351999104043006, "grad_norm": 0.9324960708618164, "learning_rate": 2.9915000000000004e-05, "loss": 0.247, "step": 5986 }, { "epoch": 0.3352559077164296, "grad_norm": 1.3305065631866455, "learning_rate": 2.9920000000000005e-05, "loss": 0.3834, "step": 5987 }, { "epoch": 0.3353119050285586, "grad_norm": 1.611104965209961, "learning_rate": 2.9925000000000002e-05, "loss": 0.4356, "step": 5988 }, { "epoch": 0.33536790234068764, "grad_norm": 1.1590913534164429, "learning_rate": 2.9930000000000003e-05, "loss": 0.4541, "step": 5989 }, { "epoch": 0.33542389965281666, "grad_norm": 1.1663968563079834, "learning_rate": 2.9935e-05, "loss": 0.3898, "step": 5990 }, { "epoch": 0.3354798969649457, "grad_norm": 
0.9980918169021606, "learning_rate": 2.994e-05, "loss": 0.4104, "step": 5991 }, { "epoch": 0.3355358942770747, "grad_norm": 1.218346118927002, "learning_rate": 2.9945000000000002e-05, "loss": 0.3951, "step": 5992 }, { "epoch": 0.3355918915892037, "grad_norm": 1.4852898120880127, "learning_rate": 2.995e-05, "loss": 0.5311, "step": 5993 }, { "epoch": 0.33564788890133274, "grad_norm": 1.5939909219741821, "learning_rate": 2.9955e-05, "loss": 0.5085, "step": 5994 }, { "epoch": 0.33570388621346176, "grad_norm": 1.1286392211914062, "learning_rate": 2.9959999999999998e-05, "loss": 0.3972, "step": 5995 }, { "epoch": 0.3357598835255908, "grad_norm": 1.3640192747116089, "learning_rate": 2.9965000000000005e-05, "loss": 0.6368, "step": 5996 }, { "epoch": 0.3358158808377198, "grad_norm": 1.1870054006576538, "learning_rate": 2.9970000000000003e-05, "loss": 0.3638, "step": 5997 }, { "epoch": 0.3358718781498488, "grad_norm": 1.1932722330093384, "learning_rate": 2.9975000000000004e-05, "loss": 0.3375, "step": 5998 }, { "epoch": 0.33592787546197783, "grad_norm": 1.1448731422424316, "learning_rate": 2.998e-05, "loss": 0.4382, "step": 5999 }, { "epoch": 0.33598387277410685, "grad_norm": 1.493430256843567, "learning_rate": 2.9985000000000002e-05, "loss": 0.3791, "step": 6000 }, { "epoch": 0.33603987008623587, "grad_norm": 1.4207919836044312, "learning_rate": 2.9990000000000003e-05, "loss": 0.4886, "step": 6001 }, { "epoch": 0.3360958673983649, "grad_norm": 1.2372599840164185, "learning_rate": 2.9995e-05, "loss": 0.3462, "step": 6002 }, { "epoch": 0.3361518647104939, "grad_norm": 1.1469321250915527, "learning_rate": 3e-05, "loss": 0.2863, "step": 6003 }, { "epoch": 0.3362078620226229, "grad_norm": 1.5369642972946167, "learning_rate": 3.0004999999999998e-05, "loss": 0.42, "step": 6004 }, { "epoch": 0.33626385933475195, "grad_norm": 1.1388967037200928, "learning_rate": 3.001e-05, "loss": 0.3926, "step": 6005 }, { "epoch": 0.33631985664688097, "grad_norm": 1.1151459217071533, 
"learning_rate": 3.0015e-05, "loss": 0.32, "step": 6006 }, { "epoch": 0.33637585395901, "grad_norm": 1.7051790952682495, "learning_rate": 3.0020000000000004e-05, "loss": 0.3744, "step": 6007 }, { "epoch": 0.336431851271139, "grad_norm": 1.3285667896270752, "learning_rate": 3.0025000000000005e-05, "loss": 0.4068, "step": 6008 }, { "epoch": 0.336487848583268, "grad_norm": 1.3671685457229614, "learning_rate": 3.0030000000000002e-05, "loss": 0.476, "step": 6009 }, { "epoch": 0.33654384589539704, "grad_norm": 1.2600650787353516, "learning_rate": 3.0035000000000003e-05, "loss": 0.4219, "step": 6010 }, { "epoch": 0.33659984320752606, "grad_norm": 1.5153084993362427, "learning_rate": 3.004e-05, "loss": 0.5961, "step": 6011 }, { "epoch": 0.3366558405196551, "grad_norm": 1.4338983297348022, "learning_rate": 3.0045e-05, "loss": 0.4875, "step": 6012 }, { "epoch": 0.3367118378317841, "grad_norm": 1.2349830865859985, "learning_rate": 3.0050000000000002e-05, "loss": 0.3986, "step": 6013 }, { "epoch": 0.3367678351439131, "grad_norm": 1.2845345735549927, "learning_rate": 3.0055e-05, "loss": 0.3558, "step": 6014 }, { "epoch": 0.3368238324560421, "grad_norm": 1.5571088790893555, "learning_rate": 3.006e-05, "loss": 0.3869, "step": 6015 }, { "epoch": 0.3368798297681711, "grad_norm": 1.8339508771896362, "learning_rate": 3.0064999999999998e-05, "loss": 0.3918, "step": 6016 }, { "epoch": 0.3369358270803001, "grad_norm": 1.1436814069747925, "learning_rate": 3.0070000000000005e-05, "loss": 0.327, "step": 6017 }, { "epoch": 0.33699182439242914, "grad_norm": 1.402848720550537, "learning_rate": 3.0075000000000003e-05, "loss": 0.4811, "step": 6018 }, { "epoch": 0.33704782170455816, "grad_norm": 1.5328079462051392, "learning_rate": 3.0080000000000003e-05, "loss": 0.4196, "step": 6019 }, { "epoch": 0.3371038190166872, "grad_norm": 1.229766607284546, "learning_rate": 3.0085e-05, "loss": 0.4686, "step": 6020 }, { "epoch": 0.3371598163288162, "grad_norm": 1.4152299165725708, "learning_rate": 
3.009e-05, "loss": 0.3973, "step": 6021 }, { "epoch": 0.3372158136409452, "grad_norm": 1.3779040575027466, "learning_rate": 3.0095000000000002e-05, "loss": 0.5503, "step": 6022 }, { "epoch": 0.33727181095307424, "grad_norm": 1.532416820526123, "learning_rate": 3.01e-05, "loss": 0.4722, "step": 6023 }, { "epoch": 0.33732780826520326, "grad_norm": 1.2227298021316528, "learning_rate": 3.0105e-05, "loss": 0.3752, "step": 6024 }, { "epoch": 0.3373838055773323, "grad_norm": 1.1379321813583374, "learning_rate": 3.0109999999999998e-05, "loss": 0.5022, "step": 6025 }, { "epoch": 0.3374398028894613, "grad_norm": 1.117943525314331, "learning_rate": 3.0115e-05, "loss": 0.4367, "step": 6026 }, { "epoch": 0.3374958002015903, "grad_norm": 1.1439199447631836, "learning_rate": 3.0120000000000003e-05, "loss": 0.372, "step": 6027 }, { "epoch": 0.33755179751371933, "grad_norm": 1.347422480583191, "learning_rate": 3.0125000000000004e-05, "loss": 0.5321, "step": 6028 }, { "epoch": 0.33760779482584835, "grad_norm": 1.2958378791809082, "learning_rate": 3.013e-05, "loss": 0.6347, "step": 6029 }, { "epoch": 0.33766379213797737, "grad_norm": 1.301550269126892, "learning_rate": 3.0135000000000002e-05, "loss": 0.3741, "step": 6030 }, { "epoch": 0.3377197894501064, "grad_norm": 1.3525784015655518, "learning_rate": 3.0140000000000003e-05, "loss": 0.3619, "step": 6031 }, { "epoch": 0.3377757867622354, "grad_norm": 1.347469449043274, "learning_rate": 3.0145e-05, "loss": 0.4806, "step": 6032 }, { "epoch": 0.3378317840743644, "grad_norm": 1.193375825881958, "learning_rate": 3.015e-05, "loss": 0.4192, "step": 6033 }, { "epoch": 0.33788778138649345, "grad_norm": 1.3086562156677246, "learning_rate": 3.0155e-05, "loss": 0.3881, "step": 6034 }, { "epoch": 0.33794377869862247, "grad_norm": 1.2944972515106201, "learning_rate": 3.016e-05, "loss": 0.4319, "step": 6035 }, { "epoch": 0.3379997760107515, "grad_norm": 1.7294611930847168, "learning_rate": 3.0165e-05, "loss": 0.4066, "step": 6036 }, { "epoch": 
0.3380557733228805, "grad_norm": 1.4674289226531982, "learning_rate": 3.0170000000000004e-05, "loss": 0.4316, "step": 6037 }, { "epoch": 0.3381117706350095, "grad_norm": 1.7740062475204468, "learning_rate": 3.0175e-05, "loss": 0.402, "step": 6038 }, { "epoch": 0.33816776794713854, "grad_norm": 1.2527861595153809, "learning_rate": 3.0180000000000002e-05, "loss": 0.3912, "step": 6039 }, { "epoch": 0.33822376525926756, "grad_norm": 1.3782498836517334, "learning_rate": 3.0185000000000003e-05, "loss": 0.5425, "step": 6040 }, { "epoch": 0.3382797625713966, "grad_norm": 1.044850468635559, "learning_rate": 3.019e-05, "loss": 0.4195, "step": 6041 }, { "epoch": 0.3383357598835256, "grad_norm": 1.184483528137207, "learning_rate": 3.0195e-05, "loss": 0.3694, "step": 6042 }, { "epoch": 0.3383917571956546, "grad_norm": 5.612485885620117, "learning_rate": 3.02e-05, "loss": 0.6494, "step": 6043 }, { "epoch": 0.33844775450778364, "grad_norm": 1.1050502061843872, "learning_rate": 3.0205e-05, "loss": 0.4542, "step": 6044 }, { "epoch": 0.33850375181991266, "grad_norm": 1.2551862001419067, "learning_rate": 3.021e-05, "loss": 0.4103, "step": 6045 }, { "epoch": 0.3385597491320417, "grad_norm": 1.382351040840149, "learning_rate": 3.0214999999999998e-05, "loss": 0.4621, "step": 6046 }, { "epoch": 0.3386157464441707, "grad_norm": 0.9998094439506531, "learning_rate": 3.0220000000000005e-05, "loss": 0.3793, "step": 6047 }, { "epoch": 0.3386717437562997, "grad_norm": 1.1794143915176392, "learning_rate": 3.0225000000000003e-05, "loss": 0.3553, "step": 6048 }, { "epoch": 0.33872774106842873, "grad_norm": 1.4121116399765015, "learning_rate": 3.0230000000000004e-05, "loss": 0.3912, "step": 6049 }, { "epoch": 0.33878373838055775, "grad_norm": 1.1807981729507446, "learning_rate": 3.0235e-05, "loss": 0.4023, "step": 6050 }, { "epoch": 0.33883973569268677, "grad_norm": 1.160921573638916, "learning_rate": 3.0240000000000002e-05, "loss": 0.3538, "step": 6051 }, { "epoch": 0.3388957330048158, 
"grad_norm": 1.1883474588394165, "learning_rate": 3.0245000000000003e-05, "loss": 0.3711, "step": 6052 }, { "epoch": 0.3389517303169448, "grad_norm": 1.2117105722427368, "learning_rate": 3.025e-05, "loss": 0.3819, "step": 6053 }, { "epoch": 0.33900772762907383, "grad_norm": 1.452558159828186, "learning_rate": 3.0255e-05, "loss": 0.4038, "step": 6054 }, { "epoch": 0.33906372494120285, "grad_norm": 1.2886942625045776, "learning_rate": 3.0259999999999998e-05, "loss": 0.5197, "step": 6055 }, { "epoch": 0.33911972225333187, "grad_norm": 1.525838017463684, "learning_rate": 3.0265e-05, "loss": 0.4201, "step": 6056 }, { "epoch": 0.33917571956546083, "grad_norm": 1.502649188041687, "learning_rate": 3.0270000000000003e-05, "loss": 0.5808, "step": 6057 }, { "epoch": 0.33923171687758985, "grad_norm": 1.1106334924697876, "learning_rate": 3.0275000000000004e-05, "loss": 0.4563, "step": 6058 }, { "epoch": 0.33928771418971887, "grad_norm": 1.3896900415420532, "learning_rate": 3.028e-05, "loss": 0.4421, "step": 6059 }, { "epoch": 0.3393437115018479, "grad_norm": 1.3931236267089844, "learning_rate": 3.0285000000000002e-05, "loss": 0.3937, "step": 6060 }, { "epoch": 0.3393997088139769, "grad_norm": 1.1702464818954468, "learning_rate": 3.0290000000000003e-05, "loss": 0.4756, "step": 6061 }, { "epoch": 0.3394557061261059, "grad_norm": 1.3623554706573486, "learning_rate": 3.0295e-05, "loss": 0.4031, "step": 6062 }, { "epoch": 0.33951170343823495, "grad_norm": 1.37270188331604, "learning_rate": 3.03e-05, "loss": 0.503, "step": 6063 }, { "epoch": 0.33956770075036397, "grad_norm": 1.3576231002807617, "learning_rate": 3.0305e-05, "loss": 0.5454, "step": 6064 }, { "epoch": 0.339623698062493, "grad_norm": 1.1892614364624023, "learning_rate": 3.031e-05, "loss": 0.4471, "step": 6065 }, { "epoch": 0.339679695374622, "grad_norm": 1.1943871974945068, "learning_rate": 3.0315e-05, "loss": 0.5917, "step": 6066 }, { "epoch": 0.339735692686751, "grad_norm": 1.1706382036209106, "learning_rate": 
3.0320000000000004e-05, "loss": 0.5538, "step": 6067 }, { "epoch": 0.33979168999888004, "grad_norm": 1.2243510484695435, "learning_rate": 3.0325000000000002e-05, "loss": 0.2955, "step": 6068 }, { "epoch": 0.33984768731100906, "grad_norm": 1.0546960830688477, "learning_rate": 3.0330000000000003e-05, "loss": 0.3759, "step": 6069 }, { "epoch": 0.3399036846231381, "grad_norm": 1.1734250783920288, "learning_rate": 3.0335000000000003e-05, "loss": 0.3993, "step": 6070 }, { "epoch": 0.3399596819352671, "grad_norm": 1.121383547782898, "learning_rate": 3.034e-05, "loss": 0.4006, "step": 6071 }, { "epoch": 0.3400156792473961, "grad_norm": 1.3144326210021973, "learning_rate": 3.0345e-05, "loss": 0.4595, "step": 6072 }, { "epoch": 0.34007167655952514, "grad_norm": 1.1103761196136475, "learning_rate": 3.035e-05, "loss": 0.3824, "step": 6073 }, { "epoch": 0.34012767387165416, "grad_norm": 1.0838195085525513, "learning_rate": 3.0355e-05, "loss": 0.344, "step": 6074 }, { "epoch": 0.3401836711837832, "grad_norm": 1.0776463747024536, "learning_rate": 3.036e-05, "loss": 0.3973, "step": 6075 }, { "epoch": 0.3402396684959122, "grad_norm": 1.2838622331619263, "learning_rate": 3.0364999999999998e-05, "loss": 0.5617, "step": 6076 }, { "epoch": 0.3402956658080412, "grad_norm": 1.385256290435791, "learning_rate": 3.0370000000000006e-05, "loss": 0.7115, "step": 6077 }, { "epoch": 0.34035166312017023, "grad_norm": 1.182480812072754, "learning_rate": 3.0375000000000003e-05, "loss": 0.4803, "step": 6078 }, { "epoch": 0.34040766043229925, "grad_norm": 1.7291700839996338, "learning_rate": 3.0380000000000004e-05, "loss": 0.5289, "step": 6079 }, { "epoch": 0.34046365774442827, "grad_norm": 1.4423400163650513, "learning_rate": 3.0385e-05, "loss": 0.521, "step": 6080 }, { "epoch": 0.3405196550565573, "grad_norm": 2.5962367057800293, "learning_rate": 3.0390000000000002e-05, "loss": 0.5251, "step": 6081 }, { "epoch": 0.3405756523686863, "grad_norm": 1.3955833911895752, "learning_rate": 
3.0395000000000003e-05, "loss": 0.4798, "step": 6082 }, { "epoch": 0.34063164968081533, "grad_norm": 1.157979130744934, "learning_rate": 3.04e-05, "loss": 0.3445, "step": 6083 }, { "epoch": 0.34068764699294435, "grad_norm": 1.4732646942138672, "learning_rate": 3.0405e-05, "loss": 0.6209, "step": 6084 }, { "epoch": 0.34074364430507337, "grad_norm": 1.9608231782913208, "learning_rate": 3.041e-05, "loss": 0.5513, "step": 6085 }, { "epoch": 0.3407996416172024, "grad_norm": 1.2772008180618286, "learning_rate": 3.0415e-05, "loss": 0.4158, "step": 6086 }, { "epoch": 0.3408556389293314, "grad_norm": 1.2368332147598267, "learning_rate": 3.0420000000000004e-05, "loss": 0.5146, "step": 6087 }, { "epoch": 0.3409116362414604, "grad_norm": 1.4972248077392578, "learning_rate": 3.0425000000000004e-05, "loss": 0.5596, "step": 6088 }, { "epoch": 0.34096763355358944, "grad_norm": 1.2302608489990234, "learning_rate": 3.0430000000000002e-05, "loss": 0.3766, "step": 6089 }, { "epoch": 0.34102363086571846, "grad_norm": 1.5643738508224487, "learning_rate": 3.0435000000000003e-05, "loss": 0.3371, "step": 6090 }, { "epoch": 0.3410796281778475, "grad_norm": 1.244739294052124, "learning_rate": 3.0440000000000003e-05, "loss": 0.4079, "step": 6091 }, { "epoch": 0.3411356254899765, "grad_norm": 1.2407468557357788, "learning_rate": 3.0445e-05, "loss": 0.4041, "step": 6092 }, { "epoch": 0.3411916228021055, "grad_norm": 1.6655876636505127, "learning_rate": 3.045e-05, "loss": 0.3608, "step": 6093 }, { "epoch": 0.34124762011423454, "grad_norm": 1.256253957748413, "learning_rate": 3.0455e-05, "loss": 0.4899, "step": 6094 }, { "epoch": 0.34130361742636356, "grad_norm": 1.1335060596466064, "learning_rate": 3.046e-05, "loss": 0.4612, "step": 6095 }, { "epoch": 0.3413596147384926, "grad_norm": 1.1727112531661987, "learning_rate": 3.0465e-05, "loss": 0.3644, "step": 6096 }, { "epoch": 0.3414156120506216, "grad_norm": 1.1319737434387207, "learning_rate": 3.0470000000000005e-05, "loss": 0.3832, "step": 6097 
}, { "epoch": 0.34147160936275056, "grad_norm": 1.1234557628631592, "learning_rate": 3.0475000000000002e-05, "loss": 0.3799, "step": 6098 }, { "epoch": 0.3415276066748796, "grad_norm": 1.4665559530258179, "learning_rate": 3.0480000000000003e-05, "loss": 0.5543, "step": 6099 }, { "epoch": 0.3415836039870086, "grad_norm": 1.3194949626922607, "learning_rate": 3.0485000000000004e-05, "loss": 0.4193, "step": 6100 }, { "epoch": 0.3416396012991376, "grad_norm": 1.2255374193191528, "learning_rate": 3.049e-05, "loss": 0.4217, "step": 6101 }, { "epoch": 0.34169559861126664, "grad_norm": 1.2952872514724731, "learning_rate": 3.0495000000000002e-05, "loss": 0.4849, "step": 6102 }, { "epoch": 0.34175159592339566, "grad_norm": 1.278414011001587, "learning_rate": 3.05e-05, "loss": 0.5239, "step": 6103 }, { "epoch": 0.3418075932355247, "grad_norm": 1.3654206991195679, "learning_rate": 3.0505e-05, "loss": 0.3876, "step": 6104 }, { "epoch": 0.3418635905476537, "grad_norm": 1.132808804512024, "learning_rate": 3.051e-05, "loss": 0.3932, "step": 6105 }, { "epoch": 0.3419195878597827, "grad_norm": 1.2543237209320068, "learning_rate": 3.0515e-05, "loss": 0.4998, "step": 6106 }, { "epoch": 0.34197558517191173, "grad_norm": 1.3329524993896484, "learning_rate": 3.0520000000000006e-05, "loss": 0.4027, "step": 6107 }, { "epoch": 0.34203158248404075, "grad_norm": 1.3539350032806396, "learning_rate": 3.0525e-05, "loss": 0.5102, "step": 6108 }, { "epoch": 0.34208757979616977, "grad_norm": 1.3504351377487183, "learning_rate": 3.053e-05, "loss": 0.3668, "step": 6109 }, { "epoch": 0.3421435771082988, "grad_norm": 1.2522965669631958, "learning_rate": 3.0535000000000005e-05, "loss": 0.4975, "step": 6110 }, { "epoch": 0.3421995744204278, "grad_norm": 1.115391492843628, "learning_rate": 3.054e-05, "loss": 0.3742, "step": 6111 }, { "epoch": 0.34225557173255683, "grad_norm": 1.205297827720642, "learning_rate": 3.0545e-05, "loss": 0.3266, "step": 6112 }, { "epoch": 0.34231156904468585, "grad_norm": 
1.2326608896255493, "learning_rate": 3.0550000000000004e-05, "loss": 0.3953, "step": 6113 }, { "epoch": 0.34236756635681487, "grad_norm": 1.3323166370391846, "learning_rate": 3.0555e-05, "loss": 0.4736, "step": 6114 }, { "epoch": 0.3424235636689439, "grad_norm": 1.5239416360855103, "learning_rate": 3.056e-05, "loss": 0.4383, "step": 6115 }, { "epoch": 0.3424795609810729, "grad_norm": 1.2085906267166138, "learning_rate": 3.0564999999999996e-05, "loss": 0.3913, "step": 6116 }, { "epoch": 0.3425355582932019, "grad_norm": 1.346198320388794, "learning_rate": 3.057000000000001e-05, "loss": 0.3611, "step": 6117 }, { "epoch": 0.34259155560533094, "grad_norm": 1.3381444215774536, "learning_rate": 3.0575000000000005e-05, "loss": 0.3948, "step": 6118 }, { "epoch": 0.34264755291745996, "grad_norm": 1.4198280572891235, "learning_rate": 3.058e-05, "loss": 0.5018, "step": 6119 }, { "epoch": 0.342703550229589, "grad_norm": 1.2343220710754395, "learning_rate": 3.0585e-05, "loss": 0.4075, "step": 6120 }, { "epoch": 0.342759547541718, "grad_norm": 1.4251677989959717, "learning_rate": 3.0590000000000004e-05, "loss": 0.3703, "step": 6121 }, { "epoch": 0.342815544853847, "grad_norm": 1.425933837890625, "learning_rate": 3.0595e-05, "loss": 0.5023, "step": 6122 }, { "epoch": 0.34287154216597604, "grad_norm": 1.3435081243515015, "learning_rate": 3.06e-05, "loss": 0.422, "step": 6123 }, { "epoch": 0.34292753947810506, "grad_norm": 1.4446288347244263, "learning_rate": 3.0605e-05, "loss": 0.5023, "step": 6124 }, { "epoch": 0.3429835367902341, "grad_norm": 1.1390122175216675, "learning_rate": 3.061e-05, "loss": 0.363, "step": 6125 }, { "epoch": 0.3430395341023631, "grad_norm": 1.2695108652114868, "learning_rate": 3.0615e-05, "loss": 0.4795, "step": 6126 }, { "epoch": 0.3430955314144921, "grad_norm": 1.1526217460632324, "learning_rate": 3.062e-05, "loss": 0.4332, "step": 6127 }, { "epoch": 0.34315152872662114, "grad_norm": 1.4268572330474854, "learning_rate": 3.0625000000000006e-05, "loss": 
0.4968, "step": 6128 }, { "epoch": 0.34320752603875015, "grad_norm": 1.3458688259124756, "learning_rate": 3.063e-05, "loss": 0.4948, "step": 6129 }, { "epoch": 0.3432635233508792, "grad_norm": 1.4268699884414673, "learning_rate": 3.0635e-05, "loss": 0.5031, "step": 6130 }, { "epoch": 0.3433195206630082, "grad_norm": 1.0886454582214355, "learning_rate": 3.0640000000000005e-05, "loss": 0.3386, "step": 6131 }, { "epoch": 0.3433755179751372, "grad_norm": 1.8940868377685547, "learning_rate": 3.0645e-05, "loss": 0.4986, "step": 6132 }, { "epoch": 0.34343151528726623, "grad_norm": 1.2075221538543701, "learning_rate": 3.065e-05, "loss": 0.3753, "step": 6133 }, { "epoch": 0.34348751259939525, "grad_norm": 1.0235775709152222, "learning_rate": 3.0655e-05, "loss": 0.3847, "step": 6134 }, { "epoch": 0.34354350991152427, "grad_norm": 1.2436648607254028, "learning_rate": 3.066e-05, "loss": 0.5126, "step": 6135 }, { "epoch": 0.3435995072236533, "grad_norm": 9.105645179748535, "learning_rate": 3.0665e-05, "loss": 0.4121, "step": 6136 }, { "epoch": 0.3436555045357823, "grad_norm": 1.3691940307617188, "learning_rate": 3.0669999999999996e-05, "loss": 0.4754, "step": 6137 }, { "epoch": 0.3437115018479113, "grad_norm": 1.1193677186965942, "learning_rate": 3.067500000000001e-05, "loss": 0.3744, "step": 6138 }, { "epoch": 0.3437674991600403, "grad_norm": 1.0840986967086792, "learning_rate": 3.0680000000000004e-05, "loss": 0.4402, "step": 6139 }, { "epoch": 0.3438234964721693, "grad_norm": 1.5513265132904053, "learning_rate": 3.0685e-05, "loss": 0.3624, "step": 6140 }, { "epoch": 0.34387949378429833, "grad_norm": 1.279994010925293, "learning_rate": 3.069e-05, "loss": 0.4096, "step": 6141 }, { "epoch": 0.34393549109642735, "grad_norm": 1.1125075817108154, "learning_rate": 3.0695000000000003e-05, "loss": 0.4031, "step": 6142 }, { "epoch": 0.34399148840855637, "grad_norm": 1.304520606994629, "learning_rate": 3.07e-05, "loss": 0.367, "step": 6143 }, { "epoch": 0.3440474857206854, "grad_norm": 
1.2914345264434814, "learning_rate": 3.0705e-05, "loss": 0.4538, "step": 6144 }, { "epoch": 0.3441034830328144, "grad_norm": 1.1114362478256226, "learning_rate": 3.071e-05, "loss": 0.3721, "step": 6145 }, { "epoch": 0.3441594803449434, "grad_norm": 1.2352871894836426, "learning_rate": 3.0715e-05, "loss": 0.4383, "step": 6146 }, { "epoch": 0.34421547765707244, "grad_norm": 1.0656942129135132, "learning_rate": 3.072e-05, "loss": 0.4117, "step": 6147 }, { "epoch": 0.34427147496920146, "grad_norm": 1.1222028732299805, "learning_rate": 3.0725e-05, "loss": 0.3683, "step": 6148 }, { "epoch": 0.3443274722813305, "grad_norm": 1.280902624130249, "learning_rate": 3.0730000000000006e-05, "loss": 0.5762, "step": 6149 }, { "epoch": 0.3443834695934595, "grad_norm": 1.1570007801055908, "learning_rate": 3.0735e-05, "loss": 0.366, "step": 6150 }, { "epoch": 0.3444394669055885, "grad_norm": 1.252055048942566, "learning_rate": 3.074e-05, "loss": 0.3943, "step": 6151 }, { "epoch": 0.34449546421771754, "grad_norm": 1.2108405828475952, "learning_rate": 3.0745000000000005e-05, "loss": 0.3738, "step": 6152 }, { "epoch": 0.34455146152984656, "grad_norm": 1.0659451484680176, "learning_rate": 3.075e-05, "loss": 0.4407, "step": 6153 }, { "epoch": 0.3446074588419756, "grad_norm": 1.2742671966552734, "learning_rate": 3.0755e-05, "loss": 0.409, "step": 6154 }, { "epoch": 0.3446634561541046, "grad_norm": 1.1289572715759277, "learning_rate": 3.076e-05, "loss": 0.4658, "step": 6155 }, { "epoch": 0.3447194534662336, "grad_norm": 1.2440177202224731, "learning_rate": 3.0765e-05, "loss": 0.4304, "step": 6156 }, { "epoch": 0.34477545077836264, "grad_norm": 1.1720653772354126, "learning_rate": 3.077e-05, "loss": 0.4095, "step": 6157 }, { "epoch": 0.34483144809049165, "grad_norm": 7.197729110717773, "learning_rate": 3.0775e-05, "loss": 0.5063, "step": 6158 }, { "epoch": 0.3448874454026207, "grad_norm": 1.2122024297714233, "learning_rate": 3.078e-05, "loss": 0.4376, "step": 6159 }, { "epoch": 
0.3449434427147497, "grad_norm": 1.476108193397522, "learning_rate": 3.0785000000000004e-05, "loss": 0.5088, "step": 6160 }, { "epoch": 0.3449994400268787, "grad_norm": 1.1375744342803955, "learning_rate": 3.079e-05, "loss": 0.4141, "step": 6161 }, { "epoch": 0.34505543733900773, "grad_norm": 1.2847084999084473, "learning_rate": 3.0795e-05, "loss": 0.458, "step": 6162 }, { "epoch": 0.34511143465113675, "grad_norm": 1.247577428817749, "learning_rate": 3.08e-05, "loss": 0.4325, "step": 6163 }, { "epoch": 0.34516743196326577, "grad_norm": 1.1049706935882568, "learning_rate": 3.0805e-05, "loss": 0.4173, "step": 6164 }, { "epoch": 0.3452234292753948, "grad_norm": 1.5211737155914307, "learning_rate": 3.081e-05, "loss": 0.6595, "step": 6165 }, { "epoch": 0.3452794265875238, "grad_norm": 1.4209784269332886, "learning_rate": 3.0815e-05, "loss": 0.461, "step": 6166 }, { "epoch": 0.3453354238996528, "grad_norm": 1.1894135475158691, "learning_rate": 3.082e-05, "loss": 0.6291, "step": 6167 }, { "epoch": 0.34539142121178185, "grad_norm": 1.2749963998794556, "learning_rate": 3.0825000000000004e-05, "loss": 0.4693, "step": 6168 }, { "epoch": 0.34544741852391087, "grad_norm": 1.2753534317016602, "learning_rate": 3.083e-05, "loss": 0.4812, "step": 6169 }, { "epoch": 0.3455034158360399, "grad_norm": 1.4958391189575195, "learning_rate": 3.0835000000000005e-05, "loss": 0.4399, "step": 6170 }, { "epoch": 0.3455594131481689, "grad_norm": 1.2740752696990967, "learning_rate": 3.084e-05, "loss": 0.3982, "step": 6171 }, { "epoch": 0.3456154104602979, "grad_norm": 1.0687915086746216, "learning_rate": 3.0845e-05, "loss": 0.3227, "step": 6172 }, { "epoch": 0.34567140777242694, "grad_norm": 1.1849191188812256, "learning_rate": 3.0850000000000004e-05, "loss": 0.3809, "step": 6173 }, { "epoch": 0.34572740508455596, "grad_norm": 1.2271841764450073, "learning_rate": 3.0855e-05, "loss": 0.4786, "step": 6174 }, { "epoch": 0.345783402396685, "grad_norm": 1.063286304473877, "learning_rate": 3.086e-05, 
"loss": 0.3983, "step": 6175 }, { "epoch": 0.345839399708814, "grad_norm": 0.9903095364570618, "learning_rate": 3.0865e-05, "loss": 0.3459, "step": 6176 }, { "epoch": 0.345895397020943, "grad_norm": 1.322891354560852, "learning_rate": 3.087e-05, "loss": 0.4494, "step": 6177 }, { "epoch": 0.34595139433307204, "grad_norm": 1.1799235343933105, "learning_rate": 3.0875000000000005e-05, "loss": 0.3814, "step": 6178 }, { "epoch": 0.34600739164520106, "grad_norm": 1.1407090425491333, "learning_rate": 3.088e-05, "loss": 0.409, "step": 6179 }, { "epoch": 0.3460633889573301, "grad_norm": 1.314505934715271, "learning_rate": 3.0885e-05, "loss": 0.412, "step": 6180 }, { "epoch": 0.34611938626945904, "grad_norm": 1.1176612377166748, "learning_rate": 3.0890000000000004e-05, "loss": 0.4532, "step": 6181 }, { "epoch": 0.34617538358158806, "grad_norm": 1.3744513988494873, "learning_rate": 3.0895e-05, "loss": 0.5124, "step": 6182 }, { "epoch": 0.3462313808937171, "grad_norm": 1.1842596530914307, "learning_rate": 3.09e-05, "loss": 0.451, "step": 6183 }, { "epoch": 0.3462873782058461, "grad_norm": 1.231649398803711, "learning_rate": 3.0905e-05, "loss": 0.3933, "step": 6184 }, { "epoch": 0.3463433755179751, "grad_norm": 1.2014415264129639, "learning_rate": 3.091e-05, "loss": 0.3315, "step": 6185 }, { "epoch": 0.34639937283010414, "grad_norm": 5.304755687713623, "learning_rate": 3.0915e-05, "loss": 0.4169, "step": 6186 }, { "epoch": 0.34645537014223315, "grad_norm": 1.3249987363815308, "learning_rate": 3.092e-05, "loss": 0.4412, "step": 6187 }, { "epoch": 0.3465113674543622, "grad_norm": 3.466057062149048, "learning_rate": 3.0925000000000006e-05, "loss": 0.4771, "step": 6188 }, { "epoch": 0.3465673647664912, "grad_norm": 1.0502848625183105, "learning_rate": 3.0930000000000004e-05, "loss": 0.3134, "step": 6189 }, { "epoch": 0.3466233620786202, "grad_norm": 1.3575304746627808, "learning_rate": 3.0935e-05, "loss": 0.4014, "step": 6190 }, { "epoch": 0.34667935939074923, "grad_norm": 
1.1503523588180542, "learning_rate": 3.0940000000000005e-05, "loss": 0.4059, "step": 6191 }, { "epoch": 0.34673535670287825, "grad_norm": 1.267883062362671, "learning_rate": 3.0945e-05, "loss": 0.5432, "step": 6192 }, { "epoch": 0.34679135401500727, "grad_norm": 2.9755451679229736, "learning_rate": 3.095e-05, "loss": 0.3337, "step": 6193 }, { "epoch": 0.3468473513271363, "grad_norm": 1.181565523147583, "learning_rate": 3.0955e-05, "loss": 0.3977, "step": 6194 }, { "epoch": 0.3469033486392653, "grad_norm": 1.145254373550415, "learning_rate": 3.096e-05, "loss": 0.4545, "step": 6195 }, { "epoch": 0.3469593459513943, "grad_norm": 1.4250879287719727, "learning_rate": 3.0965e-05, "loss": 0.484, "step": 6196 }, { "epoch": 0.34701534326352335, "grad_norm": 1.1087721586227417, "learning_rate": 3.0969999999999997e-05, "loss": 0.458, "step": 6197 }, { "epoch": 0.34707134057565237, "grad_norm": 1.4464209079742432, "learning_rate": 3.0975e-05, "loss": 0.5284, "step": 6198 }, { "epoch": 0.3471273378877814, "grad_norm": 1.2910740375518799, "learning_rate": 3.0980000000000005e-05, "loss": 0.3903, "step": 6199 }, { "epoch": 0.3471833351999104, "grad_norm": 1.1853680610656738, "learning_rate": 3.0985e-05, "loss": 0.3896, "step": 6200 }, { "epoch": 0.3472393325120394, "grad_norm": 1.0837509632110596, "learning_rate": 3.099e-05, "loss": 0.427, "step": 6201 }, { "epoch": 0.34729532982416844, "grad_norm": 1.038487434387207, "learning_rate": 3.0995000000000004e-05, "loss": 0.4323, "step": 6202 }, { "epoch": 0.34735132713629746, "grad_norm": 1.403473138809204, "learning_rate": 3.1e-05, "loss": 0.4597, "step": 6203 }, { "epoch": 0.3474073244484265, "grad_norm": 1.3064563274383545, "learning_rate": 3.1005e-05, "loss": 0.5115, "step": 6204 }, { "epoch": 0.3474633217605555, "grad_norm": 1.1536040306091309, "learning_rate": 3.101e-05, "loss": 0.3336, "step": 6205 }, { "epoch": 0.3475193190726845, "grad_norm": 1.3112775087356567, "learning_rate": 3.1015e-05, "loss": 0.4123, "step": 6206 }, { 
"epoch": 0.34757531638481354, "grad_norm": 1.2330294847488403, "learning_rate": 3.102e-05, "loss": 0.3784, "step": 6207 }, { "epoch": 0.34763131369694256, "grad_norm": 1.2423793077468872, "learning_rate": 3.1025e-05, "loss": 0.4646, "step": 6208 }, { "epoch": 0.3476873110090716, "grad_norm": 1.0161997079849243, "learning_rate": 3.1030000000000006e-05, "loss": 0.3805, "step": 6209 }, { "epoch": 0.3477433083212006, "grad_norm": 1.2694252729415894, "learning_rate": 3.1035000000000004e-05, "loss": 0.385, "step": 6210 }, { "epoch": 0.3477993056333296, "grad_norm": 1.343778133392334, "learning_rate": 3.104e-05, "loss": 0.4388, "step": 6211 }, { "epoch": 0.34785530294545863, "grad_norm": 1.2166775465011597, "learning_rate": 3.1045000000000005e-05, "loss": 0.3247, "step": 6212 }, { "epoch": 0.34791130025758765, "grad_norm": 1.2452661991119385, "learning_rate": 3.105e-05, "loss": 0.339, "step": 6213 }, { "epoch": 0.34796729756971667, "grad_norm": 1.1289180517196655, "learning_rate": 3.1055e-05, "loss": 0.4189, "step": 6214 }, { "epoch": 0.3480232948818457, "grad_norm": 0.9198927879333496, "learning_rate": 3.106e-05, "loss": 0.361, "step": 6215 }, { "epoch": 0.3480792921939747, "grad_norm": 1.1886038780212402, "learning_rate": 3.1065e-05, "loss": 0.5015, "step": 6216 }, { "epoch": 0.34813528950610373, "grad_norm": 1.1647417545318604, "learning_rate": 3.107e-05, "loss": 0.3268, "step": 6217 }, { "epoch": 0.34819128681823275, "grad_norm": 1.2943861484527588, "learning_rate": 3.1075e-05, "loss": 0.3943, "step": 6218 }, { "epoch": 0.34824728413036177, "grad_norm": 1.125791072845459, "learning_rate": 3.108e-05, "loss": 0.3445, "step": 6219 }, { "epoch": 0.3483032814424908, "grad_norm": 1.4532657861709595, "learning_rate": 3.1085000000000005e-05, "loss": 0.4646, "step": 6220 }, { "epoch": 0.3483592787546198, "grad_norm": 1.043227195739746, "learning_rate": 3.109e-05, "loss": 0.3693, "step": 6221 }, { "epoch": 0.34841527606674877, "grad_norm": 1.2873167991638184, "learning_rate": 
3.1095e-05, "loss": 0.4276, "step": 6222 }, { "epoch": 0.3484712733788778, "grad_norm": 1.4242085218429565, "learning_rate": 3.1100000000000004e-05, "loss": 0.5273, "step": 6223 }, { "epoch": 0.3485272706910068, "grad_norm": 1.1898281574249268, "learning_rate": 3.1105e-05, "loss": 0.4503, "step": 6224 }, { "epoch": 0.3485832680031358, "grad_norm": 1.1694384813308716, "learning_rate": 3.111e-05, "loss": 0.3438, "step": 6225 }, { "epoch": 0.34863926531526485, "grad_norm": 1.185025930404663, "learning_rate": 3.1115e-05, "loss": 0.337, "step": 6226 }, { "epoch": 0.34869526262739386, "grad_norm": 1.0493113994598389, "learning_rate": 3.112e-05, "loss": 0.3695, "step": 6227 }, { "epoch": 0.3487512599395229, "grad_norm": 1.441767692565918, "learning_rate": 3.1125000000000004e-05, "loss": 0.3575, "step": 6228 }, { "epoch": 0.3488072572516519, "grad_norm": 1.06027090549469, "learning_rate": 3.113e-05, "loss": 0.2929, "step": 6229 }, { "epoch": 0.3488632545637809, "grad_norm": 1.1034297943115234, "learning_rate": 3.1135000000000006e-05, "loss": 0.4501, "step": 6230 }, { "epoch": 0.34891925187590994, "grad_norm": 1.1654653549194336, "learning_rate": 3.1140000000000003e-05, "loss": 0.4183, "step": 6231 }, { "epoch": 0.34897524918803896, "grad_norm": 1.605057954788208, "learning_rate": 3.1145e-05, "loss": 0.4588, "step": 6232 }, { "epoch": 0.349031246500168, "grad_norm": 1.2390538454055786, "learning_rate": 3.115e-05, "loss": 0.4808, "step": 6233 }, { "epoch": 0.349087243812297, "grad_norm": 1.6129049062728882, "learning_rate": 3.1155e-05, "loss": 0.6286, "step": 6234 }, { "epoch": 0.349143241124426, "grad_norm": 1.207960605621338, "learning_rate": 3.116e-05, "loss": 0.5001, "step": 6235 }, { "epoch": 0.34919923843655504, "grad_norm": 1.1692736148834229, "learning_rate": 3.1165e-05, "loss": 0.3719, "step": 6236 }, { "epoch": 0.34925523574868406, "grad_norm": 1.7736616134643555, "learning_rate": 3.117e-05, "loss": 0.609, "step": 6237 }, { "epoch": 0.3493112330608131, "grad_norm": 
1.7082831859588623, "learning_rate": 3.1175000000000006e-05, "loss": 0.4509, "step": 6238 }, { "epoch": 0.3493672303729421, "grad_norm": 2.39262056350708, "learning_rate": 3.118e-05, "loss": 0.6077, "step": 6239 }, { "epoch": 0.3494232276850711, "grad_norm": 1.184689998626709, "learning_rate": 3.1185e-05, "loss": 0.4831, "step": 6240 }, { "epoch": 0.34947922499720013, "grad_norm": 1.4418725967407227, "learning_rate": 3.1190000000000005e-05, "loss": 0.5638, "step": 6241 }, { "epoch": 0.34953522230932915, "grad_norm": 1.3676230907440186, "learning_rate": 3.1195e-05, "loss": 0.5351, "step": 6242 }, { "epoch": 0.34959121962145817, "grad_norm": 1.1918115615844727, "learning_rate": 3.12e-05, "loss": 0.488, "step": 6243 }, { "epoch": 0.3496472169335872, "grad_norm": 1.1994301080703735, "learning_rate": 3.1205000000000004e-05, "loss": 0.4618, "step": 6244 }, { "epoch": 0.3497032142457162, "grad_norm": 1.205239176750183, "learning_rate": 3.121e-05, "loss": 0.4535, "step": 6245 }, { "epoch": 0.34975921155784523, "grad_norm": 1.0689464807510376, "learning_rate": 3.1215e-05, "loss": 0.4064, "step": 6246 }, { "epoch": 0.34981520886997425, "grad_norm": 1.5116850137710571, "learning_rate": 3.122e-05, "loss": 0.5589, "step": 6247 }, { "epoch": 0.34987120618210327, "grad_norm": 1.1299906969070435, "learning_rate": 3.122500000000001e-05, "loss": 0.361, "step": 6248 }, { "epoch": 0.3499272034942323, "grad_norm": 1.441096544265747, "learning_rate": 3.1230000000000004e-05, "loss": 0.488, "step": 6249 }, { "epoch": 0.3499832008063613, "grad_norm": 1.3029266595840454, "learning_rate": 3.1235e-05, "loss": 0.4782, "step": 6250 }, { "epoch": 0.3500391981184903, "grad_norm": 1.3709391355514526, "learning_rate": 3.1240000000000006e-05, "loss": 0.5139, "step": 6251 }, { "epoch": 0.35009519543061934, "grad_norm": 1.1097272634506226, "learning_rate": 3.1245e-05, "loss": 0.4393, "step": 6252 }, { "epoch": 0.35015119274274836, "grad_norm": 1.2350056171417236, "learning_rate": 3.125e-05, "loss": 
0.4682, "step": 6253 }, { "epoch": 0.3502071900548774, "grad_norm": 1.2020292282104492, "learning_rate": 3.1255e-05, "loss": 0.4677, "step": 6254 }, { "epoch": 0.3502631873670064, "grad_norm": 1.249942421913147, "learning_rate": 3.126e-05, "loss": 0.3179, "step": 6255 }, { "epoch": 0.3503191846791354, "grad_norm": 1.1817785501480103, "learning_rate": 3.1265e-05, "loss": 0.4654, "step": 6256 }, { "epoch": 0.35037518199126444, "grad_norm": 1.414600133895874, "learning_rate": 3.127e-05, "loss": 0.5809, "step": 6257 }, { "epoch": 0.35043117930339346, "grad_norm": 1.3949699401855469, "learning_rate": 3.1275e-05, "loss": 0.3887, "step": 6258 }, { "epoch": 0.3504871766155225, "grad_norm": 1.4005590677261353, "learning_rate": 3.1280000000000005e-05, "loss": 0.4218, "step": 6259 }, { "epoch": 0.3505431739276515, "grad_norm": 1.2349687814712524, "learning_rate": 3.1285e-05, "loss": 0.4686, "step": 6260 }, { "epoch": 0.3505991712397805, "grad_norm": 1.2598515748977661, "learning_rate": 3.129e-05, "loss": 0.3772, "step": 6261 }, { "epoch": 0.35065516855190954, "grad_norm": 1.1200759410858154, "learning_rate": 3.1295000000000004e-05, "loss": 0.3523, "step": 6262 }, { "epoch": 0.3507111658640385, "grad_norm": 1.159464955329895, "learning_rate": 3.13e-05, "loss": 0.3585, "step": 6263 }, { "epoch": 0.3507671631761675, "grad_norm": 1.3332213163375854, "learning_rate": 3.1305e-05, "loss": 0.4859, "step": 6264 }, { "epoch": 0.35082316048829654, "grad_norm": 0.9965168833732605, "learning_rate": 3.1310000000000003e-05, "loss": 0.3704, "step": 6265 }, { "epoch": 0.35087915780042556, "grad_norm": 1.3566290140151978, "learning_rate": 3.1315e-05, "loss": 0.5692, "step": 6266 }, { "epoch": 0.3509351551125546, "grad_norm": 1.304772138595581, "learning_rate": 3.132e-05, "loss": 0.3989, "step": 6267 }, { "epoch": 0.3509911524246836, "grad_norm": 1.2333190441131592, "learning_rate": 3.1324999999999996e-05, "loss": 0.429, "step": 6268 }, { "epoch": 0.3510471497368126, "grad_norm": 
1.3603678941726685, "learning_rate": 3.133000000000001e-05, "loss": 0.3178, "step": 6269 }, { "epoch": 0.35110314704894163, "grad_norm": 1.4109737873077393, "learning_rate": 3.1335000000000004e-05, "loss": 0.6681, "step": 6270 }, { "epoch": 0.35115914436107065, "grad_norm": 1.0344195365905762, "learning_rate": 3.134e-05, "loss": 0.2755, "step": 6271 }, { "epoch": 0.35121514167319967, "grad_norm": 1.2682287693023682, "learning_rate": 3.1345e-05, "loss": 0.5373, "step": 6272 }, { "epoch": 0.3512711389853287, "grad_norm": 1.3705300092697144, "learning_rate": 3.135e-05, "loss": 0.497, "step": 6273 }, { "epoch": 0.3513271362974577, "grad_norm": 1.1386823654174805, "learning_rate": 3.1355e-05, "loss": 0.4038, "step": 6274 }, { "epoch": 0.35138313360958673, "grad_norm": 1.5180588960647583, "learning_rate": 3.136e-05, "loss": 0.4209, "step": 6275 }, { "epoch": 0.35143913092171575, "grad_norm": 1.1405197381973267, "learning_rate": 3.1365e-05, "loss": 0.3937, "step": 6276 }, { "epoch": 0.35149512823384477, "grad_norm": 1.2759274244308472, "learning_rate": 3.137e-05, "loss": 0.4227, "step": 6277 }, { "epoch": 0.3515511255459738, "grad_norm": 1.3097624778747559, "learning_rate": 3.1375e-05, "loss": 0.3797, "step": 6278 }, { "epoch": 0.3516071228581028, "grad_norm": 1.594214677810669, "learning_rate": 3.138e-05, "loss": 0.3762, "step": 6279 }, { "epoch": 0.3516631201702318, "grad_norm": 1.5521080493927002, "learning_rate": 3.1385000000000005e-05, "loss": 0.4929, "step": 6280 }, { "epoch": 0.35171911748236084, "grad_norm": 1.3778748512268066, "learning_rate": 3.139e-05, "loss": 0.4782, "step": 6281 }, { "epoch": 0.35177511479448986, "grad_norm": 1.325209617614746, "learning_rate": 3.1395e-05, "loss": 0.3956, "step": 6282 }, { "epoch": 0.3518311121066189, "grad_norm": 1.1175185441970825, "learning_rate": 3.1400000000000004e-05, "loss": 0.3477, "step": 6283 }, { "epoch": 0.3518871094187479, "grad_norm": 1.3731361627578735, "learning_rate": 3.1405e-05, "loss": 0.3739, "step": 6284 
}, { "epoch": 0.3519431067308769, "grad_norm": 1.2805825471878052, "learning_rate": 3.141e-05, "loss": 0.4728, "step": 6285 }, { "epoch": 0.35199910404300594, "grad_norm": 1.4022307395935059, "learning_rate": 3.1415e-05, "loss": 0.5399, "step": 6286 }, { "epoch": 0.35205510135513496, "grad_norm": 1.5376461744308472, "learning_rate": 3.142e-05, "loss": 0.4116, "step": 6287 }, { "epoch": 0.352111098667264, "grad_norm": 1.169945240020752, "learning_rate": 3.1425e-05, "loss": 0.4146, "step": 6288 }, { "epoch": 0.352167095979393, "grad_norm": 1.2208759784698486, "learning_rate": 3.143e-05, "loss": 0.3352, "step": 6289 }, { "epoch": 0.352223093291522, "grad_norm": 1.0417909622192383, "learning_rate": 3.1435000000000007e-05, "loss": 0.3359, "step": 6290 }, { "epoch": 0.35227909060365103, "grad_norm": 1.1003497838974, "learning_rate": 3.1440000000000004e-05, "loss": 0.4339, "step": 6291 }, { "epoch": 0.35233508791578005, "grad_norm": 1.071714162826538, "learning_rate": 3.1445e-05, "loss": 0.4731, "step": 6292 }, { "epoch": 0.3523910852279091, "grad_norm": 1.254522681236267, "learning_rate": 3.145e-05, "loss": 0.4213, "step": 6293 }, { "epoch": 0.3524470825400381, "grad_norm": 1.2709230184555054, "learning_rate": 3.1455e-05, "loss": 0.4648, "step": 6294 }, { "epoch": 0.3525030798521671, "grad_norm": 1.3310257196426392, "learning_rate": 3.146e-05, "loss": 0.4866, "step": 6295 }, { "epoch": 0.35255907716429613, "grad_norm": 1.053220272064209, "learning_rate": 3.1465e-05, "loss": 0.3499, "step": 6296 }, { "epoch": 0.35261507447642515, "grad_norm": 1.2098735570907593, "learning_rate": 3.147e-05, "loss": 0.4933, "step": 6297 }, { "epoch": 0.35267107178855417, "grad_norm": 1.2267911434173584, "learning_rate": 3.1475e-05, "loss": 0.4664, "step": 6298 }, { "epoch": 0.3527270691006832, "grad_norm": 1.0496174097061157, "learning_rate": 3.1480000000000004e-05, "loss": 0.3439, "step": 6299 }, { "epoch": 0.3527830664128122, "grad_norm": 1.759591817855835, "learning_rate": 3.1485e-05, 
"loss": 0.3957, "step": 6300 }, { "epoch": 0.3528390637249412, "grad_norm": 1.3334206342697144, "learning_rate": 3.1490000000000005e-05, "loss": 0.5113, "step": 6301 }, { "epoch": 0.35289506103707025, "grad_norm": 0.9650449156761169, "learning_rate": 3.1495e-05, "loss": 0.3999, "step": 6302 }, { "epoch": 0.35295105834919926, "grad_norm": 1.84669029712677, "learning_rate": 3.15e-05, "loss": 0.4424, "step": 6303 }, { "epoch": 0.3530070556613283, "grad_norm": 1.1730831861495972, "learning_rate": 3.1505000000000004e-05, "loss": 0.5259, "step": 6304 }, { "epoch": 0.35306305297345725, "grad_norm": 1.1850773096084595, "learning_rate": 3.151e-05, "loss": 0.4617, "step": 6305 }, { "epoch": 0.35311905028558627, "grad_norm": 1.1357048749923706, "learning_rate": 3.1515e-05, "loss": 0.4219, "step": 6306 }, { "epoch": 0.3531750475977153, "grad_norm": 1.2091214656829834, "learning_rate": 3.1519999999999996e-05, "loss": 0.397, "step": 6307 }, { "epoch": 0.3532310449098443, "grad_norm": 1.161665678024292, "learning_rate": 3.1525e-05, "loss": 0.539, "step": 6308 }, { "epoch": 0.3532870422219733, "grad_norm": 1.1879373788833618, "learning_rate": 3.1530000000000005e-05, "loss": 0.4209, "step": 6309 }, { "epoch": 0.35334303953410234, "grad_norm": 1.4861319065093994, "learning_rate": 3.1535e-05, "loss": 0.4973, "step": 6310 }, { "epoch": 0.35339903684623136, "grad_norm": 1.3899377584457397, "learning_rate": 3.154e-05, "loss": 0.4838, "step": 6311 }, { "epoch": 0.3534550341583604, "grad_norm": 1.3385703563690186, "learning_rate": 3.1545000000000004e-05, "loss": 0.5009, "step": 6312 }, { "epoch": 0.3535110314704894, "grad_norm": 1.191323161125183, "learning_rate": 3.155e-05, "loss": 0.4146, "step": 6313 }, { "epoch": 0.3535670287826184, "grad_norm": 1.427211880683899, "learning_rate": 3.1555e-05, "loss": 0.4842, "step": 6314 }, { "epoch": 0.35362302609474744, "grad_norm": 1.081580638885498, "learning_rate": 3.156e-05, "loss": 0.4151, "step": 6315 }, { "epoch": 0.35367902340687646, 
"grad_norm": 1.265127182006836, "learning_rate": 3.1565e-05, "loss": 0.3041, "step": 6316 }, { "epoch": 0.3537350207190055, "grad_norm": 1.2927141189575195, "learning_rate": 3.157e-05, "loss": 0.5042, "step": 6317 }, { "epoch": 0.3537910180311345, "grad_norm": 1.1545687913894653, "learning_rate": 3.1575e-05, "loss": 0.386, "step": 6318 }, { "epoch": 0.3538470153432635, "grad_norm": 1.1653871536254883, "learning_rate": 3.1580000000000006e-05, "loss": 0.3908, "step": 6319 }, { "epoch": 0.35390301265539253, "grad_norm": 1.319900393486023, "learning_rate": 3.1585e-05, "loss": 0.4542, "step": 6320 }, { "epoch": 0.35395900996752155, "grad_norm": 0.9685698747634888, "learning_rate": 3.159e-05, "loss": 0.3191, "step": 6321 }, { "epoch": 0.3540150072796506, "grad_norm": 1.3467005491256714, "learning_rate": 3.1595000000000005e-05, "loss": 0.4291, "step": 6322 }, { "epoch": 0.3540710045917796, "grad_norm": 2.2519562244415283, "learning_rate": 3.16e-05, "loss": 0.4011, "step": 6323 }, { "epoch": 0.3541270019039086, "grad_norm": 1.185945987701416, "learning_rate": 3.1605e-05, "loss": 0.3492, "step": 6324 }, { "epoch": 0.35418299921603763, "grad_norm": 1.2187808752059937, "learning_rate": 3.1610000000000004e-05, "loss": 0.4182, "step": 6325 }, { "epoch": 0.35423899652816665, "grad_norm": 1.1608519554138184, "learning_rate": 3.1615e-05, "loss": 0.4505, "step": 6326 }, { "epoch": 0.35429499384029567, "grad_norm": 1.1894186735153198, "learning_rate": 3.162e-05, "loss": 0.5263, "step": 6327 }, { "epoch": 0.3543509911524247, "grad_norm": 1.2153644561767578, "learning_rate": 3.1624999999999996e-05, "loss": 0.4624, "step": 6328 }, { "epoch": 0.3544069884645537, "grad_norm": 1.2270259857177734, "learning_rate": 3.163000000000001e-05, "loss": 0.455, "step": 6329 }, { "epoch": 0.3544629857766827, "grad_norm": 0.9415087103843689, "learning_rate": 3.1635000000000005e-05, "loss": 0.2646, "step": 6330 }, { "epoch": 0.35451898308881175, "grad_norm": 1.7275992631912231, "learning_rate": 
3.164e-05, "loss": 0.4851, "step": 6331 }, { "epoch": 0.35457498040094076, "grad_norm": 1.3599194288253784, "learning_rate": 3.1645e-05, "loss": 0.43, "step": 6332 }, { "epoch": 0.3546309777130698, "grad_norm": 1.7129243612289429, "learning_rate": 3.1650000000000004e-05, "loss": 0.4948, "step": 6333 }, { "epoch": 0.3546869750251988, "grad_norm": 1.1714155673980713, "learning_rate": 3.1655e-05, "loss": 0.3515, "step": 6334 }, { "epoch": 0.3547429723373278, "grad_norm": 1.2481791973114014, "learning_rate": 3.166e-05, "loss": 0.4746, "step": 6335 }, { "epoch": 0.35479896964945684, "grad_norm": 1.1287357807159424, "learning_rate": 3.1665e-05, "loss": 0.4145, "step": 6336 }, { "epoch": 0.35485496696158586, "grad_norm": 1.4542763233184814, "learning_rate": 3.167e-05, "loss": 0.3924, "step": 6337 }, { "epoch": 0.3549109642737149, "grad_norm": 1.4236118793487549, "learning_rate": 3.1675e-05, "loss": 0.4936, "step": 6338 }, { "epoch": 0.3549669615858439, "grad_norm": 1.5000228881835938, "learning_rate": 3.168e-05, "loss": 0.6088, "step": 6339 }, { "epoch": 0.3550229588979729, "grad_norm": 1.1576695442199707, "learning_rate": 3.1685000000000006e-05, "loss": 0.4053, "step": 6340 }, { "epoch": 0.35507895621010194, "grad_norm": 1.2773702144622803, "learning_rate": 3.169e-05, "loss": 0.4207, "step": 6341 }, { "epoch": 0.35513495352223096, "grad_norm": 1.1120816469192505, "learning_rate": 3.1695e-05, "loss": 0.3528, "step": 6342 }, { "epoch": 0.35519095083436, "grad_norm": 1.0784268379211426, "learning_rate": 3.1700000000000005e-05, "loss": 0.3445, "step": 6343 }, { "epoch": 0.355246948146489, "grad_norm": 1.5653364658355713, "learning_rate": 3.1705e-05, "loss": 0.5955, "step": 6344 }, { "epoch": 0.355302945458618, "grad_norm": 1.2151880264282227, "learning_rate": 3.171e-05, "loss": 0.4883, "step": 6345 }, { "epoch": 0.355358942770747, "grad_norm": 1.3910470008850098, "learning_rate": 3.1715e-05, "loss": 0.542, "step": 6346 }, { "epoch": 0.355414940082876, "grad_norm": 
1.0868487358093262, "learning_rate": 3.172e-05, "loss": 0.3592, "step": 6347 }, { "epoch": 0.355470937395005, "grad_norm": 1.9859716892242432, "learning_rate": 3.1725e-05, "loss": 0.3893, "step": 6348 }, { "epoch": 0.35552693470713403, "grad_norm": 1.0183755159378052, "learning_rate": 3.173e-05, "loss": 0.401, "step": 6349 }, { "epoch": 0.35558293201926305, "grad_norm": 1.2667468786239624, "learning_rate": 3.1735e-05, "loss": 0.434, "step": 6350 }, { "epoch": 0.3556389293313921, "grad_norm": 1.2418057918548584, "learning_rate": 3.1740000000000004e-05, "loss": 0.58, "step": 6351 }, { "epoch": 0.3556949266435211, "grad_norm": 1.1524723768234253, "learning_rate": 3.1745e-05, "loss": 0.4516, "step": 6352 }, { "epoch": 0.3557509239556501, "grad_norm": 1.9870307445526123, "learning_rate": 3.175e-05, "loss": 0.457, "step": 6353 }, { "epoch": 0.35580692126777913, "grad_norm": 1.1945549249649048, "learning_rate": 3.1755000000000003e-05, "loss": 0.6001, "step": 6354 }, { "epoch": 0.35586291857990815, "grad_norm": 1.2876054048538208, "learning_rate": 3.176e-05, "loss": 0.444, "step": 6355 }, { "epoch": 0.35591891589203717, "grad_norm": 1.264521837234497, "learning_rate": 3.1765e-05, "loss": 0.3927, "step": 6356 }, { "epoch": 0.3559749132041662, "grad_norm": 1.2872791290283203, "learning_rate": 3.177e-05, "loss": 0.3936, "step": 6357 }, { "epoch": 0.3560309105162952, "grad_norm": 1.345687747001648, "learning_rate": 3.1775e-05, "loss": 0.4547, "step": 6358 }, { "epoch": 0.3560869078284242, "grad_norm": 1.096648097038269, "learning_rate": 3.1780000000000004e-05, "loss": 0.3369, "step": 6359 }, { "epoch": 0.35614290514055325, "grad_norm": 1.0514757633209229, "learning_rate": 3.1785e-05, "loss": 0.3599, "step": 6360 }, { "epoch": 0.35619890245268226, "grad_norm": 1.674454927444458, "learning_rate": 3.1790000000000006e-05, "loss": 0.6327, "step": 6361 }, { "epoch": 0.3562548997648113, "grad_norm": 1.044646143913269, "learning_rate": 3.1795e-05, "loss": 0.3736, "step": 6362 }, { 
"epoch": 0.3563108970769403, "grad_norm": 1.229859709739685, "learning_rate": 3.18e-05, "loss": 0.3778, "step": 6363 }, { "epoch": 0.3563668943890693, "grad_norm": 1.2639544010162354, "learning_rate": 3.1805000000000005e-05, "loss": 0.496, "step": 6364 }, { "epoch": 0.35642289170119834, "grad_norm": 1.1227244138717651, "learning_rate": 3.181e-05, "loss": 0.3985, "step": 6365 }, { "epoch": 0.35647888901332736, "grad_norm": 1.4106838703155518, "learning_rate": 3.1815e-05, "loss": 0.4, "step": 6366 }, { "epoch": 0.3565348863254564, "grad_norm": 1.2340143918991089, "learning_rate": 3.182e-05, "loss": 0.4061, "step": 6367 }, { "epoch": 0.3565908836375854, "grad_norm": 1.538635492324829, "learning_rate": 3.1825e-05, "loss": 0.5551, "step": 6368 }, { "epoch": 0.3566468809497144, "grad_norm": 1.2566502094268799, "learning_rate": 3.1830000000000005e-05, "loss": 0.5342, "step": 6369 }, { "epoch": 0.35670287826184344, "grad_norm": 1.134588599205017, "learning_rate": 3.1835e-05, "loss": 0.5413, "step": 6370 }, { "epoch": 0.35675887557397246, "grad_norm": 1.3127809762954712, "learning_rate": 3.184e-05, "loss": 0.3591, "step": 6371 }, { "epoch": 0.3568148728861015, "grad_norm": 1.045646071434021, "learning_rate": 3.1845000000000004e-05, "loss": 0.4485, "step": 6372 }, { "epoch": 0.3568708701982305, "grad_norm": 1.19120454788208, "learning_rate": 3.185e-05, "loss": 0.4151, "step": 6373 }, { "epoch": 0.3569268675103595, "grad_norm": 1.1111185550689697, "learning_rate": 3.1855e-05, "loss": 0.5048, "step": 6374 }, { "epoch": 0.35698286482248853, "grad_norm": 1.6205518245697021, "learning_rate": 3.186e-05, "loss": 0.4923, "step": 6375 }, { "epoch": 0.35703886213461755, "grad_norm": 1.3513946533203125, "learning_rate": 3.1865e-05, "loss": 0.4694, "step": 6376 }, { "epoch": 0.35709485944674657, "grad_norm": 1.2300530672073364, "learning_rate": 3.187e-05, "loss": 0.4352, "step": 6377 }, { "epoch": 0.3571508567588756, "grad_norm": 1.2493767738342285, "learning_rate": 3.1875e-05, "loss": 
0.5207, "step": 6378 }, { "epoch": 0.3572068540710046, "grad_norm": 1.2247203588485718, "learning_rate": 3.188e-05, "loss": 0.32, "step": 6379 }, { "epoch": 0.35726285138313363, "grad_norm": 1.0390961170196533, "learning_rate": 3.1885000000000004e-05, "loss": 0.3408, "step": 6380 }, { "epoch": 0.35731884869526265, "grad_norm": 3.3146679401397705, "learning_rate": 3.189e-05, "loss": 0.4566, "step": 6381 }, { "epoch": 0.35737484600739167, "grad_norm": 1.019629955291748, "learning_rate": 3.1895000000000005e-05, "loss": 0.3753, "step": 6382 }, { "epoch": 0.3574308433195207, "grad_norm": 1.3227936029434204, "learning_rate": 3.19e-05, "loss": 0.6134, "step": 6383 }, { "epoch": 0.3574868406316497, "grad_norm": 1.313201904296875, "learning_rate": 3.1905e-05, "loss": 0.4308, "step": 6384 }, { "epoch": 0.3575428379437787, "grad_norm": 1.2985275983810425, "learning_rate": 3.191e-05, "loss": 0.4359, "step": 6385 }, { "epoch": 0.35759883525590774, "grad_norm": 1.136992335319519, "learning_rate": 3.1915e-05, "loss": 0.4, "step": 6386 }, { "epoch": 0.3576548325680367, "grad_norm": 1.094826102256775, "learning_rate": 3.192e-05, "loss": 0.3882, "step": 6387 }, { "epoch": 0.3577108298801657, "grad_norm": 1.4285268783569336, "learning_rate": 3.1925e-05, "loss": 0.4504, "step": 6388 }, { "epoch": 0.35776682719229475, "grad_norm": 1.1870620250701904, "learning_rate": 3.193e-05, "loss": 0.5015, "step": 6389 }, { "epoch": 0.35782282450442376, "grad_norm": 1.3874123096466064, "learning_rate": 3.1935000000000005e-05, "loss": 0.6506, "step": 6390 }, { "epoch": 0.3578788218165528, "grad_norm": 1.2326061725616455, "learning_rate": 3.194e-05, "loss": 0.3939, "step": 6391 }, { "epoch": 0.3579348191286818, "grad_norm": 1.4296495914459229, "learning_rate": 3.1945e-05, "loss": 0.6558, "step": 6392 }, { "epoch": 0.3579908164408108, "grad_norm": 1.5314677953720093, "learning_rate": 3.1950000000000004e-05, "loss": 0.3221, "step": 6393 }, { "epoch": 0.35804681375293984, "grad_norm": 
1.0734069347381592, "learning_rate": 3.1955e-05, "loss": 0.3728, "step": 6394 }, { "epoch": 0.35810281106506886, "grad_norm": 1.264939308166504, "learning_rate": 3.196e-05, "loss": 0.4224, "step": 6395 }, { "epoch": 0.3581588083771979, "grad_norm": 1.4076383113861084, "learning_rate": 3.1965e-05, "loss": 0.4076, "step": 6396 }, { "epoch": 0.3582148056893269, "grad_norm": 1.0512858629226685, "learning_rate": 3.197e-05, "loss": 0.4198, "step": 6397 }, { "epoch": 0.3582708030014559, "grad_norm": 1.4213348627090454, "learning_rate": 3.1975e-05, "loss": 0.3753, "step": 6398 }, { "epoch": 0.35832680031358494, "grad_norm": 1.445689082145691, "learning_rate": 3.198e-05, "loss": 0.4111, "step": 6399 }, { "epoch": 0.35838279762571396, "grad_norm": 1.4471769332885742, "learning_rate": 3.1985000000000006e-05, "loss": 0.3986, "step": 6400 }, { "epoch": 0.358438794937843, "grad_norm": 1.280672311782837, "learning_rate": 3.1990000000000004e-05, "loss": 0.4805, "step": 6401 }, { "epoch": 0.358494792249972, "grad_norm": 1.2855925559997559, "learning_rate": 3.1995e-05, "loss": 0.4193, "step": 6402 }, { "epoch": 0.358550789562101, "grad_norm": 1.4518502950668335, "learning_rate": 3.2000000000000005e-05, "loss": 0.4552, "step": 6403 }, { "epoch": 0.35860678687423003, "grad_norm": 1.4236962795257568, "learning_rate": 3.2005e-05, "loss": 0.5373, "step": 6404 }, { "epoch": 0.35866278418635905, "grad_norm": 1.2707767486572266, "learning_rate": 3.201e-05, "loss": 0.3918, "step": 6405 }, { "epoch": 0.35871878149848807, "grad_norm": 1.2630311250686646, "learning_rate": 3.2015e-05, "loss": 0.5197, "step": 6406 }, { "epoch": 0.3587747788106171, "grad_norm": 1.2726080417633057, "learning_rate": 3.202e-05, "loss": 0.3782, "step": 6407 }, { "epoch": 0.3588307761227461, "grad_norm": 1.275527000427246, "learning_rate": 3.2025e-05, "loss": 0.4598, "step": 6408 }, { "epoch": 0.35888677343487513, "grad_norm": 1.2320438623428345, "learning_rate": 3.2029999999999997e-05, "loss": 0.4221, "step": 6409 }, 
{ "epoch": 0.35894277074700415, "grad_norm": 1.4411370754241943, "learning_rate": 3.2035e-05, "loss": 0.5037, "step": 6410 }, { "epoch": 0.35899876805913317, "grad_norm": 1.2561625242233276, "learning_rate": 3.2040000000000005e-05, "loss": 0.3273, "step": 6411 }, { "epoch": 0.3590547653712622, "grad_norm": 1.2090235948562622, "learning_rate": 3.2045e-05, "loss": 0.4458, "step": 6412 }, { "epoch": 0.3591107626833912, "grad_norm": 1.248092532157898, "learning_rate": 3.205e-05, "loss": 0.4784, "step": 6413 }, { "epoch": 0.3591667599955202, "grad_norm": 1.1258567571640015, "learning_rate": 3.2055000000000004e-05, "loss": 0.4477, "step": 6414 }, { "epoch": 0.35922275730764924, "grad_norm": 1.2157437801361084, "learning_rate": 3.206e-05, "loss": 0.4207, "step": 6415 }, { "epoch": 0.35927875461977826, "grad_norm": 1.399810791015625, "learning_rate": 3.2065e-05, "loss": 0.3596, "step": 6416 }, { "epoch": 0.3593347519319073, "grad_norm": 1.2439607381820679, "learning_rate": 3.207e-05, "loss": 0.467, "step": 6417 }, { "epoch": 0.3593907492440363, "grad_norm": 1.3221555948257446, "learning_rate": 3.2075e-05, "loss": 0.4603, "step": 6418 }, { "epoch": 0.3594467465561653, "grad_norm": 1.1428906917572021, "learning_rate": 3.208e-05, "loss": 0.3442, "step": 6419 }, { "epoch": 0.35950274386829434, "grad_norm": 1.3556814193725586, "learning_rate": 3.2085e-05, "loss": 0.4593, "step": 6420 }, { "epoch": 0.35955874118042336, "grad_norm": 1.1192015409469604, "learning_rate": 3.2090000000000006e-05, "loss": 0.3109, "step": 6421 }, { "epoch": 0.3596147384925524, "grad_norm": 1.3313668966293335, "learning_rate": 3.2095000000000004e-05, "loss": 0.5065, "step": 6422 }, { "epoch": 0.3596707358046814, "grad_norm": 1.2944973707199097, "learning_rate": 3.21e-05, "loss": 0.4034, "step": 6423 }, { "epoch": 0.3597267331168104, "grad_norm": 1.1574398279190063, "learning_rate": 3.2105e-05, "loss": 0.3418, "step": 6424 }, { "epoch": 0.35978273042893943, "grad_norm": 1.1419357061386108, 
"learning_rate": 3.211e-05, "loss": 0.349, "step": 6425 }, { "epoch": 0.35983872774106845, "grad_norm": 1.5139812231063843, "learning_rate": 3.2115e-05, "loss": 0.3879, "step": 6426 }, { "epoch": 0.3598947250531975, "grad_norm": 1.2404354810714722, "learning_rate": 3.212e-05, "loss": 0.4619, "step": 6427 }, { "epoch": 0.3599507223653265, "grad_norm": 1.1889538764953613, "learning_rate": 3.2125e-05, "loss": 0.4278, "step": 6428 }, { "epoch": 0.36000671967745546, "grad_norm": 1.2059705257415771, "learning_rate": 3.213e-05, "loss": 0.4227, "step": 6429 }, { "epoch": 0.3600627169895845, "grad_norm": 1.1609004735946655, "learning_rate": 3.2135e-05, "loss": 0.404, "step": 6430 }, { "epoch": 0.3601187143017135, "grad_norm": 1.007348656654358, "learning_rate": 3.214e-05, "loss": 0.4094, "step": 6431 }, { "epoch": 0.3601747116138425, "grad_norm": 1.18724524974823, "learning_rate": 3.2145000000000005e-05, "loss": 0.4617, "step": 6432 }, { "epoch": 0.36023070892597153, "grad_norm": 1.2899388074874878, "learning_rate": 3.215e-05, "loss": 0.5475, "step": 6433 }, { "epoch": 0.36028670623810055, "grad_norm": 1.316030740737915, "learning_rate": 3.2155e-05, "loss": 0.5201, "step": 6434 }, { "epoch": 0.36034270355022957, "grad_norm": 1.1159597635269165, "learning_rate": 3.2160000000000004e-05, "loss": 0.4133, "step": 6435 }, { "epoch": 0.3603987008623586, "grad_norm": 1.3339399099349976, "learning_rate": 3.2165e-05, "loss": 0.4596, "step": 6436 }, { "epoch": 0.3604546981744876, "grad_norm": 1.3317457437515259, "learning_rate": 3.217e-05, "loss": 0.4083, "step": 6437 }, { "epoch": 0.36051069548661663, "grad_norm": 1.309380054473877, "learning_rate": 3.2175e-05, "loss": 0.4438, "step": 6438 }, { "epoch": 0.36056669279874565, "grad_norm": 1.1014667749404907, "learning_rate": 3.218e-05, "loss": 0.5102, "step": 6439 }, { "epoch": 0.36062269011087467, "grad_norm": 1.2829416990280151, "learning_rate": 3.2185000000000004e-05, "loss": 0.4968, "step": 6440 }, { "epoch": 0.3606786874230037, 
"grad_norm": 1.0472525358200073, "learning_rate": 3.219e-05, "loss": 0.3369, "step": 6441 }, { "epoch": 0.3607346847351327, "grad_norm": 1.2059707641601562, "learning_rate": 3.2195000000000006e-05, "loss": 0.4293, "step": 6442 }, { "epoch": 0.3607906820472617, "grad_norm": 1.0743045806884766, "learning_rate": 3.2200000000000003e-05, "loss": 0.2988, "step": 6443 }, { "epoch": 0.36084667935939074, "grad_norm": 1.1963176727294922, "learning_rate": 3.2205e-05, "loss": 0.4514, "step": 6444 }, { "epoch": 0.36090267667151976, "grad_norm": 1.4290324449539185, "learning_rate": 3.221e-05, "loss": 0.4848, "step": 6445 }, { "epoch": 0.3609586739836488, "grad_norm": 1.4431935548782349, "learning_rate": 3.2215e-05, "loss": 0.4874, "step": 6446 }, { "epoch": 0.3610146712957778, "grad_norm": 1.5518471002578735, "learning_rate": 3.222e-05, "loss": 0.6615, "step": 6447 }, { "epoch": 0.3610706686079068, "grad_norm": 1.2346594333648682, "learning_rate": 3.2225e-05, "loss": 0.4037, "step": 6448 }, { "epoch": 0.36112666592003584, "grad_norm": 1.3797889947891235, "learning_rate": 3.223e-05, "loss": 0.3868, "step": 6449 }, { "epoch": 0.36118266323216486, "grad_norm": 1.1214135885238647, "learning_rate": 3.2235000000000006e-05, "loss": 0.3973, "step": 6450 }, { "epoch": 0.3612386605442939, "grad_norm": 1.1265441179275513, "learning_rate": 3.224e-05, "loss": 0.3838, "step": 6451 }, { "epoch": 0.3612946578564229, "grad_norm": 1.2349735498428345, "learning_rate": 3.2245e-05, "loss": 0.388, "step": 6452 }, { "epoch": 0.3613506551685519, "grad_norm": 1.1909857988357544, "learning_rate": 3.2250000000000005e-05, "loss": 0.4089, "step": 6453 }, { "epoch": 0.36140665248068093, "grad_norm": 1.2351858615875244, "learning_rate": 3.2255e-05, "loss": 0.4081, "step": 6454 }, { "epoch": 0.36146264979280995, "grad_norm": 1.1014418601989746, "learning_rate": 3.226e-05, "loss": 0.3513, "step": 6455 }, { "epoch": 0.361518647104939, "grad_norm": 1.296622395515442, "learning_rate": 3.2265000000000004e-05, 
"loss": 0.4217, "step": 6456 }, { "epoch": 0.361574644417068, "grad_norm": 1.2340731620788574, "learning_rate": 3.227e-05, "loss": 0.4694, "step": 6457 }, { "epoch": 0.361630641729197, "grad_norm": 1.2117183208465576, "learning_rate": 3.2275e-05, "loss": 0.3697, "step": 6458 }, { "epoch": 0.36168663904132603, "grad_norm": 1.2185648679733276, "learning_rate": 3.2279999999999996e-05, "loss": 0.4544, "step": 6459 }, { "epoch": 0.36174263635345505, "grad_norm": 1.2371495962142944, "learning_rate": 3.228500000000001e-05, "loss": 0.4588, "step": 6460 }, { "epoch": 0.36179863366558407, "grad_norm": 1.1846920251846313, "learning_rate": 3.2290000000000004e-05, "loss": 0.4448, "step": 6461 }, { "epoch": 0.3618546309777131, "grad_norm": 1.308303713798523, "learning_rate": 3.2295e-05, "loss": 0.4576, "step": 6462 }, { "epoch": 0.3619106282898421, "grad_norm": 1.1310920715332031, "learning_rate": 3.2300000000000006e-05, "loss": 0.4626, "step": 6463 }, { "epoch": 0.3619666256019711, "grad_norm": 1.205828070640564, "learning_rate": 3.2305e-05, "loss": 0.4259, "step": 6464 }, { "epoch": 0.36202262291410015, "grad_norm": 1.215994954109192, "learning_rate": 3.231e-05, "loss": 0.3851, "step": 6465 }, { "epoch": 0.36207862022622916, "grad_norm": 1.2289583683013916, "learning_rate": 3.2315e-05, "loss": 0.4878, "step": 6466 }, { "epoch": 0.3621346175383582, "grad_norm": 1.1727473735809326, "learning_rate": 3.232e-05, "loss": 0.3297, "step": 6467 }, { "epoch": 0.3621906148504872, "grad_norm": 1.1282618045806885, "learning_rate": 3.2325e-05, "loss": 0.402, "step": 6468 }, { "epoch": 0.3622466121626162, "grad_norm": 1.4074574708938599, "learning_rate": 3.233e-05, "loss": 0.4783, "step": 6469 }, { "epoch": 0.3623026094747452, "grad_norm": 2.0473852157592773, "learning_rate": 3.2335e-05, "loss": 0.5243, "step": 6470 }, { "epoch": 0.3623586067868742, "grad_norm": 1.3587144613265991, "learning_rate": 3.2340000000000005e-05, "loss": 0.385, "step": 6471 }, { "epoch": 0.3624146040990032, 
"grad_norm": 1.1086276769638062, "learning_rate": 3.2345e-05, "loss": 0.3856, "step": 6472 }, { "epoch": 0.36247060141113224, "grad_norm": 1.2624282836914062, "learning_rate": 3.235e-05, "loss": 0.5057, "step": 6473 }, { "epoch": 0.36252659872326126, "grad_norm": 1.1324260234832764, "learning_rate": 3.2355000000000004e-05, "loss": 0.4098, "step": 6474 }, { "epoch": 0.3625825960353903, "grad_norm": 1.260992407798767, "learning_rate": 3.236e-05, "loss": 0.3632, "step": 6475 }, { "epoch": 0.3626385933475193, "grad_norm": 1.2799453735351562, "learning_rate": 3.2365e-05, "loss": 0.3703, "step": 6476 }, { "epoch": 0.3626945906596483, "grad_norm": 1.233601689338684, "learning_rate": 3.2370000000000003e-05, "loss": 0.5573, "step": 6477 }, { "epoch": 0.36275058797177734, "grad_norm": 1.1402902603149414, "learning_rate": 3.2375e-05, "loss": 0.3808, "step": 6478 }, { "epoch": 0.36280658528390636, "grad_norm": 1.103020429611206, "learning_rate": 3.238e-05, "loss": 0.386, "step": 6479 }, { "epoch": 0.3628625825960354, "grad_norm": 1.1356475353240967, "learning_rate": 3.2385e-05, "loss": 0.406, "step": 6480 }, { "epoch": 0.3629185799081644, "grad_norm": 1.1693097352981567, "learning_rate": 3.239000000000001e-05, "loss": 0.4771, "step": 6481 }, { "epoch": 0.3629745772202934, "grad_norm": 1.4075266122817993, "learning_rate": 3.2395000000000004e-05, "loss": 0.5045, "step": 6482 }, { "epoch": 0.36303057453242243, "grad_norm": 1.3106882572174072, "learning_rate": 3.24e-05, "loss": 0.4642, "step": 6483 }, { "epoch": 0.36308657184455145, "grad_norm": 1.2524840831756592, "learning_rate": 3.2405e-05, "loss": 0.4903, "step": 6484 }, { "epoch": 0.3631425691566805, "grad_norm": 1.4604068994522095, "learning_rate": 3.241e-05, "loss": 0.4731, "step": 6485 }, { "epoch": 0.3631985664688095, "grad_norm": 1.555456280708313, "learning_rate": 3.2415e-05, "loss": 0.4913, "step": 6486 }, { "epoch": 0.3632545637809385, "grad_norm": 1.2731359004974365, "learning_rate": 3.242e-05, "loss": 0.3683, 
"step": 6487 }, { "epoch": 0.36331056109306753, "grad_norm": 1.37197744846344, "learning_rate": 3.2425e-05, "loss": 0.418, "step": 6488 }, { "epoch": 0.36336655840519655, "grad_norm": 1.0760390758514404, "learning_rate": 3.243e-05, "loss": 0.364, "step": 6489 }, { "epoch": 0.36342255571732557, "grad_norm": 1.191428542137146, "learning_rate": 3.2435000000000004e-05, "loss": 0.4739, "step": 6490 }, { "epoch": 0.3634785530294546, "grad_norm": 2.9587643146514893, "learning_rate": 3.244e-05, "loss": 0.4657, "step": 6491 }, { "epoch": 0.3635345503415836, "grad_norm": 1.2364811897277832, "learning_rate": 3.2445000000000005e-05, "loss": 0.53, "step": 6492 }, { "epoch": 0.3635905476537126, "grad_norm": 1.3529722690582275, "learning_rate": 3.245e-05, "loss": 0.4389, "step": 6493 }, { "epoch": 0.36364654496584164, "grad_norm": 1.2198383808135986, "learning_rate": 3.2455e-05, "loss": 0.4862, "step": 6494 }, { "epoch": 0.36370254227797066, "grad_norm": 1.2174612283706665, "learning_rate": 3.2460000000000004e-05, "loss": 0.4445, "step": 6495 }, { "epoch": 0.3637585395900997, "grad_norm": 0.9913917183876038, "learning_rate": 3.2465e-05, "loss": 0.4996, "step": 6496 }, { "epoch": 0.3638145369022287, "grad_norm": 1.5098800659179688, "learning_rate": 3.247e-05, "loss": 0.393, "step": 6497 }, { "epoch": 0.3638705342143577, "grad_norm": 1.3465111255645752, "learning_rate": 3.2474999999999997e-05, "loss": 0.4528, "step": 6498 }, { "epoch": 0.36392653152648674, "grad_norm": 1.342793345451355, "learning_rate": 3.248e-05, "loss": 0.5098, "step": 6499 }, { "epoch": 0.36398252883861576, "grad_norm": 1.5947413444519043, "learning_rate": 3.2485000000000005e-05, "loss": 0.4775, "step": 6500 }, { "epoch": 0.3640385261507448, "grad_norm": 1.0939754247665405, "learning_rate": 3.249e-05, "loss": 0.3968, "step": 6501 }, { "epoch": 0.3640945234628738, "grad_norm": 1.2496519088745117, "learning_rate": 3.2495000000000007e-05, "loss": 0.4816, "step": 6502 }, { "epoch": 0.3641505207750028, "grad_norm": 
1.2200695276260376, "learning_rate": 3.2500000000000004e-05, "loss": 0.449, "step": 6503 }, { "epoch": 0.36420651808713184, "grad_norm": 1.184432029724121, "learning_rate": 3.2505e-05, "loss": 0.4768, "step": 6504 }, { "epoch": 0.36426251539926086, "grad_norm": 1.0775400400161743, "learning_rate": 3.251e-05, "loss": 0.3957, "step": 6505 }, { "epoch": 0.3643185127113899, "grad_norm": 1.1742106676101685, "learning_rate": 3.2515e-05, "loss": 0.4073, "step": 6506 }, { "epoch": 0.3643745100235189, "grad_norm": 1.1494220495224, "learning_rate": 3.252e-05, "loss": 0.4473, "step": 6507 }, { "epoch": 0.3644305073356479, "grad_norm": 1.1055777072906494, "learning_rate": 3.2525e-05, "loss": 0.3384, "step": 6508 }, { "epoch": 0.36448650464777693, "grad_norm": 1.275314211845398, "learning_rate": 3.253e-05, "loss": 0.4473, "step": 6509 }, { "epoch": 0.36454250195990595, "grad_norm": 1.1592401266098022, "learning_rate": 3.2535e-05, "loss": 0.4372, "step": 6510 }, { "epoch": 0.3645984992720349, "grad_norm": 1.2728155851364136, "learning_rate": 3.2540000000000004e-05, "loss": 0.4949, "step": 6511 }, { "epoch": 0.36465449658416393, "grad_norm": 1.1379468441009521, "learning_rate": 3.2545e-05, "loss": 0.4844, "step": 6512 }, { "epoch": 0.36471049389629295, "grad_norm": 1.2028402090072632, "learning_rate": 3.2550000000000005e-05, "loss": 0.4344, "step": 6513 }, { "epoch": 0.364766491208422, "grad_norm": 1.402000069618225, "learning_rate": 3.2555e-05, "loss": 0.3926, "step": 6514 }, { "epoch": 0.364822488520551, "grad_norm": 1.4037359952926636, "learning_rate": 3.256e-05, "loss": 0.2992, "step": 6515 }, { "epoch": 0.36487848583268, "grad_norm": 1.2170379161834717, "learning_rate": 3.2565000000000004e-05, "loss": 0.5853, "step": 6516 }, { "epoch": 0.36493448314480903, "grad_norm": 1.0671933889389038, "learning_rate": 3.257e-05, "loss": 0.4087, "step": 6517 }, { "epoch": 0.36499048045693805, "grad_norm": 1.2431203126907349, "learning_rate": 3.2575e-05, "loss": 0.4131, "step": 6518 }, { 
"epoch": 0.36504647776906707, "grad_norm": 1.300535798072815, "learning_rate": 3.2579999999999996e-05, "loss": 0.3265, "step": 6519 }, { "epoch": 0.3651024750811961, "grad_norm": 1.442633867263794, "learning_rate": 3.2585e-05, "loss": 0.4488, "step": 6520 }, { "epoch": 0.3651584723933251, "grad_norm": 1.062958002090454, "learning_rate": 3.2590000000000005e-05, "loss": 0.2811, "step": 6521 }, { "epoch": 0.3652144697054541, "grad_norm": 1.154906153678894, "learning_rate": 3.2595e-05, "loss": 0.3751, "step": 6522 }, { "epoch": 0.36527046701758314, "grad_norm": 1.2711113691329956, "learning_rate": 3.26e-05, "loss": 0.5827, "step": 6523 }, { "epoch": 0.36532646432971216, "grad_norm": 1.325225591659546, "learning_rate": 3.2605000000000004e-05, "loss": 0.319, "step": 6524 }, { "epoch": 0.3653824616418412, "grad_norm": 1.3151487112045288, "learning_rate": 3.261e-05, "loss": 0.4581, "step": 6525 }, { "epoch": 0.3654384589539702, "grad_norm": 1.2263975143432617, "learning_rate": 3.2615e-05, "loss": 0.4808, "step": 6526 }, { "epoch": 0.3654944562660992, "grad_norm": 1.1650111675262451, "learning_rate": 3.262e-05, "loss": 0.3016, "step": 6527 }, { "epoch": 0.36555045357822824, "grad_norm": 1.2386382818222046, "learning_rate": 3.2625e-05, "loss": 0.4782, "step": 6528 }, { "epoch": 0.36560645089035726, "grad_norm": 1.1024527549743652, "learning_rate": 3.263e-05, "loss": 0.3295, "step": 6529 }, { "epoch": 0.3656624482024863, "grad_norm": 1.4803990125656128, "learning_rate": 3.2635e-05, "loss": 0.5028, "step": 6530 }, { "epoch": 0.3657184455146153, "grad_norm": 1.300776720046997, "learning_rate": 3.2640000000000006e-05, "loss": 0.5235, "step": 6531 }, { "epoch": 0.3657744428267443, "grad_norm": 1.1828978061676025, "learning_rate": 3.2645e-05, "loss": 0.3642, "step": 6532 }, { "epoch": 0.36583044013887334, "grad_norm": 1.1352730989456177, "learning_rate": 3.265e-05, "loss": 0.4256, "step": 6533 }, { "epoch": 0.36588643745100236, "grad_norm": 1.3790315389633179, "learning_rate": 
3.2655000000000005e-05, "loss": 0.4952, "step": 6534 }, { "epoch": 0.3659424347631314, "grad_norm": 1.1742215156555176, "learning_rate": 3.266e-05, "loss": 0.4102, "step": 6535 }, { "epoch": 0.3659984320752604, "grad_norm": 1.2382011413574219, "learning_rate": 3.2665e-05, "loss": 0.4639, "step": 6536 }, { "epoch": 0.3660544293873894, "grad_norm": 1.2191896438598633, "learning_rate": 3.267e-05, "loss": 0.3776, "step": 6537 }, { "epoch": 0.36611042669951843, "grad_norm": 1.1040349006652832, "learning_rate": 3.2675e-05, "loss": 0.4356, "step": 6538 }, { "epoch": 0.36616642401164745, "grad_norm": 1.3455814123153687, "learning_rate": 3.268e-05, "loss": 0.4472, "step": 6539 }, { "epoch": 0.36622242132377647, "grad_norm": 1.3614933490753174, "learning_rate": 3.2684999999999996e-05, "loss": 0.6831, "step": 6540 }, { "epoch": 0.3662784186359055, "grad_norm": 1.1957603693008423, "learning_rate": 3.269000000000001e-05, "loss": 0.5281, "step": 6541 }, { "epoch": 0.3663344159480345, "grad_norm": 1.596968173980713, "learning_rate": 3.2695000000000005e-05, "loss": 0.4646, "step": 6542 }, { "epoch": 0.3663904132601635, "grad_norm": 1.2392882108688354, "learning_rate": 3.27e-05, "loss": 0.5475, "step": 6543 }, { "epoch": 0.36644641057229255, "grad_norm": 1.2253211736679077, "learning_rate": 3.2705e-05, "loss": 0.3998, "step": 6544 }, { "epoch": 0.36650240788442157, "grad_norm": 1.2598199844360352, "learning_rate": 3.2710000000000004e-05, "loss": 0.3963, "step": 6545 }, { "epoch": 0.3665584051965506, "grad_norm": 1.208227276802063, "learning_rate": 3.2715e-05, "loss": 0.5363, "step": 6546 }, { "epoch": 0.3666144025086796, "grad_norm": 1.0527280569076538, "learning_rate": 3.272e-05, "loss": 0.3906, "step": 6547 }, { "epoch": 0.3666703998208086, "grad_norm": 1.2148113250732422, "learning_rate": 3.2725e-05, "loss": 0.4611, "step": 6548 }, { "epoch": 0.36672639713293764, "grad_norm": 1.420346736907959, "learning_rate": 3.273e-05, "loss": 0.5132, "step": 6549 }, { "epoch": 
0.36678239444506666, "grad_norm": 1.3327460289001465, "learning_rate": 3.2735e-05, "loss": 0.3674, "step": 6550 }, { "epoch": 0.3668383917571957, "grad_norm": 1.1744295358657837, "learning_rate": 3.274e-05, "loss": 0.3614, "step": 6551 }, { "epoch": 0.3668943890693247, "grad_norm": 1.3436514139175415, "learning_rate": 3.2745000000000006e-05, "loss": 0.3978, "step": 6552 }, { "epoch": 0.36695038638145366, "grad_norm": 1.2072014808654785, "learning_rate": 3.275e-05, "loss": 0.3727, "step": 6553 }, { "epoch": 0.3670063836935827, "grad_norm": 1.1398029327392578, "learning_rate": 3.2755e-05, "loss": 0.4489, "step": 6554 }, { "epoch": 0.3670623810057117, "grad_norm": 1.4085496664047241, "learning_rate": 3.2760000000000005e-05, "loss": 0.694, "step": 6555 }, { "epoch": 0.3671183783178407, "grad_norm": 1.164756417274475, "learning_rate": 3.2765e-05, "loss": 0.4144, "step": 6556 }, { "epoch": 0.36717437562996974, "grad_norm": 1.3206162452697754, "learning_rate": 3.277e-05, "loss": 0.4299, "step": 6557 }, { "epoch": 0.36723037294209876, "grad_norm": 1.059890866279602, "learning_rate": 3.2775e-05, "loss": 0.3972, "step": 6558 }, { "epoch": 0.3672863702542278, "grad_norm": 1.2624162435531616, "learning_rate": 3.278e-05, "loss": 0.4341, "step": 6559 }, { "epoch": 0.3673423675663568, "grad_norm": 1.3863948583602905, "learning_rate": 3.2785e-05, "loss": 0.5112, "step": 6560 }, { "epoch": 0.3673983648784858, "grad_norm": 1.219224214553833, "learning_rate": 3.279e-05, "loss": 0.4561, "step": 6561 }, { "epoch": 0.36745436219061484, "grad_norm": 1.2431224584579468, "learning_rate": 3.2795e-05, "loss": 0.4304, "step": 6562 }, { "epoch": 0.36751035950274386, "grad_norm": 1.2224704027175903, "learning_rate": 3.2800000000000004e-05, "loss": 0.3912, "step": 6563 }, { "epoch": 0.3675663568148729, "grad_norm": 2.1497962474823, "learning_rate": 3.2805e-05, "loss": 0.668, "step": 6564 }, { "epoch": 0.3676223541270019, "grad_norm": 1.4057832956314087, "learning_rate": 3.281e-05, "loss": 
0.7726, "step": 6565 }, { "epoch": 0.3676783514391309, "grad_norm": 1.4491353034973145, "learning_rate": 3.2815000000000003e-05, "loss": 0.4135, "step": 6566 }, { "epoch": 0.36773434875125993, "grad_norm": 1.0267125368118286, "learning_rate": 3.282e-05, "loss": 0.3927, "step": 6567 }, { "epoch": 0.36779034606338895, "grad_norm": 1.2104021310806274, "learning_rate": 3.2825e-05, "loss": 0.3914, "step": 6568 }, { "epoch": 0.36784634337551797, "grad_norm": 1.4252570867538452, "learning_rate": 3.283e-05, "loss": 0.4011, "step": 6569 }, { "epoch": 0.367902340687647, "grad_norm": 1.1146646738052368, "learning_rate": 3.2835e-05, "loss": 0.5136, "step": 6570 }, { "epoch": 0.367958337999776, "grad_norm": 1.1819242238998413, "learning_rate": 3.2840000000000004e-05, "loss": 0.3871, "step": 6571 }, { "epoch": 0.368014335311905, "grad_norm": 1.1665692329406738, "learning_rate": 3.2845e-05, "loss": 0.3557, "step": 6572 }, { "epoch": 0.36807033262403405, "grad_norm": 1.575913906097412, "learning_rate": 3.2850000000000006e-05, "loss": 0.4912, "step": 6573 }, { "epoch": 0.36812632993616307, "grad_norm": 1.8078118562698364, "learning_rate": 3.2855e-05, "loss": 0.3965, "step": 6574 }, { "epoch": 0.3681823272482921, "grad_norm": 1.1450138092041016, "learning_rate": 3.286e-05, "loss": 0.319, "step": 6575 }, { "epoch": 0.3682383245604211, "grad_norm": 1.0713926553726196, "learning_rate": 3.2865000000000005e-05, "loss": 0.3741, "step": 6576 }, { "epoch": 0.3682943218725501, "grad_norm": 1.2838785648345947, "learning_rate": 3.287e-05, "loss": 0.3915, "step": 6577 }, { "epoch": 0.36835031918467914, "grad_norm": 1.1639436483383179, "learning_rate": 3.2875e-05, "loss": 0.4957, "step": 6578 }, { "epoch": 0.36840631649680816, "grad_norm": 1.3116776943206787, "learning_rate": 3.288e-05, "loss": 0.4058, "step": 6579 }, { "epoch": 0.3684623138089372, "grad_norm": 1.3612570762634277, "learning_rate": 3.2885e-05, "loss": 0.4285, "step": 6580 }, { "epoch": 0.3685183111210662, "grad_norm": 
1.1726627349853516, "learning_rate": 3.2890000000000005e-05, "loss": 0.3682, "step": 6581 }, { "epoch": 0.3685743084331952, "grad_norm": 1.2552701234817505, "learning_rate": 3.2895e-05, "loss": 0.404, "step": 6582 }, { "epoch": 0.36863030574532424, "grad_norm": 1.3821288347244263, "learning_rate": 3.29e-05, "loss": 0.5341, "step": 6583 }, { "epoch": 0.36868630305745326, "grad_norm": 1.3465137481689453, "learning_rate": 3.2905000000000004e-05, "loss": 0.5916, "step": 6584 }, { "epoch": 0.3687423003695823, "grad_norm": 1.141660451889038, "learning_rate": 3.291e-05, "loss": 0.4408, "step": 6585 }, { "epoch": 0.3687982976817113, "grad_norm": 1.1541905403137207, "learning_rate": 3.2915e-05, "loss": 0.3921, "step": 6586 }, { "epoch": 0.3688542949938403, "grad_norm": 1.2907469272613525, "learning_rate": 3.292e-05, "loss": 0.51, "step": 6587 }, { "epoch": 0.36891029230596933, "grad_norm": 1.2633461952209473, "learning_rate": 3.2925e-05, "loss": 0.4051, "step": 6588 }, { "epoch": 0.36896628961809835, "grad_norm": 1.078071117401123, "learning_rate": 3.293e-05, "loss": 0.3231, "step": 6589 }, { "epoch": 0.3690222869302274, "grad_norm": 1.319933295249939, "learning_rate": 3.2935e-05, "loss": 0.4022, "step": 6590 }, { "epoch": 0.3690782842423564, "grad_norm": 1.2563138008117676, "learning_rate": 3.2940000000000006e-05, "loss": 0.4468, "step": 6591 }, { "epoch": 0.3691342815544854, "grad_norm": 1.109421730041504, "learning_rate": 3.2945000000000004e-05, "loss": 0.3579, "step": 6592 }, { "epoch": 0.36919027886661443, "grad_norm": 1.5637937784194946, "learning_rate": 3.295e-05, "loss": 0.5781, "step": 6593 }, { "epoch": 0.3692462761787434, "grad_norm": 1.3978664875030518, "learning_rate": 3.2955000000000006e-05, "loss": 0.4671, "step": 6594 }, { "epoch": 0.3693022734908724, "grad_norm": 1.0309531688690186, "learning_rate": 3.296e-05, "loss": 0.3874, "step": 6595 }, { "epoch": 0.36935827080300143, "grad_norm": 1.4158213138580322, "learning_rate": 3.2965e-05, "loss": 0.3398, "step": 
6596 }, { "epoch": 0.36941426811513045, "grad_norm": 1.2440460920333862, "learning_rate": 3.297e-05, "loss": 0.4682, "step": 6597 }, { "epoch": 0.36947026542725947, "grad_norm": 1.157631516456604, "learning_rate": 3.2975e-05, "loss": 0.363, "step": 6598 }, { "epoch": 0.3695262627393885, "grad_norm": 1.1247055530548096, "learning_rate": 3.298e-05, "loss": 0.4482, "step": 6599 }, { "epoch": 0.3695822600515175, "grad_norm": 1.1412831544876099, "learning_rate": 3.2985e-05, "loss": 0.3781, "step": 6600 }, { "epoch": 0.3696382573636465, "grad_norm": 1.1583365201950073, "learning_rate": 3.299e-05, "loss": 0.3765, "step": 6601 }, { "epoch": 0.36969425467577555, "grad_norm": 1.2251805067062378, "learning_rate": 3.2995000000000005e-05, "loss": 0.4022, "step": 6602 }, { "epoch": 0.36975025198790457, "grad_norm": 1.2362018823623657, "learning_rate": 3.3e-05, "loss": 0.5551, "step": 6603 }, { "epoch": 0.3698062493000336, "grad_norm": 1.3730159997940063, "learning_rate": 3.3005e-05, "loss": 0.4925, "step": 6604 }, { "epoch": 0.3698622466121626, "grad_norm": 1.365923285484314, "learning_rate": 3.3010000000000004e-05, "loss": 0.4339, "step": 6605 }, { "epoch": 0.3699182439242916, "grad_norm": 1.1870639324188232, "learning_rate": 3.3015e-05, "loss": 0.3789, "step": 6606 }, { "epoch": 0.36997424123642064, "grad_norm": 4.544864177703857, "learning_rate": 3.302e-05, "loss": 0.5686, "step": 6607 }, { "epoch": 0.37003023854854966, "grad_norm": 1.421617865562439, "learning_rate": 3.3025e-05, "loss": 0.5282, "step": 6608 }, { "epoch": 0.3700862358606787, "grad_norm": 1.3793761730194092, "learning_rate": 3.303e-05, "loss": 0.4925, "step": 6609 }, { "epoch": 0.3701422331728077, "grad_norm": 1.3852094411849976, "learning_rate": 3.3035e-05, "loss": 0.4386, "step": 6610 }, { "epoch": 0.3701982304849367, "grad_norm": 1.2681360244750977, "learning_rate": 3.304e-05, "loss": 0.3768, "step": 6611 }, { "epoch": 0.37025422779706574, "grad_norm": 1.3386889696121216, "learning_rate": 
3.3045000000000006e-05, "loss": 0.4566, "step": 6612 }, { "epoch": 0.37031022510919476, "grad_norm": 1.2781789302825928, "learning_rate": 3.3050000000000004e-05, "loss": 0.4486, "step": 6613 }, { "epoch": 0.3703662224213238, "grad_norm": 1.4412894248962402, "learning_rate": 3.3055e-05, "loss": 0.4828, "step": 6614 }, { "epoch": 0.3704222197334528, "grad_norm": 1.2869528532028198, "learning_rate": 3.3060000000000005e-05, "loss": 0.4226, "step": 6615 }, { "epoch": 0.3704782170455818, "grad_norm": 1.3932974338531494, "learning_rate": 3.3065e-05, "loss": 0.4495, "step": 6616 }, { "epoch": 0.37053421435771083, "grad_norm": 1.136078119277954, "learning_rate": 3.307e-05, "loss": 0.4614, "step": 6617 }, { "epoch": 0.37059021166983985, "grad_norm": 1.113147497177124, "learning_rate": 3.3075e-05, "loss": 0.3735, "step": 6618 }, { "epoch": 0.37064620898196887, "grad_norm": 1.1008528470993042, "learning_rate": 3.308e-05, "loss": 0.4377, "step": 6619 }, { "epoch": 0.3707022062940979, "grad_norm": 1.2893664836883545, "learning_rate": 3.3085e-05, "loss": 0.4508, "step": 6620 }, { "epoch": 0.3707582036062269, "grad_norm": 1.1024919748306274, "learning_rate": 3.309e-05, "loss": 0.3329, "step": 6621 }, { "epoch": 0.37081420091835593, "grad_norm": 1.320804476737976, "learning_rate": 3.3095e-05, "loss": 0.4158, "step": 6622 }, { "epoch": 0.37087019823048495, "grad_norm": 1.3588051795959473, "learning_rate": 3.3100000000000005e-05, "loss": 0.5241, "step": 6623 }, { "epoch": 0.37092619554261397, "grad_norm": 1.2875909805297852, "learning_rate": 3.3105e-05, "loss": 0.3818, "step": 6624 }, { "epoch": 0.370982192854743, "grad_norm": 1.308329463005066, "learning_rate": 3.311e-05, "loss": 0.5868, "step": 6625 }, { "epoch": 0.371038190166872, "grad_norm": 1.737322449684143, "learning_rate": 3.3115000000000004e-05, "loss": 0.4847, "step": 6626 }, { "epoch": 0.371094187479001, "grad_norm": 4.365012168884277, "learning_rate": 3.312e-05, "loss": 0.4465, "step": 6627 }, { "epoch": 
0.37115018479113004, "grad_norm": 1.3691514730453491, "learning_rate": 3.3125e-05, "loss": 0.4063, "step": 6628 }, { "epoch": 0.37120618210325906, "grad_norm": 1.4230196475982666, "learning_rate": 3.313e-05, "loss": 0.461, "step": 6629 }, { "epoch": 0.3712621794153881, "grad_norm": 1.1951203346252441, "learning_rate": 3.3135e-05, "loss": 0.4172, "step": 6630 }, { "epoch": 0.3713181767275171, "grad_norm": 1.3649755716323853, "learning_rate": 3.314e-05, "loss": 0.3652, "step": 6631 }, { "epoch": 0.3713741740396461, "grad_norm": 1.3521397113800049, "learning_rate": 3.3145e-05, "loss": 0.4778, "step": 6632 }, { "epoch": 0.37143017135177514, "grad_norm": 1.3003733158111572, "learning_rate": 3.3150000000000006e-05, "loss": 0.3717, "step": 6633 }, { "epoch": 0.37148616866390416, "grad_norm": 1.3330631256103516, "learning_rate": 3.3155000000000004e-05, "loss": 0.4405, "step": 6634 }, { "epoch": 0.3715421659760331, "grad_norm": 1.1360352039337158, "learning_rate": 3.316e-05, "loss": 0.4608, "step": 6635 }, { "epoch": 0.37159816328816214, "grad_norm": 1.2262935638427734, "learning_rate": 3.3165e-05, "loss": 0.4187, "step": 6636 }, { "epoch": 0.37165416060029116, "grad_norm": 1.1865484714508057, "learning_rate": 3.317e-05, "loss": 0.403, "step": 6637 }, { "epoch": 0.3717101579124202, "grad_norm": 1.1020766496658325, "learning_rate": 3.3175e-05, "loss": 0.4161, "step": 6638 }, { "epoch": 0.3717661552245492, "grad_norm": 1.1455082893371582, "learning_rate": 3.318e-05, "loss": 0.4625, "step": 6639 }, { "epoch": 0.3718221525366782, "grad_norm": 1.2724734544754028, "learning_rate": 3.3185e-05, "loss": 0.4234, "step": 6640 }, { "epoch": 0.37187814984880724, "grad_norm": 1.2132536172866821, "learning_rate": 3.319e-05, "loss": 0.3902, "step": 6641 }, { "epoch": 0.37193414716093626, "grad_norm": 1.353208303451538, "learning_rate": 3.3195e-05, "loss": 0.5316, "step": 6642 }, { "epoch": 0.3719901444730653, "grad_norm": 1.3749722242355347, "learning_rate": 3.32e-05, "loss": 0.3778, 
"step": 6643 }, { "epoch": 0.3720461417851943, "grad_norm": 1.0999027490615845, "learning_rate": 3.3205000000000005e-05, "loss": 0.3242, "step": 6644 }, { "epoch": 0.3721021390973233, "grad_norm": 1.3657257556915283, "learning_rate": 3.321e-05, "loss": 0.5076, "step": 6645 }, { "epoch": 0.37215813640945233, "grad_norm": 1.0309100151062012, "learning_rate": 3.3215e-05, "loss": 0.3803, "step": 6646 }, { "epoch": 0.37221413372158135, "grad_norm": 1.1874897480010986, "learning_rate": 3.3220000000000004e-05, "loss": 0.4817, "step": 6647 }, { "epoch": 0.37227013103371037, "grad_norm": 1.455005407333374, "learning_rate": 3.3225e-05, "loss": 0.4271, "step": 6648 }, { "epoch": 0.3723261283458394, "grad_norm": 1.4285304546356201, "learning_rate": 3.323e-05, "loss": 0.3734, "step": 6649 }, { "epoch": 0.3723821256579684, "grad_norm": 1.0313078165054321, "learning_rate": 3.3235e-05, "loss": 0.3734, "step": 6650 }, { "epoch": 0.37243812297009743, "grad_norm": 0.9433008432388306, "learning_rate": 3.324e-05, "loss": 0.318, "step": 6651 }, { "epoch": 0.37249412028222645, "grad_norm": 1.1286393404006958, "learning_rate": 3.3245000000000004e-05, "loss": 0.459, "step": 6652 }, { "epoch": 0.37255011759435547, "grad_norm": 0.9914267659187317, "learning_rate": 3.325e-05, "loss": 0.3964, "step": 6653 }, { "epoch": 0.3726061149064845, "grad_norm": 1.1322031021118164, "learning_rate": 3.3255000000000006e-05, "loss": 0.5777, "step": 6654 }, { "epoch": 0.3726621122186135, "grad_norm": 1.4263185262680054, "learning_rate": 3.3260000000000003e-05, "loss": 0.5518, "step": 6655 }, { "epoch": 0.3727181095307425, "grad_norm": 1.1430028676986694, "learning_rate": 3.3265e-05, "loss": 0.4088, "step": 6656 }, { "epoch": 0.37277410684287154, "grad_norm": 1.332008957862854, "learning_rate": 3.327e-05, "loss": 0.4516, "step": 6657 }, { "epoch": 0.37283010415500056, "grad_norm": 1.0811599493026733, "learning_rate": 3.3275e-05, "loss": 0.3934, "step": 6658 }, { "epoch": 0.3728861014671296, "grad_norm": 
1.345110297203064, "learning_rate": 3.328e-05, "loss": 0.4315, "step": 6659 }, { "epoch": 0.3729420987792586, "grad_norm": 1.412089467048645, "learning_rate": 3.3285e-05, "loss": 0.5837, "step": 6660 }, { "epoch": 0.3729980960913876, "grad_norm": 1.514289379119873, "learning_rate": 3.329e-05, "loss": 0.4549, "step": 6661 }, { "epoch": 0.37305409340351664, "grad_norm": 1.1381101608276367, "learning_rate": 3.3295000000000006e-05, "loss": 0.3819, "step": 6662 }, { "epoch": 0.37311009071564566, "grad_norm": 1.1717133522033691, "learning_rate": 3.33e-05, "loss": 0.4161, "step": 6663 }, { "epoch": 0.3731660880277747, "grad_norm": 1.2617908716201782, "learning_rate": 3.3305e-05, "loss": 0.4284, "step": 6664 }, { "epoch": 0.3732220853399037, "grad_norm": 1.1256204843521118, "learning_rate": 3.3310000000000005e-05, "loss": 0.3324, "step": 6665 }, { "epoch": 0.3732780826520327, "grad_norm": 1.3104017972946167, "learning_rate": 3.3315e-05, "loss": 0.603, "step": 6666 }, { "epoch": 0.37333407996416174, "grad_norm": 1.2038390636444092, "learning_rate": 3.332e-05, "loss": 0.5616, "step": 6667 }, { "epoch": 0.37339007727629075, "grad_norm": 1.3213260173797607, "learning_rate": 3.3325000000000004e-05, "loss": 0.4087, "step": 6668 }, { "epoch": 0.3734460745884198, "grad_norm": 1.4307289123535156, "learning_rate": 3.333e-05, "loss": 0.5434, "step": 6669 }, { "epoch": 0.3735020719005488, "grad_norm": 1.0907399654388428, "learning_rate": 3.3335e-05, "loss": 0.3285, "step": 6670 }, { "epoch": 0.3735580692126778, "grad_norm": 1.2606312036514282, "learning_rate": 3.3339999999999996e-05, "loss": 0.3946, "step": 6671 }, { "epoch": 0.37361406652480683, "grad_norm": 1.20966637134552, "learning_rate": 3.334500000000001e-05, "loss": 0.3584, "step": 6672 }, { "epoch": 0.37367006383693585, "grad_norm": 1.0477896928787231, "learning_rate": 3.3350000000000004e-05, "loss": 0.3188, "step": 6673 }, { "epoch": 0.37372606114906487, "grad_norm": 1.3895022869110107, "learning_rate": 3.3355e-05, "loss": 
0.6125, "step": 6674 }, { "epoch": 0.3737820584611939, "grad_norm": 1.870864748954773, "learning_rate": 3.336e-05, "loss": 0.4728, "step": 6675 }, { "epoch": 0.3738380557733229, "grad_norm": 1.2265498638153076, "learning_rate": 3.3365e-05, "loss": 0.3698, "step": 6676 }, { "epoch": 0.37389405308545187, "grad_norm": 1.2869940996170044, "learning_rate": 3.337e-05, "loss": 0.5283, "step": 6677 }, { "epoch": 0.3739500503975809, "grad_norm": 1.4047949314117432, "learning_rate": 3.3375e-05, "loss": 0.3931, "step": 6678 }, { "epoch": 0.3740060477097099, "grad_norm": 1.167543888092041, "learning_rate": 3.338e-05, "loss": 0.4388, "step": 6679 }, { "epoch": 0.37406204502183893, "grad_norm": 1.3333889245986938, "learning_rate": 3.3385e-05, "loss": 0.3845, "step": 6680 }, { "epoch": 0.37411804233396795, "grad_norm": 1.2822749614715576, "learning_rate": 3.339e-05, "loss": 0.4201, "step": 6681 }, { "epoch": 0.37417403964609697, "grad_norm": 1.1371928453445435, "learning_rate": 3.3395e-05, "loss": 0.3391, "step": 6682 }, { "epoch": 0.374230036958226, "grad_norm": 1.3467376232147217, "learning_rate": 3.3400000000000005e-05, "loss": 0.4459, "step": 6683 }, { "epoch": 0.374286034270355, "grad_norm": 1.1777925491333008, "learning_rate": 3.3405e-05, "loss": 0.4282, "step": 6684 }, { "epoch": 0.374342031582484, "grad_norm": 1.192771077156067, "learning_rate": 3.341e-05, "loss": 0.4234, "step": 6685 }, { "epoch": 0.37439802889461304, "grad_norm": 1.1964887380599976, "learning_rate": 3.3415000000000004e-05, "loss": 0.4521, "step": 6686 }, { "epoch": 0.37445402620674206, "grad_norm": 1.1602240800857544, "learning_rate": 3.342e-05, "loss": 0.5186, "step": 6687 }, { "epoch": 0.3745100235188711, "grad_norm": 1.2414394617080688, "learning_rate": 3.3425e-05, "loss": 0.3399, "step": 6688 }, { "epoch": 0.3745660208310001, "grad_norm": 1.127994418144226, "learning_rate": 3.3430000000000003e-05, "loss": 0.5001, "step": 6689 }, { "epoch": 0.3746220181431291, "grad_norm": 1.3012959957122803, 
"learning_rate": 3.3435e-05, "loss": 0.4527, "step": 6690 }, { "epoch": 0.37467801545525814, "grad_norm": 1.074070692062378, "learning_rate": 3.344e-05, "loss": 0.401, "step": 6691 }, { "epoch": 0.37473401276738716, "grad_norm": 1.3016616106033325, "learning_rate": 3.3445e-05, "loss": 0.4612, "step": 6692 }, { "epoch": 0.3747900100795162, "grad_norm": 1.3945709466934204, "learning_rate": 3.345000000000001e-05, "loss": 0.4831, "step": 6693 }, { "epoch": 0.3748460073916452, "grad_norm": 1.1574516296386719, "learning_rate": 3.3455000000000004e-05, "loss": 0.5866, "step": 6694 }, { "epoch": 0.3749020047037742, "grad_norm": 1.5415083169937134, "learning_rate": 3.346e-05, "loss": 0.4111, "step": 6695 }, { "epoch": 0.37495800201590324, "grad_norm": 1.3542542457580566, "learning_rate": 3.3465e-05, "loss": 0.4482, "step": 6696 }, { "epoch": 0.37501399932803225, "grad_norm": 1.5849406719207764, "learning_rate": 3.347e-05, "loss": 0.3569, "step": 6697 }, { "epoch": 0.3750699966401613, "grad_norm": 1.2463269233703613, "learning_rate": 3.3475e-05, "loss": 0.4923, "step": 6698 }, { "epoch": 0.3751259939522903, "grad_norm": 1.2878328561782837, "learning_rate": 3.348e-05, "loss": 0.4517, "step": 6699 }, { "epoch": 0.3751819912644193, "grad_norm": 1.3001095056533813, "learning_rate": 3.3485e-05, "loss": 0.4187, "step": 6700 }, { "epoch": 0.37523798857654833, "grad_norm": 1.1260403394699097, "learning_rate": 3.349e-05, "loss": 0.3583, "step": 6701 }, { "epoch": 0.37529398588867735, "grad_norm": 1.2898516654968262, "learning_rate": 3.3495000000000004e-05, "loss": 0.4044, "step": 6702 }, { "epoch": 0.37534998320080637, "grad_norm": 1.0880471467971802, "learning_rate": 3.35e-05, "loss": 0.3948, "step": 6703 }, { "epoch": 0.3754059805129354, "grad_norm": 0.983112096786499, "learning_rate": 3.3505000000000005e-05, "loss": 0.3603, "step": 6704 }, { "epoch": 0.3754619778250644, "grad_norm": 1.2240639925003052, "learning_rate": 3.351e-05, "loss": 0.4636, "step": 6705 }, { "epoch": 
0.3755179751371934, "grad_norm": 1.7151166200637817, "learning_rate": 3.3515e-05, "loss": 0.5167, "step": 6706 }, { "epoch": 0.37557397244932245, "grad_norm": 1.0267263650894165, "learning_rate": 3.3520000000000004e-05, "loss": 0.3929, "step": 6707 }, { "epoch": 0.37562996976145147, "grad_norm": 2.0249452590942383, "learning_rate": 3.3525e-05, "loss": 0.4115, "step": 6708 }, { "epoch": 0.3756859670735805, "grad_norm": 1.2309340238571167, "learning_rate": 3.353e-05, "loss": 0.4488, "step": 6709 }, { "epoch": 0.3757419643857095, "grad_norm": 1.1514673233032227, "learning_rate": 3.3534999999999997e-05, "loss": 0.3519, "step": 6710 }, { "epoch": 0.3757979616978385, "grad_norm": 0.9589496850967407, "learning_rate": 3.354e-05, "loss": 0.4595, "step": 6711 }, { "epoch": 0.37585395900996754, "grad_norm": 1.345346450805664, "learning_rate": 3.3545000000000005e-05, "loss": 0.4561, "step": 6712 }, { "epoch": 0.37590995632209656, "grad_norm": 1.1441172361373901, "learning_rate": 3.355e-05, "loss": 0.3547, "step": 6713 }, { "epoch": 0.3759659536342256, "grad_norm": 1.4294555187225342, "learning_rate": 3.3555e-05, "loss": 0.4995, "step": 6714 }, { "epoch": 0.3760219509463546, "grad_norm": 1.6530765295028687, "learning_rate": 3.3560000000000004e-05, "loss": 0.4534, "step": 6715 }, { "epoch": 0.3760779482584836, "grad_norm": 1.140238642692566, "learning_rate": 3.3565e-05, "loss": 0.4516, "step": 6716 }, { "epoch": 0.37613394557061264, "grad_norm": 1.2830810546875, "learning_rate": 3.357e-05, "loss": 0.4988, "step": 6717 }, { "epoch": 0.3761899428827416, "grad_norm": 1.5989134311676025, "learning_rate": 3.3575e-05, "loss": 0.4608, "step": 6718 }, { "epoch": 0.3762459401948706, "grad_norm": 1.3424721956253052, "learning_rate": 3.358e-05, "loss": 0.6089, "step": 6719 }, { "epoch": 0.37630193750699964, "grad_norm": 1.6010934114456177, "learning_rate": 3.3585e-05, "loss": 0.6814, "step": 6720 }, { "epoch": 0.37635793481912866, "grad_norm": 1.0401952266693115, "learning_rate": 
3.359e-05, "loss": 0.3748, "step": 6721 }, { "epoch": 0.3764139321312577, "grad_norm": 1.324708104133606, "learning_rate": 3.3595000000000006e-05, "loss": 0.3935, "step": 6722 }, { "epoch": 0.3764699294433867, "grad_norm": 1.4354658126831055, "learning_rate": 3.3600000000000004e-05, "loss": 0.4073, "step": 6723 }, { "epoch": 0.3765259267555157, "grad_norm": 1.1871925592422485, "learning_rate": 3.3605e-05, "loss": 0.3696, "step": 6724 }, { "epoch": 0.37658192406764474, "grad_norm": 1.34526789188385, "learning_rate": 3.3610000000000005e-05, "loss": 0.4582, "step": 6725 }, { "epoch": 0.37663792137977375, "grad_norm": 1.047061562538147, "learning_rate": 3.3615e-05, "loss": 0.401, "step": 6726 }, { "epoch": 0.3766939186919028, "grad_norm": 1.6513768434524536, "learning_rate": 3.362e-05, "loss": 0.4484, "step": 6727 }, { "epoch": 0.3767499160040318, "grad_norm": 1.3245466947555542, "learning_rate": 3.3625000000000004e-05, "loss": 0.4729, "step": 6728 }, { "epoch": 0.3768059133161608, "grad_norm": 2.3662190437316895, "learning_rate": 3.363e-05, "loss": 0.4553, "step": 6729 }, { "epoch": 0.37686191062828983, "grad_norm": 1.32633638381958, "learning_rate": 3.3635e-05, "loss": 0.4385, "step": 6730 }, { "epoch": 0.37691790794041885, "grad_norm": 1.370506763458252, "learning_rate": 3.3639999999999996e-05, "loss": 0.5247, "step": 6731 }, { "epoch": 0.37697390525254787, "grad_norm": 1.3929905891418457, "learning_rate": 3.364500000000001e-05, "loss": 0.5001, "step": 6732 }, { "epoch": 0.3770299025646769, "grad_norm": 1.440767526626587, "learning_rate": 3.3650000000000005e-05, "loss": 0.5486, "step": 6733 }, { "epoch": 0.3770858998768059, "grad_norm": 1.2727113962173462, "learning_rate": 3.3655e-05, "loss": 0.5041, "step": 6734 }, { "epoch": 0.3771418971889349, "grad_norm": 1.1316595077514648, "learning_rate": 3.366e-05, "loss": 0.4268, "step": 6735 }, { "epoch": 0.37719789450106395, "grad_norm": 1.1441484689712524, "learning_rate": 3.3665000000000004e-05, "loss": 0.3752, "step": 
6736 }, { "epoch": 0.37725389181319297, "grad_norm": 1.4344395399093628, "learning_rate": 3.367e-05, "loss": 0.4142, "step": 6737 }, { "epoch": 0.377309889125322, "grad_norm": 1.3655970096588135, "learning_rate": 3.3675e-05, "loss": 0.3492, "step": 6738 }, { "epoch": 0.377365886437451, "grad_norm": 1.1607861518859863, "learning_rate": 3.368e-05, "loss": 0.451, "step": 6739 }, { "epoch": 0.37742188374958, "grad_norm": 3.041842460632324, "learning_rate": 3.3685e-05, "loss": 0.5925, "step": 6740 }, { "epoch": 0.37747788106170904, "grad_norm": 1.3775370121002197, "learning_rate": 3.369e-05, "loss": 0.3979, "step": 6741 }, { "epoch": 0.37753387837383806, "grad_norm": 1.4027009010314941, "learning_rate": 3.3695e-05, "loss": 0.459, "step": 6742 }, { "epoch": 0.3775898756859671, "grad_norm": 1.8847804069519043, "learning_rate": 3.3700000000000006e-05, "loss": 0.6248, "step": 6743 }, { "epoch": 0.3776458729980961, "grad_norm": 1.2024933099746704, "learning_rate": 3.3705000000000003e-05, "loss": 0.4764, "step": 6744 }, { "epoch": 0.3777018703102251, "grad_norm": 1.3369094133377075, "learning_rate": 3.371e-05, "loss": 0.4192, "step": 6745 }, { "epoch": 0.37775786762235414, "grad_norm": 1.276689887046814, "learning_rate": 3.3715000000000005e-05, "loss": 0.4924, "step": 6746 }, { "epoch": 0.37781386493448316, "grad_norm": 1.388626217842102, "learning_rate": 3.372e-05, "loss": 0.4662, "step": 6747 }, { "epoch": 0.3778698622466122, "grad_norm": 1.1112879514694214, "learning_rate": 3.3725e-05, "loss": 0.3071, "step": 6748 }, { "epoch": 0.3779258595587412, "grad_norm": 1.2572911977767944, "learning_rate": 3.373e-05, "loss": 0.4241, "step": 6749 }, { "epoch": 0.3779818568708702, "grad_norm": 2.8464155197143555, "learning_rate": 3.3735e-05, "loss": 0.5338, "step": 6750 }, { "epoch": 0.37803785418299923, "grad_norm": 1.3102375268936157, "learning_rate": 3.374e-05, "loss": 0.3824, "step": 6751 }, { "epoch": 0.37809385149512825, "grad_norm": 1.555628776550293, "learning_rate": 
3.3745e-05, "loss": 0.5413, "step": 6752 }, { "epoch": 0.37814984880725727, "grad_norm": 1.146290898323059, "learning_rate": 3.375000000000001e-05, "loss": 0.3548, "step": 6753 }, { "epoch": 0.3782058461193863, "grad_norm": 1.0477023124694824, "learning_rate": 3.3755000000000005e-05, "loss": 0.3244, "step": 6754 }, { "epoch": 0.3782618434315153, "grad_norm": 1.1944644451141357, "learning_rate": 3.376e-05, "loss": 0.4431, "step": 6755 }, { "epoch": 0.37831784074364433, "grad_norm": 1.236586332321167, "learning_rate": 3.3765e-05, "loss": 0.4362, "step": 6756 }, { "epoch": 0.37837383805577335, "grad_norm": 13.946683883666992, "learning_rate": 3.3770000000000004e-05, "loss": 0.399, "step": 6757 }, { "epoch": 0.37842983536790237, "grad_norm": 1.2594490051269531, "learning_rate": 3.3775e-05, "loss": 0.3616, "step": 6758 }, { "epoch": 0.37848583268003133, "grad_norm": 1.4111195802688599, "learning_rate": 3.378e-05, "loss": 0.4966, "step": 6759 }, { "epoch": 0.37854182999216035, "grad_norm": 1.3201584815979004, "learning_rate": 3.3785e-05, "loss": 0.386, "step": 6760 }, { "epoch": 0.37859782730428937, "grad_norm": 1.3126658201217651, "learning_rate": 3.379e-05, "loss": 0.506, "step": 6761 }, { "epoch": 0.3786538246164184, "grad_norm": 1.1481921672821045, "learning_rate": 3.3795e-05, "loss": 0.3953, "step": 6762 }, { "epoch": 0.3787098219285474, "grad_norm": 1.41203773021698, "learning_rate": 3.38e-05, "loss": 0.3521, "step": 6763 }, { "epoch": 0.3787658192406764, "grad_norm": 1.215990424156189, "learning_rate": 3.3805000000000006e-05, "loss": 0.4788, "step": 6764 }, { "epoch": 0.37882181655280545, "grad_norm": 1.1901772022247314, "learning_rate": 3.381e-05, "loss": 0.3711, "step": 6765 }, { "epoch": 0.37887781386493447, "grad_norm": 1.2417351007461548, "learning_rate": 3.3815e-05, "loss": 0.4276, "step": 6766 }, { "epoch": 0.3789338111770635, "grad_norm": 1.1263600587844849, "learning_rate": 3.3820000000000005e-05, "loss": 0.4496, "step": 6767 }, { "epoch": 
0.3789898084891925, "grad_norm": 1.247110366821289, "learning_rate": 3.3825e-05, "loss": 0.4089, "step": 6768 }, { "epoch": 0.3790458058013215, "grad_norm": 1.2428250312805176, "learning_rate": 3.383e-05, "loss": 0.4079, "step": 6769 }, { "epoch": 0.37910180311345054, "grad_norm": 1.5003178119659424, "learning_rate": 3.3835e-05, "loss": 0.5265, "step": 6770 }, { "epoch": 0.37915780042557956, "grad_norm": 1.2733747959136963, "learning_rate": 3.384e-05, "loss": 0.4329, "step": 6771 }, { "epoch": 0.3792137977377086, "grad_norm": 1.390162706375122, "learning_rate": 3.3845e-05, "loss": 0.4958, "step": 6772 }, { "epoch": 0.3792697950498376, "grad_norm": 1.2606457471847534, "learning_rate": 3.385e-05, "loss": 0.4001, "step": 6773 }, { "epoch": 0.3793257923619666, "grad_norm": 1.286514401435852, "learning_rate": 3.3855e-05, "loss": 0.3534, "step": 6774 }, { "epoch": 0.37938178967409564, "grad_norm": 1.1558682918548584, "learning_rate": 3.3860000000000004e-05, "loss": 0.4396, "step": 6775 }, { "epoch": 0.37943778698622466, "grad_norm": 1.3537018299102783, "learning_rate": 3.3865e-05, "loss": 0.4584, "step": 6776 }, { "epoch": 0.3794937842983537, "grad_norm": 1.4790765047073364, "learning_rate": 3.387e-05, "loss": 0.5101, "step": 6777 }, { "epoch": 0.3795497816104827, "grad_norm": 1.381075382232666, "learning_rate": 3.3875000000000003e-05, "loss": 0.4182, "step": 6778 }, { "epoch": 0.3796057789226117, "grad_norm": 1.909953236579895, "learning_rate": 3.388e-05, "loss": 0.4372, "step": 6779 }, { "epoch": 0.37966177623474073, "grad_norm": 1.7746893167495728, "learning_rate": 3.3885e-05, "loss": 0.4639, "step": 6780 }, { "epoch": 0.37971777354686975, "grad_norm": 1.4533138275146484, "learning_rate": 3.389e-05, "loss": 0.4153, "step": 6781 }, { "epoch": 0.37977377085899877, "grad_norm": 1.1535168886184692, "learning_rate": 3.3895e-05, "loss": 0.3526, "step": 6782 }, { "epoch": 0.3798297681711278, "grad_norm": 1.2879915237426758, "learning_rate": 3.3900000000000004e-05, "loss": 
0.3643, "step": 6783 }, { "epoch": 0.3798857654832568, "grad_norm": 1.2191060781478882, "learning_rate": 3.3905e-05, "loss": 0.5161, "step": 6784 }, { "epoch": 0.37994176279538583, "grad_norm": 1.0810246467590332, "learning_rate": 3.3910000000000006e-05, "loss": 0.4164, "step": 6785 }, { "epoch": 0.37999776010751485, "grad_norm": 1.4238454103469849, "learning_rate": 3.3915e-05, "loss": 0.426, "step": 6786 }, { "epoch": 0.38005375741964387, "grad_norm": 1.1587119102478027, "learning_rate": 3.392e-05, "loss": 0.3992, "step": 6787 }, { "epoch": 0.3801097547317729, "grad_norm": 1.2623403072357178, "learning_rate": 3.3925e-05, "loss": 0.4614, "step": 6788 }, { "epoch": 0.3801657520439019, "grad_norm": 1.398996114730835, "learning_rate": 3.393e-05, "loss": 0.4433, "step": 6789 }, { "epoch": 0.3802217493560309, "grad_norm": 1.2736495733261108, "learning_rate": 3.3935e-05, "loss": 0.3597, "step": 6790 }, { "epoch": 0.38027774666815994, "grad_norm": 1.165753960609436, "learning_rate": 3.394e-05, "loss": 0.3583, "step": 6791 }, { "epoch": 0.38033374398028896, "grad_norm": 1.2651578187942505, "learning_rate": 3.3945e-05, "loss": 0.4531, "step": 6792 }, { "epoch": 0.380389741292418, "grad_norm": 1.4925222396850586, "learning_rate": 3.3950000000000005e-05, "loss": 0.4597, "step": 6793 }, { "epoch": 0.380445738604547, "grad_norm": 1.1772981882095337, "learning_rate": 3.3955e-05, "loss": 0.4551, "step": 6794 }, { "epoch": 0.380501735916676, "grad_norm": 1.0072182416915894, "learning_rate": 3.396e-05, "loss": 0.3487, "step": 6795 }, { "epoch": 0.38055773322880504, "grad_norm": 1.4475287199020386, "learning_rate": 3.3965000000000004e-05, "loss": 0.5036, "step": 6796 }, { "epoch": 0.38061373054093406, "grad_norm": 1.2004691362380981, "learning_rate": 3.397e-05, "loss": 0.4327, "step": 6797 }, { "epoch": 0.3806697278530631, "grad_norm": 1.0906919240951538, "learning_rate": 3.3975e-05, "loss": 0.3438, "step": 6798 }, { "epoch": 0.3807257251651921, "grad_norm": 1.304298996925354, 
"learning_rate": 3.398e-05, "loss": 0.4037, "step": 6799 }, { "epoch": 0.3807817224773211, "grad_norm": 1.767535924911499, "learning_rate": 3.3985e-05, "loss": 0.5008, "step": 6800 }, { "epoch": 0.3808377197894501, "grad_norm": 1.1377581357955933, "learning_rate": 3.399e-05, "loss": 0.3894, "step": 6801 }, { "epoch": 0.3808937171015791, "grad_norm": 1.1364119052886963, "learning_rate": 3.3995e-05, "loss": 0.433, "step": 6802 }, { "epoch": 0.3809497144137081, "grad_norm": 1.2736361026763916, "learning_rate": 3.4000000000000007e-05, "loss": 0.4333, "step": 6803 }, { "epoch": 0.38100571172583714, "grad_norm": 1.2721407413482666, "learning_rate": 3.4005000000000004e-05, "loss": 0.3837, "step": 6804 }, { "epoch": 0.38106170903796616, "grad_norm": 0.9903769493103027, "learning_rate": 3.401e-05, "loss": 0.3657, "step": 6805 }, { "epoch": 0.3811177063500952, "grad_norm": 1.243751883506775, "learning_rate": 3.4015000000000006e-05, "loss": 0.415, "step": 6806 }, { "epoch": 0.3811737036622242, "grad_norm": 1.1863563060760498, "learning_rate": 3.402e-05, "loss": 0.527, "step": 6807 }, { "epoch": 0.3812297009743532, "grad_norm": 1.2471858263015747, "learning_rate": 3.4025e-05, "loss": 0.5276, "step": 6808 }, { "epoch": 0.38128569828648223, "grad_norm": 1.4560558795928955, "learning_rate": 3.403e-05, "loss": 0.3881, "step": 6809 }, { "epoch": 0.38134169559861125, "grad_norm": 1.310857892036438, "learning_rate": 3.4035e-05, "loss": 0.6719, "step": 6810 }, { "epoch": 0.38139769291074027, "grad_norm": 1.2389583587646484, "learning_rate": 3.404e-05, "loss": 0.4477, "step": 6811 }, { "epoch": 0.3814536902228693, "grad_norm": 1.1277942657470703, "learning_rate": 3.4045e-05, "loss": 0.3727, "step": 6812 }, { "epoch": 0.3815096875349983, "grad_norm": 1.286651611328125, "learning_rate": 3.405e-05, "loss": 0.3979, "step": 6813 }, { "epoch": 0.38156568484712733, "grad_norm": 1.2730367183685303, "learning_rate": 3.4055000000000005e-05, "loss": 0.3411, "step": 6814 }, { "epoch": 
0.38162168215925635, "grad_norm": 1.1979669332504272, "learning_rate": 3.406e-05, "loss": 0.3594, "step": 6815 }, { "epoch": 0.38167767947138537, "grad_norm": 1.4920653104782104, "learning_rate": 3.4065e-05, "loss": 0.4879, "step": 6816 }, { "epoch": 0.3817336767835144, "grad_norm": 1.131361722946167, "learning_rate": 3.4070000000000004e-05, "loss": 0.3472, "step": 6817 }, { "epoch": 0.3817896740956434, "grad_norm": 1.2309305667877197, "learning_rate": 3.4075e-05, "loss": 0.3836, "step": 6818 }, { "epoch": 0.3818456714077724, "grad_norm": 1.2341068983078003, "learning_rate": 3.408e-05, "loss": 0.5457, "step": 6819 }, { "epoch": 0.38190166871990144, "grad_norm": 1.188979983329773, "learning_rate": 3.4085e-05, "loss": 0.4289, "step": 6820 }, { "epoch": 0.38195766603203046, "grad_norm": 1.1993087530136108, "learning_rate": 3.409e-05, "loss": 0.5416, "step": 6821 }, { "epoch": 0.3820136633441595, "grad_norm": 0.96775883436203, "learning_rate": 3.4095e-05, "loss": 0.426, "step": 6822 }, { "epoch": 0.3820696606562885, "grad_norm": 2.017106056213379, "learning_rate": 3.41e-05, "loss": 0.5721, "step": 6823 }, { "epoch": 0.3821256579684175, "grad_norm": 1.203216791152954, "learning_rate": 3.4105000000000006e-05, "loss": 0.3714, "step": 6824 }, { "epoch": 0.38218165528054654, "grad_norm": 1.0981453657150269, "learning_rate": 3.4110000000000004e-05, "loss": 0.3762, "step": 6825 }, { "epoch": 0.38223765259267556, "grad_norm": 1.0832266807556152, "learning_rate": 3.4115e-05, "loss": 0.4779, "step": 6826 }, { "epoch": 0.3822936499048046, "grad_norm": 1.519089698791504, "learning_rate": 3.412e-05, "loss": 0.6131, "step": 6827 }, { "epoch": 0.3823496472169336, "grad_norm": 1.205888271331787, "learning_rate": 3.4125e-05, "loss": 0.4936, "step": 6828 }, { "epoch": 0.3824056445290626, "grad_norm": 1.3022352457046509, "learning_rate": 3.413e-05, "loss": 0.4341, "step": 6829 }, { "epoch": 0.38246164184119164, "grad_norm": 1.164675235748291, "learning_rate": 3.4135e-05, "loss": 0.4531, 
"step": 6830 }, { "epoch": 0.38251763915332065, "grad_norm": 1.1891977787017822, "learning_rate": 3.414e-05, "loss": 0.531, "step": 6831 }, { "epoch": 0.3825736364654497, "grad_norm": 1.1762791872024536, "learning_rate": 3.4145e-05, "loss": 0.4263, "step": 6832 }, { "epoch": 0.3826296337775787, "grad_norm": 1.4778733253479004, "learning_rate": 3.415e-05, "loss": 0.733, "step": 6833 }, { "epoch": 0.3826856310897077, "grad_norm": 1.2638355493545532, "learning_rate": 3.4155e-05, "loss": 0.404, "step": 6834 }, { "epoch": 0.38274162840183673, "grad_norm": 1.3806214332580566, "learning_rate": 3.4160000000000005e-05, "loss": 0.537, "step": 6835 }, { "epoch": 0.38279762571396575, "grad_norm": 1.2141627073287964, "learning_rate": 3.4165e-05, "loss": 0.457, "step": 6836 }, { "epoch": 0.38285362302609477, "grad_norm": 1.233931303024292, "learning_rate": 3.417e-05, "loss": 0.4319, "step": 6837 }, { "epoch": 0.3829096203382238, "grad_norm": 1.4215152263641357, "learning_rate": 3.4175000000000004e-05, "loss": 0.5703, "step": 6838 }, { "epoch": 0.3829656176503528, "grad_norm": 1.1874788999557495, "learning_rate": 3.418e-05, "loss": 0.5177, "step": 6839 }, { "epoch": 0.3830216149624818, "grad_norm": 1.167649269104004, "learning_rate": 3.4185e-05, "loss": 0.3902, "step": 6840 }, { "epoch": 0.38307761227461085, "grad_norm": 1.2854527235031128, "learning_rate": 3.419e-05, "loss": 0.4082, "step": 6841 }, { "epoch": 0.3831336095867398, "grad_norm": 1.080438256263733, "learning_rate": 3.4195e-05, "loss": 0.4204, "step": 6842 }, { "epoch": 0.38318960689886883, "grad_norm": 1.3386930227279663, "learning_rate": 3.4200000000000005e-05, "loss": 0.5462, "step": 6843 }, { "epoch": 0.38324560421099785, "grad_norm": 1.4374494552612305, "learning_rate": 3.4205e-05, "loss": 0.4368, "step": 6844 }, { "epoch": 0.38330160152312687, "grad_norm": 1.7632851600646973, "learning_rate": 3.4210000000000006e-05, "loss": 0.3249, "step": 6845 }, { "epoch": 0.3833575988352559, "grad_norm": 1.5548715591430664, 
"learning_rate": 3.4215000000000004e-05, "loss": 0.6289, "step": 6846 }, { "epoch": 0.3834135961473849, "grad_norm": 1.3940459489822388, "learning_rate": 3.422e-05, "loss": 0.4671, "step": 6847 }, { "epoch": 0.3834695934595139, "grad_norm": 1.5930670499801636, "learning_rate": 3.4225e-05, "loss": 0.6257, "step": 6848 }, { "epoch": 0.38352559077164294, "grad_norm": 1.5224014520645142, "learning_rate": 3.423e-05, "loss": 0.4133, "step": 6849 }, { "epoch": 0.38358158808377196, "grad_norm": 1.4070916175842285, "learning_rate": 3.4235e-05, "loss": 0.3598, "step": 6850 }, { "epoch": 0.383637585395901, "grad_norm": 1.2294327020645142, "learning_rate": 3.424e-05, "loss": 0.4053, "step": 6851 }, { "epoch": 0.38369358270803, "grad_norm": 1.1390637159347534, "learning_rate": 3.4245e-05, "loss": 0.4106, "step": 6852 }, { "epoch": 0.383749580020159, "grad_norm": 1.3385306596755981, "learning_rate": 3.4250000000000006e-05, "loss": 0.4459, "step": 6853 }, { "epoch": 0.38380557733228804, "grad_norm": 1.1420754194259644, "learning_rate": 3.4255e-05, "loss": 0.3797, "step": 6854 }, { "epoch": 0.38386157464441706, "grad_norm": 1.1515679359436035, "learning_rate": 3.426e-05, "loss": 0.3679, "step": 6855 }, { "epoch": 0.3839175719565461, "grad_norm": 1.1968247890472412, "learning_rate": 3.4265000000000005e-05, "loss": 0.4082, "step": 6856 }, { "epoch": 0.3839735692686751, "grad_norm": 1.1582374572753906, "learning_rate": 3.427e-05, "loss": 0.3743, "step": 6857 }, { "epoch": 0.3840295665808041, "grad_norm": 1.2968791723251343, "learning_rate": 3.4275e-05, "loss": 0.4489, "step": 6858 }, { "epoch": 0.38408556389293314, "grad_norm": 1.2227622270584106, "learning_rate": 3.4280000000000004e-05, "loss": 0.4424, "step": 6859 }, { "epoch": 0.38414156120506215, "grad_norm": 1.312872290611267, "learning_rate": 3.4285e-05, "loss": 0.4034, "step": 6860 }, { "epoch": 0.3841975585171912, "grad_norm": 1.2574517726898193, "learning_rate": 3.429e-05, "loss": 0.4268, "step": 6861 }, { "epoch": 
0.3842535558293202, "grad_norm": 1.277334451675415, "learning_rate": 3.4294999999999996e-05, "loss": 0.4557, "step": 6862 }, { "epoch": 0.3843095531414492, "grad_norm": 1.556626558303833, "learning_rate": 3.430000000000001e-05, "loss": 0.851, "step": 6863 }, { "epoch": 0.38436555045357823, "grad_norm": 2.4134163856506348, "learning_rate": 3.4305000000000004e-05, "loss": 0.49, "step": 6864 }, { "epoch": 0.38442154776570725, "grad_norm": 1.1647013425827026, "learning_rate": 3.431e-05, "loss": 0.6369, "step": 6865 }, { "epoch": 0.38447754507783627, "grad_norm": 1.2150636911392212, "learning_rate": 3.4315000000000006e-05, "loss": 0.5115, "step": 6866 }, { "epoch": 0.3845335423899653, "grad_norm": 1.3191980123519897, "learning_rate": 3.4320000000000003e-05, "loss": 0.428, "step": 6867 }, { "epoch": 0.3845895397020943, "grad_norm": 1.2424001693725586, "learning_rate": 3.4325e-05, "loss": 0.4363, "step": 6868 }, { "epoch": 0.3846455370142233, "grad_norm": 1.0711196660995483, "learning_rate": 3.433e-05, "loss": 0.45, "step": 6869 }, { "epoch": 0.38470153432635235, "grad_norm": 1.1021277904510498, "learning_rate": 3.4335e-05, "loss": 0.3696, "step": 6870 }, { "epoch": 0.38475753163848136, "grad_norm": 1.745692491531372, "learning_rate": 3.434e-05, "loss": 0.4261, "step": 6871 }, { "epoch": 0.3848135289506104, "grad_norm": 1.2101738452911377, "learning_rate": 3.4345e-05, "loss": 0.4441, "step": 6872 }, { "epoch": 0.3848695262627394, "grad_norm": 1.2984495162963867, "learning_rate": 3.435e-05, "loss": 0.6471, "step": 6873 }, { "epoch": 0.3849255235748684, "grad_norm": 1.0604979991912842, "learning_rate": 3.4355000000000006e-05, "loss": 0.4003, "step": 6874 }, { "epoch": 0.38498152088699744, "grad_norm": 1.4221092462539673, "learning_rate": 3.436e-05, "loss": 0.4043, "step": 6875 }, { "epoch": 0.38503751819912646, "grad_norm": 1.2192177772521973, "learning_rate": 3.4365e-05, "loss": 0.4698, "step": 6876 }, { "epoch": 0.3850935155112555, "grad_norm": 1.405113697052002, 
"learning_rate": 3.4370000000000005e-05, "loss": 0.4693, "step": 6877 }, { "epoch": 0.3851495128233845, "grad_norm": 1.3599504232406616, "learning_rate": 3.4375e-05, "loss": 0.4721, "step": 6878 }, { "epoch": 0.3852055101355135, "grad_norm": 2.1586360931396484, "learning_rate": 3.438e-05, "loss": 0.4071, "step": 6879 }, { "epoch": 0.38526150744764254, "grad_norm": 1.1666386127471924, "learning_rate": 3.4385000000000004e-05, "loss": 0.4566, "step": 6880 }, { "epoch": 0.38531750475977156, "grad_norm": 1.0913177728652954, "learning_rate": 3.439e-05, "loss": 0.3272, "step": 6881 }, { "epoch": 0.3853735020719006, "grad_norm": 1.3614965677261353, "learning_rate": 3.4395e-05, "loss": 0.3895, "step": 6882 }, { "epoch": 0.38542949938402954, "grad_norm": 1.186721920967102, "learning_rate": 3.4399999999999996e-05, "loss": 0.364, "step": 6883 }, { "epoch": 0.38548549669615856, "grad_norm": 1.7788968086242676, "learning_rate": 3.440500000000001e-05, "loss": 0.8067, "step": 6884 }, { "epoch": 0.3855414940082876, "grad_norm": 1.1849788427352905, "learning_rate": 3.4410000000000004e-05, "loss": 0.4643, "step": 6885 }, { "epoch": 0.3855974913204166, "grad_norm": 1.4890176057815552, "learning_rate": 3.4415e-05, "loss": 0.4669, "step": 6886 }, { "epoch": 0.3856534886325456, "grad_norm": 1.48883056640625, "learning_rate": 3.442e-05, "loss": 0.3893, "step": 6887 }, { "epoch": 0.38570948594467463, "grad_norm": 1.2005877494812012, "learning_rate": 3.4425e-05, "loss": 0.5971, "step": 6888 }, { "epoch": 0.38576548325680365, "grad_norm": 1.5162949562072754, "learning_rate": 3.443e-05, "loss": 0.4271, "step": 6889 }, { "epoch": 0.3858214805689327, "grad_norm": 1.1552103757858276, "learning_rate": 3.4435e-05, "loss": 0.37, "step": 6890 }, { "epoch": 0.3858774778810617, "grad_norm": 1.5426958799362183, "learning_rate": 3.444e-05, "loss": 0.4643, "step": 6891 }, { "epoch": 0.3859334751931907, "grad_norm": 2.051938056945801, "learning_rate": 3.4445e-05, "loss": 0.4913, "step": 6892 }, { "epoch": 
0.38598947250531973, "grad_norm": 1.3490667343139648, "learning_rate": 3.445e-05, "loss": 0.4701, "step": 6893 }, { "epoch": 0.38604546981744875, "grad_norm": 1.3706315755844116, "learning_rate": 3.4455e-05, "loss": 0.5357, "step": 6894 }, { "epoch": 0.38610146712957777, "grad_norm": 1.1113626956939697, "learning_rate": 3.4460000000000005e-05, "loss": 0.355, "step": 6895 }, { "epoch": 0.3861574644417068, "grad_norm": 3.260390281677246, "learning_rate": 3.4465e-05, "loss": 0.47, "step": 6896 }, { "epoch": 0.3862134617538358, "grad_norm": 1.1313581466674805, "learning_rate": 3.447e-05, "loss": 0.3574, "step": 6897 }, { "epoch": 0.3862694590659648, "grad_norm": 1.3877431154251099, "learning_rate": 3.4475000000000005e-05, "loss": 0.5123, "step": 6898 }, { "epoch": 0.38632545637809385, "grad_norm": 1.3723994493484497, "learning_rate": 3.448e-05, "loss": 0.4964, "step": 6899 }, { "epoch": 0.38638145369022286, "grad_norm": 1.228934407234192, "learning_rate": 3.4485e-05, "loss": 0.4702, "step": 6900 }, { "epoch": 0.3864374510023519, "grad_norm": 1.1545768976211548, "learning_rate": 3.449e-05, "loss": 0.4698, "step": 6901 }, { "epoch": 0.3864934483144809, "grad_norm": 1.2166193723678589, "learning_rate": 3.4495e-05, "loss": 0.4343, "step": 6902 }, { "epoch": 0.3865494456266099, "grad_norm": 1.4506065845489502, "learning_rate": 3.45e-05, "loss": 0.4393, "step": 6903 }, { "epoch": 0.38660544293873894, "grad_norm": 1.083709716796875, "learning_rate": 3.4505e-05, "loss": 0.3242, "step": 6904 }, { "epoch": 0.38666144025086796, "grad_norm": 1.1156466007232666, "learning_rate": 3.451000000000001e-05, "loss": 0.397, "step": 6905 }, { "epoch": 0.386717437562997, "grad_norm": 1.5354053974151611, "learning_rate": 3.4515000000000004e-05, "loss": 0.5582, "step": 6906 }, { "epoch": 0.386773434875126, "grad_norm": 1.1571729183197021, "learning_rate": 3.452e-05, "loss": 0.3811, "step": 6907 }, { "epoch": 0.386829432187255, "grad_norm": 1.1743639707565308, "learning_rate": 3.4525e-05, 
"loss": 0.5123, "step": 6908 }, { "epoch": 0.38688542949938404, "grad_norm": 1.0858750343322754, "learning_rate": 3.453e-05, "loss": 0.4123, "step": 6909 }, { "epoch": 0.38694142681151306, "grad_norm": 1.1950575113296509, "learning_rate": 3.4535e-05, "loss": 0.4931, "step": 6910 }, { "epoch": 0.3869974241236421, "grad_norm": 1.3804479837417603, "learning_rate": 3.454e-05, "loss": 0.4097, "step": 6911 }, { "epoch": 0.3870534214357711, "grad_norm": 1.3190300464630127, "learning_rate": 3.4545e-05, "loss": 0.3901, "step": 6912 }, { "epoch": 0.3871094187479001, "grad_norm": 1.4819968938827515, "learning_rate": 3.455e-05, "loss": 0.499, "step": 6913 }, { "epoch": 0.38716541606002913, "grad_norm": 1.4721925258636475, "learning_rate": 3.4555000000000004e-05, "loss": 0.4597, "step": 6914 }, { "epoch": 0.38722141337215815, "grad_norm": 1.1170094013214111, "learning_rate": 3.456e-05, "loss": 0.5047, "step": 6915 }, { "epoch": 0.38727741068428717, "grad_norm": 1.3028054237365723, "learning_rate": 3.4565000000000005e-05, "loss": 0.4317, "step": 6916 }, { "epoch": 0.3873334079964162, "grad_norm": 1.096895456314087, "learning_rate": 3.457e-05, "loss": 0.3691, "step": 6917 }, { "epoch": 0.3873894053085452, "grad_norm": 1.234584927558899, "learning_rate": 3.4575e-05, "loss": 0.4658, "step": 6918 }, { "epoch": 0.38744540262067423, "grad_norm": 1.1240357160568237, "learning_rate": 3.4580000000000004e-05, "loss": 0.3645, "step": 6919 }, { "epoch": 0.38750139993280325, "grad_norm": 2.325113296508789, "learning_rate": 3.4585e-05, "loss": 0.3758, "step": 6920 }, { "epoch": 0.38755739724493227, "grad_norm": 1.31596839427948, "learning_rate": 3.459e-05, "loss": 0.5102, "step": 6921 }, { "epoch": 0.3876133945570613, "grad_norm": 1.3582637310028076, "learning_rate": 3.4594999999999997e-05, "loss": 0.4485, "step": 6922 }, { "epoch": 0.3876693918691903, "grad_norm": 1.4770441055297852, "learning_rate": 3.46e-05, "loss": 0.4524, "step": 6923 }, { "epoch": 0.3877253891813193, "grad_norm": 
1.3404148817062378, "learning_rate": 3.4605000000000005e-05, "loss": 0.5851, "step": 6924 }, { "epoch": 0.3877813864934483, "grad_norm": 1.0689105987548828, "learning_rate": 3.461e-05, "loss": 0.3645, "step": 6925 }, { "epoch": 0.3878373838055773, "grad_norm": 1.2504135370254517, "learning_rate": 3.4615e-05, "loss": 0.4709, "step": 6926 }, { "epoch": 0.3878933811177063, "grad_norm": 1.2252495288848877, "learning_rate": 3.4620000000000004e-05, "loss": 0.4627, "step": 6927 }, { "epoch": 0.38794937842983535, "grad_norm": 1.0927207469940186, "learning_rate": 3.4625e-05, "loss": 0.388, "step": 6928 }, { "epoch": 0.38800537574196436, "grad_norm": 1.1858336925506592, "learning_rate": 3.463e-05, "loss": 0.4278, "step": 6929 }, { "epoch": 0.3880613730540934, "grad_norm": 1.2361561059951782, "learning_rate": 3.4635e-05, "loss": 0.3855, "step": 6930 }, { "epoch": 0.3881173703662224, "grad_norm": 1.2411030530929565, "learning_rate": 3.464e-05, "loss": 0.4048, "step": 6931 }, { "epoch": 0.3881733676783514, "grad_norm": 1.1442571878433228, "learning_rate": 3.4645e-05, "loss": 0.4149, "step": 6932 }, { "epoch": 0.38822936499048044, "grad_norm": 1.3089278936386108, "learning_rate": 3.465e-05, "loss": 0.5776, "step": 6933 }, { "epoch": 0.38828536230260946, "grad_norm": 1.1244102716445923, "learning_rate": 3.4655000000000006e-05, "loss": 0.3307, "step": 6934 }, { "epoch": 0.3883413596147385, "grad_norm": 1.507246732711792, "learning_rate": 3.4660000000000004e-05, "loss": 0.5329, "step": 6935 }, { "epoch": 0.3883973569268675, "grad_norm": 4.239734649658203, "learning_rate": 3.4665e-05, "loss": 0.4608, "step": 6936 }, { "epoch": 0.3884533542389965, "grad_norm": 1.2836531400680542, "learning_rate": 3.4670000000000005e-05, "loss": 0.3798, "step": 6937 }, { "epoch": 0.38850935155112554, "grad_norm": 1.5413106679916382, "learning_rate": 3.4675e-05, "loss": 0.5689, "step": 6938 }, { "epoch": 0.38856534886325456, "grad_norm": 1.3127338886260986, "learning_rate": 3.468e-05, "loss": 0.4873, 
"step": 6939 }, { "epoch": 0.3886213461753836, "grad_norm": 1.2186760902404785, "learning_rate": 3.4685000000000004e-05, "loss": 0.3925, "step": 6940 }, { "epoch": 0.3886773434875126, "grad_norm": 1.1795878410339355, "learning_rate": 3.469e-05, "loss": 0.3932, "step": 6941 }, { "epoch": 0.3887333407996416, "grad_norm": 1.3185460567474365, "learning_rate": 3.4695e-05, "loss": 0.3922, "step": 6942 }, { "epoch": 0.38878933811177063, "grad_norm": 1.2021520137786865, "learning_rate": 3.4699999999999996e-05, "loss": 0.3763, "step": 6943 }, { "epoch": 0.38884533542389965, "grad_norm": 1.2987655401229858, "learning_rate": 3.470500000000001e-05, "loss": 0.5663, "step": 6944 }, { "epoch": 0.38890133273602867, "grad_norm": 1.2102904319763184, "learning_rate": 3.4710000000000005e-05, "loss": 0.4477, "step": 6945 }, { "epoch": 0.3889573300481577, "grad_norm": 1.288582444190979, "learning_rate": 3.4715e-05, "loss": 0.3822, "step": 6946 }, { "epoch": 0.3890133273602867, "grad_norm": 1.1250205039978027, "learning_rate": 3.472e-05, "loss": 0.435, "step": 6947 }, { "epoch": 0.38906932467241573, "grad_norm": 1.4243067502975464, "learning_rate": 3.4725000000000004e-05, "loss": 0.5506, "step": 6948 }, { "epoch": 0.38912532198454475, "grad_norm": 1.173213243484497, "learning_rate": 3.473e-05, "loss": 0.4241, "step": 6949 }, { "epoch": 0.38918131929667377, "grad_norm": 1.061355471611023, "learning_rate": 3.4735e-05, "loss": 0.3489, "step": 6950 }, { "epoch": 0.3892373166088028, "grad_norm": 1.2567192316055298, "learning_rate": 3.474e-05, "loss": 0.5298, "step": 6951 }, { "epoch": 0.3892933139209318, "grad_norm": 1.2359381914138794, "learning_rate": 3.4745e-05, "loss": 0.3426, "step": 6952 }, { "epoch": 0.3893493112330608, "grad_norm": 1.3463321924209595, "learning_rate": 3.475e-05, "loss": 0.412, "step": 6953 }, { "epoch": 0.38940530854518984, "grad_norm": 1.8644763231277466, "learning_rate": 3.4755e-05, "loss": 0.5142, "step": 6954 }, { "epoch": 0.38946130585731886, "grad_norm": 
0.9919539093971252, "learning_rate": 3.4760000000000006e-05, "loss": 0.4033, "step": 6955 }, { "epoch": 0.3895173031694479, "grad_norm": 1.1118013858795166, "learning_rate": 3.4765000000000003e-05, "loss": 0.4297, "step": 6956 }, { "epoch": 0.3895733004815769, "grad_norm": 1.2252516746520996, "learning_rate": 3.477e-05, "loss": 0.457, "step": 6957 }, { "epoch": 0.3896292977937059, "grad_norm": 1.1286903619766235, "learning_rate": 3.4775000000000005e-05, "loss": 0.3852, "step": 6958 }, { "epoch": 0.38968529510583494, "grad_norm": 1.4658342599868774, "learning_rate": 3.478e-05, "loss": 0.3999, "step": 6959 }, { "epoch": 0.38974129241796396, "grad_norm": 1.2423683404922485, "learning_rate": 3.4785e-05, "loss": 0.51, "step": 6960 }, { "epoch": 0.389797289730093, "grad_norm": 1.2950979471206665, "learning_rate": 3.479e-05, "loss": 0.3804, "step": 6961 }, { "epoch": 0.389853287042222, "grad_norm": 1.4664896726608276, "learning_rate": 3.4795e-05, "loss": 0.3658, "step": 6962 }, { "epoch": 0.389909284354351, "grad_norm": 1.451002836227417, "learning_rate": 3.48e-05, "loss": 0.5165, "step": 6963 }, { "epoch": 0.38996528166648003, "grad_norm": 1.3125041723251343, "learning_rate": 3.4805e-05, "loss": 0.4174, "step": 6964 }, { "epoch": 0.39002127897860905, "grad_norm": 1.2604563236236572, "learning_rate": 3.481e-05, "loss": 0.3917, "step": 6965 }, { "epoch": 0.390077276290738, "grad_norm": 1.142025113105774, "learning_rate": 3.4815000000000005e-05, "loss": 0.3222, "step": 6966 }, { "epoch": 0.39013327360286704, "grad_norm": 1.1667081117630005, "learning_rate": 3.482e-05, "loss": 0.4721, "step": 6967 }, { "epoch": 0.39018927091499606, "grad_norm": 1.1558924913406372, "learning_rate": 3.4825e-05, "loss": 0.3383, "step": 6968 }, { "epoch": 0.3902452682271251, "grad_norm": 1.4228202104568481, "learning_rate": 3.4830000000000004e-05, "loss": 0.4312, "step": 6969 }, { "epoch": 0.3903012655392541, "grad_norm": 1.2872172594070435, "learning_rate": 3.4835e-05, "loss": 0.4468, "step": 
6970 }, { "epoch": 0.3903572628513831, "grad_norm": 1.1830133199691772, "learning_rate": 3.484e-05, "loss": 0.44, "step": 6971 }, { "epoch": 0.39041326016351213, "grad_norm": 3.1067752838134766, "learning_rate": 3.4845e-05, "loss": 0.3564, "step": 6972 }, { "epoch": 0.39046925747564115, "grad_norm": 1.09810209274292, "learning_rate": 3.485e-05, "loss": 0.4307, "step": 6973 }, { "epoch": 0.39052525478777017, "grad_norm": 1.3250813484191895, "learning_rate": 3.4855000000000004e-05, "loss": 0.3495, "step": 6974 }, { "epoch": 0.3905812520998992, "grad_norm": 1.537216067314148, "learning_rate": 3.486e-05, "loss": 0.6629, "step": 6975 }, { "epoch": 0.3906372494120282, "grad_norm": 1.3258821964263916, "learning_rate": 3.4865000000000006e-05, "loss": 0.4447, "step": 6976 }, { "epoch": 0.39069324672415723, "grad_norm": 1.4364511966705322, "learning_rate": 3.487e-05, "loss": 0.535, "step": 6977 }, { "epoch": 0.39074924403628625, "grad_norm": 1.1638134717941284, "learning_rate": 3.4875e-05, "loss": 0.4004, "step": 6978 }, { "epoch": 0.39080524134841527, "grad_norm": 1.2769513130187988, "learning_rate": 3.4880000000000005e-05, "loss": 0.6045, "step": 6979 }, { "epoch": 0.3908612386605443, "grad_norm": 1.2911280393600464, "learning_rate": 3.4885e-05, "loss": 0.4701, "step": 6980 }, { "epoch": 0.3909172359726733, "grad_norm": 1.2202502489089966, "learning_rate": 3.489e-05, "loss": 0.4383, "step": 6981 }, { "epoch": 0.3909732332848023, "grad_norm": 1.4804140329360962, "learning_rate": 3.4895e-05, "loss": 0.381, "step": 6982 }, { "epoch": 0.39102923059693134, "grad_norm": 1.1945704221725464, "learning_rate": 3.49e-05, "loss": 0.4915, "step": 6983 }, { "epoch": 0.39108522790906036, "grad_norm": 1.3579177856445312, "learning_rate": 3.4905000000000005e-05, "loss": 0.5499, "step": 6984 }, { "epoch": 0.3911412252211894, "grad_norm": 1.2986173629760742, "learning_rate": 3.491e-05, "loss": 0.4338, "step": 6985 }, { "epoch": 0.3911972225333184, "grad_norm": 1.363681674003601, 
"learning_rate": 3.4915e-05, "loss": 0.6142, "step": 6986 }, { "epoch": 0.3912532198454474, "grad_norm": 1.0938702821731567, "learning_rate": 3.4920000000000004e-05, "loss": 0.3773, "step": 6987 }, { "epoch": 0.39130921715757644, "grad_norm": 1.4995903968811035, "learning_rate": 3.4925e-05, "loss": 0.5526, "step": 6988 }, { "epoch": 0.39136521446970546, "grad_norm": 1.4191012382507324, "learning_rate": 3.493e-05, "loss": 0.4109, "step": 6989 }, { "epoch": 0.3914212117818345, "grad_norm": 1.1995792388916016, "learning_rate": 3.4935000000000003e-05, "loss": 0.4719, "step": 6990 }, { "epoch": 0.3914772090939635, "grad_norm": 1.0581377744674683, "learning_rate": 3.494e-05, "loss": 0.3769, "step": 6991 }, { "epoch": 0.3915332064060925, "grad_norm": 1.1604219675064087, "learning_rate": 3.4945e-05, "loss": 0.3578, "step": 6992 }, { "epoch": 0.39158920371822153, "grad_norm": 1.4558964967727661, "learning_rate": 3.495e-05, "loss": 0.672, "step": 6993 }, { "epoch": 0.39164520103035055, "grad_norm": 1.0141115188598633, "learning_rate": 3.495500000000001e-05, "loss": 0.363, "step": 6994 }, { "epoch": 0.3917011983424796, "grad_norm": 1.1952109336853027, "learning_rate": 3.4960000000000004e-05, "loss": 0.4501, "step": 6995 }, { "epoch": 0.3917571956546086, "grad_norm": 1.6617878675460815, "learning_rate": 3.4965e-05, "loss": 0.4786, "step": 6996 }, { "epoch": 0.3918131929667376, "grad_norm": 1.493242621421814, "learning_rate": 3.4970000000000006e-05, "loss": 0.4174, "step": 6997 }, { "epoch": 0.39186919027886663, "grad_norm": 15.541422843933105, "learning_rate": 3.4975e-05, "loss": 0.4603, "step": 6998 }, { "epoch": 0.39192518759099565, "grad_norm": 1.217512607574463, "learning_rate": 3.498e-05, "loss": 0.4167, "step": 6999 }, { "epoch": 0.39198118490312467, "grad_norm": 1.1900042295455933, "learning_rate": 3.4985e-05, "loss": 0.453, "step": 7000 }, { "epoch": 0.3920371822152537, "grad_norm": 1.1864182949066162, "learning_rate": 3.499e-05, "loss": 0.3501, "step": 7001 }, { 
"epoch": 0.3920931795273827, "grad_norm": 1.2844698429107666, "learning_rate": 3.4995e-05, "loss": 0.4631, "step": 7002 }, { "epoch": 0.3921491768395117, "grad_norm": 1.172251582145691, "learning_rate": 3.5e-05, "loss": 0.4115, "step": 7003 }, { "epoch": 0.39220517415164075, "grad_norm": 1.2889574766159058, "learning_rate": 3.5005e-05, "loss": 0.4025, "step": 7004 }, { "epoch": 0.39226117146376976, "grad_norm": 1.1393121480941772, "learning_rate": 3.5010000000000005e-05, "loss": 0.4025, "step": 7005 }, { "epoch": 0.3923171687758988, "grad_norm": 1.583275556564331, "learning_rate": 3.5015e-05, "loss": 0.6657, "step": 7006 }, { "epoch": 0.39237316608802775, "grad_norm": 1.3567055463790894, "learning_rate": 3.502e-05, "loss": 0.5812, "step": 7007 }, { "epoch": 0.39242916340015677, "grad_norm": 1.3116973638534546, "learning_rate": 3.5025000000000004e-05, "loss": 0.527, "step": 7008 }, { "epoch": 0.3924851607122858, "grad_norm": 1.4356811046600342, "learning_rate": 3.503e-05, "loss": 0.4215, "step": 7009 }, { "epoch": 0.3925411580244148, "grad_norm": 1.354910969734192, "learning_rate": 3.5035e-05, "loss": 0.3817, "step": 7010 }, { "epoch": 0.3925971553365438, "grad_norm": 1.2192323207855225, "learning_rate": 3.504e-05, "loss": 0.3654, "step": 7011 }, { "epoch": 0.39265315264867284, "grad_norm": 1.248289942741394, "learning_rate": 3.5045e-05, "loss": 0.4381, "step": 7012 }, { "epoch": 0.39270914996080186, "grad_norm": 1.6048190593719482, "learning_rate": 3.505e-05, "loss": 0.6216, "step": 7013 }, { "epoch": 0.3927651472729309, "grad_norm": 1.28898024559021, "learning_rate": 3.5055e-05, "loss": 0.5206, "step": 7014 }, { "epoch": 0.3928211445850599, "grad_norm": 1.372096300125122, "learning_rate": 3.5060000000000007e-05, "loss": 0.5168, "step": 7015 }, { "epoch": 0.3928771418971889, "grad_norm": 1.2836592197418213, "learning_rate": 3.5065000000000004e-05, "loss": 0.3966, "step": 7016 }, { "epoch": 0.39293313920931794, "grad_norm": 1.5730881690979004, "learning_rate": 
3.507e-05, "loss": 0.4068, "step": 7017 }, { "epoch": 0.39298913652144696, "grad_norm": 1.4202806949615479, "learning_rate": 3.5075000000000006e-05, "loss": 0.5073, "step": 7018 }, { "epoch": 0.393045133833576, "grad_norm": 1.3695437908172607, "learning_rate": 3.508e-05, "loss": 0.4575, "step": 7019 }, { "epoch": 0.393101131145705, "grad_norm": 1.159608006477356, "learning_rate": 3.5085e-05, "loss": 0.4209, "step": 7020 }, { "epoch": 0.393157128457834, "grad_norm": 1.1785948276519775, "learning_rate": 3.509e-05, "loss": 0.3444, "step": 7021 }, { "epoch": 0.39321312576996303, "grad_norm": 1.4187601804733276, "learning_rate": 3.5095e-05, "loss": 0.3834, "step": 7022 }, { "epoch": 0.39326912308209205, "grad_norm": 1.3515288829803467, "learning_rate": 3.51e-05, "loss": 0.6947, "step": 7023 }, { "epoch": 0.3933251203942211, "grad_norm": 1.3403400182724, "learning_rate": 3.5105e-05, "loss": 0.4919, "step": 7024 }, { "epoch": 0.3933811177063501, "grad_norm": 1.4202855825424194, "learning_rate": 3.511e-05, "loss": 0.3962, "step": 7025 }, { "epoch": 0.3934371150184791, "grad_norm": 1.132371187210083, "learning_rate": 3.5115000000000005e-05, "loss": 0.3486, "step": 7026 }, { "epoch": 0.39349311233060813, "grad_norm": 1.3173556327819824, "learning_rate": 3.512e-05, "loss": 0.4557, "step": 7027 }, { "epoch": 0.39354910964273715, "grad_norm": 1.1719353199005127, "learning_rate": 3.5125e-05, "loss": 0.3156, "step": 7028 }, { "epoch": 0.39360510695486617, "grad_norm": 1.105429768562317, "learning_rate": 3.5130000000000004e-05, "loss": 0.3959, "step": 7029 }, { "epoch": 0.3936611042669952, "grad_norm": 1.223471999168396, "learning_rate": 3.5135e-05, "loss": 0.4871, "step": 7030 }, { "epoch": 0.3937171015791242, "grad_norm": 1.2651889324188232, "learning_rate": 3.514e-05, "loss": 0.419, "step": 7031 }, { "epoch": 0.3937730988912532, "grad_norm": 1.2285356521606445, "learning_rate": 3.5145e-05, "loss": 0.4938, "step": 7032 }, { "epoch": 0.39382909620338225, "grad_norm": 
1.3484148979187012, "learning_rate": 3.515e-05, "loss": 0.5286, "step": 7033 }, { "epoch": 0.39388509351551126, "grad_norm": 1.1736124753952026, "learning_rate": 3.5155e-05, "loss": 0.3927, "step": 7034 }, { "epoch": 0.3939410908276403, "grad_norm": 1.4963797330856323, "learning_rate": 3.516e-05, "loss": 0.5774, "step": 7035 }, { "epoch": 0.3939970881397693, "grad_norm": 1.4173800945281982, "learning_rate": 3.5165000000000006e-05, "loss": 0.6018, "step": 7036 }, { "epoch": 0.3940530854518983, "grad_norm": 1.1178412437438965, "learning_rate": 3.5170000000000004e-05, "loss": 0.5061, "step": 7037 }, { "epoch": 0.39410908276402734, "grad_norm": 1.119616985321045, "learning_rate": 3.5175e-05, "loss": 0.3985, "step": 7038 }, { "epoch": 0.39416508007615636, "grad_norm": 1.2967212200164795, "learning_rate": 3.518e-05, "loss": 0.4044, "step": 7039 }, { "epoch": 0.3942210773882854, "grad_norm": 1.1283607482910156, "learning_rate": 3.5185e-05, "loss": 0.3502, "step": 7040 }, { "epoch": 0.3942770747004144, "grad_norm": 1.2243571281433105, "learning_rate": 3.519e-05, "loss": 0.474, "step": 7041 }, { "epoch": 0.3943330720125434, "grad_norm": 1.5098562240600586, "learning_rate": 3.5195e-05, "loss": 0.4933, "step": 7042 }, { "epoch": 0.39438906932467244, "grad_norm": 1.3509429693222046, "learning_rate": 3.52e-05, "loss": 0.4049, "step": 7043 }, { "epoch": 0.39444506663680146, "grad_norm": 1.1621174812316895, "learning_rate": 3.5205e-05, "loss": 0.374, "step": 7044 }, { "epoch": 0.3945010639489305, "grad_norm": 1.142918586730957, "learning_rate": 3.5210000000000003e-05, "loss": 0.3954, "step": 7045 }, { "epoch": 0.3945570612610595, "grad_norm": 1.1373687982559204, "learning_rate": 3.5215e-05, "loss": 0.325, "step": 7046 }, { "epoch": 0.3946130585731885, "grad_norm": 1.0039230585098267, "learning_rate": 3.5220000000000005e-05, "loss": 0.408, "step": 7047 }, { "epoch": 0.39466905588531753, "grad_norm": 1.5211772918701172, "learning_rate": 3.5225e-05, "loss": 0.5512, "step": 7048 }, { 
"epoch": 0.3947250531974465, "grad_norm": 1.3901233673095703, "learning_rate": 3.523e-05, "loss": 0.571, "step": 7049 }, { "epoch": 0.3947810505095755, "grad_norm": 2.409402370452881, "learning_rate": 3.5235000000000004e-05, "loss": 0.3791, "step": 7050 }, { "epoch": 0.39483704782170453, "grad_norm": 1.1406456232070923, "learning_rate": 3.524e-05, "loss": 0.4096, "step": 7051 }, { "epoch": 0.39489304513383355, "grad_norm": 1.1457037925720215, "learning_rate": 3.5245e-05, "loss": 0.376, "step": 7052 }, { "epoch": 0.3949490424459626, "grad_norm": 1.4743554592132568, "learning_rate": 3.525e-05, "loss": 0.5204, "step": 7053 }, { "epoch": 0.3950050397580916, "grad_norm": 1.1844478845596313, "learning_rate": 3.5255e-05, "loss": 0.4048, "step": 7054 }, { "epoch": 0.3950610370702206, "grad_norm": 1.2880264520645142, "learning_rate": 3.5260000000000005e-05, "loss": 0.4198, "step": 7055 }, { "epoch": 0.39511703438234963, "grad_norm": 1.3818327188491821, "learning_rate": 3.5265e-05, "loss": 0.5185, "step": 7056 }, { "epoch": 0.39517303169447865, "grad_norm": 1.1847329139709473, "learning_rate": 3.5270000000000006e-05, "loss": 0.5541, "step": 7057 }, { "epoch": 0.39522902900660767, "grad_norm": 1.0822230577468872, "learning_rate": 3.5275000000000004e-05, "loss": 0.3495, "step": 7058 }, { "epoch": 0.3952850263187367, "grad_norm": 5.522929668426514, "learning_rate": 3.528e-05, "loss": 0.3949, "step": 7059 }, { "epoch": 0.3953410236308657, "grad_norm": 1.1144723892211914, "learning_rate": 3.5285e-05, "loss": 0.3869, "step": 7060 }, { "epoch": 0.3953970209429947, "grad_norm": 1.2210427522659302, "learning_rate": 3.529e-05, "loss": 0.3968, "step": 7061 }, { "epoch": 0.39545301825512374, "grad_norm": 1.253044605255127, "learning_rate": 3.5295e-05, "loss": 0.396, "step": 7062 }, { "epoch": 0.39550901556725276, "grad_norm": 1.2485785484313965, "learning_rate": 3.53e-05, "loss": 0.3947, "step": 7063 }, { "epoch": 0.3955650128793818, "grad_norm": 1.1033822298049927, "learning_rate": 
3.5305e-05, "loss": 0.3477, "step": 7064 }, { "epoch": 0.3956210101915108, "grad_norm": 1.2442518472671509, "learning_rate": 3.5310000000000006e-05, "loss": 0.5305, "step": 7065 }, { "epoch": 0.3956770075036398, "grad_norm": 1.0721766948699951, "learning_rate": 3.5315e-05, "loss": 0.3685, "step": 7066 }, { "epoch": 0.39573300481576884, "grad_norm": 1.1637009382247925, "learning_rate": 3.532e-05, "loss": 0.5006, "step": 7067 }, { "epoch": 0.39578900212789786, "grad_norm": 1.4650561809539795, "learning_rate": 3.5325000000000005e-05, "loss": 0.4508, "step": 7068 }, { "epoch": 0.3958449994400269, "grad_norm": 1.0735374689102173, "learning_rate": 3.533e-05, "loss": 0.4061, "step": 7069 }, { "epoch": 0.3959009967521559, "grad_norm": 1.3330549001693726, "learning_rate": 3.5335e-05, "loss": 0.4362, "step": 7070 }, { "epoch": 0.3959569940642849, "grad_norm": 1.250771403312683, "learning_rate": 3.5340000000000004e-05, "loss": 0.5554, "step": 7071 }, { "epoch": 0.39601299137641394, "grad_norm": 1.3783133029937744, "learning_rate": 3.5345e-05, "loss": 0.51, "step": 7072 }, { "epoch": 0.39606898868854296, "grad_norm": 1.0835919380187988, "learning_rate": 3.535e-05, "loss": 0.445, "step": 7073 }, { "epoch": 0.396124986000672, "grad_norm": 1.3178541660308838, "learning_rate": 3.5354999999999996e-05, "loss": 0.4205, "step": 7074 }, { "epoch": 0.396180983312801, "grad_norm": 1.3751288652420044, "learning_rate": 3.536000000000001e-05, "loss": 0.4258, "step": 7075 }, { "epoch": 0.39623698062493, "grad_norm": 1.0545991659164429, "learning_rate": 3.5365000000000004e-05, "loss": 0.3546, "step": 7076 }, { "epoch": 0.39629297793705903, "grad_norm": 1.3124701976776123, "learning_rate": 3.537e-05, "loss": 0.3171, "step": 7077 }, { "epoch": 0.39634897524918805, "grad_norm": 1.2867190837860107, "learning_rate": 3.5375e-05, "loss": 0.4597, "step": 7078 }, { "epoch": 0.39640497256131707, "grad_norm": 1.1991990804672241, "learning_rate": 3.5380000000000003e-05, "loss": 0.4953, "step": 7079 }, { 
"epoch": 0.3964609698734461, "grad_norm": 1.238351821899414, "learning_rate": 3.5385e-05, "loss": 0.4385, "step": 7080 }, { "epoch": 0.3965169671855751, "grad_norm": 1.2495702505111694, "learning_rate": 3.539e-05, "loss": 0.4106, "step": 7081 }, { "epoch": 0.39657296449770413, "grad_norm": 3.731426477432251, "learning_rate": 3.5395e-05, "loss": 0.4307, "step": 7082 }, { "epoch": 0.39662896180983315, "grad_norm": 1.1573306322097778, "learning_rate": 3.54e-05, "loss": 0.3633, "step": 7083 }, { "epoch": 0.39668495912196217, "grad_norm": 1.0320273637771606, "learning_rate": 3.5405e-05, "loss": 0.3824, "step": 7084 }, { "epoch": 0.3967409564340912, "grad_norm": 1.2094013690948486, "learning_rate": 3.541e-05, "loss": 0.3714, "step": 7085 }, { "epoch": 0.3967969537462202, "grad_norm": 1.1214096546173096, "learning_rate": 3.5415000000000006e-05, "loss": 0.4957, "step": 7086 }, { "epoch": 0.3968529510583492, "grad_norm": 1.2384463548660278, "learning_rate": 3.542e-05, "loss": 0.4128, "step": 7087 }, { "epoch": 0.39690894837047824, "grad_norm": 1.5187432765960693, "learning_rate": 3.5425e-05, "loss": 0.5801, "step": 7088 }, { "epoch": 0.39696494568260726, "grad_norm": 1.1081868410110474, "learning_rate": 3.5430000000000005e-05, "loss": 0.4494, "step": 7089 }, { "epoch": 0.3970209429947362, "grad_norm": 1.179426670074463, "learning_rate": 3.5435e-05, "loss": 0.4237, "step": 7090 }, { "epoch": 0.39707694030686524, "grad_norm": 1.200624942779541, "learning_rate": 3.544e-05, "loss": 0.4315, "step": 7091 }, { "epoch": 0.39713293761899426, "grad_norm": 1.3659522533416748, "learning_rate": 3.5445000000000004e-05, "loss": 0.5273, "step": 7092 }, { "epoch": 0.3971889349311233, "grad_norm": 1.3462220430374146, "learning_rate": 3.545e-05, "loss": 0.3893, "step": 7093 }, { "epoch": 0.3972449322432523, "grad_norm": 1.2855170965194702, "learning_rate": 3.5455e-05, "loss": 0.3949, "step": 7094 }, { "epoch": 0.3973009295553813, "grad_norm": 1.4168190956115723, "learning_rate": 3.546e-05, 
"loss": 0.4308, "step": 7095 }, { "epoch": 0.39735692686751034, "grad_norm": 1.4847972393035889, "learning_rate": 3.546500000000001e-05, "loss": 0.4734, "step": 7096 }, { "epoch": 0.39741292417963936, "grad_norm": 1.2195695638656616, "learning_rate": 3.5470000000000004e-05, "loss": 0.3766, "step": 7097 }, { "epoch": 0.3974689214917684, "grad_norm": 1.1849185228347778, "learning_rate": 3.5475e-05, "loss": 0.3926, "step": 7098 }, { "epoch": 0.3975249188038974, "grad_norm": 1.1611347198486328, "learning_rate": 3.548e-05, "loss": 0.3221, "step": 7099 }, { "epoch": 0.3975809161160264, "grad_norm": 1.236920952796936, "learning_rate": 3.5485e-05, "loss": 0.5203, "step": 7100 }, { "epoch": 0.39763691342815544, "grad_norm": 1.2539856433868408, "learning_rate": 3.549e-05, "loss": 0.4427, "step": 7101 }, { "epoch": 0.39769291074028446, "grad_norm": 1.3313424587249756, "learning_rate": 3.5495e-05, "loss": 0.517, "step": 7102 }, { "epoch": 0.3977489080524135, "grad_norm": 1.177718162536621, "learning_rate": 3.55e-05, "loss": 0.5253, "step": 7103 }, { "epoch": 0.3978049053645425, "grad_norm": 1.362850546836853, "learning_rate": 3.5505e-05, "loss": 0.421, "step": 7104 }, { "epoch": 0.3978609026766715, "grad_norm": 1.25775146484375, "learning_rate": 3.5510000000000004e-05, "loss": 0.5547, "step": 7105 }, { "epoch": 0.39791689998880053, "grad_norm": 1.2604835033416748, "learning_rate": 3.5515e-05, "loss": 0.4058, "step": 7106 }, { "epoch": 0.39797289730092955, "grad_norm": 1.0059744119644165, "learning_rate": 3.5520000000000006e-05, "loss": 0.3841, "step": 7107 }, { "epoch": 0.39802889461305857, "grad_norm": 1.1106350421905518, "learning_rate": 3.5525e-05, "loss": 0.3949, "step": 7108 }, { "epoch": 0.3980848919251876, "grad_norm": 1.8010390996932983, "learning_rate": 3.553e-05, "loss": 0.7235, "step": 7109 }, { "epoch": 0.3981408892373166, "grad_norm": 1.3277437686920166, "learning_rate": 3.5535000000000005e-05, "loss": 0.4169, "step": 7110 }, { "epoch": 0.39819688654944563, 
"grad_norm": 1.2066757678985596, "learning_rate": 3.554e-05, "loss": 0.4722, "step": 7111 }, { "epoch": 0.39825288386157465, "grad_norm": 1.8611897230148315, "learning_rate": 3.5545e-05, "loss": 0.452, "step": 7112 }, { "epoch": 0.39830888117370367, "grad_norm": 1.1620473861694336, "learning_rate": 3.555e-05, "loss": 0.498, "step": 7113 }, { "epoch": 0.3983648784858327, "grad_norm": 1.1954268217086792, "learning_rate": 3.5555e-05, "loss": 0.4771, "step": 7114 }, { "epoch": 0.3984208757979617, "grad_norm": 1.1766629219055176, "learning_rate": 3.5560000000000005e-05, "loss": 0.3488, "step": 7115 }, { "epoch": 0.3984768731100907, "grad_norm": 1.1939324140548706, "learning_rate": 3.5565e-05, "loss": 0.4501, "step": 7116 }, { "epoch": 0.39853287042221974, "grad_norm": 0.9701012372970581, "learning_rate": 3.557e-05, "loss": 0.389, "step": 7117 }, { "epoch": 0.39858886773434876, "grad_norm": 1.6646636724472046, "learning_rate": 3.5575000000000004e-05, "loss": 0.5372, "step": 7118 }, { "epoch": 0.3986448650464778, "grad_norm": 1.4646655321121216, "learning_rate": 3.558e-05, "loss": 0.3593, "step": 7119 }, { "epoch": 0.3987008623586068, "grad_norm": 1.3005801439285278, "learning_rate": 3.5585e-05, "loss": 0.5581, "step": 7120 }, { "epoch": 0.3987568596707358, "grad_norm": 1.3204537630081177, "learning_rate": 3.559e-05, "loss": 0.3271, "step": 7121 }, { "epoch": 0.39881285698286484, "grad_norm": 1.1972370147705078, "learning_rate": 3.5595e-05, "loss": 0.6395, "step": 7122 }, { "epoch": 0.39886885429499386, "grad_norm": 1.33364999294281, "learning_rate": 3.56e-05, "loss": 0.4278, "step": 7123 }, { "epoch": 0.3989248516071229, "grad_norm": 1.370723009109497, "learning_rate": 3.5605e-05, "loss": 0.4126, "step": 7124 }, { "epoch": 0.3989808489192519, "grad_norm": 1.1895666122436523, "learning_rate": 3.5610000000000006e-05, "loss": 0.584, "step": 7125 }, { "epoch": 0.3990368462313809, "grad_norm": 1.1378017663955688, "learning_rate": 3.5615000000000004e-05, "loss": 0.3344, 
"step": 7126 }, { "epoch": 0.39909284354350993, "grad_norm": 1.3302061557769775, "learning_rate": 3.562e-05, "loss": 0.3386, "step": 7127 }, { "epoch": 0.39914884085563895, "grad_norm": 1.2411571741104126, "learning_rate": 3.5625000000000005e-05, "loss": 0.4365, "step": 7128 }, { "epoch": 0.399204838167768, "grad_norm": 1.3164284229278564, "learning_rate": 3.563e-05, "loss": 0.4375, "step": 7129 }, { "epoch": 0.399260835479897, "grad_norm": 1.1458563804626465, "learning_rate": 3.5635e-05, "loss": 0.3704, "step": 7130 }, { "epoch": 0.39931683279202596, "grad_norm": 1.1261699199676514, "learning_rate": 3.5640000000000004e-05, "loss": 0.3955, "step": 7131 }, { "epoch": 0.399372830104155, "grad_norm": 1.163615345954895, "learning_rate": 3.5645e-05, "loss": 0.3663, "step": 7132 }, { "epoch": 0.399428827416284, "grad_norm": 1.8783775568008423, "learning_rate": 3.565e-05, "loss": 0.5242, "step": 7133 }, { "epoch": 0.399484824728413, "grad_norm": 1.1717891693115234, "learning_rate": 3.5654999999999997e-05, "loss": 0.4164, "step": 7134 }, { "epoch": 0.39954082204054203, "grad_norm": 1.0024054050445557, "learning_rate": 3.566e-05, "loss": 0.4352, "step": 7135 }, { "epoch": 0.39959681935267105, "grad_norm": 1.2847247123718262, "learning_rate": 3.5665000000000005e-05, "loss": 0.4291, "step": 7136 }, { "epoch": 0.39965281666480007, "grad_norm": 1.2100944519042969, "learning_rate": 3.567e-05, "loss": 0.4851, "step": 7137 }, { "epoch": 0.3997088139769291, "grad_norm": 1.32924222946167, "learning_rate": 3.5675e-05, "loss": 0.5232, "step": 7138 }, { "epoch": 0.3997648112890581, "grad_norm": 1.2437349557876587, "learning_rate": 3.5680000000000004e-05, "loss": 0.4545, "step": 7139 }, { "epoch": 0.3998208086011871, "grad_norm": 1.652258038520813, "learning_rate": 3.5685e-05, "loss": 0.6215, "step": 7140 }, { "epoch": 0.39987680591331615, "grad_norm": 2.5210049152374268, "learning_rate": 3.569e-05, "loss": 0.5923, "step": 7141 }, { "epoch": 0.39993280322544517, "grad_norm": 
1.4418772459030151, "learning_rate": 3.5695e-05, "loss": 0.4272, "step": 7142 }, { "epoch": 0.3999888005375742, "grad_norm": 1.1753559112548828, "learning_rate": 3.57e-05, "loss": 0.4517, "step": 7143 }, { "epoch": 0.4000447978497032, "grad_norm": 1.2177468538284302, "learning_rate": 3.5705e-05, "loss": 0.3785, "step": 7144 }, { "epoch": 0.4001007951618322, "grad_norm": 1.270458698272705, "learning_rate": 3.571e-05, "loss": 0.5715, "step": 7145 }, { "epoch": 0.40015679247396124, "grad_norm": 1.3269884586334229, "learning_rate": 3.5715000000000006e-05, "loss": 0.402, "step": 7146 }, { "epoch": 0.40021278978609026, "grad_norm": 1.286996603012085, "learning_rate": 3.5720000000000004e-05, "loss": 0.4246, "step": 7147 }, { "epoch": 0.4002687870982193, "grad_norm": 1.4019207954406738, "learning_rate": 3.5725e-05, "loss": 0.3634, "step": 7148 }, { "epoch": 0.4003247844103483, "grad_norm": 1.0722886323928833, "learning_rate": 3.5730000000000005e-05, "loss": 0.3802, "step": 7149 }, { "epoch": 0.4003807817224773, "grad_norm": 1.2400652170181274, "learning_rate": 3.5735e-05, "loss": 0.3321, "step": 7150 }, { "epoch": 0.40043677903460634, "grad_norm": 1.1441702842712402, "learning_rate": 3.574e-05, "loss": 0.4854, "step": 7151 }, { "epoch": 0.40049277634673536, "grad_norm": 1.040071964263916, "learning_rate": 3.5745e-05, "loss": 0.3512, "step": 7152 }, { "epoch": 0.4005487736588644, "grad_norm": 1.3732068538665771, "learning_rate": 3.575e-05, "loss": 0.3955, "step": 7153 }, { "epoch": 0.4006047709709934, "grad_norm": 1.2217940092086792, "learning_rate": 3.5755e-05, "loss": 0.5224, "step": 7154 }, { "epoch": 0.4006607682831224, "grad_norm": 1.4754819869995117, "learning_rate": 3.5759999999999996e-05, "loss": 0.5832, "step": 7155 }, { "epoch": 0.40071676559525143, "grad_norm": 1.1872299909591675, "learning_rate": 3.576500000000001e-05, "loss": 0.4427, "step": 7156 }, { "epoch": 0.40077276290738045, "grad_norm": 1.497488021850586, "learning_rate": 3.5770000000000005e-05, "loss": 
0.4887, "step": 7157 }, { "epoch": 0.4008287602195095, "grad_norm": 1.1495366096496582, "learning_rate": 3.5775e-05, "loss": 0.4121, "step": 7158 }, { "epoch": 0.4008847575316385, "grad_norm": 1.237281084060669, "learning_rate": 3.578e-05, "loss": 0.4101, "step": 7159 }, { "epoch": 0.4009407548437675, "grad_norm": 1.3852341175079346, "learning_rate": 3.5785000000000004e-05, "loss": 0.4432, "step": 7160 }, { "epoch": 0.40099675215589653, "grad_norm": 1.3871299028396606, "learning_rate": 3.579e-05, "loss": 0.4908, "step": 7161 }, { "epoch": 0.40105274946802555, "grad_norm": 1.0768440961837769, "learning_rate": 3.5795e-05, "loss": 0.3331, "step": 7162 }, { "epoch": 0.40110874678015457, "grad_norm": 1.0352228879928589, "learning_rate": 3.58e-05, "loss": 0.4007, "step": 7163 }, { "epoch": 0.4011647440922836, "grad_norm": 1.4716922044754028, "learning_rate": 3.5805e-05, "loss": 0.5157, "step": 7164 }, { "epoch": 0.4012207414044126, "grad_norm": 1.0265246629714966, "learning_rate": 3.581e-05, "loss": 0.3717, "step": 7165 }, { "epoch": 0.4012767387165416, "grad_norm": 1.1104140281677246, "learning_rate": 3.5815e-05, "loss": 0.3902, "step": 7166 }, { "epoch": 0.40133273602867064, "grad_norm": 1.0913019180297852, "learning_rate": 3.5820000000000006e-05, "loss": 0.4035, "step": 7167 }, { "epoch": 0.40138873334079966, "grad_norm": 1.4303982257843018, "learning_rate": 3.5825000000000003e-05, "loss": 0.4168, "step": 7168 }, { "epoch": 0.4014447306529287, "grad_norm": 3.429379463195801, "learning_rate": 3.583e-05, "loss": 0.7091, "step": 7169 }, { "epoch": 0.4015007279650577, "grad_norm": 1.3319716453552246, "learning_rate": 3.5835000000000005e-05, "loss": 0.421, "step": 7170 }, { "epoch": 0.4015567252771867, "grad_norm": 1.5306086540222168, "learning_rate": 3.584e-05, "loss": 0.4384, "step": 7171 }, { "epoch": 0.40161272258931574, "grad_norm": 1.2060859203338623, "learning_rate": 3.5845e-05, "loss": 0.4859, "step": 7172 }, { "epoch": 0.4016687199014447, "grad_norm": 
1.3723657131195068, "learning_rate": 3.585e-05, "loss": 0.3922, "step": 7173 }, { "epoch": 0.4017247172135737, "grad_norm": 1.451207160949707, "learning_rate": 3.5855e-05, "loss": 0.468, "step": 7174 }, { "epoch": 0.40178071452570274, "grad_norm": 1.2338693141937256, "learning_rate": 3.586e-05, "loss": 0.5271, "step": 7175 }, { "epoch": 0.40183671183783176, "grad_norm": 1.2146055698394775, "learning_rate": 3.5865e-05, "loss": 0.4202, "step": 7176 }, { "epoch": 0.4018927091499608, "grad_norm": 1.243673324584961, "learning_rate": 3.587e-05, "loss": 0.4702, "step": 7177 }, { "epoch": 0.4019487064620898, "grad_norm": 1.3768872022628784, "learning_rate": 3.5875000000000005e-05, "loss": 0.4021, "step": 7178 }, { "epoch": 0.4020047037742188, "grad_norm": 1.3724054098129272, "learning_rate": 3.588e-05, "loss": 0.4104, "step": 7179 }, { "epoch": 0.40206070108634784, "grad_norm": 1.1506850719451904, "learning_rate": 3.5885e-05, "loss": 0.344, "step": 7180 }, { "epoch": 0.40211669839847686, "grad_norm": 1.2319324016571045, "learning_rate": 3.5890000000000004e-05, "loss": 0.4383, "step": 7181 }, { "epoch": 0.4021726957106059, "grad_norm": 1.4333292245864868, "learning_rate": 3.5895e-05, "loss": 0.5512, "step": 7182 }, { "epoch": 0.4022286930227349, "grad_norm": 1.2146166563034058, "learning_rate": 3.59e-05, "loss": 0.5041, "step": 7183 }, { "epoch": 0.4022846903348639, "grad_norm": 1.3595854043960571, "learning_rate": 3.5905e-05, "loss": 0.4679, "step": 7184 }, { "epoch": 0.40234068764699293, "grad_norm": 1.2343111038208008, "learning_rate": 3.591e-05, "loss": 0.3349, "step": 7185 }, { "epoch": 0.40239668495912195, "grad_norm": 1.4902734756469727, "learning_rate": 3.5915000000000004e-05, "loss": 0.6286, "step": 7186 }, { "epoch": 0.40245268227125097, "grad_norm": 1.4743362665176392, "learning_rate": 3.592e-05, "loss": 0.4208, "step": 7187 }, { "epoch": 0.40250867958338, "grad_norm": 1.2193795442581177, "learning_rate": 3.5925000000000006e-05, "loss": 0.5368, "step": 7188 }, { 
"epoch": 0.402564676895509, "grad_norm": 1.2408791780471802, "learning_rate": 3.593e-05, "loss": 0.4042, "step": 7189 }, { "epoch": 0.40262067420763803, "grad_norm": 1.4323499202728271, "learning_rate": 3.5935e-05, "loss": 0.4446, "step": 7190 }, { "epoch": 0.40267667151976705, "grad_norm": 1.3486984968185425, "learning_rate": 3.594e-05, "loss": 0.4092, "step": 7191 }, { "epoch": 0.40273266883189607, "grad_norm": 1.1780016422271729, "learning_rate": 3.5945e-05, "loss": 0.5441, "step": 7192 }, { "epoch": 0.4027886661440251, "grad_norm": 1.5800676345825195, "learning_rate": 3.595e-05, "loss": 0.7775, "step": 7193 }, { "epoch": 0.4028446634561541, "grad_norm": 1.375052809715271, "learning_rate": 3.5955e-05, "loss": 0.4215, "step": 7194 }, { "epoch": 0.4029006607682831, "grad_norm": 1.192807674407959, "learning_rate": 3.596e-05, "loss": 0.4601, "step": 7195 }, { "epoch": 0.40295665808041214, "grad_norm": 1.272210717201233, "learning_rate": 3.5965000000000005e-05, "loss": 0.4006, "step": 7196 }, { "epoch": 0.40301265539254116, "grad_norm": 1.3375262022018433, "learning_rate": 3.597e-05, "loss": 0.5038, "step": 7197 }, { "epoch": 0.4030686527046702, "grad_norm": 1.3216993808746338, "learning_rate": 3.5975e-05, "loss": 0.4402, "step": 7198 }, { "epoch": 0.4031246500167992, "grad_norm": 1.2181096076965332, "learning_rate": 3.5980000000000004e-05, "loss": 0.355, "step": 7199 }, { "epoch": 0.4031806473289282, "grad_norm": 1.183577060699463, "learning_rate": 3.5985e-05, "loss": 0.471, "step": 7200 }, { "epoch": 0.40323664464105724, "grad_norm": 1.8316634893417358, "learning_rate": 3.599e-05, "loss": 0.4463, "step": 7201 }, { "epoch": 0.40329264195318626, "grad_norm": 1.3434016704559326, "learning_rate": 3.5995000000000004e-05, "loss": 0.5099, "step": 7202 }, { "epoch": 0.4033486392653153, "grad_norm": 1.1872845888137817, "learning_rate": 3.6e-05, "loss": 0.3647, "step": 7203 }, { "epoch": 0.4034046365774443, "grad_norm": 1.775923728942871, "learning_rate": 3.6005e-05, "loss": 
0.4471, "step": 7204 }, { "epoch": 0.4034606338895733, "grad_norm": 1.5498169660568237, "learning_rate": 3.601e-05, "loss": 0.4645, "step": 7205 }, { "epoch": 0.40351663120170234, "grad_norm": 1.1243666410446167, "learning_rate": 3.601500000000001e-05, "loss": 0.4595, "step": 7206 }, { "epoch": 0.40357262851383136, "grad_norm": 1.136042833328247, "learning_rate": 3.6020000000000004e-05, "loss": 0.5057, "step": 7207 }, { "epoch": 0.4036286258259604, "grad_norm": 1.2992314100265503, "learning_rate": 3.6025e-05, "loss": 0.4082, "step": 7208 }, { "epoch": 0.4036846231380894, "grad_norm": 1.7175463438034058, "learning_rate": 3.6030000000000006e-05, "loss": 0.4009, "step": 7209 }, { "epoch": 0.4037406204502184, "grad_norm": 1.1723322868347168, "learning_rate": 3.6035e-05, "loss": 0.3874, "step": 7210 }, { "epoch": 0.40379661776234743, "grad_norm": 1.2163728475570679, "learning_rate": 3.604e-05, "loss": 0.4348, "step": 7211 }, { "epoch": 0.40385261507447645, "grad_norm": 0.9793835282325745, "learning_rate": 3.6045e-05, "loss": 0.3209, "step": 7212 }, { "epoch": 0.40390861238660547, "grad_norm": 1.1643357276916504, "learning_rate": 3.605e-05, "loss": 0.4987, "step": 7213 }, { "epoch": 0.40396460969873443, "grad_norm": 1.122285008430481, "learning_rate": 3.6055e-05, "loss": 0.4352, "step": 7214 }, { "epoch": 0.40402060701086345, "grad_norm": 1.0734235048294067, "learning_rate": 3.606e-05, "loss": 0.5808, "step": 7215 }, { "epoch": 0.40407660432299247, "grad_norm": 1.249040961265564, "learning_rate": 3.6065e-05, "loss": 0.6445, "step": 7216 }, { "epoch": 0.4041326016351215, "grad_norm": 1.110620379447937, "learning_rate": 3.6070000000000005e-05, "loss": 0.3976, "step": 7217 }, { "epoch": 0.4041885989472505, "grad_norm": 1.151766300201416, "learning_rate": 3.6075e-05, "loss": 0.35, "step": 7218 }, { "epoch": 0.40424459625937953, "grad_norm": 1.2607028484344482, "learning_rate": 3.608e-05, "loss": 0.4196, "step": 7219 }, { "epoch": 0.40430059357150855, "grad_norm": 
1.0967851877212524, "learning_rate": 3.6085000000000004e-05, "loss": 0.4373, "step": 7220 }, { "epoch": 0.40435659088363757, "grad_norm": 1.0691070556640625, "learning_rate": 3.609e-05, "loss": 0.3334, "step": 7221 }, { "epoch": 0.4044125881957666, "grad_norm": 1.171049952507019, "learning_rate": 3.6095e-05, "loss": 0.5588, "step": 7222 }, { "epoch": 0.4044685855078956, "grad_norm": 1.1470245122909546, "learning_rate": 3.61e-05, "loss": 0.4066, "step": 7223 }, { "epoch": 0.4045245828200246, "grad_norm": 1.4739209413528442, "learning_rate": 3.6105e-05, "loss": 0.5064, "step": 7224 }, { "epoch": 0.40458058013215364, "grad_norm": 1.940086007118225, "learning_rate": 3.611e-05, "loss": 0.4543, "step": 7225 }, { "epoch": 0.40463657744428266, "grad_norm": 1.4099403619766235, "learning_rate": 3.6115e-05, "loss": 0.4356, "step": 7226 }, { "epoch": 0.4046925747564117, "grad_norm": 1.4548096656799316, "learning_rate": 3.6120000000000007e-05, "loss": 0.5859, "step": 7227 }, { "epoch": 0.4047485720685407, "grad_norm": 1.3317774534225464, "learning_rate": 3.6125000000000004e-05, "loss": 0.4717, "step": 7228 }, { "epoch": 0.4048045693806697, "grad_norm": 1.229290246963501, "learning_rate": 3.613e-05, "loss": 0.4747, "step": 7229 }, { "epoch": 0.40486056669279874, "grad_norm": 1.174689769744873, "learning_rate": 3.6135000000000006e-05, "loss": 0.483, "step": 7230 }, { "epoch": 0.40491656400492776, "grad_norm": 1.3518184423446655, "learning_rate": 3.614e-05, "loss": 0.4043, "step": 7231 }, { "epoch": 0.4049725613170568, "grad_norm": 1.2385170459747314, "learning_rate": 3.6145e-05, "loss": 0.4015, "step": 7232 }, { "epoch": 0.4050285586291858, "grad_norm": 1.234592080116272, "learning_rate": 3.615e-05, "loss": 0.4218, "step": 7233 }, { "epoch": 0.4050845559413148, "grad_norm": 1.2679672241210938, "learning_rate": 3.6155e-05, "loss": 0.4764, "step": 7234 }, { "epoch": 0.40514055325344384, "grad_norm": 1.4722155332565308, "learning_rate": 3.616e-05, "loss": 0.4824, "step": 7235 }, { 
"epoch": 0.40519655056557285, "grad_norm": 0.9777321219444275, "learning_rate": 3.6165000000000004e-05, "loss": 0.3926, "step": 7236 }, { "epoch": 0.4052525478777019, "grad_norm": 1.7210427522659302, "learning_rate": 3.617e-05, "loss": 0.5682, "step": 7237 }, { "epoch": 0.4053085451898309, "grad_norm": 1.0800691843032837, "learning_rate": 3.6175000000000005e-05, "loss": 0.4105, "step": 7238 }, { "epoch": 0.4053645425019599, "grad_norm": 1.1063060760498047, "learning_rate": 3.618e-05, "loss": 0.392, "step": 7239 }, { "epoch": 0.40542053981408893, "grad_norm": 1.069801688194275, "learning_rate": 3.6185e-05, "loss": 0.3287, "step": 7240 }, { "epoch": 0.40547653712621795, "grad_norm": 1.3083385229110718, "learning_rate": 3.6190000000000004e-05, "loss": 0.495, "step": 7241 }, { "epoch": 0.40553253443834697, "grad_norm": 1.2802073955535889, "learning_rate": 3.6195e-05, "loss": 0.4546, "step": 7242 }, { "epoch": 0.405588531750476, "grad_norm": 1.2145832777023315, "learning_rate": 3.62e-05, "loss": 0.4236, "step": 7243 }, { "epoch": 0.405644529062605, "grad_norm": 1.1301569938659668, "learning_rate": 3.6205e-05, "loss": 0.4901, "step": 7244 }, { "epoch": 0.405700526374734, "grad_norm": 1.6309202909469604, "learning_rate": 3.621e-05, "loss": 0.6519, "step": 7245 }, { "epoch": 0.40575652368686305, "grad_norm": 1.1026517152786255, "learning_rate": 3.6215000000000005e-05, "loss": 0.4012, "step": 7246 }, { "epoch": 0.40581252099899207, "grad_norm": 1.5104278326034546, "learning_rate": 3.622e-05, "loss": 0.4446, "step": 7247 }, { "epoch": 0.4058685183111211, "grad_norm": 1.4630019664764404, "learning_rate": 3.6225000000000006e-05, "loss": 0.4647, "step": 7248 }, { "epoch": 0.4059245156232501, "grad_norm": 1.742742657661438, "learning_rate": 3.6230000000000004e-05, "loss": 0.4791, "step": 7249 }, { "epoch": 0.4059805129353791, "grad_norm": 1.2679790258407593, "learning_rate": 3.6235e-05, "loss": 0.4973, "step": 7250 }, { "epoch": 0.40603651024750814, "grad_norm": 
1.2643063068389893, "learning_rate": 3.624e-05, "loss": 0.5072, "step": 7251 }, { "epoch": 0.40609250755963716, "grad_norm": 1.1733382940292358, "learning_rate": 3.6245e-05, "loss": 0.3236, "step": 7252 }, { "epoch": 0.4061485048717662, "grad_norm": 1.5944677591323853, "learning_rate": 3.625e-05, "loss": 0.445, "step": 7253 }, { "epoch": 0.4062045021838952, "grad_norm": 1.2801021337509155, "learning_rate": 3.6255e-05, "loss": 0.5094, "step": 7254 }, { "epoch": 0.40626049949602416, "grad_norm": 1.1235573291778564, "learning_rate": 3.626e-05, "loss": 0.5119, "step": 7255 }, { "epoch": 0.4063164968081532, "grad_norm": 1.1305443048477173, "learning_rate": 3.6265e-05, "loss": 0.4362, "step": 7256 }, { "epoch": 0.4063724941202822, "grad_norm": 1.908765196800232, "learning_rate": 3.6270000000000003e-05, "loss": 0.6886, "step": 7257 }, { "epoch": 0.4064284914324112, "grad_norm": 1.2548452615737915, "learning_rate": 3.6275e-05, "loss": 0.401, "step": 7258 }, { "epoch": 0.40648448874454024, "grad_norm": 1.400665044784546, "learning_rate": 3.6280000000000005e-05, "loss": 0.4434, "step": 7259 }, { "epoch": 0.40654048605666926, "grad_norm": 1.52109694480896, "learning_rate": 3.6285e-05, "loss": 0.525, "step": 7260 }, { "epoch": 0.4065964833687983, "grad_norm": 1.3359694480895996, "learning_rate": 3.629e-05, "loss": 0.4099, "step": 7261 }, { "epoch": 0.4066524806809273, "grad_norm": 1.321385383605957, "learning_rate": 3.6295000000000004e-05, "loss": 0.4814, "step": 7262 }, { "epoch": 0.4067084779930563, "grad_norm": 1.3202811479568481, "learning_rate": 3.63e-05, "loss": 0.3902, "step": 7263 }, { "epoch": 0.40676447530518534, "grad_norm": 1.1171973943710327, "learning_rate": 3.6305e-05, "loss": 0.3772, "step": 7264 }, { "epoch": 0.40682047261731435, "grad_norm": 1.2634572982788086, "learning_rate": 3.6309999999999996e-05, "loss": 0.4029, "step": 7265 }, { "epoch": 0.4068764699294434, "grad_norm": 1.2522668838500977, "learning_rate": 3.6315e-05, "loss": 0.3694, "step": 7266 }, { 
"epoch": 0.4069324672415724, "grad_norm": 1.335279107093811, "learning_rate": 3.6320000000000005e-05, "loss": 0.5055, "step": 7267 }, { "epoch": 0.4069884645537014, "grad_norm": 1.239645004272461, "learning_rate": 3.6325e-05, "loss": 0.3645, "step": 7268 }, { "epoch": 0.40704446186583043, "grad_norm": 1.522817611694336, "learning_rate": 3.6330000000000006e-05, "loss": 0.4993, "step": 7269 }, { "epoch": 0.40710045917795945, "grad_norm": 1.3160094022750854, "learning_rate": 3.6335000000000004e-05, "loss": 0.5277, "step": 7270 }, { "epoch": 0.40715645649008847, "grad_norm": 1.2404543161392212, "learning_rate": 3.634e-05, "loss": 0.5226, "step": 7271 }, { "epoch": 0.4072124538022175, "grad_norm": 1.3096650838851929, "learning_rate": 3.6345e-05, "loss": 0.4599, "step": 7272 }, { "epoch": 0.4072684511143465, "grad_norm": 1.3579174280166626, "learning_rate": 3.635e-05, "loss": 0.5192, "step": 7273 }, { "epoch": 0.4073244484264755, "grad_norm": 1.1769434213638306, "learning_rate": 3.6355e-05, "loss": 0.3975, "step": 7274 }, { "epoch": 0.40738044573860455, "grad_norm": 1.0503480434417725, "learning_rate": 3.636e-05, "loss": 0.3213, "step": 7275 }, { "epoch": 0.40743644305073357, "grad_norm": 1.0357081890106201, "learning_rate": 3.6365e-05, "loss": 0.3951, "step": 7276 }, { "epoch": 0.4074924403628626, "grad_norm": 1.1854957342147827, "learning_rate": 3.6370000000000006e-05, "loss": 0.3392, "step": 7277 }, { "epoch": 0.4075484376749916, "grad_norm": 1.3929799795150757, "learning_rate": 3.6375e-05, "loss": 0.437, "step": 7278 }, { "epoch": 0.4076044349871206, "grad_norm": 1.2759002447128296, "learning_rate": 3.638e-05, "loss": 0.5598, "step": 7279 }, { "epoch": 0.40766043229924964, "grad_norm": 1.2573679685592651, "learning_rate": 3.6385000000000005e-05, "loss": 0.3609, "step": 7280 }, { "epoch": 0.40771642961137866, "grad_norm": 1.2415151596069336, "learning_rate": 3.639e-05, "loss": 0.4047, "step": 7281 }, { "epoch": 0.4077724269235077, "grad_norm": 1.0531760454177856, 
"learning_rate": 3.6395e-05, "loss": 0.4488, "step": 7282 }, { "epoch": 0.4078284242356367, "grad_norm": 1.5336650609970093, "learning_rate": 3.6400000000000004e-05, "loss": 0.4572, "step": 7283 }, { "epoch": 0.4078844215477657, "grad_norm": 1.1323792934417725, "learning_rate": 3.6405e-05, "loss": 0.4861, "step": 7284 }, { "epoch": 0.40794041885989474, "grad_norm": 1.2384064197540283, "learning_rate": 3.641e-05, "loss": 0.468, "step": 7285 }, { "epoch": 0.40799641617202376, "grad_norm": 1.3712652921676636, "learning_rate": 3.6414999999999996e-05, "loss": 0.3852, "step": 7286 }, { "epoch": 0.4080524134841528, "grad_norm": 1.0916869640350342, "learning_rate": 3.642000000000001e-05, "loss": 0.3724, "step": 7287 }, { "epoch": 0.4081084107962818, "grad_norm": 1.1505745649337769, "learning_rate": 3.6425000000000004e-05, "loss": 0.3085, "step": 7288 }, { "epoch": 0.4081644081084108, "grad_norm": 1.4061627388000488, "learning_rate": 3.643e-05, "loss": 0.4416, "step": 7289 }, { "epoch": 0.40822040542053983, "grad_norm": 1.1372989416122437, "learning_rate": 3.6435e-05, "loss": 0.4334, "step": 7290 }, { "epoch": 0.40827640273266885, "grad_norm": 1.311160683631897, "learning_rate": 3.6440000000000003e-05, "loss": 0.417, "step": 7291 }, { "epoch": 0.40833240004479787, "grad_norm": 1.776376485824585, "learning_rate": 3.6445e-05, "loss": 0.4312, "step": 7292 }, { "epoch": 0.4083883973569269, "grad_norm": 1.5318913459777832, "learning_rate": 3.645e-05, "loss": 0.4828, "step": 7293 }, { "epoch": 0.4084443946690559, "grad_norm": 1.1611205339431763, "learning_rate": 3.6455e-05, "loss": 0.3558, "step": 7294 }, { "epoch": 0.40850039198118493, "grad_norm": 1.2419720888137817, "learning_rate": 3.646e-05, "loss": 0.5373, "step": 7295 }, { "epoch": 0.40855638929331395, "grad_norm": 1.2146140336990356, "learning_rate": 3.6465e-05, "loss": 0.4384, "step": 7296 }, { "epoch": 0.4086123866054429, "grad_norm": 1.3515194654464722, "learning_rate": 3.647e-05, "loss": 0.4263, "step": 7297 }, { 
"epoch": 0.40866838391757193, "grad_norm": 1.2659810781478882, "learning_rate": 3.6475000000000006e-05, "loss": 0.4124, "step": 7298 }, { "epoch": 0.40872438122970095, "grad_norm": 1.2609952688217163, "learning_rate": 3.648e-05, "loss": 0.4871, "step": 7299 }, { "epoch": 0.40878037854182997, "grad_norm": 1.183382272720337, "learning_rate": 3.6485e-05, "loss": 0.3869, "step": 7300 }, { "epoch": 0.408836375853959, "grad_norm": 1.1628130674362183, "learning_rate": 3.6490000000000005e-05, "loss": 0.4279, "step": 7301 }, { "epoch": 0.408892373166088, "grad_norm": 1.176312804222107, "learning_rate": 3.6495e-05, "loss": 0.3122, "step": 7302 }, { "epoch": 0.408948370478217, "grad_norm": 1.3133997917175293, "learning_rate": 3.65e-05, "loss": 0.5109, "step": 7303 }, { "epoch": 0.40900436779034605, "grad_norm": 1.2249528169631958, "learning_rate": 3.6505e-05, "loss": 0.4877, "step": 7304 }, { "epoch": 0.40906036510247507, "grad_norm": 1.3445569276809692, "learning_rate": 3.651e-05, "loss": 0.5041, "step": 7305 }, { "epoch": 0.4091163624146041, "grad_norm": 1.1757526397705078, "learning_rate": 3.6515e-05, "loss": 0.4316, "step": 7306 }, { "epoch": 0.4091723597267331, "grad_norm": 1.3491877317428589, "learning_rate": 3.652e-05, "loss": 0.493, "step": 7307 }, { "epoch": 0.4092283570388621, "grad_norm": 1.0845398902893066, "learning_rate": 3.652500000000001e-05, "loss": 0.525, "step": 7308 }, { "epoch": 0.40928435435099114, "grad_norm": 1.3383963108062744, "learning_rate": 3.6530000000000004e-05, "loss": 0.575, "step": 7309 }, { "epoch": 0.40934035166312016, "grad_norm": 1.0717452764511108, "learning_rate": 3.6535e-05, "loss": 0.4511, "step": 7310 }, { "epoch": 0.4093963489752492, "grad_norm": 1.304875373840332, "learning_rate": 3.654e-05, "loss": 0.4678, "step": 7311 }, { "epoch": 0.4094523462873782, "grad_norm": 1.4814187288284302, "learning_rate": 3.6545e-05, "loss": 0.477, "step": 7312 }, { "epoch": 0.4095083435995072, "grad_norm": 1.102741003036499, "learning_rate": 
3.655e-05, "loss": 0.4875, "step": 7313 }, { "epoch": 0.40956434091163624, "grad_norm": 1.2406634092330933, "learning_rate": 3.6555e-05, "loss": 0.4823, "step": 7314 }, { "epoch": 0.40962033822376526, "grad_norm": 1.372307300567627, "learning_rate": 3.656e-05, "loss": 0.347, "step": 7315 }, { "epoch": 0.4096763355358943, "grad_norm": 1.330670714378357, "learning_rate": 3.6565e-05, "loss": 0.5762, "step": 7316 }, { "epoch": 0.4097323328480233, "grad_norm": 1.1185837984085083, "learning_rate": 3.6570000000000004e-05, "loss": 0.4308, "step": 7317 }, { "epoch": 0.4097883301601523, "grad_norm": 1.2783232927322388, "learning_rate": 3.6575e-05, "loss": 0.4272, "step": 7318 }, { "epoch": 0.40984432747228133, "grad_norm": 1.0761449337005615, "learning_rate": 3.6580000000000006e-05, "loss": 0.3062, "step": 7319 }, { "epoch": 0.40990032478441035, "grad_norm": 1.0321117639541626, "learning_rate": 3.6585e-05, "loss": 0.3292, "step": 7320 }, { "epoch": 0.40995632209653937, "grad_norm": 1.1968425512313843, "learning_rate": 3.659e-05, "loss": 0.4818, "step": 7321 }, { "epoch": 0.4100123194086684, "grad_norm": 1.2820180654525757, "learning_rate": 3.6595000000000005e-05, "loss": 0.4734, "step": 7322 }, { "epoch": 0.4100683167207974, "grad_norm": 4.161149024963379, "learning_rate": 3.66e-05, "loss": 0.5211, "step": 7323 }, { "epoch": 0.41012431403292643, "grad_norm": 1.2682455778121948, "learning_rate": 3.6605e-05, "loss": 0.4286, "step": 7324 }, { "epoch": 0.41018031134505545, "grad_norm": 1.7455681562423706, "learning_rate": 3.661e-05, "loss": 0.6589, "step": 7325 }, { "epoch": 0.41023630865718447, "grad_norm": 1.2011220455169678, "learning_rate": 3.6615e-05, "loss": 0.3598, "step": 7326 }, { "epoch": 0.4102923059693135, "grad_norm": 1.5919878482818604, "learning_rate": 3.6620000000000005e-05, "loss": 0.4307, "step": 7327 }, { "epoch": 0.4103483032814425, "grad_norm": 1.0798871517181396, "learning_rate": 3.6625e-05, "loss": 0.3439, "step": 7328 }, { "epoch": 0.4104043005935715, 
"grad_norm": 1.3776450157165527, "learning_rate": 3.663e-05, "loss": 0.4705, "step": 7329 }, { "epoch": 0.41046029790570054, "grad_norm": 1.2709369659423828, "learning_rate": 3.6635000000000004e-05, "loss": 0.3923, "step": 7330 }, { "epoch": 0.41051629521782956, "grad_norm": 1.1078665256500244, "learning_rate": 3.664e-05, "loss": 0.4569, "step": 7331 }, { "epoch": 0.4105722925299586, "grad_norm": 1.228965401649475, "learning_rate": 3.6645e-05, "loss": 0.4598, "step": 7332 }, { "epoch": 0.4106282898420876, "grad_norm": 1.068197250366211, "learning_rate": 3.665e-05, "loss": 0.3491, "step": 7333 }, { "epoch": 0.4106842871542166, "grad_norm": 1.3216149806976318, "learning_rate": 3.6655e-05, "loss": 0.6087, "step": 7334 }, { "epoch": 0.41074028446634564, "grad_norm": 1.2426646947860718, "learning_rate": 3.666e-05, "loss": 0.3737, "step": 7335 }, { "epoch": 0.41079628177847466, "grad_norm": 1.1281229257583618, "learning_rate": 3.6665e-05, "loss": 0.4539, "step": 7336 }, { "epoch": 0.4108522790906037, "grad_norm": 1.2939091920852661, "learning_rate": 3.6670000000000006e-05, "loss": 0.4575, "step": 7337 }, { "epoch": 0.41090827640273264, "grad_norm": 1.2367515563964844, "learning_rate": 3.6675000000000004e-05, "loss": 0.4127, "step": 7338 }, { "epoch": 0.41096427371486166, "grad_norm": 1.260865569114685, "learning_rate": 3.668e-05, "loss": 0.6281, "step": 7339 }, { "epoch": 0.4110202710269907, "grad_norm": 1.2586859464645386, "learning_rate": 3.6685000000000005e-05, "loss": 0.4123, "step": 7340 }, { "epoch": 0.4110762683391197, "grad_norm": 1.1080645322799683, "learning_rate": 3.669e-05, "loss": 0.4533, "step": 7341 }, { "epoch": 0.4111322656512487, "grad_norm": 1.2502843141555786, "learning_rate": 3.6695e-05, "loss": 0.4435, "step": 7342 }, { "epoch": 0.41118826296337774, "grad_norm": 1.20815908908844, "learning_rate": 3.6700000000000004e-05, "loss": 0.4348, "step": 7343 }, { "epoch": 0.41124426027550676, "grad_norm": 1.056998610496521, "learning_rate": 3.6705e-05, 
"loss": 0.3169, "step": 7344 }, { "epoch": 0.4113002575876358, "grad_norm": 1.3029078245162964, "learning_rate": 3.671e-05, "loss": 0.5681, "step": 7345 }, { "epoch": 0.4113562548997648, "grad_norm": 1.2896829843521118, "learning_rate": 3.6714999999999997e-05, "loss": 0.7173, "step": 7346 }, { "epoch": 0.4114122522118938, "grad_norm": 1.2883481979370117, "learning_rate": 3.672000000000001e-05, "loss": 0.4045, "step": 7347 }, { "epoch": 0.41146824952402283, "grad_norm": 1.1023024320602417, "learning_rate": 3.6725000000000005e-05, "loss": 0.3875, "step": 7348 }, { "epoch": 0.41152424683615185, "grad_norm": 1.1380733251571655, "learning_rate": 3.673e-05, "loss": 0.3352, "step": 7349 }, { "epoch": 0.41158024414828087, "grad_norm": 1.166361689567566, "learning_rate": 3.6735e-05, "loss": 0.5477, "step": 7350 }, { "epoch": 0.4116362414604099, "grad_norm": 1.4538735151290894, "learning_rate": 3.6740000000000004e-05, "loss": 0.4344, "step": 7351 }, { "epoch": 0.4116922387725389, "grad_norm": 1.8776609897613525, "learning_rate": 3.6745e-05, "loss": 0.4424, "step": 7352 }, { "epoch": 0.41174823608466793, "grad_norm": 1.1175944805145264, "learning_rate": 3.675e-05, "loss": 0.4074, "step": 7353 }, { "epoch": 0.41180423339679695, "grad_norm": 2.575025796890259, "learning_rate": 3.6755e-05, "loss": 0.4766, "step": 7354 }, { "epoch": 0.41186023070892597, "grad_norm": 1.4082902669906616, "learning_rate": 3.676e-05, "loss": 0.5161, "step": 7355 }, { "epoch": 0.411916228021055, "grad_norm": 1.2325143814086914, "learning_rate": 3.6765e-05, "loss": 0.4787, "step": 7356 }, { "epoch": 0.411972225333184, "grad_norm": 1.1196098327636719, "learning_rate": 3.677e-05, "loss": 0.3825, "step": 7357 }, { "epoch": 0.412028222645313, "grad_norm": 1.7802287340164185, "learning_rate": 3.6775000000000006e-05, "loss": 0.4514, "step": 7358 }, { "epoch": 0.41208421995744204, "grad_norm": 1.1851742267608643, "learning_rate": 3.6780000000000004e-05, "loss": 0.3664, "step": 7359 }, { "epoch": 
0.41214021726957106, "grad_norm": 1.0434215068817139, "learning_rate": 3.6785e-05, "loss": 0.3649, "step": 7360 }, { "epoch": 0.4121962145817001, "grad_norm": 1.520704746246338, "learning_rate": 3.6790000000000005e-05, "loss": 0.5598, "step": 7361 }, { "epoch": 0.4122522118938291, "grad_norm": 1.5654765367507935, "learning_rate": 3.6795e-05, "loss": 0.4388, "step": 7362 }, { "epoch": 0.4123082092059581, "grad_norm": 1.2206003665924072, "learning_rate": 3.68e-05, "loss": 0.5292, "step": 7363 }, { "epoch": 0.41236420651808714, "grad_norm": 1.1813868284225464, "learning_rate": 3.6805e-05, "loss": 0.4441, "step": 7364 }, { "epoch": 0.41242020383021616, "grad_norm": 1.269207239151001, "learning_rate": 3.681e-05, "loss": 0.3842, "step": 7365 }, { "epoch": 0.4124762011423452, "grad_norm": 1.1340774297714233, "learning_rate": 3.6815e-05, "loss": 0.3907, "step": 7366 }, { "epoch": 0.4125321984544742, "grad_norm": 1.211429476737976, "learning_rate": 3.682e-05, "loss": 0.4474, "step": 7367 }, { "epoch": 0.4125881957666032, "grad_norm": 1.2279529571533203, "learning_rate": 3.6825e-05, "loss": 0.6188, "step": 7368 }, { "epoch": 0.41264419307873224, "grad_norm": 1.2425464391708374, "learning_rate": 3.6830000000000005e-05, "loss": 0.3226, "step": 7369 }, { "epoch": 0.41270019039086125, "grad_norm": 1.0841212272644043, "learning_rate": 3.6835e-05, "loss": 0.3205, "step": 7370 }, { "epoch": 0.4127561877029903, "grad_norm": 1.2456731796264648, "learning_rate": 3.684e-05, "loss": 0.4568, "step": 7371 }, { "epoch": 0.4128121850151193, "grad_norm": 1.2336543798446655, "learning_rate": 3.6845000000000004e-05, "loss": 0.4919, "step": 7372 }, { "epoch": 0.4128681823272483, "grad_norm": 1.2150521278381348, "learning_rate": 3.685e-05, "loss": 0.3569, "step": 7373 }, { "epoch": 0.41292417963937733, "grad_norm": 1.4870706796646118, "learning_rate": 3.6855e-05, "loss": 0.4903, "step": 7374 }, { "epoch": 0.41298017695150635, "grad_norm": 1.4366742372512817, "learning_rate": 3.686e-05, "loss": 
0.5873, "step": 7375 }, { "epoch": 0.41303617426363537, "grad_norm": 1.1463624238967896, "learning_rate": 3.6865e-05, "loss": 0.3585, "step": 7376 }, { "epoch": 0.4130921715757644, "grad_norm": 1.1975924968719482, "learning_rate": 3.6870000000000004e-05, "loss": 0.4505, "step": 7377 }, { "epoch": 0.4131481688878934, "grad_norm": 1.291324496269226, "learning_rate": 3.6875e-05, "loss": 0.3405, "step": 7378 }, { "epoch": 0.41320416620002237, "grad_norm": 1.1325887441635132, "learning_rate": 3.6880000000000006e-05, "loss": 0.4735, "step": 7379 }, { "epoch": 0.4132601635121514, "grad_norm": 1.3903597593307495, "learning_rate": 3.6885000000000003e-05, "loss": 0.4485, "step": 7380 }, { "epoch": 0.4133161608242804, "grad_norm": 1.4702013731002808, "learning_rate": 3.689e-05, "loss": 0.4377, "step": 7381 }, { "epoch": 0.41337215813640943, "grad_norm": 1.2567180395126343, "learning_rate": 3.6895000000000005e-05, "loss": 0.347, "step": 7382 }, { "epoch": 0.41342815544853845, "grad_norm": 1.202655553817749, "learning_rate": 3.69e-05, "loss": 0.3415, "step": 7383 }, { "epoch": 0.41348415276066747, "grad_norm": 1.4150012731552124, "learning_rate": 3.6905e-05, "loss": 0.4606, "step": 7384 }, { "epoch": 0.4135401500727965, "grad_norm": 1.4006381034851074, "learning_rate": 3.691e-05, "loss": 0.6619, "step": 7385 }, { "epoch": 0.4135961473849255, "grad_norm": 1.134314775466919, "learning_rate": 3.6915e-05, "loss": 0.3623, "step": 7386 }, { "epoch": 0.4136521446970545, "grad_norm": 1.419512152671814, "learning_rate": 3.692e-05, "loss": 0.3783, "step": 7387 }, { "epoch": 0.41370814200918354, "grad_norm": 1.394559383392334, "learning_rate": 3.6925e-05, "loss": 0.3843, "step": 7388 }, { "epoch": 0.41376413932131256, "grad_norm": 1.3517988920211792, "learning_rate": 3.693e-05, "loss": 0.5111, "step": 7389 }, { "epoch": 0.4138201366334416, "grad_norm": 1.3542639017105103, "learning_rate": 3.6935000000000005e-05, "loss": 0.4957, "step": 7390 }, { "epoch": 0.4138761339455706, "grad_norm": 
1.3230736255645752, "learning_rate": 3.694e-05, "loss": 0.3711, "step": 7391 }, { "epoch": 0.4139321312576996, "grad_norm": 1.388952374458313, "learning_rate": 3.6945e-05, "loss": 0.3725, "step": 7392 }, { "epoch": 0.41398812856982864, "grad_norm": 1.247883677482605, "learning_rate": 3.6950000000000004e-05, "loss": 0.4009, "step": 7393 }, { "epoch": 0.41404412588195766, "grad_norm": 1.362732172012329, "learning_rate": 3.6955e-05, "loss": 0.4289, "step": 7394 }, { "epoch": 0.4141001231940867, "grad_norm": 1.1690155267715454, "learning_rate": 3.696e-05, "loss": 0.4162, "step": 7395 }, { "epoch": 0.4141561205062157, "grad_norm": 1.3039852380752563, "learning_rate": 3.6965e-05, "loss": 0.624, "step": 7396 }, { "epoch": 0.4142121178183447, "grad_norm": 1.0486277341842651, "learning_rate": 3.697e-05, "loss": 0.4451, "step": 7397 }, { "epoch": 0.41426811513047374, "grad_norm": 1.4914668798446655, "learning_rate": 3.6975000000000004e-05, "loss": 0.4872, "step": 7398 }, { "epoch": 0.41432411244260275, "grad_norm": 1.2971842288970947, "learning_rate": 3.698e-05, "loss": 0.4778, "step": 7399 }, { "epoch": 0.4143801097547318, "grad_norm": 1.277298927307129, "learning_rate": 3.6985000000000006e-05, "loss": 0.4799, "step": 7400 }, { "epoch": 0.4144361070668608, "grad_norm": 1.1096839904785156, "learning_rate": 3.699e-05, "loss": 0.3887, "step": 7401 }, { "epoch": 0.4144921043789898, "grad_norm": 1.6496092081069946, "learning_rate": 3.6995e-05, "loss": 0.3864, "step": 7402 }, { "epoch": 0.41454810169111883, "grad_norm": 1.2638951539993286, "learning_rate": 3.7e-05, "loss": 0.4364, "step": 7403 }, { "epoch": 0.41460409900324785, "grad_norm": 1.452673077583313, "learning_rate": 3.7005e-05, "loss": 0.4836, "step": 7404 }, { "epoch": 0.41466009631537687, "grad_norm": 1.1603468656539917, "learning_rate": 3.701e-05, "loss": 0.4391, "step": 7405 }, { "epoch": 0.4147160936275059, "grad_norm": 1.2490837574005127, "learning_rate": 3.7015e-05, "loss": 0.377, "step": 7406 }, { "epoch": 
0.4147720909396349, "grad_norm": 1.301253080368042, "learning_rate": 3.702e-05, "loss": 0.4884, "step": 7407 }, { "epoch": 0.4148280882517639, "grad_norm": 1.1982135772705078, "learning_rate": 3.7025000000000005e-05, "loss": 0.4737, "step": 7408 }, { "epoch": 0.41488408556389295, "grad_norm": 1.099167823791504, "learning_rate": 3.703e-05, "loss": 0.3405, "step": 7409 }, { "epoch": 0.41494008287602197, "grad_norm": 1.174721121788025, "learning_rate": 3.7035e-05, "loss": 0.3617, "step": 7410 }, { "epoch": 0.414996080188151, "grad_norm": 1.277965784072876, "learning_rate": 3.7040000000000005e-05, "loss": 0.4473, "step": 7411 }, { "epoch": 0.41505207750028, "grad_norm": 1.1440922021865845, "learning_rate": 3.7045e-05, "loss": 0.3614, "step": 7412 }, { "epoch": 0.415108074812409, "grad_norm": 1.3988312482833862, "learning_rate": 3.705e-05, "loss": 0.5083, "step": 7413 }, { "epoch": 0.41516407212453804, "grad_norm": 1.0867133140563965, "learning_rate": 3.7055000000000004e-05, "loss": 0.3373, "step": 7414 }, { "epoch": 0.41522006943666706, "grad_norm": 1.3099242448806763, "learning_rate": 3.706e-05, "loss": 0.758, "step": 7415 }, { "epoch": 0.4152760667487961, "grad_norm": 1.2499079704284668, "learning_rate": 3.7065e-05, "loss": 0.4199, "step": 7416 }, { "epoch": 0.4153320640609251, "grad_norm": 1.0558418035507202, "learning_rate": 3.707e-05, "loss": 0.392, "step": 7417 }, { "epoch": 0.4153880613730541, "grad_norm": 1.1931686401367188, "learning_rate": 3.707500000000001e-05, "loss": 0.3167, "step": 7418 }, { "epoch": 0.41544405868518314, "grad_norm": 1.4347862005233765, "learning_rate": 3.7080000000000004e-05, "loss": 0.4799, "step": 7419 }, { "epoch": 0.41550005599731216, "grad_norm": 1.5621248483657837, "learning_rate": 3.7085e-05, "loss": 0.4901, "step": 7420 }, { "epoch": 0.4155560533094411, "grad_norm": 1.4398621320724487, "learning_rate": 3.7090000000000006e-05, "loss": 0.4738, "step": 7421 }, { "epoch": 0.41561205062157014, "grad_norm": 2.3422293663024902, 
"learning_rate": 3.7095e-05, "loss": 0.5476, "step": 7422 }, { "epoch": 0.41566804793369916, "grad_norm": 1.1191027164459229, "learning_rate": 3.71e-05, "loss": 0.3922, "step": 7423 }, { "epoch": 0.4157240452458282, "grad_norm": 1.2718764543533325, "learning_rate": 3.7105e-05, "loss": 0.4628, "step": 7424 }, { "epoch": 0.4157800425579572, "grad_norm": 1.2488576173782349, "learning_rate": 3.711e-05, "loss": 0.3959, "step": 7425 }, { "epoch": 0.4158360398700862, "grad_norm": 1.1072102785110474, "learning_rate": 3.7115e-05, "loss": 0.4925, "step": 7426 }, { "epoch": 0.41589203718221524, "grad_norm": 1.272878885269165, "learning_rate": 3.712e-05, "loss": 0.3726, "step": 7427 }, { "epoch": 0.41594803449434425, "grad_norm": 1.1713122129440308, "learning_rate": 3.7125e-05, "loss": 0.3643, "step": 7428 }, { "epoch": 0.4160040318064733, "grad_norm": 1.1248369216918945, "learning_rate": 3.7130000000000005e-05, "loss": 0.2857, "step": 7429 }, { "epoch": 0.4160600291186023, "grad_norm": 1.3147884607315063, "learning_rate": 3.7135e-05, "loss": 0.4309, "step": 7430 }, { "epoch": 0.4161160264307313, "grad_norm": 1.1655421257019043, "learning_rate": 3.714e-05, "loss": 0.454, "step": 7431 }, { "epoch": 0.41617202374286033, "grad_norm": 1.1496081352233887, "learning_rate": 3.7145000000000004e-05, "loss": 0.3775, "step": 7432 }, { "epoch": 0.41622802105498935, "grad_norm": 1.1635730266571045, "learning_rate": 3.715e-05, "loss": 0.3961, "step": 7433 }, { "epoch": 0.41628401836711837, "grad_norm": 1.2568323612213135, "learning_rate": 3.7155e-05, "loss": 0.5334, "step": 7434 }, { "epoch": 0.4163400156792474, "grad_norm": 1.2780178785324097, "learning_rate": 3.716e-05, "loss": 0.4151, "step": 7435 }, { "epoch": 0.4163960129913764, "grad_norm": 1.2436951398849487, "learning_rate": 3.7165e-05, "loss": 0.3052, "step": 7436 }, { "epoch": 0.4164520103035054, "grad_norm": 1.131870150566101, "learning_rate": 3.717e-05, "loss": 0.3319, "step": 7437 }, { "epoch": 0.41650800761563445, "grad_norm": 
1.9683313369750977, "learning_rate": 3.7175e-05, "loss": 0.4952, "step": 7438 }, { "epoch": 0.41656400492776346, "grad_norm": 1.0045701265335083, "learning_rate": 3.7180000000000007e-05, "loss": 0.3867, "step": 7439 }, { "epoch": 0.4166200022398925, "grad_norm": 1.1301097869873047, "learning_rate": 3.7185000000000004e-05, "loss": 0.4603, "step": 7440 }, { "epoch": 0.4166759995520215, "grad_norm": 1.4950412511825562, "learning_rate": 3.719e-05, "loss": 0.7327, "step": 7441 }, { "epoch": 0.4167319968641505, "grad_norm": 1.2363497018814087, "learning_rate": 3.7195e-05, "loss": 0.4815, "step": 7442 }, { "epoch": 0.41678799417627954, "grad_norm": 1.0679094791412354, "learning_rate": 3.72e-05, "loss": 0.3766, "step": 7443 }, { "epoch": 0.41684399148840856, "grad_norm": 1.2219816446304321, "learning_rate": 3.7205e-05, "loss": 0.4915, "step": 7444 }, { "epoch": 0.4168999888005376, "grad_norm": 1.3547922372817993, "learning_rate": 3.721e-05, "loss": 0.4618, "step": 7445 }, { "epoch": 0.4169559861126666, "grad_norm": 1.3519762754440308, "learning_rate": 3.7215e-05, "loss": 0.5428, "step": 7446 }, { "epoch": 0.4170119834247956, "grad_norm": 1.3374686241149902, "learning_rate": 3.722e-05, "loss": 0.4427, "step": 7447 }, { "epoch": 0.41706798073692464, "grad_norm": 1.1669195890426636, "learning_rate": 3.7225000000000004e-05, "loss": 0.4255, "step": 7448 }, { "epoch": 0.41712397804905366, "grad_norm": 1.2421183586120605, "learning_rate": 3.723e-05, "loss": 0.3645, "step": 7449 }, { "epoch": 0.4171799753611827, "grad_norm": 1.3754910230636597, "learning_rate": 3.7235000000000005e-05, "loss": 0.5434, "step": 7450 }, { "epoch": 0.4172359726733117, "grad_norm": 1.1193515062332153, "learning_rate": 3.724e-05, "loss": 0.4259, "step": 7451 }, { "epoch": 0.4172919699854407, "grad_norm": 1.3255189657211304, "learning_rate": 3.7245e-05, "loss": 0.4716, "step": 7452 }, { "epoch": 0.41734796729756973, "grad_norm": 1.2888439893722534, "learning_rate": 3.7250000000000004e-05, "loss": 0.4525, 
"step": 7453 }, { "epoch": 0.41740396460969875, "grad_norm": 1.7321168184280396, "learning_rate": 3.7255e-05, "loss": 0.4392, "step": 7454 }, { "epoch": 0.41745996192182777, "grad_norm": 1.167161226272583, "learning_rate": 3.726e-05, "loss": 0.3948, "step": 7455 }, { "epoch": 0.4175159592339568, "grad_norm": 1.6288481950759888, "learning_rate": 3.7265e-05, "loss": 0.4082, "step": 7456 }, { "epoch": 0.4175719565460858, "grad_norm": 1.244638442993164, "learning_rate": 3.727e-05, "loss": 0.4842, "step": 7457 }, { "epoch": 0.41762795385821483, "grad_norm": 1.3217008113861084, "learning_rate": 3.7275000000000005e-05, "loss": 0.4173, "step": 7458 }, { "epoch": 0.41768395117034385, "grad_norm": 1.2548723220825195, "learning_rate": 3.728e-05, "loss": 0.4547, "step": 7459 }, { "epoch": 0.41773994848247287, "grad_norm": 1.2116191387176514, "learning_rate": 3.7285000000000006e-05, "loss": 0.337, "step": 7460 }, { "epoch": 0.4177959457946019, "grad_norm": 1.359483242034912, "learning_rate": 3.7290000000000004e-05, "loss": 0.4193, "step": 7461 }, { "epoch": 0.41785194310673085, "grad_norm": 1.196447491645813, "learning_rate": 3.7295e-05, "loss": 0.48, "step": 7462 }, { "epoch": 0.41790794041885987, "grad_norm": 1.2658195495605469, "learning_rate": 3.73e-05, "loss": 0.5135, "step": 7463 }, { "epoch": 0.4179639377309889, "grad_norm": 1.276366949081421, "learning_rate": 3.7305e-05, "loss": 0.4656, "step": 7464 }, { "epoch": 0.4180199350431179, "grad_norm": 1.5904545783996582, "learning_rate": 3.731e-05, "loss": 0.4994, "step": 7465 }, { "epoch": 0.4180759323552469, "grad_norm": 1.1834349632263184, "learning_rate": 3.7315e-05, "loss": 0.3434, "step": 7466 }, { "epoch": 0.41813192966737595, "grad_norm": 1.1932505369186401, "learning_rate": 3.732e-05, "loss": 0.4881, "step": 7467 }, { "epoch": 0.41818792697950496, "grad_norm": 1.2890348434448242, "learning_rate": 3.7325000000000006e-05, "loss": 0.4165, "step": 7468 }, { "epoch": 0.418243924291634, "grad_norm": 1.2739930152893066, 
"learning_rate": 3.7330000000000003e-05, "loss": 0.4243, "step": 7469 }, { "epoch": 0.418299921603763, "grad_norm": 1.0256266593933105, "learning_rate": 3.7335e-05, "loss": 0.3585, "step": 7470 }, { "epoch": 0.418355918915892, "grad_norm": 1.309674859046936, "learning_rate": 3.7340000000000005e-05, "loss": 0.4201, "step": 7471 }, { "epoch": 0.41841191622802104, "grad_norm": 1.2147879600524902, "learning_rate": 3.7345e-05, "loss": 0.3943, "step": 7472 }, { "epoch": 0.41846791354015006, "grad_norm": 1.3038194179534912, "learning_rate": 3.735e-05, "loss": 0.3817, "step": 7473 }, { "epoch": 0.4185239108522791, "grad_norm": 1.4162400960922241, "learning_rate": 3.7355000000000004e-05, "loss": 0.462, "step": 7474 }, { "epoch": 0.4185799081644081, "grad_norm": 1.294368028640747, "learning_rate": 3.736e-05, "loss": 0.4256, "step": 7475 }, { "epoch": 0.4186359054765371, "grad_norm": 1.3236455917358398, "learning_rate": 3.7365e-05, "loss": 0.3296, "step": 7476 }, { "epoch": 0.41869190278866614, "grad_norm": 1.3321163654327393, "learning_rate": 3.7369999999999996e-05, "loss": 0.4964, "step": 7477 }, { "epoch": 0.41874790010079516, "grad_norm": 1.361327886581421, "learning_rate": 3.737500000000001e-05, "loss": 0.4242, "step": 7478 }, { "epoch": 0.4188038974129242, "grad_norm": 1.1381531953811646, "learning_rate": 3.7380000000000005e-05, "loss": 0.3938, "step": 7479 }, { "epoch": 0.4188598947250532, "grad_norm": 1.175363302230835, "learning_rate": 3.7385e-05, "loss": 0.3535, "step": 7480 }, { "epoch": 0.4189158920371822, "grad_norm": 1.2960225343704224, "learning_rate": 3.739e-05, "loss": 0.4894, "step": 7481 }, { "epoch": 0.41897188934931123, "grad_norm": 1.4263495206832886, "learning_rate": 3.7395000000000004e-05, "loss": 0.5933, "step": 7482 }, { "epoch": 0.41902788666144025, "grad_norm": 1.2767161130905151, "learning_rate": 3.74e-05, "loss": 0.4764, "step": 7483 }, { "epoch": 0.41908388397356927, "grad_norm": 1.178892970085144, "learning_rate": 3.7405e-05, "loss": 0.4257, 
"step": 7484 }, { "epoch": 0.4191398812856983, "grad_norm": 1.3218621015548706, "learning_rate": 3.741e-05, "loss": 0.4084, "step": 7485 }, { "epoch": 0.4191958785978273, "grad_norm": 1.385350227355957, "learning_rate": 3.7415e-05, "loss": 0.4606, "step": 7486 }, { "epoch": 0.41925187590995633, "grad_norm": 1.311037302017212, "learning_rate": 3.742e-05, "loss": 0.4947, "step": 7487 }, { "epoch": 0.41930787322208535, "grad_norm": 1.311902403831482, "learning_rate": 3.7425e-05, "loss": 0.4066, "step": 7488 }, { "epoch": 0.41936387053421437, "grad_norm": 1.1940802335739136, "learning_rate": 3.7430000000000006e-05, "loss": 0.3556, "step": 7489 }, { "epoch": 0.4194198678463434, "grad_norm": 1.77219557762146, "learning_rate": 3.7435e-05, "loss": 0.4119, "step": 7490 }, { "epoch": 0.4194758651584724, "grad_norm": 1.2206567525863647, "learning_rate": 3.744e-05, "loss": 0.4377, "step": 7491 }, { "epoch": 0.4195318624706014, "grad_norm": 1.3089338541030884, "learning_rate": 3.7445000000000005e-05, "loss": 0.5447, "step": 7492 }, { "epoch": 0.41958785978273044, "grad_norm": 1.2232393026351929, "learning_rate": 3.745e-05, "loss": 0.3733, "step": 7493 }, { "epoch": 0.41964385709485946, "grad_norm": 0.9914332032203674, "learning_rate": 3.7455e-05, "loss": 0.4314, "step": 7494 }, { "epoch": 0.4196998544069885, "grad_norm": 1.6882667541503906, "learning_rate": 3.7460000000000004e-05, "loss": 0.5703, "step": 7495 }, { "epoch": 0.4197558517191175, "grad_norm": 1.0649563074111938, "learning_rate": 3.7465e-05, "loss": 0.4024, "step": 7496 }, { "epoch": 0.4198118490312465, "grad_norm": 1.0853193998336792, "learning_rate": 3.747e-05, "loss": 0.3227, "step": 7497 }, { "epoch": 0.41986784634337554, "grad_norm": 1.2998113632202148, "learning_rate": 3.7475e-05, "loss": 0.4718, "step": 7498 }, { "epoch": 0.41992384365550456, "grad_norm": 1.1960525512695312, "learning_rate": 3.748000000000001e-05, "loss": 0.4201, "step": 7499 }, { "epoch": 0.4199798409676336, "grad_norm": 1.2740609645843506, 
"learning_rate": 3.7485000000000004e-05, "loss": 0.459, "step": 7500 }, { "epoch": 0.4200358382797626, "grad_norm": 1.428773045539856, "learning_rate": 3.749e-05, "loss": 0.5769, "step": 7501 }, { "epoch": 0.4200918355918916, "grad_norm": 1.283211350440979, "learning_rate": 3.7495e-05, "loss": 0.3808, "step": 7502 }, { "epoch": 0.4201478329040206, "grad_norm": 1.177197813987732, "learning_rate": 3.7500000000000003e-05, "loss": 0.4485, "step": 7503 }, { "epoch": 0.4202038302161496, "grad_norm": 1.2770239114761353, "learning_rate": 3.7505e-05, "loss": 0.4191, "step": 7504 }, { "epoch": 0.4202598275282786, "grad_norm": 1.404529333114624, "learning_rate": 3.751e-05, "loss": 0.4719, "step": 7505 }, { "epoch": 0.42031582484040764, "grad_norm": 1.4240243434906006, "learning_rate": 3.7515e-05, "loss": 0.7294, "step": 7506 }, { "epoch": 0.42037182215253666, "grad_norm": 1.0826585292816162, "learning_rate": 3.752e-05, "loss": 0.339, "step": 7507 }, { "epoch": 0.4204278194646657, "grad_norm": 1.2525144815444946, "learning_rate": 3.7525e-05, "loss": 0.4028, "step": 7508 }, { "epoch": 0.4204838167767947, "grad_norm": 1.1017965078353882, "learning_rate": 3.753e-05, "loss": 0.4051, "step": 7509 }, { "epoch": 0.4205398140889237, "grad_norm": 1.0927131175994873, "learning_rate": 3.7535000000000006e-05, "loss": 0.3306, "step": 7510 }, { "epoch": 0.42059581140105273, "grad_norm": 1.099252462387085, "learning_rate": 3.754e-05, "loss": 0.3469, "step": 7511 }, { "epoch": 0.42065180871318175, "grad_norm": 1.1844866275787354, "learning_rate": 3.7545e-05, "loss": 0.3762, "step": 7512 }, { "epoch": 0.42070780602531077, "grad_norm": 1.3952338695526123, "learning_rate": 3.7550000000000005e-05, "loss": 0.4501, "step": 7513 }, { "epoch": 0.4207638033374398, "grad_norm": 1.1330647468566895, "learning_rate": 3.7555e-05, "loss": 0.4405, "step": 7514 }, { "epoch": 0.4208198006495688, "grad_norm": 1.39003324508667, "learning_rate": 3.756e-05, "loss": 0.5793, "step": 7515 }, { "epoch": 
0.42087579796169783, "grad_norm": 1.1815036535263062, "learning_rate": 3.7565e-05, "loss": 0.5452, "step": 7516 }, { "epoch": 0.42093179527382685, "grad_norm": 1.0811365842819214, "learning_rate": 3.757e-05, "loss": 0.396, "step": 7517 }, { "epoch": 0.42098779258595587, "grad_norm": 1.2157245874404907, "learning_rate": 3.7575e-05, "loss": 0.3333, "step": 7518 }, { "epoch": 0.4210437898980849, "grad_norm": 1.5133262872695923, "learning_rate": 3.758e-05, "loss": 0.5342, "step": 7519 }, { "epoch": 0.4210997872102139, "grad_norm": 1.161711573600769, "learning_rate": 3.758500000000001e-05, "loss": 0.373, "step": 7520 }, { "epoch": 0.4211557845223429, "grad_norm": 1.3511238098144531, "learning_rate": 3.7590000000000004e-05, "loss": 0.3456, "step": 7521 }, { "epoch": 0.42121178183447194, "grad_norm": 1.7794889211654663, "learning_rate": 3.7595e-05, "loss": 0.503, "step": 7522 }, { "epoch": 0.42126777914660096, "grad_norm": 1.5192196369171143, "learning_rate": 3.76e-05, "loss": 0.4161, "step": 7523 }, { "epoch": 0.42132377645873, "grad_norm": 1.1321442127227783, "learning_rate": 3.7605e-05, "loss": 0.3586, "step": 7524 }, { "epoch": 0.421379773770859, "grad_norm": 1.3278142213821411, "learning_rate": 3.761e-05, "loss": 0.4832, "step": 7525 }, { "epoch": 0.421435771082988, "grad_norm": 1.2671815156936646, "learning_rate": 3.7615e-05, "loss": 0.4192, "step": 7526 }, { "epoch": 0.42149176839511704, "grad_norm": 1.416520595550537, "learning_rate": 3.762e-05, "loss": 0.4862, "step": 7527 }, { "epoch": 0.42154776570724606, "grad_norm": 1.217038631439209, "learning_rate": 3.7625e-05, "loss": 0.4498, "step": 7528 }, { "epoch": 0.4216037630193751, "grad_norm": 2.6948342323303223, "learning_rate": 3.7630000000000004e-05, "loss": 0.6703, "step": 7529 }, { "epoch": 0.4216597603315041, "grad_norm": 1.1527397632598877, "learning_rate": 3.7635e-05, "loss": 0.4959, "step": 7530 }, { "epoch": 0.4217157576436331, "grad_norm": 1.5450860261917114, "learning_rate": 3.7640000000000006e-05, 
"loss": 0.4654, "step": 7531 }, { "epoch": 0.42177175495576213, "grad_norm": 1.394510269165039, "learning_rate": 3.7645e-05, "loss": 0.5322, "step": 7532 }, { "epoch": 0.42182775226789115, "grad_norm": 1.3331291675567627, "learning_rate": 3.765e-05, "loss": 0.4591, "step": 7533 }, { "epoch": 0.4218837495800202, "grad_norm": 1.3717551231384277, "learning_rate": 3.7655000000000005e-05, "loss": 0.5099, "step": 7534 }, { "epoch": 0.4219397468921492, "grad_norm": 1.1739580631256104, "learning_rate": 3.766e-05, "loss": 0.3854, "step": 7535 }, { "epoch": 0.4219957442042782, "grad_norm": 1.1824437379837036, "learning_rate": 3.7665e-05, "loss": 0.4561, "step": 7536 }, { "epoch": 0.42205174151640723, "grad_norm": 1.332931637763977, "learning_rate": 3.767e-05, "loss": 0.3535, "step": 7537 }, { "epoch": 0.42210773882853625, "grad_norm": 1.2487376928329468, "learning_rate": 3.7675e-05, "loss": 0.4024, "step": 7538 }, { "epoch": 0.42216373614066527, "grad_norm": 1.5085293054580688, "learning_rate": 3.7680000000000005e-05, "loss": 0.3954, "step": 7539 }, { "epoch": 0.4222197334527943, "grad_norm": 1.2047690153121948, "learning_rate": 3.7685e-05, "loss": 0.517, "step": 7540 }, { "epoch": 0.4222757307649233, "grad_norm": 1.1977018117904663, "learning_rate": 3.769e-05, "loss": 0.3594, "step": 7541 }, { "epoch": 0.4223317280770523, "grad_norm": 1.2525365352630615, "learning_rate": 3.7695000000000004e-05, "loss": 0.5402, "step": 7542 }, { "epoch": 0.42238772538918135, "grad_norm": 4.599066734313965, "learning_rate": 3.77e-05, "loss": 0.3296, "step": 7543 }, { "epoch": 0.42244372270131036, "grad_norm": 1.1863445043563843, "learning_rate": 3.7705e-05, "loss": 0.3454, "step": 7544 }, { "epoch": 0.42249972001343933, "grad_norm": 1.478772521018982, "learning_rate": 3.771e-05, "loss": 0.5987, "step": 7545 }, { "epoch": 0.42255571732556835, "grad_norm": 1.3632503747940063, "learning_rate": 3.7715e-05, "loss": 0.4575, "step": 7546 }, { "epoch": 0.42261171463769737, "grad_norm": 
1.4847928285598755, "learning_rate": 3.772e-05, "loss": 0.3567, "step": 7547 }, { "epoch": 0.4226677119498264, "grad_norm": 1.3456907272338867, "learning_rate": 3.7725e-05, "loss": 0.542, "step": 7548 }, { "epoch": 0.4227237092619554, "grad_norm": 1.2731678485870361, "learning_rate": 3.7730000000000006e-05, "loss": 0.4282, "step": 7549 }, { "epoch": 0.4227797065740844, "grad_norm": 4.894685745239258, "learning_rate": 3.7735000000000004e-05, "loss": 0.6533, "step": 7550 }, { "epoch": 0.42283570388621344, "grad_norm": 1.2923585176467896, "learning_rate": 3.774e-05, "loss": 0.6542, "step": 7551 }, { "epoch": 0.42289170119834246, "grad_norm": 1.7255858182907104, "learning_rate": 3.7745000000000005e-05, "loss": 0.5819, "step": 7552 }, { "epoch": 0.4229476985104715, "grad_norm": 1.4177074432373047, "learning_rate": 3.775e-05, "loss": 0.5332, "step": 7553 }, { "epoch": 0.4230036958226005, "grad_norm": 1.2697304487228394, "learning_rate": 3.7755e-05, "loss": 0.48, "step": 7554 }, { "epoch": 0.4230596931347295, "grad_norm": 1.4944721460342407, "learning_rate": 3.776e-05, "loss": 0.6667, "step": 7555 }, { "epoch": 0.42311569044685854, "grad_norm": 1.0259102582931519, "learning_rate": 3.7765e-05, "loss": 0.3976, "step": 7556 }, { "epoch": 0.42317168775898756, "grad_norm": 1.1391164064407349, "learning_rate": 3.777e-05, "loss": 0.526, "step": 7557 }, { "epoch": 0.4232276850711166, "grad_norm": 1.4234325885772705, "learning_rate": 3.7775e-05, "loss": 0.5159, "step": 7558 }, { "epoch": 0.4232836823832456, "grad_norm": 1.290900468826294, "learning_rate": 3.778000000000001e-05, "loss": 0.4166, "step": 7559 }, { "epoch": 0.4233396796953746, "grad_norm": 1.2310255765914917, "learning_rate": 3.7785000000000005e-05, "loss": 0.4356, "step": 7560 }, { "epoch": 0.42339567700750363, "grad_norm": 1.2476540803909302, "learning_rate": 3.779e-05, "loss": 0.3219, "step": 7561 }, { "epoch": 0.42345167431963265, "grad_norm": 1.3199870586395264, "learning_rate": 3.7795e-05, "loss": 0.487, "step": 
7562 }, { "epoch": 0.4235076716317617, "grad_norm": 1.2570263147354126, "learning_rate": 3.7800000000000004e-05, "loss": 0.4006, "step": 7563 }, { "epoch": 0.4235636689438907, "grad_norm": 1.365960955619812, "learning_rate": 3.7805e-05, "loss": 0.4384, "step": 7564 }, { "epoch": 0.4236196662560197, "grad_norm": 1.5694210529327393, "learning_rate": 3.781e-05, "loss": 0.528, "step": 7565 }, { "epoch": 0.42367566356814873, "grad_norm": 1.0389361381530762, "learning_rate": 3.7815e-05, "loss": 0.3886, "step": 7566 }, { "epoch": 0.42373166088027775, "grad_norm": null, "learning_rate": 3.7815e-05, "loss": 0.4799, "step": 7567 }, { "epoch": 0.42378765819240677, "grad_norm": 1.392661690711975, "learning_rate": 3.782e-05, "loss": 0.4247, "step": 7568 }, { "epoch": 0.4238436555045358, "grad_norm": 1.1096845865249634, "learning_rate": 3.7825e-05, "loss": 0.354, "step": 7569 }, { "epoch": 0.4238996528166648, "grad_norm": 1.1711372137069702, "learning_rate": 3.783e-05, "loss": 0.4522, "step": 7570 }, { "epoch": 0.4239556501287938, "grad_norm": 1.2791259288787842, "learning_rate": 3.7835000000000006e-05, "loss": 0.3417, "step": 7571 }, { "epoch": 0.42401164744092285, "grad_norm": 1.1541465520858765, "learning_rate": 3.7840000000000004e-05, "loss": 0.4978, "step": 7572 }, { "epoch": 0.42406764475305186, "grad_norm": 1.3474169969558716, "learning_rate": 3.7845e-05, "loss": 0.4274, "step": 7573 }, { "epoch": 0.4241236420651809, "grad_norm": 1.2171330451965332, "learning_rate": 3.7850000000000005e-05, "loss": 0.4049, "step": 7574 }, { "epoch": 0.4241796393773099, "grad_norm": 1.2262377738952637, "learning_rate": 3.7855e-05, "loss": 0.3458, "step": 7575 }, { "epoch": 0.4242356366894389, "grad_norm": 2.331810474395752, "learning_rate": 3.786e-05, "loss": 0.4114, "step": 7576 }, { "epoch": 0.42429163400156794, "grad_norm": 1.209799885749817, "learning_rate": 3.7865e-05, "loss": 0.4726, "step": 7577 }, { "epoch": 0.42434763131369696, "grad_norm": 1.1870396137237549, "learning_rate": 
3.787e-05, "loss": 0.4475, "step": 7578 }, { "epoch": 0.424403628625826, "grad_norm": 1.3213865756988525, "learning_rate": 3.7875e-05, "loss": 0.4684, "step": 7579 }, { "epoch": 0.424459625937955, "grad_norm": 1.2779390811920166, "learning_rate": 3.788e-05, "loss": 0.4014, "step": 7580 }, { "epoch": 0.424515623250084, "grad_norm": 1.1260337829589844, "learning_rate": 3.7885e-05, "loss": 0.4165, "step": 7581 }, { "epoch": 0.42457162056221304, "grad_norm": 1.3278111219406128, "learning_rate": 3.7890000000000005e-05, "loss": 0.4884, "step": 7582 }, { "epoch": 0.42462761787434206, "grad_norm": 1.2361208200454712, "learning_rate": 3.7895e-05, "loss": 0.4114, "step": 7583 }, { "epoch": 0.4246836151864711, "grad_norm": 1.1926347017288208, "learning_rate": 3.79e-05, "loss": 0.4686, "step": 7584 }, { "epoch": 0.4247396124986001, "grad_norm": 1.308377981185913, "learning_rate": 3.7905000000000004e-05, "loss": 0.5798, "step": 7585 }, { "epoch": 0.42479560981072906, "grad_norm": 1.328100562095642, "learning_rate": 3.791e-05, "loss": 0.4104, "step": 7586 }, { "epoch": 0.4248516071228581, "grad_norm": 1.2501837015151978, "learning_rate": 3.7915e-05, "loss": 0.3796, "step": 7587 }, { "epoch": 0.4249076044349871, "grad_norm": 1.293765664100647, "learning_rate": 3.792e-05, "loss": 0.3548, "step": 7588 }, { "epoch": 0.4249636017471161, "grad_norm": 1.2599366903305054, "learning_rate": 3.7925e-05, "loss": 0.3834, "step": 7589 }, { "epoch": 0.42501959905924513, "grad_norm": 1.2977622747421265, "learning_rate": 3.7930000000000004e-05, "loss": 0.5509, "step": 7590 }, { "epoch": 0.42507559637137415, "grad_norm": 1.1092506647109985, "learning_rate": 3.7935e-05, "loss": 0.4144, "step": 7591 }, { "epoch": 0.4251315936835032, "grad_norm": 1.2002649307250977, "learning_rate": 3.7940000000000006e-05, "loss": 0.3521, "step": 7592 }, { "epoch": 0.4251875909956322, "grad_norm": 1.210794448852539, "learning_rate": 3.7945000000000003e-05, "loss": 0.3235, "step": 7593 }, { "epoch": 
0.4252435883077612, "grad_norm": 1.4287084341049194, "learning_rate": 3.795e-05, "loss": 0.4229, "step": 7594 }, { "epoch": 0.42529958561989023, "grad_norm": 1.0724073648452759, "learning_rate": 3.7955e-05, "loss": 0.3281, "step": 7595 }, { "epoch": 0.42535558293201925, "grad_norm": 1.2447272539138794, "learning_rate": 3.796e-05, "loss": 0.4698, "step": 7596 }, { "epoch": 0.42541158024414827, "grad_norm": 1.1794023513793945, "learning_rate": 3.7965e-05, "loss": 0.4844, "step": 7597 }, { "epoch": 0.4254675775562773, "grad_norm": 1.178626298904419, "learning_rate": 3.797e-05, "loss": 0.4751, "step": 7598 }, { "epoch": 0.4255235748684063, "grad_norm": 1.1559756994247437, "learning_rate": 3.7975e-05, "loss": 0.3661, "step": 7599 }, { "epoch": 0.4255795721805353, "grad_norm": 1.593385934829712, "learning_rate": 3.7980000000000006e-05, "loss": 0.6293, "step": 7600 }, { "epoch": 0.42563556949266435, "grad_norm": 1.361419439315796, "learning_rate": 3.7985e-05, "loss": 0.5309, "step": 7601 }, { "epoch": 0.42569156680479336, "grad_norm": 1.2820345163345337, "learning_rate": 3.799e-05, "loss": 0.5431, "step": 7602 }, { "epoch": 0.4257475641169224, "grad_norm": 1.487322449684143, "learning_rate": 3.7995000000000005e-05, "loss": 0.548, "step": 7603 }, { "epoch": 0.4258035614290514, "grad_norm": 1.1518892049789429, "learning_rate": 3.8e-05, "loss": 0.5031, "step": 7604 }, { "epoch": 0.4258595587411804, "grad_norm": 1.4109389781951904, "learning_rate": 3.8005e-05, "loss": 0.5499, "step": 7605 }, { "epoch": 0.42591555605330944, "grad_norm": 1.2256593704223633, "learning_rate": 3.8010000000000004e-05, "loss": 0.4154, "step": 7606 }, { "epoch": 0.42597155336543846, "grad_norm": 1.0937228202819824, "learning_rate": 3.8015e-05, "loss": 0.3778, "step": 7607 }, { "epoch": 0.4260275506775675, "grad_norm": 1.2796554565429688, "learning_rate": 3.802e-05, "loss": 0.4817, "step": 7608 }, { "epoch": 0.4260835479896965, "grad_norm": 1.2420028448104858, "learning_rate": 3.8025e-05, "loss": 
0.4238, "step": 7609 }, { "epoch": 0.4261395453018255, "grad_norm": 1.066815733909607, "learning_rate": 3.803000000000001e-05, "loss": 0.4001, "step": 7610 }, { "epoch": 0.42619554261395454, "grad_norm": 1.2149802446365356, "learning_rate": 3.8035000000000004e-05, "loss": 0.3767, "step": 7611 }, { "epoch": 0.42625153992608356, "grad_norm": 1.2878392934799194, "learning_rate": 3.804e-05, "loss": 0.419, "step": 7612 }, { "epoch": 0.4263075372382126, "grad_norm": 1.2985421419143677, "learning_rate": 3.8045000000000006e-05, "loss": 0.447, "step": 7613 }, { "epoch": 0.4263635345503416, "grad_norm": 1.4540281295776367, "learning_rate": 3.805e-05, "loss": 0.3851, "step": 7614 }, { "epoch": 0.4264195318624706, "grad_norm": 1.2379271984100342, "learning_rate": 3.8055e-05, "loss": 0.4092, "step": 7615 }, { "epoch": 0.42647552917459963, "grad_norm": 1.2478660345077515, "learning_rate": 3.806e-05, "loss": 0.3936, "step": 7616 }, { "epoch": 0.42653152648672865, "grad_norm": 1.6623990535736084, "learning_rate": 3.8065e-05, "loss": 0.5192, "step": 7617 }, { "epoch": 0.42658752379885767, "grad_norm": 1.3103492259979248, "learning_rate": 3.807e-05, "loss": 0.4512, "step": 7618 }, { "epoch": 0.4266435211109867, "grad_norm": 1.294898509979248, "learning_rate": 3.8075e-05, "loss": 0.4323, "step": 7619 }, { "epoch": 0.4266995184231157, "grad_norm": 1.214297890663147, "learning_rate": 3.808e-05, "loss": 0.3929, "step": 7620 }, { "epoch": 0.42675551573524473, "grad_norm": 1.2719229459762573, "learning_rate": 3.8085000000000006e-05, "loss": 0.4642, "step": 7621 }, { "epoch": 0.42681151304737375, "grad_norm": 1.2533601522445679, "learning_rate": 3.809e-05, "loss": 0.3769, "step": 7622 }, { "epoch": 0.42686751035950277, "grad_norm": 1.5504531860351562, "learning_rate": 3.8095e-05, "loss": 0.4822, "step": 7623 }, { "epoch": 0.4269235076716318, "grad_norm": 1.2235283851623535, "learning_rate": 3.8100000000000005e-05, "loss": 0.3915, "step": 7624 }, { "epoch": 0.4269795049837608, "grad_norm": 
1.3170377016067505, "learning_rate": 3.8105e-05, "loss": 0.4689, "step": 7625 }, { "epoch": 0.4270355022958898, "grad_norm": 1.1815876960754395, "learning_rate": 3.811e-05, "loss": 0.4741, "step": 7626 }, { "epoch": 0.4270914996080188, "grad_norm": 1.4471092224121094, "learning_rate": 3.8115000000000004e-05, "loss": 0.5063, "step": 7627 }, { "epoch": 0.4271474969201478, "grad_norm": 1.2353376150131226, "learning_rate": 3.812e-05, "loss": 0.4274, "step": 7628 }, { "epoch": 0.4272034942322768, "grad_norm": 1.41990065574646, "learning_rate": 3.8125e-05, "loss": 0.4, "step": 7629 }, { "epoch": 0.42725949154440584, "grad_norm": 1.7524484395980835, "learning_rate": 3.8129999999999996e-05, "loss": 0.4488, "step": 7630 }, { "epoch": 0.42731548885653486, "grad_norm": 1.1383867263793945, "learning_rate": 3.813500000000001e-05, "loss": 0.5019, "step": 7631 }, { "epoch": 0.4273714861686639, "grad_norm": 1.3128741979599, "learning_rate": 3.8140000000000004e-05, "loss": 0.3711, "step": 7632 }, { "epoch": 0.4274274834807929, "grad_norm": 1.2062790393829346, "learning_rate": 3.8145e-05, "loss": 0.4014, "step": 7633 }, { "epoch": 0.4274834807929219, "grad_norm": 1.2138209342956543, "learning_rate": 3.8150000000000006e-05, "loss": 0.4114, "step": 7634 }, { "epoch": 0.42753947810505094, "grad_norm": 1.4575783014297485, "learning_rate": 3.8155e-05, "loss": 0.6132, "step": 7635 }, { "epoch": 0.42759547541717996, "grad_norm": 1.1320710182189941, "learning_rate": 3.816e-05, "loss": 0.3843, "step": 7636 }, { "epoch": 0.427651472729309, "grad_norm": 1.2448700666427612, "learning_rate": 3.8165e-05, "loss": 0.5245, "step": 7637 }, { "epoch": 0.427707470041438, "grad_norm": 0.9886170029640198, "learning_rate": 3.817e-05, "loss": 0.3921, "step": 7638 }, { "epoch": 0.427763467353567, "grad_norm": 1.1423417329788208, "learning_rate": 3.8175e-05, "loss": 0.3704, "step": 7639 }, { "epoch": 0.42781946466569604, "grad_norm": 1.307464838027954, "learning_rate": 3.818e-05, "loss": 0.4182, "step": 7640 
}, { "epoch": 0.42787546197782506, "grad_norm": 1.1473870277404785, "learning_rate": 3.8185e-05, "loss": 0.4207, "step": 7641 }, { "epoch": 0.4279314592899541, "grad_norm": 1.1849149465560913, "learning_rate": 3.8190000000000005e-05, "loss": 0.526, "step": 7642 }, { "epoch": 0.4279874566020831, "grad_norm": 1.3337353467941284, "learning_rate": 3.8195e-05, "loss": 0.3972, "step": 7643 }, { "epoch": 0.4280434539142121, "grad_norm": 1.0717594623565674, "learning_rate": 3.82e-05, "loss": 0.3909, "step": 7644 }, { "epoch": 0.42809945122634113, "grad_norm": 1.5796343088150024, "learning_rate": 3.8205000000000004e-05, "loss": 0.6941, "step": 7645 }, { "epoch": 0.42815544853847015, "grad_norm": 1.196513295173645, "learning_rate": 3.821e-05, "loss": 0.4088, "step": 7646 }, { "epoch": 0.42821144585059917, "grad_norm": 1.0890085697174072, "learning_rate": 3.8215e-05, "loss": 0.4185, "step": 7647 }, { "epoch": 0.4282674431627282, "grad_norm": 1.166717767715454, "learning_rate": 3.822e-05, "loss": 0.4052, "step": 7648 }, { "epoch": 0.4283234404748572, "grad_norm": 1.1860620975494385, "learning_rate": 3.8225e-05, "loss": 0.5055, "step": 7649 }, { "epoch": 0.42837943778698623, "grad_norm": 1.2802859544754028, "learning_rate": 3.823e-05, "loss": 0.3698, "step": 7650 }, { "epoch": 0.42843543509911525, "grad_norm": 1.1276755332946777, "learning_rate": 3.8235e-05, "loss": 0.4438, "step": 7651 }, { "epoch": 0.42849143241124427, "grad_norm": 1.3856489658355713, "learning_rate": 3.8240000000000007e-05, "loss": 0.5302, "step": 7652 }, { "epoch": 0.4285474297233733, "grad_norm": 1.2472093105316162, "learning_rate": 3.8245000000000004e-05, "loss": 0.3277, "step": 7653 }, { "epoch": 0.4286034270355023, "grad_norm": 1.3688454627990723, "learning_rate": 3.825e-05, "loss": 0.3755, "step": 7654 }, { "epoch": 0.4286594243476313, "grad_norm": 1.4699273109436035, "learning_rate": 3.8255e-05, "loss": 0.4998, "step": 7655 }, { "epoch": 0.42871542165976034, "grad_norm": 1.7625372409820557, 
"learning_rate": 3.826e-05, "loss": 0.6153, "step": 7656 }, { "epoch": 0.42877141897188936, "grad_norm": 1.4056943655014038, "learning_rate": 3.8265e-05, "loss": 0.4559, "step": 7657 }, { "epoch": 0.4288274162840184, "grad_norm": 1.6552726030349731, "learning_rate": 3.827e-05, "loss": 0.4928, "step": 7658 }, { "epoch": 0.4288834135961474, "grad_norm": 1.1531238555908203, "learning_rate": 3.8275e-05, "loss": 0.3823, "step": 7659 }, { "epoch": 0.4289394109082764, "grad_norm": 1.2474634647369385, "learning_rate": 3.828e-05, "loss": 0.4445, "step": 7660 }, { "epoch": 0.42899540822040544, "grad_norm": 1.1210706233978271, "learning_rate": 3.8285000000000004e-05, "loss": 0.3965, "step": 7661 }, { "epoch": 0.42905140553253446, "grad_norm": 1.180922508239746, "learning_rate": 3.829e-05, "loss": 0.3521, "step": 7662 }, { "epoch": 0.4291074028446635, "grad_norm": 0.9694157838821411, "learning_rate": 3.8295000000000005e-05, "loss": 0.4278, "step": 7663 }, { "epoch": 0.4291634001567925, "grad_norm": 1.468010663986206, "learning_rate": 3.83e-05, "loss": 0.5533, "step": 7664 }, { "epoch": 0.4292193974689215, "grad_norm": 1.3029483556747437, "learning_rate": 3.8305e-05, "loss": 0.3681, "step": 7665 }, { "epoch": 0.42927539478105053, "grad_norm": 1.5082733631134033, "learning_rate": 3.8310000000000004e-05, "loss": 0.4196, "step": 7666 }, { "epoch": 0.42933139209317955, "grad_norm": 1.4620344638824463, "learning_rate": 3.8315e-05, "loss": 0.4516, "step": 7667 }, { "epoch": 0.4293873894053086, "grad_norm": 1.3122128248214722, "learning_rate": 3.832e-05, "loss": 0.5609, "step": 7668 }, { "epoch": 0.42944338671743754, "grad_norm": 1.352057695388794, "learning_rate": 3.8324999999999996e-05, "loss": 0.7109, "step": 7669 }, { "epoch": 0.42949938402956656, "grad_norm": 1.085706353187561, "learning_rate": 3.833e-05, "loss": 0.3828, "step": 7670 }, { "epoch": 0.4295553813416956, "grad_norm": 1.1122759580612183, "learning_rate": 3.8335000000000005e-05, "loss": 0.3223, "step": 7671 }, { 
"epoch": 0.4296113786538246, "grad_norm": 1.310568928718567, "learning_rate": 3.834e-05, "loss": 0.3985, "step": 7672 }, { "epoch": 0.4296673759659536, "grad_norm": 1.6231038570404053, "learning_rate": 3.8345000000000006e-05, "loss": 0.531, "step": 7673 }, { "epoch": 0.42972337327808263, "grad_norm": 1.6763800382614136, "learning_rate": 3.8350000000000004e-05, "loss": 0.4758, "step": 7674 }, { "epoch": 0.42977937059021165, "grad_norm": 1.440268874168396, "learning_rate": 3.8355e-05, "loss": 0.5195, "step": 7675 }, { "epoch": 0.42983536790234067, "grad_norm": 1.206406593322754, "learning_rate": 3.836e-05, "loss": 0.3597, "step": 7676 }, { "epoch": 0.4298913652144697, "grad_norm": 1.423871636390686, "learning_rate": 3.8365e-05, "loss": 0.3996, "step": 7677 }, { "epoch": 0.4299473625265987, "grad_norm": 1.1362015008926392, "learning_rate": 3.837e-05, "loss": 0.3242, "step": 7678 }, { "epoch": 0.43000335983872773, "grad_norm": 1.2650737762451172, "learning_rate": 3.8375e-05, "loss": 0.3938, "step": 7679 }, { "epoch": 0.43005935715085675, "grad_norm": 1.2767328023910522, "learning_rate": 3.838e-05, "loss": 0.3629, "step": 7680 }, { "epoch": 0.43011535446298577, "grad_norm": 1.2091460227966309, "learning_rate": 3.8385000000000006e-05, "loss": 0.3916, "step": 7681 }, { "epoch": 0.4301713517751148, "grad_norm": 1.4345455169677734, "learning_rate": 3.8390000000000003e-05, "loss": 0.4058, "step": 7682 }, { "epoch": 0.4302273490872438, "grad_norm": 1.095362663269043, "learning_rate": 3.8395e-05, "loss": 0.4278, "step": 7683 }, { "epoch": 0.4302833463993728, "grad_norm": 1.3193087577819824, "learning_rate": 3.8400000000000005e-05, "loss": 0.3639, "step": 7684 }, { "epoch": 0.43033934371150184, "grad_norm": 1.5311903953552246, "learning_rate": 3.8405e-05, "loss": 0.4734, "step": 7685 }, { "epoch": 0.43039534102363086, "grad_norm": 1.3788670301437378, "learning_rate": 3.841e-05, "loss": 0.4635, "step": 7686 }, { "epoch": 0.4304513383357599, "grad_norm": 1.1755338907241821, 
"learning_rate": 3.8415000000000004e-05, "loss": 0.3556, "step": 7687 }, { "epoch": 0.4305073356478889, "grad_norm": 1.3992962837219238, "learning_rate": 3.842e-05, "loss": 0.4594, "step": 7688 }, { "epoch": 0.4305633329600179, "grad_norm": 1.4017950296401978, "learning_rate": 3.8425e-05, "loss": 0.4544, "step": 7689 }, { "epoch": 0.43061933027214694, "grad_norm": 1.3413540124893188, "learning_rate": 3.8429999999999996e-05, "loss": 0.4211, "step": 7690 }, { "epoch": 0.43067532758427596, "grad_norm": 1.6711502075195312, "learning_rate": 3.843500000000001e-05, "loss": 0.4686, "step": 7691 }, { "epoch": 0.430731324896405, "grad_norm": 1.2665237188339233, "learning_rate": 3.8440000000000005e-05, "loss": 0.3576, "step": 7692 }, { "epoch": 0.430787322208534, "grad_norm": 1.1450090408325195, "learning_rate": 3.8445e-05, "loss": 0.304, "step": 7693 }, { "epoch": 0.430843319520663, "grad_norm": 1.2788779735565186, "learning_rate": 3.845e-05, "loss": 0.5303, "step": 7694 }, { "epoch": 0.43089931683279203, "grad_norm": 1.3919241428375244, "learning_rate": 3.8455000000000004e-05, "loss": 0.5688, "step": 7695 }, { "epoch": 0.43095531414492105, "grad_norm": 1.1815086603164673, "learning_rate": 3.846e-05, "loss": 0.4524, "step": 7696 }, { "epoch": 0.4310113114570501, "grad_norm": 1.1565638780593872, "learning_rate": 3.8465e-05, "loss": 0.4198, "step": 7697 }, { "epoch": 0.4310673087691791, "grad_norm": 1.2445493936538696, "learning_rate": 3.847e-05, "loss": 0.6976, "step": 7698 }, { "epoch": 0.4311233060813081, "grad_norm": 1.1842223405838013, "learning_rate": 3.8475e-05, "loss": 0.4367, "step": 7699 }, { "epoch": 0.43117930339343713, "grad_norm": 1.1668330430984497, "learning_rate": 3.848e-05, "loss": 0.386, "step": 7700 }, { "epoch": 0.43123530070556615, "grad_norm": 1.2867923974990845, "learning_rate": 3.8485e-05, "loss": 0.4306, "step": 7701 }, { "epoch": 0.43129129801769517, "grad_norm": 1.5501301288604736, "learning_rate": 3.8490000000000006e-05, "loss": 0.4631, "step": 
7702 }, { "epoch": 0.4313472953298242, "grad_norm": 1.4100300073623657, "learning_rate": 3.8495e-05, "loss": 0.5668, "step": 7703 }, { "epoch": 0.4314032926419532, "grad_norm": 1.12372887134552, "learning_rate": 3.85e-05, "loss": 0.3659, "step": 7704 }, { "epoch": 0.4314592899540822, "grad_norm": 1.3030691146850586, "learning_rate": 3.8505000000000005e-05, "loss": 0.353, "step": 7705 }, { "epoch": 0.43151528726621124, "grad_norm": 1.2100201845169067, "learning_rate": 3.851e-05, "loss": 0.3685, "step": 7706 }, { "epoch": 0.43157128457834026, "grad_norm": 1.2769105434417725, "learning_rate": 3.8515e-05, "loss": 0.4037, "step": 7707 }, { "epoch": 0.4316272818904693, "grad_norm": 1.2264678478240967, "learning_rate": 3.8520000000000004e-05, "loss": 0.4961, "step": 7708 }, { "epoch": 0.4316832792025983, "grad_norm": 1.1368542909622192, "learning_rate": 3.8525e-05, "loss": 0.5397, "step": 7709 }, { "epoch": 0.43173927651472727, "grad_norm": 1.2379624843597412, "learning_rate": 3.853e-05, "loss": 0.4158, "step": 7710 }, { "epoch": 0.4317952738268563, "grad_norm": 1.4977108240127563, "learning_rate": 3.8535e-05, "loss": 0.4232, "step": 7711 }, { "epoch": 0.4318512711389853, "grad_norm": 1.2705695629119873, "learning_rate": 3.854000000000001e-05, "loss": 0.3418, "step": 7712 }, { "epoch": 0.4319072684511143, "grad_norm": 1.5933589935302734, "learning_rate": 3.8545000000000004e-05, "loss": 0.4735, "step": 7713 }, { "epoch": 0.43196326576324334, "grad_norm": 1.3652877807617188, "learning_rate": 3.855e-05, "loss": 0.463, "step": 7714 }, { "epoch": 0.43201926307537236, "grad_norm": 1.5132670402526855, "learning_rate": 3.8555e-05, "loss": 0.4214, "step": 7715 }, { "epoch": 0.4320752603875014, "grad_norm": 1.5290735960006714, "learning_rate": 3.8560000000000004e-05, "loss": 0.4887, "step": 7716 }, { "epoch": 0.4321312576996304, "grad_norm": 1.0978847742080688, "learning_rate": 3.8565e-05, "loss": 0.318, "step": 7717 }, { "epoch": 0.4321872550117594, "grad_norm": 
1.5986554622650146, "learning_rate": 3.857e-05, "loss": 0.3287, "step": 7718 }, { "epoch": 0.43224325232388844, "grad_norm": 1.3401552438735962, "learning_rate": 3.8575e-05, "loss": 0.4553, "step": 7719 }, { "epoch": 0.43229924963601746, "grad_norm": 1.586480975151062, "learning_rate": 3.858e-05, "loss": 0.5544, "step": 7720 }, { "epoch": 0.4323552469481465, "grad_norm": 1.1275185346603394, "learning_rate": 3.8585000000000004e-05, "loss": 0.3601, "step": 7721 }, { "epoch": 0.4324112442602755, "grad_norm": 1.3213917016983032, "learning_rate": 3.859e-05, "loss": 0.4303, "step": 7722 }, { "epoch": 0.4324672415724045, "grad_norm": 1.2904232740402222, "learning_rate": 3.8595000000000006e-05, "loss": 0.4344, "step": 7723 }, { "epoch": 0.43252323888453353, "grad_norm": 1.3175349235534668, "learning_rate": 3.86e-05, "loss": 0.492, "step": 7724 }, { "epoch": 0.43257923619666255, "grad_norm": 1.0625821352005005, "learning_rate": 3.8605e-05, "loss": 0.3455, "step": 7725 }, { "epoch": 0.4326352335087916, "grad_norm": 1.3834384679794312, "learning_rate": 3.8610000000000005e-05, "loss": 0.395, "step": 7726 }, { "epoch": 0.4326912308209206, "grad_norm": 1.3488636016845703, "learning_rate": 3.8615e-05, "loss": 0.4571, "step": 7727 }, { "epoch": 0.4327472281330496, "grad_norm": 1.7581381797790527, "learning_rate": 3.862e-05, "loss": 0.4489, "step": 7728 }, { "epoch": 0.43280322544517863, "grad_norm": 1.182015299797058, "learning_rate": 3.8625e-05, "loss": 0.2721, "step": 7729 }, { "epoch": 0.43285922275730765, "grad_norm": 1.2194050550460815, "learning_rate": 3.863e-05, "loss": 0.4309, "step": 7730 }, { "epoch": 0.43291522006943667, "grad_norm": 1.1201328039169312, "learning_rate": 3.8635000000000005e-05, "loss": 0.3698, "step": 7731 }, { "epoch": 0.4329712173815657, "grad_norm": 1.129077672958374, "learning_rate": 3.864e-05, "loss": 0.4306, "step": 7732 }, { "epoch": 0.4330272146936947, "grad_norm": 1.4392932653427124, "learning_rate": 3.8645e-05, "loss": 0.4341, "step": 7733 }, { 
"epoch": 0.4330832120058237, "grad_norm": 1.3381160497665405, "learning_rate": 3.8650000000000004e-05, "loss": 0.3719, "step": 7734 }, { "epoch": 0.43313920931795274, "grad_norm": 1.306389570236206, "learning_rate": 3.8655e-05, "loss": 0.6106, "step": 7735 }, { "epoch": 0.43319520663008176, "grad_norm": 1.388902187347412, "learning_rate": 3.866e-05, "loss": 0.8112, "step": 7736 }, { "epoch": 0.4332512039422108, "grad_norm": 1.2088311910629272, "learning_rate": 3.8665e-05, "loss": 0.5203, "step": 7737 }, { "epoch": 0.4333072012543398, "grad_norm": 1.1478239297866821, "learning_rate": 3.867e-05, "loss": 0.4369, "step": 7738 }, { "epoch": 0.4333631985664688, "grad_norm": 1.2467337846755981, "learning_rate": 3.8675e-05, "loss": 0.5301, "step": 7739 }, { "epoch": 0.43341919587859784, "grad_norm": 1.1686300039291382, "learning_rate": 3.868e-05, "loss": 0.395, "step": 7740 }, { "epoch": 0.43347519319072686, "grad_norm": 1.4347566366195679, "learning_rate": 3.8685000000000007e-05, "loss": 0.4974, "step": 7741 }, { "epoch": 0.4335311905028559, "grad_norm": 1.3105829954147339, "learning_rate": 3.8690000000000004e-05, "loss": 0.433, "step": 7742 }, { "epoch": 0.4335871878149849, "grad_norm": 1.149224042892456, "learning_rate": 3.8695e-05, "loss": 0.5755, "step": 7743 }, { "epoch": 0.4336431851271139, "grad_norm": 1.3344298601150513, "learning_rate": 3.8700000000000006e-05, "loss": 0.5189, "step": 7744 }, { "epoch": 0.43369918243924294, "grad_norm": 1.017473816871643, "learning_rate": 3.8705e-05, "loss": 0.3963, "step": 7745 }, { "epoch": 0.43375517975137196, "grad_norm": 1.1155141592025757, "learning_rate": 3.871e-05, "loss": 0.389, "step": 7746 }, { "epoch": 0.433811177063501, "grad_norm": 1.467545986175537, "learning_rate": 3.8715000000000005e-05, "loss": 0.4094, "step": 7747 }, { "epoch": 0.43386717437563, "grad_norm": 1.255529522895813, "learning_rate": 3.872e-05, "loss": 0.4239, "step": 7748 }, { "epoch": 0.433923171687759, "grad_norm": 1.2360903024673462, 
"learning_rate": 3.8725e-05, "loss": 0.4332, "step": 7749 }, { "epoch": 0.43397916899988803, "grad_norm": 1.3143188953399658, "learning_rate": 3.873e-05, "loss": 0.4957, "step": 7750 }, { "epoch": 0.434035166312017, "grad_norm": 1.3152927160263062, "learning_rate": 3.873500000000001e-05, "loss": 0.4215, "step": 7751 }, { "epoch": 0.434091163624146, "grad_norm": 1.1224337816238403, "learning_rate": 3.8740000000000005e-05, "loss": 0.3767, "step": 7752 }, { "epoch": 0.43414716093627503, "grad_norm": 1.1857683658599854, "learning_rate": 3.8745e-05, "loss": 0.4179, "step": 7753 }, { "epoch": 0.43420315824840405, "grad_norm": 1.1691921949386597, "learning_rate": 3.875e-05, "loss": 0.3126, "step": 7754 }, { "epoch": 0.43425915556053307, "grad_norm": 1.1144546270370483, "learning_rate": 3.8755000000000004e-05, "loss": 0.4183, "step": 7755 }, { "epoch": 0.4343151528726621, "grad_norm": 1.328009009361267, "learning_rate": 3.876e-05, "loss": 0.5218, "step": 7756 }, { "epoch": 0.4343711501847911, "grad_norm": 1.1573644876480103, "learning_rate": 3.8765e-05, "loss": 0.3924, "step": 7757 }, { "epoch": 0.43442714749692013, "grad_norm": 1.4506826400756836, "learning_rate": 3.877e-05, "loss": 0.5673, "step": 7758 }, { "epoch": 0.43448314480904915, "grad_norm": 1.173905611038208, "learning_rate": 3.8775e-05, "loss": 0.4158, "step": 7759 }, { "epoch": 0.43453914212117817, "grad_norm": 1.3109742403030396, "learning_rate": 3.878e-05, "loss": 0.3942, "step": 7760 }, { "epoch": 0.4345951394333072, "grad_norm": 1.3459542989730835, "learning_rate": 3.8785e-05, "loss": 0.4062, "step": 7761 }, { "epoch": 0.4346511367454362, "grad_norm": 1.3858788013458252, "learning_rate": 3.8790000000000006e-05, "loss": 0.4552, "step": 7762 }, { "epoch": 0.4347071340575652, "grad_norm": 1.2540960311889648, "learning_rate": 3.8795000000000004e-05, "loss": 0.5135, "step": 7763 }, { "epoch": 0.43476313136969424, "grad_norm": 1.2480825185775757, "learning_rate": 3.88e-05, "loss": 0.5709, "step": 7764 }, { 
"epoch": 0.43481912868182326, "grad_norm": 1.3746126890182495, "learning_rate": 3.8805000000000005e-05, "loss": 0.349, "step": 7765 }, { "epoch": 0.4348751259939523, "grad_norm": 1.1591088771820068, "learning_rate": 3.881e-05, "loss": 0.3539, "step": 7766 }, { "epoch": 0.4349311233060813, "grad_norm": 1.2983176708221436, "learning_rate": 3.8815e-05, "loss": 0.5045, "step": 7767 }, { "epoch": 0.4349871206182103, "grad_norm": 1.2469282150268555, "learning_rate": 3.882e-05, "loss": 0.3675, "step": 7768 }, { "epoch": 0.43504311793033934, "grad_norm": 1.2342485189437866, "learning_rate": 3.8825e-05, "loss": 0.4768, "step": 7769 }, { "epoch": 0.43509911524246836, "grad_norm": 1.05726957321167, "learning_rate": 3.883e-05, "loss": 0.3183, "step": 7770 }, { "epoch": 0.4351551125545974, "grad_norm": 1.0779683589935303, "learning_rate": 3.8835e-05, "loss": 0.3627, "step": 7771 }, { "epoch": 0.4352111098667264, "grad_norm": 1.2601139545440674, "learning_rate": 3.884e-05, "loss": 0.3713, "step": 7772 }, { "epoch": 0.4352671071788554, "grad_norm": 1.1393673419952393, "learning_rate": 3.8845000000000005e-05, "loss": 0.3637, "step": 7773 }, { "epoch": 0.43532310449098444, "grad_norm": 1.3326117992401123, "learning_rate": 3.885e-05, "loss": 0.311, "step": 7774 }, { "epoch": 0.43537910180311346, "grad_norm": 1.242078423500061, "learning_rate": 3.8855e-05, "loss": 0.4623, "step": 7775 }, { "epoch": 0.4354350991152425, "grad_norm": 1.4016425609588623, "learning_rate": 3.8860000000000004e-05, "loss": 0.3817, "step": 7776 }, { "epoch": 0.4354910964273715, "grad_norm": 1.1242419481277466, "learning_rate": 3.8865e-05, "loss": 0.3917, "step": 7777 }, { "epoch": 0.4355470937395005, "grad_norm": 1.3407025337219238, "learning_rate": 3.887e-05, "loss": 0.3847, "step": 7778 }, { "epoch": 0.43560309105162953, "grad_norm": 1.1474888324737549, "learning_rate": 3.8875e-05, "loss": 0.3273, "step": 7779 }, { "epoch": 0.43565908836375855, "grad_norm": 1.417089819908142, "learning_rate": 3.888e-05, 
"loss": 0.5501, "step": 7780 }, { "epoch": 0.43571508567588757, "grad_norm": 1.3149428367614746, "learning_rate": 3.8885e-05, "loss": 0.6506, "step": 7781 }, { "epoch": 0.4357710829880166, "grad_norm": 1.202264666557312, "learning_rate": 3.889e-05, "loss": 0.5165, "step": 7782 }, { "epoch": 0.4358270803001456, "grad_norm": 1.2116482257843018, "learning_rate": 3.8895000000000006e-05, "loss": 0.4614, "step": 7783 }, { "epoch": 0.4358830776122746, "grad_norm": 1.240391492843628, "learning_rate": 3.8900000000000004e-05, "loss": 0.4139, "step": 7784 }, { "epoch": 0.43593907492440365, "grad_norm": 1.3046212196350098, "learning_rate": 3.8905e-05, "loss": 0.3925, "step": 7785 }, { "epoch": 0.43599507223653267, "grad_norm": 1.100469708442688, "learning_rate": 3.8910000000000005e-05, "loss": 0.452, "step": 7786 }, { "epoch": 0.4360510695486617, "grad_norm": 1.89679753780365, "learning_rate": 3.8915e-05, "loss": 0.685, "step": 7787 }, { "epoch": 0.4361070668607907, "grad_norm": 1.1426291465759277, "learning_rate": 3.892e-05, "loss": 0.4713, "step": 7788 }, { "epoch": 0.4361630641729197, "grad_norm": 1.0942741632461548, "learning_rate": 3.8925e-05, "loss": 0.363, "step": 7789 }, { "epoch": 0.43621906148504874, "grad_norm": 1.3065084218978882, "learning_rate": 3.893e-05, "loss": 0.435, "step": 7790 }, { "epoch": 0.43627505879717776, "grad_norm": 1.1707557439804077, "learning_rate": 3.8935e-05, "loss": 0.476, "step": 7791 }, { "epoch": 0.4363310561093068, "grad_norm": 1.4024662971496582, "learning_rate": 3.894e-05, "loss": 0.4503, "step": 7792 }, { "epoch": 0.43638705342143574, "grad_norm": 1.111179232597351, "learning_rate": 3.8945e-05, "loss": 0.4734, "step": 7793 }, { "epoch": 0.43644305073356476, "grad_norm": 1.2108148336410522, "learning_rate": 3.8950000000000005e-05, "loss": 0.4128, "step": 7794 }, { "epoch": 0.4364990480456938, "grad_norm": 1.275070071220398, "learning_rate": 3.8955e-05, "loss": 0.3925, "step": 7795 }, { "epoch": 0.4365550453578228, "grad_norm": 
1.3724416494369507, "learning_rate": 3.896e-05, "loss": 0.4165, "step": 7796 }, { "epoch": 0.4366110426699518, "grad_norm": 1.3684978485107422, "learning_rate": 3.8965000000000004e-05, "loss": 0.4433, "step": 7797 }, { "epoch": 0.43666703998208084, "grad_norm": 1.1701958179473877, "learning_rate": 3.897e-05, "loss": 0.4411, "step": 7798 }, { "epoch": 0.43672303729420986, "grad_norm": 1.102452278137207, "learning_rate": 3.8975e-05, "loss": 0.392, "step": 7799 }, { "epoch": 0.4367790346063389, "grad_norm": 1.1435978412628174, "learning_rate": 3.898e-05, "loss": 0.3787, "step": 7800 }, { "epoch": 0.4368350319184679, "grad_norm": 1.4434677362442017, "learning_rate": 3.8985e-05, "loss": 0.5624, "step": 7801 }, { "epoch": 0.4368910292305969, "grad_norm": 1.0614144802093506, "learning_rate": 3.8990000000000004e-05, "loss": 0.3437, "step": 7802 }, { "epoch": 0.43694702654272594, "grad_norm": 1.2822693586349487, "learning_rate": 3.8995e-05, "loss": 0.4794, "step": 7803 }, { "epoch": 0.43700302385485496, "grad_norm": 1.189255714416504, "learning_rate": 3.9000000000000006e-05, "loss": 0.3455, "step": 7804 }, { "epoch": 0.437059021166984, "grad_norm": 1.1027987003326416, "learning_rate": 3.9005000000000003e-05, "loss": 0.5032, "step": 7805 }, { "epoch": 0.437115018479113, "grad_norm": 1.2029386758804321, "learning_rate": 3.901e-05, "loss": 0.3261, "step": 7806 }, { "epoch": 0.437171015791242, "grad_norm": 1.1270817518234253, "learning_rate": 3.9015e-05, "loss": 0.393, "step": 7807 }, { "epoch": 0.43722701310337103, "grad_norm": 1.2903618812561035, "learning_rate": 3.902e-05, "loss": 0.3469, "step": 7808 }, { "epoch": 0.43728301041550005, "grad_norm": 1.3289403915405273, "learning_rate": 3.9025e-05, "loss": 0.4608, "step": 7809 }, { "epoch": 0.43733900772762907, "grad_norm": 1.1192606687545776, "learning_rate": 3.903e-05, "loss": 0.4093, "step": 7810 }, { "epoch": 0.4373950050397581, "grad_norm": 1.3133131265640259, "learning_rate": 3.9035e-05, "loss": 0.3993, "step": 7811 }, { 
"epoch": 0.4374510023518871, "grad_norm": 1.2583736181259155, "learning_rate": 3.9040000000000006e-05, "loss": 0.4404, "step": 7812 }, { "epoch": 0.4375069996640161, "grad_norm": 0.9704046845436096, "learning_rate": 3.9045e-05, "loss": 0.3489, "step": 7813 }, { "epoch": 0.43756299697614515, "grad_norm": 1.8509451150894165, "learning_rate": 3.905e-05, "loss": 0.4609, "step": 7814 }, { "epoch": 0.43761899428827417, "grad_norm": 1.298453688621521, "learning_rate": 3.9055000000000005e-05, "loss": 0.4883, "step": 7815 }, { "epoch": 0.4376749916004032, "grad_norm": 1.572828769683838, "learning_rate": 3.906e-05, "loss": 0.665, "step": 7816 }, { "epoch": 0.4377309889125322, "grad_norm": 1.1312446594238281, "learning_rate": 3.9065e-05, "loss": 0.368, "step": 7817 }, { "epoch": 0.4377869862246612, "grad_norm": 1.2823508977890015, "learning_rate": 3.9070000000000004e-05, "loss": 0.4562, "step": 7818 }, { "epoch": 0.43784298353679024, "grad_norm": 1.3399925231933594, "learning_rate": 3.9075e-05, "loss": 0.4222, "step": 7819 }, { "epoch": 0.43789898084891926, "grad_norm": 1.2265499830245972, "learning_rate": 3.908e-05, "loss": 0.3734, "step": 7820 }, { "epoch": 0.4379549781610483, "grad_norm": 1.4076920747756958, "learning_rate": 3.9085e-05, "loss": 0.3937, "step": 7821 }, { "epoch": 0.4380109754731773, "grad_norm": 1.2663395404815674, "learning_rate": 3.909000000000001e-05, "loss": 0.3566, "step": 7822 }, { "epoch": 0.4380669727853063, "grad_norm": 1.4407652616500854, "learning_rate": 3.9095000000000004e-05, "loss": 0.4386, "step": 7823 }, { "epoch": 0.43812297009743534, "grad_norm": 1.1476681232452393, "learning_rate": 3.91e-05, "loss": 0.3476, "step": 7824 }, { "epoch": 0.43817896740956436, "grad_norm": 1.0374912023544312, "learning_rate": 3.9105000000000006e-05, "loss": 0.3338, "step": 7825 }, { "epoch": 0.4382349647216934, "grad_norm": 1.0831481218338013, "learning_rate": 3.911e-05, "loss": 0.4214, "step": 7826 }, { "epoch": 0.4382909620338224, "grad_norm": 
1.1083035469055176, "learning_rate": 3.9115e-05, "loss": 0.318, "step": 7827 }, { "epoch": 0.4383469593459514, "grad_norm": 1.2883092164993286, "learning_rate": 3.912e-05, "loss": 0.4401, "step": 7828 }, { "epoch": 0.43840295665808043, "grad_norm": 1.313269019126892, "learning_rate": 3.9125e-05, "loss": 0.4767, "step": 7829 }, { "epoch": 0.43845895397020945, "grad_norm": 1.169537901878357, "learning_rate": 3.913e-05, "loss": 0.3928, "step": 7830 }, { "epoch": 0.43851495128233847, "grad_norm": 1.2053309679031372, "learning_rate": 3.9135e-05, "loss": 0.4535, "step": 7831 }, { "epoch": 0.4385709485944675, "grad_norm": 1.3004990816116333, "learning_rate": 3.914e-05, "loss": 0.4291, "step": 7832 }, { "epoch": 0.4386269459065965, "grad_norm": 1.3423177003860474, "learning_rate": 3.9145000000000006e-05, "loss": 0.3814, "step": 7833 }, { "epoch": 0.4386829432187255, "grad_norm": 1.397086262702942, "learning_rate": 3.915e-05, "loss": 0.423, "step": 7834 }, { "epoch": 0.4387389405308545, "grad_norm": 1.1477322578430176, "learning_rate": 3.9155e-05, "loss": 0.451, "step": 7835 }, { "epoch": 0.4387949378429835, "grad_norm": 1.1389315128326416, "learning_rate": 3.9160000000000005e-05, "loss": 0.4046, "step": 7836 }, { "epoch": 0.43885093515511253, "grad_norm": 1.3055751323699951, "learning_rate": 3.9165e-05, "loss": 0.4471, "step": 7837 }, { "epoch": 0.43890693246724155, "grad_norm": 1.1227158308029175, "learning_rate": 3.917e-05, "loss": 0.3757, "step": 7838 }, { "epoch": 0.43896292977937057, "grad_norm": 1.2383346557617188, "learning_rate": 3.9175000000000004e-05, "loss": 0.4505, "step": 7839 }, { "epoch": 0.4390189270914996, "grad_norm": 1.3493623733520508, "learning_rate": 3.918e-05, "loss": 0.5136, "step": 7840 }, { "epoch": 0.4390749244036286, "grad_norm": 1.250669240951538, "learning_rate": 3.9185e-05, "loss": 0.2715, "step": 7841 }, { "epoch": 0.4391309217157576, "grad_norm": 1.2727452516555786, "learning_rate": 3.919e-05, "loss": 0.4297, "step": 7842 }, { "epoch": 
0.43918691902788665, "grad_norm": 1.1866668462753296, "learning_rate": 3.919500000000001e-05, "loss": 0.3604, "step": 7843 }, { "epoch": 0.43924291634001567, "grad_norm": 1.1657792329788208, "learning_rate": 3.9200000000000004e-05, "loss": 0.3871, "step": 7844 }, { "epoch": 0.4392989136521447, "grad_norm": 1.3171136379241943, "learning_rate": 3.9205e-05, "loss": 0.4378, "step": 7845 }, { "epoch": 0.4393549109642737, "grad_norm": 1.1899813413619995, "learning_rate": 3.921e-05, "loss": 0.3927, "step": 7846 }, { "epoch": 0.4394109082764027, "grad_norm": 1.1432589292526245, "learning_rate": 3.9215e-05, "loss": 0.3734, "step": 7847 }, { "epoch": 0.43946690558853174, "grad_norm": 1.1378377676010132, "learning_rate": 3.922e-05, "loss": 0.3443, "step": 7848 }, { "epoch": 0.43952290290066076, "grad_norm": 1.4463274478912354, "learning_rate": 3.9225e-05, "loss": 0.5706, "step": 7849 }, { "epoch": 0.4395789002127898, "grad_norm": 1.6434000730514526, "learning_rate": 3.923e-05, "loss": 0.5424, "step": 7850 }, { "epoch": 0.4396348975249188, "grad_norm": 1.0726593732833862, "learning_rate": 3.9235e-05, "loss": 0.3867, "step": 7851 }, { "epoch": 0.4396908948370478, "grad_norm": 1.233354926109314, "learning_rate": 3.9240000000000004e-05, "loss": 0.4477, "step": 7852 }, { "epoch": 0.43974689214917684, "grad_norm": 1.4947172403335571, "learning_rate": 3.9245e-05, "loss": 0.3989, "step": 7853 }, { "epoch": 0.43980288946130586, "grad_norm": 1.1785705089569092, "learning_rate": 3.9250000000000005e-05, "loss": 0.343, "step": 7854 }, { "epoch": 0.4398588867734349, "grad_norm": 1.3784432411193848, "learning_rate": 3.9255e-05, "loss": 0.4166, "step": 7855 }, { "epoch": 0.4399148840855639, "grad_norm": 1.8765945434570312, "learning_rate": 3.926e-05, "loss": 0.4711, "step": 7856 }, { "epoch": 0.4399708813976929, "grad_norm": 1.1780389547348022, "learning_rate": 3.9265000000000004e-05, "loss": 0.437, "step": 7857 }, { "epoch": 0.44002687870982193, "grad_norm": 1.1090939044952393, 
"learning_rate": 3.927e-05, "loss": 0.3735, "step": 7858 }, { "epoch": 0.44008287602195095, "grad_norm": 1.3783758878707886, "learning_rate": 3.9275e-05, "loss": 0.521, "step": 7859 }, { "epoch": 0.44013887333407997, "grad_norm": 1.1683554649353027, "learning_rate": 3.9280000000000003e-05, "loss": 0.4162, "step": 7860 }, { "epoch": 0.440194870646209, "grad_norm": 1.0943423509597778, "learning_rate": 3.9285e-05, "loss": 0.5115, "step": 7861 }, { "epoch": 0.440250867958338, "grad_norm": 1.2970889806747437, "learning_rate": 3.9290000000000005e-05, "loss": 0.5817, "step": 7862 }, { "epoch": 0.44030686527046703, "grad_norm": 1.1418607234954834, "learning_rate": 3.9295e-05, "loss": 0.3782, "step": 7863 }, { "epoch": 0.44036286258259605, "grad_norm": 1.0735609531402588, "learning_rate": 3.9300000000000007e-05, "loss": 0.3981, "step": 7864 }, { "epoch": 0.44041885989472507, "grad_norm": 1.6328279972076416, "learning_rate": 3.9305000000000004e-05, "loss": 0.4539, "step": 7865 }, { "epoch": 0.4404748572068541, "grad_norm": 1.295376181602478, "learning_rate": 3.931e-05, "loss": 0.46, "step": 7866 }, { "epoch": 0.4405308545189831, "grad_norm": 1.2095136642456055, "learning_rate": 3.9315e-05, "loss": 0.3505, "step": 7867 }, { "epoch": 0.4405868518311121, "grad_norm": 1.2632331848144531, "learning_rate": 3.932e-05, "loss": 0.5245, "step": 7868 }, { "epoch": 0.44064284914324114, "grad_norm": 1.3711323738098145, "learning_rate": 3.9325e-05, "loss": 0.4808, "step": 7869 }, { "epoch": 0.44069884645537016, "grad_norm": 1.4462391138076782, "learning_rate": 3.933e-05, "loss": 0.4866, "step": 7870 }, { "epoch": 0.4407548437674992, "grad_norm": 1.1605569124221802, "learning_rate": 3.9335e-05, "loss": 0.4799, "step": 7871 }, { "epoch": 0.4408108410796282, "grad_norm": 1.3611857891082764, "learning_rate": 3.9340000000000006e-05, "loss": 0.314, "step": 7872 }, { "epoch": 0.4408668383917572, "grad_norm": 1.3676567077636719, "learning_rate": 3.9345000000000004e-05, "loss": 0.4342, "step": 
7873 }, { "epoch": 0.44092283570388624, "grad_norm": 1.602771282196045, "learning_rate": 3.935e-05, "loss": 0.5213, "step": 7874 }, { "epoch": 0.4409788330160152, "grad_norm": 1.1253604888916016, "learning_rate": 3.9355000000000005e-05, "loss": 0.3175, "step": 7875 }, { "epoch": 0.4410348303281442, "grad_norm": 1.1032730340957642, "learning_rate": 3.936e-05, "loss": 0.3832, "step": 7876 }, { "epoch": 0.44109082764027324, "grad_norm": 1.2083414793014526, "learning_rate": 3.9365e-05, "loss": 0.321, "step": 7877 }, { "epoch": 0.44114682495240226, "grad_norm": 1.108339548110962, "learning_rate": 3.9370000000000004e-05, "loss": 0.3668, "step": 7878 }, { "epoch": 0.4412028222645313, "grad_norm": 1.3832827806472778, "learning_rate": 3.9375e-05, "loss": 0.3733, "step": 7879 }, { "epoch": 0.4412588195766603, "grad_norm": 1.0996882915496826, "learning_rate": 3.938e-05, "loss": 0.365, "step": 7880 }, { "epoch": 0.4413148168887893, "grad_norm": 1.3138083219528198, "learning_rate": 3.9384999999999996e-05, "loss": 0.46, "step": 7881 }, { "epoch": 0.44137081420091834, "grad_norm": 2.8446733951568604, "learning_rate": 3.939e-05, "loss": 0.4353, "step": 7882 }, { "epoch": 0.44142681151304736, "grad_norm": 1.1881431341171265, "learning_rate": 3.9395000000000005e-05, "loss": 0.4411, "step": 7883 }, { "epoch": 0.4414828088251764, "grad_norm": 1.2280796766281128, "learning_rate": 3.94e-05, "loss": 0.568, "step": 7884 }, { "epoch": 0.4415388061373054, "grad_norm": 1.5344942808151245, "learning_rate": 3.9405e-05, "loss": 0.381, "step": 7885 }, { "epoch": 0.4415948034494344, "grad_norm": 1.349732756614685, "learning_rate": 3.9410000000000004e-05, "loss": 0.4975, "step": 7886 }, { "epoch": 0.44165080076156343, "grad_norm": 1.2575490474700928, "learning_rate": 3.9415e-05, "loss": 0.4531, "step": 7887 }, { "epoch": 0.44170679807369245, "grad_norm": 1.1180615425109863, "learning_rate": 3.942e-05, "loss": 0.4269, "step": 7888 }, { "epoch": 0.44176279538582147, "grad_norm": 1.3497393131256104, 
"learning_rate": 3.9425e-05, "loss": 0.5359, "step": 7889 }, { "epoch": 0.4418187926979505, "grad_norm": 1.1241097450256348, "learning_rate": 3.943e-05, "loss": 0.463, "step": 7890 }, { "epoch": 0.4418747900100795, "grad_norm": 1.443230390548706, "learning_rate": 3.9435e-05, "loss": 0.6691, "step": 7891 }, { "epoch": 0.44193078732220853, "grad_norm": 1.1160078048706055, "learning_rate": 3.944e-05, "loss": 0.3997, "step": 7892 }, { "epoch": 0.44198678463433755, "grad_norm": 1.2423415184020996, "learning_rate": 3.9445000000000006e-05, "loss": 0.4094, "step": 7893 }, { "epoch": 0.44204278194646657, "grad_norm": 1.3356132507324219, "learning_rate": 3.9450000000000003e-05, "loss": 0.4605, "step": 7894 }, { "epoch": 0.4420987792585956, "grad_norm": 1.4842458963394165, "learning_rate": 3.9455e-05, "loss": 0.4993, "step": 7895 }, { "epoch": 0.4421547765707246, "grad_norm": 1.5475319623947144, "learning_rate": 3.9460000000000005e-05, "loss": 0.3421, "step": 7896 }, { "epoch": 0.4422107738828536, "grad_norm": 1.338922381401062, "learning_rate": 3.9465e-05, "loss": 0.4591, "step": 7897 }, { "epoch": 0.44226677119498264, "grad_norm": 1.264709234237671, "learning_rate": 3.947e-05, "loss": 0.5858, "step": 7898 }, { "epoch": 0.44232276850711166, "grad_norm": 1.154921293258667, "learning_rate": 3.9475000000000004e-05, "loss": 0.4254, "step": 7899 }, { "epoch": 0.4423787658192407, "grad_norm": 1.3502569198608398, "learning_rate": 3.948e-05, "loss": 0.4737, "step": 7900 }, { "epoch": 0.4424347631313697, "grad_norm": 1.159049153327942, "learning_rate": 3.9485e-05, "loss": 0.4563, "step": 7901 }, { "epoch": 0.4424907604434987, "grad_norm": 1.2016639709472656, "learning_rate": 3.9489999999999996e-05, "loss": 0.5038, "step": 7902 }, { "epoch": 0.44254675775562774, "grad_norm": 1.406581997871399, "learning_rate": 3.949500000000001e-05, "loss": 0.5906, "step": 7903 }, { "epoch": 0.44260275506775676, "grad_norm": 1.1897046566009521, "learning_rate": 3.9500000000000005e-05, "loss": 0.4028, 
"step": 7904 }, { "epoch": 0.4426587523798858, "grad_norm": 1.1853468418121338, "learning_rate": 3.9505e-05, "loss": 0.3767, "step": 7905 }, { "epoch": 0.4427147496920148, "grad_norm": 1.3921821117401123, "learning_rate": 3.951e-05, "loss": 0.4843, "step": 7906 }, { "epoch": 0.4427707470041438, "grad_norm": 1.3302898406982422, "learning_rate": 3.9515000000000004e-05, "loss": 0.4561, "step": 7907 }, { "epoch": 0.44282674431627284, "grad_norm": 1.3814650774002075, "learning_rate": 3.952e-05, "loss": 0.4598, "step": 7908 }, { "epoch": 0.44288274162840185, "grad_norm": 1.281061053276062, "learning_rate": 3.9525e-05, "loss": 0.4592, "step": 7909 }, { "epoch": 0.4429387389405309, "grad_norm": 1.0565319061279297, "learning_rate": 3.953e-05, "loss": 0.3522, "step": 7910 }, { "epoch": 0.4429947362526599, "grad_norm": 1.2696032524108887, "learning_rate": 3.9535e-05, "loss": 0.4059, "step": 7911 }, { "epoch": 0.4430507335647889, "grad_norm": 1.0670174360275269, "learning_rate": 3.954e-05, "loss": 0.3732, "step": 7912 }, { "epoch": 0.44310673087691793, "grad_norm": 1.371474027633667, "learning_rate": 3.9545e-05, "loss": 0.4391, "step": 7913 }, { "epoch": 0.44316272818904695, "grad_norm": 1.1317119598388672, "learning_rate": 3.9550000000000006e-05, "loss": 0.3645, "step": 7914 }, { "epoch": 0.44321872550117597, "grad_norm": 1.0873996019363403, "learning_rate": 3.9555e-05, "loss": 0.4618, "step": 7915 }, { "epoch": 0.443274722813305, "grad_norm": 1.397986888885498, "learning_rate": 3.956e-05, "loss": 0.539, "step": 7916 }, { "epoch": 0.44333072012543395, "grad_norm": 1.3856488466262817, "learning_rate": 3.9565000000000005e-05, "loss": 0.6002, "step": 7917 }, { "epoch": 0.44338671743756297, "grad_norm": 1.1605008840560913, "learning_rate": 3.957e-05, "loss": 0.3676, "step": 7918 }, { "epoch": 0.443442714749692, "grad_norm": 1.2629355192184448, "learning_rate": 3.9575e-05, "loss": 0.3796, "step": 7919 }, { "epoch": 0.443498712061821, "grad_norm": 1.28914213180542, "learning_rate": 
3.958e-05, "loss": 0.4858, "step": 7920 }, { "epoch": 0.44355470937395003, "grad_norm": 1.1475467681884766, "learning_rate": 3.9585e-05, "loss": 0.3757, "step": 7921 }, { "epoch": 0.44361070668607905, "grad_norm": 1.3344305753707886, "learning_rate": 3.959e-05, "loss": 0.5062, "step": 7922 }, { "epoch": 0.44366670399820807, "grad_norm": 1.1459726095199585, "learning_rate": 3.9595e-05, "loss": 0.3602, "step": 7923 }, { "epoch": 0.4437227013103371, "grad_norm": 1.5019686222076416, "learning_rate": 3.960000000000001e-05, "loss": 0.4824, "step": 7924 }, { "epoch": 0.4437786986224661, "grad_norm": 1.6637812852859497, "learning_rate": 3.9605000000000005e-05, "loss": 0.5459, "step": 7925 }, { "epoch": 0.4438346959345951, "grad_norm": 1.2453186511993408, "learning_rate": 3.961e-05, "loss": 0.3356, "step": 7926 }, { "epoch": 0.44389069324672414, "grad_norm": 1.4240748882293701, "learning_rate": 3.9615e-05, "loss": 0.3513, "step": 7927 }, { "epoch": 0.44394669055885316, "grad_norm": 1.5543899536132812, "learning_rate": 3.9620000000000004e-05, "loss": 0.4535, "step": 7928 }, { "epoch": 0.4440026878709822, "grad_norm": 1.4476829767227173, "learning_rate": 3.9625e-05, "loss": 0.4958, "step": 7929 }, { "epoch": 0.4440586851831112, "grad_norm": 1.2234768867492676, "learning_rate": 3.963e-05, "loss": 0.3737, "step": 7930 }, { "epoch": 0.4441146824952402, "grad_norm": 1.4079090356826782, "learning_rate": 3.9635e-05, "loss": 0.4697, "step": 7931 }, { "epoch": 0.44417067980736924, "grad_norm": 1.09403657913208, "learning_rate": 3.964e-05, "loss": 0.3721, "step": 7932 }, { "epoch": 0.44422667711949826, "grad_norm": 1.1749703884124756, "learning_rate": 3.9645000000000004e-05, "loss": 0.3767, "step": 7933 }, { "epoch": 0.4442826744316273, "grad_norm": 1.252769112586975, "learning_rate": 3.965e-05, "loss": 0.3794, "step": 7934 }, { "epoch": 0.4443386717437563, "grad_norm": 1.4599592685699463, "learning_rate": 3.9655000000000006e-05, "loss": 0.394, "step": 7935 }, { "epoch": 
0.4443946690558853, "grad_norm": 1.324910044670105, "learning_rate": 3.966e-05, "loss": 0.4233, "step": 7936 }, { "epoch": 0.44445066636801434, "grad_norm": 1.1552929878234863, "learning_rate": 3.9665e-05, "loss": 0.482, "step": 7937 }, { "epoch": 0.44450666368014335, "grad_norm": 1.4286192655563354, "learning_rate": 3.9670000000000005e-05, "loss": 0.3971, "step": 7938 }, { "epoch": 0.4445626609922724, "grad_norm": 1.4314066171646118, "learning_rate": 3.9675e-05, "loss": 0.5845, "step": 7939 }, { "epoch": 0.4446186583044014, "grad_norm": 1.1849724054336548, "learning_rate": 3.968e-05, "loss": 0.3866, "step": 7940 }, { "epoch": 0.4446746556165304, "grad_norm": 1.1440964937210083, "learning_rate": 3.9685e-05, "loss": 0.5633, "step": 7941 }, { "epoch": 0.44473065292865943, "grad_norm": 1.0037082433700562, "learning_rate": 3.969e-05, "loss": 0.3313, "step": 7942 }, { "epoch": 0.44478665024078845, "grad_norm": 1.0694080591201782, "learning_rate": 3.9695000000000005e-05, "loss": 0.4466, "step": 7943 }, { "epoch": 0.44484264755291747, "grad_norm": 1.5107334852218628, "learning_rate": 3.97e-05, "loss": 0.5122, "step": 7944 }, { "epoch": 0.4448986448650465, "grad_norm": 1.397767424583435, "learning_rate": 3.9705e-05, "loss": 0.4112, "step": 7945 }, { "epoch": 0.4449546421771755, "grad_norm": 1.3833537101745605, "learning_rate": 3.9710000000000004e-05, "loss": 0.3841, "step": 7946 }, { "epoch": 0.4450106394893045, "grad_norm": 1.301817536354065, "learning_rate": 3.9715e-05, "loss": 0.5478, "step": 7947 }, { "epoch": 0.44506663680143355, "grad_norm": 1.5289353132247925, "learning_rate": 3.972e-05, "loss": 0.4269, "step": 7948 }, { "epoch": 0.44512263411356257, "grad_norm": 1.3700004816055298, "learning_rate": 3.9725e-05, "loss": 0.5458, "step": 7949 }, { "epoch": 0.4451786314256916, "grad_norm": 1.2713686227798462, "learning_rate": 3.973e-05, "loss": 0.4822, "step": 7950 }, { "epoch": 0.4452346287378206, "grad_norm": 1.1312288045883179, "learning_rate": 3.9735e-05, "loss": 
0.3152, "step": 7951 }, { "epoch": 0.4452906260499496, "grad_norm": 1.3867393732070923, "learning_rate": 3.974e-05, "loss": 0.3717, "step": 7952 }, { "epoch": 0.44534662336207864, "grad_norm": 1.2697700262069702, "learning_rate": 3.9745000000000007e-05, "loss": 0.659, "step": 7953 }, { "epoch": 0.44540262067420766, "grad_norm": 1.2105894088745117, "learning_rate": 3.9750000000000004e-05, "loss": 0.4468, "step": 7954 }, { "epoch": 0.4454586179863367, "grad_norm": 1.2118644714355469, "learning_rate": 3.9755e-05, "loss": 0.3919, "step": 7955 }, { "epoch": 0.4455146152984657, "grad_norm": 1.1268454790115356, "learning_rate": 3.9760000000000006e-05, "loss": 0.3192, "step": 7956 }, { "epoch": 0.4455706126105947, "grad_norm": 1.1013085842132568, "learning_rate": 3.9765e-05, "loss": 0.3714, "step": 7957 }, { "epoch": 0.4456266099227237, "grad_norm": 1.3739217519760132, "learning_rate": 3.977e-05, "loss": 0.4496, "step": 7958 }, { "epoch": 0.4456826072348527, "grad_norm": 0.9961612224578857, "learning_rate": 3.9775e-05, "loss": 0.3866, "step": 7959 }, { "epoch": 0.4457386045469817, "grad_norm": 1.3658822774887085, "learning_rate": 3.978e-05, "loss": 0.4917, "step": 7960 }, { "epoch": 0.44579460185911074, "grad_norm": 1.3278608322143555, "learning_rate": 3.9785e-05, "loss": 0.3447, "step": 7961 }, { "epoch": 0.44585059917123976, "grad_norm": 1.2886418104171753, "learning_rate": 3.979e-05, "loss": 0.4889, "step": 7962 }, { "epoch": 0.4459065964833688, "grad_norm": 1.2320998907089233, "learning_rate": 3.979500000000001e-05, "loss": 0.4547, "step": 7963 }, { "epoch": 0.4459625937954978, "grad_norm": 1.3106369972229004, "learning_rate": 3.9800000000000005e-05, "loss": 0.4884, "step": 7964 }, { "epoch": 0.4460185911076268, "grad_norm": 1.3171268701553345, "learning_rate": 3.9805e-05, "loss": 0.2768, "step": 7965 }, { "epoch": 0.44607458841975584, "grad_norm": 1.265672206878662, "learning_rate": 3.981e-05, "loss": 0.5514, "step": 7966 }, { "epoch": 0.44613058573188485, 
"grad_norm": 1.2978721857070923, "learning_rate": 3.9815000000000004e-05, "loss": 0.4264, "step": 7967 }, { "epoch": 0.4461865830440139, "grad_norm": 1.543387770652771, "learning_rate": 3.982e-05, "loss": 0.5642, "step": 7968 }, { "epoch": 0.4462425803561429, "grad_norm": 1.3579707145690918, "learning_rate": 3.9825e-05, "loss": 0.3669, "step": 7969 }, { "epoch": 0.4462985776682719, "grad_norm": 1.495080828666687, "learning_rate": 3.983e-05, "loss": 0.4153, "step": 7970 }, { "epoch": 0.44635457498040093, "grad_norm": 1.2408615350723267, "learning_rate": 3.9835e-05, "loss": 0.4271, "step": 7971 }, { "epoch": 0.44641057229252995, "grad_norm": 1.266782283782959, "learning_rate": 3.984e-05, "loss": 0.3797, "step": 7972 }, { "epoch": 0.44646656960465897, "grad_norm": 1.8094933032989502, "learning_rate": 3.9845e-05, "loss": 0.582, "step": 7973 }, { "epoch": 0.446522566916788, "grad_norm": 1.3468682765960693, "learning_rate": 3.9850000000000006e-05, "loss": 0.473, "step": 7974 }, { "epoch": 0.446578564228917, "grad_norm": 1.3554326295852661, "learning_rate": 3.9855000000000004e-05, "loss": 0.5442, "step": 7975 }, { "epoch": 0.446634561541046, "grad_norm": 1.2305324077606201, "learning_rate": 3.986e-05, "loss": 0.4173, "step": 7976 }, { "epoch": 0.44669055885317505, "grad_norm": 0.9923909902572632, "learning_rate": 3.9865000000000005e-05, "loss": 0.3104, "step": 7977 }, { "epoch": 0.44674655616530407, "grad_norm": 1.1111879348754883, "learning_rate": 3.987e-05, "loss": 0.4633, "step": 7978 }, { "epoch": 0.4468025534774331, "grad_norm": 1.341869592666626, "learning_rate": 3.9875e-05, "loss": 0.3594, "step": 7979 }, { "epoch": 0.4468585507895621, "grad_norm": 1.3253010511398315, "learning_rate": 3.988e-05, "loss": 0.5988, "step": 7980 }, { "epoch": 0.4469145481016911, "grad_norm": 1.386620044708252, "learning_rate": 3.9885e-05, "loss": 0.4883, "step": 7981 }, { "epoch": 0.44697054541382014, "grad_norm": 1.2293016910552979, "learning_rate": 3.989e-05, "loss": 0.4162, "step": 
7982 }, { "epoch": 0.44702654272594916, "grad_norm": 1.0457274913787842, "learning_rate": 3.9895000000000003e-05, "loss": 0.3897, "step": 7983 }, { "epoch": 0.4470825400380782, "grad_norm": 1.4893736839294434, "learning_rate": 3.99e-05, "loss": 0.4356, "step": 7984 }, { "epoch": 0.4471385373502072, "grad_norm": 1.1966415643692017, "learning_rate": 3.9905000000000005e-05, "loss": 0.3838, "step": 7985 }, { "epoch": 0.4471945346623362, "grad_norm": 1.4571969509124756, "learning_rate": 3.991e-05, "loss": 0.5493, "step": 7986 }, { "epoch": 0.44725053197446524, "grad_norm": 1.3030469417572021, "learning_rate": 3.9915e-05, "loss": 0.6022, "step": 7987 }, { "epoch": 0.44730652928659426, "grad_norm": 1.2906101942062378, "learning_rate": 3.9920000000000004e-05, "loss": 0.6794, "step": 7988 }, { "epoch": 0.4473625265987233, "grad_norm": 1.244630217552185, "learning_rate": 3.9925e-05, "loss": 0.3808, "step": 7989 }, { "epoch": 0.4474185239108523, "grad_norm": 1.330586314201355, "learning_rate": 3.993e-05, "loss": 0.5, "step": 7990 }, { "epoch": 0.4474745212229813, "grad_norm": 1.4680848121643066, "learning_rate": 3.9935e-05, "loss": 0.4274, "step": 7991 }, { "epoch": 0.44753051853511033, "grad_norm": 1.2660781145095825, "learning_rate": 3.994e-05, "loss": 0.4114, "step": 7992 }, { "epoch": 0.44758651584723935, "grad_norm": 1.614398717880249, "learning_rate": 3.9945000000000005e-05, "loss": 0.5062, "step": 7993 }, { "epoch": 0.44764251315936837, "grad_norm": 1.0764151811599731, "learning_rate": 3.995e-05, "loss": 0.3746, "step": 7994 }, { "epoch": 0.4476985104714974, "grad_norm": 1.3683723211288452, "learning_rate": 3.9955000000000006e-05, "loss": 0.5567, "step": 7995 }, { "epoch": 0.4477545077836264, "grad_norm": 1.0107738971710205, "learning_rate": 3.9960000000000004e-05, "loss": 0.3768, "step": 7996 }, { "epoch": 0.44781050509575543, "grad_norm": 1.3045953512191772, "learning_rate": 3.9965e-05, "loss": 0.5264, "step": 7997 }, { "epoch": 0.44786650240788445, "grad_norm": 
1.2756482362747192, "learning_rate": 3.9970000000000005e-05, "loss": 0.4646, "step": 7998 }, { "epoch": 0.4479224997200134, "grad_norm": 1.2149758338928223, "learning_rate": 3.9975e-05, "loss": 0.4615, "step": 7999 }, { "epoch": 0.44797849703214243, "grad_norm": 1.2461477518081665, "learning_rate": 3.998e-05, "loss": 0.4821, "step": 8000 }, { "epoch": 0.44803449434427145, "grad_norm": 1.3961334228515625, "learning_rate": 3.9985e-05, "loss": 0.4495, "step": 8001 }, { "epoch": 0.44809049165640047, "grad_norm": 1.2145496606826782, "learning_rate": 3.999e-05, "loss": 0.3813, "step": 8002 }, { "epoch": 0.4481464889685295, "grad_norm": 1.1947134733200073, "learning_rate": 3.9995000000000006e-05, "loss": 0.4029, "step": 8003 }, { "epoch": 0.4482024862806585, "grad_norm": 1.1587896347045898, "learning_rate": 4e-05, "loss": 0.4203, "step": 8004 }, { "epoch": 0.4482584835927875, "grad_norm": 1.4524353742599487, "learning_rate": 4.0005e-05, "loss": 0.5276, "step": 8005 }, { "epoch": 0.44831448090491655, "grad_norm": 1.0707982778549194, "learning_rate": 4.0010000000000005e-05, "loss": 0.3752, "step": 8006 }, { "epoch": 0.44837047821704556, "grad_norm": 1.0258558988571167, "learning_rate": 4.0015e-05, "loss": 0.3766, "step": 8007 }, { "epoch": 0.4484264755291746, "grad_norm": 1.292534589767456, "learning_rate": 4.002e-05, "loss": 0.4509, "step": 8008 }, { "epoch": 0.4484824728413036, "grad_norm": 1.4285045862197876, "learning_rate": 4.0025000000000004e-05, "loss": 0.4806, "step": 8009 }, { "epoch": 0.4485384701534326, "grad_norm": 1.1258915662765503, "learning_rate": 4.003e-05, "loss": 0.3962, "step": 8010 }, { "epoch": 0.44859446746556164, "grad_norm": 1.215669870376587, "learning_rate": 4.0035e-05, "loss": 0.5454, "step": 8011 }, { "epoch": 0.44865046477769066, "grad_norm": 1.133365273475647, "learning_rate": 4.004e-05, "loss": 0.4184, "step": 8012 }, { "epoch": 0.4487064620898197, "grad_norm": 1.6083720922470093, "learning_rate": 4.0045e-05, "loss": 0.4896, "step": 8013 }, { 
"epoch": 0.4487624594019487, "grad_norm": 1.1899293661117554, "learning_rate": 4.0050000000000004e-05, "loss": 0.4205, "step": 8014 }, { "epoch": 0.4488184567140777, "grad_norm": 1.310274600982666, "learning_rate": 4.0055e-05, "loss": 0.4943, "step": 8015 }, { "epoch": 0.44887445402620674, "grad_norm": 1.2298719882965088, "learning_rate": 4.0060000000000006e-05, "loss": 0.3656, "step": 8016 }, { "epoch": 0.44893045133833576, "grad_norm": 1.258176565170288, "learning_rate": 4.0065000000000003e-05, "loss": 0.3348, "step": 8017 }, { "epoch": 0.4489864486504648, "grad_norm": 1.1516302824020386, "learning_rate": 4.007e-05, "loss": 0.5148, "step": 8018 }, { "epoch": 0.4490424459625938, "grad_norm": 1.500391960144043, "learning_rate": 4.0075e-05, "loss": 0.4206, "step": 8019 }, { "epoch": 0.4490984432747228, "grad_norm": 1.087298035621643, "learning_rate": 4.008e-05, "loss": 0.3701, "step": 8020 }, { "epoch": 0.44915444058685183, "grad_norm": 1.2705739736557007, "learning_rate": 4.0085e-05, "loss": 0.4524, "step": 8021 }, { "epoch": 0.44921043789898085, "grad_norm": 1.171284794807434, "learning_rate": 4.009e-05, "loss": 0.3866, "step": 8022 }, { "epoch": 0.44926643521110987, "grad_norm": 1.157090425491333, "learning_rate": 4.0095e-05, "loss": 0.4116, "step": 8023 }, { "epoch": 0.4493224325232389, "grad_norm": 1.1388421058654785, "learning_rate": 4.0100000000000006e-05, "loss": 0.4194, "step": 8024 }, { "epoch": 0.4493784298353679, "grad_norm": 1.231940746307373, "learning_rate": 4.0105e-05, "loss": 0.3605, "step": 8025 }, { "epoch": 0.44943442714749693, "grad_norm": 1.280762791633606, "learning_rate": 4.011e-05, "loss": 0.5177, "step": 8026 }, { "epoch": 0.44949042445962595, "grad_norm": 1.1017980575561523, "learning_rate": 4.0115000000000005e-05, "loss": 0.3673, "step": 8027 }, { "epoch": 0.44954642177175497, "grad_norm": 1.2504467964172363, "learning_rate": 4.012e-05, "loss": 0.512, "step": 8028 }, { "epoch": 0.449602419083884, "grad_norm": 1.2567552328109741, 
"learning_rate": 4.0125e-05, "loss": 0.3979, "step": 8029 }, { "epoch": 0.449658416396013, "grad_norm": 1.2547028064727783, "learning_rate": 4.0130000000000004e-05, "loss": 0.429, "step": 8030 }, { "epoch": 0.449714413708142, "grad_norm": 1.2034847736358643, "learning_rate": 4.0135e-05, "loss": 0.4543, "step": 8031 }, { "epoch": 0.44977041102027104, "grad_norm": 1.3718706369400024, "learning_rate": 4.014e-05, "loss": 0.4695, "step": 8032 }, { "epoch": 0.44982640833240006, "grad_norm": 1.1253634691238403, "learning_rate": 4.0144999999999996e-05, "loss": 0.4362, "step": 8033 }, { "epoch": 0.4498824056445291, "grad_norm": 1.0990970134735107, "learning_rate": 4.015000000000001e-05, "loss": 0.5066, "step": 8034 }, { "epoch": 0.4499384029566581, "grad_norm": 1.5450721979141235, "learning_rate": 4.0155000000000004e-05, "loss": 0.4642, "step": 8035 }, { "epoch": 0.4499944002687871, "grad_norm": 1.2929738759994507, "learning_rate": 4.016e-05, "loss": 0.5573, "step": 8036 }, { "epoch": 0.45005039758091614, "grad_norm": 1.633539080619812, "learning_rate": 4.0165000000000006e-05, "loss": 0.4439, "step": 8037 }, { "epoch": 0.45010639489304516, "grad_norm": 1.2135896682739258, "learning_rate": 4.017e-05, "loss": 0.3989, "step": 8038 }, { "epoch": 0.4501623922051742, "grad_norm": 1.394967794418335, "learning_rate": 4.0175e-05, "loss": 0.4351, "step": 8039 }, { "epoch": 0.4502183895173032, "grad_norm": 1.177072525024414, "learning_rate": 4.018e-05, "loss": 0.4132, "step": 8040 }, { "epoch": 0.45027438682943216, "grad_norm": 1.293129563331604, "learning_rate": 4.0185e-05, "loss": 0.3174, "step": 8041 }, { "epoch": 0.4503303841415612, "grad_norm": 1.3859515190124512, "learning_rate": 4.019e-05, "loss": 0.4402, "step": 8042 }, { "epoch": 0.4503863814536902, "grad_norm": 1.4350817203521729, "learning_rate": 4.0195e-05, "loss": 0.4825, "step": 8043 }, { "epoch": 0.4504423787658192, "grad_norm": 1.1298954486846924, "learning_rate": 4.02e-05, "loss": 0.4487, "step": 8044 }, { "epoch": 
0.45049837607794824, "grad_norm": 1.2463808059692383, "learning_rate": 4.0205000000000006e-05, "loss": 0.3645, "step": 8045 }, { "epoch": 0.45055437339007726, "grad_norm": 1.2132930755615234, "learning_rate": 4.021e-05, "loss": 0.3821, "step": 8046 }, { "epoch": 0.4506103707022063, "grad_norm": 1.4917255640029907, "learning_rate": 4.0215e-05, "loss": 0.5085, "step": 8047 }, { "epoch": 0.4506663680143353, "grad_norm": 1.0278327465057373, "learning_rate": 4.0220000000000005e-05, "loss": 0.4188, "step": 8048 }, { "epoch": 0.4507223653264643, "grad_norm": 1.4655405282974243, "learning_rate": 4.0225e-05, "loss": 0.5233, "step": 8049 }, { "epoch": 0.45077836263859333, "grad_norm": 1.2491194009780884, "learning_rate": 4.023e-05, "loss": 0.4767, "step": 8050 }, { "epoch": 0.45083435995072235, "grad_norm": 1.330393671989441, "learning_rate": 4.0235000000000004e-05, "loss": 0.4438, "step": 8051 }, { "epoch": 0.45089035726285137, "grad_norm": 1.231075406074524, "learning_rate": 4.024e-05, "loss": 0.4787, "step": 8052 }, { "epoch": 0.4509463545749804, "grad_norm": 1.1820240020751953, "learning_rate": 4.0245e-05, "loss": 0.4129, "step": 8053 }, { "epoch": 0.4510023518871094, "grad_norm": 1.0467157363891602, "learning_rate": 4.025e-05, "loss": 0.3787, "step": 8054 }, { "epoch": 0.45105834919923843, "grad_norm": 1.2204055786132812, "learning_rate": 4.025500000000001e-05, "loss": 0.3706, "step": 8055 }, { "epoch": 0.45111434651136745, "grad_norm": 1.5015779733657837, "learning_rate": 4.0260000000000004e-05, "loss": 0.4293, "step": 8056 }, { "epoch": 0.45117034382349647, "grad_norm": 1.1499462127685547, "learning_rate": 4.0265e-05, "loss": 0.4028, "step": 8057 }, { "epoch": 0.4512263411356255, "grad_norm": 1.630139708518982, "learning_rate": 4.027e-05, "loss": 0.4948, "step": 8058 }, { "epoch": 0.4512823384477545, "grad_norm": 1.1332985162734985, "learning_rate": 4.0275e-05, "loss": 0.3303, "step": 8059 }, { "epoch": 0.4513383357598835, "grad_norm": 1.9683868885040283, 
"learning_rate": 4.028e-05, "loss": 0.4903, "step": 8060 }, { "epoch": 0.45139433307201254, "grad_norm": 1.0710465908050537, "learning_rate": 4.0285e-05, "loss": 0.3492, "step": 8061 }, { "epoch": 0.45145033038414156, "grad_norm": 1.514875888824463, "learning_rate": 4.029e-05, "loss": 0.4255, "step": 8062 }, { "epoch": 0.4515063276962706, "grad_norm": 1.4187906980514526, "learning_rate": 4.0295e-05, "loss": 0.4634, "step": 8063 }, { "epoch": 0.4515623250083996, "grad_norm": 1.4659744501113892, "learning_rate": 4.0300000000000004e-05, "loss": 0.4806, "step": 8064 }, { "epoch": 0.4516183223205286, "grad_norm": 1.1748239994049072, "learning_rate": 4.0305e-05, "loss": 0.3964, "step": 8065 }, { "epoch": 0.45167431963265764, "grad_norm": 1.0754516124725342, "learning_rate": 4.0310000000000005e-05, "loss": 0.394, "step": 8066 }, { "epoch": 0.45173031694478666, "grad_norm": 1.1976890563964844, "learning_rate": 4.0315e-05, "loss": 0.3775, "step": 8067 }, { "epoch": 0.4517863142569157, "grad_norm": 1.5069286823272705, "learning_rate": 4.032e-05, "loss": 0.6658, "step": 8068 }, { "epoch": 0.4518423115690447, "grad_norm": 1.6875029802322388, "learning_rate": 4.0325000000000004e-05, "loss": 0.4667, "step": 8069 }, { "epoch": 0.4518983088811737, "grad_norm": 1.1968203783035278, "learning_rate": 4.033e-05, "loss": 0.4361, "step": 8070 }, { "epoch": 0.45195430619330274, "grad_norm": 1.481873869895935, "learning_rate": 4.0335e-05, "loss": 0.4308, "step": 8071 }, { "epoch": 0.45201030350543175, "grad_norm": 1.190264344215393, "learning_rate": 4.034e-05, "loss": 0.4504, "step": 8072 }, { "epoch": 0.4520663008175608, "grad_norm": 1.1881425380706787, "learning_rate": 4.0345e-05, "loss": 0.4565, "step": 8073 }, { "epoch": 0.4521222981296898, "grad_norm": 1.15525484085083, "learning_rate": 4.0350000000000005e-05, "loss": 0.3779, "step": 8074 }, { "epoch": 0.4521782954418188, "grad_norm": 1.2618846893310547, "learning_rate": 4.0355e-05, "loss": 0.4439, "step": 8075 }, { "epoch": 
0.45223429275394783, "grad_norm": 1.3922920227050781, "learning_rate": 4.0360000000000007e-05, "loss": 0.4628, "step": 8076 }, { "epoch": 0.45229029006607685, "grad_norm": 1.2423697710037231, "learning_rate": 4.0365000000000004e-05, "loss": 0.4043, "step": 8077 }, { "epoch": 0.45234628737820587, "grad_norm": 1.279617190361023, "learning_rate": 4.037e-05, "loss": 0.5224, "step": 8078 }, { "epoch": 0.4524022846903349, "grad_norm": 1.2444432973861694, "learning_rate": 4.0375e-05, "loss": 0.3951, "step": 8079 }, { "epoch": 0.4524582820024639, "grad_norm": 1.1708821058273315, "learning_rate": 4.038e-05, "loss": 0.3757, "step": 8080 }, { "epoch": 0.4525142793145929, "grad_norm": 1.2084872722625732, "learning_rate": 4.0385e-05, "loss": 0.3436, "step": 8081 }, { "epoch": 0.4525702766267219, "grad_norm": 1.4237982034683228, "learning_rate": 4.039e-05, "loss": 0.4698, "step": 8082 }, { "epoch": 0.4526262739388509, "grad_norm": 1.3786941766738892, "learning_rate": 4.0395e-05, "loss": 0.3887, "step": 8083 }, { "epoch": 0.45268227125097993, "grad_norm": 1.1412067413330078, "learning_rate": 4.0400000000000006e-05, "loss": 0.4617, "step": 8084 }, { "epoch": 0.45273826856310895, "grad_norm": 1.231732964515686, "learning_rate": 4.0405000000000004e-05, "loss": 0.3878, "step": 8085 }, { "epoch": 0.45279426587523797, "grad_norm": 1.242849588394165, "learning_rate": 4.041e-05, "loss": 0.4939, "step": 8086 }, { "epoch": 0.452850263187367, "grad_norm": 1.2185568809509277, "learning_rate": 4.0415000000000005e-05, "loss": 0.3916, "step": 8087 }, { "epoch": 0.452906260499496, "grad_norm": 1.1475306749343872, "learning_rate": 4.042e-05, "loss": 0.4227, "step": 8088 }, { "epoch": 0.452962257811625, "grad_norm": 1.4434412717819214, "learning_rate": 4.0425e-05, "loss": 0.489, "step": 8089 }, { "epoch": 0.45301825512375404, "grad_norm": 1.2608963251113892, "learning_rate": 4.0430000000000004e-05, "loss": 0.4135, "step": 8090 }, { "epoch": 0.45307425243588306, "grad_norm": 1.2011988162994385, 
"learning_rate": 4.0435e-05, "loss": 0.4414, "step": 8091 }, { "epoch": 0.4531302497480121, "grad_norm": 1.0195295810699463, "learning_rate": 4.044e-05, "loss": 0.4124, "step": 8092 }, { "epoch": 0.4531862470601411, "grad_norm": 1.2460380792617798, "learning_rate": 4.0444999999999996e-05, "loss": 0.408, "step": 8093 }, { "epoch": 0.4532422443722701, "grad_norm": 1.3952407836914062, "learning_rate": 4.045000000000001e-05, "loss": 0.5317, "step": 8094 }, { "epoch": 0.45329824168439914, "grad_norm": 1.2081043720245361, "learning_rate": 4.0455000000000005e-05, "loss": 0.37, "step": 8095 }, { "epoch": 0.45335423899652816, "grad_norm": 1.1570297479629517, "learning_rate": 4.046e-05, "loss": 0.365, "step": 8096 }, { "epoch": 0.4534102363086572, "grad_norm": 1.3316103219985962, "learning_rate": 4.0465e-05, "loss": 0.4904, "step": 8097 }, { "epoch": 0.4534662336207862, "grad_norm": 1.4848228693008423, "learning_rate": 4.0470000000000004e-05, "loss": 0.4557, "step": 8098 }, { "epoch": 0.4535222309329152, "grad_norm": 1.5339934825897217, "learning_rate": 4.0475e-05, "loss": 0.4291, "step": 8099 }, { "epoch": 0.45357822824504423, "grad_norm": 1.0706136226654053, "learning_rate": 4.048e-05, "loss": 0.3559, "step": 8100 }, { "epoch": 0.45363422555717325, "grad_norm": 1.1460086107254028, "learning_rate": 4.0485e-05, "loss": 0.3852, "step": 8101 }, { "epoch": 0.4536902228693023, "grad_norm": 1.1236015558242798, "learning_rate": 4.049e-05, "loss": 0.4457, "step": 8102 }, { "epoch": 0.4537462201814313, "grad_norm": 1.4457634687423706, "learning_rate": 4.0495e-05, "loss": 0.5892, "step": 8103 }, { "epoch": 0.4538022174935603, "grad_norm": 1.413439393043518, "learning_rate": 4.05e-05, "loss": 0.555, "step": 8104 }, { "epoch": 0.45385821480568933, "grad_norm": 1.2305419445037842, "learning_rate": 4.0505000000000006e-05, "loss": 0.3831, "step": 8105 }, { "epoch": 0.45391421211781835, "grad_norm": 1.3862773180007935, "learning_rate": 4.0510000000000003e-05, "loss": 0.4541, "step": 8106 
}, { "epoch": 0.45397020942994737, "grad_norm": 1.3567650318145752, "learning_rate": 4.0515e-05, "loss": 0.5618, "step": 8107 }, { "epoch": 0.4540262067420764, "grad_norm": 1.232162356376648, "learning_rate": 4.0520000000000005e-05, "loss": 0.3998, "step": 8108 }, { "epoch": 0.4540822040542054, "grad_norm": 1.2350515127182007, "learning_rate": 4.0525e-05, "loss": 0.4445, "step": 8109 }, { "epoch": 0.4541382013663344, "grad_norm": 1.6478445529937744, "learning_rate": 4.053e-05, "loss": 0.4283, "step": 8110 }, { "epoch": 0.45419419867846345, "grad_norm": 1.372441053390503, "learning_rate": 4.0535000000000004e-05, "loss": 0.3788, "step": 8111 }, { "epoch": 0.45425019599059246, "grad_norm": 1.1326618194580078, "learning_rate": 4.054e-05, "loss": 0.4098, "step": 8112 }, { "epoch": 0.4543061933027215, "grad_norm": 1.2759462594985962, "learning_rate": 4.0545e-05, "loss": 0.4116, "step": 8113 }, { "epoch": 0.4543621906148505, "grad_norm": 1.4068135023117065, "learning_rate": 4.055e-05, "loss": 0.3877, "step": 8114 }, { "epoch": 0.4544181879269795, "grad_norm": 1.5296299457550049, "learning_rate": 4.055500000000001e-05, "loss": 0.5493, "step": 8115 }, { "epoch": 0.45447418523910854, "grad_norm": 1.0567477941513062, "learning_rate": 4.0560000000000005e-05, "loss": 0.3859, "step": 8116 }, { "epoch": 0.45453018255123756, "grad_norm": 1.291696310043335, "learning_rate": 4.0565e-05, "loss": 0.4156, "step": 8117 }, { "epoch": 0.4545861798633666, "grad_norm": 1.5526171922683716, "learning_rate": 4.057e-05, "loss": 0.5647, "step": 8118 }, { "epoch": 0.4546421771754956, "grad_norm": 1.2019410133361816, "learning_rate": 4.0575000000000004e-05, "loss": 0.3844, "step": 8119 }, { "epoch": 0.4546981744876246, "grad_norm": 1.3499386310577393, "learning_rate": 4.058e-05, "loss": 0.5272, "step": 8120 }, { "epoch": 0.45475417179975364, "grad_norm": 1.597776174545288, "learning_rate": 4.0585e-05, "loss": 0.5078, "step": 8121 }, { "epoch": 0.45481016911188266, "grad_norm": 1.053828477859497, 
"learning_rate": 4.059e-05, "loss": 0.282, "step": 8122 }, { "epoch": 0.4548661664240116, "grad_norm": 1.2141889333724976, "learning_rate": 4.0595e-05, "loss": 0.3871, "step": 8123 }, { "epoch": 0.45492216373614064, "grad_norm": 1.2278751134872437, "learning_rate": 4.0600000000000004e-05, "loss": 0.371, "step": 8124 }, { "epoch": 0.45497816104826966, "grad_norm": 1.1336435079574585, "learning_rate": 4.0605e-05, "loss": 0.3757, "step": 8125 }, { "epoch": 0.4550341583603987, "grad_norm": 1.213625431060791, "learning_rate": 4.0610000000000006e-05, "loss": 0.5468, "step": 8126 }, { "epoch": 0.4550901556725277, "grad_norm": 1.0659953355789185, "learning_rate": 4.0615e-05, "loss": 0.3782, "step": 8127 }, { "epoch": 0.4551461529846567, "grad_norm": 1.1252938508987427, "learning_rate": 4.062e-05, "loss": 0.3667, "step": 8128 }, { "epoch": 0.45520215029678573, "grad_norm": 1.2927285432815552, "learning_rate": 4.0625000000000005e-05, "loss": 0.4203, "step": 8129 }, { "epoch": 0.45525814760891475, "grad_norm": 1.2861311435699463, "learning_rate": 4.063e-05, "loss": 0.4138, "step": 8130 }, { "epoch": 0.4553141449210438, "grad_norm": 1.390527606010437, "learning_rate": 4.0635e-05, "loss": 0.4645, "step": 8131 }, { "epoch": 0.4553701422331728, "grad_norm": 1.4087966680526733, "learning_rate": 4.064e-05, "loss": 0.4163, "step": 8132 }, { "epoch": 0.4554261395453018, "grad_norm": 1.4138132333755493, "learning_rate": 4.0645e-05, "loss": 0.4528, "step": 8133 }, { "epoch": 0.45548213685743083, "grad_norm": 1.3514633178710938, "learning_rate": 4.065e-05, "loss": 0.3211, "step": 8134 }, { "epoch": 0.45553813416955985, "grad_norm": 1.0520461797714233, "learning_rate": 4.0655e-05, "loss": 0.4123, "step": 8135 }, { "epoch": 0.45559413148168887, "grad_norm": 1.2497740983963013, "learning_rate": 4.066e-05, "loss": 0.39, "step": 8136 }, { "epoch": 0.4556501287938179, "grad_norm": 1.0242879390716553, "learning_rate": 4.0665000000000005e-05, "loss": 0.3915, "step": 8137 }, { "epoch": 
0.4557061261059469, "grad_norm": 1.367636799812317, "learning_rate": 4.067e-05, "loss": 0.362, "step": 8138 }, { "epoch": 0.4557621234180759, "grad_norm": 1.2019015550613403, "learning_rate": 4.0675e-05, "loss": 0.5287, "step": 8139 }, { "epoch": 0.45581812073020495, "grad_norm": 1.3564034700393677, "learning_rate": 4.0680000000000004e-05, "loss": 0.5151, "step": 8140 }, { "epoch": 0.45587411804233396, "grad_norm": 1.424517035484314, "learning_rate": 4.0685e-05, "loss": 0.5017, "step": 8141 }, { "epoch": 0.455930115354463, "grad_norm": 1.372143030166626, "learning_rate": 4.069e-05, "loss": 0.3535, "step": 8142 }, { "epoch": 0.455986112666592, "grad_norm": 1.584250569343567, "learning_rate": 4.0695e-05, "loss": 0.5072, "step": 8143 }, { "epoch": 0.456042109978721, "grad_norm": 1.085153579711914, "learning_rate": 4.07e-05, "loss": 0.4053, "step": 8144 }, { "epoch": 0.45609810729085004, "grad_norm": 1.2137624025344849, "learning_rate": 4.0705000000000004e-05, "loss": 0.4806, "step": 8145 }, { "epoch": 0.45615410460297906, "grad_norm": 1.4045476913452148, "learning_rate": 4.071e-05, "loss": 0.6196, "step": 8146 }, { "epoch": 0.4562101019151081, "grad_norm": 1.3317464590072632, "learning_rate": 4.0715000000000006e-05, "loss": 0.4997, "step": 8147 }, { "epoch": 0.4562660992272371, "grad_norm": 1.1357710361480713, "learning_rate": 4.072e-05, "loss": 0.3664, "step": 8148 }, { "epoch": 0.4563220965393661, "grad_norm": 1.2137690782546997, "learning_rate": 4.0725e-05, "loss": 0.3473, "step": 8149 }, { "epoch": 0.45637809385149514, "grad_norm": 1.4994457960128784, "learning_rate": 4.0730000000000005e-05, "loss": 0.5448, "step": 8150 }, { "epoch": 0.45643409116362416, "grad_norm": 1.363540768623352, "learning_rate": 4.0735e-05, "loss": 0.4171, "step": 8151 }, { "epoch": 0.4564900884757532, "grad_norm": 1.1198484897613525, "learning_rate": 4.074e-05, "loss": 0.3726, "step": 8152 }, { "epoch": 0.4565460857878822, "grad_norm": 1.3675659894943237, "learning_rate": 4.0745e-05, 
"loss": 0.5798, "step": 8153 }, { "epoch": 0.4566020831000112, "grad_norm": 1.2910454273223877, "learning_rate": 4.075e-05, "loss": 0.3636, "step": 8154 }, { "epoch": 0.45665808041214023, "grad_norm": 1.163226842880249, "learning_rate": 4.0755000000000005e-05, "loss": 0.3997, "step": 8155 }, { "epoch": 0.45671407772426925, "grad_norm": 1.3678209781646729, "learning_rate": 4.076e-05, "loss": 0.5217, "step": 8156 }, { "epoch": 0.45677007503639827, "grad_norm": 1.0722938776016235, "learning_rate": 4.0765e-05, "loss": 0.3597, "step": 8157 }, { "epoch": 0.4568260723485273, "grad_norm": 1.2889726161956787, "learning_rate": 4.0770000000000004e-05, "loss": 0.5934, "step": 8158 }, { "epoch": 0.4568820696606563, "grad_norm": 1.3214764595031738, "learning_rate": 4.0775e-05, "loss": 0.432, "step": 8159 }, { "epoch": 0.45693806697278533, "grad_norm": 1.1166839599609375, "learning_rate": 4.078e-05, "loss": 0.3357, "step": 8160 }, { "epoch": 0.45699406428491435, "grad_norm": 1.1353657245635986, "learning_rate": 4.0785e-05, "loss": 0.4701, "step": 8161 }, { "epoch": 0.45705006159704337, "grad_norm": 1.2369425296783447, "learning_rate": 4.079e-05, "loss": 0.3867, "step": 8162 }, { "epoch": 0.4571060589091724, "grad_norm": 1.2651628255844116, "learning_rate": 4.0795e-05, "loss": 0.4669, "step": 8163 }, { "epoch": 0.4571620562213014, "grad_norm": 1.2256214618682861, "learning_rate": 4.08e-05, "loss": 0.3618, "step": 8164 }, { "epoch": 0.45721805353343037, "grad_norm": 3.2736332416534424, "learning_rate": 4.0805000000000007e-05, "loss": 0.4127, "step": 8165 }, { "epoch": 0.4572740508455594, "grad_norm": 1.2997982501983643, "learning_rate": 4.0810000000000004e-05, "loss": 0.4168, "step": 8166 }, { "epoch": 0.4573300481576884, "grad_norm": 1.2261103391647339, "learning_rate": 4.0815e-05, "loss": 0.6351, "step": 8167 }, { "epoch": 0.4573860454698174, "grad_norm": 1.2697564363479614, "learning_rate": 4.0820000000000006e-05, "loss": 0.4485, "step": 8168 }, { "epoch": 0.45744204278194645, 
"grad_norm": 1.2801098823547363, "learning_rate": 4.0825e-05, "loss": 0.4397, "step": 8169 }, { "epoch": 0.45749804009407546, "grad_norm": 1.3548836708068848, "learning_rate": 4.083e-05, "loss": 0.4038, "step": 8170 }, { "epoch": 0.4575540374062045, "grad_norm": 1.4707496166229248, "learning_rate": 4.0835e-05, "loss": 0.4418, "step": 8171 }, { "epoch": 0.4576100347183335, "grad_norm": 1.1236820220947266, "learning_rate": 4.084e-05, "loss": 0.3652, "step": 8172 }, { "epoch": 0.4576660320304625, "grad_norm": 1.1483045816421509, "learning_rate": 4.0845e-05, "loss": 0.3515, "step": 8173 }, { "epoch": 0.45772202934259154, "grad_norm": 1.2674108743667603, "learning_rate": 4.085e-05, "loss": 0.5687, "step": 8174 }, { "epoch": 0.45777802665472056, "grad_norm": 1.2336115837097168, "learning_rate": 4.0855e-05, "loss": 0.4017, "step": 8175 }, { "epoch": 0.4578340239668496, "grad_norm": 1.3384089469909668, "learning_rate": 4.0860000000000005e-05, "loss": 0.4603, "step": 8176 }, { "epoch": 0.4578900212789786, "grad_norm": 1.5389137268066406, "learning_rate": 4.0865e-05, "loss": 0.468, "step": 8177 }, { "epoch": 0.4579460185911076, "grad_norm": 1.3750388622283936, "learning_rate": 4.087e-05, "loss": 0.4232, "step": 8178 }, { "epoch": 0.45800201590323664, "grad_norm": 1.1116269826889038, "learning_rate": 4.0875000000000004e-05, "loss": 0.3124, "step": 8179 }, { "epoch": 0.45805801321536566, "grad_norm": 1.4280734062194824, "learning_rate": 4.088e-05, "loss": 0.4786, "step": 8180 }, { "epoch": 0.4581140105274947, "grad_norm": 1.4413557052612305, "learning_rate": 4.0885e-05, "loss": 0.4382, "step": 8181 }, { "epoch": 0.4581700078396237, "grad_norm": 7.288043022155762, "learning_rate": 4.089e-05, "loss": 0.5532, "step": 8182 }, { "epoch": 0.4582260051517527, "grad_norm": 1.6042112112045288, "learning_rate": 4.0895e-05, "loss": 0.6748, "step": 8183 }, { "epoch": 0.45828200246388173, "grad_norm": 1.3809505701065063, "learning_rate": 4.09e-05, "loss": 0.3865, "step": 8184 }, { "epoch": 
0.45833799977601075, "grad_norm": 1.2860163450241089, "learning_rate": 4.0905e-05, "loss": 0.3698, "step": 8185 }, { "epoch": 0.45839399708813977, "grad_norm": 1.2628626823425293, "learning_rate": 4.0910000000000006e-05, "loss": 0.6904, "step": 8186 }, { "epoch": 0.4584499944002688, "grad_norm": 1.2822555303573608, "learning_rate": 4.0915000000000004e-05, "loss": 0.3674, "step": 8187 }, { "epoch": 0.4585059917123978, "grad_norm": 1.1814857721328735, "learning_rate": 4.092e-05, "loss": 0.4019, "step": 8188 }, { "epoch": 0.45856198902452683, "grad_norm": 1.335870623588562, "learning_rate": 4.0925000000000005e-05, "loss": 0.4589, "step": 8189 }, { "epoch": 0.45861798633665585, "grad_norm": 1.0942025184631348, "learning_rate": 4.093e-05, "loss": 0.4158, "step": 8190 }, { "epoch": 0.45867398364878487, "grad_norm": 1.0841032266616821, "learning_rate": 4.0935e-05, "loss": 0.2678, "step": 8191 }, { "epoch": 0.4587299809609139, "grad_norm": 1.435125708580017, "learning_rate": 4.094e-05, "loss": 0.4795, "step": 8192 }, { "epoch": 0.4587859782730429, "grad_norm": 1.2814698219299316, "learning_rate": 4.0945e-05, "loss": 0.2863, "step": 8193 }, { "epoch": 0.4588419755851719, "grad_norm": 1.0510350465774536, "learning_rate": 4.095e-05, "loss": 0.4812, "step": 8194 }, { "epoch": 0.45889797289730094, "grad_norm": 1.5333740711212158, "learning_rate": 4.0955000000000003e-05, "loss": 0.4996, "step": 8195 }, { "epoch": 0.45895397020942996, "grad_norm": 1.2973487377166748, "learning_rate": 4.096e-05, "loss": 0.3833, "step": 8196 }, { "epoch": 0.459009967521559, "grad_norm": 1.12303626537323, "learning_rate": 4.0965000000000005e-05, "loss": 0.3749, "step": 8197 }, { "epoch": 0.459065964833688, "grad_norm": 1.536573052406311, "learning_rate": 4.097e-05, "loss": 0.3857, "step": 8198 }, { "epoch": 0.459121962145817, "grad_norm": 1.2383390665054321, "learning_rate": 4.0975e-05, "loss": 0.5466, "step": 8199 }, { "epoch": 0.45917795945794604, "grad_norm": 1.5300266742706299, "learning_rate": 
4.0980000000000004e-05, "loss": 0.4991, "step": 8200 }, { "epoch": 0.45923395677007506, "grad_norm": 1.4450125694274902, "learning_rate": 4.0985e-05, "loss": 0.493, "step": 8201 }, { "epoch": 0.4592899540822041, "grad_norm": 1.259688377380371, "learning_rate": 4.099e-05, "loss": 0.3361, "step": 8202 }, { "epoch": 0.4593459513943331, "grad_norm": 1.6072922945022583, "learning_rate": 4.0995e-05, "loss": 0.368, "step": 8203 }, { "epoch": 0.4594019487064621, "grad_norm": 1.303582787513733, "learning_rate": 4.1e-05, "loss": 0.4323, "step": 8204 }, { "epoch": 0.45945794601859113, "grad_norm": 1.08994460105896, "learning_rate": 4.1005000000000005e-05, "loss": 0.3298, "step": 8205 }, { "epoch": 0.4595139433307201, "grad_norm": 1.1609015464782715, "learning_rate": 4.101e-05, "loss": 0.3274, "step": 8206 }, { "epoch": 0.4595699406428491, "grad_norm": 1.1675680875778198, "learning_rate": 4.1015000000000006e-05, "loss": 0.3734, "step": 8207 }, { "epoch": 0.45962593795497814, "grad_norm": 1.1078368425369263, "learning_rate": 4.1020000000000004e-05, "loss": 0.2963, "step": 8208 }, { "epoch": 0.45968193526710716, "grad_norm": 1.1644442081451416, "learning_rate": 4.1025e-05, "loss": 0.4045, "step": 8209 }, { "epoch": 0.4597379325792362, "grad_norm": 1.2316521406173706, "learning_rate": 4.103e-05, "loss": 0.3405, "step": 8210 }, { "epoch": 0.4597939298913652, "grad_norm": 1.2362315654754639, "learning_rate": 4.1035e-05, "loss": 0.4789, "step": 8211 }, { "epoch": 0.4598499272034942, "grad_norm": 1.1099541187286377, "learning_rate": 4.104e-05, "loss": 0.3682, "step": 8212 }, { "epoch": 0.45990592451562323, "grad_norm": 1.1598701477050781, "learning_rate": 4.1045e-05, "loss": 0.4685, "step": 8213 }, { "epoch": 0.45996192182775225, "grad_norm": 1.2803281545639038, "learning_rate": 4.105e-05, "loss": 0.4272, "step": 8214 }, { "epoch": 0.46001791913988127, "grad_norm": 1.3603127002716064, "learning_rate": 4.1055000000000006e-05, "loss": 0.4071, "step": 8215 }, { "epoch": 
0.4600739164520103, "grad_norm": 1.4698951244354248, "learning_rate": 4.106e-05, "loss": 0.5065, "step": 8216 }, { "epoch": 0.4601299137641393, "grad_norm": 1.216223120689392, "learning_rate": 4.1065e-05, "loss": 0.42, "step": 8217 }, { "epoch": 0.46018591107626833, "grad_norm": 1.1401433944702148, "learning_rate": 4.1070000000000005e-05, "loss": 0.3659, "step": 8218 }, { "epoch": 0.46024190838839735, "grad_norm": 1.258061408996582, "learning_rate": 4.1075e-05, "loss": 0.3915, "step": 8219 }, { "epoch": 0.46029790570052637, "grad_norm": 1.2542272806167603, "learning_rate": 4.108e-05, "loss": 0.3817, "step": 8220 }, { "epoch": 0.4603539030126554, "grad_norm": 1.1371816396713257, "learning_rate": 4.1085000000000004e-05, "loss": 0.3568, "step": 8221 }, { "epoch": 0.4604099003247844, "grad_norm": 1.3532615900039673, "learning_rate": 4.109e-05, "loss": 0.5167, "step": 8222 }, { "epoch": 0.4604658976369134, "grad_norm": 1.1306337118148804, "learning_rate": 4.1095e-05, "loss": 0.3148, "step": 8223 }, { "epoch": 0.46052189494904244, "grad_norm": 1.223226547241211, "learning_rate": 4.11e-05, "loss": 0.4107, "step": 8224 }, { "epoch": 0.46057789226117146, "grad_norm": 1.410191297531128, "learning_rate": 4.110500000000001e-05, "loss": 0.398, "step": 8225 }, { "epoch": 0.4606338895733005, "grad_norm": 1.2490471601486206, "learning_rate": 4.1110000000000005e-05, "loss": 0.4474, "step": 8226 }, { "epoch": 0.4606898868854295, "grad_norm": 1.339874267578125, "learning_rate": 4.1115e-05, "loss": 0.4839, "step": 8227 }, { "epoch": 0.4607458841975585, "grad_norm": 1.209472417831421, "learning_rate": 4.1120000000000006e-05, "loss": 0.3883, "step": 8228 }, { "epoch": 0.46080188150968754, "grad_norm": 1.391992211341858, "learning_rate": 4.1125000000000004e-05, "loss": 0.4707, "step": 8229 }, { "epoch": 0.46085787882181656, "grad_norm": 1.5319911241531372, "learning_rate": 4.113e-05, "loss": 0.529, "step": 8230 }, { "epoch": 0.4609138761339456, "grad_norm": 1.330243468284607, 
"learning_rate": 4.1135e-05, "loss": 0.6138, "step": 8231 }, { "epoch": 0.4609698734460746, "grad_norm": 1.2333359718322754, "learning_rate": 4.114e-05, "loss": 0.4012, "step": 8232 }, { "epoch": 0.4610258707582036, "grad_norm": 1.2454456090927124, "learning_rate": 4.1145e-05, "loss": 0.3726, "step": 8233 }, { "epoch": 0.46108186807033263, "grad_norm": 1.2721551656723022, "learning_rate": 4.115e-05, "loss": 0.485, "step": 8234 }, { "epoch": 0.46113786538246165, "grad_norm": 1.3370745182037354, "learning_rate": 4.1155e-05, "loss": 0.4843, "step": 8235 }, { "epoch": 0.4611938626945907, "grad_norm": 1.1030439138412476, "learning_rate": 4.1160000000000006e-05, "loss": 0.4659, "step": 8236 }, { "epoch": 0.4612498600067197, "grad_norm": 1.1367319822311401, "learning_rate": 4.1165e-05, "loss": 0.3827, "step": 8237 }, { "epoch": 0.4613058573188487, "grad_norm": 1.2777172327041626, "learning_rate": 4.117e-05, "loss": 0.4422, "step": 8238 }, { "epoch": 0.46136185463097773, "grad_norm": 1.0879982709884644, "learning_rate": 4.1175000000000005e-05, "loss": 0.4092, "step": 8239 }, { "epoch": 0.46141785194310675, "grad_norm": 1.1981239318847656, "learning_rate": 4.118e-05, "loss": 0.3226, "step": 8240 }, { "epoch": 0.46147384925523577, "grad_norm": 1.2820683717727661, "learning_rate": 4.1185e-05, "loss": 0.3804, "step": 8241 }, { "epoch": 0.4615298465673648, "grad_norm": 1.2135425806045532, "learning_rate": 4.1190000000000004e-05, "loss": 0.4471, "step": 8242 }, { "epoch": 0.4615858438794938, "grad_norm": 1.178632378578186, "learning_rate": 4.1195e-05, "loss": 0.3996, "step": 8243 }, { "epoch": 0.4616418411916228, "grad_norm": 1.5495091676712036, "learning_rate": 4.12e-05, "loss": 0.4909, "step": 8244 }, { "epoch": 0.46169783850375185, "grad_norm": 1.735106348991394, "learning_rate": 4.1205e-05, "loss": 0.559, "step": 8245 }, { "epoch": 0.46175383581588086, "grad_norm": 1.3498954772949219, "learning_rate": 4.121000000000001e-05, "loss": 0.456, "step": 8246 }, { "epoch": 
0.46180983312800983, "grad_norm": 1.32374906539917, "learning_rate": 4.1215000000000004e-05, "loss": 0.4899, "step": 8247 }, { "epoch": 0.46186583044013885, "grad_norm": 1.8373581171035767, "learning_rate": 4.122e-05, "loss": 0.5818, "step": 8248 }, { "epoch": 0.46192182775226787, "grad_norm": 1.378580093383789, "learning_rate": 4.1225e-05, "loss": 0.4885, "step": 8249 }, { "epoch": 0.4619778250643969, "grad_norm": 1.1680793762207031, "learning_rate": 4.123e-05, "loss": 0.3525, "step": 8250 }, { "epoch": 0.4620338223765259, "grad_norm": 1.231508731842041, "learning_rate": 4.1235e-05, "loss": 0.4798, "step": 8251 }, { "epoch": 0.4620898196886549, "grad_norm": 1.0578612089157104, "learning_rate": 4.124e-05, "loss": 0.4774, "step": 8252 }, { "epoch": 0.46214581700078394, "grad_norm": 1.1454347372055054, "learning_rate": 4.1245e-05, "loss": 0.5126, "step": 8253 }, { "epoch": 0.46220181431291296, "grad_norm": 1.1849184036254883, "learning_rate": 4.125e-05, "loss": 0.4887, "step": 8254 }, { "epoch": 0.462257811625042, "grad_norm": 1.3541444540023804, "learning_rate": 4.1255e-05, "loss": 0.444, "step": 8255 }, { "epoch": 0.462313808937171, "grad_norm": 1.303864598274231, "learning_rate": 4.126e-05, "loss": 0.4418, "step": 8256 }, { "epoch": 0.4623698062493, "grad_norm": 1.2061721086502075, "learning_rate": 4.1265000000000006e-05, "loss": 0.3507, "step": 8257 }, { "epoch": 0.46242580356142904, "grad_norm": 1.4810192584991455, "learning_rate": 4.127e-05, "loss": 0.4216, "step": 8258 }, { "epoch": 0.46248180087355806, "grad_norm": 1.2376545667648315, "learning_rate": 4.1275e-05, "loss": 0.3474, "step": 8259 }, { "epoch": 0.4625377981856871, "grad_norm": 1.3552175760269165, "learning_rate": 4.1280000000000005e-05, "loss": 0.5276, "step": 8260 }, { "epoch": 0.4625937954978161, "grad_norm": 1.53526771068573, "learning_rate": 4.1285e-05, "loss": 0.5101, "step": 8261 }, { "epoch": 0.4626497928099451, "grad_norm": 1.2176669836044312, "learning_rate": 4.129e-05, "loss": 0.3766, 
"step": 8262 }, { "epoch": 0.46270579012207413, "grad_norm": 1.3207225799560547, "learning_rate": 4.1295000000000004e-05, "loss": 0.3536, "step": 8263 }, { "epoch": 0.46276178743420315, "grad_norm": 1.5815467834472656, "learning_rate": 4.13e-05, "loss": 0.3933, "step": 8264 }, { "epoch": 0.4628177847463322, "grad_norm": 1.1484071016311646, "learning_rate": 4.1305e-05, "loss": 0.3941, "step": 8265 }, { "epoch": 0.4628737820584612, "grad_norm": 1.2536993026733398, "learning_rate": 4.131e-05, "loss": 0.3645, "step": 8266 }, { "epoch": 0.4629297793705902, "grad_norm": 1.1876859664916992, "learning_rate": 4.131500000000001e-05, "loss": 0.5404, "step": 8267 }, { "epoch": 0.46298577668271923, "grad_norm": 1.2260714769363403, "learning_rate": 4.1320000000000004e-05, "loss": 0.5652, "step": 8268 }, { "epoch": 0.46304177399484825, "grad_norm": 1.2548279762268066, "learning_rate": 4.1325e-05, "loss": 0.4476, "step": 8269 }, { "epoch": 0.46309777130697727, "grad_norm": 1.3838043212890625, "learning_rate": 4.133e-05, "loss": 0.3975, "step": 8270 }, { "epoch": 0.4631537686191063, "grad_norm": 1.2394155263900757, "learning_rate": 4.1335e-05, "loss": 0.4295, "step": 8271 }, { "epoch": 0.4632097659312353, "grad_norm": 1.3314915895462036, "learning_rate": 4.134e-05, "loss": 0.5559, "step": 8272 }, { "epoch": 0.4632657632433643, "grad_norm": 1.9199459552764893, "learning_rate": 4.1345e-05, "loss": 0.5407, "step": 8273 }, { "epoch": 0.46332176055549334, "grad_norm": 1.1639423370361328, "learning_rate": 4.135e-05, "loss": 0.4417, "step": 8274 }, { "epoch": 0.46337775786762236, "grad_norm": 1.4173465967178345, "learning_rate": 4.1355e-05, "loss": 0.3688, "step": 8275 }, { "epoch": 0.4634337551797514, "grad_norm": 1.2867845296859741, "learning_rate": 4.1360000000000004e-05, "loss": 0.387, "step": 8276 }, { "epoch": 0.4634897524918804, "grad_norm": 1.2569302320480347, "learning_rate": 4.1365e-05, "loss": 0.4712, "step": 8277 }, { "epoch": 0.4635457498040094, "grad_norm": 
1.2919987440109253, "learning_rate": 4.1370000000000005e-05, "loss": 0.5301, "step": 8278 }, { "epoch": 0.46360174711613844, "grad_norm": 1.2371081113815308, "learning_rate": 4.1375e-05, "loss": 0.4337, "step": 8279 }, { "epoch": 0.46365774442826746, "grad_norm": 1.2740482091903687, "learning_rate": 4.138e-05, "loss": 0.5003, "step": 8280 }, { "epoch": 0.4637137417403965, "grad_norm": 1.199916124343872, "learning_rate": 4.1385000000000004e-05, "loss": 0.4129, "step": 8281 }, { "epoch": 0.4637697390525255, "grad_norm": 1.294663667678833, "learning_rate": 4.139e-05, "loss": 0.4201, "step": 8282 }, { "epoch": 0.4638257363646545, "grad_norm": 1.6683567762374878, "learning_rate": 4.1395e-05, "loss": 0.4427, "step": 8283 }, { "epoch": 0.46388173367678354, "grad_norm": 1.3331955671310425, "learning_rate": 4.14e-05, "loss": 0.3179, "step": 8284 }, { "epoch": 0.46393773098891256, "grad_norm": 1.1765532493591309, "learning_rate": 4.1405e-05, "loss": 0.2859, "step": 8285 }, { "epoch": 0.4639937283010416, "grad_norm": 1.1770352125167847, "learning_rate": 4.1410000000000005e-05, "loss": 0.4553, "step": 8286 }, { "epoch": 0.4640497256131706, "grad_norm": 1.4300110340118408, "learning_rate": 4.1415e-05, "loss": 0.3925, "step": 8287 }, { "epoch": 0.4641057229252996, "grad_norm": 1.2035119533538818, "learning_rate": 4.142000000000001e-05, "loss": 0.4124, "step": 8288 }, { "epoch": 0.4641617202374286, "grad_norm": 1.1478502750396729, "learning_rate": 4.1425000000000004e-05, "loss": 0.483, "step": 8289 }, { "epoch": 0.4642177175495576, "grad_norm": 1.1492865085601807, "learning_rate": 4.143e-05, "loss": 0.3764, "step": 8290 }, { "epoch": 0.4642737148616866, "grad_norm": 1.1740813255310059, "learning_rate": 4.1435e-05, "loss": 0.4237, "step": 8291 }, { "epoch": 0.46432971217381563, "grad_norm": 1.120821237564087, "learning_rate": 4.144e-05, "loss": 0.3322, "step": 8292 }, { "epoch": 0.46438570948594465, "grad_norm": 1.0857949256896973, "learning_rate": 4.1445e-05, "loss": 0.3412, 
"step": 8293 }, { "epoch": 0.4644417067980737, "grad_norm": 1.2305532693862915, "learning_rate": 4.145e-05, "loss": 0.4732, "step": 8294 }, { "epoch": 0.4644977041102027, "grad_norm": 3.610482692718506, "learning_rate": 4.1455e-05, "loss": 0.5287, "step": 8295 }, { "epoch": 0.4645537014223317, "grad_norm": 1.2439639568328857, "learning_rate": 4.1460000000000006e-05, "loss": 0.4171, "step": 8296 }, { "epoch": 0.46460969873446073, "grad_norm": 1.2058327198028564, "learning_rate": 4.1465000000000004e-05, "loss": 0.4941, "step": 8297 }, { "epoch": 0.46466569604658975, "grad_norm": 1.315577507019043, "learning_rate": 4.147e-05, "loss": 0.3526, "step": 8298 }, { "epoch": 0.46472169335871877, "grad_norm": 1.451165795326233, "learning_rate": 4.1475000000000005e-05, "loss": 0.4023, "step": 8299 }, { "epoch": 0.4647776906708478, "grad_norm": 1.1746854782104492, "learning_rate": 4.148e-05, "loss": 0.3519, "step": 8300 }, { "epoch": 0.4648336879829768, "grad_norm": 1.617592692375183, "learning_rate": 4.1485e-05, "loss": 0.429, "step": 8301 }, { "epoch": 0.4648896852951058, "grad_norm": 1.423981785774231, "learning_rate": 4.1490000000000004e-05, "loss": 0.5623, "step": 8302 }, { "epoch": 0.46494568260723484, "grad_norm": 1.2950692176818848, "learning_rate": 4.1495e-05, "loss": 0.4532, "step": 8303 }, { "epoch": 0.46500167991936386, "grad_norm": 1.4684441089630127, "learning_rate": 4.15e-05, "loss": 0.5641, "step": 8304 }, { "epoch": 0.4650576772314929, "grad_norm": 1.3021410703659058, "learning_rate": 4.1504999999999996e-05, "loss": 0.4803, "step": 8305 }, { "epoch": 0.4651136745436219, "grad_norm": 1.3266184329986572, "learning_rate": 4.151000000000001e-05, "loss": 0.5234, "step": 8306 }, { "epoch": 0.4651696718557509, "grad_norm": 1.1590931415557861, "learning_rate": 4.1515000000000005e-05, "loss": 0.3411, "step": 8307 }, { "epoch": 0.46522566916787994, "grad_norm": 1.4431616067886353, "learning_rate": 4.152e-05, "loss": 0.4403, "step": 8308 }, { "epoch": 0.46528166648000896, 
"grad_norm": 1.0642368793487549, "learning_rate": 4.1525e-05, "loss": 0.3634, "step": 8309 }, { "epoch": 0.465337663792138, "grad_norm": 1.254712462425232, "learning_rate": 4.1530000000000004e-05, "loss": 0.4495, "step": 8310 }, { "epoch": 0.465393661104267, "grad_norm": 1.1482433080673218, "learning_rate": 4.1535e-05, "loss": 0.3812, "step": 8311 }, { "epoch": 0.465449658416396, "grad_norm": 1.107596755027771, "learning_rate": 4.154e-05, "loss": 0.4688, "step": 8312 }, { "epoch": 0.46550565572852504, "grad_norm": 1.2421313524246216, "learning_rate": 4.1545e-05, "loss": 0.3898, "step": 8313 }, { "epoch": 0.46556165304065406, "grad_norm": 1.170276165008545, "learning_rate": 4.155e-05, "loss": 0.3996, "step": 8314 }, { "epoch": 0.4656176503527831, "grad_norm": 1.4561901092529297, "learning_rate": 4.1555e-05, "loss": 0.4181, "step": 8315 }, { "epoch": 0.4656736476649121, "grad_norm": 1.160518765449524, "learning_rate": 4.156e-05, "loss": 0.524, "step": 8316 }, { "epoch": 0.4657296449770411, "grad_norm": 1.5591087341308594, "learning_rate": 4.1565000000000006e-05, "loss": 0.5717, "step": 8317 }, { "epoch": 0.46578564228917013, "grad_norm": 1.2539479732513428, "learning_rate": 4.1570000000000003e-05, "loss": 0.5269, "step": 8318 }, { "epoch": 0.46584163960129915, "grad_norm": 1.2426033020019531, "learning_rate": 4.1575e-05, "loss": 0.5566, "step": 8319 }, { "epoch": 0.46589763691342817, "grad_norm": 1.1913050413131714, "learning_rate": 4.1580000000000005e-05, "loss": 0.3839, "step": 8320 }, { "epoch": 0.4659536342255572, "grad_norm": 1.2415966987609863, "learning_rate": 4.1585e-05, "loss": 0.3982, "step": 8321 }, { "epoch": 0.4660096315376862, "grad_norm": 1.1320737600326538, "learning_rate": 4.159e-05, "loss": 0.4148, "step": 8322 }, { "epoch": 0.46606562884981523, "grad_norm": 1.401557445526123, "learning_rate": 4.1595e-05, "loss": 0.4431, "step": 8323 }, { "epoch": 0.46612162616194425, "grad_norm": 1.2837218046188354, "learning_rate": 4.16e-05, "loss": 0.4153, 
"step": 8324 }, { "epoch": 0.46617762347407327, "grad_norm": 1.4516154527664185, "learning_rate": 4.1605e-05, "loss": 0.493, "step": 8325 }, { "epoch": 0.4662336207862023, "grad_norm": 1.219534993171692, "learning_rate": 4.161e-05, "loss": 0.4909, "step": 8326 }, { "epoch": 0.4662896180983313, "grad_norm": 1.3851169347763062, "learning_rate": 4.161500000000001e-05, "loss": 0.3984, "step": 8327 }, { "epoch": 0.4663456154104603, "grad_norm": 1.273726224899292, "learning_rate": 4.1620000000000005e-05, "loss": 0.3899, "step": 8328 }, { "epoch": 0.46640161272258934, "grad_norm": 2.3882343769073486, "learning_rate": 4.1625e-05, "loss": 0.6668, "step": 8329 }, { "epoch": 0.4664576100347183, "grad_norm": 1.1493287086486816, "learning_rate": 4.163e-05, "loss": 0.3776, "step": 8330 }, { "epoch": 0.4665136073468473, "grad_norm": 1.4332467317581177, "learning_rate": 4.1635000000000004e-05, "loss": 0.4647, "step": 8331 }, { "epoch": 0.46656960465897634, "grad_norm": 1.2122044563293457, "learning_rate": 4.164e-05, "loss": 0.3503, "step": 8332 }, { "epoch": 0.46662560197110536, "grad_norm": 1.2885552644729614, "learning_rate": 4.1645e-05, "loss": 0.587, "step": 8333 }, { "epoch": 0.4666815992832344, "grad_norm": 1.2309215068817139, "learning_rate": 4.165e-05, "loss": 0.5463, "step": 8334 }, { "epoch": 0.4667375965953634, "grad_norm": 1.1431134939193726, "learning_rate": 4.1655e-05, "loss": 0.3593, "step": 8335 }, { "epoch": 0.4667935939074924, "grad_norm": 1.6323133707046509, "learning_rate": 4.1660000000000004e-05, "loss": 0.4827, "step": 8336 }, { "epoch": 0.46684959121962144, "grad_norm": 1.3194406032562256, "learning_rate": 4.1665e-05, "loss": 0.4625, "step": 8337 }, { "epoch": 0.46690558853175046, "grad_norm": 1.180116891860962, "learning_rate": 4.1670000000000006e-05, "loss": 0.4089, "step": 8338 }, { "epoch": 0.4669615858438795, "grad_norm": 1.1480462551116943, "learning_rate": 4.1675e-05, "loss": 0.426, "step": 8339 }, { "epoch": 0.4670175831560085, "grad_norm": 
1.3161413669586182, "learning_rate": 4.168e-05, "loss": 0.4495, "step": 8340 }, { "epoch": 0.4670735804681375, "grad_norm": 1.3676503896713257, "learning_rate": 4.1685000000000005e-05, "loss": 0.4018, "step": 8341 }, { "epoch": 0.46712957778026654, "grad_norm": 1.2224594354629517, "learning_rate": 4.169e-05, "loss": 0.3976, "step": 8342 }, { "epoch": 0.46718557509239556, "grad_norm": 1.1920114755630493, "learning_rate": 4.1695e-05, "loss": 0.3581, "step": 8343 }, { "epoch": 0.4672415724045246, "grad_norm": 1.3474382162094116, "learning_rate": 4.17e-05, "loss": 0.4556, "step": 8344 }, { "epoch": 0.4672975697166536, "grad_norm": 1.1781539916992188, "learning_rate": 4.1705e-05, "loss": 0.4035, "step": 8345 }, { "epoch": 0.4673535670287826, "grad_norm": 1.3382846117019653, "learning_rate": 4.1710000000000006e-05, "loss": 0.4421, "step": 8346 }, { "epoch": 0.46740956434091163, "grad_norm": 1.2219523191452026, "learning_rate": 4.1715e-05, "loss": 0.342, "step": 8347 }, { "epoch": 0.46746556165304065, "grad_norm": 1.2026203870773315, "learning_rate": 4.172e-05, "loss": 0.3207, "step": 8348 }, { "epoch": 0.46752155896516967, "grad_norm": 1.270452857017517, "learning_rate": 4.1725000000000005e-05, "loss": 0.3315, "step": 8349 }, { "epoch": 0.4675775562772987, "grad_norm": 1.188292145729065, "learning_rate": 4.173e-05, "loss": 0.4851, "step": 8350 }, { "epoch": 0.4676335535894277, "grad_norm": 1.2668474912643433, "learning_rate": 4.1735e-05, "loss": 0.5031, "step": 8351 }, { "epoch": 0.4676895509015567, "grad_norm": 1.1694830656051636, "learning_rate": 4.1740000000000004e-05, "loss": 0.399, "step": 8352 }, { "epoch": 0.46774554821368575, "grad_norm": null, "learning_rate": 4.1740000000000004e-05, "loss": 0.3961, "step": 8353 }, { "epoch": 0.46780154552581477, "grad_norm": 1.2740497589111328, "learning_rate": 4.1745e-05, "loss": 0.3757, "step": 8354 }, { "epoch": 0.4678575428379438, "grad_norm": 1.4216548204421997, "learning_rate": 4.175e-05, "loss": 0.5351, "step": 8355 
}, { "epoch": 0.4679135401500728, "grad_norm": 1.3175214529037476, "learning_rate": 4.1755e-05, "loss": 0.4135, "step": 8356 }, { "epoch": 0.4679695374622018, "grad_norm": 1.3715314865112305, "learning_rate": 4.176000000000001e-05, "loss": 0.4682, "step": 8357 }, { "epoch": 0.46802553477433084, "grad_norm": 1.3775523900985718, "learning_rate": 4.1765000000000004e-05, "loss": 0.3678, "step": 8358 }, { "epoch": 0.46808153208645986, "grad_norm": 0.9641080498695374, "learning_rate": 4.177e-05, "loss": 0.352, "step": 8359 }, { "epoch": 0.4681375293985889, "grad_norm": 1.3386561870574951, "learning_rate": 4.1775000000000006e-05, "loss": 0.3755, "step": 8360 }, { "epoch": 0.4681935267107179, "grad_norm": 1.8422759771347046, "learning_rate": 4.178e-05, "loss": 0.4397, "step": 8361 }, { "epoch": 0.4682495240228469, "grad_norm": 1.231330394744873, "learning_rate": 4.1785e-05, "loss": 0.4097, "step": 8362 }, { "epoch": 0.46830552133497594, "grad_norm": 1.1578826904296875, "learning_rate": 4.179e-05, "loss": 0.4185, "step": 8363 }, { "epoch": 0.46836151864710496, "grad_norm": 1.117853045463562, "learning_rate": 4.1795e-05, "loss": 0.4176, "step": 8364 }, { "epoch": 0.468417515959234, "grad_norm": 1.170587420463562, "learning_rate": 4.18e-05, "loss": 0.3724, "step": 8365 }, { "epoch": 0.468473513271363, "grad_norm": 1.0577501058578491, "learning_rate": 4.1805e-05, "loss": 0.4205, "step": 8366 }, { "epoch": 0.468529510583492, "grad_norm": 1.3998960256576538, "learning_rate": 4.181000000000001e-05, "loss": 0.5887, "step": 8367 }, { "epoch": 0.46858550789562103, "grad_norm": 1.2609167098999023, "learning_rate": 4.1815000000000005e-05, "loss": 0.4566, "step": 8368 }, { "epoch": 0.46864150520775005, "grad_norm": 1.1134520769119263, "learning_rate": 4.182e-05, "loss": 0.4511, "step": 8369 }, { "epoch": 0.4686975025198791, "grad_norm": 1.513018250465393, "learning_rate": 4.1825e-05, "loss": 0.4166, "step": 8370 }, { "epoch": 0.46875349983200804, "grad_norm": 1.2183899879455566, 
"learning_rate": 4.1830000000000004e-05, "loss": 0.3214, "step": 8371 }, { "epoch": 0.46880949714413706, "grad_norm": 1.2080955505371094, "learning_rate": 4.1835e-05, "loss": 0.4435, "step": 8372 }, { "epoch": 0.4688654944562661, "grad_norm": 1.2426809072494507, "learning_rate": 4.184e-05, "loss": 0.5445, "step": 8373 }, { "epoch": 0.4689214917683951, "grad_norm": 1.1984511613845825, "learning_rate": 4.1845000000000003e-05, "loss": 0.3413, "step": 8374 }, { "epoch": 0.4689774890805241, "grad_norm": 1.206897497177124, "learning_rate": 4.185e-05, "loss": 0.6036, "step": 8375 }, { "epoch": 0.46903348639265313, "grad_norm": 1.1841862201690674, "learning_rate": 4.1855e-05, "loss": 0.3707, "step": 8376 }, { "epoch": 0.46908948370478215, "grad_norm": 1.1985225677490234, "learning_rate": 4.186e-05, "loss": 0.4071, "step": 8377 }, { "epoch": 0.46914548101691117, "grad_norm": 1.1437454223632812, "learning_rate": 4.1865000000000007e-05, "loss": 0.3683, "step": 8378 }, { "epoch": 0.4692014783290402, "grad_norm": 1.1463754177093506, "learning_rate": 4.1870000000000004e-05, "loss": 0.3746, "step": 8379 }, { "epoch": 0.4692574756411692, "grad_norm": 1.1942380666732788, "learning_rate": 4.1875e-05, "loss": 0.3742, "step": 8380 }, { "epoch": 0.4693134729532982, "grad_norm": 1.176209807395935, "learning_rate": 4.1880000000000006e-05, "loss": 0.4211, "step": 8381 }, { "epoch": 0.46936947026542725, "grad_norm": 1.2539499998092651, "learning_rate": 4.1885e-05, "loss": 0.4638, "step": 8382 }, { "epoch": 0.46942546757755627, "grad_norm": 1.2557951211929321, "learning_rate": 4.189e-05, "loss": 0.3983, "step": 8383 }, { "epoch": 0.4694814648896853, "grad_norm": 1.3948652744293213, "learning_rate": 4.1895e-05, "loss": 0.4609, "step": 8384 }, { "epoch": 0.4695374622018143, "grad_norm": 1.2727895975112915, "learning_rate": 4.19e-05, "loss": 0.3754, "step": 8385 }, { "epoch": 0.4695934595139433, "grad_norm": 1.0155041217803955, "learning_rate": 4.1905e-05, "loss": 0.3234, "step": 8386 }, { 
"epoch": 0.46964945682607234, "grad_norm": 1.5105594396591187, "learning_rate": 4.191e-05, "loss": 0.4672, "step": 8387 }, { "epoch": 0.46970545413820136, "grad_norm": 1.1224207878112793, "learning_rate": 4.1915e-05, "loss": 0.483, "step": 8388 }, { "epoch": 0.4697614514503304, "grad_norm": 1.2138935327529907, "learning_rate": 4.1920000000000005e-05, "loss": 0.3697, "step": 8389 }, { "epoch": 0.4698174487624594, "grad_norm": 1.4979029893875122, "learning_rate": 4.1925e-05, "loss": 0.6758, "step": 8390 }, { "epoch": 0.4698734460745884, "grad_norm": 1.3279343843460083, "learning_rate": 4.193e-05, "loss": 0.3267, "step": 8391 }, { "epoch": 0.46992944338671744, "grad_norm": 1.2012410163879395, "learning_rate": 4.1935000000000004e-05, "loss": 0.3242, "step": 8392 }, { "epoch": 0.46998544069884646, "grad_norm": 1.4319946765899658, "learning_rate": 4.194e-05, "loss": 0.601, "step": 8393 }, { "epoch": 0.4700414380109755, "grad_norm": 1.10093355178833, "learning_rate": 4.1945e-05, "loss": 0.4318, "step": 8394 }, { "epoch": 0.4700974353231045, "grad_norm": 1.4722977876663208, "learning_rate": 4.195e-05, "loss": 0.5919, "step": 8395 }, { "epoch": 0.4701534326352335, "grad_norm": 1.5779893398284912, "learning_rate": 4.1955e-05, "loss": 0.4911, "step": 8396 }, { "epoch": 0.47020942994736253, "grad_norm": 1.1077783107757568, "learning_rate": 4.196e-05, "loss": 0.4134, "step": 8397 }, { "epoch": 0.47026542725949155, "grad_norm": 1.2830719947814941, "learning_rate": 4.1965e-05, "loss": 0.4634, "step": 8398 }, { "epoch": 0.47032142457162057, "grad_norm": 1.5814772844314575, "learning_rate": 4.1970000000000006e-05, "loss": 0.4829, "step": 8399 }, { "epoch": 0.4703774218837496, "grad_norm": 1.3765275478363037, "learning_rate": 4.1975000000000004e-05, "loss": 0.4915, "step": 8400 }, { "epoch": 0.4704334191958786, "grad_norm": 1.1571701765060425, "learning_rate": 4.198e-05, "loss": 0.3825, "step": 8401 }, { "epoch": 0.47048941650800763, "grad_norm": 1.8681832551956177, "learning_rate": 
4.1985000000000005e-05, "loss": 0.7184, "step": 8402 }, { "epoch": 0.47054541382013665, "grad_norm": 1.151965618133545, "learning_rate": 4.199e-05, "loss": 0.3928, "step": 8403 }, { "epoch": 0.47060141113226567, "grad_norm": 1.2336509227752686, "learning_rate": 4.1995e-05, "loss": 0.4089, "step": 8404 }, { "epoch": 0.4706574084443947, "grad_norm": 1.2102365493774414, "learning_rate": 4.2e-05, "loss": 0.4185, "step": 8405 }, { "epoch": 0.4707134057565237, "grad_norm": 1.2155449390411377, "learning_rate": 4.2005e-05, "loss": 0.4994, "step": 8406 }, { "epoch": 0.4707694030686527, "grad_norm": 1.1627260446548462, "learning_rate": 4.201e-05, "loss": 0.4654, "step": 8407 }, { "epoch": 0.47082540038078174, "grad_norm": 1.1251270771026611, "learning_rate": 4.2015000000000003e-05, "loss": 0.4778, "step": 8408 }, { "epoch": 0.47088139769291076, "grad_norm": 1.2458096742630005, "learning_rate": 4.202e-05, "loss": 0.3236, "step": 8409 }, { "epoch": 0.4709373950050398, "grad_norm": 1.3230431079864502, "learning_rate": 4.2025000000000005e-05, "loss": 0.456, "step": 8410 }, { "epoch": 0.4709933923171688, "grad_norm": 1.3141199350357056, "learning_rate": 4.203e-05, "loss": 0.5702, "step": 8411 }, { "epoch": 0.4710493896292978, "grad_norm": 1.612363338470459, "learning_rate": 4.2035e-05, "loss": 0.5587, "step": 8412 }, { "epoch": 0.4711053869414268, "grad_norm": 1.1775351762771606, "learning_rate": 4.2040000000000004e-05, "loss": 0.4286, "step": 8413 }, { "epoch": 0.4711613842535558, "grad_norm": 1.340772032737732, "learning_rate": 4.2045e-05, "loss": 0.476, "step": 8414 }, { "epoch": 0.4712173815656848, "grad_norm": 1.1946804523468018, "learning_rate": 4.205e-05, "loss": 0.5355, "step": 8415 }, { "epoch": 0.47127337887781384, "grad_norm": 1.1417064666748047, "learning_rate": 4.2055e-05, "loss": 0.4496, "step": 8416 }, { "epoch": 0.47132937618994286, "grad_norm": 1.1817623376846313, "learning_rate": 4.206e-05, "loss": 0.4119, "step": 8417 }, { "epoch": 0.4713853735020719, 
"grad_norm": 1.3147894144058228, "learning_rate": 4.2065000000000005e-05, "loss": 0.5118, "step": 8418 }, { "epoch": 0.4714413708142009, "grad_norm": 1.122429370880127, "learning_rate": 4.207e-05, "loss": 0.3612, "step": 8419 }, { "epoch": 0.4714973681263299, "grad_norm": 1.4243937730789185, "learning_rate": 4.2075000000000006e-05, "loss": 0.5584, "step": 8420 }, { "epoch": 0.47155336543845894, "grad_norm": 1.4853757619857788, "learning_rate": 4.2080000000000004e-05, "loss": 0.3971, "step": 8421 }, { "epoch": 0.47160936275058796, "grad_norm": 1.1240071058273315, "learning_rate": 4.2085e-05, "loss": 0.4124, "step": 8422 }, { "epoch": 0.471665360062717, "grad_norm": 1.3261979818344116, "learning_rate": 4.209e-05, "loss": 0.445, "step": 8423 }, { "epoch": 0.471721357374846, "grad_norm": 1.1619199514389038, "learning_rate": 4.2095e-05, "loss": 0.3402, "step": 8424 }, { "epoch": 0.471777354686975, "grad_norm": 1.2499545812606812, "learning_rate": 4.21e-05, "loss": 0.3863, "step": 8425 }, { "epoch": 0.47183335199910403, "grad_norm": 1.130560040473938, "learning_rate": 4.2105e-05, "loss": 0.3989, "step": 8426 }, { "epoch": 0.47188934931123305, "grad_norm": 1.137787938117981, "learning_rate": 4.211e-05, "loss": 0.3587, "step": 8427 }, { "epoch": 0.47194534662336207, "grad_norm": 1.1024013757705688, "learning_rate": 4.2115000000000006e-05, "loss": 0.416, "step": 8428 }, { "epoch": 0.4720013439354911, "grad_norm": 1.1284549236297607, "learning_rate": 4.212e-05, "loss": 0.4004, "step": 8429 }, { "epoch": 0.4720573412476201, "grad_norm": 1.240843415260315, "learning_rate": 4.2125e-05, "loss": 0.4798, "step": 8430 }, { "epoch": 0.47211333855974913, "grad_norm": 1.1080268621444702, "learning_rate": 4.2130000000000005e-05, "loss": 0.389, "step": 8431 }, { "epoch": 0.47216933587187815, "grad_norm": 1.2600219249725342, "learning_rate": 4.2135e-05, "loss": 0.4573, "step": 8432 }, { "epoch": 0.47222533318400717, "grad_norm": 1.1538052558898926, "learning_rate": 4.214e-05, "loss": 
0.305, "step": 8433 }, { "epoch": 0.4722813304961362, "grad_norm": 1.1136164665222168, "learning_rate": 4.2145000000000004e-05, "loss": 0.4308, "step": 8434 }, { "epoch": 0.4723373278082652, "grad_norm": 1.4552466869354248, "learning_rate": 4.215e-05, "loss": 0.491, "step": 8435 }, { "epoch": 0.4723933251203942, "grad_norm": 1.4417535066604614, "learning_rate": 4.2155e-05, "loss": 0.4447, "step": 8436 }, { "epoch": 0.47244932243252324, "grad_norm": 1.243757724761963, "learning_rate": 4.2159999999999996e-05, "loss": 0.3696, "step": 8437 }, { "epoch": 0.47250531974465226, "grad_norm": 1.1235696077346802, "learning_rate": 4.216500000000001e-05, "loss": 0.3932, "step": 8438 }, { "epoch": 0.4725613170567813, "grad_norm": 1.2974927425384521, "learning_rate": 4.2170000000000005e-05, "loss": 0.4324, "step": 8439 }, { "epoch": 0.4726173143689103, "grad_norm": 1.5832685232162476, "learning_rate": 4.2175e-05, "loss": 0.5752, "step": 8440 }, { "epoch": 0.4726733116810393, "grad_norm": 1.3697322607040405, "learning_rate": 4.2180000000000006e-05, "loss": 0.3736, "step": 8441 }, { "epoch": 0.47272930899316834, "grad_norm": 1.3663676977157593, "learning_rate": 4.2185000000000004e-05, "loss": 0.5753, "step": 8442 }, { "epoch": 0.47278530630529736, "grad_norm": 1.1377853155136108, "learning_rate": 4.219e-05, "loss": 0.4741, "step": 8443 }, { "epoch": 0.4728413036174264, "grad_norm": 1.0805935859680176, "learning_rate": 4.2195e-05, "loss": 0.4755, "step": 8444 }, { "epoch": 0.4728973009295554, "grad_norm": 1.3271127939224243, "learning_rate": 4.22e-05, "loss": 0.4224, "step": 8445 }, { "epoch": 0.4729532982416844, "grad_norm": 1.1402291059494019, "learning_rate": 4.2205e-05, "loss": 0.6048, "step": 8446 }, { "epoch": 0.47300929555381344, "grad_norm": 1.3297299146652222, "learning_rate": 4.221e-05, "loss": 0.4885, "step": 8447 }, { "epoch": 0.47306529286594245, "grad_norm": 1.0995362997055054, "learning_rate": 4.2215e-05, "loss": 0.4252, "step": 8448 }, { "epoch": 0.4731212901780715, 
"grad_norm": 1.1222656965255737, "learning_rate": 4.2220000000000006e-05, "loss": 0.3197, "step": 8449 }, { "epoch": 0.4731772874902005, "grad_norm": 1.27969229221344, "learning_rate": 4.2225e-05, "loss": 0.4638, "step": 8450 }, { "epoch": 0.4732332848023295, "grad_norm": 1.1901013851165771, "learning_rate": 4.223e-05, "loss": 0.4299, "step": 8451 }, { "epoch": 0.47328928211445853, "grad_norm": 1.2603455781936646, "learning_rate": 4.2235000000000005e-05, "loss": 0.3355, "step": 8452 }, { "epoch": 0.47334527942658755, "grad_norm": 1.155975341796875, "learning_rate": 4.224e-05, "loss": 0.3084, "step": 8453 }, { "epoch": 0.4734012767387165, "grad_norm": 1.3510417938232422, "learning_rate": 4.2245e-05, "loss": 0.5243, "step": 8454 }, { "epoch": 0.47345727405084553, "grad_norm": 1.082627773284912, "learning_rate": 4.2250000000000004e-05, "loss": 0.3634, "step": 8455 }, { "epoch": 0.47351327136297455, "grad_norm": 1.5972204208374023, "learning_rate": 4.2255e-05, "loss": 0.4188, "step": 8456 }, { "epoch": 0.47356926867510357, "grad_norm": 1.4694019556045532, "learning_rate": 4.226e-05, "loss": 0.3224, "step": 8457 }, { "epoch": 0.4736252659872326, "grad_norm": 1.2422003746032715, "learning_rate": 4.2265e-05, "loss": 0.5619, "step": 8458 }, { "epoch": 0.4736812632993616, "grad_norm": 1.2314832210540771, "learning_rate": 4.227000000000001e-05, "loss": 0.3839, "step": 8459 }, { "epoch": 0.47373726061149063, "grad_norm": 1.3565618991851807, "learning_rate": 4.2275000000000004e-05, "loss": 0.4406, "step": 8460 }, { "epoch": 0.47379325792361965, "grad_norm": 3.4537088871002197, "learning_rate": 4.228e-05, "loss": 0.5471, "step": 8461 }, { "epoch": 0.47384925523574867, "grad_norm": 1.3622307777404785, "learning_rate": 4.2285e-05, "loss": 0.4902, "step": 8462 }, { "epoch": 0.4739052525478777, "grad_norm": 1.2124581336975098, "learning_rate": 4.229e-05, "loss": 0.4515, "step": 8463 }, { "epoch": 0.4739612498600067, "grad_norm": 1.5086095333099365, "learning_rate": 4.2295e-05, 
"loss": 0.4574, "step": 8464 }, { "epoch": 0.4740172471721357, "grad_norm": 1.3772732019424438, "learning_rate": 4.23e-05, "loss": 0.4398, "step": 8465 }, { "epoch": 0.47407324448426474, "grad_norm": 1.1363698244094849, "learning_rate": 4.2305e-05, "loss": 0.4022, "step": 8466 }, { "epoch": 0.47412924179639376, "grad_norm": 1.329436182975769, "learning_rate": 4.231e-05, "loss": 0.4389, "step": 8467 }, { "epoch": 0.4741852391085228, "grad_norm": 1.2213752269744873, "learning_rate": 4.2315000000000004e-05, "loss": 0.3593, "step": 8468 }, { "epoch": 0.4742412364206518, "grad_norm": 1.3433914184570312, "learning_rate": 4.232e-05, "loss": 0.381, "step": 8469 }, { "epoch": 0.4742972337327808, "grad_norm": 1.1535258293151855, "learning_rate": 4.2325000000000006e-05, "loss": 0.3721, "step": 8470 }, { "epoch": 0.47435323104490984, "grad_norm": 1.349773645401001, "learning_rate": 4.233e-05, "loss": 0.4592, "step": 8471 }, { "epoch": 0.47440922835703886, "grad_norm": 1.6597414016723633, "learning_rate": 4.2335e-05, "loss": 0.4148, "step": 8472 }, { "epoch": 0.4744652256691679, "grad_norm": 1.2057273387908936, "learning_rate": 4.2340000000000005e-05, "loss": 0.3378, "step": 8473 }, { "epoch": 0.4745212229812969, "grad_norm": 1.6307529211044312, "learning_rate": 4.2345e-05, "loss": 0.421, "step": 8474 }, { "epoch": 0.4745772202934259, "grad_norm": 1.352099895477295, "learning_rate": 4.235e-05, "loss": 0.3547, "step": 8475 }, { "epoch": 0.47463321760555494, "grad_norm": 1.4897671937942505, "learning_rate": 4.2355000000000004e-05, "loss": 0.4558, "step": 8476 }, { "epoch": 0.47468921491768395, "grad_norm": 1.261129379272461, "learning_rate": 4.236e-05, "loss": 0.42, "step": 8477 }, { "epoch": 0.474745212229813, "grad_norm": 1.3802372217178345, "learning_rate": 4.2365000000000005e-05, "loss": 0.3786, "step": 8478 }, { "epoch": 0.474801209541942, "grad_norm": 1.2289320230484009, "learning_rate": 4.237e-05, "loss": 0.4587, "step": 8479 }, { "epoch": 0.474857206854071, "grad_norm": 
1.220415711402893, "learning_rate": 4.237500000000001e-05, "loss": 0.4528, "step": 8480 }, { "epoch": 0.47491320416620003, "grad_norm": 1.6037752628326416, "learning_rate": 4.2380000000000004e-05, "loss": 0.4565, "step": 8481 }, { "epoch": 0.47496920147832905, "grad_norm": 1.3115330934524536, "learning_rate": 4.2385e-05, "loss": 0.4804, "step": 8482 }, { "epoch": 0.47502519879045807, "grad_norm": 1.4712803363800049, "learning_rate": 4.239e-05, "loss": 0.3827, "step": 8483 }, { "epoch": 0.4750811961025871, "grad_norm": 1.428632378578186, "learning_rate": 4.2395e-05, "loss": 0.5751, "step": 8484 }, { "epoch": 0.4751371934147161, "grad_norm": 1.1504849195480347, "learning_rate": 4.24e-05, "loss": 0.513, "step": 8485 }, { "epoch": 0.4751931907268451, "grad_norm": 1.1852437257766724, "learning_rate": 4.2405e-05, "loss": 0.5621, "step": 8486 }, { "epoch": 0.47524918803897415, "grad_norm": 1.389432430267334, "learning_rate": 4.241e-05, "loss": 0.264, "step": 8487 }, { "epoch": 0.47530518535110317, "grad_norm": 1.338192105293274, "learning_rate": 4.2415000000000006e-05, "loss": 0.4091, "step": 8488 }, { "epoch": 0.4753611826632322, "grad_norm": 1.1868879795074463, "learning_rate": 4.2420000000000004e-05, "loss": 0.4872, "step": 8489 }, { "epoch": 0.4754171799753612, "grad_norm": 1.2049229145050049, "learning_rate": 4.2425e-05, "loss": 0.3504, "step": 8490 }, { "epoch": 0.4754731772874902, "grad_norm": 1.1247048377990723, "learning_rate": 4.2430000000000005e-05, "loss": 0.3739, "step": 8491 }, { "epoch": 0.47552917459961924, "grad_norm": 1.4636249542236328, "learning_rate": 4.2435e-05, "loss": 0.4197, "step": 8492 }, { "epoch": 0.47558517191174826, "grad_norm": 1.7594317197799683, "learning_rate": 4.244e-05, "loss": 0.4573, "step": 8493 }, { "epoch": 0.4756411692238773, "grad_norm": 1.2627742290496826, "learning_rate": 4.2445000000000004e-05, "loss": 0.4462, "step": 8494 }, { "epoch": 0.47569716653600624, "grad_norm": 1.0841120481491089, "learning_rate": 4.245e-05, "loss": 
0.3393, "step": 8495 }, { "epoch": 0.47575316384813526, "grad_norm": 1.362037181854248, "learning_rate": 4.2455e-05, "loss": 0.6053, "step": 8496 }, { "epoch": 0.4758091611602643, "grad_norm": 1.2184008359909058, "learning_rate": 4.246e-05, "loss": 0.4431, "step": 8497 }, { "epoch": 0.4758651584723933, "grad_norm": 1.2519798278808594, "learning_rate": 4.246500000000001e-05, "loss": 0.4529, "step": 8498 }, { "epoch": 0.4759211557845223, "grad_norm": 1.2431145906448364, "learning_rate": 4.2470000000000005e-05, "loss": 0.366, "step": 8499 }, { "epoch": 0.47597715309665134, "grad_norm": 1.226942539215088, "learning_rate": 4.2475e-05, "loss": 0.5221, "step": 8500 }, { "epoch": 0.47603315040878036, "grad_norm": 1.2117550373077393, "learning_rate": 4.248e-05, "loss": 0.5224, "step": 8501 }, { "epoch": 0.4760891477209094, "grad_norm": 1.5471678972244263, "learning_rate": 4.2485000000000004e-05, "loss": 0.5953, "step": 8502 }, { "epoch": 0.4761451450330384, "grad_norm": 1.3753056526184082, "learning_rate": 4.249e-05, "loss": 0.5152, "step": 8503 }, { "epoch": 0.4762011423451674, "grad_norm": 1.514878511428833, "learning_rate": 4.2495e-05, "loss": 0.7663, "step": 8504 }, { "epoch": 0.47625713965729644, "grad_norm": 1.3028452396392822, "learning_rate": 4.25e-05, "loss": 0.4354, "step": 8505 }, { "epoch": 0.47631313696942545, "grad_norm": 1.2990000247955322, "learning_rate": 4.2505e-05, "loss": 0.3929, "step": 8506 }, { "epoch": 0.4763691342815545, "grad_norm": 1.1618504524230957, "learning_rate": 4.251e-05, "loss": 0.3683, "step": 8507 }, { "epoch": 0.4764251315936835, "grad_norm": 1.2800405025482178, "learning_rate": 4.2515e-05, "loss": 0.4431, "step": 8508 }, { "epoch": 0.4764811289058125, "grad_norm": 1.145199179649353, "learning_rate": 4.2520000000000006e-05, "loss": 0.4034, "step": 8509 }, { "epoch": 0.47653712621794153, "grad_norm": 1.1259734630584717, "learning_rate": 4.2525000000000004e-05, "loss": 0.453, "step": 8510 }, { "epoch": 0.47659312353007055, "grad_norm": 
1.203216791152954, "learning_rate": 4.253e-05, "loss": 0.4499, "step": 8511 }, { "epoch": 0.47664912084219957, "grad_norm": 1.1637547016143799, "learning_rate": 4.2535000000000005e-05, "loss": 0.415, "step": 8512 }, { "epoch": 0.4767051181543286, "grad_norm": 1.1568801403045654, "learning_rate": 4.254e-05, "loss": 0.4233, "step": 8513 }, { "epoch": 0.4767611154664576, "grad_norm": 1.6813509464263916, "learning_rate": 4.2545e-05, "loss": 0.5052, "step": 8514 }, { "epoch": 0.4768171127785866, "grad_norm": 1.1524626016616821, "learning_rate": 4.2550000000000004e-05, "loss": 0.4701, "step": 8515 }, { "epoch": 0.47687311009071565, "grad_norm": 1.2931450605392456, "learning_rate": 4.2555e-05, "loss": 0.3449, "step": 8516 }, { "epoch": 0.47692910740284467, "grad_norm": 1.2546114921569824, "learning_rate": 4.256e-05, "loss": 0.41, "step": 8517 }, { "epoch": 0.4769851047149737, "grad_norm": 1.379087209701538, "learning_rate": 4.2564999999999997e-05, "loss": 0.5385, "step": 8518 }, { "epoch": 0.4770411020271027, "grad_norm": 1.0247600078582764, "learning_rate": 4.257000000000001e-05, "loss": 0.3114, "step": 8519 }, { "epoch": 0.4770970993392317, "grad_norm": 1.1872286796569824, "learning_rate": 4.2575000000000005e-05, "loss": 0.3339, "step": 8520 }, { "epoch": 0.47715309665136074, "grad_norm": 1.1792237758636475, "learning_rate": 4.258e-05, "loss": 0.6009, "step": 8521 }, { "epoch": 0.47720909396348976, "grad_norm": 1.1308717727661133, "learning_rate": 4.2585e-05, "loss": 0.4187, "step": 8522 }, { "epoch": 0.4772650912756188, "grad_norm": 1.3926209211349487, "learning_rate": 4.2590000000000004e-05, "loss": 0.4622, "step": 8523 }, { "epoch": 0.4773210885877478, "grad_norm": 1.264739990234375, "learning_rate": 4.2595e-05, "loss": 0.3727, "step": 8524 }, { "epoch": 0.4773770858998768, "grad_norm": 1.2509269714355469, "learning_rate": 4.26e-05, "loss": 0.5148, "step": 8525 }, { "epoch": 0.47743308321200584, "grad_norm": 1.2540090084075928, "learning_rate": 4.2605e-05, "loss": 
0.4493, "step": 8526 }, { "epoch": 0.47748908052413486, "grad_norm": 1.2280350923538208, "learning_rate": 4.261e-05, "loss": 0.3851, "step": 8527 }, { "epoch": 0.4775450778362639, "grad_norm": 1.5690923929214478, "learning_rate": 4.2615e-05, "loss": 0.3389, "step": 8528 }, { "epoch": 0.4776010751483929, "grad_norm": 1.2065706253051758, "learning_rate": 4.262e-05, "loss": 0.4382, "step": 8529 }, { "epoch": 0.4776570724605219, "grad_norm": 1.1458523273468018, "learning_rate": 4.2625000000000006e-05, "loss": 0.3564, "step": 8530 }, { "epoch": 0.47771306977265093, "grad_norm": 1.0513176918029785, "learning_rate": 4.2630000000000004e-05, "loss": 0.3677, "step": 8531 }, { "epoch": 0.47776906708477995, "grad_norm": 1.4468929767608643, "learning_rate": 4.2635e-05, "loss": 0.5722, "step": 8532 }, { "epoch": 0.47782506439690897, "grad_norm": 1.1844818592071533, "learning_rate": 4.2640000000000005e-05, "loss": 0.3671, "step": 8533 }, { "epoch": 0.477881061709038, "grad_norm": 1.572331428527832, "learning_rate": 4.2645e-05, "loss": 0.4609, "step": 8534 }, { "epoch": 0.477937059021167, "grad_norm": 1.3428393602371216, "learning_rate": 4.265e-05, "loss": 0.5407, "step": 8535 }, { "epoch": 0.47799305633329603, "grad_norm": 1.227597713470459, "learning_rate": 4.2655e-05, "loss": 0.4428, "step": 8536 }, { "epoch": 0.478049053645425, "grad_norm": 1.580487608909607, "learning_rate": 4.266e-05, "loss": 0.3856, "step": 8537 }, { "epoch": 0.478105050957554, "grad_norm": 1.3850281238555908, "learning_rate": 4.2665e-05, "loss": 0.4733, "step": 8538 }, { "epoch": 0.47816104826968303, "grad_norm": 1.440145492553711, "learning_rate": 4.267e-05, "loss": 0.655, "step": 8539 }, { "epoch": 0.47821704558181205, "grad_norm": 1.1501185894012451, "learning_rate": 4.2675e-05, "loss": 0.4654, "step": 8540 }, { "epoch": 0.47827304289394107, "grad_norm": 1.074999213218689, "learning_rate": 4.2680000000000005e-05, "loss": 0.3818, "step": 8541 }, { "epoch": 0.4783290402060701, "grad_norm": 
1.189826488494873, "learning_rate": 4.2685e-05, "loss": 0.4428, "step": 8542 }, { "epoch": 0.4783850375181991, "grad_norm": 1.0231224298477173, "learning_rate": 4.269e-05, "loss": 0.3145, "step": 8543 }, { "epoch": 0.4784410348303281, "grad_norm": 1.1621557474136353, "learning_rate": 4.2695000000000004e-05, "loss": 0.3708, "step": 8544 }, { "epoch": 0.47849703214245715, "grad_norm": 1.0861577987670898, "learning_rate": 4.27e-05, "loss": 0.444, "step": 8545 }, { "epoch": 0.47855302945458617, "grad_norm": 1.2717492580413818, "learning_rate": 4.2705e-05, "loss": 0.413, "step": 8546 }, { "epoch": 0.4786090267667152, "grad_norm": 1.286747694015503, "learning_rate": 4.271e-05, "loss": 0.4528, "step": 8547 }, { "epoch": 0.4786650240788442, "grad_norm": 1.2454153299331665, "learning_rate": 4.2715e-05, "loss": 0.4464, "step": 8548 }, { "epoch": 0.4787210213909732, "grad_norm": 1.143328070640564, "learning_rate": 4.2720000000000004e-05, "loss": 0.3162, "step": 8549 }, { "epoch": 0.47877701870310224, "grad_norm": 1.269060492515564, "learning_rate": 4.2725e-05, "loss": 0.6328, "step": 8550 }, { "epoch": 0.47883301601523126, "grad_norm": 1.2941027879714966, "learning_rate": 4.2730000000000006e-05, "loss": 0.4825, "step": 8551 }, { "epoch": 0.4788890133273603, "grad_norm": 1.3494073152542114, "learning_rate": 4.2735e-05, "loss": 0.5508, "step": 8552 }, { "epoch": 0.4789450106394893, "grad_norm": 1.3683594465255737, "learning_rate": 4.274e-05, "loss": 0.3838, "step": 8553 }, { "epoch": 0.4790010079516183, "grad_norm": 1.2412092685699463, "learning_rate": 4.2745000000000005e-05, "loss": 0.401, "step": 8554 }, { "epoch": 0.47905700526374734, "grad_norm": 1.2897952795028687, "learning_rate": 4.275e-05, "loss": 0.4075, "step": 8555 }, { "epoch": 0.47911300257587636, "grad_norm": 1.2257930040359497, "learning_rate": 4.2755e-05, "loss": 0.4128, "step": 8556 }, { "epoch": 0.4791689998880054, "grad_norm": 1.193520188331604, "learning_rate": 4.276e-05, "loss": 0.3751, "step": 8557 }, { 
"epoch": 0.4792249972001344, "grad_norm": 1.2806997299194336, "learning_rate": 4.2765e-05, "loss": 0.3365, "step": 8558 }, { "epoch": 0.4792809945122634, "grad_norm": 1.281146764755249, "learning_rate": 4.2770000000000006e-05, "loss": 0.4771, "step": 8559 }, { "epoch": 0.47933699182439243, "grad_norm": 1.3099502325057983, "learning_rate": 4.2775e-05, "loss": 0.3065, "step": 8560 }, { "epoch": 0.47939298913652145, "grad_norm": 1.3549703359603882, "learning_rate": 4.278e-05, "loss": 0.3679, "step": 8561 }, { "epoch": 0.47944898644865047, "grad_norm": 1.138702154159546, "learning_rate": 4.2785000000000005e-05, "loss": 0.3429, "step": 8562 }, { "epoch": 0.4795049837607795, "grad_norm": 1.219333529472351, "learning_rate": 4.279e-05, "loss": 0.2814, "step": 8563 }, { "epoch": 0.4795609810729085, "grad_norm": 1.2598845958709717, "learning_rate": 4.2795e-05, "loss": 0.35, "step": 8564 }, { "epoch": 0.47961697838503753, "grad_norm": 1.1545138359069824, "learning_rate": 4.2800000000000004e-05, "loss": 0.4768, "step": 8565 }, { "epoch": 0.47967297569716655, "grad_norm": 1.149704933166504, "learning_rate": 4.2805e-05, "loss": 0.5022, "step": 8566 }, { "epoch": 0.47972897300929557, "grad_norm": 1.1559488773345947, "learning_rate": 4.281e-05, "loss": 0.368, "step": 8567 }, { "epoch": 0.4797849703214246, "grad_norm": 1.2474576234817505, "learning_rate": 4.2815e-05, "loss": 0.3818, "step": 8568 }, { "epoch": 0.4798409676335536, "grad_norm": 1.2686368227005005, "learning_rate": 4.282000000000001e-05, "loss": 0.3357, "step": 8569 }, { "epoch": 0.4798969649456826, "grad_norm": 1.383123755455017, "learning_rate": 4.2825000000000004e-05, "loss": 0.4867, "step": 8570 }, { "epoch": 0.47995296225781164, "grad_norm": 1.1608155965805054, "learning_rate": 4.283e-05, "loss": 0.4371, "step": 8571 }, { "epoch": 0.48000895956994066, "grad_norm": 1.2059683799743652, "learning_rate": 4.2835000000000006e-05, "loss": 0.4577, "step": 8572 }, { "epoch": 0.4800649568820697, "grad_norm": 
1.4019975662231445, "learning_rate": 4.284e-05, "loss": 0.38, "step": 8573 }, { "epoch": 0.4801209541941987, "grad_norm": 1.38151216506958, "learning_rate": 4.2845e-05, "loss": 0.5864, "step": 8574 }, { "epoch": 0.4801769515063277, "grad_norm": 1.21138334274292, "learning_rate": 4.285e-05, "loss": 0.4912, "step": 8575 }, { "epoch": 0.48023294881845674, "grad_norm": 1.4562879800796509, "learning_rate": 4.2855e-05, "loss": 0.427, "step": 8576 }, { "epoch": 0.48028894613058576, "grad_norm": 1.2790573835372925, "learning_rate": 4.286e-05, "loss": 0.4197, "step": 8577 }, { "epoch": 0.4803449434427147, "grad_norm": 1.2116519212722778, "learning_rate": 4.2865e-05, "loss": 0.3213, "step": 8578 }, { "epoch": 0.48040094075484374, "grad_norm": 1.5755831003189087, "learning_rate": 4.287000000000001e-05, "loss": 0.5585, "step": 8579 }, { "epoch": 0.48045693806697276, "grad_norm": 1.244770884513855, "learning_rate": 4.2875000000000005e-05, "loss": 0.4356, "step": 8580 }, { "epoch": 0.4805129353791018, "grad_norm": 1.1025171279907227, "learning_rate": 4.288e-05, "loss": 0.4699, "step": 8581 }, { "epoch": 0.4805689326912308, "grad_norm": 1.2222015857696533, "learning_rate": 4.2885e-05, "loss": 0.3861, "step": 8582 }, { "epoch": 0.4806249300033598, "grad_norm": 1.071301817893982, "learning_rate": 4.2890000000000004e-05, "loss": 0.3008, "step": 8583 }, { "epoch": 0.48068092731548884, "grad_norm": 1.2025933265686035, "learning_rate": 4.2895e-05, "loss": 0.3804, "step": 8584 }, { "epoch": 0.48073692462761786, "grad_norm": 1.428401231765747, "learning_rate": 4.29e-05, "loss": 0.4237, "step": 8585 }, { "epoch": 0.4807929219397469, "grad_norm": 1.6277501583099365, "learning_rate": 4.2905000000000003e-05, "loss": 0.4789, "step": 8586 }, { "epoch": 0.4808489192518759, "grad_norm": 1.3520350456237793, "learning_rate": 4.291e-05, "loss": 0.5518, "step": 8587 }, { "epoch": 0.4809049165640049, "grad_norm": 1.2806624174118042, "learning_rate": 4.2915e-05, "loss": 0.522, "step": 8588 }, { 
"epoch": 0.48096091387613393, "grad_norm": 1.2821451425552368, "learning_rate": 4.292e-05, "loss": 0.4721, "step": 8589 }, { "epoch": 0.48101691118826295, "grad_norm": 1.2855767011642456, "learning_rate": 4.2925000000000007e-05, "loss": 0.3422, "step": 8590 }, { "epoch": 0.48107290850039197, "grad_norm": 1.1831409931182861, "learning_rate": 4.2930000000000004e-05, "loss": 0.4138, "step": 8591 }, { "epoch": 0.481128905812521, "grad_norm": 1.2454614639282227, "learning_rate": 4.2935e-05, "loss": 0.4348, "step": 8592 }, { "epoch": 0.48118490312465, "grad_norm": 1.504920482635498, "learning_rate": 4.2940000000000006e-05, "loss": 0.5542, "step": 8593 }, { "epoch": 0.48124090043677903, "grad_norm": 1.6143132448196411, "learning_rate": 4.2945e-05, "loss": 0.4956, "step": 8594 }, { "epoch": 0.48129689774890805, "grad_norm": 1.2793389558792114, "learning_rate": 4.295e-05, "loss": 0.385, "step": 8595 }, { "epoch": 0.48135289506103707, "grad_norm": 1.141887903213501, "learning_rate": 4.2955e-05, "loss": 0.3376, "step": 8596 }, { "epoch": 0.4814088923731661, "grad_norm": 1.3110154867172241, "learning_rate": 4.296e-05, "loss": 0.4257, "step": 8597 }, { "epoch": 0.4814648896852951, "grad_norm": 1.4544636011123657, "learning_rate": 4.2965e-05, "loss": 0.4742, "step": 8598 }, { "epoch": 0.4815208869974241, "grad_norm": 1.5760170221328735, "learning_rate": 4.2970000000000004e-05, "loss": 0.5291, "step": 8599 }, { "epoch": 0.48157688430955314, "grad_norm": 1.3123674392700195, "learning_rate": 4.2975e-05, "loss": 0.4305, "step": 8600 }, { "epoch": 0.48163288162168216, "grad_norm": 1.5963950157165527, "learning_rate": 4.2980000000000005e-05, "loss": 0.5198, "step": 8601 }, { "epoch": 0.4816888789338112, "grad_norm": 1.2458404302597046, "learning_rate": 4.2985e-05, "loss": 0.492, "step": 8602 }, { "epoch": 0.4817448762459402, "grad_norm": 1.2439574003219604, "learning_rate": 4.299e-05, "loss": 0.5217, "step": 8603 }, { "epoch": 0.4818008735580692, "grad_norm": 1.4142569303512573, 
"learning_rate": 4.2995000000000004e-05, "loss": 0.3955, "step": 8604 }, { "epoch": 0.48185687087019824, "grad_norm": 1.3641350269317627, "learning_rate": 4.3e-05, "loss": 0.5114, "step": 8605 }, { "epoch": 0.48191286818232726, "grad_norm": 1.1383672952651978, "learning_rate": 4.3005e-05, "loss": 0.411, "step": 8606 }, { "epoch": 0.4819688654944563, "grad_norm": 1.6961710453033447, "learning_rate": 4.301e-05, "loss": 0.4136, "step": 8607 }, { "epoch": 0.4820248628065853, "grad_norm": 1.1815787553787231, "learning_rate": 4.3015e-05, "loss": 0.3027, "step": 8608 }, { "epoch": 0.4820808601187143, "grad_norm": 1.0456570386886597, "learning_rate": 4.3020000000000005e-05, "loss": 0.378, "step": 8609 }, { "epoch": 0.48213685743084334, "grad_norm": 1.0018742084503174, "learning_rate": 4.3025e-05, "loss": 0.384, "step": 8610 }, { "epoch": 0.48219285474297235, "grad_norm": 1.1864042282104492, "learning_rate": 4.3030000000000006e-05, "loss": 0.528, "step": 8611 }, { "epoch": 0.4822488520551014, "grad_norm": 1.4641215801239014, "learning_rate": 4.3035000000000004e-05, "loss": 0.5839, "step": 8612 }, { "epoch": 0.4823048493672304, "grad_norm": 1.232445478439331, "learning_rate": 4.304e-05, "loss": 0.449, "step": 8613 }, { "epoch": 0.4823608466793594, "grad_norm": 1.4176139831542969, "learning_rate": 4.3045e-05, "loss": 0.4188, "step": 8614 }, { "epoch": 0.48241684399148843, "grad_norm": 2.108597993850708, "learning_rate": 4.305e-05, "loss": 0.3727, "step": 8615 }, { "epoch": 0.48247284130361745, "grad_norm": 1.249324083328247, "learning_rate": 4.3055e-05, "loss": 0.4655, "step": 8616 }, { "epoch": 0.48252883861574647, "grad_norm": 1.4487379789352417, "learning_rate": 4.306e-05, "loss": 0.4904, "step": 8617 }, { "epoch": 0.4825848359278755, "grad_norm": 1.620957612991333, "learning_rate": 4.3065e-05, "loss": 0.3909, "step": 8618 }, { "epoch": 0.48264083324000445, "grad_norm": 1.402370810508728, "learning_rate": 4.3070000000000006e-05, "loss": 0.6593, "step": 8619 }, { "epoch": 
0.48269683055213347, "grad_norm": 1.1624068021774292, "learning_rate": 4.3075000000000003e-05, "loss": 0.4366, "step": 8620 }, { "epoch": 0.4827528278642625, "grad_norm": 1.351397156715393, "learning_rate": 4.308e-05, "loss": 0.5857, "step": 8621 }, { "epoch": 0.4828088251763915, "grad_norm": 1.030238151550293, "learning_rate": 4.3085000000000005e-05, "loss": 0.3954, "step": 8622 }, { "epoch": 0.48286482248852053, "grad_norm": 1.4337857961654663, "learning_rate": 4.309e-05, "loss": 0.4261, "step": 8623 }, { "epoch": 0.48292081980064955, "grad_norm": 1.1636183261871338, "learning_rate": 4.3095e-05, "loss": 0.2829, "step": 8624 }, { "epoch": 0.48297681711277857, "grad_norm": 1.1158586740493774, "learning_rate": 4.3100000000000004e-05, "loss": 0.4353, "step": 8625 }, { "epoch": 0.4830328144249076, "grad_norm": 2.748267889022827, "learning_rate": 4.3105e-05, "loss": 0.4636, "step": 8626 }, { "epoch": 0.4830888117370366, "grad_norm": 1.0934284925460815, "learning_rate": 4.311e-05, "loss": 0.3526, "step": 8627 }, { "epoch": 0.4831448090491656, "grad_norm": 1.2375462055206299, "learning_rate": 4.3115e-05, "loss": 0.444, "step": 8628 }, { "epoch": 0.48320080636129464, "grad_norm": 1.3731330633163452, "learning_rate": 4.312000000000001e-05, "loss": 0.4732, "step": 8629 }, { "epoch": 0.48325680367342366, "grad_norm": 1.1914840936660767, "learning_rate": 4.3125000000000005e-05, "loss": 0.3496, "step": 8630 }, { "epoch": 0.4833128009855527, "grad_norm": 1.2739779949188232, "learning_rate": 4.313e-05, "loss": 0.6161, "step": 8631 }, { "epoch": 0.4833687982976817, "grad_norm": 1.5741862058639526, "learning_rate": 4.3135000000000006e-05, "loss": 0.6004, "step": 8632 }, { "epoch": 0.4834247956098107, "grad_norm": 1.4872034788131714, "learning_rate": 4.3140000000000004e-05, "loss": 0.6158, "step": 8633 }, { "epoch": 0.48348079292193974, "grad_norm": 2.9754796028137207, "learning_rate": 4.3145e-05, "loss": 0.4436, "step": 8634 }, { "epoch": 0.48353679023406876, "grad_norm": 
1.236561894416809, "learning_rate": 4.315e-05, "loss": 0.5231, "step": 8635 }, { "epoch": 0.4835927875461978, "grad_norm": 1.2106773853302002, "learning_rate": 4.3155e-05, "loss": 0.3766, "step": 8636 }, { "epoch": 0.4836487848583268, "grad_norm": 1.407659649848938, "learning_rate": 4.316e-05, "loss": 0.5709, "step": 8637 }, { "epoch": 0.4837047821704558, "grad_norm": 1.1345996856689453, "learning_rate": 4.3165e-05, "loss": 0.363, "step": 8638 }, { "epoch": 0.48376077948258484, "grad_norm": 1.2164907455444336, "learning_rate": 4.317e-05, "loss": 0.509, "step": 8639 }, { "epoch": 0.48381677679471385, "grad_norm": 1.0939404964447021, "learning_rate": 4.3175000000000006e-05, "loss": 0.468, "step": 8640 }, { "epoch": 0.4838727741068429, "grad_norm": 1.355599045753479, "learning_rate": 4.318e-05, "loss": 0.4164, "step": 8641 }, { "epoch": 0.4839287714189719, "grad_norm": 1.1362967491149902, "learning_rate": 4.3185e-05, "loss": 0.3244, "step": 8642 }, { "epoch": 0.4839847687311009, "grad_norm": 1.142841100692749, "learning_rate": 4.3190000000000005e-05, "loss": 0.439, "step": 8643 }, { "epoch": 0.48404076604322993, "grad_norm": 1.155008316040039, "learning_rate": 4.3195e-05, "loss": 0.5317, "step": 8644 }, { "epoch": 0.48409676335535895, "grad_norm": 1.1879433393478394, "learning_rate": 4.32e-05, "loss": 0.3743, "step": 8645 }, { "epoch": 0.48415276066748797, "grad_norm": 1.4973292350769043, "learning_rate": 4.3205000000000004e-05, "loss": 0.544, "step": 8646 }, { "epoch": 0.484208757979617, "grad_norm": 1.404468059539795, "learning_rate": 4.321e-05, "loss": 0.4304, "step": 8647 }, { "epoch": 0.484264755291746, "grad_norm": 1.1524723768234253, "learning_rate": 4.3215e-05, "loss": 0.3708, "step": 8648 }, { "epoch": 0.484320752603875, "grad_norm": 1.2423940896987915, "learning_rate": 4.3219999999999996e-05, "loss": 0.509, "step": 8649 }, { "epoch": 0.48437674991600405, "grad_norm": 1.4346377849578857, "learning_rate": 4.322500000000001e-05, "loss": 0.3965, "step": 8650 }, 
{ "epoch": 0.48443274722813306, "grad_norm": 1.4229187965393066, "learning_rate": 4.3230000000000005e-05, "loss": 0.4362, "step": 8651 }, { "epoch": 0.4844887445402621, "grad_norm": 1.2607970237731934, "learning_rate": 4.3235e-05, "loss": 0.4363, "step": 8652 }, { "epoch": 0.4845447418523911, "grad_norm": 1.0456748008728027, "learning_rate": 4.324e-05, "loss": 0.379, "step": 8653 }, { "epoch": 0.4846007391645201, "grad_norm": 1.325932502746582, "learning_rate": 4.3245000000000004e-05, "loss": 0.4285, "step": 8654 }, { "epoch": 0.48465673647664914, "grad_norm": 1.3755015134811401, "learning_rate": 4.325e-05, "loss": 0.3669, "step": 8655 }, { "epoch": 0.48471273378877816, "grad_norm": 1.0719605684280396, "learning_rate": 4.3255e-05, "loss": 0.3082, "step": 8656 }, { "epoch": 0.4847687311009072, "grad_norm": 1.1531341075897217, "learning_rate": 4.326e-05, "loss": 0.4137, "step": 8657 }, { "epoch": 0.4848247284130362, "grad_norm": 1.3665847778320312, "learning_rate": 4.3265e-05, "loss": 0.4001, "step": 8658 }, { "epoch": 0.4848807257251652, "grad_norm": 1.3036551475524902, "learning_rate": 4.327e-05, "loss": 0.4852, "step": 8659 }, { "epoch": 0.48493672303729424, "grad_norm": 1.1637921333312988, "learning_rate": 4.3275e-05, "loss": 0.426, "step": 8660 }, { "epoch": 0.4849927203494232, "grad_norm": 1.3472542762756348, "learning_rate": 4.3280000000000006e-05, "loss": 0.4741, "step": 8661 }, { "epoch": 0.4850487176615522, "grad_norm": 1.2393300533294678, "learning_rate": 4.3285e-05, "loss": 0.317, "step": 8662 }, { "epoch": 0.48510471497368124, "grad_norm": 1.2900378704071045, "learning_rate": 4.329e-05, "loss": 0.4287, "step": 8663 }, { "epoch": 0.48516071228581026, "grad_norm": 1.5040082931518555, "learning_rate": 4.3295000000000005e-05, "loss": 0.4619, "step": 8664 }, { "epoch": 0.4852167095979393, "grad_norm": 1.1562973260879517, "learning_rate": 4.33e-05, "loss": 0.4546, "step": 8665 }, { "epoch": 0.4852727069100683, "grad_norm": 1.3024035692214966, "learning_rate": 
4.3305e-05, "loss": 0.3741, "step": 8666 }, { "epoch": 0.4853287042221973, "grad_norm": 2.070420742034912, "learning_rate": 4.3310000000000004e-05, "loss": 0.4455, "step": 8667 }, { "epoch": 0.48538470153432633, "grad_norm": 1.1950358152389526, "learning_rate": 4.3315e-05, "loss": 0.4381, "step": 8668 }, { "epoch": 0.48544069884645535, "grad_norm": 1.0591496229171753, "learning_rate": 4.332e-05, "loss": 0.3745, "step": 8669 }, { "epoch": 0.4854966961585844, "grad_norm": 1.0986610651016235, "learning_rate": 4.3325e-05, "loss": 0.4661, "step": 8670 }, { "epoch": 0.4855526934707134, "grad_norm": 1.3725085258483887, "learning_rate": 4.333000000000001e-05, "loss": 0.5387, "step": 8671 }, { "epoch": 0.4856086907828424, "grad_norm": 1.3331035375595093, "learning_rate": 4.3335000000000004e-05, "loss": 0.445, "step": 8672 }, { "epoch": 0.48566468809497143, "grad_norm": 1.120967984199524, "learning_rate": 4.334e-05, "loss": 0.3554, "step": 8673 }, { "epoch": 0.48572068540710045, "grad_norm": 1.117554783821106, "learning_rate": 4.3345e-05, "loss": 0.3826, "step": 8674 }, { "epoch": 0.48577668271922947, "grad_norm": 1.3135879039764404, "learning_rate": 4.335e-05, "loss": 0.4429, "step": 8675 }, { "epoch": 0.4858326800313585, "grad_norm": 1.6566495895385742, "learning_rate": 4.3355e-05, "loss": 0.3585, "step": 8676 }, { "epoch": 0.4858886773434875, "grad_norm": 1.137291431427002, "learning_rate": 4.336e-05, "loss": 0.3587, "step": 8677 }, { "epoch": 0.4859446746556165, "grad_norm": 7.002615928649902, "learning_rate": 4.3365e-05, "loss": 0.3849, "step": 8678 }, { "epoch": 0.48600067196774555, "grad_norm": 1.2987697124481201, "learning_rate": 4.337e-05, "loss": 0.5691, "step": 8679 }, { "epoch": 0.48605666927987456, "grad_norm": 1.201171636581421, "learning_rate": 4.3375000000000004e-05, "loss": 0.3965, "step": 8680 }, { "epoch": 0.4861126665920036, "grad_norm": 1.2575432062149048, "learning_rate": 4.338e-05, "loss": 0.4293, "step": 8681 }, { "epoch": 0.4861686639041326, 
"grad_norm": 1.2194340229034424, "learning_rate": 4.3385000000000006e-05, "loss": 0.4773, "step": 8682 }, { "epoch": 0.4862246612162616, "grad_norm": 1.40916907787323, "learning_rate": 4.339e-05, "loss": 0.6009, "step": 8683 }, { "epoch": 0.48628065852839064, "grad_norm": 1.3053512573242188, "learning_rate": 4.3395e-05, "loss": 0.4454, "step": 8684 }, { "epoch": 0.48633665584051966, "grad_norm": 1.3352710008621216, "learning_rate": 4.3400000000000005e-05, "loss": 0.422, "step": 8685 }, { "epoch": 0.4863926531526487, "grad_norm": 1.613464117050171, "learning_rate": 4.3405e-05, "loss": 0.461, "step": 8686 }, { "epoch": 0.4864486504647777, "grad_norm": 1.0982309579849243, "learning_rate": 4.341e-05, "loss": 0.3205, "step": 8687 }, { "epoch": 0.4865046477769067, "grad_norm": 1.5111366510391235, "learning_rate": 4.3415e-05, "loss": 0.4938, "step": 8688 }, { "epoch": 0.48656064508903574, "grad_norm": 1.0738887786865234, "learning_rate": 4.342e-05, "loss": 0.4375, "step": 8689 }, { "epoch": 0.48661664240116476, "grad_norm": 1.1899917125701904, "learning_rate": 4.3425000000000005e-05, "loss": 0.3337, "step": 8690 }, { "epoch": 0.4866726397132938, "grad_norm": 1.386510968208313, "learning_rate": 4.343e-05, "loss": 0.4249, "step": 8691 }, { "epoch": 0.4867286370254228, "grad_norm": 1.2535392045974731, "learning_rate": 4.343500000000001e-05, "loss": 0.4321, "step": 8692 }, { "epoch": 0.4867846343375518, "grad_norm": 1.122766137123108, "learning_rate": 4.3440000000000004e-05, "loss": 0.4399, "step": 8693 }, { "epoch": 0.48684063164968083, "grad_norm": 1.240119218826294, "learning_rate": 4.3445e-05, "loss": 0.533, "step": 8694 }, { "epoch": 0.48689662896180985, "grad_norm": 1.1166800260543823, "learning_rate": 4.345e-05, "loss": 0.4065, "step": 8695 }, { "epoch": 0.48695262627393887, "grad_norm": 1.2940329313278198, "learning_rate": 4.3455e-05, "loss": 0.4668, "step": 8696 }, { "epoch": 0.4870086235860679, "grad_norm": 1.1920511722564697, "learning_rate": 4.346e-05, "loss": 
0.4026, "step": 8697 }, { "epoch": 0.4870646208981969, "grad_norm": 1.1582311391830444, "learning_rate": 4.3465e-05, "loss": 0.5435, "step": 8698 }, { "epoch": 0.48712061821032593, "grad_norm": 1.4122322797775269, "learning_rate": 4.347e-05, "loss": 0.5706, "step": 8699 }, { "epoch": 0.48717661552245495, "grad_norm": 1.1645272970199585, "learning_rate": 4.3475000000000006e-05, "loss": 0.4621, "step": 8700 }, { "epoch": 0.48723261283458397, "grad_norm": 1.3535151481628418, "learning_rate": 4.3480000000000004e-05, "loss": 0.39, "step": 8701 }, { "epoch": 0.48728861014671293, "grad_norm": 1.1694250106811523, "learning_rate": 4.3485e-05, "loss": 0.4385, "step": 8702 }, { "epoch": 0.48734460745884195, "grad_norm": 1.1841671466827393, "learning_rate": 4.3490000000000005e-05, "loss": 0.5778, "step": 8703 }, { "epoch": 0.48740060477097097, "grad_norm": 1.0413399934768677, "learning_rate": 4.3495e-05, "loss": 0.4254, "step": 8704 }, { "epoch": 0.4874566020831, "grad_norm": 1.1953142881393433, "learning_rate": 4.35e-05, "loss": 0.4206, "step": 8705 }, { "epoch": 0.487512599395229, "grad_norm": 1.3137667179107666, "learning_rate": 4.3505000000000004e-05, "loss": 0.4972, "step": 8706 }, { "epoch": 0.487568596707358, "grad_norm": 1.1413288116455078, "learning_rate": 4.351e-05, "loss": 0.3851, "step": 8707 }, { "epoch": 0.48762459401948705, "grad_norm": 1.2208783626556396, "learning_rate": 4.3515e-05, "loss": 0.4572, "step": 8708 }, { "epoch": 0.48768059133161606, "grad_norm": 1.3009867668151855, "learning_rate": 4.352e-05, "loss": 0.441, "step": 8709 }, { "epoch": 0.4877365886437451, "grad_norm": 1.4197602272033691, "learning_rate": 4.352500000000001e-05, "loss": 0.4225, "step": 8710 }, { "epoch": 0.4877925859558741, "grad_norm": 1.5292143821716309, "learning_rate": 4.3530000000000005e-05, "loss": 0.4581, "step": 8711 }, { "epoch": 0.4878485832680031, "grad_norm": 1.1709518432617188, "learning_rate": 4.3535e-05, "loss": 0.308, "step": 8712 }, { "epoch": 0.48790458058013214, 
"grad_norm": 1.5590686798095703, "learning_rate": 4.354e-05, "loss": 0.351, "step": 8713 }, { "epoch": 0.48796057789226116, "grad_norm": 1.4348865747451782, "learning_rate": 4.3545000000000004e-05, "loss": 0.5042, "step": 8714 }, { "epoch": 0.4880165752043902, "grad_norm": 1.520550012588501, "learning_rate": 4.355e-05, "loss": 0.4064, "step": 8715 }, { "epoch": 0.4880725725165192, "grad_norm": 1.2306742668151855, "learning_rate": 4.3555e-05, "loss": 0.4183, "step": 8716 }, { "epoch": 0.4881285698286482, "grad_norm": 1.1449452638626099, "learning_rate": 4.356e-05, "loss": 0.4367, "step": 8717 }, { "epoch": 0.48818456714077724, "grad_norm": 1.4547386169433594, "learning_rate": 4.3565e-05, "loss": 0.3432, "step": 8718 }, { "epoch": 0.48824056445290626, "grad_norm": 1.3006969690322876, "learning_rate": 4.357e-05, "loss": 0.442, "step": 8719 }, { "epoch": 0.4882965617650353, "grad_norm": 1.424604058265686, "learning_rate": 4.3575e-05, "loss": 0.6239, "step": 8720 }, { "epoch": 0.4883525590771643, "grad_norm": 1.3461222648620605, "learning_rate": 4.3580000000000006e-05, "loss": 0.3196, "step": 8721 }, { "epoch": 0.4884085563892933, "grad_norm": 1.2990061044692993, "learning_rate": 4.3585000000000004e-05, "loss": 0.4009, "step": 8722 }, { "epoch": 0.48846455370142233, "grad_norm": 1.274626612663269, "learning_rate": 4.359e-05, "loss": 0.4871, "step": 8723 }, { "epoch": 0.48852055101355135, "grad_norm": 1.3779020309448242, "learning_rate": 4.3595000000000005e-05, "loss": 0.4717, "step": 8724 }, { "epoch": 0.48857654832568037, "grad_norm": 1.209932804107666, "learning_rate": 4.36e-05, "loss": 0.4969, "step": 8725 }, { "epoch": 0.4886325456378094, "grad_norm": 1.4006141424179077, "learning_rate": 4.3605e-05, "loss": 0.5306, "step": 8726 }, { "epoch": 0.4886885429499384, "grad_norm": 1.6883563995361328, "learning_rate": 4.361e-05, "loss": 0.4729, "step": 8727 }, { "epoch": 0.48874454026206743, "grad_norm": 1.2552179098129272, "learning_rate": 4.3615e-05, "loss": 0.5425, 
"step": 8728 }, { "epoch": 0.48880053757419645, "grad_norm": 1.6334960460662842, "learning_rate": 4.362e-05, "loss": 0.3721, "step": 8729 }, { "epoch": 0.48885653488632547, "grad_norm": 1.4374152421951294, "learning_rate": 4.3625e-05, "loss": 0.3604, "step": 8730 }, { "epoch": 0.4889125321984545, "grad_norm": 1.3681845664978027, "learning_rate": 4.363000000000001e-05, "loss": 0.4152, "step": 8731 }, { "epoch": 0.4889685295105835, "grad_norm": 1.2419909238815308, "learning_rate": 4.3635000000000005e-05, "loss": 0.4418, "step": 8732 }, { "epoch": 0.4890245268227125, "grad_norm": 1.1012789011001587, "learning_rate": 4.364e-05, "loss": 0.3571, "step": 8733 }, { "epoch": 0.48908052413484154, "grad_norm": 1.400465965270996, "learning_rate": 4.3645e-05, "loss": 0.4592, "step": 8734 }, { "epoch": 0.48913652144697056, "grad_norm": 1.4451253414154053, "learning_rate": 4.3650000000000004e-05, "loss": 0.5828, "step": 8735 }, { "epoch": 0.4891925187590996, "grad_norm": 1.1183686256408691, "learning_rate": 4.3655e-05, "loss": 0.3976, "step": 8736 }, { "epoch": 0.4892485160712286, "grad_norm": 1.1781045198440552, "learning_rate": 4.366e-05, "loss": 0.3226, "step": 8737 }, { "epoch": 0.4893045133833576, "grad_norm": 1.074252963066101, "learning_rate": 4.3665e-05, "loss": 0.428, "step": 8738 }, { "epoch": 0.48936051069548664, "grad_norm": 1.3820005655288696, "learning_rate": 4.367e-05, "loss": 0.4251, "step": 8739 }, { "epoch": 0.48941650800761566, "grad_norm": 1.3468071222305298, "learning_rate": 4.3675000000000005e-05, "loss": 0.4603, "step": 8740 }, { "epoch": 0.4894725053197447, "grad_norm": 1.370620846748352, "learning_rate": 4.368e-05, "loss": 0.4452, "step": 8741 }, { "epoch": 0.4895285026318737, "grad_norm": 1.138685703277588, "learning_rate": 4.3685000000000006e-05, "loss": 0.419, "step": 8742 }, { "epoch": 0.48958449994400266, "grad_norm": 1.067796230316162, "learning_rate": 4.3690000000000004e-05, "loss": 0.4796, "step": 8743 }, { "epoch": 0.4896404972561317, 
"grad_norm": 1.22769033908844, "learning_rate": 4.3695e-05, "loss": 0.3606, "step": 8744 }, { "epoch": 0.4896964945682607, "grad_norm": 1.3409802913665771, "learning_rate": 4.3700000000000005e-05, "loss": 0.3688, "step": 8745 }, { "epoch": 0.4897524918803897, "grad_norm": 1.0355149507522583, "learning_rate": 4.3705e-05, "loss": 0.4116, "step": 8746 }, { "epoch": 0.48980848919251874, "grad_norm": 1.2071590423583984, "learning_rate": 4.371e-05, "loss": 0.3694, "step": 8747 }, { "epoch": 0.48986448650464776, "grad_norm": 1.3578791618347168, "learning_rate": 4.3715e-05, "loss": 0.5592, "step": 8748 }, { "epoch": 0.4899204838167768, "grad_norm": 1.3043264150619507, "learning_rate": 4.372e-05, "loss": 0.4845, "step": 8749 }, { "epoch": 0.4899764811289058, "grad_norm": 1.3237848281860352, "learning_rate": 4.3725000000000006e-05, "loss": 0.4548, "step": 8750 }, { "epoch": 0.4900324784410348, "grad_norm": 1.137434959411621, "learning_rate": 4.373e-05, "loss": 0.4235, "step": 8751 }, { "epoch": 0.49008847575316383, "grad_norm": 1.2952182292938232, "learning_rate": 4.3735e-05, "loss": 0.4302, "step": 8752 }, { "epoch": 0.49014447306529285, "grad_norm": 1.2217761278152466, "learning_rate": 4.3740000000000005e-05, "loss": 0.4094, "step": 8753 }, { "epoch": 0.49020047037742187, "grad_norm": 1.3580907583236694, "learning_rate": 4.3745e-05, "loss": 0.4641, "step": 8754 }, { "epoch": 0.4902564676895509, "grad_norm": 1.3834844827651978, "learning_rate": 4.375e-05, "loss": 0.5332, "step": 8755 }, { "epoch": 0.4903124650016799, "grad_norm": 1.376662254333496, "learning_rate": 4.3755000000000004e-05, "loss": 0.4227, "step": 8756 }, { "epoch": 0.49036846231380893, "grad_norm": 1.4165754318237305, "learning_rate": 4.376e-05, "loss": 0.5348, "step": 8757 }, { "epoch": 0.49042445962593795, "grad_norm": 0.9695160388946533, "learning_rate": 4.3765e-05, "loss": 0.294, "step": 8758 }, { "epoch": 0.49048045693806697, "grad_norm": 1.2843620777130127, "learning_rate": 4.377e-05, "loss": 0.5706, 
"step": 8759 }, { "epoch": 0.490536454250196, "grad_norm": 1.5324783325195312, "learning_rate": 4.3775e-05, "loss": 0.4855, "step": 8760 }, { "epoch": 0.490592451562325, "grad_norm": 1.4297372102737427, "learning_rate": 4.3780000000000004e-05, "loss": 0.458, "step": 8761 }, { "epoch": 0.490648448874454, "grad_norm": 1.6255004405975342, "learning_rate": 4.3785e-05, "loss": 0.4558, "step": 8762 }, { "epoch": 0.49070444618658304, "grad_norm": 1.2813847064971924, "learning_rate": 4.3790000000000006e-05, "loss": 0.3361, "step": 8763 }, { "epoch": 0.49076044349871206, "grad_norm": 1.2937064170837402, "learning_rate": 4.3795e-05, "loss": 0.3665, "step": 8764 }, { "epoch": 0.4908164408108411, "grad_norm": 1.2819708585739136, "learning_rate": 4.38e-05, "loss": 0.3996, "step": 8765 }, { "epoch": 0.4908724381229701, "grad_norm": 1.1545991897583008, "learning_rate": 4.3805000000000005e-05, "loss": 0.3478, "step": 8766 }, { "epoch": 0.4909284354350991, "grad_norm": 1.3991823196411133, "learning_rate": 4.381e-05, "loss": 0.4191, "step": 8767 }, { "epoch": 0.49098443274722814, "grad_norm": 1.263278603553772, "learning_rate": 4.3815e-05, "loss": 0.384, "step": 8768 }, { "epoch": 0.49104043005935716, "grad_norm": 1.6084866523742676, "learning_rate": 4.382e-05, "loss": 0.353, "step": 8769 }, { "epoch": 0.4910964273714862, "grad_norm": 1.383324146270752, "learning_rate": 4.3825e-05, "loss": 0.4827, "step": 8770 }, { "epoch": 0.4911524246836152, "grad_norm": 1.567399501800537, "learning_rate": 4.3830000000000006e-05, "loss": 0.4992, "step": 8771 }, { "epoch": 0.4912084219957442, "grad_norm": 1.2193971872329712, "learning_rate": 4.3835e-05, "loss": 0.382, "step": 8772 }, { "epoch": 0.49126441930787323, "grad_norm": 1.2334585189819336, "learning_rate": 4.384e-05, "loss": 0.3591, "step": 8773 }, { "epoch": 0.49132041662000225, "grad_norm": 1.1027566194534302, "learning_rate": 4.3845000000000005e-05, "loss": 0.3656, "step": 8774 }, { "epoch": 0.4913764139321313, "grad_norm": 
1.3041020631790161, "learning_rate": 4.385e-05, "loss": 0.4085, "step": 8775 }, { "epoch": 0.4914324112442603, "grad_norm": 1.1698496341705322, "learning_rate": 4.3855e-05, "loss": 0.3652, "step": 8776 }, { "epoch": 0.4914884085563893, "grad_norm": 1.3277337551116943, "learning_rate": 4.3860000000000004e-05, "loss": 0.4438, "step": 8777 }, { "epoch": 0.49154440586851833, "grad_norm": 1.3195639848709106, "learning_rate": 4.3865e-05, "loss": 0.4391, "step": 8778 }, { "epoch": 0.49160040318064735, "grad_norm": 1.4638186693191528, "learning_rate": 4.387e-05, "loss": 0.449, "step": 8779 }, { "epoch": 0.49165640049277637, "grad_norm": 1.458646535873413, "learning_rate": 4.3875e-05, "loss": 0.3749, "step": 8780 }, { "epoch": 0.4917123978049054, "grad_norm": 1.220596194267273, "learning_rate": 4.388000000000001e-05, "loss": 0.4252, "step": 8781 }, { "epoch": 0.4917683951170344, "grad_norm": 1.456165075302124, "learning_rate": 4.3885000000000004e-05, "loss": 0.4328, "step": 8782 }, { "epoch": 0.4918243924291634, "grad_norm": 2.3613314628601074, "learning_rate": 4.389e-05, "loss": 0.6209, "step": 8783 }, { "epoch": 0.49188038974129245, "grad_norm": 1.0482722520828247, "learning_rate": 4.3895000000000006e-05, "loss": 0.4775, "step": 8784 }, { "epoch": 0.4919363870534214, "grad_norm": 1.2302495241165161, "learning_rate": 4.39e-05, "loss": 0.4245, "step": 8785 }, { "epoch": 0.49199238436555043, "grad_norm": 1.2513916492462158, "learning_rate": 4.3905e-05, "loss": 0.4248, "step": 8786 }, { "epoch": 0.49204838167767945, "grad_norm": 1.495124340057373, "learning_rate": 4.391e-05, "loss": 0.481, "step": 8787 }, { "epoch": 0.49210437898980847, "grad_norm": 1.1340218782424927, "learning_rate": 4.3915e-05, "loss": 0.4743, "step": 8788 }, { "epoch": 0.4921603763019375, "grad_norm": 1.4609558582305908, "learning_rate": 4.392e-05, "loss": 0.4757, "step": 8789 }, { "epoch": 0.4922163736140665, "grad_norm": 1.3570151329040527, "learning_rate": 4.3925e-05, "loss": 0.485, "step": 8790 }, { 
"epoch": 0.4922723709261955, "grad_norm": 1.118835687637329, "learning_rate": 4.393e-05, "loss": 0.376, "step": 8791 }, { "epoch": 0.49232836823832454, "grad_norm": 1.5721051692962646, "learning_rate": 4.3935000000000005e-05, "loss": 0.5853, "step": 8792 }, { "epoch": 0.49238436555045356, "grad_norm": 1.169965147972107, "learning_rate": 4.394e-05, "loss": 0.4243, "step": 8793 }, { "epoch": 0.4924403628625826, "grad_norm": 1.2264182567596436, "learning_rate": 4.3945e-05, "loss": 0.4102, "step": 8794 }, { "epoch": 0.4924963601747116, "grad_norm": 0.9702182412147522, "learning_rate": 4.3950000000000004e-05, "loss": 0.3123, "step": 8795 }, { "epoch": 0.4925523574868406, "grad_norm": 1.12941575050354, "learning_rate": 4.3955e-05, "loss": 0.3943, "step": 8796 }, { "epoch": 0.49260835479896964, "grad_norm": 1.5503352880477905, "learning_rate": 4.396e-05, "loss": 0.4887, "step": 8797 }, { "epoch": 0.49266435211109866, "grad_norm": 1.2890199422836304, "learning_rate": 4.3965000000000003e-05, "loss": 0.42, "step": 8798 }, { "epoch": 0.4927203494232277, "grad_norm": 1.1840254068374634, "learning_rate": 4.397e-05, "loss": 0.3709, "step": 8799 }, { "epoch": 0.4927763467353567, "grad_norm": 1.4131531715393066, "learning_rate": 4.3975e-05, "loss": 0.6136, "step": 8800 }, { "epoch": 0.4928323440474857, "grad_norm": 1.085906744003296, "learning_rate": 4.398e-05, "loss": 0.3824, "step": 8801 }, { "epoch": 0.49288834135961473, "grad_norm": 1.2747128009796143, "learning_rate": 4.398500000000001e-05, "loss": 0.3949, "step": 8802 }, { "epoch": 0.49294433867174375, "grad_norm": 1.0494940280914307, "learning_rate": 4.3990000000000004e-05, "loss": 0.3932, "step": 8803 }, { "epoch": 0.4930003359838728, "grad_norm": 1.1495386362075806, "learning_rate": 4.3995e-05, "loss": 0.3146, "step": 8804 }, { "epoch": 0.4930563332960018, "grad_norm": 1.4327843189239502, "learning_rate": 4.4000000000000006e-05, "loss": 0.4102, "step": 8805 }, { "epoch": 0.4931123306081308, "grad_norm": 
1.2018320560455322, "learning_rate": 4.4005e-05, "loss": 0.3846, "step": 8806 }, { "epoch": 0.49316832792025983, "grad_norm": 1.2860138416290283, "learning_rate": 4.401e-05, "loss": 0.4058, "step": 8807 }, { "epoch": 0.49322432523238885, "grad_norm": 1.254707932472229, "learning_rate": 4.4015e-05, "loss": 0.3821, "step": 8808 }, { "epoch": 0.49328032254451787, "grad_norm": 1.2617658376693726, "learning_rate": 4.402e-05, "loss": 0.4053, "step": 8809 }, { "epoch": 0.4933363198566469, "grad_norm": 1.1106071472167969, "learning_rate": 4.4025e-05, "loss": 0.319, "step": 8810 }, { "epoch": 0.4933923171687759, "grad_norm": 1.0793672800064087, "learning_rate": 4.4030000000000004e-05, "loss": 0.3452, "step": 8811 }, { "epoch": 0.4934483144809049, "grad_norm": 1.3354262113571167, "learning_rate": 4.4035e-05, "loss": 0.4187, "step": 8812 }, { "epoch": 0.49350431179303395, "grad_norm": 1.1656129360198975, "learning_rate": 4.4040000000000005e-05, "loss": 0.4096, "step": 8813 }, { "epoch": 0.49356030910516296, "grad_norm": 1.3050516843795776, "learning_rate": 4.4045e-05, "loss": 0.402, "step": 8814 }, { "epoch": 0.493616306417292, "grad_norm": 1.3620717525482178, "learning_rate": 4.405e-05, "loss": 0.4883, "step": 8815 }, { "epoch": 0.493672303729421, "grad_norm": 1.1378008127212524, "learning_rate": 4.4055000000000004e-05, "loss": 0.3795, "step": 8816 }, { "epoch": 0.49372830104155, "grad_norm": 1.1346876621246338, "learning_rate": 4.406e-05, "loss": 0.3449, "step": 8817 }, { "epoch": 0.49378429835367904, "grad_norm": 1.2142455577850342, "learning_rate": 4.4065e-05, "loss": 0.4761, "step": 8818 }, { "epoch": 0.49384029566580806, "grad_norm": 1.2473500967025757, "learning_rate": 4.407e-05, "loss": 0.5042, "step": 8819 }, { "epoch": 0.4938962929779371, "grad_norm": 1.159977912902832, "learning_rate": 4.4075e-05, "loss": 0.3856, "step": 8820 }, { "epoch": 0.4939522902900661, "grad_norm": 1.3137444257736206, "learning_rate": 4.4080000000000005e-05, "loss": 0.4382, "step": 8821 }, { 
"epoch": 0.4940082876021951, "grad_norm": 1.673619270324707, "learning_rate": 4.4085e-05, "loss": 0.5157, "step": 8822 }, { "epoch": 0.49406428491432414, "grad_norm": 1.3758738040924072, "learning_rate": 4.4090000000000006e-05, "loss": 0.5904, "step": 8823 }, { "epoch": 0.49412028222645316, "grad_norm": 1.015759825706482, "learning_rate": 4.4095000000000004e-05, "loss": 0.331, "step": 8824 }, { "epoch": 0.4941762795385822, "grad_norm": 1.161116600036621, "learning_rate": 4.41e-05, "loss": 0.2972, "step": 8825 }, { "epoch": 0.49423227685071114, "grad_norm": 1.3757725954055786, "learning_rate": 4.4105e-05, "loss": 0.3803, "step": 8826 }, { "epoch": 0.49428827416284016, "grad_norm": 1.5430757999420166, "learning_rate": 4.411e-05, "loss": 0.6873, "step": 8827 }, { "epoch": 0.4943442714749692, "grad_norm": 1.497576117515564, "learning_rate": 4.4115e-05, "loss": 0.5016, "step": 8828 }, { "epoch": 0.4944002687870982, "grad_norm": 1.2487151622772217, "learning_rate": 4.412e-05, "loss": 0.4236, "step": 8829 }, { "epoch": 0.4944562660992272, "grad_norm": 1.313583254814148, "learning_rate": 4.4125e-05, "loss": 0.6077, "step": 8830 }, { "epoch": 0.49451226341135623, "grad_norm": 1.2831478118896484, "learning_rate": 4.4130000000000006e-05, "loss": 0.4209, "step": 8831 }, { "epoch": 0.49456826072348525, "grad_norm": 1.4325164556503296, "learning_rate": 4.4135000000000003e-05, "loss": 0.44, "step": 8832 }, { "epoch": 0.4946242580356143, "grad_norm": 1.1198872327804565, "learning_rate": 4.414e-05, "loss": 0.451, "step": 8833 }, { "epoch": 0.4946802553477433, "grad_norm": 1.1292073726654053, "learning_rate": 4.4145000000000005e-05, "loss": 0.4119, "step": 8834 }, { "epoch": 0.4947362526598723, "grad_norm": 1.7890433073043823, "learning_rate": 4.415e-05, "loss": 0.3996, "step": 8835 }, { "epoch": 0.49479224997200133, "grad_norm": 1.0275719165802002, "learning_rate": 4.4155e-05, "loss": 0.3757, "step": 8836 }, { "epoch": 0.49484824728413035, "grad_norm": 1.1006819009780884, 
"learning_rate": 4.4160000000000004e-05, "loss": 0.4615, "step": 8837 }, { "epoch": 0.49490424459625937, "grad_norm": 1.203513503074646, "learning_rate": 4.4165e-05, "loss": 0.3168, "step": 8838 }, { "epoch": 0.4949602419083884, "grad_norm": 1.2412500381469727, "learning_rate": 4.417e-05, "loss": 0.4583, "step": 8839 }, { "epoch": 0.4950162392205174, "grad_norm": 1.6406822204589844, "learning_rate": 4.4174999999999996e-05, "loss": 0.5096, "step": 8840 }, { "epoch": 0.4950722365326464, "grad_norm": 1.2065621614456177, "learning_rate": 4.418000000000001e-05, "loss": 0.4388, "step": 8841 }, { "epoch": 0.49512823384477544, "grad_norm": 1.75081205368042, "learning_rate": 4.4185000000000005e-05, "loss": 0.528, "step": 8842 }, { "epoch": 0.49518423115690446, "grad_norm": 1.1402934789657593, "learning_rate": 4.419e-05, "loss": 0.389, "step": 8843 }, { "epoch": 0.4952402284690335, "grad_norm": 1.4472622871398926, "learning_rate": 4.4195000000000006e-05, "loss": 0.526, "step": 8844 }, { "epoch": 0.4952962257811625, "grad_norm": 1.3543546199798584, "learning_rate": 4.4200000000000004e-05, "loss": 0.4009, "step": 8845 }, { "epoch": 0.4953522230932915, "grad_norm": 1.2927943468093872, "learning_rate": 4.4205e-05, "loss": 0.4673, "step": 8846 }, { "epoch": 0.49540822040542054, "grad_norm": 1.1858797073364258, "learning_rate": 4.421e-05, "loss": 0.4821, "step": 8847 }, { "epoch": 0.49546421771754956, "grad_norm": 1.1288942098617554, "learning_rate": 4.4215e-05, "loss": 0.3303, "step": 8848 }, { "epoch": 0.4955202150296786, "grad_norm": 1.1077618598937988, "learning_rate": 4.422e-05, "loss": 0.3461, "step": 8849 }, { "epoch": 0.4955762123418076, "grad_norm": 0.9944883584976196, "learning_rate": 4.4225e-05, "loss": 0.3039, "step": 8850 }, { "epoch": 0.4956322096539366, "grad_norm": 1.2238086462020874, "learning_rate": 4.423e-05, "loss": 0.4426, "step": 8851 }, { "epoch": 0.49568820696606564, "grad_norm": 1.333704948425293, "learning_rate": 4.4235000000000006e-05, "loss": 0.5154, 
"step": 8852 }, { "epoch": 0.49574420427819466, "grad_norm": 1.2379481792449951, "learning_rate": 4.424e-05, "loss": 0.4862, "step": 8853 }, { "epoch": 0.4958002015903237, "grad_norm": 1.1883821487426758, "learning_rate": 4.4245e-05, "loss": 0.3883, "step": 8854 }, { "epoch": 0.4958561989024527, "grad_norm": 1.4488531351089478, "learning_rate": 4.4250000000000005e-05, "loss": 0.4037, "step": 8855 }, { "epoch": 0.4959121962145817, "grad_norm": 1.6411842107772827, "learning_rate": 4.4255e-05, "loss": 0.484, "step": 8856 }, { "epoch": 0.49596819352671073, "grad_norm": 1.5303682088851929, "learning_rate": 4.426e-05, "loss": 0.5324, "step": 8857 }, { "epoch": 0.49602419083883975, "grad_norm": 1.2763642072677612, "learning_rate": 4.4265000000000004e-05, "loss": 0.4157, "step": 8858 }, { "epoch": 0.49608018815096877, "grad_norm": 1.1704176664352417, "learning_rate": 4.427e-05, "loss": 0.4128, "step": 8859 }, { "epoch": 0.4961361854630978, "grad_norm": 1.387826681137085, "learning_rate": 4.4275e-05, "loss": 0.5283, "step": 8860 }, { "epoch": 0.4961921827752268, "grad_norm": 1.2293310165405273, "learning_rate": 4.428e-05, "loss": 0.548, "step": 8861 }, { "epoch": 0.49624818008735583, "grad_norm": 1.2959572076797485, "learning_rate": 4.428500000000001e-05, "loss": 0.4113, "step": 8862 }, { "epoch": 0.49630417739948485, "grad_norm": 1.3615657091140747, "learning_rate": 4.4290000000000005e-05, "loss": 0.4515, "step": 8863 }, { "epoch": 0.49636017471161387, "grad_norm": 1.145766019821167, "learning_rate": 4.4295e-05, "loss": 0.383, "step": 8864 }, { "epoch": 0.4964161720237429, "grad_norm": 1.296778678894043, "learning_rate": 4.43e-05, "loss": 0.4408, "step": 8865 }, { "epoch": 0.4964721693358719, "grad_norm": 1.1632485389709473, "learning_rate": 4.4305000000000004e-05, "loss": 0.4587, "step": 8866 }, { "epoch": 0.49652816664800087, "grad_norm": 1.2473266124725342, "learning_rate": 4.431e-05, "loss": 0.4444, "step": 8867 }, { "epoch": 0.4965841639601299, "grad_norm": 
1.2199045419692993, "learning_rate": 4.4315e-05, "loss": 0.3768, "step": 8868 }, { "epoch": 0.4966401612722589, "grad_norm": 1.3166041374206543, "learning_rate": 4.432e-05, "loss": 0.4984, "step": 8869 }, { "epoch": 0.4966961585843879, "grad_norm": 1.2682785987854004, "learning_rate": 4.4325e-05, "loss": 0.461, "step": 8870 }, { "epoch": 0.49675215589651694, "grad_norm": 1.5442527532577515, "learning_rate": 4.4330000000000004e-05, "loss": 0.44, "step": 8871 }, { "epoch": 0.49680815320864596, "grad_norm": 1.106680154800415, "learning_rate": 4.4335e-05, "loss": 0.3382, "step": 8872 }, { "epoch": 0.496864150520775, "grad_norm": 1.2025519609451294, "learning_rate": 4.4340000000000006e-05, "loss": 0.4117, "step": 8873 }, { "epoch": 0.496920147832904, "grad_norm": 1.3306968212127686, "learning_rate": 4.4345e-05, "loss": 0.4045, "step": 8874 }, { "epoch": 0.496976145145033, "grad_norm": 1.4084997177124023, "learning_rate": 4.435e-05, "loss": 0.7849, "step": 8875 }, { "epoch": 0.49703214245716204, "grad_norm": 1.4674460887908936, "learning_rate": 4.4355000000000005e-05, "loss": 0.4156, "step": 8876 }, { "epoch": 0.49708813976929106, "grad_norm": 1.2347136735916138, "learning_rate": 4.436e-05, "loss": 0.3831, "step": 8877 }, { "epoch": 0.4971441370814201, "grad_norm": 1.287600040435791, "learning_rate": 4.4365e-05, "loss": 0.3978, "step": 8878 }, { "epoch": 0.4972001343935491, "grad_norm": 1.413364052772522, "learning_rate": 4.4370000000000004e-05, "loss": 0.3869, "step": 8879 }, { "epoch": 0.4972561317056781, "grad_norm": 1.6367324590682983, "learning_rate": 4.4375e-05, "loss": 0.4517, "step": 8880 }, { "epoch": 0.49731212901780714, "grad_norm": 1.4247015714645386, "learning_rate": 4.438e-05, "loss": 0.4257, "step": 8881 }, { "epoch": 0.49736812632993616, "grad_norm": 1.250550627708435, "learning_rate": 4.4385e-05, "loss": 0.5311, "step": 8882 }, { "epoch": 0.4974241236420652, "grad_norm": 1.1516586542129517, "learning_rate": 4.439000000000001e-05, "loss": 0.469, "step": 
8883 }, { "epoch": 0.4974801209541942, "grad_norm": 1.2464271783828735, "learning_rate": 4.4395000000000004e-05, "loss": 0.5226, "step": 8884 }, { "epoch": 0.4975361182663232, "grad_norm": 1.1471192836761475, "learning_rate": 4.44e-05, "loss": 0.4076, "step": 8885 }, { "epoch": 0.49759211557845223, "grad_norm": 1.119562029838562, "learning_rate": 4.4405e-05, "loss": 0.4719, "step": 8886 }, { "epoch": 0.49764811289058125, "grad_norm": 1.217142105102539, "learning_rate": 4.4410000000000003e-05, "loss": 0.3851, "step": 8887 }, { "epoch": 0.49770411020271027, "grad_norm": 1.3142110109329224, "learning_rate": 4.4415e-05, "loss": 0.5367, "step": 8888 }, { "epoch": 0.4977601075148393, "grad_norm": 1.2821145057678223, "learning_rate": 4.442e-05, "loss": 0.3896, "step": 8889 }, { "epoch": 0.4978161048269683, "grad_norm": 1.1195389032363892, "learning_rate": 4.4425e-05, "loss": 0.3382, "step": 8890 }, { "epoch": 0.49787210213909733, "grad_norm": 1.240980863571167, "learning_rate": 4.443e-05, "loss": 0.5295, "step": 8891 }, { "epoch": 0.49792809945122635, "grad_norm": 1.1518019437789917, "learning_rate": 4.4435000000000004e-05, "loss": 0.3763, "step": 8892 }, { "epoch": 0.49798409676335537, "grad_norm": 1.1410083770751953, "learning_rate": 4.444e-05, "loss": 0.4042, "step": 8893 }, { "epoch": 0.4980400940754844, "grad_norm": 1.1585884094238281, "learning_rate": 4.4445000000000006e-05, "loss": 0.4042, "step": 8894 }, { "epoch": 0.4980960913876134, "grad_norm": 1.2835559844970703, "learning_rate": 4.445e-05, "loss": 0.5014, "step": 8895 }, { "epoch": 0.4981520886997424, "grad_norm": 1.4558626413345337, "learning_rate": 4.4455e-05, "loss": 0.4909, "step": 8896 }, { "epoch": 0.49820808601187144, "grad_norm": 1.0214385986328125, "learning_rate": 4.4460000000000005e-05, "loss": 0.3897, "step": 8897 }, { "epoch": 0.49826408332400046, "grad_norm": 1.1316633224487305, "learning_rate": 4.4465e-05, "loss": 0.4244, "step": 8898 }, { "epoch": 0.4983200806361295, "grad_norm": 
1.2841393947601318, "learning_rate": 4.447e-05, "loss": 0.3288, "step": 8899 }, { "epoch": 0.4983760779482585, "grad_norm": 1.366647720336914, "learning_rate": 4.4475e-05, "loss": 0.4425, "step": 8900 }, { "epoch": 0.4984320752603875, "grad_norm": 1.5029202699661255, "learning_rate": 4.448e-05, "loss": 0.3958, "step": 8901 }, { "epoch": 0.49848807257251654, "grad_norm": 1.1440002918243408, "learning_rate": 4.4485000000000005e-05, "loss": 0.4056, "step": 8902 }, { "epoch": 0.49854406988464556, "grad_norm": 1.1052082777023315, "learning_rate": 4.449e-05, "loss": 0.4626, "step": 8903 }, { "epoch": 0.4986000671967746, "grad_norm": 1.1949622631072998, "learning_rate": 4.4495e-05, "loss": 0.3967, "step": 8904 }, { "epoch": 0.4986560645089036, "grad_norm": 1.3871986865997314, "learning_rate": 4.4500000000000004e-05, "loss": 0.4062, "step": 8905 }, { "epoch": 0.4987120618210326, "grad_norm": 1.0678956508636475, "learning_rate": 4.4505e-05, "loss": 0.3173, "step": 8906 }, { "epoch": 0.49876805913316163, "grad_norm": 1.2354152202606201, "learning_rate": 4.451e-05, "loss": 0.426, "step": 8907 }, { "epoch": 0.49882405644529065, "grad_norm": 1.5488899946212769, "learning_rate": 4.4515e-05, "loss": 0.5341, "step": 8908 }, { "epoch": 0.4988800537574196, "grad_norm": 1.7664490938186646, "learning_rate": 4.452e-05, "loss": 0.4924, "step": 8909 }, { "epoch": 0.49893605106954864, "grad_norm": 1.1872198581695557, "learning_rate": 4.4525e-05, "loss": 0.4802, "step": 8910 }, { "epoch": 0.49899204838167766, "grad_norm": 1.6726036071777344, "learning_rate": 4.453e-05, "loss": 0.7142, "step": 8911 }, { "epoch": 0.4990480456938067, "grad_norm": 1.430335521697998, "learning_rate": 4.4535000000000006e-05, "loss": 0.4979, "step": 8912 }, { "epoch": 0.4991040430059357, "grad_norm": 1.2542810440063477, "learning_rate": 4.4540000000000004e-05, "loss": 0.4055, "step": 8913 }, { "epoch": 0.4991600403180647, "grad_norm": 1.1580100059509277, "learning_rate": 4.4545e-05, "loss": 0.4304, "step": 8914 
}, { "epoch": 0.49921603763019373, "grad_norm": 1.0954418182373047, "learning_rate": 4.4550000000000005e-05, "loss": 0.3711, "step": 8915 }, { "epoch": 0.49927203494232275, "grad_norm": 1.2141430377960205, "learning_rate": 4.4555e-05, "loss": 0.3588, "step": 8916 }, { "epoch": 0.49932803225445177, "grad_norm": 1.110926628112793, "learning_rate": 4.456e-05, "loss": 0.3741, "step": 8917 }, { "epoch": 0.4993840295665808, "grad_norm": 1.374337077140808, "learning_rate": 4.4565000000000004e-05, "loss": 0.4762, "step": 8918 }, { "epoch": 0.4994400268787098, "grad_norm": 1.0902456045150757, "learning_rate": 4.457e-05, "loss": 0.4118, "step": 8919 }, { "epoch": 0.4994960241908388, "grad_norm": 1.2432687282562256, "learning_rate": 4.4575e-05, "loss": 0.4993, "step": 8920 }, { "epoch": 0.49955202150296785, "grad_norm": 1.3850815296173096, "learning_rate": 4.458e-05, "loss": 0.5616, "step": 8921 }, { "epoch": 0.49960801881509687, "grad_norm": 1.1992584466934204, "learning_rate": 4.458500000000001e-05, "loss": 0.4051, "step": 8922 }, { "epoch": 0.4996640161272259, "grad_norm": 1.339229941368103, "learning_rate": 4.4590000000000005e-05, "loss": 0.5669, "step": 8923 }, { "epoch": 0.4997200134393549, "grad_norm": 1.2795366048812866, "learning_rate": 4.4595e-05, "loss": 0.5839, "step": 8924 }, { "epoch": 0.4997760107514839, "grad_norm": 1.505853295326233, "learning_rate": 4.46e-05, "loss": 0.4764, "step": 8925 }, { "epoch": 0.49983200806361294, "grad_norm": 1.1352922916412354, "learning_rate": 4.4605000000000004e-05, "loss": 0.4109, "step": 8926 }, { "epoch": 0.49988800537574196, "grad_norm": 1.429019570350647, "learning_rate": 4.461e-05, "loss": 0.5747, "step": 8927 }, { "epoch": 0.499944002687871, "grad_norm": 1.4581761360168457, "learning_rate": 4.4615e-05, "loss": 0.4963, "step": 8928 }, { "epoch": 0.5, "grad_norm": 1.2715257406234741, "learning_rate": 4.462e-05, "loss": 0.3885, "step": 8929 }, { "epoch": 0.500055997312129, "grad_norm": 1.158057689666748, "learning_rate": 
4.4625e-05, "loss": 0.368, "step": 8930 }, { "epoch": 0.500111994624258, "grad_norm": 1.2566872835159302, "learning_rate": 4.463e-05, "loss": 0.4259, "step": 8931 }, { "epoch": 0.5001679919363871, "grad_norm": 1.050191044807434, "learning_rate": 4.4635e-05, "loss": 0.3771, "step": 8932 }, { "epoch": 0.5002239892485161, "grad_norm": 1.2480908632278442, "learning_rate": 4.4640000000000006e-05, "loss": 0.5151, "step": 8933 }, { "epoch": 0.5002799865606451, "grad_norm": 1.6038448810577393, "learning_rate": 4.4645000000000004e-05, "loss": 0.4815, "step": 8934 }, { "epoch": 0.5003359838727741, "grad_norm": 1.3310796022415161, "learning_rate": 4.465e-05, "loss": 0.4337, "step": 8935 }, { "epoch": 0.5003919811849031, "grad_norm": 1.0335931777954102, "learning_rate": 4.4655000000000005e-05, "loss": 0.3021, "step": 8936 }, { "epoch": 0.5004479784970322, "grad_norm": 1.6768550872802734, "learning_rate": 4.466e-05, "loss": 0.5165, "step": 8937 }, { "epoch": 0.5005039758091612, "grad_norm": 1.3291367292404175, "learning_rate": 4.4665e-05, "loss": 0.3469, "step": 8938 }, { "epoch": 0.5005599731212902, "grad_norm": 1.7011210918426514, "learning_rate": 4.467e-05, "loss": 0.4439, "step": 8939 }, { "epoch": 0.5006159704334192, "grad_norm": 1.314420461654663, "learning_rate": 4.4675e-05, "loss": 0.4314, "step": 8940 }, { "epoch": 0.5006719677455482, "grad_norm": 1.4272866249084473, "learning_rate": 4.468e-05, "loss": 0.6066, "step": 8941 }, { "epoch": 0.5007279650576772, "grad_norm": 1.32636559009552, "learning_rate": 4.4685e-05, "loss": 0.391, "step": 8942 }, { "epoch": 0.5007839623698063, "grad_norm": 1.1752004623413086, "learning_rate": 4.469e-05, "loss": 0.4544, "step": 8943 }, { "epoch": 0.5008399596819353, "grad_norm": 1.1247951984405518, "learning_rate": 4.4695000000000005e-05, "loss": 0.3804, "step": 8944 }, { "epoch": 0.5008959569940643, "grad_norm": 1.4395594596862793, "learning_rate": 4.47e-05, "loss": 0.3673, "step": 8945 }, { "epoch": 0.5009519543061933, "grad_norm": 
1.283260464668274, "learning_rate": 4.4705e-05, "loss": 0.4168, "step": 8946 }, { "epoch": 0.5010079516183223, "grad_norm": 1.0718876123428345, "learning_rate": 4.4710000000000004e-05, "loss": 0.4354, "step": 8947 }, { "epoch": 0.5010639489304514, "grad_norm": 1.2211148738861084, "learning_rate": 4.4715e-05, "loss": 0.3799, "step": 8948 }, { "epoch": 0.5011199462425804, "grad_norm": 1.3038597106933594, "learning_rate": 4.472e-05, "loss": 0.3742, "step": 8949 }, { "epoch": 0.5011759435547094, "grad_norm": 1.9161030054092407, "learning_rate": 4.4725e-05, "loss": 0.7407, "step": 8950 }, { "epoch": 0.5012319408668384, "grad_norm": 1.223834753036499, "learning_rate": 4.473e-05, "loss": 0.3828, "step": 8951 }, { "epoch": 0.5012879381789674, "grad_norm": 1.2141587734222412, "learning_rate": 4.4735000000000005e-05, "loss": 0.3771, "step": 8952 }, { "epoch": 0.5013439354910965, "grad_norm": 1.4120736122131348, "learning_rate": 4.474e-05, "loss": 0.5211, "step": 8953 }, { "epoch": 0.5013999328032255, "grad_norm": 1.2435225248336792, "learning_rate": 4.4745000000000006e-05, "loss": 0.3964, "step": 8954 }, { "epoch": 0.5014559301153545, "grad_norm": 1.2978291511535645, "learning_rate": 4.4750000000000004e-05, "loss": 0.3875, "step": 8955 }, { "epoch": 0.5015119274274835, "grad_norm": 1.3040884733200073, "learning_rate": 4.4755e-05, "loss": 0.3999, "step": 8956 }, { "epoch": 0.5015679247396125, "grad_norm": 1.2925035953521729, "learning_rate": 4.4760000000000005e-05, "loss": 0.3641, "step": 8957 }, { "epoch": 0.5016239220517416, "grad_norm": 1.1881881952285767, "learning_rate": 4.4765e-05, "loss": 0.398, "step": 8958 }, { "epoch": 0.5016799193638706, "grad_norm": 1.1945446729660034, "learning_rate": 4.477e-05, "loss": 0.4379, "step": 8959 }, { "epoch": 0.5017359166759996, "grad_norm": 1.4754998683929443, "learning_rate": 4.4775e-05, "loss": 0.4611, "step": 8960 }, { "epoch": 0.5017919139881286, "grad_norm": 1.242138385772705, "learning_rate": 4.478e-05, "loss": 0.344, "step": 
8961 }, { "epoch": 0.5018479113002576, "grad_norm": 1.179626226425171, "learning_rate": 4.4785000000000006e-05, "loss": 0.4507, "step": 8962 }, { "epoch": 0.5019039086123867, "grad_norm": 1.34430992603302, "learning_rate": 4.479e-05, "loss": 0.4302, "step": 8963 }, { "epoch": 0.5019599059245157, "grad_norm": 1.2207221984863281, "learning_rate": 4.4795e-05, "loss": 0.4214, "step": 8964 }, { "epoch": 0.5020159032366447, "grad_norm": 1.1519558429718018, "learning_rate": 4.4800000000000005e-05, "loss": 0.3989, "step": 8965 }, { "epoch": 0.5020719005487737, "grad_norm": 1.4847782850265503, "learning_rate": 4.4805e-05, "loss": 0.6974, "step": 8966 }, { "epoch": 0.5021278978609027, "grad_norm": 1.3812850713729858, "learning_rate": 4.481e-05, "loss": 0.5247, "step": 8967 }, { "epoch": 0.5021838951730317, "grad_norm": 1.2806353569030762, "learning_rate": 4.4815000000000004e-05, "loss": 0.4348, "step": 8968 }, { "epoch": 0.5022398924851608, "grad_norm": 1.4079915285110474, "learning_rate": 4.482e-05, "loss": 0.4269, "step": 8969 }, { "epoch": 0.5022958897972898, "grad_norm": 1.0850051641464233, "learning_rate": 4.4825e-05, "loss": 0.336, "step": 8970 }, { "epoch": 0.5023518871094187, "grad_norm": 1.2497671842575073, "learning_rate": 4.483e-05, "loss": 0.4074, "step": 8971 }, { "epoch": 0.5024078844215477, "grad_norm": 1.6500200033187866, "learning_rate": 4.483500000000001e-05, "loss": 0.4611, "step": 8972 }, { "epoch": 0.5024638817336767, "grad_norm": 1.4457597732543945, "learning_rate": 4.4840000000000004e-05, "loss": 0.8361, "step": 8973 }, { "epoch": 0.5025198790458058, "grad_norm": 1.1702678203582764, "learning_rate": 4.4845e-05, "loss": 0.3945, "step": 8974 }, { "epoch": 0.5025758763579348, "grad_norm": 1.2733867168426514, "learning_rate": 4.4850000000000006e-05, "loss": 0.5797, "step": 8975 }, { "epoch": 0.5026318736700638, "grad_norm": 1.2133017778396606, "learning_rate": 4.4855e-05, "loss": 0.3941, "step": 8976 }, { "epoch": 0.5026878709821928, "grad_norm": 
1.2491768598556519, "learning_rate": 4.486e-05, "loss": 0.4425, "step": 8977 }, { "epoch": 0.5027438682943218, "grad_norm": 1.3059080839157104, "learning_rate": 4.4865e-05, "loss": 0.473, "step": 8978 }, { "epoch": 0.5027998656064508, "grad_norm": 1.359402060508728, "learning_rate": 4.487e-05, "loss": 0.5198, "step": 8979 }, { "epoch": 0.5028558629185799, "grad_norm": 1.8723503351211548, "learning_rate": 4.4875e-05, "loss": 0.3733, "step": 8980 }, { "epoch": 0.5029118602307089, "grad_norm": 1.2968909740447998, "learning_rate": 4.488e-05, "loss": 0.4656, "step": 8981 }, { "epoch": 0.5029678575428379, "grad_norm": 1.528132677078247, "learning_rate": 4.488500000000001e-05, "loss": 0.5171, "step": 8982 }, { "epoch": 0.5030238548549669, "grad_norm": 1.212577223777771, "learning_rate": 4.4890000000000006e-05, "loss": 0.3623, "step": 8983 }, { "epoch": 0.5030798521670959, "grad_norm": 1.7409963607788086, "learning_rate": 4.4895e-05, "loss": 0.3292, "step": 8984 }, { "epoch": 0.503135849479225, "grad_norm": 1.1049139499664307, "learning_rate": 4.49e-05, "loss": 0.358, "step": 8985 }, { "epoch": 0.503191846791354, "grad_norm": 1.239724040031433, "learning_rate": 4.4905000000000005e-05, "loss": 0.3727, "step": 8986 }, { "epoch": 0.503247844103483, "grad_norm": 1.2785840034484863, "learning_rate": 4.491e-05, "loss": 0.4344, "step": 8987 }, { "epoch": 0.503303841415612, "grad_norm": 1.2609670162200928, "learning_rate": 4.4915e-05, "loss": 0.483, "step": 8988 }, { "epoch": 0.503359838727741, "grad_norm": 1.1001750230789185, "learning_rate": 4.4920000000000004e-05, "loss": 0.3377, "step": 8989 }, { "epoch": 0.5034158360398701, "grad_norm": 1.1395809650421143, "learning_rate": 4.4925e-05, "loss": 0.4317, "step": 8990 }, { "epoch": 0.5034718333519991, "grad_norm": 1.4517383575439453, "learning_rate": 4.493e-05, "loss": 0.399, "step": 8991 }, { "epoch": 0.5035278306641281, "grad_norm": 1.731385350227356, "learning_rate": 4.4935e-05, "loss": 0.6224, "step": 8992 }, { "epoch": 
0.5035838279762571, "grad_norm": 1.2057623863220215, "learning_rate": 4.494000000000001e-05, "loss": 0.478, "step": 8993 }, { "epoch": 0.5036398252883861, "grad_norm": 2.52968692779541, "learning_rate": 4.4945000000000004e-05, "loss": 0.5506, "step": 8994 }, { "epoch": 0.5036958226005152, "grad_norm": 1.3260557651519775, "learning_rate": 4.495e-05, "loss": 0.4464, "step": 8995 }, { "epoch": 0.5037518199126442, "grad_norm": 1.177143931388855, "learning_rate": 4.4955000000000006e-05, "loss": 0.405, "step": 8996 }, { "epoch": 0.5038078172247732, "grad_norm": 1.2711893320083618, "learning_rate": 4.496e-05, "loss": 0.4228, "step": 8997 }, { "epoch": 0.5038638145369022, "grad_norm": 1.2666378021240234, "learning_rate": 4.4965e-05, "loss": 0.4483, "step": 8998 }, { "epoch": 0.5039198118490312, "grad_norm": 1.1410467624664307, "learning_rate": 4.497e-05, "loss": 0.3531, "step": 8999 }, { "epoch": 0.5039758091611602, "grad_norm": 1.304840087890625, "learning_rate": 4.4975e-05, "loss": 0.4781, "step": 9000 }, { "epoch": 0.5040318064732893, "grad_norm": 1.2505258321762085, "learning_rate": 4.498e-05, "loss": 0.4548, "step": 9001 }, { "epoch": 0.5040878037854183, "grad_norm": 1.2904996871948242, "learning_rate": 4.4985000000000004e-05, "loss": 0.4078, "step": 9002 }, { "epoch": 0.5041438010975473, "grad_norm": 1.3471643924713135, "learning_rate": 4.499e-05, "loss": 0.6741, "step": 9003 }, { "epoch": 0.5041997984096763, "grad_norm": 1.1887874603271484, "learning_rate": 4.4995000000000005e-05, "loss": 0.4686, "step": 9004 }, { "epoch": 0.5042557957218053, "grad_norm": 1.4074841737747192, "learning_rate": 4.5e-05, "loss": 0.6854, "step": 9005 }, { "epoch": 0.5043117930339344, "grad_norm": 1.458162546157837, "learning_rate": 4.5005e-05, "loss": 0.4305, "step": 9006 }, { "epoch": 0.5043677903460634, "grad_norm": 1.4055298566818237, "learning_rate": 4.5010000000000004e-05, "loss": 0.3209, "step": 9007 }, { "epoch": 0.5044237876581924, "grad_norm": 1.2616369724273682, 
"learning_rate": 4.5015e-05, "loss": 0.4925, "step": 9008 }, { "epoch": 0.5044797849703214, "grad_norm": 1.2956013679504395, "learning_rate": 4.502e-05, "loss": 0.4253, "step": 9009 }, { "epoch": 0.5045357822824504, "grad_norm": 1.3505840301513672, "learning_rate": 4.5025000000000003e-05, "loss": 0.5589, "step": 9010 }, { "epoch": 0.5045917795945795, "grad_norm": 1.4064513444900513, "learning_rate": 4.503e-05, "loss": 0.5392, "step": 9011 }, { "epoch": 0.5046477769067085, "grad_norm": 2.8027055263519287, "learning_rate": 4.5035e-05, "loss": 0.5942, "step": 9012 }, { "epoch": 0.5047037742188375, "grad_norm": 1.185011863708496, "learning_rate": 4.504e-05, "loss": 0.4601, "step": 9013 }, { "epoch": 0.5047597715309665, "grad_norm": 1.240644931793213, "learning_rate": 4.504500000000001e-05, "loss": 0.4146, "step": 9014 }, { "epoch": 0.5048157688430955, "grad_norm": 1.5560582876205444, "learning_rate": 4.5050000000000004e-05, "loss": 0.4718, "step": 9015 }, { "epoch": 0.5048717661552246, "grad_norm": 1.1165698766708374, "learning_rate": 4.5055e-05, "loss": 0.423, "step": 9016 }, { "epoch": 0.5049277634673536, "grad_norm": 1.8999931812286377, "learning_rate": 4.506e-05, "loss": 0.3922, "step": 9017 }, { "epoch": 0.5049837607794826, "grad_norm": 1.4183663129806519, "learning_rate": 4.5065e-05, "loss": 0.4596, "step": 9018 }, { "epoch": 0.5050397580916116, "grad_norm": 1.1907249689102173, "learning_rate": 4.507e-05, "loss": 0.3696, "step": 9019 }, { "epoch": 0.5050957554037406, "grad_norm": 1.2172504663467407, "learning_rate": 4.5075e-05, "loss": 0.4613, "step": 9020 }, { "epoch": 0.5051517527158697, "grad_norm": 1.169787883758545, "learning_rate": 4.508e-05, "loss": 0.364, "step": 9021 }, { "epoch": 0.5052077500279987, "grad_norm": 1.4569367170333862, "learning_rate": 4.5085e-05, "loss": 0.4964, "step": 9022 }, { "epoch": 0.5052637473401277, "grad_norm": 1.2050981521606445, "learning_rate": 4.5090000000000004e-05, "loss": 0.3919, "step": 9023 }, { "epoch": 
0.5053197446522567, "grad_norm": 1.0716274976730347, "learning_rate": 4.5095e-05, "loss": 0.3925, "step": 9024 }, { "epoch": 0.5053757419643857, "grad_norm": 1.2640608549118042, "learning_rate": 4.5100000000000005e-05, "loss": 0.397, "step": 9025 }, { "epoch": 0.5054317392765147, "grad_norm": 1.3346258401870728, "learning_rate": 4.5105e-05, "loss": 0.3929, "step": 9026 }, { "epoch": 0.5054877365886438, "grad_norm": 1.1225422620773315, "learning_rate": 4.511e-05, "loss": 0.3526, "step": 9027 }, { "epoch": 0.5055437339007728, "grad_norm": 1.5367369651794434, "learning_rate": 4.5115000000000004e-05, "loss": 0.4391, "step": 9028 }, { "epoch": 0.5055997312129018, "grad_norm": 1.249927282333374, "learning_rate": 4.512e-05, "loss": 0.4161, "step": 9029 }, { "epoch": 0.5056557285250308, "grad_norm": 1.2556788921356201, "learning_rate": 4.5125e-05, "loss": 0.3548, "step": 9030 }, { "epoch": 0.5057117258371598, "grad_norm": 1.3210538625717163, "learning_rate": 4.513e-05, "loss": 0.5145, "step": 9031 }, { "epoch": 0.5057677231492889, "grad_norm": 1.4485116004943848, "learning_rate": 4.5135e-05, "loss": 0.4993, "step": 9032 }, { "epoch": 0.5058237204614179, "grad_norm": 1.3486223220825195, "learning_rate": 4.5140000000000005e-05, "loss": 0.4924, "step": 9033 }, { "epoch": 0.5058797177735469, "grad_norm": 1.189575433731079, "learning_rate": 4.5145e-05, "loss": 0.5028, "step": 9034 }, { "epoch": 0.5059357150856759, "grad_norm": 1.2943909168243408, "learning_rate": 4.5150000000000006e-05, "loss": 0.4446, "step": 9035 }, { "epoch": 0.5059917123978049, "grad_norm": 1.1260215044021606, "learning_rate": 4.5155000000000004e-05, "loss": 0.426, "step": 9036 }, { "epoch": 0.506047709709934, "grad_norm": 1.3268543481826782, "learning_rate": 4.516e-05, "loss": 0.6069, "step": 9037 }, { "epoch": 0.506103707022063, "grad_norm": 1.2931846380233765, "learning_rate": 4.5165e-05, "loss": 0.4658, "step": 9038 }, { "epoch": 0.506159704334192, "grad_norm": 1.1556150913238525, "learning_rate": 
4.517e-05, "loss": 0.3782, "step": 9039 }, { "epoch": 0.506215701646321, "grad_norm": 1.1783028841018677, "learning_rate": 4.5175e-05, "loss": 0.3293, "step": 9040 }, { "epoch": 0.50627169895845, "grad_norm": 1.607237696647644, "learning_rate": 4.518e-05, "loss": 0.4597, "step": 9041 }, { "epoch": 0.506327696270579, "grad_norm": 1.459184169769287, "learning_rate": 4.5185e-05, "loss": 0.4745, "step": 9042 }, { "epoch": 0.5063836935827081, "grad_norm": 1.2248507738113403, "learning_rate": 4.5190000000000006e-05, "loss": 0.5046, "step": 9043 }, { "epoch": 0.5064396908948371, "grad_norm": 1.4197899103164673, "learning_rate": 4.5195000000000004e-05, "loss": 0.5367, "step": 9044 }, { "epoch": 0.5064956882069661, "grad_norm": 1.3539178371429443, "learning_rate": 4.52e-05, "loss": 0.4222, "step": 9045 }, { "epoch": 0.5065516855190951, "grad_norm": 1.159487247467041, "learning_rate": 4.5205000000000005e-05, "loss": 0.4317, "step": 9046 }, { "epoch": 0.5066076828312241, "grad_norm": 1.3368951082229614, "learning_rate": 4.521e-05, "loss": 0.449, "step": 9047 }, { "epoch": 0.5066636801433532, "grad_norm": 1.304384469985962, "learning_rate": 4.5215e-05, "loss": 0.7044, "step": 9048 }, { "epoch": 0.5067196774554822, "grad_norm": 1.4194291830062866, "learning_rate": 4.5220000000000004e-05, "loss": 0.4021, "step": 9049 }, { "epoch": 0.5067756747676112, "grad_norm": 1.290582537651062, "learning_rate": 4.5225e-05, "loss": 0.5375, "step": 9050 }, { "epoch": 0.5068316720797402, "grad_norm": 2.5563111305236816, "learning_rate": 4.523e-05, "loss": 0.368, "step": 9051 }, { "epoch": 0.5068876693918692, "grad_norm": 1.0181171894073486, "learning_rate": 4.5234999999999996e-05, "loss": 0.3027, "step": 9052 }, { "epoch": 0.5069436667039983, "grad_norm": 1.1910600662231445, "learning_rate": 4.524000000000001e-05, "loss": 0.3645, "step": 9053 }, { "epoch": 0.5069996640161272, "grad_norm": 1.234055995941162, "learning_rate": 4.5245000000000005e-05, "loss": 0.4281, "step": 9054 }, { "epoch": 
0.5070556613282562, "grad_norm": 1.3286703824996948, "learning_rate": 4.525e-05, "loss": 0.4125, "step": 9055 }, { "epoch": 0.5071116586403852, "grad_norm": 1.4298676252365112, "learning_rate": 4.5255000000000006e-05, "loss": 0.4882, "step": 9056 }, { "epoch": 0.5071676559525142, "grad_norm": 1.1724324226379395, "learning_rate": 4.5260000000000004e-05, "loss": 0.3975, "step": 9057 }, { "epoch": 0.5072236532646432, "grad_norm": 1.190553903579712, "learning_rate": 4.5265e-05, "loss": 0.4498, "step": 9058 }, { "epoch": 0.5072796505767723, "grad_norm": 1.2644656896591187, "learning_rate": 4.527e-05, "loss": 0.4226, "step": 9059 }, { "epoch": 0.5073356478889013, "grad_norm": 1.5083469152450562, "learning_rate": 4.5275e-05, "loss": 0.3173, "step": 9060 }, { "epoch": 0.5073916452010303, "grad_norm": 1.3417754173278809, "learning_rate": 4.528e-05, "loss": 0.3594, "step": 9061 }, { "epoch": 0.5074476425131593, "grad_norm": 1.2471858263015747, "learning_rate": 4.5285e-05, "loss": 0.3968, "step": 9062 }, { "epoch": 0.5075036398252883, "grad_norm": 1.3608206510543823, "learning_rate": 4.529e-05, "loss": 0.4765, "step": 9063 }, { "epoch": 0.5075596371374174, "grad_norm": 1.4405231475830078, "learning_rate": 4.5295000000000006e-05, "loss": 0.5328, "step": 9064 }, { "epoch": 0.5076156344495464, "grad_norm": 1.1082167625427246, "learning_rate": 4.53e-05, "loss": 0.3742, "step": 9065 }, { "epoch": 0.5076716317616754, "grad_norm": 1.232535481452942, "learning_rate": 4.5305e-05, "loss": 0.3588, "step": 9066 }, { "epoch": 0.5077276290738044, "grad_norm": 1.1461641788482666, "learning_rate": 4.5310000000000005e-05, "loss": 0.4133, "step": 9067 }, { "epoch": 0.5077836263859334, "grad_norm": 1.3012218475341797, "learning_rate": 4.5315e-05, "loss": 0.5193, "step": 9068 }, { "epoch": 0.5078396236980625, "grad_norm": 1.2144492864608765, "learning_rate": 4.532e-05, "loss": 0.3894, "step": 9069 }, { "epoch": 0.5078956210101915, "grad_norm": 1.208936333656311, "learning_rate": 
4.5325000000000004e-05, "loss": 0.39, "step": 9070 }, { "epoch": 0.5079516183223205, "grad_norm": 1.1332958936691284, "learning_rate": 4.533e-05, "loss": 0.3271, "step": 9071 }, { "epoch": 0.5080076156344495, "grad_norm": 1.1151103973388672, "learning_rate": 4.5335e-05, "loss": 0.3278, "step": 9072 }, { "epoch": 0.5080636129465785, "grad_norm": 1.5224456787109375, "learning_rate": 4.534e-05, "loss": 0.485, "step": 9073 }, { "epoch": 0.5081196102587076, "grad_norm": 1.3664674758911133, "learning_rate": 4.534500000000001e-05, "loss": 0.5187, "step": 9074 }, { "epoch": 0.5081756075708366, "grad_norm": 1.2341505289077759, "learning_rate": 4.5350000000000005e-05, "loss": 0.3897, "step": 9075 }, { "epoch": 0.5082316048829656, "grad_norm": 1.1455440521240234, "learning_rate": 4.5355e-05, "loss": 0.3157, "step": 9076 }, { "epoch": 0.5082876021950946, "grad_norm": 1.1693487167358398, "learning_rate": 4.536e-05, "loss": 0.376, "step": 9077 }, { "epoch": 0.5083435995072236, "grad_norm": 1.324777364730835, "learning_rate": 4.5365000000000004e-05, "loss": 0.5683, "step": 9078 }, { "epoch": 0.5083995968193527, "grad_norm": 1.2605270147323608, "learning_rate": 4.537e-05, "loss": 0.3379, "step": 9079 }, { "epoch": 0.5084555941314817, "grad_norm": 1.3456287384033203, "learning_rate": 4.5375e-05, "loss": 0.554, "step": 9080 }, { "epoch": 0.5085115914436107, "grad_norm": 1.2584619522094727, "learning_rate": 4.538e-05, "loss": 0.5242, "step": 9081 }, { "epoch": 0.5085675887557397, "grad_norm": 1.4179725646972656, "learning_rate": 4.5385e-05, "loss": 0.4579, "step": 9082 }, { "epoch": 0.5086235860678687, "grad_norm": 1.4745380878448486, "learning_rate": 4.5390000000000004e-05, "loss": 0.428, "step": 9083 }, { "epoch": 0.5086795833799977, "grad_norm": 1.2324879169464111, "learning_rate": 4.5395e-05, "loss": 0.376, "step": 9084 }, { "epoch": 0.5087355806921268, "grad_norm": 1.1317589282989502, "learning_rate": 4.5400000000000006e-05, "loss": 0.2786, "step": 9085 }, { "epoch": 
0.5087915780042558, "grad_norm": 1.1965456008911133, "learning_rate": 4.5405e-05, "loss": 0.3678, "step": 9086 }, { "epoch": 0.5088475753163848, "grad_norm": 1.6246026754379272, "learning_rate": 4.541e-05, "loss": 0.4555, "step": 9087 }, { "epoch": 0.5089035726285138, "grad_norm": 1.401979684829712, "learning_rate": 4.5415000000000005e-05, "loss": 0.5249, "step": 9088 }, { "epoch": 0.5089595699406428, "grad_norm": 1.5277830362319946, "learning_rate": 4.542e-05, "loss": 0.3628, "step": 9089 }, { "epoch": 0.5090155672527719, "grad_norm": 1.3548916578292847, "learning_rate": 4.5425e-05, "loss": 0.4446, "step": 9090 }, { "epoch": 0.5090715645649009, "grad_norm": 1.2804852724075317, "learning_rate": 4.543e-05, "loss": 0.4077, "step": 9091 }, { "epoch": 0.5091275618770299, "grad_norm": 1.2474457025527954, "learning_rate": 4.5435e-05, "loss": 0.3732, "step": 9092 }, { "epoch": 0.5091835591891589, "grad_norm": 1.3139522075653076, "learning_rate": 4.5440000000000005e-05, "loss": 0.468, "step": 9093 }, { "epoch": 0.5092395565012879, "grad_norm": 1.2315027713775635, "learning_rate": 4.5445e-05, "loss": 0.488, "step": 9094 }, { "epoch": 0.509295553813417, "grad_norm": 1.3135638236999512, "learning_rate": 4.545000000000001e-05, "loss": 0.4894, "step": 9095 }, { "epoch": 0.509351551125546, "grad_norm": 1.1322346925735474, "learning_rate": 4.5455000000000004e-05, "loss": 0.3885, "step": 9096 }, { "epoch": 0.509407548437675, "grad_norm": 1.29420804977417, "learning_rate": 4.546e-05, "loss": 0.3649, "step": 9097 }, { "epoch": 0.509463545749804, "grad_norm": 1.3108958005905151, "learning_rate": 4.5465e-05, "loss": 0.4254, "step": 9098 }, { "epoch": 0.509519543061933, "grad_norm": 1.3444880247116089, "learning_rate": 4.5470000000000003e-05, "loss": 0.4003, "step": 9099 }, { "epoch": 0.509575540374062, "grad_norm": 1.1340411901474, "learning_rate": 4.5475e-05, "loss": 0.3966, "step": 9100 }, { "epoch": 0.5096315376861911, "grad_norm": 1.425068974494934, "learning_rate": 4.548e-05, 
"loss": 0.4551, "step": 9101 }, { "epoch": 0.5096875349983201, "grad_norm": 1.381592869758606, "learning_rate": 4.5485e-05, "loss": 0.3084, "step": 9102 }, { "epoch": 0.5097435323104491, "grad_norm": 1.239937663078308, "learning_rate": 4.549000000000001e-05, "loss": 0.4293, "step": 9103 }, { "epoch": 0.5097995296225781, "grad_norm": 1.8166515827178955, "learning_rate": 4.5495000000000004e-05, "loss": 0.593, "step": 9104 }, { "epoch": 0.5098555269347071, "grad_norm": 1.406846046447754, "learning_rate": 4.55e-05, "loss": 0.515, "step": 9105 }, { "epoch": 0.5099115242468362, "grad_norm": 1.3348393440246582, "learning_rate": 4.5505000000000006e-05, "loss": 0.4368, "step": 9106 }, { "epoch": 0.5099675215589652, "grad_norm": 1.1971629858016968, "learning_rate": 4.551e-05, "loss": 0.5018, "step": 9107 }, { "epoch": 0.5100235188710942, "grad_norm": 1.2922112941741943, "learning_rate": 4.5515e-05, "loss": 0.4869, "step": 9108 }, { "epoch": 0.5100795161832232, "grad_norm": 4.104691982269287, "learning_rate": 4.5520000000000005e-05, "loss": 0.4964, "step": 9109 }, { "epoch": 0.5101355134953522, "grad_norm": 1.2255407571792603, "learning_rate": 4.5525e-05, "loss": 0.4425, "step": 9110 }, { "epoch": 0.5101915108074813, "grad_norm": 1.3091654777526855, "learning_rate": 4.553e-05, "loss": 0.419, "step": 9111 }, { "epoch": 0.5102475081196103, "grad_norm": 1.6149533987045288, "learning_rate": 4.5535e-05, "loss": 0.4657, "step": 9112 }, { "epoch": 0.5103035054317393, "grad_norm": 1.4710536003112793, "learning_rate": 4.554000000000001e-05, "loss": 0.4854, "step": 9113 }, { "epoch": 0.5103595027438683, "grad_norm": 1.2672430276870728, "learning_rate": 4.5545000000000005e-05, "loss": 0.5837, "step": 9114 }, { "epoch": 0.5104155000559973, "grad_norm": 1.4874082803726196, "learning_rate": 4.555e-05, "loss": 0.5358, "step": 9115 }, { "epoch": 0.5104714973681264, "grad_norm": 1.204516887664795, "learning_rate": 4.5555e-05, "loss": 0.4116, "step": 9116 }, { "epoch": 0.5105274946802554, 
"grad_norm": 1.1308927536010742, "learning_rate": 4.5560000000000004e-05, "loss": 0.3743, "step": 9117 }, { "epoch": 0.5105834919923844, "grad_norm": 1.3372341394424438, "learning_rate": 4.5565e-05, "loss": 0.5025, "step": 9118 }, { "epoch": 0.5106394893045134, "grad_norm": 1.2648262977600098, "learning_rate": 4.557e-05, "loss": 0.432, "step": 9119 }, { "epoch": 0.5106954866166424, "grad_norm": 1.1696102619171143, "learning_rate": 4.5575e-05, "loss": 0.4272, "step": 9120 }, { "epoch": 0.5107514839287715, "grad_norm": 1.4026856422424316, "learning_rate": 4.558e-05, "loss": 0.3541, "step": 9121 }, { "epoch": 0.5108074812409005, "grad_norm": 1.13295316696167, "learning_rate": 4.5585e-05, "loss": 0.3299, "step": 9122 }, { "epoch": 0.5108634785530295, "grad_norm": 1.5681631565093994, "learning_rate": 4.559e-05, "loss": 0.3446, "step": 9123 }, { "epoch": 0.5109194758651585, "grad_norm": 1.1067805290222168, "learning_rate": 4.5595000000000006e-05, "loss": 0.3119, "step": 9124 }, { "epoch": 0.5109754731772875, "grad_norm": 1.3130533695220947, "learning_rate": 4.5600000000000004e-05, "loss": 0.4757, "step": 9125 }, { "epoch": 0.5110314704894166, "grad_norm": 1.240734577178955, "learning_rate": 4.5605e-05, "loss": 0.389, "step": 9126 }, { "epoch": 0.5110874678015456, "grad_norm": 1.3343544006347656, "learning_rate": 4.5610000000000005e-05, "loss": 0.4339, "step": 9127 }, { "epoch": 0.5111434651136746, "grad_norm": 1.2861734628677368, "learning_rate": 4.5615e-05, "loss": 0.4505, "step": 9128 }, { "epoch": 0.5111994624258036, "grad_norm": 1.5055768489837646, "learning_rate": 4.562e-05, "loss": 0.588, "step": 9129 }, { "epoch": 0.5112554597379326, "grad_norm": 1.5687438249588013, "learning_rate": 4.5625e-05, "loss": 0.4528, "step": 9130 }, { "epoch": 0.5113114570500616, "grad_norm": 1.7964882850646973, "learning_rate": 4.563e-05, "loss": 0.5269, "step": 9131 }, { "epoch": 0.5113674543621907, "grad_norm": 1.2458393573760986, "learning_rate": 4.5635e-05, "loss": 0.5181, "step": 
9132 }, { "epoch": 0.5114234516743197, "grad_norm": 1.0184763669967651, "learning_rate": 4.564e-05, "loss": 0.4279, "step": 9133 }, { "epoch": 0.5114794489864487, "grad_norm": 1.3518426418304443, "learning_rate": 4.564500000000001e-05, "loss": 0.3765, "step": 9134 }, { "epoch": 0.5115354462985777, "grad_norm": 1.2622166872024536, "learning_rate": 4.5650000000000005e-05, "loss": 0.4849, "step": 9135 }, { "epoch": 0.5115914436107066, "grad_norm": 1.3343026638031006, "learning_rate": 4.5655e-05, "loss": 0.4758, "step": 9136 }, { "epoch": 0.5116474409228357, "grad_norm": 1.2146726846694946, "learning_rate": 4.566e-05, "loss": 0.4958, "step": 9137 }, { "epoch": 0.5117034382349647, "grad_norm": 1.320116400718689, "learning_rate": 4.5665000000000004e-05, "loss": 0.4295, "step": 9138 }, { "epoch": 0.5117594355470937, "grad_norm": 1.2639986276626587, "learning_rate": 4.567e-05, "loss": 0.3921, "step": 9139 }, { "epoch": 0.5118154328592227, "grad_norm": 1.1365134716033936, "learning_rate": 4.5675e-05, "loss": 0.4634, "step": 9140 }, { "epoch": 0.5118714301713517, "grad_norm": 1.3811694383621216, "learning_rate": 4.568e-05, "loss": 0.4476, "step": 9141 }, { "epoch": 0.5119274274834807, "grad_norm": 1.1554290056228638, "learning_rate": 4.5685e-05, "loss": 0.3517, "step": 9142 }, { "epoch": 0.5119834247956098, "grad_norm": 1.1563440561294556, "learning_rate": 4.569e-05, "loss": 0.4315, "step": 9143 }, { "epoch": 0.5120394221077388, "grad_norm": 1.0936301946640015, "learning_rate": 4.5695e-05, "loss": 0.4105, "step": 9144 }, { "epoch": 0.5120954194198678, "grad_norm": 1.7846345901489258, "learning_rate": 4.5700000000000006e-05, "loss": 0.4825, "step": 9145 }, { "epoch": 0.5121514167319968, "grad_norm": 1.1873970031738281, "learning_rate": 4.5705000000000004e-05, "loss": 0.4168, "step": 9146 }, { "epoch": 0.5122074140441258, "grad_norm": 1.2181586027145386, "learning_rate": 4.571e-05, "loss": 0.4442, "step": 9147 }, { "epoch": 0.5122634113562549, "grad_norm": 1.1474303007125854, 
"learning_rate": 4.5715000000000005e-05, "loss": 0.449, "step": 9148 }, { "epoch": 0.5123194086683839, "grad_norm": 1.3512732982635498, "learning_rate": 4.572e-05, "loss": 0.4581, "step": 9149 }, { "epoch": 0.5123754059805129, "grad_norm": 1.2675834894180298, "learning_rate": 4.5725e-05, "loss": 0.3983, "step": 9150 }, { "epoch": 0.5124314032926419, "grad_norm": 1.3232731819152832, "learning_rate": 4.573e-05, "loss": 0.4474, "step": 9151 }, { "epoch": 0.5124874006047709, "grad_norm": 1.4368561506271362, "learning_rate": 4.5735e-05, "loss": 0.391, "step": 9152 }, { "epoch": 0.5125433979169, "grad_norm": 1.2953909635543823, "learning_rate": 4.574e-05, "loss": 0.4717, "step": 9153 }, { "epoch": 0.512599395229029, "grad_norm": 2.3071467876434326, "learning_rate": 4.5745e-05, "loss": 0.408, "step": 9154 }, { "epoch": 0.512655392541158, "grad_norm": 1.2890418767929077, "learning_rate": 4.575e-05, "loss": 0.3286, "step": 9155 }, { "epoch": 0.512711389853287, "grad_norm": 1.3090392351150513, "learning_rate": 4.5755000000000005e-05, "loss": 0.5139, "step": 9156 }, { "epoch": 0.512767387165416, "grad_norm": 1.228469967842102, "learning_rate": 4.576e-05, "loss": 0.4131, "step": 9157 }, { "epoch": 0.512823384477545, "grad_norm": 1.266729474067688, "learning_rate": 4.5765e-05, "loss": 0.4143, "step": 9158 }, { "epoch": 0.5128793817896741, "grad_norm": 1.4858880043029785, "learning_rate": 4.5770000000000004e-05, "loss": 0.4721, "step": 9159 }, { "epoch": 0.5129353791018031, "grad_norm": 1.380627155303955, "learning_rate": 4.5775e-05, "loss": 0.6853, "step": 9160 }, { "epoch": 0.5129913764139321, "grad_norm": 1.2912437915802002, "learning_rate": 4.578e-05, "loss": 0.3152, "step": 9161 }, { "epoch": 0.5130473737260611, "grad_norm": 1.0574392080307007, "learning_rate": 4.5785e-05, "loss": 0.3956, "step": 9162 }, { "epoch": 0.5131033710381901, "grad_norm": 0.939217746257782, "learning_rate": 4.579e-05, "loss": 0.2672, "step": 9163 }, { "epoch": 0.5131593683503192, "grad_norm": 
1.147523045539856, "learning_rate": 4.5795000000000005e-05, "loss": 0.3168, "step": 9164 }, { "epoch": 0.5132153656624482, "grad_norm": 1.3794147968292236, "learning_rate": 4.58e-05, "loss": 0.657, "step": 9165 }, { "epoch": 0.5132713629745772, "grad_norm": 1.8935555219650269, "learning_rate": 4.5805000000000006e-05, "loss": 0.5039, "step": 9166 }, { "epoch": 0.5133273602867062, "grad_norm": 1.233540654182434, "learning_rate": 4.5810000000000004e-05, "loss": 0.4168, "step": 9167 }, { "epoch": 0.5133833575988352, "grad_norm": 1.2867122888565063, "learning_rate": 4.5815e-05, "loss": 0.4104, "step": 9168 }, { "epoch": 0.5134393549109643, "grad_norm": 1.1694250106811523, "learning_rate": 4.5820000000000005e-05, "loss": 0.425, "step": 9169 }, { "epoch": 0.5134953522230933, "grad_norm": 1.2046486139297485, "learning_rate": 4.5825e-05, "loss": 0.4514, "step": 9170 }, { "epoch": 0.5135513495352223, "grad_norm": 1.1943657398223877, "learning_rate": 4.583e-05, "loss": 0.617, "step": 9171 }, { "epoch": 0.5136073468473513, "grad_norm": 1.2283899784088135, "learning_rate": 4.5835e-05, "loss": 0.4312, "step": 9172 }, { "epoch": 0.5136633441594803, "grad_norm": 1.3342396020889282, "learning_rate": 4.584e-05, "loss": 0.4931, "step": 9173 }, { "epoch": 0.5137193414716094, "grad_norm": 1.4554526805877686, "learning_rate": 4.5845000000000006e-05, "loss": 0.4853, "step": 9174 }, { "epoch": 0.5137753387837384, "grad_norm": 1.2992995977401733, "learning_rate": 4.585e-05, "loss": 0.4486, "step": 9175 }, { "epoch": 0.5138313360958674, "grad_norm": 1.2688758373260498, "learning_rate": 4.5855e-05, "loss": 0.4145, "step": 9176 }, { "epoch": 0.5138873334079964, "grad_norm": 1.1902157068252563, "learning_rate": 4.5860000000000005e-05, "loss": 0.3951, "step": 9177 }, { "epoch": 0.5139433307201254, "grad_norm": 1.1584275960922241, "learning_rate": 4.5865e-05, "loss": 0.3811, "step": 9178 }, { "epoch": 0.5139993280322545, "grad_norm": 1.4463282823562622, "learning_rate": 4.587e-05, "loss": 
0.4477, "step": 9179 }, { "epoch": 0.5140553253443835, "grad_norm": 1.225193738937378, "learning_rate": 4.5875000000000004e-05, "loss": 0.5085, "step": 9180 }, { "epoch": 0.5141113226565125, "grad_norm": 1.3751776218414307, "learning_rate": 4.588e-05, "loss": 0.4581, "step": 9181 }, { "epoch": 0.5141673199686415, "grad_norm": 1.1857109069824219, "learning_rate": 4.5885e-05, "loss": 0.4816, "step": 9182 }, { "epoch": 0.5142233172807705, "grad_norm": 1.116721510887146, "learning_rate": 4.589e-05, "loss": 0.418, "step": 9183 }, { "epoch": 0.5142793145928996, "grad_norm": 1.302652359008789, "learning_rate": 4.589500000000001e-05, "loss": 0.3992, "step": 9184 }, { "epoch": 0.5143353119050286, "grad_norm": 1.2706457376480103, "learning_rate": 4.5900000000000004e-05, "loss": 0.4317, "step": 9185 }, { "epoch": 0.5143913092171576, "grad_norm": 1.1368987560272217, "learning_rate": 4.5905e-05, "loss": 0.5304, "step": 9186 }, { "epoch": 0.5144473065292866, "grad_norm": 1.1194905042648315, "learning_rate": 4.5910000000000006e-05, "loss": 0.4262, "step": 9187 }, { "epoch": 0.5145033038414156, "grad_norm": 1.4616315364837646, "learning_rate": 4.5915000000000003e-05, "loss": 0.412, "step": 9188 }, { "epoch": 0.5145593011535446, "grad_norm": 1.2584593296051025, "learning_rate": 4.592e-05, "loss": 0.4486, "step": 9189 }, { "epoch": 0.5146152984656737, "grad_norm": 1.3513503074645996, "learning_rate": 4.5925e-05, "loss": 0.4367, "step": 9190 }, { "epoch": 0.5146712957778027, "grad_norm": 1.3395848274230957, "learning_rate": 4.593e-05, "loss": 0.4674, "step": 9191 }, { "epoch": 0.5147272930899317, "grad_norm": 1.45074462890625, "learning_rate": 4.5935e-05, "loss": 0.5375, "step": 9192 }, { "epoch": 0.5147832904020607, "grad_norm": 1.2556960582733154, "learning_rate": 4.594e-05, "loss": 0.4159, "step": 9193 }, { "epoch": 0.5148392877141897, "grad_norm": 1.1413027048110962, "learning_rate": 4.5945e-05, "loss": 0.3966, "step": 9194 }, { "epoch": 0.5148952850263188, "grad_norm": 
1.5372223854064941, "learning_rate": 4.5950000000000006e-05, "loss": 0.3643, "step": 9195 }, { "epoch": 0.5149512823384478, "grad_norm": 1.4177385568618774, "learning_rate": 4.5955e-05, "loss": 0.48, "step": 9196 }, { "epoch": 0.5150072796505768, "grad_norm": 1.2636547088623047, "learning_rate": 4.596e-05, "loss": 0.4348, "step": 9197 }, { "epoch": 0.5150632769627058, "grad_norm": 1.465863823890686, "learning_rate": 4.5965000000000005e-05, "loss": 0.4749, "step": 9198 }, { "epoch": 0.5151192742748348, "grad_norm": 1.1423953771591187, "learning_rate": 4.597e-05, "loss": 0.3845, "step": 9199 }, { "epoch": 0.5151752715869639, "grad_norm": 1.100070834159851, "learning_rate": 4.5975e-05, "loss": 0.4243, "step": 9200 }, { "epoch": 0.5152312688990929, "grad_norm": 1.6481834650039673, "learning_rate": 4.5980000000000004e-05, "loss": 0.5836, "step": 9201 }, { "epoch": 0.5152872662112219, "grad_norm": 1.323297142982483, "learning_rate": 4.5985e-05, "loss": 0.4682, "step": 9202 }, { "epoch": 0.5153432635233509, "grad_norm": 1.2022228240966797, "learning_rate": 4.599e-05, "loss": 0.4252, "step": 9203 }, { "epoch": 0.5153992608354799, "grad_norm": 1.5649847984313965, "learning_rate": 4.5995e-05, "loss": 0.6934, "step": 9204 }, { "epoch": 0.515455258147609, "grad_norm": 1.4646174907684326, "learning_rate": 4.600000000000001e-05, "loss": 0.4992, "step": 9205 }, { "epoch": 0.515511255459738, "grad_norm": 1.2009707689285278, "learning_rate": 4.6005000000000004e-05, "loss": 0.4129, "step": 9206 }, { "epoch": 0.515567252771867, "grad_norm": 1.3114912509918213, "learning_rate": 4.601e-05, "loss": 0.3581, "step": 9207 }, { "epoch": 0.515623250083996, "grad_norm": 1.2285393476486206, "learning_rate": 4.6015000000000006e-05, "loss": 0.4393, "step": 9208 }, { "epoch": 0.515679247396125, "grad_norm": 1.554861307144165, "learning_rate": 4.602e-05, "loss": 0.4743, "step": 9209 }, { "epoch": 0.515735244708254, "grad_norm": 1.0693328380584717, "learning_rate": 4.6025e-05, "loss": 0.3656, 
"step": 9210 }, { "epoch": 0.5157912420203831, "grad_norm": 1.2261241674423218, "learning_rate": 4.603e-05, "loss": 0.4464, "step": 9211 }, { "epoch": 0.5158472393325121, "grad_norm": 1.3556733131408691, "learning_rate": 4.6035e-05, "loss": 0.4532, "step": 9212 }, { "epoch": 0.5159032366446411, "grad_norm": 1.05940580368042, "learning_rate": 4.604e-05, "loss": 0.3792, "step": 9213 }, { "epoch": 0.5159592339567701, "grad_norm": 0.9899119138717651, "learning_rate": 4.6045000000000004e-05, "loss": 0.362, "step": 9214 }, { "epoch": 0.5160152312688991, "grad_norm": 1.0863126516342163, "learning_rate": 4.605e-05, "loss": 0.4113, "step": 9215 }, { "epoch": 0.5160712285810282, "grad_norm": 1.3411493301391602, "learning_rate": 4.6055000000000005e-05, "loss": 0.4646, "step": 9216 }, { "epoch": 0.5161272258931572, "grad_norm": 1.2432260513305664, "learning_rate": 4.606e-05, "loss": 0.4235, "step": 9217 }, { "epoch": 0.5161832232052862, "grad_norm": 1.2353248596191406, "learning_rate": 4.6065e-05, "loss": 0.3575, "step": 9218 }, { "epoch": 0.5162392205174151, "grad_norm": 1.151160717010498, "learning_rate": 4.6070000000000004e-05, "loss": 0.3813, "step": 9219 }, { "epoch": 0.5162952178295441, "grad_norm": 1.0681962966918945, "learning_rate": 4.6075e-05, "loss": 0.2898, "step": 9220 }, { "epoch": 0.5163512151416731, "grad_norm": 1.4751173257827759, "learning_rate": 4.608e-05, "loss": 0.611, "step": 9221 }, { "epoch": 0.5164072124538022, "grad_norm": 1.1293771266937256, "learning_rate": 4.6085000000000003e-05, "loss": 0.4167, "step": 9222 }, { "epoch": 0.5164632097659312, "grad_norm": 1.486940622329712, "learning_rate": 4.609e-05, "loss": 0.5161, "step": 9223 }, { "epoch": 0.5165192070780602, "grad_norm": 1.2248575687408447, "learning_rate": 4.6095000000000005e-05, "loss": 0.5836, "step": 9224 }, { "epoch": 0.5165752043901892, "grad_norm": 1.1783866882324219, "learning_rate": 4.61e-05, "loss": 0.3845, "step": 9225 }, { "epoch": 0.5166312017023182, "grad_norm": 
1.5090482234954834, "learning_rate": 4.610500000000001e-05, "loss": 0.397, "step": 9226 }, { "epoch": 0.5166871990144473, "grad_norm": 1.2617485523223877, "learning_rate": 4.6110000000000004e-05, "loss": 0.3639, "step": 9227 }, { "epoch": 0.5167431963265763, "grad_norm": 1.2481554746627808, "learning_rate": 4.6115e-05, "loss": 0.4247, "step": 9228 }, { "epoch": 0.5167991936387053, "grad_norm": 1.1832367181777954, "learning_rate": 4.612e-05, "loss": 0.3609, "step": 9229 }, { "epoch": 0.5168551909508343, "grad_norm": 1.161326289176941, "learning_rate": 4.6125e-05, "loss": 0.3136, "step": 9230 }, { "epoch": 0.5169111882629633, "grad_norm": 1.1889429092407227, "learning_rate": 4.613e-05, "loss": 0.4367, "step": 9231 }, { "epoch": 0.5169671855750924, "grad_norm": 1.656028389930725, "learning_rate": 4.6135e-05, "loss": 0.5329, "step": 9232 }, { "epoch": 0.5170231828872214, "grad_norm": 1.3792628049850464, "learning_rate": 4.614e-05, "loss": 0.4479, "step": 9233 }, { "epoch": 0.5170791801993504, "grad_norm": 1.2553712129592896, "learning_rate": 4.6145000000000006e-05, "loss": 0.4374, "step": 9234 }, { "epoch": 0.5171351775114794, "grad_norm": 1.0009866952896118, "learning_rate": 4.6150000000000004e-05, "loss": 0.3785, "step": 9235 }, { "epoch": 0.5171911748236084, "grad_norm": 1.2896463871002197, "learning_rate": 4.6155e-05, "loss": 0.4246, "step": 9236 }, { "epoch": 0.5172471721357375, "grad_norm": 1.2183150053024292, "learning_rate": 4.6160000000000005e-05, "loss": 0.3809, "step": 9237 }, { "epoch": 0.5173031694478665, "grad_norm": 1.196364402770996, "learning_rate": 4.6165e-05, "loss": 0.4453, "step": 9238 }, { "epoch": 0.5173591667599955, "grad_norm": 1.162322759628296, "learning_rate": 4.617e-05, "loss": 0.4103, "step": 9239 }, { "epoch": 0.5174151640721245, "grad_norm": 1.1721992492675781, "learning_rate": 4.6175000000000004e-05, "loss": 0.4286, "step": 9240 }, { "epoch": 0.5174711613842535, "grad_norm": 1.5863608121871948, "learning_rate": 4.618e-05, "loss": 
0.4548, "step": 9241 }, { "epoch": 0.5175271586963825, "grad_norm": 1.2901023626327515, "learning_rate": 4.6185e-05, "loss": 0.5696, "step": 9242 }, { "epoch": 0.5175831560085116, "grad_norm": 1.0869500637054443, "learning_rate": 4.619e-05, "loss": 0.4111, "step": 9243 }, { "epoch": 0.5176391533206406, "grad_norm": 1.1780967712402344, "learning_rate": 4.619500000000001e-05, "loss": 0.3407, "step": 9244 }, { "epoch": 0.5176951506327696, "grad_norm": 1.2058454751968384, "learning_rate": 4.6200000000000005e-05, "loss": 0.4369, "step": 9245 }, { "epoch": 0.5177511479448986, "grad_norm": 1.329429268836975, "learning_rate": 4.6205e-05, "loss": 0.5259, "step": 9246 }, { "epoch": 0.5178071452570276, "grad_norm": 1.1401983499526978, "learning_rate": 4.6210000000000006e-05, "loss": 0.4004, "step": 9247 }, { "epoch": 0.5178631425691567, "grad_norm": 1.2211241722106934, "learning_rate": 4.6215000000000004e-05, "loss": 0.4571, "step": 9248 }, { "epoch": 0.5179191398812857, "grad_norm": 1.2139710187911987, "learning_rate": 4.622e-05, "loss": 0.3738, "step": 9249 }, { "epoch": 0.5179751371934147, "grad_norm": 1.2400494813919067, "learning_rate": 4.6225e-05, "loss": 0.4665, "step": 9250 }, { "epoch": 0.5180311345055437, "grad_norm": 1.2360652685165405, "learning_rate": 4.623e-05, "loss": 0.329, "step": 9251 }, { "epoch": 0.5180871318176727, "grad_norm": 1.4026408195495605, "learning_rate": 4.6235e-05, "loss": 0.5076, "step": 9252 }, { "epoch": 0.5181431291298018, "grad_norm": 1.1283280849456787, "learning_rate": 4.624e-05, "loss": 0.3409, "step": 9253 }, { "epoch": 0.5181991264419308, "grad_norm": 1.180201768875122, "learning_rate": 4.6245e-05, "loss": 0.3412, "step": 9254 }, { "epoch": 0.5182551237540598, "grad_norm": 1.263129711151123, "learning_rate": 4.6250000000000006e-05, "loss": 0.3948, "step": 9255 }, { "epoch": 0.5183111210661888, "grad_norm": 2.041626453399658, "learning_rate": 4.6255000000000004e-05, "loss": 0.4851, "step": 9256 }, { "epoch": 0.5183671183783178, 
"grad_norm": 1.2479692697525024, "learning_rate": 4.626e-05, "loss": 0.3156, "step": 9257 }, { "epoch": 0.5184231156904469, "grad_norm": 1.620681881904602, "learning_rate": 4.6265000000000005e-05, "loss": 0.418, "step": 9258 }, { "epoch": 0.5184791130025759, "grad_norm": 1.504738688468933, "learning_rate": 4.627e-05, "loss": 0.4166, "step": 9259 }, { "epoch": 0.5185351103147049, "grad_norm": 1.296774983406067, "learning_rate": 4.6275e-05, "loss": 0.4553, "step": 9260 }, { "epoch": 0.5185911076268339, "grad_norm": 1.2768787145614624, "learning_rate": 4.6280000000000004e-05, "loss": 0.4789, "step": 9261 }, { "epoch": 0.5186471049389629, "grad_norm": 1.1409201622009277, "learning_rate": 4.6285e-05, "loss": 0.4079, "step": 9262 }, { "epoch": 0.518703102251092, "grad_norm": 1.0574982166290283, "learning_rate": 4.629e-05, "loss": 0.3735, "step": 9263 }, { "epoch": 0.518759099563221, "grad_norm": 1.2961485385894775, "learning_rate": 4.6294999999999996e-05, "loss": 0.3911, "step": 9264 }, { "epoch": 0.51881509687535, "grad_norm": 1.4814493656158447, "learning_rate": 4.630000000000001e-05, "loss": 0.3772, "step": 9265 }, { "epoch": 0.518871094187479, "grad_norm": 1.281013011932373, "learning_rate": 4.6305000000000005e-05, "loss": 0.5341, "step": 9266 }, { "epoch": 0.518927091499608, "grad_norm": 1.3805668354034424, "learning_rate": 4.631e-05, "loss": 0.4929, "step": 9267 }, { "epoch": 0.518983088811737, "grad_norm": 1.4640450477600098, "learning_rate": 4.6315e-05, "loss": 0.4276, "step": 9268 }, { "epoch": 0.5190390861238661, "grad_norm": 1.4518136978149414, "learning_rate": 4.6320000000000004e-05, "loss": 0.5306, "step": 9269 }, { "epoch": 0.5190950834359951, "grad_norm": 1.4797710180282593, "learning_rate": 4.6325e-05, "loss": 0.3544, "step": 9270 }, { "epoch": 0.5191510807481241, "grad_norm": 1.166329026222229, "learning_rate": 4.633e-05, "loss": 0.3905, "step": 9271 }, { "epoch": 0.5192070780602531, "grad_norm": 1.1806344985961914, "learning_rate": 4.6335e-05, "loss": 
0.4603, "step": 9272 }, { "epoch": 0.5192630753723821, "grad_norm": 1.1213579177856445, "learning_rate": 4.634e-05, "loss": 0.3816, "step": 9273 }, { "epoch": 0.5193190726845112, "grad_norm": 1.24141263961792, "learning_rate": 4.6345e-05, "loss": 0.4185, "step": 9274 }, { "epoch": 0.5193750699966402, "grad_norm": 1.2993707656860352, "learning_rate": 4.635e-05, "loss": 0.3636, "step": 9275 }, { "epoch": 0.5194310673087692, "grad_norm": 1.1595044136047363, "learning_rate": 4.6355000000000006e-05, "loss": 0.3716, "step": 9276 }, { "epoch": 0.5194870646208982, "grad_norm": 1.4520783424377441, "learning_rate": 4.636e-05, "loss": 0.4154, "step": 9277 }, { "epoch": 0.5195430619330272, "grad_norm": 1.390235185623169, "learning_rate": 4.6365e-05, "loss": 0.5374, "step": 9278 }, { "epoch": 0.5195990592451563, "grad_norm": 1.1509873867034912, "learning_rate": 4.6370000000000005e-05, "loss": 0.3998, "step": 9279 }, { "epoch": 0.5196550565572853, "grad_norm": 1.089005947113037, "learning_rate": 4.6375e-05, "loss": 0.3992, "step": 9280 }, { "epoch": 0.5197110538694143, "grad_norm": 1.2871884107589722, "learning_rate": 4.638e-05, "loss": 0.4491, "step": 9281 }, { "epoch": 0.5197670511815433, "grad_norm": 1.0660498142242432, "learning_rate": 4.6385000000000004e-05, "loss": 0.3495, "step": 9282 }, { "epoch": 0.5198230484936723, "grad_norm": 1.4733692407608032, "learning_rate": 4.639e-05, "loss": 0.388, "step": 9283 }, { "epoch": 0.5198790458058014, "grad_norm": 1.3952951431274414, "learning_rate": 4.6395e-05, "loss": 0.3995, "step": 9284 }, { "epoch": 0.5199350431179304, "grad_norm": 1.2849518060684204, "learning_rate": 4.64e-05, "loss": 0.3945, "step": 9285 }, { "epoch": 0.5199910404300594, "grad_norm": 1.2646955251693726, "learning_rate": 4.640500000000001e-05, "loss": 0.4605, "step": 9286 }, { "epoch": 0.5200470377421884, "grad_norm": 1.6200830936431885, "learning_rate": 4.6410000000000005e-05, "loss": 0.5205, "step": 9287 }, { "epoch": 0.5201030350543174, "grad_norm": 
1.5152814388275146, "learning_rate": 4.6415e-05, "loss": 0.53, "step": 9288 }, { "epoch": 0.5201590323664464, "grad_norm": 1.5220857858657837, "learning_rate": 4.642e-05, "loss": 0.6232, "step": 9289 }, { "epoch": 0.5202150296785755, "grad_norm": 1.34884512424469, "learning_rate": 4.6425000000000004e-05, "loss": 0.4985, "step": 9290 }, { "epoch": 0.5202710269907045, "grad_norm": 1.3182399272918701, "learning_rate": 4.643e-05, "loss": 0.5307, "step": 9291 }, { "epoch": 0.5203270243028335, "grad_norm": 1.4727940559387207, "learning_rate": 4.6435e-05, "loss": 0.6653, "step": 9292 }, { "epoch": 0.5203830216149625, "grad_norm": 1.233260989189148, "learning_rate": 4.644e-05, "loss": 0.3941, "step": 9293 }, { "epoch": 0.5204390189270915, "grad_norm": 1.5735726356506348, "learning_rate": 4.6445e-05, "loss": 0.5239, "step": 9294 }, { "epoch": 0.5204950162392206, "grad_norm": 1.2462122440338135, "learning_rate": 4.6450000000000004e-05, "loss": 0.4223, "step": 9295 }, { "epoch": 0.5205510135513496, "grad_norm": 1.1779415607452393, "learning_rate": 4.6455e-05, "loss": 0.3389, "step": 9296 }, { "epoch": 0.5206070108634786, "grad_norm": 6.715813636779785, "learning_rate": 4.6460000000000006e-05, "loss": 0.5612, "step": 9297 }, { "epoch": 0.5206630081756076, "grad_norm": 1.1489617824554443, "learning_rate": 4.6465e-05, "loss": 0.3704, "step": 9298 }, { "epoch": 0.5207190054877366, "grad_norm": 1.129090666770935, "learning_rate": 4.647e-05, "loss": 0.4061, "step": 9299 }, { "epoch": 0.5207750027998657, "grad_norm": 1.7248512506484985, "learning_rate": 4.6475000000000005e-05, "loss": 0.4675, "step": 9300 }, { "epoch": 0.5208310001119947, "grad_norm": 1.4207942485809326, "learning_rate": 4.648e-05, "loss": 0.5119, "step": 9301 }, { "epoch": 0.5208869974241236, "grad_norm": 1.1348084211349487, "learning_rate": 4.6485e-05, "loss": 0.3737, "step": 9302 }, { "epoch": 0.5209429947362526, "grad_norm": 1.1250228881835938, "learning_rate": 4.649e-05, "loss": 0.4405, "step": 9303 }, { 
"epoch": 0.5209989920483816, "grad_norm": 1.207688808441162, "learning_rate": 4.6495e-05, "loss": 0.3961, "step": 9304 }, { "epoch": 0.5210549893605106, "grad_norm": 1.2484468221664429, "learning_rate": 4.6500000000000005e-05, "loss": 0.5452, "step": 9305 }, { "epoch": 0.5211109866726397, "grad_norm": 1.426182508468628, "learning_rate": 4.6505e-05, "loss": 0.4543, "step": 9306 }, { "epoch": 0.5211669839847687, "grad_norm": 1.3050537109375, "learning_rate": 4.651e-05, "loss": 0.3999, "step": 9307 }, { "epoch": 0.5212229812968977, "grad_norm": 1.513106346130371, "learning_rate": 4.6515000000000004e-05, "loss": 0.5753, "step": 9308 }, { "epoch": 0.5212789786090267, "grad_norm": 1.4483026266098022, "learning_rate": 4.652e-05, "loss": 0.4234, "step": 9309 }, { "epoch": 0.5213349759211557, "grad_norm": 1.6815348863601685, "learning_rate": 4.6525e-05, "loss": 0.5076, "step": 9310 }, { "epoch": 0.5213909732332848, "grad_norm": 3.2403481006622314, "learning_rate": 4.6530000000000003e-05, "loss": 0.4415, "step": 9311 }, { "epoch": 0.5214469705454138, "grad_norm": 0.9420170187950134, "learning_rate": 4.6535e-05, "loss": 0.4042, "step": 9312 }, { "epoch": 0.5215029678575428, "grad_norm": 1.3481045961380005, "learning_rate": 4.654e-05, "loss": 0.513, "step": 9313 }, { "epoch": 0.5215589651696718, "grad_norm": 1.4042633771896362, "learning_rate": 4.6545e-05, "loss": 0.4236, "step": 9314 }, { "epoch": 0.5216149624818008, "grad_norm": 1.3333860635757446, "learning_rate": 4.655000000000001e-05, "loss": 0.3404, "step": 9315 }, { "epoch": 0.5216709597939299, "grad_norm": 1.5369305610656738, "learning_rate": 4.6555000000000004e-05, "loss": 0.5177, "step": 9316 }, { "epoch": 0.5217269571060589, "grad_norm": 1.1305073499679565, "learning_rate": 4.656e-05, "loss": 0.3775, "step": 9317 }, { "epoch": 0.5217829544181879, "grad_norm": 1.3339060544967651, "learning_rate": 4.6565000000000006e-05, "loss": 0.4506, "step": 9318 }, { "epoch": 0.5218389517303169, "grad_norm": 1.486646294593811, 
"learning_rate": 4.657e-05, "loss": 0.4526, "step": 9319 }, { "epoch": 0.5218949490424459, "grad_norm": 2.0488927364349365, "learning_rate": 4.6575e-05, "loss": 0.3688, "step": 9320 }, { "epoch": 0.521950946354575, "grad_norm": 1.4488757848739624, "learning_rate": 4.6580000000000005e-05, "loss": 0.5925, "step": 9321 }, { "epoch": 0.522006943666704, "grad_norm": 1.2774953842163086, "learning_rate": 4.6585e-05, "loss": 0.4723, "step": 9322 }, { "epoch": 0.522062940978833, "grad_norm": 1.2316725254058838, "learning_rate": 4.659e-05, "loss": 0.4808, "step": 9323 }, { "epoch": 0.522118938290962, "grad_norm": 1.5660383701324463, "learning_rate": 4.6595e-05, "loss": 0.5014, "step": 9324 }, { "epoch": 0.522174935603091, "grad_norm": 1.5444111824035645, "learning_rate": 4.660000000000001e-05, "loss": 0.5312, "step": 9325 }, { "epoch": 0.52223093291522, "grad_norm": 1.4226216077804565, "learning_rate": 4.6605000000000005e-05, "loss": 0.5107, "step": 9326 }, { "epoch": 0.5222869302273491, "grad_norm": 1.1815792322158813, "learning_rate": 4.661e-05, "loss": 0.349, "step": 9327 }, { "epoch": 0.5223429275394781, "grad_norm": 1.321407437324524, "learning_rate": 4.6615e-05, "loss": 0.5086, "step": 9328 }, { "epoch": 0.5223989248516071, "grad_norm": 1.2084922790527344, "learning_rate": 4.6620000000000004e-05, "loss": 0.3924, "step": 9329 }, { "epoch": 0.5224549221637361, "grad_norm": 1.177322506904602, "learning_rate": 4.6625e-05, "loss": 0.3934, "step": 9330 }, { "epoch": 0.5225109194758651, "grad_norm": 1.176064133644104, "learning_rate": 4.663e-05, "loss": 0.4001, "step": 9331 }, { "epoch": 0.5225669167879942, "grad_norm": 1.2922840118408203, "learning_rate": 4.6635e-05, "loss": 0.4604, "step": 9332 }, { "epoch": 0.5226229141001232, "grad_norm": 1.0318429470062256, "learning_rate": 4.664e-05, "loss": 0.415, "step": 9333 }, { "epoch": 0.5226789114122522, "grad_norm": 1.2632776498794556, "learning_rate": 4.6645e-05, "loss": 0.4401, "step": 9334 }, { "epoch": 0.5227349087243812, 
"grad_norm": 1.361064076423645, "learning_rate": 4.665e-05, "loss": 0.4092, "step": 9335 }, { "epoch": 0.5227909060365102, "grad_norm": 1.4519658088684082, "learning_rate": 4.6655000000000006e-05, "loss": 0.6166, "step": 9336 }, { "epoch": 0.5228469033486393, "grad_norm": 1.2925876379013062, "learning_rate": 4.6660000000000004e-05, "loss": 0.4137, "step": 9337 }, { "epoch": 0.5229029006607683, "grad_norm": 1.1427985429763794, "learning_rate": 4.6665e-05, "loss": 0.3611, "step": 9338 }, { "epoch": 0.5229588979728973, "grad_norm": 1.4485900402069092, "learning_rate": 4.6670000000000005e-05, "loss": 0.3126, "step": 9339 }, { "epoch": 0.5230148952850263, "grad_norm": 1.188767671585083, "learning_rate": 4.6675e-05, "loss": 0.3825, "step": 9340 }, { "epoch": 0.5230708925971553, "grad_norm": 1.5355230569839478, "learning_rate": 4.668e-05, "loss": 0.3954, "step": 9341 }, { "epoch": 0.5231268899092844, "grad_norm": 1.3338741064071655, "learning_rate": 4.6685e-05, "loss": 0.4825, "step": 9342 }, { "epoch": 0.5231828872214134, "grad_norm": 1.626522183418274, "learning_rate": 4.669e-05, "loss": 0.5366, "step": 9343 }, { "epoch": 0.5232388845335424, "grad_norm": 1.5628392696380615, "learning_rate": 4.6695e-05, "loss": 0.5842, "step": 9344 }, { "epoch": 0.5232948818456714, "grad_norm": 1.1358919143676758, "learning_rate": 4.6700000000000003e-05, "loss": 0.4047, "step": 9345 }, { "epoch": 0.5233508791578004, "grad_norm": 1.2265276908874512, "learning_rate": 4.670500000000001e-05, "loss": 0.4204, "step": 9346 }, { "epoch": 0.5234068764699294, "grad_norm": 1.4317282438278198, "learning_rate": 4.6710000000000005e-05, "loss": 0.4173, "step": 9347 }, { "epoch": 0.5234628737820585, "grad_norm": 1.3782039880752563, "learning_rate": 4.6715e-05, "loss": 0.3343, "step": 9348 }, { "epoch": 0.5235188710941875, "grad_norm": 1.3642542362213135, "learning_rate": 4.672e-05, "loss": 0.4608, "step": 9349 }, { "epoch": 0.5235748684063165, "grad_norm": 1.1251015663146973, "learning_rate": 
4.6725000000000004e-05, "loss": 0.3629, "step": 9350 }, { "epoch": 0.5236308657184455, "grad_norm": 1.2117772102355957, "learning_rate": 4.673e-05, "loss": 0.4681, "step": 9351 }, { "epoch": 0.5236868630305745, "grad_norm": 1.146168828010559, "learning_rate": 4.6735e-05, "loss": 0.4403, "step": 9352 }, { "epoch": 0.5237428603427036, "grad_norm": 1.1908000707626343, "learning_rate": 4.674e-05, "loss": 0.3886, "step": 9353 }, { "epoch": 0.5237988576548326, "grad_norm": 1.557708740234375, "learning_rate": 4.6745e-05, "loss": 0.513, "step": 9354 }, { "epoch": 0.5238548549669616, "grad_norm": 1.2665703296661377, "learning_rate": 4.6750000000000005e-05, "loss": 0.4626, "step": 9355 }, { "epoch": 0.5239108522790906, "grad_norm": 1.1932997703552246, "learning_rate": 4.6755e-05, "loss": 0.4521, "step": 9356 }, { "epoch": 0.5239668495912196, "grad_norm": 1.125240445137024, "learning_rate": 4.6760000000000006e-05, "loss": 0.3721, "step": 9357 }, { "epoch": 0.5240228469033487, "grad_norm": 1.2674050331115723, "learning_rate": 4.6765000000000004e-05, "loss": 0.4335, "step": 9358 }, { "epoch": 0.5240788442154777, "grad_norm": 1.3403898477554321, "learning_rate": 4.677e-05, "loss": 0.4734, "step": 9359 }, { "epoch": 0.5241348415276067, "grad_norm": 1.3714861869812012, "learning_rate": 4.6775000000000005e-05, "loss": 0.424, "step": 9360 }, { "epoch": 0.5241908388397357, "grad_norm": 1.10027277469635, "learning_rate": 4.678e-05, "loss": 0.3875, "step": 9361 }, { "epoch": 0.5242468361518647, "grad_norm": 1.2574408054351807, "learning_rate": 4.6785e-05, "loss": 0.6071, "step": 9362 }, { "epoch": 0.5243028334639938, "grad_norm": 1.5307801961898804, "learning_rate": 4.679e-05, "loss": 0.4204, "step": 9363 }, { "epoch": 0.5243588307761228, "grad_norm": 1.489932656288147, "learning_rate": 4.6795e-05, "loss": 0.5373, "step": 9364 }, { "epoch": 0.5244148280882518, "grad_norm": 1.3600836992263794, "learning_rate": 4.6800000000000006e-05, "loss": 0.4462, "step": 9365 }, { "epoch": 
0.5244708254003808, "grad_norm": 1.307382345199585, "learning_rate": 4.6805e-05, "loss": 0.3492, "step": 9366 }, { "epoch": 0.5245268227125098, "grad_norm": 1.6123884916305542, "learning_rate": 4.681e-05, "loss": 0.5212, "step": 9367 }, { "epoch": 0.5245828200246389, "grad_norm": 1.2430503368377686, "learning_rate": 4.6815000000000005e-05, "loss": 0.4878, "step": 9368 }, { "epoch": 0.5246388173367679, "grad_norm": 1.4058754444122314, "learning_rate": 4.682e-05, "loss": 0.4161, "step": 9369 }, { "epoch": 0.5246948146488969, "grad_norm": 1.9148458242416382, "learning_rate": 4.6825e-05, "loss": 0.6418, "step": 9370 }, { "epoch": 0.5247508119610259, "grad_norm": 1.1850706338882446, "learning_rate": 4.6830000000000004e-05, "loss": 0.4079, "step": 9371 }, { "epoch": 0.5248068092731549, "grad_norm": 1.6921254396438599, "learning_rate": 4.6835e-05, "loss": 0.4794, "step": 9372 }, { "epoch": 0.524862806585284, "grad_norm": 1.3645633459091187, "learning_rate": 4.684e-05, "loss": 0.3625, "step": 9373 }, { "epoch": 0.524918803897413, "grad_norm": 1.3538650274276733, "learning_rate": 4.6845e-05, "loss": 0.4021, "step": 9374 }, { "epoch": 0.524974801209542, "grad_norm": 1.3050851821899414, "learning_rate": 4.685000000000001e-05, "loss": 0.5553, "step": 9375 }, { "epoch": 0.525030798521671, "grad_norm": 1.1158578395843506, "learning_rate": 4.6855000000000005e-05, "loss": 0.3695, "step": 9376 }, { "epoch": 0.5250867958338, "grad_norm": 1.3095901012420654, "learning_rate": 4.686e-05, "loss": 0.5141, "step": 9377 }, { "epoch": 0.525142793145929, "grad_norm": 1.19870126247406, "learning_rate": 4.6865000000000006e-05, "loss": 0.3386, "step": 9378 }, { "epoch": 0.5251987904580581, "grad_norm": 1.4308778047561646, "learning_rate": 4.6870000000000004e-05, "loss": 0.4569, "step": 9379 }, { "epoch": 0.5252547877701871, "grad_norm": 1.3367048501968384, "learning_rate": 4.6875e-05, "loss": 0.4415, "step": 9380 }, { "epoch": 0.5253107850823161, "grad_norm": 1.186528205871582, "learning_rate": 
4.688e-05, "loss": 0.5143, "step": 9381 }, { "epoch": 0.5253667823944451, "grad_norm": 1.1099333763122559, "learning_rate": 4.6885e-05, "loss": 0.3806, "step": 9382 }, { "epoch": 0.5254227797065741, "grad_norm": 1.295485019683838, "learning_rate": 4.689e-05, "loss": 0.4786, "step": 9383 }, { "epoch": 0.525478777018703, "grad_norm": 1.3310902118682861, "learning_rate": 4.6895e-05, "loss": 0.4158, "step": 9384 }, { "epoch": 0.5255347743308321, "grad_norm": 1.4873652458190918, "learning_rate": 4.69e-05, "loss": 0.4764, "step": 9385 }, { "epoch": 0.5255907716429611, "grad_norm": 19.411251068115234, "learning_rate": 4.6905000000000006e-05, "loss": 0.5014, "step": 9386 }, { "epoch": 0.5256467689550901, "grad_norm": 1.3132100105285645, "learning_rate": 4.691e-05, "loss": 0.6241, "step": 9387 }, { "epoch": 0.5257027662672191, "grad_norm": 1.4633852243423462, "learning_rate": 4.6915e-05, "loss": 0.4219, "step": 9388 }, { "epoch": 0.5257587635793481, "grad_norm": 1.215549111366272, "learning_rate": 4.6920000000000005e-05, "loss": 0.3689, "step": 9389 }, { "epoch": 0.5258147608914772, "grad_norm": 1.663164496421814, "learning_rate": 4.6925e-05, "loss": 0.5895, "step": 9390 }, { "epoch": 0.5258707582036062, "grad_norm": 1.1354742050170898, "learning_rate": 4.693e-05, "loss": 0.4579, "step": 9391 }, { "epoch": 0.5259267555157352, "grad_norm": 1.1233223676681519, "learning_rate": 4.6935000000000004e-05, "loss": 0.5082, "step": 9392 }, { "epoch": 0.5259827528278642, "grad_norm": 1.3693325519561768, "learning_rate": 4.694e-05, "loss": 0.4704, "step": 9393 }, { "epoch": 0.5260387501399932, "grad_norm": 1.2200404405593872, "learning_rate": 4.6945e-05, "loss": 0.4775, "step": 9394 }, { "epoch": 0.5260947474521223, "grad_norm": 1.23910653591156, "learning_rate": 4.695e-05, "loss": 0.4767, "step": 9395 }, { "epoch": 0.5261507447642513, "grad_norm": 1.3311809301376343, "learning_rate": 4.695500000000001e-05, "loss": 0.2818, "step": 9396 }, { "epoch": 0.5262067420763803, "grad_norm": 
1.1943596601486206, "learning_rate": 4.6960000000000004e-05, "loss": 0.4084, "step": 9397 }, { "epoch": 0.5262627393885093, "grad_norm": 1.1603953838348389, "learning_rate": 4.6965e-05, "loss": 0.3852, "step": 9398 }, { "epoch": 0.5263187367006383, "grad_norm": 1.0466657876968384, "learning_rate": 4.6970000000000006e-05, "loss": 0.4014, "step": 9399 }, { "epoch": 0.5263747340127674, "grad_norm": 1.563252568244934, "learning_rate": 4.6975000000000003e-05, "loss": 0.4187, "step": 9400 }, { "epoch": 0.5264307313248964, "grad_norm": 1.361717700958252, "learning_rate": 4.698e-05, "loss": 0.454, "step": 9401 }, { "epoch": 0.5264867286370254, "grad_norm": 1.0306333303451538, "learning_rate": 4.6985e-05, "loss": 0.3177, "step": 9402 }, { "epoch": 0.5265427259491544, "grad_norm": 1.068896770477295, "learning_rate": 4.699e-05, "loss": 0.3457, "step": 9403 }, { "epoch": 0.5265987232612834, "grad_norm": 1.3894723653793335, "learning_rate": 4.6995e-05, "loss": 0.5277, "step": 9404 }, { "epoch": 0.5266547205734124, "grad_norm": 1.188368320465088, "learning_rate": 4.7e-05, "loss": 0.3323, "step": 9405 }, { "epoch": 0.5267107178855415, "grad_norm": 1.2241084575653076, "learning_rate": 4.7005e-05, "loss": 0.4512, "step": 9406 }, { "epoch": 0.5267667151976705, "grad_norm": 1.2160418033599854, "learning_rate": 4.7010000000000006e-05, "loss": 0.4451, "step": 9407 }, { "epoch": 0.5268227125097995, "grad_norm": 1.332726001739502, "learning_rate": 4.7015e-05, "loss": 0.4397, "step": 9408 }, { "epoch": 0.5268787098219285, "grad_norm": 1.3292813301086426, "learning_rate": 4.702e-05, "loss": 0.5394, "step": 9409 }, { "epoch": 0.5269347071340575, "grad_norm": 1.5254113674163818, "learning_rate": 4.7025000000000005e-05, "loss": 0.4445, "step": 9410 }, { "epoch": 0.5269907044461866, "grad_norm": 1.443175196647644, "learning_rate": 4.703e-05, "loss": 0.4907, "step": 9411 }, { "epoch": 0.5270467017583156, "grad_norm": 1.3154882192611694, "learning_rate": 4.7035e-05, "loss": 0.3779, "step": 9412 
}, { "epoch": 0.5271026990704446, "grad_norm": 1.2727384567260742, "learning_rate": 4.7040000000000004e-05, "loss": 0.4134, "step": 9413 }, { "epoch": 0.5271586963825736, "grad_norm": 1.4846564531326294, "learning_rate": 4.7045e-05, "loss": 0.4969, "step": 9414 }, { "epoch": 0.5272146936947026, "grad_norm": 1.115434169769287, "learning_rate": 4.705e-05, "loss": 0.2774, "step": 9415 }, { "epoch": 0.5272706910068317, "grad_norm": 1.1634632349014282, "learning_rate": 4.7055e-05, "loss": 0.3853, "step": 9416 }, { "epoch": 0.5273266883189607, "grad_norm": 1.208876132965088, "learning_rate": 4.706000000000001e-05, "loss": 0.5363, "step": 9417 }, { "epoch": 0.5273826856310897, "grad_norm": 1.4385795593261719, "learning_rate": 4.7065000000000004e-05, "loss": 0.4161, "step": 9418 }, { "epoch": 0.5274386829432187, "grad_norm": 1.210424542427063, "learning_rate": 4.707e-05, "loss": 0.4713, "step": 9419 }, { "epoch": 0.5274946802553477, "grad_norm": 1.1139848232269287, "learning_rate": 4.7075e-05, "loss": 0.451, "step": 9420 }, { "epoch": 0.5275506775674768, "grad_norm": 1.3247337341308594, "learning_rate": 4.708e-05, "loss": 0.4168, "step": 9421 }, { "epoch": 0.5276066748796058, "grad_norm": 1.511054515838623, "learning_rate": 4.7085e-05, "loss": 0.4623, "step": 9422 }, { "epoch": 0.5276626721917348, "grad_norm": 2.5482265949249268, "learning_rate": 4.709e-05, "loss": 0.5358, "step": 9423 }, { "epoch": 0.5277186695038638, "grad_norm": 1.2414195537567139, "learning_rate": 4.7095e-05, "loss": 0.3446, "step": 9424 }, { "epoch": 0.5277746668159928, "grad_norm": 1.1679651737213135, "learning_rate": 4.71e-05, "loss": 0.4133, "step": 9425 }, { "epoch": 0.5278306641281219, "grad_norm": 1.3512089252471924, "learning_rate": 4.7105000000000004e-05, "loss": 0.5281, "step": 9426 }, { "epoch": 0.5278866614402509, "grad_norm": 1.1578283309936523, "learning_rate": 4.711e-05, "loss": 0.3743, "step": 9427 }, { "epoch": 0.5279426587523799, "grad_norm": 1.0728938579559326, "learning_rate": 
4.7115000000000005e-05, "loss": 0.3666, "step": 9428 }, { "epoch": 0.5279986560645089, "grad_norm": 1.27793288230896, "learning_rate": 4.712e-05, "loss": 0.4113, "step": 9429 }, { "epoch": 0.5280546533766379, "grad_norm": 1.2008014917373657, "learning_rate": 4.7125e-05, "loss": 0.458, "step": 9430 }, { "epoch": 0.528110650688767, "grad_norm": 1.2136887311935425, "learning_rate": 4.7130000000000004e-05, "loss": 0.382, "step": 9431 }, { "epoch": 0.528166648000896, "grad_norm": 1.2503955364227295, "learning_rate": 4.7135e-05, "loss": 0.4685, "step": 9432 }, { "epoch": 0.528222645313025, "grad_norm": 1.1596254110336304, "learning_rate": 4.714e-05, "loss": 0.4247, "step": 9433 }, { "epoch": 0.528278642625154, "grad_norm": 1.1022257804870605, "learning_rate": 4.7145000000000003e-05, "loss": 0.379, "step": 9434 }, { "epoch": 0.528334639937283, "grad_norm": 1.4563370943069458, "learning_rate": 4.715e-05, "loss": 0.4772, "step": 9435 }, { "epoch": 0.528390637249412, "grad_norm": 1.2413662672042847, "learning_rate": 4.7155000000000005e-05, "loss": 0.3936, "step": 9436 }, { "epoch": 0.5284466345615411, "grad_norm": 1.4025589227676392, "learning_rate": 4.716e-05, "loss": 0.4219, "step": 9437 }, { "epoch": 0.5285026318736701, "grad_norm": 1.4004168510437012, "learning_rate": 4.716500000000001e-05, "loss": 0.4298, "step": 9438 }, { "epoch": 0.5285586291857991, "grad_norm": 1.2061214447021484, "learning_rate": 4.7170000000000004e-05, "loss": 0.4344, "step": 9439 }, { "epoch": 0.5286146264979281, "grad_norm": 1.204013705253601, "learning_rate": 4.7175e-05, "loss": 0.389, "step": 9440 }, { "epoch": 0.5286706238100571, "grad_norm": 1.3896828889846802, "learning_rate": 4.718e-05, "loss": 0.4706, "step": 9441 }, { "epoch": 0.5287266211221862, "grad_norm": 1.1460342407226562, "learning_rate": 4.7185e-05, "loss": 0.5139, "step": 9442 }, { "epoch": 0.5287826184343152, "grad_norm": 1.0229713916778564, "learning_rate": 4.719e-05, "loss": 0.353, "step": 9443 }, { "epoch": 
0.5288386157464442, "grad_norm": 1.3112131357192993, "learning_rate": 4.7195e-05, "loss": 0.6277, "step": 9444 }, { "epoch": 0.5288946130585732, "grad_norm": 1.111806869506836, "learning_rate": 4.72e-05, "loss": 0.3495, "step": 9445 }, { "epoch": 0.5289506103707022, "grad_norm": 1.380745768547058, "learning_rate": 4.7205000000000006e-05, "loss": 0.443, "step": 9446 }, { "epoch": 0.5290066076828313, "grad_norm": 1.5349440574645996, "learning_rate": 4.7210000000000004e-05, "loss": 0.7024, "step": 9447 }, { "epoch": 0.5290626049949603, "grad_norm": 1.9456593990325928, "learning_rate": 4.7215e-05, "loss": 0.6035, "step": 9448 }, { "epoch": 0.5291186023070893, "grad_norm": 1.6643338203430176, "learning_rate": 4.7220000000000005e-05, "loss": 0.6041, "step": 9449 }, { "epoch": 0.5291745996192183, "grad_norm": 1.0799925327301025, "learning_rate": 4.7225e-05, "loss": 0.4175, "step": 9450 }, { "epoch": 0.5292305969313473, "grad_norm": 1.3846999406814575, "learning_rate": 4.723e-05, "loss": 0.4605, "step": 9451 }, { "epoch": 0.5292865942434763, "grad_norm": 30.342243194580078, "learning_rate": 4.7235000000000004e-05, "loss": 0.4169, "step": 9452 }, { "epoch": 0.5293425915556054, "grad_norm": 1.0062071084976196, "learning_rate": 4.724e-05, "loss": 0.286, "step": 9453 }, { "epoch": 0.5293985888677344, "grad_norm": 1.2299600839614868, "learning_rate": 4.7245e-05, "loss": 0.6064, "step": 9454 }, { "epoch": 0.5294545861798634, "grad_norm": 1.2189158201217651, "learning_rate": 4.7249999999999997e-05, "loss": 0.4084, "step": 9455 }, { "epoch": 0.5295105834919924, "grad_norm": 1.4640798568725586, "learning_rate": 4.725500000000001e-05, "loss": 0.4591, "step": 9456 }, { "epoch": 0.5295665808041214, "grad_norm": 1.0949474573135376, "learning_rate": 4.7260000000000005e-05, "loss": 0.3065, "step": 9457 }, { "epoch": 0.5296225781162505, "grad_norm": 1.0800518989562988, "learning_rate": 4.7265e-05, "loss": 0.3289, "step": 9458 }, { "epoch": 0.5296785754283795, "grad_norm": 
1.3044289350509644, "learning_rate": 4.7270000000000007e-05, "loss": 0.4441, "step": 9459 }, { "epoch": 0.5297345727405085, "grad_norm": 1.2596040964126587, "learning_rate": 4.7275000000000004e-05, "loss": 0.5076, "step": 9460 }, { "epoch": 0.5297905700526375, "grad_norm": 1.1207771301269531, "learning_rate": 4.728e-05, "loss": 0.4076, "step": 9461 }, { "epoch": 0.5298465673647665, "grad_norm": 1.3882770538330078, "learning_rate": 4.7285e-05, "loss": 0.4243, "step": 9462 }, { "epoch": 0.5299025646768956, "grad_norm": 2.043513298034668, "learning_rate": 4.729e-05, "loss": 0.5781, "step": 9463 }, { "epoch": 0.5299585619890246, "grad_norm": 1.4725967645645142, "learning_rate": 4.7295e-05, "loss": 0.6341, "step": 9464 }, { "epoch": 0.5300145593011536, "grad_norm": 1.2623803615570068, "learning_rate": 4.73e-05, "loss": 0.4125, "step": 9465 }, { "epoch": 0.5300705566132826, "grad_norm": 1.5083774328231812, "learning_rate": 4.7305e-05, "loss": 0.5131, "step": 9466 }, { "epoch": 0.5301265539254115, "grad_norm": 1.3852721452713013, "learning_rate": 4.7310000000000006e-05, "loss": 0.4347, "step": 9467 }, { "epoch": 0.5301825512375405, "grad_norm": 1.2226790189743042, "learning_rate": 4.7315000000000004e-05, "loss": 0.4741, "step": 9468 }, { "epoch": 0.5302385485496696, "grad_norm": 1.1415860652923584, "learning_rate": 4.732e-05, "loss": 0.4221, "step": 9469 }, { "epoch": 0.5302945458617986, "grad_norm": 1.0505965948104858, "learning_rate": 4.7325000000000005e-05, "loss": 0.3267, "step": 9470 }, { "epoch": 0.5303505431739276, "grad_norm": 1.618748664855957, "learning_rate": 4.733e-05, "loss": 0.4949, "step": 9471 }, { "epoch": 0.5304065404860566, "grad_norm": 1.3280977010726929, "learning_rate": 4.7335e-05, "loss": 0.3981, "step": 9472 }, { "epoch": 0.5304625377981856, "grad_norm": 1.3729336261749268, "learning_rate": 4.7340000000000004e-05, "loss": 0.4779, "step": 9473 }, { "epoch": 0.5305185351103147, "grad_norm": 1.0747077465057373, "learning_rate": 4.7345e-05, "loss": 
0.4052, "step": 9474 }, { "epoch": 0.5305745324224437, "grad_norm": 1.391895055770874, "learning_rate": 4.735e-05, "loss": 0.4142, "step": 9475 }, { "epoch": 0.5306305297345727, "grad_norm": 1.2232345342636108, "learning_rate": 4.7355e-05, "loss": 0.382, "step": 9476 }, { "epoch": 0.5306865270467017, "grad_norm": 1.3728983402252197, "learning_rate": 4.736000000000001e-05, "loss": 0.6502, "step": 9477 }, { "epoch": 0.5307425243588307, "grad_norm": 1.1662178039550781, "learning_rate": 4.7365000000000005e-05, "loss": 0.4822, "step": 9478 }, { "epoch": 0.5307985216709598, "grad_norm": 1.171358346939087, "learning_rate": 4.737e-05, "loss": 0.4309, "step": 9479 }, { "epoch": 0.5308545189830888, "grad_norm": 5.396470069885254, "learning_rate": 4.7375e-05, "loss": 0.5371, "step": 9480 }, { "epoch": 0.5309105162952178, "grad_norm": 1.3976140022277832, "learning_rate": 4.7380000000000004e-05, "loss": 0.3117, "step": 9481 }, { "epoch": 0.5309665136073468, "grad_norm": 1.4935336112976074, "learning_rate": 4.7385e-05, "loss": 0.4498, "step": 9482 }, { "epoch": 0.5310225109194758, "grad_norm": 1.3390616178512573, "learning_rate": 4.739e-05, "loss": 0.4706, "step": 9483 }, { "epoch": 0.5310785082316049, "grad_norm": 1.7145739793777466, "learning_rate": 4.7395e-05, "loss": 0.4191, "step": 9484 }, { "epoch": 0.5311345055437339, "grad_norm": 1.1888912916183472, "learning_rate": 4.74e-05, "loss": 0.3363, "step": 9485 }, { "epoch": 0.5311905028558629, "grad_norm": 1.2296078205108643, "learning_rate": 4.7405000000000004e-05, "loss": 0.3789, "step": 9486 }, { "epoch": 0.5312465001679919, "grad_norm": 1.4090124368667603, "learning_rate": 4.741e-05, "loss": 0.5327, "step": 9487 }, { "epoch": 0.5313024974801209, "grad_norm": 1.6135203838348389, "learning_rate": 4.7415000000000006e-05, "loss": 0.5505, "step": 9488 }, { "epoch": 0.53135849479225, "grad_norm": 1.1662812232971191, "learning_rate": 4.742e-05, "loss": 0.3032, "step": 9489 }, { "epoch": 0.531414492104379, "grad_norm": 
1.2016109228134155, "learning_rate": 4.7425e-05, "loss": 0.3916, "step": 9490 }, { "epoch": 0.531470489416508, "grad_norm": 1.1974090337753296, "learning_rate": 4.7430000000000005e-05, "loss": 0.4503, "step": 9491 }, { "epoch": 0.531526486728637, "grad_norm": 1.194698691368103, "learning_rate": 4.7435e-05, "loss": 0.3947, "step": 9492 }, { "epoch": 0.531582484040766, "grad_norm": 1.080213189125061, "learning_rate": 4.744e-05, "loss": 0.4138, "step": 9493 }, { "epoch": 0.531638481352895, "grad_norm": 1.5487487316131592, "learning_rate": 4.7445e-05, "loss": 0.398, "step": 9494 }, { "epoch": 0.5316944786650241, "grad_norm": 1.090031623840332, "learning_rate": 4.745e-05, "loss": 0.3756, "step": 9495 }, { "epoch": 0.5317504759771531, "grad_norm": 1.1094419956207275, "learning_rate": 4.7455000000000006e-05, "loss": 0.4849, "step": 9496 }, { "epoch": 0.5318064732892821, "grad_norm": 1.4495092630386353, "learning_rate": 4.746e-05, "loss": 0.3952, "step": 9497 }, { "epoch": 0.5318624706014111, "grad_norm": 1.535077452659607, "learning_rate": 4.746500000000001e-05, "loss": 0.429, "step": 9498 }, { "epoch": 0.5319184679135401, "grad_norm": 1.8276748657226562, "learning_rate": 4.7470000000000005e-05, "loss": 0.4638, "step": 9499 }, { "epoch": 0.5319744652256692, "grad_norm": 1.2453430891036987, "learning_rate": 4.7475e-05, "loss": 0.3779, "step": 9500 }, { "epoch": 0.5320304625377982, "grad_norm": 1.2442799806594849, "learning_rate": 4.748e-05, "loss": 0.6226, "step": 9501 }, { "epoch": 0.5320864598499272, "grad_norm": 1.3208845853805542, "learning_rate": 4.7485000000000004e-05, "loss": 0.3726, "step": 9502 }, { "epoch": 0.5321424571620562, "grad_norm": 1.1866168975830078, "learning_rate": 4.749e-05, "loss": 0.3711, "step": 9503 }, { "epoch": 0.5321984544741852, "grad_norm": 1.3956489562988281, "learning_rate": 4.7495e-05, "loss": 0.5071, "step": 9504 }, { "epoch": 0.5322544517863143, "grad_norm": 1.4432622194290161, "learning_rate": 4.75e-05, "loss": 0.5588, "step": 9505 }, { 
"epoch": 0.5323104490984433, "grad_norm": 1.4026999473571777, "learning_rate": 4.7505e-05, "loss": 0.4134, "step": 9506 }, { "epoch": 0.5323664464105723, "grad_norm": 1.4475175142288208, "learning_rate": 4.7510000000000004e-05, "loss": 0.4246, "step": 9507 }, { "epoch": 0.5324224437227013, "grad_norm": 1.3349000215530396, "learning_rate": 4.7515e-05, "loss": 0.2891, "step": 9508 }, { "epoch": 0.5324784410348303, "grad_norm": 1.7620033025741577, "learning_rate": 4.7520000000000006e-05, "loss": 0.4458, "step": 9509 }, { "epoch": 0.5325344383469593, "grad_norm": 1.14523184299469, "learning_rate": 4.7525e-05, "loss": 0.3744, "step": 9510 }, { "epoch": 0.5325904356590884, "grad_norm": 1.699660301208496, "learning_rate": 4.753e-05, "loss": 0.5277, "step": 9511 }, { "epoch": 0.5326464329712174, "grad_norm": 1.4605993032455444, "learning_rate": 4.7535000000000005e-05, "loss": 0.4717, "step": 9512 }, { "epoch": 0.5327024302833464, "grad_norm": 1.1820509433746338, "learning_rate": 4.754e-05, "loss": 0.4485, "step": 9513 }, { "epoch": 0.5327584275954754, "grad_norm": 1.1635390520095825, "learning_rate": 4.7545e-05, "loss": 0.3693, "step": 9514 }, { "epoch": 0.5328144249076044, "grad_norm": 1.3283125162124634, "learning_rate": 4.755e-05, "loss": 0.3971, "step": 9515 }, { "epoch": 0.5328704222197335, "grad_norm": 1.3234190940856934, "learning_rate": 4.7555e-05, "loss": 0.5261, "step": 9516 }, { "epoch": 0.5329264195318625, "grad_norm": 1.2321648597717285, "learning_rate": 4.7560000000000005e-05, "loss": 0.5013, "step": 9517 }, { "epoch": 0.5329824168439915, "grad_norm": 1.5043165683746338, "learning_rate": 4.7565e-05, "loss": 0.3885, "step": 9518 }, { "epoch": 0.5330384141561205, "grad_norm": 1.2735203504562378, "learning_rate": 4.757e-05, "loss": 0.5187, "step": 9519 }, { "epoch": 0.5330944114682495, "grad_norm": 1.0651447772979736, "learning_rate": 4.7575000000000004e-05, "loss": 0.3281, "step": 9520 }, { "epoch": 0.5331504087803786, "grad_norm": 1.2711256742477417, 
"learning_rate": 4.758e-05, "loss": 0.365, "step": 9521 }, { "epoch": 0.5332064060925076, "grad_norm": 1.3065346479415894, "learning_rate": 4.7585e-05, "loss": 0.5437, "step": 9522 }, { "epoch": 0.5332624034046366, "grad_norm": 1.5553252696990967, "learning_rate": 4.7590000000000003e-05, "loss": 0.5435, "step": 9523 }, { "epoch": 0.5333184007167656, "grad_norm": 1.0811947584152222, "learning_rate": 4.7595e-05, "loss": 0.3668, "step": 9524 }, { "epoch": 0.5333743980288946, "grad_norm": 1.2615207433700562, "learning_rate": 4.76e-05, "loss": 0.4445, "step": 9525 }, { "epoch": 0.5334303953410237, "grad_norm": 1.282523512840271, "learning_rate": 4.7605e-05, "loss": 0.3629, "step": 9526 }, { "epoch": 0.5334863926531527, "grad_norm": 1.3136628866195679, "learning_rate": 4.761000000000001e-05, "loss": 0.4487, "step": 9527 }, { "epoch": 0.5335423899652817, "grad_norm": 1.2010968923568726, "learning_rate": 4.7615000000000004e-05, "loss": 0.3197, "step": 9528 }, { "epoch": 0.5335983872774107, "grad_norm": 1.1536890268325806, "learning_rate": 4.762e-05, "loss": 0.4651, "step": 9529 }, { "epoch": 0.5336543845895397, "grad_norm": 1.1152513027191162, "learning_rate": 4.7625000000000006e-05, "loss": 0.4346, "step": 9530 }, { "epoch": 0.5337103819016688, "grad_norm": 1.399315595626831, "learning_rate": 4.763e-05, "loss": 0.4695, "step": 9531 }, { "epoch": 0.5337663792137978, "grad_norm": 1.2495238780975342, "learning_rate": 4.7635e-05, "loss": 0.352, "step": 9532 }, { "epoch": 0.5338223765259268, "grad_norm": 1.1976304054260254, "learning_rate": 4.7640000000000005e-05, "loss": 0.3945, "step": 9533 }, { "epoch": 0.5338783738380558, "grad_norm": 1.330531120300293, "learning_rate": 4.7645e-05, "loss": 0.4243, "step": 9534 }, { "epoch": 0.5339343711501848, "grad_norm": 1.3263925313949585, "learning_rate": 4.765e-05, "loss": 0.3887, "step": 9535 }, { "epoch": 0.5339903684623138, "grad_norm": 1.2777055501937866, "learning_rate": 4.7655e-05, "loss": 0.3793, "step": 9536 }, { "epoch": 
0.5340463657744429, "grad_norm": 1.4256744384765625, "learning_rate": 4.766000000000001e-05, "loss": 0.5506, "step": 9537 }, { "epoch": 0.5341023630865719, "grad_norm": 1.3626021146774292, "learning_rate": 4.7665000000000005e-05, "loss": 0.4123, "step": 9538 }, { "epoch": 0.5341583603987009, "grad_norm": 1.2359644174575806, "learning_rate": 4.767e-05, "loss": 0.4575, "step": 9539 }, { "epoch": 0.5342143577108299, "grad_norm": 1.4144980907440186, "learning_rate": 4.7675e-05, "loss": 0.4501, "step": 9540 }, { "epoch": 0.5342703550229589, "grad_norm": 1.4422568082809448, "learning_rate": 4.7680000000000004e-05, "loss": 0.581, "step": 9541 }, { "epoch": 0.534326352335088, "grad_norm": null, "learning_rate": 4.7680000000000004e-05, "loss": 0.4907, "step": 9542 }, { "epoch": 0.534382349647217, "grad_norm": 1.2412611246109009, "learning_rate": 4.7685e-05, "loss": 0.4355, "step": 9543 }, { "epoch": 0.534438346959346, "grad_norm": 1.3127073049545288, "learning_rate": 4.769e-05, "loss": 0.4543, "step": 9544 }, { "epoch": 0.534494344271475, "grad_norm": 1.9356169700622559, "learning_rate": 4.7695e-05, "loss": 0.5243, "step": 9545 }, { "epoch": 0.534550341583604, "grad_norm": 1.1254539489746094, "learning_rate": 4.77e-05, "loss": 0.4137, "step": 9546 }, { "epoch": 0.5346063388957331, "grad_norm": 1.1302624940872192, "learning_rate": 4.7705e-05, "loss": 0.4327, "step": 9547 }, { "epoch": 0.5346623362078621, "grad_norm": 1.411635160446167, "learning_rate": 4.771e-05, "loss": 0.5023, "step": 9548 }, { "epoch": 0.5347183335199911, "grad_norm": 1.284804344177246, "learning_rate": 4.7715000000000006e-05, "loss": 0.4215, "step": 9549 }, { "epoch": 0.53477433083212, "grad_norm": 1.2880300283432007, "learning_rate": 4.7720000000000004e-05, "loss": 0.4165, "step": 9550 }, { "epoch": 0.534830328144249, "grad_norm": 1.2266498804092407, "learning_rate": 4.7725e-05, "loss": 0.3902, "step": 9551 }, { "epoch": 0.534886325456378, "grad_norm": 1.2976826429367065, "learning_rate": 
4.7730000000000005e-05, "loss": 0.4641, "step": 9552 }, { "epoch": 0.5349423227685071, "grad_norm": 1.2469403743743896, "learning_rate": 4.7735e-05, "loss": 0.3762, "step": 9553 }, { "epoch": 0.5349983200806361, "grad_norm": 1.1859091520309448, "learning_rate": 4.774e-05, "loss": 0.4301, "step": 9554 }, { "epoch": 0.5350543173927651, "grad_norm": 1.3102047443389893, "learning_rate": 4.7745e-05, "loss": 0.3257, "step": 9555 }, { "epoch": 0.5351103147048941, "grad_norm": 1.27182936668396, "learning_rate": 4.775e-05, "loss": 0.3905, "step": 9556 }, { "epoch": 0.5351663120170231, "grad_norm": 1.1418534517288208, "learning_rate": 4.7755e-05, "loss": 0.4676, "step": 9557 }, { "epoch": 0.5352223093291522, "grad_norm": 1.368029236793518, "learning_rate": 4.7760000000000004e-05, "loss": 0.4567, "step": 9558 }, { "epoch": 0.5352783066412812, "grad_norm": 1.408414602279663, "learning_rate": 4.7765e-05, "loss": 0.4692, "step": 9559 }, { "epoch": 0.5353343039534102, "grad_norm": 1.2978746891021729, "learning_rate": 4.7770000000000005e-05, "loss": 0.418, "step": 9560 }, { "epoch": 0.5353903012655392, "grad_norm": 1.1471731662750244, "learning_rate": 4.7775e-05, "loss": 0.4687, "step": 9561 }, { "epoch": 0.5354462985776682, "grad_norm": 1.217063307762146, "learning_rate": 4.778e-05, "loss": 0.3974, "step": 9562 }, { "epoch": 0.5355022958897973, "grad_norm": 1.1473416090011597, "learning_rate": 4.7785000000000004e-05, "loss": 0.4221, "step": 9563 }, { "epoch": 0.5355582932019263, "grad_norm": 1.3304030895233154, "learning_rate": 4.779e-05, "loss": 0.4207, "step": 9564 }, { "epoch": 0.5356142905140553, "grad_norm": 1.2993205785751343, "learning_rate": 4.7795e-05, "loss": 0.3556, "step": 9565 }, { "epoch": 0.5356702878261843, "grad_norm": 1.4037895202636719, "learning_rate": 4.78e-05, "loss": 0.456, "step": 9566 }, { "epoch": 0.5357262851383133, "grad_norm": 1.8493205308914185, "learning_rate": 4.7805e-05, "loss": 0.3586, "step": 9567 }, { "epoch": 0.5357822824504423, "grad_norm": 
1.2599310874938965, "learning_rate": 4.7810000000000005e-05, "loss": 0.4284, "step": 9568 }, { "epoch": 0.5358382797625714, "grad_norm": 1.3864649534225464, "learning_rate": 4.7815e-05, "loss": 0.4289, "step": 9569 }, { "epoch": 0.5358942770747004, "grad_norm": 1.3962305784225464, "learning_rate": 4.7820000000000006e-05, "loss": 0.4402, "step": 9570 }, { "epoch": 0.5359502743868294, "grad_norm": 1.3587095737457275, "learning_rate": 4.7825000000000004e-05, "loss": 0.6073, "step": 9571 }, { "epoch": 0.5360062716989584, "grad_norm": 1.2020891904830933, "learning_rate": 4.783e-05, "loss": 0.3073, "step": 9572 }, { "epoch": 0.5360622690110874, "grad_norm": 1.3456652164459229, "learning_rate": 4.7835000000000005e-05, "loss": 0.5029, "step": 9573 }, { "epoch": 0.5361182663232165, "grad_norm": 1.320943832397461, "learning_rate": 4.784e-05, "loss": 0.5947, "step": 9574 }, { "epoch": 0.5361742636353455, "grad_norm": 1.4631785154342651, "learning_rate": 4.7845e-05, "loss": 0.388, "step": 9575 }, { "epoch": 0.5362302609474745, "grad_norm": 1.2012794017791748, "learning_rate": 4.785e-05, "loss": 0.4939, "step": 9576 }, { "epoch": 0.5362862582596035, "grad_norm": 1.5282450914382935, "learning_rate": 4.7855e-05, "loss": 0.4669, "step": 9577 }, { "epoch": 0.5363422555717325, "grad_norm": 1.3773553371429443, "learning_rate": 4.7860000000000006e-05, "loss": 0.5134, "step": 9578 }, { "epoch": 0.5363982528838616, "grad_norm": 1.3201590776443481, "learning_rate": 4.7865e-05, "loss": 0.3516, "step": 9579 }, { "epoch": 0.5364542501959906, "grad_norm": 1.338982343673706, "learning_rate": 4.787e-05, "loss": 0.504, "step": 9580 }, { "epoch": 0.5365102475081196, "grad_norm": 0.941386342048645, "learning_rate": 4.7875000000000005e-05, "loss": 0.2867, "step": 9581 }, { "epoch": 0.5365662448202486, "grad_norm": 1.6042498350143433, "learning_rate": 4.788e-05, "loss": 0.4469, "step": 9582 }, { "epoch": 0.5366222421323776, "grad_norm": 1.460040807723999, "learning_rate": 4.7885e-05, "loss": 
0.4622, "step": 9583 }, { "epoch": 0.5366782394445067, "grad_norm": 1.2942413091659546, "learning_rate": 4.7890000000000004e-05, "loss": 0.3773, "step": 9584 }, { "epoch": 0.5367342367566357, "grad_norm": 1.3352640867233276, "learning_rate": 4.7895e-05, "loss": 0.4208, "step": 9585 }, { "epoch": 0.5367902340687647, "grad_norm": 1.1913381814956665, "learning_rate": 4.79e-05, "loss": 0.4136, "step": 9586 }, { "epoch": 0.5368462313808937, "grad_norm": 1.2104109525680542, "learning_rate": 4.7905e-05, "loss": 0.3568, "step": 9587 }, { "epoch": 0.5369022286930227, "grad_norm": 0.90604168176651, "learning_rate": 4.791000000000001e-05, "loss": 0.3124, "step": 9588 }, { "epoch": 0.5369582260051518, "grad_norm": 1.3224369287490845, "learning_rate": 4.7915000000000005e-05, "loss": 0.4234, "step": 9589 }, { "epoch": 0.5370142233172808, "grad_norm": 1.2113516330718994, "learning_rate": 4.792e-05, "loss": 0.4572, "step": 9590 }, { "epoch": 0.5370702206294098, "grad_norm": 1.168519139289856, "learning_rate": 4.7925000000000006e-05, "loss": 0.4055, "step": 9591 }, { "epoch": 0.5371262179415388, "grad_norm": 1.1018953323364258, "learning_rate": 4.7930000000000004e-05, "loss": 0.4418, "step": 9592 }, { "epoch": 0.5371822152536678, "grad_norm": 1.2094017267227173, "learning_rate": 4.7935e-05, "loss": 0.3992, "step": 9593 }, { "epoch": 0.5372382125657968, "grad_norm": 1.0364420413970947, "learning_rate": 4.794e-05, "loss": 0.3968, "step": 9594 }, { "epoch": 0.5372942098779259, "grad_norm": 1.2871984243392944, "learning_rate": 4.7945e-05, "loss": 0.4747, "step": 9595 }, { "epoch": 0.5373502071900549, "grad_norm": 1.1016348600387573, "learning_rate": 4.795e-05, "loss": 0.3754, "step": 9596 }, { "epoch": 0.5374062045021839, "grad_norm": 1.1769936084747314, "learning_rate": 4.7955e-05, "loss": 0.3593, "step": 9597 }, { "epoch": 0.5374622018143129, "grad_norm": 1.468197226524353, "learning_rate": 4.796e-05, "loss": 0.4162, "step": 9598 }, { "epoch": 0.5375181991264419, "grad_norm": 
1.1717861890792847, "learning_rate": 4.7965000000000006e-05, "loss": 0.3874, "step": 9599 }, { "epoch": 0.537574196438571, "grad_norm": 1.3909096717834473, "learning_rate": 4.797e-05, "loss": 0.4744, "step": 9600 }, { "epoch": 0.5376301937507, "grad_norm": 1.302345871925354, "learning_rate": 4.7975e-05, "loss": 0.5447, "step": 9601 }, { "epoch": 0.537686191062829, "grad_norm": 1.271485447883606, "learning_rate": 4.7980000000000005e-05, "loss": 0.3747, "step": 9602 }, { "epoch": 0.537742188374958, "grad_norm": 1.1500025987625122, "learning_rate": 4.7985e-05, "loss": 0.3789, "step": 9603 }, { "epoch": 0.537798185687087, "grad_norm": 1.4302514791488647, "learning_rate": 4.799e-05, "loss": 0.5329, "step": 9604 }, { "epoch": 0.5378541829992161, "grad_norm": 1.191972017288208, "learning_rate": 4.7995000000000004e-05, "loss": 0.4231, "step": 9605 }, { "epoch": 0.5379101803113451, "grad_norm": 1.1573739051818848, "learning_rate": 4.8e-05, "loss": 0.3268, "step": 9606 }, { "epoch": 0.5379661776234741, "grad_norm": 1.227290391921997, "learning_rate": 4.8005e-05, "loss": 0.456, "step": 9607 }, { "epoch": 0.5380221749356031, "grad_norm": 8.732412338256836, "learning_rate": 4.801e-05, "loss": 0.4961, "step": 9608 }, { "epoch": 0.5380781722477321, "grad_norm": 1.2821083068847656, "learning_rate": 4.801500000000001e-05, "loss": 0.5198, "step": 9609 }, { "epoch": 0.5381341695598612, "grad_norm": 1.3780461549758911, "learning_rate": 4.8020000000000004e-05, "loss": 0.4525, "step": 9610 }, { "epoch": 0.5381901668719902, "grad_norm": 1.4324729442596436, "learning_rate": 4.8025e-05, "loss": 0.5719, "step": 9611 }, { "epoch": 0.5382461641841192, "grad_norm": 1.385636568069458, "learning_rate": 4.8030000000000006e-05, "loss": 0.5452, "step": 9612 }, { "epoch": 0.5383021614962482, "grad_norm": 1.4018421173095703, "learning_rate": 4.8035000000000003e-05, "loss": 0.4338, "step": 9613 }, { "epoch": 0.5383581588083772, "grad_norm": 1.3203582763671875, "learning_rate": 4.804e-05, "loss": 
0.3865, "step": 9614 }, { "epoch": 0.5384141561205062, "grad_norm": 1.5596752166748047, "learning_rate": 4.8045e-05, "loss": 0.5408, "step": 9615 }, { "epoch": 0.5384701534326353, "grad_norm": 1.4226065874099731, "learning_rate": 4.805e-05, "loss": 0.4455, "step": 9616 }, { "epoch": 0.5385261507447643, "grad_norm": 1.3288135528564453, "learning_rate": 4.8055e-05, "loss": 0.4446, "step": 9617 }, { "epoch": 0.5385821480568933, "grad_norm": 1.5903502702713013, "learning_rate": 4.8060000000000004e-05, "loss": 0.4474, "step": 9618 }, { "epoch": 0.5386381453690223, "grad_norm": 1.1605653762817383, "learning_rate": 4.8065e-05, "loss": 0.4431, "step": 9619 }, { "epoch": 0.5386941426811513, "grad_norm": 1.0023702383041382, "learning_rate": 4.8070000000000006e-05, "loss": 0.4153, "step": 9620 }, { "epoch": 0.5387501399932804, "grad_norm": 1.7927113771438599, "learning_rate": 4.8075e-05, "loss": 0.5195, "step": 9621 }, { "epoch": 0.5388061373054094, "grad_norm": 1.159888744354248, "learning_rate": 4.808e-05, "loss": 0.4383, "step": 9622 }, { "epoch": 0.5388621346175384, "grad_norm": 1.092410922050476, "learning_rate": 4.8085000000000005e-05, "loss": 0.4009, "step": 9623 }, { "epoch": 0.5389181319296674, "grad_norm": 1.4227818250656128, "learning_rate": 4.809e-05, "loss": 0.5175, "step": 9624 }, { "epoch": 0.5389741292417964, "grad_norm": 1.3793169260025024, "learning_rate": 4.8095e-05, "loss": 0.5603, "step": 9625 }, { "epoch": 0.5390301265539255, "grad_norm": 1.1705825328826904, "learning_rate": 4.8100000000000004e-05, "loss": 0.4039, "step": 9626 }, { "epoch": 0.5390861238660545, "grad_norm": 1.0983772277832031, "learning_rate": 4.8105e-05, "loss": 0.4904, "step": 9627 }, { "epoch": 0.5391421211781835, "grad_norm": 1.1396361589431763, "learning_rate": 4.8110000000000005e-05, "loss": 0.5152, "step": 9628 }, { "epoch": 0.5391981184903125, "grad_norm": 1.3853065967559814, "learning_rate": 4.8115e-05, "loss": 0.3574, "step": 9629 }, { "epoch": 0.5392541158024415, "grad_norm": 
1.1867951154708862, "learning_rate": 4.812000000000001e-05, "loss": 0.3441, "step": 9630 }, { "epoch": 0.5393101131145706, "grad_norm": 1.5658626556396484, "learning_rate": 4.8125000000000004e-05, "loss": 0.596, "step": 9631 }, { "epoch": 0.5393661104266995, "grad_norm": 1.3408464193344116, "learning_rate": 4.813e-05, "loss": 0.3396, "step": 9632 }, { "epoch": 0.5394221077388285, "grad_norm": 1.2120468616485596, "learning_rate": 4.8135e-05, "loss": 0.447, "step": 9633 }, { "epoch": 0.5394781050509575, "grad_norm": 1.206899642944336, "learning_rate": 4.814e-05, "loss": 0.4764, "step": 9634 }, { "epoch": 0.5395341023630865, "grad_norm": 1.2194875478744507, "learning_rate": 4.8145e-05, "loss": 0.4007, "step": 9635 }, { "epoch": 0.5395900996752155, "grad_norm": 1.3562273979187012, "learning_rate": 4.815e-05, "loss": 0.5112, "step": 9636 }, { "epoch": 0.5396460969873446, "grad_norm": 4.3270063400268555, "learning_rate": 4.8155e-05, "loss": 0.3975, "step": 9637 }, { "epoch": 0.5397020942994736, "grad_norm": 1.493651270866394, "learning_rate": 4.816e-05, "loss": 0.3844, "step": 9638 }, { "epoch": 0.5397580916116026, "grad_norm": 1.3843554258346558, "learning_rate": 4.8165000000000004e-05, "loss": 0.418, "step": 9639 }, { "epoch": 0.5398140889237316, "grad_norm": 1.1690887212753296, "learning_rate": 4.817e-05, "loss": 0.3975, "step": 9640 }, { "epoch": 0.5398700862358606, "grad_norm": 1.0352396965026855, "learning_rate": 4.8175000000000005e-05, "loss": 0.3196, "step": 9641 }, { "epoch": 0.5399260835479897, "grad_norm": 1.4064724445343018, "learning_rate": 4.818e-05, "loss": 0.3739, "step": 9642 }, { "epoch": 0.5399820808601187, "grad_norm": 1.2507612705230713, "learning_rate": 4.8185e-05, "loss": 0.3976, "step": 9643 }, { "epoch": 0.5400380781722477, "grad_norm": 1.3313459157943726, "learning_rate": 4.8190000000000004e-05, "loss": 0.469, "step": 9644 }, { "epoch": 0.5400940754843767, "grad_norm": 1.3563543558120728, "learning_rate": 4.8195e-05, "loss": 0.4279, "step": 9645 
}, { "epoch": 0.5401500727965057, "grad_norm": 1.2791963815689087, "learning_rate": 4.82e-05, "loss": 0.4305, "step": 9646 }, { "epoch": 0.5402060701086348, "grad_norm": 1.1706186532974243, "learning_rate": 4.8205000000000003e-05, "loss": 0.3308, "step": 9647 }, { "epoch": 0.5402620674207638, "grad_norm": 1.3401410579681396, "learning_rate": 4.821e-05, "loss": 0.4504, "step": 9648 }, { "epoch": 0.5403180647328928, "grad_norm": 1.125596523284912, "learning_rate": 4.8215000000000005e-05, "loss": 0.4313, "step": 9649 }, { "epoch": 0.5403740620450218, "grad_norm": 1.2922368049621582, "learning_rate": 4.822e-05, "loss": 0.446, "step": 9650 }, { "epoch": 0.5404300593571508, "grad_norm": 1.2992316484451294, "learning_rate": 4.822500000000001e-05, "loss": 0.4018, "step": 9651 }, { "epoch": 0.5404860566692798, "grad_norm": 1.452916145324707, "learning_rate": 4.8230000000000004e-05, "loss": 0.4483, "step": 9652 }, { "epoch": 0.5405420539814089, "grad_norm": 1.273618221282959, "learning_rate": 4.8235e-05, "loss": 0.4828, "step": 9653 }, { "epoch": 0.5405980512935379, "grad_norm": 1.3946022987365723, "learning_rate": 4.824e-05, "loss": 0.4167, "step": 9654 }, { "epoch": 0.5406540486056669, "grad_norm": 1.1924132108688354, "learning_rate": 4.8245e-05, "loss": 0.4438, "step": 9655 }, { "epoch": 0.5407100459177959, "grad_norm": 1.2493716478347778, "learning_rate": 4.825e-05, "loss": 0.3493, "step": 9656 }, { "epoch": 0.5407660432299249, "grad_norm": 1.4506077766418457, "learning_rate": 4.8255e-05, "loss": 0.4415, "step": 9657 }, { "epoch": 0.540822040542054, "grad_norm": 1.2077593803405762, "learning_rate": 4.826e-05, "loss": 0.3707, "step": 9658 }, { "epoch": 0.540878037854183, "grad_norm": 1.5090138912200928, "learning_rate": 4.8265000000000006e-05, "loss": 0.4473, "step": 9659 }, { "epoch": 0.540934035166312, "grad_norm": 1.2844105958938599, "learning_rate": 4.8270000000000004e-05, "loss": 0.5055, "step": 9660 }, { "epoch": 0.540990032478441, "grad_norm": 1.228994607925415, 
"learning_rate": 4.8275e-05, "loss": 0.4315, "step": 9661 }, { "epoch": 0.54104602979057, "grad_norm": 1.5682957172393799, "learning_rate": 4.8280000000000005e-05, "loss": 0.4486, "step": 9662 }, { "epoch": 0.5411020271026991, "grad_norm": 1.6749250888824463, "learning_rate": 4.8285e-05, "loss": 0.5563, "step": 9663 }, { "epoch": 0.5411580244148281, "grad_norm": 1.1559009552001953, "learning_rate": 4.829e-05, "loss": 0.4678, "step": 9664 }, { "epoch": 0.5412140217269571, "grad_norm": 1.2349425554275513, "learning_rate": 4.8295000000000004e-05, "loss": 0.3568, "step": 9665 }, { "epoch": 0.5412700190390861, "grad_norm": 1.2909871339797974, "learning_rate": 4.83e-05, "loss": 0.4112, "step": 9666 }, { "epoch": 0.5413260163512151, "grad_norm": 1.3625513315200806, "learning_rate": 4.8305e-05, "loss": 0.335, "step": 9667 }, { "epoch": 0.5413820136633442, "grad_norm": 1.8493263721466064, "learning_rate": 4.8309999999999997e-05, "loss": 0.5126, "step": 9668 }, { "epoch": 0.5414380109754732, "grad_norm": 1.6567726135253906, "learning_rate": 4.831500000000001e-05, "loss": 0.486, "step": 9669 }, { "epoch": 0.5414940082876022, "grad_norm": 1.2510757446289062, "learning_rate": 4.8320000000000005e-05, "loss": 0.362, "step": 9670 }, { "epoch": 0.5415500055997312, "grad_norm": 1.2585492134094238, "learning_rate": 4.8325e-05, "loss": 0.4178, "step": 9671 }, { "epoch": 0.5416060029118602, "grad_norm": 1.2923367023468018, "learning_rate": 4.833e-05, "loss": 0.4974, "step": 9672 }, { "epoch": 0.5416620002239892, "grad_norm": 1.4480977058410645, "learning_rate": 4.8335000000000004e-05, "loss": 0.4164, "step": 9673 }, { "epoch": 0.5417179975361183, "grad_norm": 1.1718289852142334, "learning_rate": 4.834e-05, "loss": 0.3666, "step": 9674 }, { "epoch": 0.5417739948482473, "grad_norm": 1.2877403497695923, "learning_rate": 4.8345e-05, "loss": 0.4637, "step": 9675 }, { "epoch": 0.5418299921603763, "grad_norm": 1.0981522798538208, "learning_rate": 4.835e-05, "loss": 0.3544, "step": 9676 }, { 
"epoch": 0.5418859894725053, "grad_norm": 1.439459204673767, "learning_rate": 4.8355e-05, "loss": 0.4665, "step": 9677 }, { "epoch": 0.5419419867846343, "grad_norm": 1.380591869354248, "learning_rate": 4.836e-05, "loss": 0.3931, "step": 9678 }, { "epoch": 0.5419979840967634, "grad_norm": 1.3844058513641357, "learning_rate": 4.8365e-05, "loss": 0.3871, "step": 9679 }, { "epoch": 0.5420539814088924, "grad_norm": 1.4423999786376953, "learning_rate": 4.8370000000000006e-05, "loss": 0.5286, "step": 9680 }, { "epoch": 0.5421099787210214, "grad_norm": 1.132737159729004, "learning_rate": 4.8375000000000004e-05, "loss": 0.4016, "step": 9681 }, { "epoch": 0.5421659760331504, "grad_norm": 1.215610384941101, "learning_rate": 4.838e-05, "loss": 0.3603, "step": 9682 }, { "epoch": 0.5422219733452794, "grad_norm": 1.3401881456375122, "learning_rate": 4.8385000000000005e-05, "loss": 0.5025, "step": 9683 }, { "epoch": 0.5422779706574085, "grad_norm": 1.0976073741912842, "learning_rate": 4.839e-05, "loss": 0.3805, "step": 9684 }, { "epoch": 0.5423339679695375, "grad_norm": 1.5351399183273315, "learning_rate": 4.8395e-05, "loss": 0.5663, "step": 9685 }, { "epoch": 0.5423899652816665, "grad_norm": 1.4356317520141602, "learning_rate": 4.8400000000000004e-05, "loss": 0.4673, "step": 9686 }, { "epoch": 0.5424459625937955, "grad_norm": 1.343070387840271, "learning_rate": 4.8405e-05, "loss": 0.3795, "step": 9687 }, { "epoch": 0.5425019599059245, "grad_norm": 1.2418947219848633, "learning_rate": 4.841e-05, "loss": 0.3363, "step": 9688 }, { "epoch": 0.5425579572180536, "grad_norm": 1.2552831172943115, "learning_rate": 4.8415e-05, "loss": 0.4219, "step": 9689 }, { "epoch": 0.5426139545301826, "grad_norm": 1.0641114711761475, "learning_rate": 4.842000000000001e-05, "loss": 0.425, "step": 9690 }, { "epoch": 0.5426699518423116, "grad_norm": 1.29921293258667, "learning_rate": 4.8425000000000005e-05, "loss": 0.4904, "step": 9691 }, { "epoch": 0.5427259491544406, "grad_norm": 1.0799882411956787, 
"learning_rate": 4.843e-05, "loss": 0.4693, "step": 9692 }, { "epoch": 0.5427819464665696, "grad_norm": 1.4437817335128784, "learning_rate": 4.8435e-05, "loss": 0.5803, "step": 9693 }, { "epoch": 0.5428379437786987, "grad_norm": 1.2713543176651, "learning_rate": 4.8440000000000004e-05, "loss": 0.3694, "step": 9694 }, { "epoch": 0.5428939410908277, "grad_norm": 1.7143093347549438, "learning_rate": 4.8445e-05, "loss": 0.6177, "step": 9695 }, { "epoch": 0.5429499384029567, "grad_norm": 1.1647511720657349, "learning_rate": 4.845e-05, "loss": 0.3676, "step": 9696 }, { "epoch": 0.5430059357150857, "grad_norm": 1.4182149171829224, "learning_rate": 4.8455e-05, "loss": 0.3801, "step": 9697 }, { "epoch": 0.5430619330272147, "grad_norm": 1.255975604057312, "learning_rate": 4.846e-05, "loss": 0.3259, "step": 9698 }, { "epoch": 0.5431179303393437, "grad_norm": 1.003980040550232, "learning_rate": 4.8465000000000004e-05, "loss": 0.3206, "step": 9699 }, { "epoch": 0.5431739276514728, "grad_norm": 1.301956057548523, "learning_rate": 4.847e-05, "loss": 0.368, "step": 9700 }, { "epoch": 0.5432299249636018, "grad_norm": 1.4601362943649292, "learning_rate": 4.8475000000000006e-05, "loss": 0.4239, "step": 9701 }, { "epoch": 0.5432859222757308, "grad_norm": 8.5525541305542, "learning_rate": 4.8480000000000003e-05, "loss": 0.4612, "step": 9702 }, { "epoch": 0.5433419195878598, "grad_norm": 1.5122413635253906, "learning_rate": 4.8485e-05, "loss": 0.6119, "step": 9703 }, { "epoch": 0.5433979168999888, "grad_norm": 1.0617645978927612, "learning_rate": 4.8490000000000005e-05, "loss": 0.345, "step": 9704 }, { "epoch": 0.5434539142121179, "grad_norm": 1.254473328590393, "learning_rate": 4.8495e-05, "loss": 0.6635, "step": 9705 }, { "epoch": 0.5435099115242469, "grad_norm": 0.9798499345779419, "learning_rate": 4.85e-05, "loss": 0.3501, "step": 9706 }, { "epoch": 0.5435659088363759, "grad_norm": 1.405495285987854, "learning_rate": 4.8505e-05, "loss": 0.4159, "step": 9707 }, { "epoch": 
0.5436219061485049, "grad_norm": 1.1959142684936523, "learning_rate": 4.851e-05, "loss": 0.5036, "step": 9708 }, { "epoch": 0.5436779034606339, "grad_norm": 1.6853902339935303, "learning_rate": 4.8515000000000006e-05, "loss": 0.4186, "step": 9709 }, { "epoch": 0.543733900772763, "grad_norm": 1.2730882167816162, "learning_rate": 4.852e-05, "loss": 0.419, "step": 9710 }, { "epoch": 0.543789898084892, "grad_norm": 1.2109779119491577, "learning_rate": 4.8525e-05, "loss": 0.4926, "step": 9711 }, { "epoch": 0.543845895397021, "grad_norm": 1.1662299633026123, "learning_rate": 4.8530000000000005e-05, "loss": 0.4484, "step": 9712 }, { "epoch": 0.54390189270915, "grad_norm": 1.0187838077545166, "learning_rate": 4.8535e-05, "loss": 0.306, "step": 9713 }, { "epoch": 0.543957890021279, "grad_norm": 1.3637886047363281, "learning_rate": 4.854e-05, "loss": 0.4984, "step": 9714 }, { "epoch": 0.5440138873334079, "grad_norm": 1.4112552404403687, "learning_rate": 4.8545000000000004e-05, "loss": 0.4013, "step": 9715 }, { "epoch": 0.544069884645537, "grad_norm": 1.2921721935272217, "learning_rate": 4.855e-05, "loss": 0.512, "step": 9716 }, { "epoch": 0.544125881957666, "grad_norm": 1.2291465997695923, "learning_rate": 4.8555e-05, "loss": 0.4627, "step": 9717 }, { "epoch": 0.544181879269795, "grad_norm": 1.3939390182495117, "learning_rate": 4.856e-05, "loss": 0.3999, "step": 9718 }, { "epoch": 0.544237876581924, "grad_norm": 1.3417540788650513, "learning_rate": 4.856500000000001e-05, "loss": 0.4685, "step": 9719 }, { "epoch": 0.544293873894053, "grad_norm": 1.4190226793289185, "learning_rate": 4.8570000000000004e-05, "loss": 0.348, "step": 9720 }, { "epoch": 0.5443498712061821, "grad_norm": 1.4172230958938599, "learning_rate": 4.8575e-05, "loss": 0.6203, "step": 9721 }, { "epoch": 0.5444058685183111, "grad_norm": 1.4824928045272827, "learning_rate": 4.8580000000000006e-05, "loss": 0.4865, "step": 9722 }, { "epoch": 0.5444618658304401, "grad_norm": 1.2636237144470215, "learning_rate": 
4.8585e-05, "loss": 0.4615, "step": 9723 }, { "epoch": 0.5445178631425691, "grad_norm": 1.3195079565048218, "learning_rate": 4.859e-05, "loss": 0.4728, "step": 9724 }, { "epoch": 0.5445738604546981, "grad_norm": 1.4156670570373535, "learning_rate": 4.8595000000000005e-05, "loss": 0.5529, "step": 9725 }, { "epoch": 0.5446298577668272, "grad_norm": 1.441025733947754, "learning_rate": 4.86e-05, "loss": 0.518, "step": 9726 }, { "epoch": 0.5446858550789562, "grad_norm": 1.2869678735733032, "learning_rate": 4.8605e-05, "loss": 0.3476, "step": 9727 }, { "epoch": 0.5447418523910852, "grad_norm": 1.0636709928512573, "learning_rate": 4.861e-05, "loss": 0.4203, "step": 9728 }, { "epoch": 0.5447978497032142, "grad_norm": 1.2076289653778076, "learning_rate": 4.861500000000001e-05, "loss": 0.4452, "step": 9729 }, { "epoch": 0.5448538470153432, "grad_norm": 1.218843698501587, "learning_rate": 4.8620000000000005e-05, "loss": 0.3975, "step": 9730 }, { "epoch": 0.5449098443274722, "grad_norm": 2.140641927719116, "learning_rate": 4.8625e-05, "loss": 0.4852, "step": 9731 }, { "epoch": 0.5449658416396013, "grad_norm": 1.2406911849975586, "learning_rate": 4.863e-05, "loss": 0.4207, "step": 9732 }, { "epoch": 0.5450218389517303, "grad_norm": 1.2015538215637207, "learning_rate": 4.8635000000000004e-05, "loss": 0.4469, "step": 9733 }, { "epoch": 0.5450778362638593, "grad_norm": 1.1803382635116577, "learning_rate": 4.864e-05, "loss": 0.4405, "step": 9734 }, { "epoch": 0.5451338335759883, "grad_norm": 1.1898819208145142, "learning_rate": 4.8645e-05, "loss": 0.4249, "step": 9735 }, { "epoch": 0.5451898308881173, "grad_norm": 1.2134382724761963, "learning_rate": 4.8650000000000003e-05, "loss": 0.4567, "step": 9736 }, { "epoch": 0.5452458282002464, "grad_norm": 1.4385006427764893, "learning_rate": 4.8655e-05, "loss": 0.4381, "step": 9737 }, { "epoch": 0.5453018255123754, "grad_norm": 1.1139192581176758, "learning_rate": 4.866e-05, "loss": 0.326, "step": 9738 }, { "epoch": 0.5453578228245044, 
"grad_norm": 1.3936790227890015, "learning_rate": 4.8665e-05, "loss": 0.4393, "step": 9739 }, { "epoch": 0.5454138201366334, "grad_norm": 1.3649449348449707, "learning_rate": 4.867000000000001e-05, "loss": 0.5342, "step": 9740 }, { "epoch": 0.5454698174487624, "grad_norm": 1.022350549697876, "learning_rate": 4.8675000000000004e-05, "loss": 0.3274, "step": 9741 }, { "epoch": 0.5455258147608915, "grad_norm": 1.327319860458374, "learning_rate": 4.868e-05, "loss": 0.4178, "step": 9742 }, { "epoch": 0.5455818120730205, "grad_norm": 1.623950719833374, "learning_rate": 4.8685000000000006e-05, "loss": 0.4655, "step": 9743 }, { "epoch": 0.5456378093851495, "grad_norm": 1.4667751789093018, "learning_rate": 4.869e-05, "loss": 0.4312, "step": 9744 }, { "epoch": 0.5456938066972785, "grad_norm": 1.2036206722259521, "learning_rate": 4.8695e-05, "loss": 0.4892, "step": 9745 }, { "epoch": 0.5457498040094075, "grad_norm": 1.1602802276611328, "learning_rate": 4.87e-05, "loss": 0.4343, "step": 9746 }, { "epoch": 0.5458058013215366, "grad_norm": 1.2215708494186401, "learning_rate": 4.8705e-05, "loss": 0.3786, "step": 9747 }, { "epoch": 0.5458617986336656, "grad_norm": 1.2116730213165283, "learning_rate": 4.871e-05, "loss": 0.4915, "step": 9748 }, { "epoch": 0.5459177959457946, "grad_norm": 1.3704102039337158, "learning_rate": 4.8715000000000004e-05, "loss": 0.4883, "step": 9749 }, { "epoch": 0.5459737932579236, "grad_norm": 1.4366365671157837, "learning_rate": 4.872000000000001e-05, "loss": 0.437, "step": 9750 }, { "epoch": 0.5460297905700526, "grad_norm": 1.5466474294662476, "learning_rate": 4.8725000000000005e-05, "loss": 0.503, "step": 9751 }, { "epoch": 0.5460857878821817, "grad_norm": 1.1597400903701782, "learning_rate": 4.873e-05, "loss": 0.426, "step": 9752 }, { "epoch": 0.5461417851943107, "grad_norm": 1.275761604309082, "learning_rate": 4.8735e-05, "loss": 0.386, "step": 9753 }, { "epoch": 0.5461977825064397, "grad_norm": 1.024357795715332, "learning_rate": 
4.8740000000000004e-05, "loss": 0.4879, "step": 9754 }, { "epoch": 0.5462537798185687, "grad_norm": 1.1839563846588135, "learning_rate": 4.8745e-05, "loss": 0.3731, "step": 9755 }, { "epoch": 0.5463097771306977, "grad_norm": 1.2781145572662354, "learning_rate": 4.875e-05, "loss": 0.3335, "step": 9756 }, { "epoch": 0.5463657744428267, "grad_norm": 1.391566514968872, "learning_rate": 4.8755e-05, "loss": 0.5612, "step": 9757 }, { "epoch": 0.5464217717549558, "grad_norm": 1.3660463094711304, "learning_rate": 4.876e-05, "loss": 0.4597, "step": 9758 }, { "epoch": 0.5464777690670848, "grad_norm": 1.1681660413742065, "learning_rate": 4.8765e-05, "loss": 0.435, "step": 9759 }, { "epoch": 0.5465337663792138, "grad_norm": 1.3454241752624512, "learning_rate": 4.877e-05, "loss": 0.4118, "step": 9760 }, { "epoch": 0.5465897636913428, "grad_norm": 1.1808377504348755, "learning_rate": 4.8775000000000007e-05, "loss": 0.3623, "step": 9761 }, { "epoch": 0.5466457610034718, "grad_norm": 1.3442800045013428, "learning_rate": 4.8780000000000004e-05, "loss": 0.5379, "step": 9762 }, { "epoch": 0.5467017583156009, "grad_norm": 1.2783876657485962, "learning_rate": 4.8785e-05, "loss": 0.4813, "step": 9763 }, { "epoch": 0.5467577556277299, "grad_norm": 1.2709224224090576, "learning_rate": 4.8790000000000006e-05, "loss": 0.3285, "step": 9764 }, { "epoch": 0.5468137529398589, "grad_norm": 1.3587431907653809, "learning_rate": 4.8795e-05, "loss": 0.3749, "step": 9765 }, { "epoch": 0.5468697502519879, "grad_norm": 1.3329561948776245, "learning_rate": 4.88e-05, "loss": 0.4517, "step": 9766 }, { "epoch": 0.5469257475641169, "grad_norm": 1.3308213949203491, "learning_rate": 4.8805e-05, "loss": 0.4159, "step": 9767 }, { "epoch": 0.546981744876246, "grad_norm": 1.2997395992279053, "learning_rate": 4.881e-05, "loss": 0.3801, "step": 9768 }, { "epoch": 0.547037742188375, "grad_norm": 1.3082472085952759, "learning_rate": 4.8815e-05, "loss": 0.5065, "step": 9769 }, { "epoch": 0.547093739500504, "grad_norm": 
1.2000705003738403, "learning_rate": 4.8820000000000004e-05, "loss": 0.3443, "step": 9770 }, { "epoch": 0.547149736812633, "grad_norm": 1.4347835779190063, "learning_rate": 4.8825e-05, "loss": 0.5783, "step": 9771 }, { "epoch": 0.547205734124762, "grad_norm": 1.2902127504348755, "learning_rate": 4.8830000000000005e-05, "loss": 0.3725, "step": 9772 }, { "epoch": 0.547261731436891, "grad_norm": 1.6311655044555664, "learning_rate": 4.8835e-05, "loss": 0.4099, "step": 9773 }, { "epoch": 0.5473177287490201, "grad_norm": 1.20345938205719, "learning_rate": 4.884e-05, "loss": 0.3987, "step": 9774 }, { "epoch": 0.5473737260611491, "grad_norm": 1.1928012371063232, "learning_rate": 4.8845000000000004e-05, "loss": 0.4513, "step": 9775 }, { "epoch": 0.5474297233732781, "grad_norm": 1.2258514165878296, "learning_rate": 4.885e-05, "loss": 0.4534, "step": 9776 }, { "epoch": 0.5474857206854071, "grad_norm": 1.28345787525177, "learning_rate": 4.8855e-05, "loss": 0.429, "step": 9777 }, { "epoch": 0.5475417179975361, "grad_norm": 1.2173876762390137, "learning_rate": 4.886e-05, "loss": 0.3636, "step": 9778 }, { "epoch": 0.5475977153096652, "grad_norm": 1.2504884004592896, "learning_rate": 4.8865e-05, "loss": 0.55, "step": 9779 }, { "epoch": 0.5476537126217942, "grad_norm": 1.0849627256393433, "learning_rate": 4.8870000000000005e-05, "loss": 0.4151, "step": 9780 }, { "epoch": 0.5477097099339232, "grad_norm": 1.2154240608215332, "learning_rate": 4.8875e-05, "loss": 0.4034, "step": 9781 }, { "epoch": 0.5477657072460522, "grad_norm": 1.1576330661773682, "learning_rate": 4.8880000000000006e-05, "loss": 0.4272, "step": 9782 }, { "epoch": 0.5478217045581812, "grad_norm": 1.195458173751831, "learning_rate": 4.8885000000000004e-05, "loss": 0.4196, "step": 9783 }, { "epoch": 0.5478777018703103, "grad_norm": 1.3338898420333862, "learning_rate": 4.889e-05, "loss": 0.5115, "step": 9784 }, { "epoch": 0.5479336991824393, "grad_norm": 1.3685988187789917, "learning_rate": 4.8895e-05, "loss": 0.3735, 
"step": 9785 }, { "epoch": 0.5479896964945683, "grad_norm": 0.9911099076271057, "learning_rate": 4.89e-05, "loss": 0.2798, "step": 9786 }, { "epoch": 0.5480456938066973, "grad_norm": 1.5359314680099487, "learning_rate": 4.8905e-05, "loss": 0.5281, "step": 9787 }, { "epoch": 0.5481016911188263, "grad_norm": 1.1968533992767334, "learning_rate": 4.891e-05, "loss": 0.4735, "step": 9788 }, { "epoch": 0.5481576884309554, "grad_norm": 1.3393092155456543, "learning_rate": 4.8915e-05, "loss": 0.4754, "step": 9789 }, { "epoch": 0.5482136857430844, "grad_norm": 1.2218310832977295, "learning_rate": 4.8920000000000006e-05, "loss": 0.3819, "step": 9790 }, { "epoch": 0.5482696830552134, "grad_norm": 1.3259683847427368, "learning_rate": 4.8925e-05, "loss": 0.4729, "step": 9791 }, { "epoch": 0.5483256803673424, "grad_norm": 1.296262264251709, "learning_rate": 4.893e-05, "loss": 0.4812, "step": 9792 }, { "epoch": 0.5483816776794714, "grad_norm": 1.2933176755905151, "learning_rate": 4.8935000000000005e-05, "loss": 0.3868, "step": 9793 }, { "epoch": 0.5484376749916005, "grad_norm": 1.1595845222473145, "learning_rate": 4.894e-05, "loss": 0.3071, "step": 9794 }, { "epoch": 0.5484936723037295, "grad_norm": 1.2668019533157349, "learning_rate": 4.8945e-05, "loss": 0.487, "step": 9795 }, { "epoch": 0.5485496696158585, "grad_norm": 1.3164705038070679, "learning_rate": 4.8950000000000004e-05, "loss": 0.4585, "step": 9796 }, { "epoch": 0.5486056669279875, "grad_norm": 1.343824028968811, "learning_rate": 4.8955e-05, "loss": 0.4897, "step": 9797 }, { "epoch": 0.5486616642401164, "grad_norm": 1.3883174657821655, "learning_rate": 4.896e-05, "loss": 0.4444, "step": 9798 }, { "epoch": 0.5487176615522454, "grad_norm": 1.1468616724014282, "learning_rate": 4.8965e-05, "loss": 0.4213, "step": 9799 }, { "epoch": 0.5487736588643745, "grad_norm": 1.2404942512512207, "learning_rate": 4.897000000000001e-05, "loss": 0.3937, "step": 9800 }, { "epoch": 0.5488296561765035, "grad_norm": 1.168466567993164, 
"learning_rate": 4.8975000000000005e-05, "loss": 0.3887, "step": 9801 }, { "epoch": 0.5488856534886325, "grad_norm": 1.2331757545471191, "learning_rate": 4.898e-05, "loss": 0.4094, "step": 9802 }, { "epoch": 0.5489416508007615, "grad_norm": 1.3100107908248901, "learning_rate": 4.8985000000000006e-05, "loss": 0.515, "step": 9803 }, { "epoch": 0.5489976481128905, "grad_norm": 1.0910251140594482, "learning_rate": 4.8990000000000004e-05, "loss": 0.4082, "step": 9804 }, { "epoch": 0.5490536454250196, "grad_norm": 1.0836122035980225, "learning_rate": 4.8995e-05, "loss": 0.3982, "step": 9805 }, { "epoch": 0.5491096427371486, "grad_norm": 1.4018274545669556, "learning_rate": 4.9e-05, "loss": 0.413, "step": 9806 }, { "epoch": 0.5491656400492776, "grad_norm": 1.7344642877578735, "learning_rate": 4.9005e-05, "loss": 0.4465, "step": 9807 }, { "epoch": 0.5492216373614066, "grad_norm": 1.191381812095642, "learning_rate": 4.901e-05, "loss": 0.4133, "step": 9808 }, { "epoch": 0.5492776346735356, "grad_norm": 1.0895473957061768, "learning_rate": 4.9015e-05, "loss": 0.3796, "step": 9809 }, { "epoch": 0.5493336319856646, "grad_norm": 1.3561818599700928, "learning_rate": 4.902e-05, "loss": 0.4408, "step": 9810 }, { "epoch": 0.5493896292977937, "grad_norm": 1.2005116939544678, "learning_rate": 4.9025000000000006e-05, "loss": 0.3827, "step": 9811 }, { "epoch": 0.5494456266099227, "grad_norm": 1.3890430927276611, "learning_rate": 4.903e-05, "loss": 0.4115, "step": 9812 }, { "epoch": 0.5495016239220517, "grad_norm": 1.304787039756775, "learning_rate": 4.9035e-05, "loss": 0.3443, "step": 9813 }, { "epoch": 0.5495576212341807, "grad_norm": 1.6234766244888306, "learning_rate": 4.9040000000000005e-05, "loss": 0.5334, "step": 9814 }, { "epoch": 0.5496136185463097, "grad_norm": 1.1996712684631348, "learning_rate": 4.9045e-05, "loss": 0.487, "step": 9815 }, { "epoch": 0.5496696158584388, "grad_norm": 1.5660183429718018, "learning_rate": 4.905e-05, "loss": 0.6293, "step": 9816 }, { "epoch": 
0.5497256131705678, "grad_norm": 1.3654955625534058, "learning_rate": 4.9055000000000004e-05, "loss": 0.4961, "step": 9817 }, { "epoch": 0.5497816104826968, "grad_norm": 1.6731728315353394, "learning_rate": 4.906e-05, "loss": 0.6572, "step": 9818 }, { "epoch": 0.5498376077948258, "grad_norm": 1.17780601978302, "learning_rate": 4.9065e-05, "loss": 0.4288, "step": 9819 }, { "epoch": 0.5498936051069548, "grad_norm": 1.3803881406784058, "learning_rate": 4.907e-05, "loss": 0.4575, "step": 9820 }, { "epoch": 0.5499496024190839, "grad_norm": 1.2742030620574951, "learning_rate": 4.907500000000001e-05, "loss": 0.3687, "step": 9821 }, { "epoch": 0.5500055997312129, "grad_norm": 1.4195208549499512, "learning_rate": 4.9080000000000004e-05, "loss": 0.4378, "step": 9822 }, { "epoch": 0.5500615970433419, "grad_norm": 1.21417236328125, "learning_rate": 4.9085e-05, "loss": 0.5454, "step": 9823 }, { "epoch": 0.5501175943554709, "grad_norm": 1.2277599573135376, "learning_rate": 4.9090000000000006e-05, "loss": 0.3335, "step": 9824 }, { "epoch": 0.5501735916675999, "grad_norm": 1.2298928499221802, "learning_rate": 4.9095000000000003e-05, "loss": 0.5312, "step": 9825 }, { "epoch": 0.550229588979729, "grad_norm": 1.0673483610153198, "learning_rate": 4.91e-05, "loss": 0.3275, "step": 9826 }, { "epoch": 0.550285586291858, "grad_norm": 1.1780321598052979, "learning_rate": 4.9105e-05, "loss": 0.437, "step": 9827 }, { "epoch": 0.550341583603987, "grad_norm": 1.2712862491607666, "learning_rate": 4.911e-05, "loss": 0.3853, "step": 9828 }, { "epoch": 0.550397580916116, "grad_norm": 1.6301132440567017, "learning_rate": 4.9115e-05, "loss": 0.6661, "step": 9829 }, { "epoch": 0.550453578228245, "grad_norm": 1.2521926164627075, "learning_rate": 4.9120000000000004e-05, "loss": 0.3093, "step": 9830 }, { "epoch": 0.550509575540374, "grad_norm": 1.507238507270813, "learning_rate": 4.9125e-05, "loss": 0.4127, "step": 9831 }, { "epoch": 0.5505655728525031, "grad_norm": 1.2705938816070557, "learning_rate": 
4.9130000000000006e-05, "loss": 0.5905, "step": 9832 }, { "epoch": 0.5506215701646321, "grad_norm": 1.9501150846481323, "learning_rate": 4.9135e-05, "loss": 0.5766, "step": 9833 }, { "epoch": 0.5506775674767611, "grad_norm": 1.173202633857727, "learning_rate": 4.914e-05, "loss": 0.341, "step": 9834 }, { "epoch": 0.5507335647888901, "grad_norm": 1.2615176439285278, "learning_rate": 4.9145000000000005e-05, "loss": 0.4548, "step": 9835 }, { "epoch": 0.5507895621010191, "grad_norm": 1.2580984830856323, "learning_rate": 4.915e-05, "loss": 0.4681, "step": 9836 }, { "epoch": 0.5508455594131482, "grad_norm": 1.2937867641448975, "learning_rate": 4.9155e-05, "loss": 0.3072, "step": 9837 }, { "epoch": 0.5509015567252772, "grad_norm": 1.0775372982025146, "learning_rate": 4.9160000000000004e-05, "loss": 0.3315, "step": 9838 }, { "epoch": 0.5509575540374062, "grad_norm": 1.1573628187179565, "learning_rate": 4.9165e-05, "loss": 0.4074, "step": 9839 }, { "epoch": 0.5510135513495352, "grad_norm": 1.6001033782958984, "learning_rate": 4.9170000000000005e-05, "loss": 0.4437, "step": 9840 }, { "epoch": 0.5510695486616642, "grad_norm": 1.460305094718933, "learning_rate": 4.9175e-05, "loss": 0.5827, "step": 9841 }, { "epoch": 0.5511255459737933, "grad_norm": 1.392538070678711, "learning_rate": 4.918000000000001e-05, "loss": 0.4461, "step": 9842 }, { "epoch": 0.5511815432859223, "grad_norm": 1.1869699954986572, "learning_rate": 4.9185000000000004e-05, "loss": 0.3958, "step": 9843 }, { "epoch": 0.5512375405980513, "grad_norm": 1.3342792987823486, "learning_rate": 4.919e-05, "loss": 0.4169, "step": 9844 }, { "epoch": 0.5512935379101803, "grad_norm": 5.5547990798950195, "learning_rate": 4.9195e-05, "loss": 0.546, "step": 9845 }, { "epoch": 0.5513495352223093, "grad_norm": 1.1892287731170654, "learning_rate": 4.92e-05, "loss": 0.3339, "step": 9846 }, { "epoch": 0.5514055325344384, "grad_norm": 1.1834392547607422, "learning_rate": 4.9205e-05, "loss": 0.4276, "step": 9847 }, { "epoch": 
0.5514615298465674, "grad_norm": 1.444663166999817, "learning_rate": 4.921e-05, "loss": 0.3811, "step": 9848 }, { "epoch": 0.5515175271586964, "grad_norm": 1.4277019500732422, "learning_rate": 4.9215e-05, "loss": 0.3793, "step": 9849 }, { "epoch": 0.5515735244708254, "grad_norm": 1.271371841430664, "learning_rate": 4.9220000000000006e-05, "loss": 0.5175, "step": 9850 }, { "epoch": 0.5516295217829544, "grad_norm": 1.8749375343322754, "learning_rate": 4.9225000000000004e-05, "loss": 0.5032, "step": 9851 }, { "epoch": 0.5516855190950835, "grad_norm": 1.6567984819412231, "learning_rate": 4.923e-05, "loss": 0.5108, "step": 9852 }, { "epoch": 0.5517415164072125, "grad_norm": 1.0445003509521484, "learning_rate": 4.9235000000000005e-05, "loss": 0.4001, "step": 9853 }, { "epoch": 0.5517975137193415, "grad_norm": 1.4645371437072754, "learning_rate": 4.924e-05, "loss": 0.4019, "step": 9854 }, { "epoch": 0.5518535110314705, "grad_norm": 1.3834123611450195, "learning_rate": 4.9245e-05, "loss": 0.5157, "step": 9855 }, { "epoch": 0.5519095083435995, "grad_norm": 1.0995264053344727, "learning_rate": 4.9250000000000004e-05, "loss": 0.3924, "step": 9856 }, { "epoch": 0.5519655056557285, "grad_norm": 1.1953632831573486, "learning_rate": 4.9255e-05, "loss": 0.5091, "step": 9857 }, { "epoch": 0.5520215029678576, "grad_norm": 1.246602177619934, "learning_rate": 4.926e-05, "loss": 0.5428, "step": 9858 }, { "epoch": 0.5520775002799866, "grad_norm": 1.1450551748275757, "learning_rate": 4.9265e-05, "loss": 0.3966, "step": 9859 }, { "epoch": 0.5521334975921156, "grad_norm": 1.177097201347351, "learning_rate": 4.927000000000001e-05, "loss": 0.3795, "step": 9860 }, { "epoch": 0.5521894949042446, "grad_norm": 1.2279947996139526, "learning_rate": 4.9275000000000005e-05, "loss": 0.4562, "step": 9861 }, { "epoch": 0.5522454922163736, "grad_norm": 1.361910104751587, "learning_rate": 4.928e-05, "loss": 0.5464, "step": 9862 }, { "epoch": 0.5523014895285027, "grad_norm": 1.1686800718307495, 
"learning_rate": 4.928500000000001e-05, "loss": 0.4628, "step": 9863 }, { "epoch": 0.5523574868406317, "grad_norm": 1.1429228782653809, "learning_rate": 4.9290000000000004e-05, "loss": 0.3713, "step": 9864 }, { "epoch": 0.5524134841527607, "grad_norm": 1.4145402908325195, "learning_rate": 4.9295e-05, "loss": 0.3984, "step": 9865 }, { "epoch": 0.5524694814648897, "grad_norm": 1.3317267894744873, "learning_rate": 4.93e-05, "loss": 0.5272, "step": 9866 }, { "epoch": 0.5525254787770187, "grad_norm": 1.6362569332122803, "learning_rate": 4.9305e-05, "loss": 0.5404, "step": 9867 }, { "epoch": 0.5525814760891478, "grad_norm": 1.2255516052246094, "learning_rate": 4.931e-05, "loss": 0.38, "step": 9868 }, { "epoch": 0.5526374734012768, "grad_norm": 1.2675533294677734, "learning_rate": 4.9315e-05, "loss": 0.4144, "step": 9869 }, { "epoch": 0.5526934707134058, "grad_norm": 48.612266540527344, "learning_rate": 4.932e-05, "loss": 0.3633, "step": 9870 }, { "epoch": 0.5527494680255348, "grad_norm": 1.4007257223129272, "learning_rate": 4.9325000000000006e-05, "loss": 0.3649, "step": 9871 }, { "epoch": 0.5528054653376638, "grad_norm": 1.2883875370025635, "learning_rate": 4.9330000000000004e-05, "loss": 0.455, "step": 9872 }, { "epoch": 0.5528614626497929, "grad_norm": 1.2115495204925537, "learning_rate": 4.9335e-05, "loss": 0.4835, "step": 9873 }, { "epoch": 0.5529174599619219, "grad_norm": 1.1614922285079956, "learning_rate": 4.9340000000000005e-05, "loss": 0.6316, "step": 9874 }, { "epoch": 0.5529734572740509, "grad_norm": 1.5327253341674805, "learning_rate": 4.9345e-05, "loss": 0.5755, "step": 9875 }, { "epoch": 0.5530294545861799, "grad_norm": 1.2586109638214111, "learning_rate": 4.935e-05, "loss": 0.5452, "step": 9876 }, { "epoch": 0.5530854518983089, "grad_norm": 1.5047032833099365, "learning_rate": 4.9355000000000004e-05, "loss": 0.5365, "step": 9877 }, { "epoch": 0.553141449210438, "grad_norm": 1.508359670639038, "learning_rate": 4.936e-05, "loss": 0.5892, "step": 9878 }, { 
"epoch": 0.553197446522567, "grad_norm": 1.305232286453247, "learning_rate": 4.9365e-05, "loss": 0.5255, "step": 9879 }, { "epoch": 0.5532534438346959, "grad_norm": 1.352198839187622, "learning_rate": 4.937e-05, "loss": 0.4257, "step": 9880 }, { "epoch": 0.5533094411468249, "grad_norm": 1.2626460790634155, "learning_rate": 4.937500000000001e-05, "loss": 0.4752, "step": 9881 }, { "epoch": 0.5533654384589539, "grad_norm": 1.0746902227401733, "learning_rate": 4.9380000000000005e-05, "loss": 0.3612, "step": 9882 }, { "epoch": 0.5534214357710829, "grad_norm": 1.0874592065811157, "learning_rate": 4.9385e-05, "loss": 0.4713, "step": 9883 }, { "epoch": 0.553477433083212, "grad_norm": 1.600517988204956, "learning_rate": 4.939e-05, "loss": 0.3904, "step": 9884 }, { "epoch": 0.553533430395341, "grad_norm": 1.0637706518173218, "learning_rate": 4.9395000000000004e-05, "loss": 0.3279, "step": 9885 }, { "epoch": 0.55358942770747, "grad_norm": 1.3719364404678345, "learning_rate": 4.94e-05, "loss": 0.5074, "step": 9886 }, { "epoch": 0.553645425019599, "grad_norm": 1.278235912322998, "learning_rate": 4.9405e-05, "loss": 0.4989, "step": 9887 }, { "epoch": 0.553701422331728, "grad_norm": 1.3625400066375732, "learning_rate": 4.941e-05, "loss": 0.4649, "step": 9888 }, { "epoch": 0.553757419643857, "grad_norm": 1.0363482236862183, "learning_rate": 4.9415e-05, "loss": 0.416, "step": 9889 }, { "epoch": 0.5538134169559861, "grad_norm": 1.31887948513031, "learning_rate": 4.942e-05, "loss": 0.4591, "step": 9890 }, { "epoch": 0.5538694142681151, "grad_norm": 1.1346615552902222, "learning_rate": 4.9425e-05, "loss": 0.3147, "step": 9891 }, { "epoch": 0.5539254115802441, "grad_norm": 1.951660394668579, "learning_rate": 4.9430000000000006e-05, "loss": 0.4978, "step": 9892 }, { "epoch": 0.5539814088923731, "grad_norm": 1.2742087841033936, "learning_rate": 4.9435000000000004e-05, "loss": 0.4216, "step": 9893 }, { "epoch": 0.5540374062045021, "grad_norm": 1.3981987237930298, "learning_rate": 
4.944e-05, "loss": 0.3698, "step": 9894 }, { "epoch": 0.5540934035166312, "grad_norm": 1.213977336883545, "learning_rate": 4.9445000000000005e-05, "loss": 0.4999, "step": 9895 }, { "epoch": 0.5541494008287602, "grad_norm": 1.315979242324829, "learning_rate": 4.945e-05, "loss": 0.5324, "step": 9896 }, { "epoch": 0.5542053981408892, "grad_norm": 1.1904572248458862, "learning_rate": 4.9455e-05, "loss": 0.3628, "step": 9897 }, { "epoch": 0.5542613954530182, "grad_norm": 1.1681935787200928, "learning_rate": 4.946e-05, "loss": 0.3761, "step": 9898 }, { "epoch": 0.5543173927651472, "grad_norm": 1.256194829940796, "learning_rate": 4.9465e-05, "loss": 0.4615, "step": 9899 }, { "epoch": 0.5543733900772763, "grad_norm": 1.124131202697754, "learning_rate": 4.947e-05, "loss": 0.5317, "step": 9900 }, { "epoch": 0.5544293873894053, "grad_norm": 1.7268904447555542, "learning_rate": 4.9475e-05, "loss": 0.4717, "step": 9901 }, { "epoch": 0.5544853847015343, "grad_norm": 1.1848986148834229, "learning_rate": 4.948000000000001e-05, "loss": 0.4333, "step": 9902 }, { "epoch": 0.5545413820136633, "grad_norm": 6.534971714019775, "learning_rate": 4.9485000000000005e-05, "loss": 0.3291, "step": 9903 }, { "epoch": 0.5545973793257923, "grad_norm": 1.3286279439926147, "learning_rate": 4.949e-05, "loss": 0.4083, "step": 9904 }, { "epoch": 0.5546533766379214, "grad_norm": 1.2005430459976196, "learning_rate": 4.9495e-05, "loss": 0.4521, "step": 9905 }, { "epoch": 0.5547093739500504, "grad_norm": 1.1113883256912231, "learning_rate": 4.9500000000000004e-05, "loss": 0.4342, "step": 9906 }, { "epoch": 0.5547653712621794, "grad_norm": 1.3328896760940552, "learning_rate": 4.9505e-05, "loss": 0.4356, "step": 9907 }, { "epoch": 0.5548213685743084, "grad_norm": 1.3005143404006958, "learning_rate": 4.951e-05, "loss": 0.5701, "step": 9908 }, { "epoch": 0.5548773658864374, "grad_norm": 1.0802335739135742, "learning_rate": 4.9515e-05, "loss": 0.4, "step": 9909 }, { "epoch": 0.5549333631985665, "grad_norm": 
1.181265950202942, "learning_rate": 4.952e-05, "loss": 0.4187, "step": 9910 }, { "epoch": 0.5549893605106955, "grad_norm": 1.1380640268325806, "learning_rate": 4.9525000000000004e-05, "loss": 0.345, "step": 9911 }, { "epoch": 0.5550453578228245, "grad_norm": 1.468192458152771, "learning_rate": 4.953e-05, "loss": 0.5454, "step": 9912 }, { "epoch": 0.5551013551349535, "grad_norm": 1.2509827613830566, "learning_rate": 4.9535000000000006e-05, "loss": 0.3683, "step": 9913 }, { "epoch": 0.5551573524470825, "grad_norm": 1.237339735031128, "learning_rate": 4.9540000000000003e-05, "loss": 0.4945, "step": 9914 }, { "epoch": 0.5552133497592115, "grad_norm": 1.458763837814331, "learning_rate": 4.9545e-05, "loss": 0.4614, "step": 9915 }, { "epoch": 0.5552693470713406, "grad_norm": 1.4923385381698608, "learning_rate": 4.9550000000000005e-05, "loss": 0.4802, "step": 9916 }, { "epoch": 0.5553253443834696, "grad_norm": 1.1870142221450806, "learning_rate": 4.9555e-05, "loss": 0.3874, "step": 9917 }, { "epoch": 0.5553813416955986, "grad_norm": 1.1183218955993652, "learning_rate": 4.956e-05, "loss": 0.4729, "step": 9918 }, { "epoch": 0.5554373390077276, "grad_norm": 1.5191528797149658, "learning_rate": 4.9565e-05, "loss": 0.4362, "step": 9919 }, { "epoch": 0.5554933363198566, "grad_norm": 1.2282307147979736, "learning_rate": 4.957e-05, "loss": 0.4814, "step": 9920 }, { "epoch": 0.5555493336319857, "grad_norm": 1.3444799184799194, "learning_rate": 4.9575000000000006e-05, "loss": 0.4141, "step": 9921 }, { "epoch": 0.5556053309441147, "grad_norm": 1.3083947896957397, "learning_rate": 4.958e-05, "loss": 0.3594, "step": 9922 }, { "epoch": 0.5556613282562437, "grad_norm": 1.306146502494812, "learning_rate": 4.9585e-05, "loss": 0.3494, "step": 9923 }, { "epoch": 0.5557173255683727, "grad_norm": 1.2219531536102295, "learning_rate": 4.9590000000000005e-05, "loss": 0.3582, "step": 9924 }, { "epoch": 0.5557733228805017, "grad_norm": 1.2251049280166626, "learning_rate": 4.9595e-05, "loss": 
0.3454, "step": 9925 }, { "epoch": 0.5558293201926308, "grad_norm": 1.1786918640136719, "learning_rate": 4.96e-05, "loss": 0.3274, "step": 9926 }, { "epoch": 0.5558853175047598, "grad_norm": 1.2297167778015137, "learning_rate": 4.9605000000000004e-05, "loss": 0.415, "step": 9927 }, { "epoch": 0.5559413148168888, "grad_norm": 1.1262352466583252, "learning_rate": 4.961e-05, "loss": 0.3766, "step": 9928 }, { "epoch": 0.5559973121290178, "grad_norm": 1.4815571308135986, "learning_rate": 4.9615e-05, "loss": 0.4644, "step": 9929 }, { "epoch": 0.5560533094411468, "grad_norm": 1.1427712440490723, "learning_rate": 4.962e-05, "loss": 0.3341, "step": 9930 }, { "epoch": 0.5561093067532759, "grad_norm": 1.142407774925232, "learning_rate": 4.962500000000001e-05, "loss": 0.3281, "step": 9931 }, { "epoch": 0.5561653040654049, "grad_norm": 1.2612287998199463, "learning_rate": 4.9630000000000004e-05, "loss": 0.5076, "step": 9932 }, { "epoch": 0.5562213013775339, "grad_norm": 1.4052684307098389, "learning_rate": 4.9635e-05, "loss": 0.3742, "step": 9933 }, { "epoch": 0.5562772986896629, "grad_norm": 1.068899393081665, "learning_rate": 4.9640000000000006e-05, "loss": 0.3504, "step": 9934 }, { "epoch": 0.5563332960017919, "grad_norm": 1.4374948740005493, "learning_rate": 4.9645e-05, "loss": 0.7201, "step": 9935 }, { "epoch": 0.556389293313921, "grad_norm": 1.3756743669509888, "learning_rate": 4.965e-05, "loss": 0.4793, "step": 9936 }, { "epoch": 0.55644529062605, "grad_norm": 1.3166941404342651, "learning_rate": 4.9655000000000005e-05, "loss": 0.3156, "step": 9937 }, { "epoch": 0.556501287938179, "grad_norm": 1.1218551397323608, "learning_rate": 4.966e-05, "loss": 0.4509, "step": 9938 }, { "epoch": 0.556557285250308, "grad_norm": 1.1736061573028564, "learning_rate": 4.9665e-05, "loss": 0.3869, "step": 9939 }, { "epoch": 0.556613282562437, "grad_norm": 1.6972935199737549, "learning_rate": 4.967e-05, "loss": 0.5223, "step": 9940 }, { "epoch": 0.556669279874566, "grad_norm": 
1.475999116897583, "learning_rate": 4.967500000000001e-05, "loss": 0.5346, "step": 9941 }, { "epoch": 0.5567252771866951, "grad_norm": 1.411203145980835, "learning_rate": 4.9680000000000005e-05, "loss": 0.5327, "step": 9942 }, { "epoch": 0.5567812744988241, "grad_norm": 1.2205110788345337, "learning_rate": 4.9685e-05, "loss": 0.4637, "step": 9943 }, { "epoch": 0.5568372718109531, "grad_norm": 1.2715861797332764, "learning_rate": 4.969e-05, "loss": 0.5965, "step": 9944 }, { "epoch": 0.5568932691230821, "grad_norm": 1.3103289604187012, "learning_rate": 4.9695000000000004e-05, "loss": 0.4397, "step": 9945 }, { "epoch": 0.5569492664352111, "grad_norm": 1.4818700551986694, "learning_rate": 4.97e-05, "loss": 0.562, "step": 9946 }, { "epoch": 0.5570052637473402, "grad_norm": 1.9872112274169922, "learning_rate": 4.9705e-05, "loss": 0.4474, "step": 9947 }, { "epoch": 0.5570612610594692, "grad_norm": 1.3563531637191772, "learning_rate": 4.9710000000000003e-05, "loss": 0.3684, "step": 9948 }, { "epoch": 0.5571172583715982, "grad_norm": 1.163193702697754, "learning_rate": 4.9715e-05, "loss": 0.3716, "step": 9949 }, { "epoch": 0.5571732556837272, "grad_norm": 0.9077993631362915, "learning_rate": 4.972e-05, "loss": 0.3329, "step": 9950 }, { "epoch": 0.5572292529958562, "grad_norm": 1.0345548391342163, "learning_rate": 4.9725e-05, "loss": 0.3246, "step": 9951 }, { "epoch": 0.5572852503079853, "grad_norm": 1.3332639932632446, "learning_rate": 4.973000000000001e-05, "loss": 0.3712, "step": 9952 }, { "epoch": 0.5573412476201143, "grad_norm": 1.148186445236206, "learning_rate": 4.9735000000000004e-05, "loss": 0.3677, "step": 9953 }, { "epoch": 0.5573972449322433, "grad_norm": 1.2874257564544678, "learning_rate": 4.974e-05, "loss": 0.4196, "step": 9954 }, { "epoch": 0.5574532422443723, "grad_norm": 1.3812764883041382, "learning_rate": 4.9745000000000006e-05, "loss": 0.505, "step": 9955 }, { "epoch": 0.5575092395565013, "grad_norm": 1.211657166481018, "learning_rate": 4.975e-05, 
"loss": 0.3548, "step": 9956 }, { "epoch": 0.5575652368686304, "grad_norm": 2.642127513885498, "learning_rate": 4.9755e-05, "loss": 0.4774, "step": 9957 }, { "epoch": 0.5576212341807594, "grad_norm": 1.1937086582183838, "learning_rate": 4.976e-05, "loss": 0.3494, "step": 9958 }, { "epoch": 0.5576772314928884, "grad_norm": 1.290429711341858, "learning_rate": 4.9765e-05, "loss": 0.4232, "step": 9959 }, { "epoch": 0.5577332288050174, "grad_norm": 1.9520561695098877, "learning_rate": 4.977e-05, "loss": 0.5164, "step": 9960 }, { "epoch": 0.5577892261171464, "grad_norm": 1.2326164245605469, "learning_rate": 4.9775000000000004e-05, "loss": 0.4576, "step": 9961 }, { "epoch": 0.5578452234292754, "grad_norm": 1.1097726821899414, "learning_rate": 4.978e-05, "loss": 0.4239, "step": 9962 }, { "epoch": 0.5579012207414044, "grad_norm": 1.5771677494049072, "learning_rate": 4.9785000000000005e-05, "loss": 0.3854, "step": 9963 }, { "epoch": 0.5579572180535334, "grad_norm": 1.1490870714187622, "learning_rate": 4.979e-05, "loss": 0.3786, "step": 9964 }, { "epoch": 0.5580132153656624, "grad_norm": 1.0461891889572144, "learning_rate": 4.9795e-05, "loss": 0.3223, "step": 9965 }, { "epoch": 0.5580692126777914, "grad_norm": 1.3650190830230713, "learning_rate": 4.9800000000000004e-05, "loss": 0.492, "step": 9966 }, { "epoch": 0.5581252099899204, "grad_norm": 1.690585970878601, "learning_rate": 4.9805e-05, "loss": 0.5052, "step": 9967 }, { "epoch": 0.5581812073020495, "grad_norm": 1.3042556047439575, "learning_rate": 4.981e-05, "loss": 0.4262, "step": 9968 }, { "epoch": 0.5582372046141785, "grad_norm": 1.3416763544082642, "learning_rate": 4.9815e-05, "loss": 0.4576, "step": 9969 }, { "epoch": 0.5582932019263075, "grad_norm": 1.3321274518966675, "learning_rate": 4.982e-05, "loss": 0.4248, "step": 9970 }, { "epoch": 0.5583491992384365, "grad_norm": 1.4489244222640991, "learning_rate": 4.9825000000000005e-05, "loss": 0.5264, "step": 9971 }, { "epoch": 0.5584051965505655, "grad_norm": 
1.2635912895202637, "learning_rate": 4.983e-05, "loss": 0.4988, "step": 9972 }, { "epoch": 0.5584611938626945, "grad_norm": 1.3435423374176025, "learning_rate": 4.9835000000000007e-05, "loss": 0.5256, "step": 9973 }, { "epoch": 0.5585171911748236, "grad_norm": 1.4019769430160522, "learning_rate": 4.9840000000000004e-05, "loss": 0.4462, "step": 9974 }, { "epoch": 0.5585731884869526, "grad_norm": 1.345577597618103, "learning_rate": 4.9845e-05, "loss": 0.3777, "step": 9975 }, { "epoch": 0.5586291857990816, "grad_norm": 1.290582299232483, "learning_rate": 4.9850000000000006e-05, "loss": 0.3645, "step": 9976 }, { "epoch": 0.5586851831112106, "grad_norm": 1.3481178283691406, "learning_rate": 4.9855e-05, "loss": 0.5196, "step": 9977 }, { "epoch": 0.5587411804233396, "grad_norm": 1.1358520984649658, "learning_rate": 4.986e-05, "loss": 0.383, "step": 9978 }, { "epoch": 0.5587971777354687, "grad_norm": 1.4544248580932617, "learning_rate": 4.9865e-05, "loss": 0.6048, "step": 9979 }, { "epoch": 0.5588531750475977, "grad_norm": 1.416058897972107, "learning_rate": 4.987e-05, "loss": 0.4876, "step": 9980 }, { "epoch": 0.5589091723597267, "grad_norm": 1.3810850381851196, "learning_rate": 4.9875000000000006e-05, "loss": 0.4473, "step": 9981 }, { "epoch": 0.5589651696718557, "grad_norm": 1.3577407598495483, "learning_rate": 4.9880000000000004e-05, "loss": 0.5043, "step": 9982 }, { "epoch": 0.5590211669839847, "grad_norm": 1.1616915464401245, "learning_rate": 4.9885e-05, "loss": 0.4671, "step": 9983 }, { "epoch": 0.5590771642961138, "grad_norm": 1.6595820188522339, "learning_rate": 4.9890000000000005e-05, "loss": 0.6291, "step": 9984 }, { "epoch": 0.5591331616082428, "grad_norm": 1.1977049112319946, "learning_rate": 4.9895e-05, "loss": 0.4237, "step": 9985 }, { "epoch": 0.5591891589203718, "grad_norm": 1.2889515161514282, "learning_rate": 4.99e-05, "loss": 0.4081, "step": 9986 }, { "epoch": 0.5592451562325008, "grad_norm": 1.3410112857818604, "learning_rate": 4.9905000000000004e-05, 
"loss": 0.414, "step": 9987 }, { "epoch": 0.5593011535446298, "grad_norm": 1.8319560289382935, "learning_rate": 4.991e-05, "loss": 0.5875, "step": 9988 }, { "epoch": 0.5593571508567589, "grad_norm": 1.1550171375274658, "learning_rate": 4.9915e-05, "loss": 0.3828, "step": 9989 }, { "epoch": 0.5594131481688879, "grad_norm": 1.2857604026794434, "learning_rate": 4.992e-05, "loss": 0.4118, "step": 9990 }, { "epoch": 0.5594691454810169, "grad_norm": 1.6735023260116577, "learning_rate": 4.992500000000001e-05, "loss": 0.5153, "step": 9991 }, { "epoch": 0.5595251427931459, "grad_norm": 1.284501075744629, "learning_rate": 4.9930000000000005e-05, "loss": 0.436, "step": 9992 }, { "epoch": 0.5595811401052749, "grad_norm": 1.1906112432479858, "learning_rate": 4.9935e-05, "loss": 0.4321, "step": 9993 }, { "epoch": 0.559637137417404, "grad_norm": 1.2857753038406372, "learning_rate": 4.9940000000000006e-05, "loss": 0.3267, "step": 9994 }, { "epoch": 0.559693134729533, "grad_norm": 1.9837729930877686, "learning_rate": 4.9945000000000004e-05, "loss": 0.4377, "step": 9995 }, { "epoch": 0.559749132041662, "grad_norm": 1.4722764492034912, "learning_rate": 4.995e-05, "loss": 0.5115, "step": 9996 }, { "epoch": 0.559805129353791, "grad_norm": 1.1309281587600708, "learning_rate": 4.9955e-05, "loss": 0.4, "step": 9997 }, { "epoch": 0.55986112666592, "grad_norm": 1.302437663078308, "learning_rate": 4.996e-05, "loss": 0.4132, "step": 9998 }, { "epoch": 0.559917123978049, "grad_norm": 1.5880764722824097, "learning_rate": 4.9965e-05, "loss": 0.6104, "step": 9999 }, { "epoch": 0.5599731212901781, "grad_norm": 1.1306363344192505, "learning_rate": 4.997e-05, "loss": 0.3663, "step": 10000 }, { "epoch": 0.5600291186023071, "grad_norm": 1.2174371480941772, "learning_rate": 4.9975e-05, "loss": 0.337, "step": 10001 }, { "epoch": 0.5600851159144361, "grad_norm": 1.394925832748413, "learning_rate": 4.9980000000000006e-05, "loss": 0.6143, "step": 10002 }, { "epoch": 0.5601411132265651, "grad_norm": 
1.1916224956512451, "learning_rate": 4.9985e-05, "loss": 0.3698, "step": 10003 }, { "epoch": 0.5601971105386941, "grad_norm": 1.4514687061309814, "learning_rate": 4.999e-05, "loss": 0.4462, "step": 10004 }, { "epoch": 0.5602531078508232, "grad_norm": 1.3292044401168823, "learning_rate": 4.9995000000000005e-05, "loss": 0.4409, "step": 10005 }, { "epoch": 0.5603091051629522, "grad_norm": 1.328014850616455, "learning_rate": 5e-05, "loss": 0.4157, "step": 10006 }, { "epoch": 0.5603651024750812, "grad_norm": 1.4254311323165894, "learning_rate": 5.0005e-05, "loss": 0.5557, "step": 10007 }, { "epoch": 0.5604210997872102, "grad_norm": 1.393754482269287, "learning_rate": 5.0010000000000004e-05, "loss": 0.5511, "step": 10008 }, { "epoch": 0.5604770970993392, "grad_norm": 1.2166305780410767, "learning_rate": 5.0015e-05, "loss": 0.4034, "step": 10009 }, { "epoch": 0.5605330944114683, "grad_norm": 1.2587909698486328, "learning_rate": 5.002e-05, "loss": 0.4431, "step": 10010 }, { "epoch": 0.5605890917235973, "grad_norm": 1.1939144134521484, "learning_rate": 5.0025e-05, "loss": 0.4778, "step": 10011 }, { "epoch": 0.5606450890357263, "grad_norm": 1.3817065954208374, "learning_rate": 5.003e-05, "loss": 0.487, "step": 10012 }, { "epoch": 0.5607010863478553, "grad_norm": 1.108214020729065, "learning_rate": 5.0035e-05, "loss": 0.456, "step": 10013 }, { "epoch": 0.5607570836599843, "grad_norm": 1.1789519786834717, "learning_rate": 5.0039999999999995e-05, "loss": 0.4451, "step": 10014 }, { "epoch": 0.5608130809721134, "grad_norm": 1.1562879085540771, "learning_rate": 5.0045e-05, "loss": 0.3838, "step": 10015 }, { "epoch": 0.5608690782842424, "grad_norm": 1.337884783744812, "learning_rate": 5.005e-05, "loss": 0.6035, "step": 10016 }, { "epoch": 0.5609250755963714, "grad_norm": 1.4621641635894775, "learning_rate": 5.005500000000001e-05, "loss": 0.4793, "step": 10017 }, { "epoch": 0.5609810729085004, "grad_norm": 1.657607913017273, "learning_rate": 5.0060000000000005e-05, "loss": 0.3936, 
"step": 10018 }, { "epoch": 0.5610370702206294, "grad_norm": 1.2850358486175537, "learning_rate": 5.006500000000001e-05, "loss": 0.3666, "step": 10019 }, { "epoch": 0.5610930675327584, "grad_norm": 1.2727241516113281, "learning_rate": 5.007000000000001e-05, "loss": 0.5122, "step": 10020 }, { "epoch": 0.5611490648448875, "grad_norm": 1.912054181098938, "learning_rate": 5.0075000000000004e-05, "loss": 0.3845, "step": 10021 }, { "epoch": 0.5612050621570165, "grad_norm": 1.3623539209365845, "learning_rate": 5.008e-05, "loss": 0.4237, "step": 10022 }, { "epoch": 0.5612610594691455, "grad_norm": 1.307369589805603, "learning_rate": 5.0085000000000006e-05, "loss": 0.523, "step": 10023 }, { "epoch": 0.5613170567812745, "grad_norm": 1.2519283294677734, "learning_rate": 5.009e-05, "loss": 0.5827, "step": 10024 }, { "epoch": 0.5613730540934035, "grad_norm": 1.2211045026779175, "learning_rate": 5.0095e-05, "loss": 0.5273, "step": 10025 }, { "epoch": 0.5614290514055326, "grad_norm": 1.12700355052948, "learning_rate": 5.0100000000000005e-05, "loss": 0.3769, "step": 10026 }, { "epoch": 0.5614850487176616, "grad_norm": 1.1931533813476562, "learning_rate": 5.0105e-05, "loss": 0.4218, "step": 10027 }, { "epoch": 0.5615410460297906, "grad_norm": 1.2497025728225708, "learning_rate": 5.011e-05, "loss": 0.3069, "step": 10028 }, { "epoch": 0.5615970433419196, "grad_norm": 1.37238609790802, "learning_rate": 5.0115000000000004e-05, "loss": 0.4876, "step": 10029 }, { "epoch": 0.5616530406540486, "grad_norm": 1.170186996459961, "learning_rate": 5.012e-05, "loss": 0.3934, "step": 10030 }, { "epoch": 0.5617090379661777, "grad_norm": 1.1139869689941406, "learning_rate": 5.0125e-05, "loss": 0.3468, "step": 10031 }, { "epoch": 0.5617650352783067, "grad_norm": 1.4631116390228271, "learning_rate": 5.0129999999999996e-05, "loss": 0.6199, "step": 10032 }, { "epoch": 0.5618210325904357, "grad_norm": 1.2753750085830688, "learning_rate": 5.0135e-05, "loss": 0.4501, "step": 10033 }, { "epoch": 
0.5618770299025647, "grad_norm": 1.632766604423523, "learning_rate": 5.014e-05, "loss": 0.4904, "step": 10034 }, { "epoch": 0.5619330272146937, "grad_norm": 1.181941270828247, "learning_rate": 5.0144999999999995e-05, "loss": 0.4378, "step": 10035 }, { "epoch": 0.5619890245268228, "grad_norm": 1.1032267808914185, "learning_rate": 5.015e-05, "loss": 0.3056, "step": 10036 }, { "epoch": 0.5620450218389518, "grad_norm": 1.1124083995819092, "learning_rate": 5.015500000000001e-05, "loss": 0.4144, "step": 10037 }, { "epoch": 0.5621010191510808, "grad_norm": 1.645929217338562, "learning_rate": 5.016000000000001e-05, "loss": 0.4506, "step": 10038 }, { "epoch": 0.5621570164632098, "grad_norm": 1.1254245042800903, "learning_rate": 5.0165000000000005e-05, "loss": 0.4107, "step": 10039 }, { "epoch": 0.5622130137753388, "grad_norm": 1.5527163743972778, "learning_rate": 5.017000000000001e-05, "loss": 0.5131, "step": 10040 }, { "epoch": 0.5622690110874679, "grad_norm": 1.2213736772537231, "learning_rate": 5.017500000000001e-05, "loss": 0.3638, "step": 10041 }, { "epoch": 0.5623250083995969, "grad_norm": 1.2467491626739502, "learning_rate": 5.0180000000000004e-05, "loss": 0.4783, "step": 10042 }, { "epoch": 0.5623810057117259, "grad_norm": 1.4593853950500488, "learning_rate": 5.0185e-05, "loss": 0.525, "step": 10043 }, { "epoch": 0.5624370030238549, "grad_norm": 1.1642142534255981, "learning_rate": 5.0190000000000006e-05, "loss": 0.4755, "step": 10044 }, { "epoch": 0.5624930003359839, "grad_norm": 1.3840899467468262, "learning_rate": 5.0195e-05, "loss": 0.4413, "step": 10045 }, { "epoch": 0.5625489976481128, "grad_norm": 1.1163687705993652, "learning_rate": 5.02e-05, "loss": 0.3448, "step": 10046 }, { "epoch": 0.5626049949602419, "grad_norm": 1.3278331756591797, "learning_rate": 5.0205000000000005e-05, "loss": 0.397, "step": 10047 }, { "epoch": 0.5626609922723709, "grad_norm": 1.5409855842590332, "learning_rate": 5.021e-05, "loss": 0.4767, "step": 10048 }, { "epoch": 
0.5627169895844999, "grad_norm": 1.2589179277420044, "learning_rate": 5.0215e-05, "loss": 0.486, "step": 10049 }, { "epoch": 0.5627729868966289, "grad_norm": 1.3704774379730225, "learning_rate": 5.0220000000000004e-05, "loss": 0.514, "step": 10050 }, { "epoch": 0.5628289842087579, "grad_norm": 1.185131311416626, "learning_rate": 5.0225e-05, "loss": 0.5717, "step": 10051 }, { "epoch": 0.562884981520887, "grad_norm": 1.5050828456878662, "learning_rate": 5.023e-05, "loss": 0.4966, "step": 10052 }, { "epoch": 0.562940978833016, "grad_norm": 1.078010082244873, "learning_rate": 5.0234999999999996e-05, "loss": 0.3227, "step": 10053 }, { "epoch": 0.562996976145145, "grad_norm": 1.184657335281372, "learning_rate": 5.024e-05, "loss": 0.3177, "step": 10054 }, { "epoch": 0.563052973457274, "grad_norm": 1.322494626045227, "learning_rate": 5.0245e-05, "loss": 0.4325, "step": 10055 }, { "epoch": 0.563108970769403, "grad_norm": 1.3813997507095337, "learning_rate": 5.0249999999999995e-05, "loss": 0.586, "step": 10056 }, { "epoch": 0.563164968081532, "grad_norm": 1.1439642906188965, "learning_rate": 5.0255000000000006e-05, "loss": 0.4306, "step": 10057 }, { "epoch": 0.5632209653936611, "grad_norm": 1.2778465747833252, "learning_rate": 5.026000000000001e-05, "loss": 0.5442, "step": 10058 }, { "epoch": 0.5632769627057901, "grad_norm": 1.2710590362548828, "learning_rate": 5.026500000000001e-05, "loss": 0.5092, "step": 10059 }, { "epoch": 0.5633329600179191, "grad_norm": 1.255285382270813, "learning_rate": 5.0270000000000005e-05, "loss": 0.4325, "step": 10060 }, { "epoch": 0.5633889573300481, "grad_norm": 1.280313491821289, "learning_rate": 5.0275e-05, "loss": 0.4029, "step": 10061 }, { "epoch": 0.5634449546421771, "grad_norm": 1.267838478088379, "learning_rate": 5.0280000000000006e-05, "loss": 0.4354, "step": 10062 }, { "epoch": 0.5635009519543062, "grad_norm": 1.5349767208099365, "learning_rate": 5.0285000000000004e-05, "loss": 0.3912, "step": 10063 }, { "epoch": 0.5635569492664352, 
"grad_norm": 1.314571738243103, "learning_rate": 5.029e-05, "loss": 0.4128, "step": 10064 }, { "epoch": 0.5636129465785642, "grad_norm": 1.4526143074035645, "learning_rate": 5.0295000000000006e-05, "loss": 0.51, "step": 10065 }, { "epoch": 0.5636689438906932, "grad_norm": 1.4880921840667725, "learning_rate": 5.03e-05, "loss": 0.5507, "step": 10066 }, { "epoch": 0.5637249412028222, "grad_norm": 1.6427276134490967, "learning_rate": 5.0305e-05, "loss": 0.5428, "step": 10067 }, { "epoch": 0.5637809385149513, "grad_norm": 1.3415950536727905, "learning_rate": 5.0310000000000005e-05, "loss": 0.4392, "step": 10068 }, { "epoch": 0.5638369358270803, "grad_norm": 1.6944310665130615, "learning_rate": 5.0315e-05, "loss": 0.2834, "step": 10069 }, { "epoch": 0.5638929331392093, "grad_norm": 1.3977125883102417, "learning_rate": 5.032e-05, "loss": 0.4614, "step": 10070 }, { "epoch": 0.5639489304513383, "grad_norm": 1.3059650659561157, "learning_rate": 5.0325e-05, "loss": 0.4491, "step": 10071 }, { "epoch": 0.5640049277634673, "grad_norm": 1.402970790863037, "learning_rate": 5.033e-05, "loss": 0.622, "step": 10072 }, { "epoch": 0.5640609250755964, "grad_norm": 1.445788860321045, "learning_rate": 5.0335e-05, "loss": 0.558, "step": 10073 }, { "epoch": 0.5641169223877254, "grad_norm": 1.7032382488250732, "learning_rate": 5.0339999999999996e-05, "loss": 0.443, "step": 10074 }, { "epoch": 0.5641729196998544, "grad_norm": 1.2501990795135498, "learning_rate": 5.0345e-05, "loss": 0.4593, "step": 10075 }, { "epoch": 0.5642289170119834, "grad_norm": 1.5817010402679443, "learning_rate": 5.035e-05, "loss": 0.4985, "step": 10076 }, { "epoch": 0.5642849143241124, "grad_norm": 1.1269781589508057, "learning_rate": 5.035500000000001e-05, "loss": 0.4437, "step": 10077 }, { "epoch": 0.5643409116362414, "grad_norm": 1.16768479347229, "learning_rate": 5.0360000000000006e-05, "loss": 0.4073, "step": 10078 }, { "epoch": 0.5643969089483705, "grad_norm": 1.4160252809524536, "learning_rate": 
5.036500000000001e-05, "loss": 0.4502, "step": 10079 }, { "epoch": 0.5644529062604995, "grad_norm": 1.240113615989685, "learning_rate": 5.037000000000001e-05, "loss": 0.4357, "step": 10080 }, { "epoch": 0.5645089035726285, "grad_norm": 1.3157459497451782, "learning_rate": 5.0375000000000005e-05, "loss": 0.4175, "step": 10081 }, { "epoch": 0.5645649008847575, "grad_norm": 1.5507278442382812, "learning_rate": 5.038e-05, "loss": 0.3846, "step": 10082 }, { "epoch": 0.5646208981968865, "grad_norm": 1.2351633310317993, "learning_rate": 5.0385000000000006e-05, "loss": 0.4688, "step": 10083 }, { "epoch": 0.5646768955090156, "grad_norm": 1.5217021703720093, "learning_rate": 5.0390000000000004e-05, "loss": 0.424, "step": 10084 }, { "epoch": 0.5647328928211446, "grad_norm": 1.1799594163894653, "learning_rate": 5.0395e-05, "loss": 0.3173, "step": 10085 }, { "epoch": 0.5647888901332736, "grad_norm": 1.218217372894287, "learning_rate": 5.0400000000000005e-05, "loss": 0.5611, "step": 10086 }, { "epoch": 0.5648448874454026, "grad_norm": 1.0080746412277222, "learning_rate": 5.0405e-05, "loss": 0.3904, "step": 10087 }, { "epoch": 0.5649008847575316, "grad_norm": 1.2529783248901367, "learning_rate": 5.041e-05, "loss": 0.3862, "step": 10088 }, { "epoch": 0.5649568820696607, "grad_norm": 1.1760334968566895, "learning_rate": 5.0415000000000004e-05, "loss": 0.4188, "step": 10089 }, { "epoch": 0.5650128793817897, "grad_norm": 1.174148440361023, "learning_rate": 5.042e-05, "loss": 0.456, "step": 10090 }, { "epoch": 0.5650688766939187, "grad_norm": 2.8847005367279053, "learning_rate": 5.0425e-05, "loss": 0.4242, "step": 10091 }, { "epoch": 0.5651248740060477, "grad_norm": 1.396600365638733, "learning_rate": 5.0429999999999997e-05, "loss": 0.3525, "step": 10092 }, { "epoch": 0.5651808713181767, "grad_norm": 2.1680355072021484, "learning_rate": 5.0435e-05, "loss": 0.4377, "step": 10093 }, { "epoch": 0.5652368686303058, "grad_norm": 1.2360775470733643, "learning_rate": 5.044e-05, "loss": 
0.4383, "step": 10094 }, { "epoch": 0.5652928659424348, "grad_norm": 1.3207876682281494, "learning_rate": 5.0444999999999996e-05, "loss": 0.4131, "step": 10095 }, { "epoch": 0.5653488632545638, "grad_norm": 1.5699901580810547, "learning_rate": 5.045e-05, "loss": 0.3841, "step": 10096 }, { "epoch": 0.5654048605666928, "grad_norm": 1.070851445198059, "learning_rate": 5.045500000000001e-05, "loss": 0.2873, "step": 10097 }, { "epoch": 0.5654608578788218, "grad_norm": 1.6041005849838257, "learning_rate": 5.046000000000001e-05, "loss": 0.457, "step": 10098 }, { "epoch": 0.5655168551909509, "grad_norm": 1.3358922004699707, "learning_rate": 5.0465000000000006e-05, "loss": 0.395, "step": 10099 }, { "epoch": 0.5655728525030799, "grad_norm": 1.224945306777954, "learning_rate": 5.047e-05, "loss": 0.5468, "step": 10100 }, { "epoch": 0.5656288498152089, "grad_norm": 1.5300770998001099, "learning_rate": 5.047500000000001e-05, "loss": 0.4921, "step": 10101 }, { "epoch": 0.5656848471273379, "grad_norm": 1.1021238565444946, "learning_rate": 5.0480000000000005e-05, "loss": 0.456, "step": 10102 }, { "epoch": 0.5657408444394669, "grad_norm": 1.3550925254821777, "learning_rate": 5.0485e-05, "loss": 0.5868, "step": 10103 }, { "epoch": 0.565796841751596, "grad_norm": 1.0332305431365967, "learning_rate": 5.0490000000000006e-05, "loss": 0.3685, "step": 10104 }, { "epoch": 0.565852839063725, "grad_norm": 1.2964366674423218, "learning_rate": 5.0495000000000004e-05, "loss": 0.621, "step": 10105 }, { "epoch": 0.565908836375854, "grad_norm": 1.293464183807373, "learning_rate": 5.05e-05, "loss": 0.4659, "step": 10106 }, { "epoch": 0.565964833687983, "grad_norm": 1.5895806550979614, "learning_rate": 5.0505000000000005e-05, "loss": 0.454, "step": 10107 }, { "epoch": 0.566020831000112, "grad_norm": 1.4051517248153687, "learning_rate": 5.051e-05, "loss": 0.3849, "step": 10108 }, { "epoch": 0.566076828312241, "grad_norm": 1.385178565979004, "learning_rate": 5.0515e-05, "loss": 0.4506, "step": 10109 }, 
{ "epoch": 0.5661328256243701, "grad_norm": 1.2554985284805298, "learning_rate": 5.052e-05, "loss": 0.524, "step": 10110 }, { "epoch": 0.5661888229364991, "grad_norm": 1.3433587551116943, "learning_rate": 5.0525e-05, "loss": 0.4234, "step": 10111 }, { "epoch": 0.5662448202486281, "grad_norm": 1.5164456367492676, "learning_rate": 5.053e-05, "loss": 0.481, "step": 10112 }, { "epoch": 0.5663008175607571, "grad_norm": 1.239671230316162, "learning_rate": 5.0534999999999996e-05, "loss": 0.4495, "step": 10113 }, { "epoch": 0.5663568148728861, "grad_norm": 1.2597676515579224, "learning_rate": 5.054e-05, "loss": 0.4129, "step": 10114 }, { "epoch": 0.5664128121850152, "grad_norm": 1.2329849004745483, "learning_rate": 5.0545e-05, "loss": 0.4358, "step": 10115 }, { "epoch": 0.5664688094971442, "grad_norm": 1.415442943572998, "learning_rate": 5.0549999999999995e-05, "loss": 0.4814, "step": 10116 }, { "epoch": 0.5665248068092732, "grad_norm": 1.3459875583648682, "learning_rate": 5.0555000000000006e-05, "loss": 0.4023, "step": 10117 }, { "epoch": 0.5665808041214022, "grad_norm": 2.1763296127319336, "learning_rate": 5.056000000000001e-05, "loss": 0.447, "step": 10118 }, { "epoch": 0.5666368014335312, "grad_norm": 1.1652395725250244, "learning_rate": 5.056500000000001e-05, "loss": 0.385, "step": 10119 }, { "epoch": 0.5666927987456603, "grad_norm": 1.3457837104797363, "learning_rate": 5.0570000000000005e-05, "loss": 0.3948, "step": 10120 }, { "epoch": 0.5667487960577893, "grad_norm": 1.684307336807251, "learning_rate": 5.0575e-05, "loss": 0.5563, "step": 10121 }, { "epoch": 0.5668047933699183, "grad_norm": 1.1551640033721924, "learning_rate": 5.058000000000001e-05, "loss": 0.528, "step": 10122 }, { "epoch": 0.5668607906820473, "grad_norm": 1.3200267553329468, "learning_rate": 5.0585000000000004e-05, "loss": 0.4706, "step": 10123 }, { "epoch": 0.5669167879941763, "grad_norm": 1.070975661277771, "learning_rate": 5.059e-05, "loss": 0.3755, "step": 10124 }, { "epoch": 
0.5669727853063053, "grad_norm": 1.593381404876709, "learning_rate": 5.0595000000000006e-05, "loss": 0.3762, "step": 10125 }, { "epoch": 0.5670287826184344, "grad_norm": 1.2779661417007446, "learning_rate": 5.0600000000000003e-05, "loss": 0.4005, "step": 10126 }, { "epoch": 0.5670847799305634, "grad_norm": 1.187401533126831, "learning_rate": 5.0605e-05, "loss": 0.4789, "step": 10127 }, { "epoch": 0.5671407772426923, "grad_norm": 1.4982678890228271, "learning_rate": 5.0610000000000005e-05, "loss": 0.5009, "step": 10128 }, { "epoch": 0.5671967745548213, "grad_norm": 1.1863610744476318, "learning_rate": 5.0615e-05, "loss": 0.4679, "step": 10129 }, { "epoch": 0.5672527718669503, "grad_norm": 1.1557377576828003, "learning_rate": 5.062e-05, "loss": 0.4048, "step": 10130 }, { "epoch": 0.5673087691790794, "grad_norm": 1.194615125656128, "learning_rate": 5.0625e-05, "loss": 0.4318, "step": 10131 }, { "epoch": 0.5673647664912084, "grad_norm": 1.3305182456970215, "learning_rate": 5.063e-05, "loss": 0.483, "step": 10132 }, { "epoch": 0.5674207638033374, "grad_norm": 1.4567062854766846, "learning_rate": 5.0635e-05, "loss": 0.4314, "step": 10133 }, { "epoch": 0.5674767611154664, "grad_norm": 1.1840482950210571, "learning_rate": 5.0639999999999996e-05, "loss": 0.352, "step": 10134 }, { "epoch": 0.5675327584275954, "grad_norm": 1.8856470584869385, "learning_rate": 5.0645e-05, "loss": 0.4333, "step": 10135 }, { "epoch": 0.5675887557397244, "grad_norm": 1.2895816564559937, "learning_rate": 5.065e-05, "loss": 0.4328, "step": 10136 }, { "epoch": 0.5676447530518535, "grad_norm": 1.1949578523635864, "learning_rate": 5.0654999999999995e-05, "loss": 0.505, "step": 10137 }, { "epoch": 0.5677007503639825, "grad_norm": 1.247236728668213, "learning_rate": 5.0660000000000006e-05, "loss": 0.4934, "step": 10138 }, { "epoch": 0.5677567476761115, "grad_norm": 1.4603780508041382, "learning_rate": 5.0665000000000004e-05, "loss": 0.6489, "step": 10139 }, { "epoch": 0.5678127449882405, "grad_norm": 
1.3208463191986084, "learning_rate": 5.067000000000001e-05, "loss": 0.4261, "step": 10140 }, { "epoch": 0.5678687423003695, "grad_norm": 1.293473482131958, "learning_rate": 5.0675000000000005e-05, "loss": 0.3721, "step": 10141 }, { "epoch": 0.5679247396124986, "grad_norm": 1.4508566856384277, "learning_rate": 5.068e-05, "loss": 0.3791, "step": 10142 }, { "epoch": 0.5679807369246276, "grad_norm": 1.4139388799667358, "learning_rate": 5.068500000000001e-05, "loss": 0.4025, "step": 10143 }, { "epoch": 0.5680367342367566, "grad_norm": 1.1509840488433838, "learning_rate": 5.0690000000000004e-05, "loss": 0.3744, "step": 10144 }, { "epoch": 0.5680927315488856, "grad_norm": 1.2031747102737427, "learning_rate": 5.0695e-05, "loss": 0.3816, "step": 10145 }, { "epoch": 0.5681487288610146, "grad_norm": 1.9181218147277832, "learning_rate": 5.0700000000000006e-05, "loss": 0.4964, "step": 10146 }, { "epoch": 0.5682047261731437, "grad_norm": 1.152400016784668, "learning_rate": 5.0705e-05, "loss": 0.3611, "step": 10147 }, { "epoch": 0.5682607234852727, "grad_norm": 1.409926414489746, "learning_rate": 5.071e-05, "loss": 0.3901, "step": 10148 }, { "epoch": 0.5683167207974017, "grad_norm": 1.3198037147521973, "learning_rate": 5.0715e-05, "loss": 0.4538, "step": 10149 }, { "epoch": 0.5683727181095307, "grad_norm": 1.2766097784042358, "learning_rate": 5.072e-05, "loss": 0.4074, "step": 10150 }, { "epoch": 0.5684287154216597, "grad_norm": 1.1689236164093018, "learning_rate": 5.0725e-05, "loss": 0.3714, "step": 10151 }, { "epoch": 0.5684847127337888, "grad_norm": 1.399337887763977, "learning_rate": 5.073e-05, "loss": 0.4977, "step": 10152 }, { "epoch": 0.5685407100459178, "grad_norm": 1.2349555492401123, "learning_rate": 5.0735e-05, "loss": 0.5333, "step": 10153 }, { "epoch": 0.5685967073580468, "grad_norm": 1.2456928491592407, "learning_rate": 5.074e-05, "loss": 0.4529, "step": 10154 }, { "epoch": 0.5686527046701758, "grad_norm": 1.2561348676681519, "learning_rate": 5.0744999999999996e-05, 
"loss": 0.449, "step": 10155 }, { "epoch": 0.5687087019823048, "grad_norm": 1.1609960794448853, "learning_rate": 5.075e-05, "loss": 0.3185, "step": 10156 }, { "epoch": 0.5687646992944339, "grad_norm": 1.2437316179275513, "learning_rate": 5.0755e-05, "loss": 0.4063, "step": 10157 }, { "epoch": 0.5688206966065629, "grad_norm": 1.2899188995361328, "learning_rate": 5.076000000000001e-05, "loss": 0.4687, "step": 10158 }, { "epoch": 0.5688766939186919, "grad_norm": 1.1509284973144531, "learning_rate": 5.0765000000000006e-05, "loss": 0.3083, "step": 10159 }, { "epoch": 0.5689326912308209, "grad_norm": 1.4670699834823608, "learning_rate": 5.0770000000000003e-05, "loss": 0.5398, "step": 10160 }, { "epoch": 0.5689886885429499, "grad_norm": 1.5057661533355713, "learning_rate": 5.077500000000001e-05, "loss": 0.45, "step": 10161 }, { "epoch": 0.569044685855079, "grad_norm": 1.4503322839736938, "learning_rate": 5.0780000000000005e-05, "loss": 0.5332, "step": 10162 }, { "epoch": 0.569100683167208, "grad_norm": 1.3776382207870483, "learning_rate": 5.0785e-05, "loss": 0.5604, "step": 10163 }, { "epoch": 0.569156680479337, "grad_norm": 1.2540619373321533, "learning_rate": 5.079000000000001e-05, "loss": 0.4003, "step": 10164 }, { "epoch": 0.569212677791466, "grad_norm": 1.4435511827468872, "learning_rate": 5.0795000000000004e-05, "loss": 0.6547, "step": 10165 }, { "epoch": 0.569268675103595, "grad_norm": 1.2559958696365356, "learning_rate": 5.08e-05, "loss": 0.4572, "step": 10166 }, { "epoch": 0.569324672415724, "grad_norm": 1.4132412672042847, "learning_rate": 5.0805000000000006e-05, "loss": 0.5495, "step": 10167 }, { "epoch": 0.5693806697278531, "grad_norm": 1.234095811843872, "learning_rate": 5.081e-05, "loss": 0.3255, "step": 10168 }, { "epoch": 0.5694366670399821, "grad_norm": 1.3978824615478516, "learning_rate": 5.0815e-05, "loss": 0.4344, "step": 10169 }, { "epoch": 0.5694926643521111, "grad_norm": 1.2753530740737915, "learning_rate": 5.082e-05, "loss": 0.3858, "step": 10170 
}, { "epoch": 0.5695486616642401, "grad_norm": 1.3017603158950806, "learning_rate": 5.0825e-05, "loss": 0.4449, "step": 10171 }, { "epoch": 0.5696046589763691, "grad_norm": 1.1952513456344604, "learning_rate": 5.083e-05, "loss": 0.4657, "step": 10172 }, { "epoch": 0.5696606562884982, "grad_norm": 1.2655614614486694, "learning_rate": 5.0835e-05, "loss": 0.3996, "step": 10173 }, { "epoch": 0.5697166536006272, "grad_norm": 1.0660436153411865, "learning_rate": 5.084e-05, "loss": 0.3162, "step": 10174 }, { "epoch": 0.5697726509127562, "grad_norm": 1.3102269172668457, "learning_rate": 5.0845e-05, "loss": 0.4801, "step": 10175 }, { "epoch": 0.5698286482248852, "grad_norm": 1.2505639791488647, "learning_rate": 5.0849999999999996e-05, "loss": 0.4468, "step": 10176 }, { "epoch": 0.5698846455370142, "grad_norm": 1.0204392671585083, "learning_rate": 5.0855e-05, "loss": 0.3442, "step": 10177 }, { "epoch": 0.5699406428491433, "grad_norm": 1.2225295305252075, "learning_rate": 5.0860000000000004e-05, "loss": 0.4091, "step": 10178 }, { "epoch": 0.5699966401612723, "grad_norm": 1.420210361480713, "learning_rate": 5.086500000000001e-05, "loss": 0.5708, "step": 10179 }, { "epoch": 0.5700526374734013, "grad_norm": 1.2864094972610474, "learning_rate": 5.0870000000000006e-05, "loss": 0.4386, "step": 10180 }, { "epoch": 0.5701086347855303, "grad_norm": 1.4768576622009277, "learning_rate": 5.0875e-05, "loss": 0.3914, "step": 10181 }, { "epoch": 0.5701646320976593, "grad_norm": 1.3639792203903198, "learning_rate": 5.088000000000001e-05, "loss": 0.4886, "step": 10182 }, { "epoch": 0.5702206294097883, "grad_norm": 1.3538386821746826, "learning_rate": 5.0885000000000005e-05, "loss": 0.3957, "step": 10183 }, { "epoch": 0.5702766267219174, "grad_norm": 1.321562647819519, "learning_rate": 5.089e-05, "loss": 0.471, "step": 10184 }, { "epoch": 0.5703326240340464, "grad_norm": 1.2572190761566162, "learning_rate": 5.0895000000000007e-05, "loss": 0.4959, "step": 10185 }, { "epoch": 0.5703886213461754, 
"grad_norm": 1.2916969060897827, "learning_rate": 5.0900000000000004e-05, "loss": 0.4295, "step": 10186 }, { "epoch": 0.5704446186583044, "grad_norm": 1.5530734062194824, "learning_rate": 5.0905e-05, "loss": 0.5665, "step": 10187 }, { "epoch": 0.5705006159704334, "grad_norm": 1.0694494247436523, "learning_rate": 5.091e-05, "loss": 0.3661, "step": 10188 }, { "epoch": 0.5705566132825625, "grad_norm": 1.4759570360183716, "learning_rate": 5.0915e-05, "loss": 0.4288, "step": 10189 }, { "epoch": 0.5706126105946915, "grad_norm": 1.5343525409698486, "learning_rate": 5.092e-05, "loss": 0.4077, "step": 10190 }, { "epoch": 0.5706686079068205, "grad_norm": 1.1712658405303955, "learning_rate": 5.0925e-05, "loss": 0.3963, "step": 10191 }, { "epoch": 0.5707246052189495, "grad_norm": 1.7039088010787964, "learning_rate": 5.093e-05, "loss": 0.4555, "step": 10192 }, { "epoch": 0.5707806025310785, "grad_norm": 1.1426856517791748, "learning_rate": 5.0935e-05, "loss": 0.3363, "step": 10193 }, { "epoch": 0.5708365998432076, "grad_norm": 1.233491063117981, "learning_rate": 5.094e-05, "loss": 0.437, "step": 10194 }, { "epoch": 0.5708925971553366, "grad_norm": 1.1578457355499268, "learning_rate": 5.0945e-05, "loss": 0.4331, "step": 10195 }, { "epoch": 0.5709485944674656, "grad_norm": 1.2121589183807373, "learning_rate": 5.095e-05, "loss": 0.4047, "step": 10196 }, { "epoch": 0.5710045917795946, "grad_norm": 1.410102128982544, "learning_rate": 5.0954999999999996e-05, "loss": 0.4156, "step": 10197 }, { "epoch": 0.5710605890917236, "grad_norm": 1.3294005393981934, "learning_rate": 5.096000000000001e-05, "loss": 0.4113, "step": 10198 }, { "epoch": 0.5711165864038527, "grad_norm": 1.3565648794174194, "learning_rate": 5.0965000000000004e-05, "loss": 0.4551, "step": 10199 }, { "epoch": 0.5711725837159817, "grad_norm": 1.9718737602233887, "learning_rate": 5.097000000000001e-05, "loss": 0.5973, "step": 10200 }, { "epoch": 0.5712285810281107, "grad_norm": 1.5698515176773071, "learning_rate": 
5.0975000000000006e-05, "loss": 0.4464, "step": 10201 }, { "epoch": 0.5712845783402397, "grad_norm": 1.5377118587493896, "learning_rate": 5.098e-05, "loss": 0.4791, "step": 10202 }, { "epoch": 0.5713405756523687, "grad_norm": 1.1959205865859985, "learning_rate": 5.098500000000001e-05, "loss": 0.4156, "step": 10203 }, { "epoch": 0.5713965729644978, "grad_norm": 1.2330917119979858, "learning_rate": 5.0990000000000005e-05, "loss": 0.4184, "step": 10204 }, { "epoch": 0.5714525702766268, "grad_norm": 1.1605263948440552, "learning_rate": 5.0995e-05, "loss": 0.3128, "step": 10205 }, { "epoch": 0.5715085675887558, "grad_norm": 1.1472564935684204, "learning_rate": 5.1000000000000006e-05, "loss": 0.3788, "step": 10206 }, { "epoch": 0.5715645649008848, "grad_norm": 1.4026507139205933, "learning_rate": 5.1005000000000004e-05, "loss": 0.4531, "step": 10207 }, { "epoch": 0.5716205622130138, "grad_norm": 1.314774513244629, "learning_rate": 5.101e-05, "loss": 0.3528, "step": 10208 }, { "epoch": 0.5716765595251428, "grad_norm": 1.4226630926132202, "learning_rate": 5.1015e-05, "loss": 0.4123, "step": 10209 }, { "epoch": 0.5717325568372719, "grad_norm": 1.463738203048706, "learning_rate": 5.102e-05, "loss": 0.6458, "step": 10210 }, { "epoch": 0.5717885541494008, "grad_norm": 1.4039021730422974, "learning_rate": 5.1025e-05, "loss": 0.3781, "step": 10211 }, { "epoch": 0.5718445514615298, "grad_norm": 1.6951605081558228, "learning_rate": 5.103e-05, "loss": 0.5027, "step": 10212 }, { "epoch": 0.5719005487736588, "grad_norm": 1.3380426168441772, "learning_rate": 5.1035e-05, "loss": 0.5473, "step": 10213 }, { "epoch": 0.5719565460857878, "grad_norm": 1.4729443788528442, "learning_rate": 5.104e-05, "loss": 0.3846, "step": 10214 }, { "epoch": 0.5720125433979169, "grad_norm": 1.4423785209655762, "learning_rate": 5.1045e-05, "loss": 0.7526, "step": 10215 }, { "epoch": 0.5720685407100459, "grad_norm": 1.164225459098816, "learning_rate": 5.105e-05, "loss": 0.328, "step": 10216 }, { "epoch": 
0.5721245380221749, "grad_norm": 1.191494107246399, "learning_rate": 5.1055e-05, "loss": 0.3963, "step": 10217 }, { "epoch": 0.5721805353343039, "grad_norm": 1.256538987159729, "learning_rate": 5.106000000000001e-05, "loss": 0.4338, "step": 10218 }, { "epoch": 0.5722365326464329, "grad_norm": 1.5546156167984009, "learning_rate": 5.1065000000000007e-05, "loss": 0.4119, "step": 10219 }, { "epoch": 0.572292529958562, "grad_norm": 1.3575255870819092, "learning_rate": 5.1070000000000004e-05, "loss": 0.3489, "step": 10220 }, { "epoch": 0.572348527270691, "grad_norm": 1.6723190546035767, "learning_rate": 5.107500000000001e-05, "loss": 0.3096, "step": 10221 }, { "epoch": 0.57240452458282, "grad_norm": 1.1107463836669922, "learning_rate": 5.1080000000000006e-05, "loss": 0.3975, "step": 10222 }, { "epoch": 0.572460521894949, "grad_norm": 1.8103303909301758, "learning_rate": 5.1085e-05, "loss": 0.36, "step": 10223 }, { "epoch": 0.572516519207078, "grad_norm": 2.5603103637695312, "learning_rate": 5.109000000000001e-05, "loss": 0.4458, "step": 10224 }, { "epoch": 0.572572516519207, "grad_norm": 1.2526216506958008, "learning_rate": 5.1095000000000005e-05, "loss": 0.3847, "step": 10225 }, { "epoch": 0.5726285138313361, "grad_norm": 1.8950400352478027, "learning_rate": 5.11e-05, "loss": 0.5301, "step": 10226 }, { "epoch": 0.5726845111434651, "grad_norm": 1.1724538803100586, "learning_rate": 5.1105000000000006e-05, "loss": 0.4366, "step": 10227 }, { "epoch": 0.5727405084555941, "grad_norm": 1.6718838214874268, "learning_rate": 5.1110000000000004e-05, "loss": 0.4654, "step": 10228 }, { "epoch": 0.5727965057677231, "grad_norm": 1.3077808618545532, "learning_rate": 5.1115e-05, "loss": 0.4893, "step": 10229 }, { "epoch": 0.5728525030798521, "grad_norm": 1.3792833089828491, "learning_rate": 5.112e-05, "loss": 0.4925, "step": 10230 }, { "epoch": 0.5729085003919812, "grad_norm": 1.2939683198928833, "learning_rate": 5.1125e-05, "loss": 0.4321, "step": 10231 }, { "epoch": 
0.5729644977041102, "grad_norm": 1.9967490434646606, "learning_rate": 5.113e-05, "loss": 0.3745, "step": 10232 }, { "epoch": 0.5730204950162392, "grad_norm": 1.1713602542877197, "learning_rate": 5.1135e-05, "loss": 0.4535, "step": 10233 }, { "epoch": 0.5730764923283682, "grad_norm": 1.2153222560882568, "learning_rate": 5.114e-05, "loss": 0.3062, "step": 10234 }, { "epoch": 0.5731324896404972, "grad_norm": 1.45026695728302, "learning_rate": 5.1145e-05, "loss": 0.4027, "step": 10235 }, { "epoch": 0.5731884869526263, "grad_norm": 1.146943211555481, "learning_rate": 5.1149999999999996e-05, "loss": 0.3594, "step": 10236 }, { "epoch": 0.5732444842647553, "grad_norm": 1.2367851734161377, "learning_rate": 5.1155e-05, "loss": 0.4114, "step": 10237 }, { "epoch": 0.5733004815768843, "grad_norm": 1.1932320594787598, "learning_rate": 5.1160000000000005e-05, "loss": 0.4816, "step": 10238 }, { "epoch": 0.5733564788890133, "grad_norm": 1.2560538053512573, "learning_rate": 5.116500000000001e-05, "loss": 0.4513, "step": 10239 }, { "epoch": 0.5734124762011423, "grad_norm": 1.418629765510559, "learning_rate": 5.1170000000000006e-05, "loss": 0.4694, "step": 10240 }, { "epoch": 0.5734684735132713, "grad_norm": 1.2285903692245483, "learning_rate": 5.1175000000000004e-05, "loss": 0.4627, "step": 10241 }, { "epoch": 0.5735244708254004, "grad_norm": 1.182493805885315, "learning_rate": 5.118000000000001e-05, "loss": 0.4864, "step": 10242 }, { "epoch": 0.5735804681375294, "grad_norm": 1.2190669775009155, "learning_rate": 5.1185000000000005e-05, "loss": 0.3992, "step": 10243 }, { "epoch": 0.5736364654496584, "grad_norm": 1.2626402378082275, "learning_rate": 5.119e-05, "loss": 0.6578, "step": 10244 }, { "epoch": 0.5736924627617874, "grad_norm": 1.4835706949234009, "learning_rate": 5.119500000000001e-05, "loss": 0.3841, "step": 10245 }, { "epoch": 0.5737484600739164, "grad_norm": 1.3277225494384766, "learning_rate": 5.1200000000000004e-05, "loss": 0.4006, "step": 10246 }, { "epoch": 
0.5738044573860455, "grad_norm": 1.3944958448410034, "learning_rate": 5.1205e-05, "loss": 0.56, "step": 10247 }, { "epoch": 0.5738604546981745, "grad_norm": 1.4049177169799805, "learning_rate": 5.121e-05, "loss": 0.7031, "step": 10248 }, { "epoch": 0.5739164520103035, "grad_norm": 1.1918838024139404, "learning_rate": 5.1215000000000003e-05, "loss": 0.4446, "step": 10249 }, { "epoch": 0.5739724493224325, "grad_norm": 1.4402596950531006, "learning_rate": 5.122e-05, "loss": 0.5317, "step": 10250 }, { "epoch": 0.5740284466345615, "grad_norm": 1.4664949178695679, "learning_rate": 5.1225e-05, "loss": 0.4501, "step": 10251 }, { "epoch": 0.5740844439466906, "grad_norm": 1.4004675149917603, "learning_rate": 5.123e-05, "loss": 0.4477, "step": 10252 }, { "epoch": 0.5741404412588196, "grad_norm": 1.4063783884048462, "learning_rate": 5.1235e-05, "loss": 0.4045, "step": 10253 }, { "epoch": 0.5741964385709486, "grad_norm": 1.291314959526062, "learning_rate": 5.124e-05, "loss": 0.3632, "step": 10254 }, { "epoch": 0.5742524358830776, "grad_norm": 1.3557640314102173, "learning_rate": 5.1245e-05, "loss": 0.5423, "step": 10255 }, { "epoch": 0.5743084331952066, "grad_norm": 1.3525190353393555, "learning_rate": 5.125e-05, "loss": 0.3807, "step": 10256 }, { "epoch": 0.5743644305073357, "grad_norm": 1.233000636100769, "learning_rate": 5.1254999999999996e-05, "loss": 0.3606, "step": 10257 }, { "epoch": 0.5744204278194647, "grad_norm": 2.2913849353790283, "learning_rate": 5.1259999999999994e-05, "loss": 0.5721, "step": 10258 }, { "epoch": 0.5744764251315937, "grad_norm": 1.2874081134796143, "learning_rate": 5.1265000000000005e-05, "loss": 0.4782, "step": 10259 }, { "epoch": 0.5745324224437227, "grad_norm": 1.4316374063491821, "learning_rate": 5.127000000000001e-05, "loss": 0.4022, "step": 10260 }, { "epoch": 0.5745884197558517, "grad_norm": 1.219549298286438, "learning_rate": 5.1275000000000006e-05, "loss": 0.5247, "step": 10261 }, { "epoch": 0.5746444170679808, "grad_norm": 
1.2923873662948608, "learning_rate": 5.1280000000000004e-05, "loss": 0.3801, "step": 10262 }, { "epoch": 0.5747004143801098, "grad_norm": 1.3383703231811523, "learning_rate": 5.128500000000001e-05, "loss": 0.5152, "step": 10263 }, { "epoch": 0.5747564116922388, "grad_norm": 1.1318565607070923, "learning_rate": 5.1290000000000005e-05, "loss": 0.4195, "step": 10264 }, { "epoch": 0.5748124090043678, "grad_norm": 1.4272615909576416, "learning_rate": 5.1295e-05, "loss": 0.45, "step": 10265 }, { "epoch": 0.5748684063164968, "grad_norm": 1.6429742574691772, "learning_rate": 5.130000000000001e-05, "loss": 0.5195, "step": 10266 }, { "epoch": 0.5749244036286258, "grad_norm": 1.1789437532424927, "learning_rate": 5.1305000000000004e-05, "loss": 0.4308, "step": 10267 }, { "epoch": 0.5749804009407549, "grad_norm": 1.2067891359329224, "learning_rate": 5.131e-05, "loss": 0.4, "step": 10268 }, { "epoch": 0.5750363982528839, "grad_norm": 1.212969183921814, "learning_rate": 5.1315e-05, "loss": 0.5542, "step": 10269 }, { "epoch": 0.5750923955650129, "grad_norm": 1.1853617429733276, "learning_rate": 5.132e-05, "loss": 0.4596, "step": 10270 }, { "epoch": 0.5751483928771419, "grad_norm": 1.5046513080596924, "learning_rate": 5.1325e-05, "loss": 0.459, "step": 10271 }, { "epoch": 0.5752043901892709, "grad_norm": 1.5734779834747314, "learning_rate": 5.133e-05, "loss": 0.5446, "step": 10272 }, { "epoch": 0.5752603875014, "grad_norm": 1.4467272758483887, "learning_rate": 5.1335e-05, "loss": 0.517, "step": 10273 }, { "epoch": 0.575316384813529, "grad_norm": 1.5649198293685913, "learning_rate": 5.134e-05, "loss": 0.547, "step": 10274 }, { "epoch": 0.575372382125658, "grad_norm": 1.5195143222808838, "learning_rate": 5.1345e-05, "loss": 0.5048, "step": 10275 }, { "epoch": 0.575428379437787, "grad_norm": 1.2118948698043823, "learning_rate": 5.135e-05, "loss": 0.4739, "step": 10276 }, { "epoch": 0.575484376749916, "grad_norm": 1.3803354501724243, "learning_rate": 5.1355e-05, "loss": 0.4965, "step": 
10277 }, { "epoch": 0.5755403740620451, "grad_norm": 1.300803303718567, "learning_rate": 5.1359999999999996e-05, "loss": 0.4283, "step": 10278 }, { "epoch": 0.5755963713741741, "grad_norm": 1.058893084526062, "learning_rate": 5.136500000000001e-05, "loss": 0.322, "step": 10279 }, { "epoch": 0.5756523686863031, "grad_norm": 1.2869795560836792, "learning_rate": 5.1370000000000005e-05, "loss": 0.3932, "step": 10280 }, { "epoch": 0.5757083659984321, "grad_norm": 1.677501916885376, "learning_rate": 5.137500000000001e-05, "loss": 0.4229, "step": 10281 }, { "epoch": 0.5757643633105611, "grad_norm": 1.824832558631897, "learning_rate": 5.1380000000000006e-05, "loss": 0.4507, "step": 10282 }, { "epoch": 0.5758203606226902, "grad_norm": 1.1936434507369995, "learning_rate": 5.1385000000000004e-05, "loss": 0.3963, "step": 10283 }, { "epoch": 0.5758763579348192, "grad_norm": 1.6029094457626343, "learning_rate": 5.139000000000001e-05, "loss": 0.5803, "step": 10284 }, { "epoch": 0.5759323552469482, "grad_norm": 1.257612943649292, "learning_rate": 5.1395000000000005e-05, "loss": 0.3776, "step": 10285 }, { "epoch": 0.5759883525590772, "grad_norm": 1.2295781373977661, "learning_rate": 5.14e-05, "loss": 0.3462, "step": 10286 }, { "epoch": 0.5760443498712062, "grad_norm": 1.4837368726730347, "learning_rate": 5.1405e-05, "loss": 0.3868, "step": 10287 }, { "epoch": 0.5761003471833352, "grad_norm": 1.1890358924865723, "learning_rate": 5.1410000000000004e-05, "loss": 0.5082, "step": 10288 }, { "epoch": 0.5761563444954643, "grad_norm": 1.2450222969055176, "learning_rate": 5.1415e-05, "loss": 0.4077, "step": 10289 }, { "epoch": 0.5762123418075933, "grad_norm": 1.1931641101837158, "learning_rate": 5.142e-05, "loss": 0.3834, "step": 10290 }, { "epoch": 0.5762683391197223, "grad_norm": 1.5010100603103638, "learning_rate": 5.1425e-05, "loss": 0.4207, "step": 10291 }, { "epoch": 0.5763243364318513, "grad_norm": 1.242691159248352, "learning_rate": 5.143e-05, "loss": 0.2911, "step": 10292 }, { 
"epoch": 0.5763803337439803, "grad_norm": 1.0755046606063843, "learning_rate": 5.1435e-05, "loss": 0.398, "step": 10293 }, { "epoch": 0.5764363310561093, "grad_norm": 1.1281802654266357, "learning_rate": 5.144e-05, "loss": 0.4705, "step": 10294 }, { "epoch": 0.5764923283682383, "grad_norm": 1.2325894832611084, "learning_rate": 5.1445e-05, "loss": 0.401, "step": 10295 }, { "epoch": 0.5765483256803673, "grad_norm": 1.1909339427947998, "learning_rate": 5.145e-05, "loss": 0.3308, "step": 10296 }, { "epoch": 0.5766043229924963, "grad_norm": 1.4792735576629639, "learning_rate": 5.1454999999999994e-05, "loss": 0.4901, "step": 10297 }, { "epoch": 0.5766603203046253, "grad_norm": 1.283048152923584, "learning_rate": 5.146e-05, "loss": 0.3903, "step": 10298 }, { "epoch": 0.5767163176167543, "grad_norm": 1.468291997909546, "learning_rate": 5.146500000000001e-05, "loss": 0.4658, "step": 10299 }, { "epoch": 0.5767723149288834, "grad_norm": 1.227444052696228, "learning_rate": 5.147000000000001e-05, "loss": 0.3662, "step": 10300 }, { "epoch": 0.5768283122410124, "grad_norm": 1.3208351135253906, "learning_rate": 5.1475000000000004e-05, "loss": 0.3798, "step": 10301 }, { "epoch": 0.5768843095531414, "grad_norm": 1.2385649681091309, "learning_rate": 5.148000000000001e-05, "loss": 0.3872, "step": 10302 }, { "epoch": 0.5769403068652704, "grad_norm": 1.1699784994125366, "learning_rate": 5.1485000000000006e-05, "loss": 0.3847, "step": 10303 }, { "epoch": 0.5769963041773994, "grad_norm": 1.215610146522522, "learning_rate": 5.149e-05, "loss": 0.4676, "step": 10304 }, { "epoch": 0.5770523014895285, "grad_norm": 1.3170974254608154, "learning_rate": 5.149500000000001e-05, "loss": 0.5343, "step": 10305 }, { "epoch": 0.5771082988016575, "grad_norm": 1.1466550827026367, "learning_rate": 5.1500000000000005e-05, "loss": 0.4225, "step": 10306 }, { "epoch": 0.5771642961137865, "grad_norm": 1.6524522304534912, "learning_rate": 5.1505e-05, "loss": 0.52, "step": 10307 }, { "epoch": 0.5772202934259155, 
"grad_norm": 1.1020444631576538, "learning_rate": 5.151e-05, "loss": 0.3842, "step": 10308 }, { "epoch": 0.5772762907380445, "grad_norm": 1.213584065437317, "learning_rate": 5.1515000000000004e-05, "loss": 0.4118, "step": 10309 }, { "epoch": 0.5773322880501736, "grad_norm": 1.2105871438980103, "learning_rate": 5.152e-05, "loss": 0.4503, "step": 10310 }, { "epoch": 0.5773882853623026, "grad_norm": 1.268365740776062, "learning_rate": 5.1525e-05, "loss": 0.3572, "step": 10311 }, { "epoch": 0.5774442826744316, "grad_norm": 1.1743181943893433, "learning_rate": 5.153e-05, "loss": 0.3623, "step": 10312 }, { "epoch": 0.5775002799865606, "grad_norm": 1.4468861818313599, "learning_rate": 5.1535e-05, "loss": 0.6882, "step": 10313 }, { "epoch": 0.5775562772986896, "grad_norm": 1.0792425870895386, "learning_rate": 5.154e-05, "loss": 0.4079, "step": 10314 }, { "epoch": 0.5776122746108187, "grad_norm": 1.282312035560608, "learning_rate": 5.1545e-05, "loss": 0.3473, "step": 10315 }, { "epoch": 0.5776682719229477, "grad_norm": 1.4069294929504395, "learning_rate": 5.155e-05, "loss": 0.4258, "step": 10316 }, { "epoch": 0.5777242692350767, "grad_norm": 1.3479154109954834, "learning_rate": 5.1555e-05, "loss": 0.4756, "step": 10317 }, { "epoch": 0.5777802665472057, "grad_norm": 1.4789682626724243, "learning_rate": 5.1559999999999994e-05, "loss": 0.5154, "step": 10318 }, { "epoch": 0.5778362638593347, "grad_norm": 1.3756661415100098, "learning_rate": 5.1565000000000005e-05, "loss": 0.5582, "step": 10319 }, { "epoch": 0.5778922611714638, "grad_norm": 1.2096366882324219, "learning_rate": 5.157000000000001e-05, "loss": 0.4084, "step": 10320 }, { "epoch": 0.5779482584835928, "grad_norm": 1.2413229942321777, "learning_rate": 5.157500000000001e-05, "loss": 0.3693, "step": 10321 }, { "epoch": 0.5780042557957218, "grad_norm": 1.2954576015472412, "learning_rate": 5.1580000000000004e-05, "loss": 0.4466, "step": 10322 }, { "epoch": 0.5780602531078508, "grad_norm": 1.0908390283584595, 
"learning_rate": 5.158500000000001e-05, "loss": 0.4197, "step": 10323 }, { "epoch": 0.5781162504199798, "grad_norm": 1.415282130241394, "learning_rate": 5.1590000000000006e-05, "loss": 0.429, "step": 10324 }, { "epoch": 0.5781722477321088, "grad_norm": 1.2666020393371582, "learning_rate": 5.1595e-05, "loss": 0.503, "step": 10325 }, { "epoch": 0.5782282450442379, "grad_norm": 1.303097128868103, "learning_rate": 5.16e-05, "loss": 0.398, "step": 10326 }, { "epoch": 0.5782842423563669, "grad_norm": 1.124499797821045, "learning_rate": 5.1605000000000005e-05, "loss": 0.324, "step": 10327 }, { "epoch": 0.5783402396684959, "grad_norm": 1.4660717248916626, "learning_rate": 5.161e-05, "loss": 0.3562, "step": 10328 }, { "epoch": 0.5783962369806249, "grad_norm": 1.2222203016281128, "learning_rate": 5.1615e-05, "loss": 0.3489, "step": 10329 }, { "epoch": 0.5784522342927539, "grad_norm": 2.0427260398864746, "learning_rate": 5.1620000000000004e-05, "loss": 0.339, "step": 10330 }, { "epoch": 0.578508231604883, "grad_norm": 1.3806908130645752, "learning_rate": 5.1625e-05, "loss": 0.4112, "step": 10331 }, { "epoch": 0.578564228917012, "grad_norm": 2.235337495803833, "learning_rate": 5.163e-05, "loss": 0.4836, "step": 10332 }, { "epoch": 0.578620226229141, "grad_norm": 1.1346064805984497, "learning_rate": 5.1635e-05, "loss": 0.3712, "step": 10333 }, { "epoch": 0.57867622354127, "grad_norm": 1.5988128185272217, "learning_rate": 5.164e-05, "loss": 0.4664, "step": 10334 }, { "epoch": 0.578732220853399, "grad_norm": 1.2711799144744873, "learning_rate": 5.1645e-05, "loss": 0.3467, "step": 10335 }, { "epoch": 0.5787882181655281, "grad_norm": 1.3669320344924927, "learning_rate": 5.1649999999999995e-05, "loss": 0.4762, "step": 10336 }, { "epoch": 0.5788442154776571, "grad_norm": 1.696080207824707, "learning_rate": 5.1655e-05, "loss": 0.5719, "step": 10337 }, { "epoch": 0.5789002127897861, "grad_norm": 1.1864807605743408, "learning_rate": 5.166e-05, "loss": 0.7109, "step": 10338 }, { "epoch": 
0.5789562101019151, "grad_norm": 1.3186880350112915, "learning_rate": 5.166500000000001e-05, "loss": 0.3441, "step": 10339 }, { "epoch": 0.5790122074140441, "grad_norm": 1.121558666229248, "learning_rate": 5.1670000000000005e-05, "loss": 0.3171, "step": 10340 }, { "epoch": 0.5790682047261732, "grad_norm": 1.4229801893234253, "learning_rate": 5.167500000000001e-05, "loss": 0.6164, "step": 10341 }, { "epoch": 0.5791242020383022, "grad_norm": 1.4089537858963013, "learning_rate": 5.168000000000001e-05, "loss": 0.5846, "step": 10342 }, { "epoch": 0.5791801993504312, "grad_norm": 1.085536003112793, "learning_rate": 5.1685000000000004e-05, "loss": 0.3138, "step": 10343 }, { "epoch": 0.5792361966625602, "grad_norm": 1.33896803855896, "learning_rate": 5.169000000000001e-05, "loss": 0.4165, "step": 10344 }, { "epoch": 0.5792921939746892, "grad_norm": 1.3204853534698486, "learning_rate": 5.1695000000000006e-05, "loss": 0.4337, "step": 10345 }, { "epoch": 0.5793481912868182, "grad_norm": 1.2813446521759033, "learning_rate": 5.17e-05, "loss": 0.4423, "step": 10346 }, { "epoch": 0.5794041885989473, "grad_norm": 1.2630864381790161, "learning_rate": 5.1705e-05, "loss": 0.3796, "step": 10347 }, { "epoch": 0.5794601859110763, "grad_norm": 1.107636570930481, "learning_rate": 5.1710000000000005e-05, "loss": 0.343, "step": 10348 }, { "epoch": 0.5795161832232053, "grad_norm": 1.1181697845458984, "learning_rate": 5.1715e-05, "loss": 0.4129, "step": 10349 }, { "epoch": 0.5795721805353343, "grad_norm": 1.3029804229736328, "learning_rate": 5.172e-05, "loss": 0.4137, "step": 10350 }, { "epoch": 0.5796281778474633, "grad_norm": 1.280713677406311, "learning_rate": 5.1725000000000004e-05, "loss": 0.4269, "step": 10351 }, { "epoch": 0.5796841751595924, "grad_norm": 1.6649599075317383, "learning_rate": 5.173e-05, "loss": 0.5059, "step": 10352 }, { "epoch": 0.5797401724717214, "grad_norm": 1.3355119228363037, "learning_rate": 5.1735e-05, "loss": 0.4417, "step": 10353 }, { "epoch": 
0.5797961697838504, "grad_norm": 1.4218158721923828, "learning_rate": 5.174e-05, "loss": 0.4339, "step": 10354 }, { "epoch": 0.5798521670959794, "grad_norm": 1.2330162525177002, "learning_rate": 5.1745e-05, "loss": 0.4606, "step": 10355 }, { "epoch": 0.5799081644081084, "grad_norm": 1.3094184398651123, "learning_rate": 5.175e-05, "loss": 0.4366, "step": 10356 }, { "epoch": 0.5799641617202375, "grad_norm": 1.5101141929626465, "learning_rate": 5.1754999999999995e-05, "loss": 0.5264, "step": 10357 }, { "epoch": 0.5800201590323665, "grad_norm": 1.387771725654602, "learning_rate": 5.176e-05, "loss": 0.5416, "step": 10358 }, { "epoch": 0.5800761563444955, "grad_norm": 1.2103217840194702, "learning_rate": 5.176500000000001e-05, "loss": 0.5158, "step": 10359 }, { "epoch": 0.5801321536566245, "grad_norm": 1.405078411102295, "learning_rate": 5.177000000000001e-05, "loss": 0.4028, "step": 10360 }, { "epoch": 0.5801881509687535, "grad_norm": 2.192035675048828, "learning_rate": 5.1775000000000005e-05, "loss": 0.4246, "step": 10361 }, { "epoch": 0.5802441482808826, "grad_norm": 1.244006872177124, "learning_rate": 5.178000000000001e-05, "loss": 0.3975, "step": 10362 }, { "epoch": 0.5803001455930116, "grad_norm": 1.2485039234161377, "learning_rate": 5.1785000000000006e-05, "loss": 0.4321, "step": 10363 }, { "epoch": 0.5803561429051406, "grad_norm": 1.3139088153839111, "learning_rate": 5.1790000000000004e-05, "loss": 0.4581, "step": 10364 }, { "epoch": 0.5804121402172696, "grad_norm": 1.3031911849975586, "learning_rate": 5.1795e-05, "loss": 0.5174, "step": 10365 }, { "epoch": 0.5804681375293986, "grad_norm": 1.196021556854248, "learning_rate": 5.1800000000000005e-05, "loss": 0.3223, "step": 10366 }, { "epoch": 0.5805241348415277, "grad_norm": 1.153767466545105, "learning_rate": 5.1805e-05, "loss": 0.411, "step": 10367 }, { "epoch": 0.5805801321536567, "grad_norm": 1.1274921894073486, "learning_rate": 5.181e-05, "loss": 0.3668, "step": 10368 }, { "epoch": 0.5806361294657857, 
"grad_norm": 1.3947771787643433, "learning_rate": 5.1815000000000005e-05, "loss": 0.325, "step": 10369 }, { "epoch": 0.5806921267779147, "grad_norm": 1.2500982284545898, "learning_rate": 5.182e-05, "loss": 0.4032, "step": 10370 }, { "epoch": 0.5807481240900437, "grad_norm": 1.1656206846237183, "learning_rate": 5.1825e-05, "loss": 0.4406, "step": 10371 }, { "epoch": 0.5808041214021727, "grad_norm": 1.7991421222686768, "learning_rate": 5.1830000000000004e-05, "loss": 0.5553, "step": 10372 }, { "epoch": 0.5808601187143018, "grad_norm": 1.2823164463043213, "learning_rate": 5.1835e-05, "loss": 0.3647, "step": 10373 }, { "epoch": 0.5809161160264308, "grad_norm": 1.5040351152420044, "learning_rate": 5.184e-05, "loss": 0.5526, "step": 10374 }, { "epoch": 0.5809721133385598, "grad_norm": 1.8107975721359253, "learning_rate": 5.1844999999999996e-05, "loss": 0.6746, "step": 10375 }, { "epoch": 0.5810281106506887, "grad_norm": 1.2524305582046509, "learning_rate": 5.185e-05, "loss": 0.3989, "step": 10376 }, { "epoch": 0.5810841079628177, "grad_norm": 1.2219644784927368, "learning_rate": 5.1855e-05, "loss": 0.4442, "step": 10377 }, { "epoch": 0.5811401052749467, "grad_norm": 1.2759735584259033, "learning_rate": 5.1859999999999995e-05, "loss": 0.3981, "step": 10378 }, { "epoch": 0.5811961025870758, "grad_norm": 1.234581708908081, "learning_rate": 5.1865000000000006e-05, "loss": 0.4084, "step": 10379 }, { "epoch": 0.5812520998992048, "grad_norm": 1.4750521183013916, "learning_rate": 5.187000000000001e-05, "loss": 0.501, "step": 10380 }, { "epoch": 0.5813080972113338, "grad_norm": 1.4189141988754272, "learning_rate": 5.187500000000001e-05, "loss": 0.4196, "step": 10381 }, { "epoch": 0.5813640945234628, "grad_norm": 1.362120509147644, "learning_rate": 5.1880000000000005e-05, "loss": 0.4634, "step": 10382 }, { "epoch": 0.5814200918355918, "grad_norm": 1.1404772996902466, "learning_rate": 5.188500000000001e-05, "loss": 0.3291, "step": 10383 }, { "epoch": 0.5814760891477209, 
"grad_norm": 1.1161601543426514, "learning_rate": 5.1890000000000006e-05, "loss": 0.4301, "step": 10384 }, { "epoch": 0.5815320864598499, "grad_norm": 1.398680329322815, "learning_rate": 5.1895000000000004e-05, "loss": 0.5729, "step": 10385 }, { "epoch": 0.5815880837719789, "grad_norm": 1.1788133382797241, "learning_rate": 5.19e-05, "loss": 0.4033, "step": 10386 }, { "epoch": 0.5816440810841079, "grad_norm": 1.075253963470459, "learning_rate": 5.1905000000000005e-05, "loss": 0.4779, "step": 10387 }, { "epoch": 0.5817000783962369, "grad_norm": 1.4342814683914185, "learning_rate": 5.191e-05, "loss": 0.56, "step": 10388 }, { "epoch": 0.581756075708366, "grad_norm": 1.4962137937545776, "learning_rate": 5.1915e-05, "loss": 0.4512, "step": 10389 }, { "epoch": 0.581812073020495, "grad_norm": 1.5346430540084839, "learning_rate": 5.1920000000000004e-05, "loss": 0.4255, "step": 10390 }, { "epoch": 0.581868070332624, "grad_norm": 1.1646647453308105, "learning_rate": 5.1925e-05, "loss": 0.3365, "step": 10391 }, { "epoch": 0.581924067644753, "grad_norm": 1.2096935510635376, "learning_rate": 5.193e-05, "loss": 0.3587, "step": 10392 }, { "epoch": 0.581980064956882, "grad_norm": 1.6582748889923096, "learning_rate": 5.1935e-05, "loss": 0.4877, "step": 10393 }, { "epoch": 0.5820360622690111, "grad_norm": 1.4083611965179443, "learning_rate": 5.194e-05, "loss": 0.474, "step": 10394 }, { "epoch": 0.5820920595811401, "grad_norm": 1.2445719242095947, "learning_rate": 5.1945e-05, "loss": 0.4816, "step": 10395 }, { "epoch": 0.5821480568932691, "grad_norm": 20.22435760498047, "learning_rate": 5.1949999999999996e-05, "loss": 0.5547, "step": 10396 }, { "epoch": 0.5822040542053981, "grad_norm": 1.334487795829773, "learning_rate": 5.1955e-05, "loss": 0.3963, "step": 10397 }, { "epoch": 0.5822600515175271, "grad_norm": 1.431045413017273, "learning_rate": 5.196e-05, "loss": 0.4289, "step": 10398 }, { "epoch": 0.5823160488296562, "grad_norm": 1.3006852865219116, "learning_rate": 
5.1964999999999995e-05, "loss": 0.4283, "step": 10399 }, { "epoch": 0.5823720461417852, "grad_norm": 1.5506912469863892, "learning_rate": 5.1970000000000006e-05, "loss": 0.4408, "step": 10400 }, { "epoch": 0.5824280434539142, "grad_norm": 1.452589988708496, "learning_rate": 5.197500000000001e-05, "loss": 0.513, "step": 10401 }, { "epoch": 0.5824840407660432, "grad_norm": 1.1917253732681274, "learning_rate": 5.198000000000001e-05, "loss": 0.3804, "step": 10402 }, { "epoch": 0.5825400380781722, "grad_norm": 1.2255934476852417, "learning_rate": 5.1985000000000005e-05, "loss": 0.4037, "step": 10403 }, { "epoch": 0.5825960353903012, "grad_norm": 1.1572686433792114, "learning_rate": 5.199000000000001e-05, "loss": 0.3719, "step": 10404 }, { "epoch": 0.5826520327024303, "grad_norm": 1.2482918500900269, "learning_rate": 5.1995000000000006e-05, "loss": 0.4869, "step": 10405 }, { "epoch": 0.5827080300145593, "grad_norm": 1.2625491619110107, "learning_rate": 5.2000000000000004e-05, "loss": 0.5219, "step": 10406 }, { "epoch": 0.5827640273266883, "grad_norm": 1.3208270072937012, "learning_rate": 5.2005e-05, "loss": 0.3986, "step": 10407 }, { "epoch": 0.5828200246388173, "grad_norm": 1.1687214374542236, "learning_rate": 5.2010000000000005e-05, "loss": 0.2842, "step": 10408 }, { "epoch": 0.5828760219509463, "grad_norm": 1.534670114517212, "learning_rate": 5.2015e-05, "loss": 0.6339, "step": 10409 }, { "epoch": 0.5829320192630754, "grad_norm": 1.1783336400985718, "learning_rate": 5.202e-05, "loss": 0.3793, "step": 10410 }, { "epoch": 0.5829880165752044, "grad_norm": 1.3272316455841064, "learning_rate": 5.2025000000000004e-05, "loss": 0.345, "step": 10411 }, { "epoch": 0.5830440138873334, "grad_norm": 1.3698999881744385, "learning_rate": 5.203e-05, "loss": 0.5388, "step": 10412 }, { "epoch": 0.5831000111994624, "grad_norm": 1.1794499158859253, "learning_rate": 5.2035e-05, "loss": 0.3658, "step": 10413 }, { "epoch": 0.5831560085115914, "grad_norm": 1.287438154220581, "learning_rate": 
5.204e-05, "loss": 0.5242, "step": 10414 }, { "epoch": 0.5832120058237205, "grad_norm": 1.367053508758545, "learning_rate": 5.2045e-05, "loss": 0.5462, "step": 10415 }, { "epoch": 0.5832680031358495, "grad_norm": 1.1334296464920044, "learning_rate": 5.205e-05, "loss": 0.345, "step": 10416 }, { "epoch": 0.5833240004479785, "grad_norm": 1.7370926141738892, "learning_rate": 5.2054999999999995e-05, "loss": 0.4888, "step": 10417 }, { "epoch": 0.5833799977601075, "grad_norm": 1.3780725002288818, "learning_rate": 5.206e-05, "loss": 0.5872, "step": 10418 }, { "epoch": 0.5834359950722365, "grad_norm": 1.4774549007415771, "learning_rate": 5.2065e-05, "loss": 0.5877, "step": 10419 }, { "epoch": 0.5834919923843656, "grad_norm": 1.231362223625183, "learning_rate": 5.207000000000001e-05, "loss": 0.3213, "step": 10420 }, { "epoch": 0.5835479896964946, "grad_norm": 1.2752066850662231, "learning_rate": 5.2075000000000005e-05, "loss": 0.3385, "step": 10421 }, { "epoch": 0.5836039870086236, "grad_norm": 1.5953947305679321, "learning_rate": 5.208000000000001e-05, "loss": 0.4162, "step": 10422 }, { "epoch": 0.5836599843207526, "grad_norm": 1.1868245601654053, "learning_rate": 5.208500000000001e-05, "loss": 0.5124, "step": 10423 }, { "epoch": 0.5837159816328816, "grad_norm": 1.6609355211257935, "learning_rate": 5.2090000000000004e-05, "loss": 0.4391, "step": 10424 }, { "epoch": 0.5837719789450107, "grad_norm": 1.9022648334503174, "learning_rate": 5.2095e-05, "loss": 0.4716, "step": 10425 }, { "epoch": 0.5838279762571397, "grad_norm": 1.5023819208145142, "learning_rate": 5.2100000000000006e-05, "loss": 0.6172, "step": 10426 }, { "epoch": 0.5838839735692687, "grad_norm": 1.3575319051742554, "learning_rate": 5.2105000000000003e-05, "loss": 0.455, "step": 10427 }, { "epoch": 0.5839399708813977, "grad_norm": 1.2734533548355103, "learning_rate": 5.211e-05, "loss": 0.4149, "step": 10428 }, { "epoch": 0.5839959681935267, "grad_norm": 1.1898829936981201, "learning_rate": 5.2115000000000005e-05, 
"loss": 0.4806, "step": 10429 }, { "epoch": 0.5840519655056557, "grad_norm": 1.3889342546463013, "learning_rate": 5.212e-05, "loss": 0.5, "step": 10430 }, { "epoch": 0.5841079628177848, "grad_norm": 1.3933897018432617, "learning_rate": 5.2125e-05, "loss": 0.3554, "step": 10431 }, { "epoch": 0.5841639601299138, "grad_norm": 1.0557312965393066, "learning_rate": 5.2130000000000004e-05, "loss": 0.3702, "step": 10432 }, { "epoch": 0.5842199574420428, "grad_norm": 1.0459210872650146, "learning_rate": 5.2135e-05, "loss": 0.3555, "step": 10433 }, { "epoch": 0.5842759547541718, "grad_norm": 1.7518179416656494, "learning_rate": 5.214e-05, "loss": 0.4487, "step": 10434 }, { "epoch": 0.5843319520663008, "grad_norm": 1.1503556966781616, "learning_rate": 5.2144999999999996e-05, "loss": 0.4245, "step": 10435 }, { "epoch": 0.5843879493784299, "grad_norm": 1.4215766191482544, "learning_rate": 5.215e-05, "loss": 0.441, "step": 10436 }, { "epoch": 0.5844439466905589, "grad_norm": 1.3397252559661865, "learning_rate": 5.2155e-05, "loss": 0.3169, "step": 10437 }, { "epoch": 0.5844999440026879, "grad_norm": 1.2923110723495483, "learning_rate": 5.2159999999999995e-05, "loss": 0.4483, "step": 10438 }, { "epoch": 0.5845559413148169, "grad_norm": 1.4886518716812134, "learning_rate": 5.2165e-05, "loss": 0.4953, "step": 10439 }, { "epoch": 0.5846119386269459, "grad_norm": 1.6069964170455933, "learning_rate": 5.217000000000001e-05, "loss": 0.5019, "step": 10440 }, { "epoch": 0.584667935939075, "grad_norm": 1.211219072341919, "learning_rate": 5.217500000000001e-05, "loss": 0.5428, "step": 10441 }, { "epoch": 0.584723933251204, "grad_norm": 1.6830753087997437, "learning_rate": 5.2180000000000005e-05, "loss": 0.5872, "step": 10442 }, { "epoch": 0.584779930563333, "grad_norm": 1.4909956455230713, "learning_rate": 5.218500000000001e-05, "loss": 0.566, "step": 10443 }, { "epoch": 0.584835927875462, "grad_norm": 1.2251861095428467, "learning_rate": 5.219000000000001e-05, "loss": 0.4313, "step": 10444 
}, { "epoch": 0.584891925187591, "grad_norm": 1.5191506147384644, "learning_rate": 5.2195000000000004e-05, "loss": 0.6065, "step": 10445 }, { "epoch": 0.58494792249972, "grad_norm": 1.486066460609436, "learning_rate": 5.22e-05, "loss": 0.4421, "step": 10446 }, { "epoch": 0.5850039198118491, "grad_norm": 1.4842216968536377, "learning_rate": 5.2205000000000006e-05, "loss": 0.4484, "step": 10447 }, { "epoch": 0.5850599171239781, "grad_norm": 1.1850941181182861, "learning_rate": 5.221e-05, "loss": 0.3796, "step": 10448 }, { "epoch": 0.5851159144361071, "grad_norm": 1.4743479490280151, "learning_rate": 5.2215e-05, "loss": 0.3642, "step": 10449 }, { "epoch": 0.5851719117482361, "grad_norm": 1.1929072141647339, "learning_rate": 5.2220000000000005e-05, "loss": 0.3456, "step": 10450 }, { "epoch": 0.5852279090603651, "grad_norm": 1.238207459449768, "learning_rate": 5.2225e-05, "loss": 0.3727, "step": 10451 }, { "epoch": 0.5852839063724942, "grad_norm": 1.2077758312225342, "learning_rate": 5.223e-05, "loss": 0.3962, "step": 10452 }, { "epoch": 0.5853399036846232, "grad_norm": 1.3768528699874878, "learning_rate": 5.2235000000000004e-05, "loss": 0.4577, "step": 10453 }, { "epoch": 0.5853959009967522, "grad_norm": 1.1925586462020874, "learning_rate": 5.224e-05, "loss": 0.54, "step": 10454 }, { "epoch": 0.5854518983088812, "grad_norm": 1.4597384929656982, "learning_rate": 5.2245e-05, "loss": 0.3021, "step": 10455 }, { "epoch": 0.5855078956210102, "grad_norm": 1.1666860580444336, "learning_rate": 5.2249999999999996e-05, "loss": 0.3434, "step": 10456 }, { "epoch": 0.5855638929331393, "grad_norm": 1.5159988403320312, "learning_rate": 5.2255e-05, "loss": 0.5155, "step": 10457 }, { "epoch": 0.5856198902452683, "grad_norm": 1.2205889225006104, "learning_rate": 5.226e-05, "loss": 0.3108, "step": 10458 }, { "epoch": 0.5856758875573972, "grad_norm": 1.1630786657333374, "learning_rate": 5.2264999999999995e-05, "loss": 0.4019, "step": 10459 }, { "epoch": 0.5857318848695262, "grad_norm": 
1.2360212802886963, "learning_rate": 5.2270000000000006e-05, "loss": 0.4528, "step": 10460 }, { "epoch": 0.5857878821816552, "grad_norm": 1.3985111713409424, "learning_rate": 5.227500000000001e-05, "loss": 0.4072, "step": 10461 }, { "epoch": 0.5858438794937842, "grad_norm": 1.2422573566436768, "learning_rate": 5.228000000000001e-05, "loss": 0.4397, "step": 10462 }, { "epoch": 0.5858998768059133, "grad_norm": 1.134757161140442, "learning_rate": 5.2285000000000005e-05, "loss": 0.3392, "step": 10463 }, { "epoch": 0.5859558741180423, "grad_norm": 1.3189239501953125, "learning_rate": 5.229e-05, "loss": 0.6016, "step": 10464 }, { "epoch": 0.5860118714301713, "grad_norm": 2.8348681926727295, "learning_rate": 5.229500000000001e-05, "loss": 0.3658, "step": 10465 }, { "epoch": 0.5860678687423003, "grad_norm": 1.1461681127548218, "learning_rate": 5.2300000000000004e-05, "loss": 0.5019, "step": 10466 }, { "epoch": 0.5861238660544293, "grad_norm": 1.1149210929870605, "learning_rate": 5.2305e-05, "loss": 0.4242, "step": 10467 }, { "epoch": 0.5861798633665584, "grad_norm": 1.132940649986267, "learning_rate": 5.2310000000000006e-05, "loss": 0.4808, "step": 10468 }, { "epoch": 0.5862358606786874, "grad_norm": 1.4029748439788818, "learning_rate": 5.2315e-05, "loss": 0.4697, "step": 10469 }, { "epoch": 0.5862918579908164, "grad_norm": 1.6982300281524658, "learning_rate": 5.232e-05, "loss": 0.3914, "step": 10470 }, { "epoch": 0.5863478553029454, "grad_norm": 1.6048387289047241, "learning_rate": 5.2325000000000005e-05, "loss": 0.4111, "step": 10471 }, { "epoch": 0.5864038526150744, "grad_norm": 1.2392228841781616, "learning_rate": 5.233e-05, "loss": 0.4016, "step": 10472 }, { "epoch": 0.5864598499272035, "grad_norm": 1.248370885848999, "learning_rate": 5.2335e-05, "loss": 0.4372, "step": 10473 }, { "epoch": 0.5865158472393325, "grad_norm": 1.248731255531311, "learning_rate": 5.234e-05, "loss": 0.5049, "step": 10474 }, { "epoch": 0.5865718445514615, "grad_norm": 1.577825665473938, 
"learning_rate": 5.2345e-05, "loss": 0.5232, "step": 10475 }, { "epoch": 0.5866278418635905, "grad_norm": 15.490825653076172, "learning_rate": 5.235e-05, "loss": 0.6373, "step": 10476 }, { "epoch": 0.5866838391757195, "grad_norm": 1.3385379314422607, "learning_rate": 5.2354999999999996e-05, "loss": 0.405, "step": 10477 }, { "epoch": 0.5867398364878486, "grad_norm": 1.0995076894760132, "learning_rate": 5.236e-05, "loss": 0.3462, "step": 10478 }, { "epoch": 0.5867958337999776, "grad_norm": 1.135644793510437, "learning_rate": 5.2365e-05, "loss": 0.4616, "step": 10479 }, { "epoch": 0.5868518311121066, "grad_norm": 1.2388736009597778, "learning_rate": 5.237000000000001e-05, "loss": 0.3648, "step": 10480 }, { "epoch": 0.5869078284242356, "grad_norm": 1.3318978548049927, "learning_rate": 5.2375000000000006e-05, "loss": 0.5455, "step": 10481 }, { "epoch": 0.5869638257363646, "grad_norm": 1.180772066116333, "learning_rate": 5.238000000000001e-05, "loss": 0.4188, "step": 10482 }, { "epoch": 0.5870198230484936, "grad_norm": 1.0729402303695679, "learning_rate": 5.238500000000001e-05, "loss": 0.395, "step": 10483 }, { "epoch": 0.5870758203606227, "grad_norm": 1.390486717224121, "learning_rate": 5.2390000000000005e-05, "loss": 0.48, "step": 10484 }, { "epoch": 0.5871318176727517, "grad_norm": 1.126437783241272, "learning_rate": 5.2395e-05, "loss": 0.3945, "step": 10485 }, { "epoch": 0.5871878149848807, "grad_norm": 1.2294518947601318, "learning_rate": 5.2400000000000007e-05, "loss": 0.3782, "step": 10486 }, { "epoch": 0.5872438122970097, "grad_norm": 1.1480926275253296, "learning_rate": 5.2405000000000004e-05, "loss": 0.3972, "step": 10487 }, { "epoch": 0.5872998096091387, "grad_norm": 1.2376410961151123, "learning_rate": 5.241e-05, "loss": 0.5238, "step": 10488 }, { "epoch": 0.5873558069212678, "grad_norm": 1.3651033639907837, "learning_rate": 5.2415000000000006e-05, "loss": 0.4656, "step": 10489 }, { "epoch": 0.5874118042333968, "grad_norm": 1.3575665950775146, 
"learning_rate": 5.242e-05, "loss": 0.4168, "step": 10490 }, { "epoch": 0.5874678015455258, "grad_norm": 1.1683986186981201, "learning_rate": 5.2425e-05, "loss": 0.4651, "step": 10491 }, { "epoch": 0.5875237988576548, "grad_norm": 1.036370038986206, "learning_rate": 5.2430000000000005e-05, "loss": 0.2864, "step": 10492 }, { "epoch": 0.5875797961697838, "grad_norm": 1.2672920227050781, "learning_rate": 5.2435e-05, "loss": 0.4514, "step": 10493 }, { "epoch": 0.5876357934819129, "grad_norm": 1.1965370178222656, "learning_rate": 5.244e-05, "loss": 0.3968, "step": 10494 }, { "epoch": 0.5876917907940419, "grad_norm": 1.398776888847351, "learning_rate": 5.2445e-05, "loss": 0.4031, "step": 10495 }, { "epoch": 0.5877477881061709, "grad_norm": 1.222611904144287, "learning_rate": 5.245e-05, "loss": 0.3937, "step": 10496 }, { "epoch": 0.5878037854182999, "grad_norm": 2.0282087326049805, "learning_rate": 5.2455e-05, "loss": 0.5691, "step": 10497 }, { "epoch": 0.5878597827304289, "grad_norm": 1.4633281230926514, "learning_rate": 5.2459999999999996e-05, "loss": 0.3477, "step": 10498 }, { "epoch": 0.587915780042558, "grad_norm": 1.387283205986023, "learning_rate": 5.2465e-05, "loss": 0.4913, "step": 10499 }, { "epoch": 0.587971777354687, "grad_norm": 1.2462290525436401, "learning_rate": 5.247000000000001e-05, "loss": 0.4113, "step": 10500 }, { "epoch": 0.588027774666816, "grad_norm": 1.4916491508483887, "learning_rate": 5.247500000000001e-05, "loss": 0.3696, "step": 10501 }, { "epoch": 0.588083771978945, "grad_norm": 6.327125072479248, "learning_rate": 5.2480000000000006e-05, "loss": 0.4427, "step": 10502 }, { "epoch": 0.588139769291074, "grad_norm": 1.3494855165481567, "learning_rate": 5.2485e-05, "loss": 0.3723, "step": 10503 }, { "epoch": 0.588195766603203, "grad_norm": 1.153686285018921, "learning_rate": 5.249000000000001e-05, "loss": 0.4094, "step": 10504 }, { "epoch": 0.5882517639153321, "grad_norm": 1.3393079042434692, "learning_rate": 5.2495000000000005e-05, "loss": 
0.4726, "step": 10505 }, { "epoch": 0.5883077612274611, "grad_norm": 1.1729991436004639, "learning_rate": 5.25e-05, "loss": 0.3605, "step": 10506 }, { "epoch": 0.5883637585395901, "grad_norm": 1.1829841136932373, "learning_rate": 5.2505000000000006e-05, "loss": 0.3994, "step": 10507 }, { "epoch": 0.5884197558517191, "grad_norm": 1.4506311416625977, "learning_rate": 5.2510000000000004e-05, "loss": 0.6831, "step": 10508 }, { "epoch": 0.5884757531638481, "grad_norm": 1.20071280002594, "learning_rate": 5.2515e-05, "loss": 0.3928, "step": 10509 }, { "epoch": 0.5885317504759772, "grad_norm": 1.2628368139266968, "learning_rate": 5.2520000000000005e-05, "loss": 0.4455, "step": 10510 }, { "epoch": 0.5885877477881062, "grad_norm": 1.1907291412353516, "learning_rate": 5.2525e-05, "loss": 0.4332, "step": 10511 }, { "epoch": 0.5886437451002352, "grad_norm": 1.2729754447937012, "learning_rate": 5.253e-05, "loss": 0.315, "step": 10512 }, { "epoch": 0.5886997424123642, "grad_norm": 1.0806745290756226, "learning_rate": 5.2535e-05, "loss": 0.3771, "step": 10513 }, { "epoch": 0.5887557397244932, "grad_norm": 1.6687424182891846, "learning_rate": 5.254e-05, "loss": 0.5379, "step": 10514 }, { "epoch": 0.5888117370366223, "grad_norm": 1.374032974243164, "learning_rate": 5.2545e-05, "loss": 0.4884, "step": 10515 }, { "epoch": 0.5888677343487513, "grad_norm": 1.7508906126022339, "learning_rate": 5.255e-05, "loss": 0.3592, "step": 10516 }, { "epoch": 0.5889237316608803, "grad_norm": 1.1985833644866943, "learning_rate": 5.2555e-05, "loss": 0.4139, "step": 10517 }, { "epoch": 0.5889797289730093, "grad_norm": 1.2333550453186035, "learning_rate": 5.256e-05, "loss": 0.3817, "step": 10518 }, { "epoch": 0.5890357262851383, "grad_norm": 1.2691351175308228, "learning_rate": 5.2564999999999996e-05, "loss": 0.4359, "step": 10519 }, { "epoch": 0.5890917235972674, "grad_norm": 1.2514894008636475, "learning_rate": 5.257e-05, "loss": 0.5062, "step": 10520 }, { "epoch": 0.5891477209093964, "grad_norm": 
1.4081145524978638, "learning_rate": 5.257500000000001e-05, "loss": 0.3867, "step": 10521 }, { "epoch": 0.5892037182215254, "grad_norm": 1.2950271368026733, "learning_rate": 5.258000000000001e-05, "loss": 0.4013, "step": 10522 }, { "epoch": 0.5892597155336544, "grad_norm": 1.096846342086792, "learning_rate": 5.2585000000000006e-05, "loss": 0.4453, "step": 10523 }, { "epoch": 0.5893157128457834, "grad_norm": 1.0097601413726807, "learning_rate": 5.259e-05, "loss": 0.4251, "step": 10524 }, { "epoch": 0.5893717101579125, "grad_norm": 1.2172775268554688, "learning_rate": 5.259500000000001e-05, "loss": 0.3619, "step": 10525 }, { "epoch": 0.5894277074700415, "grad_norm": 1.1469647884368896, "learning_rate": 5.2600000000000005e-05, "loss": 0.4751, "step": 10526 }, { "epoch": 0.5894837047821705, "grad_norm": 1.3321958780288696, "learning_rate": 5.2605e-05, "loss": 0.3585, "step": 10527 }, { "epoch": 0.5895397020942995, "grad_norm": 1.2122538089752197, "learning_rate": 5.2610000000000006e-05, "loss": 0.3275, "step": 10528 }, { "epoch": 0.5895956994064285, "grad_norm": 1.414595603942871, "learning_rate": 5.2615000000000004e-05, "loss": 0.5268, "step": 10529 }, { "epoch": 0.5896516967185575, "grad_norm": 1.2470372915267944, "learning_rate": 5.262e-05, "loss": 0.3476, "step": 10530 }, { "epoch": 0.5897076940306866, "grad_norm": 1.3111385107040405, "learning_rate": 5.2625000000000005e-05, "loss": 0.4256, "step": 10531 }, { "epoch": 0.5897636913428156, "grad_norm": 1.2466546297073364, "learning_rate": 5.263e-05, "loss": 0.3991, "step": 10532 }, { "epoch": 0.5898196886549446, "grad_norm": 1.1770704984664917, "learning_rate": 5.2635e-05, "loss": 0.2895, "step": 10533 }, { "epoch": 0.5898756859670736, "grad_norm": 1.14566171169281, "learning_rate": 5.264e-05, "loss": 0.5139, "step": 10534 }, { "epoch": 0.5899316832792026, "grad_norm": 1.2309952974319458, "learning_rate": 5.2645e-05, "loss": 0.4874, "step": 10535 }, { "epoch": 0.5899876805913317, "grad_norm": 1.2991855144500732, 
"learning_rate": 5.265e-05, "loss": 0.4646, "step": 10536 }, { "epoch": 0.5900436779034607, "grad_norm": 1.1817588806152344, "learning_rate": 5.2654999999999996e-05, "loss": 0.3928, "step": 10537 }, { "epoch": 0.5900996752155897, "grad_norm": 1.4595263004302979, "learning_rate": 5.266e-05, "loss": 0.4957, "step": 10538 }, { "epoch": 0.5901556725277187, "grad_norm": 1.3468631505966187, "learning_rate": 5.2665e-05, "loss": 0.4032, "step": 10539 }, { "epoch": 0.5902116698398477, "grad_norm": 1.106804370880127, "learning_rate": 5.2669999999999995e-05, "loss": 0.495, "step": 10540 }, { "epoch": 0.5902676671519768, "grad_norm": 1.1846035718917847, "learning_rate": 5.2675000000000006e-05, "loss": 0.3707, "step": 10541 }, { "epoch": 0.5903236644641057, "grad_norm": 1.2655447721481323, "learning_rate": 5.2680000000000004e-05, "loss": 0.4262, "step": 10542 }, { "epoch": 0.5903796617762347, "grad_norm": 1.2150630950927734, "learning_rate": 5.268500000000001e-05, "loss": 0.5081, "step": 10543 }, { "epoch": 0.5904356590883637, "grad_norm": 1.4314662218093872, "learning_rate": 5.2690000000000005e-05, "loss": 0.4474, "step": 10544 }, { "epoch": 0.5904916564004927, "grad_norm": 2.045747756958008, "learning_rate": 5.2695e-05, "loss": 0.4762, "step": 10545 }, { "epoch": 0.5905476537126217, "grad_norm": 1.1559627056121826, "learning_rate": 5.270000000000001e-05, "loss": 0.3582, "step": 10546 }, { "epoch": 0.5906036510247508, "grad_norm": 1.184889793395996, "learning_rate": 5.2705000000000004e-05, "loss": 0.3376, "step": 10547 }, { "epoch": 0.5906596483368798, "grad_norm": 1.159448504447937, "learning_rate": 5.271e-05, "loss": 0.3158, "step": 10548 }, { "epoch": 0.5907156456490088, "grad_norm": 1.1259042024612427, "learning_rate": 5.2715000000000006e-05, "loss": 0.3088, "step": 10549 }, { "epoch": 0.5907716429611378, "grad_norm": 1.158154010772705, "learning_rate": 5.2720000000000003e-05, "loss": 0.3598, "step": 10550 }, { "epoch": 0.5908276402732668, "grad_norm": 1.3170642852783203, 
"learning_rate": 5.2725e-05, "loss": 0.3795, "step": 10551 }, { "epoch": 0.5908836375853959, "grad_norm": 1.4639804363250732, "learning_rate": 5.273e-05, "loss": 0.3567, "step": 10552 }, { "epoch": 0.5909396348975249, "grad_norm": 1.2510403394699097, "learning_rate": 5.2735e-05, "loss": 0.4231, "step": 10553 }, { "epoch": 0.5909956322096539, "grad_norm": 1.335352897644043, "learning_rate": 5.274e-05, "loss": 0.6167, "step": 10554 }, { "epoch": 0.5910516295217829, "grad_norm": 1.1949379444122314, "learning_rate": 5.2745e-05, "loss": 0.3584, "step": 10555 }, { "epoch": 0.5911076268339119, "grad_norm": 1.1833736896514893, "learning_rate": 5.275e-05, "loss": 0.3027, "step": 10556 }, { "epoch": 0.591163624146041, "grad_norm": 1.127034306526184, "learning_rate": 5.2755e-05, "loss": 0.4002, "step": 10557 }, { "epoch": 0.59121962145817, "grad_norm": 1.4355443716049194, "learning_rate": 5.2759999999999996e-05, "loss": 0.5975, "step": 10558 }, { "epoch": 0.591275618770299, "grad_norm": 1.3591961860656738, "learning_rate": 5.2765e-05, "loss": 0.3527, "step": 10559 }, { "epoch": 0.591331616082428, "grad_norm": 1.2719610929489136, "learning_rate": 5.277e-05, "loss": 0.4086, "step": 10560 }, { "epoch": 0.591387613394557, "grad_norm": 1.282096266746521, "learning_rate": 5.277500000000001e-05, "loss": 0.3531, "step": 10561 }, { "epoch": 0.591443610706686, "grad_norm": 2.146393060684204, "learning_rate": 5.2780000000000006e-05, "loss": 0.5905, "step": 10562 }, { "epoch": 0.5914996080188151, "grad_norm": 1.8344500064849854, "learning_rate": 5.2785000000000004e-05, "loss": 0.4916, "step": 10563 }, { "epoch": 0.5915556053309441, "grad_norm": 1.2841085195541382, "learning_rate": 5.279000000000001e-05, "loss": 0.4154, "step": 10564 }, { "epoch": 0.5916116026430731, "grad_norm": 1.4600125551223755, "learning_rate": 5.2795000000000005e-05, "loss": 0.5258, "step": 10565 }, { "epoch": 0.5916675999552021, "grad_norm": 1.319330096244812, "learning_rate": 5.28e-05, "loss": 0.4115, "step": 
10566 }, { "epoch": 0.5917235972673311, "grad_norm": 1.253957748413086, "learning_rate": 5.280500000000001e-05, "loss": 0.4111, "step": 10567 }, { "epoch": 0.5917795945794602, "grad_norm": 2.324519634246826, "learning_rate": 5.2810000000000004e-05, "loss": 0.496, "step": 10568 }, { "epoch": 0.5918355918915892, "grad_norm": 1.4242885112762451, "learning_rate": 5.2815e-05, "loss": 0.4094, "step": 10569 }, { "epoch": 0.5918915892037182, "grad_norm": 1.340866208076477, "learning_rate": 5.2820000000000006e-05, "loss": 0.3748, "step": 10570 }, { "epoch": 0.5919475865158472, "grad_norm": 1.527490496635437, "learning_rate": 5.2825e-05, "loss": 0.5074, "step": 10571 }, { "epoch": 0.5920035838279762, "grad_norm": 1.5297170877456665, "learning_rate": 5.283e-05, "loss": 0.4071, "step": 10572 }, { "epoch": 0.5920595811401053, "grad_norm": 1.1640164852142334, "learning_rate": 5.2835e-05, "loss": 0.5012, "step": 10573 }, { "epoch": 0.5921155784522343, "grad_norm": 1.3273276090621948, "learning_rate": 5.284e-05, "loss": 0.3736, "step": 10574 }, { "epoch": 0.5921715757643633, "grad_norm": 1.29637610912323, "learning_rate": 5.2845e-05, "loss": 0.4081, "step": 10575 }, { "epoch": 0.5922275730764923, "grad_norm": 1.5078063011169434, "learning_rate": 5.285e-05, "loss": 0.8388, "step": 10576 }, { "epoch": 0.5922835703886213, "grad_norm": 1.3077116012573242, "learning_rate": 5.2855e-05, "loss": 0.5383, "step": 10577 }, { "epoch": 0.5923395677007504, "grad_norm": 1.2726671695709229, "learning_rate": 5.286e-05, "loss": 0.4237, "step": 10578 }, { "epoch": 0.5923955650128794, "grad_norm": 1.197482943534851, "learning_rate": 5.2864999999999996e-05, "loss": 0.352, "step": 10579 }, { "epoch": 0.5924515623250084, "grad_norm": 1.6226192712783813, "learning_rate": 5.287e-05, "loss": 0.6134, "step": 10580 }, { "epoch": 0.5925075596371374, "grad_norm": 1.1241241693496704, "learning_rate": 5.2875000000000005e-05, "loss": 0.5402, "step": 10581 }, { "epoch": 0.5925635569492664, "grad_norm": 
1.4471862316131592, "learning_rate": 5.288000000000001e-05, "loss": 0.4068, "step": 10582 }, { "epoch": 0.5926195542613955, "grad_norm": 2.0013887882232666, "learning_rate": 5.2885000000000006e-05, "loss": 0.4744, "step": 10583 }, { "epoch": 0.5926755515735245, "grad_norm": 1.4567906856536865, "learning_rate": 5.2890000000000004e-05, "loss": 0.5836, "step": 10584 }, { "epoch": 0.5927315488856535, "grad_norm": 1.3054038286209106, "learning_rate": 5.289500000000001e-05, "loss": 0.4638, "step": 10585 }, { "epoch": 0.5927875461977825, "grad_norm": 1.1921995878219604, "learning_rate": 5.2900000000000005e-05, "loss": 0.3657, "step": 10586 }, { "epoch": 0.5928435435099115, "grad_norm": 1.283695101737976, "learning_rate": 5.2905e-05, "loss": 0.4191, "step": 10587 }, { "epoch": 0.5928995408220405, "grad_norm": 1.2014490365982056, "learning_rate": 5.291000000000001e-05, "loss": 0.3812, "step": 10588 }, { "epoch": 0.5929555381341696, "grad_norm": 1.3544204235076904, "learning_rate": 5.2915000000000004e-05, "loss": 0.3469, "step": 10589 }, { "epoch": 0.5930115354462986, "grad_norm": 1.3510054349899292, "learning_rate": 5.292e-05, "loss": 0.442, "step": 10590 }, { "epoch": 0.5930675327584276, "grad_norm": 1.387406349182129, "learning_rate": 5.2925000000000006e-05, "loss": 0.5564, "step": 10591 }, { "epoch": 0.5931235300705566, "grad_norm": 1.2237368822097778, "learning_rate": 5.293e-05, "loss": 0.4988, "step": 10592 }, { "epoch": 0.5931795273826856, "grad_norm": 1.5499701499938965, "learning_rate": 5.2935e-05, "loss": 0.4317, "step": 10593 }, { "epoch": 0.5932355246948147, "grad_norm": 6.83128547668457, "learning_rate": 5.294e-05, "loss": 0.4112, "step": 10594 }, { "epoch": 0.5932915220069437, "grad_norm": 1.2354567050933838, "learning_rate": 5.2945e-05, "loss": 0.4185, "step": 10595 }, { "epoch": 0.5933475193190727, "grad_norm": 1.5912731885910034, "learning_rate": 5.295e-05, "loss": 0.4071, "step": 10596 }, { "epoch": 0.5934035166312017, "grad_norm": 1.1895127296447754, 
"learning_rate": 5.2955e-05, "loss": 0.4107, "step": 10597 }, { "epoch": 0.5934595139433307, "grad_norm": 1.3144574165344238, "learning_rate": 5.296e-05, "loss": 0.3937, "step": 10598 }, { "epoch": 0.5935155112554598, "grad_norm": 1.428004503250122, "learning_rate": 5.2965e-05, "loss": 0.4642, "step": 10599 }, { "epoch": 0.5935715085675888, "grad_norm": 1.0706285238265991, "learning_rate": 5.2969999999999996e-05, "loss": 0.4732, "step": 10600 }, { "epoch": 0.5936275058797178, "grad_norm": 1.2697863578796387, "learning_rate": 5.297500000000001e-05, "loss": 0.4415, "step": 10601 }, { "epoch": 0.5936835031918468, "grad_norm": 1.2962846755981445, "learning_rate": 5.2980000000000004e-05, "loss": 0.4408, "step": 10602 }, { "epoch": 0.5937395005039758, "grad_norm": 1.3874752521514893, "learning_rate": 5.298500000000001e-05, "loss": 0.395, "step": 10603 }, { "epoch": 0.5937954978161049, "grad_norm": 1.258508563041687, "learning_rate": 5.2990000000000006e-05, "loss": 0.4574, "step": 10604 }, { "epoch": 0.5938514951282339, "grad_norm": 1.343687891960144, "learning_rate": 5.2995e-05, "loss": 0.4034, "step": 10605 }, { "epoch": 0.5939074924403629, "grad_norm": 1.2250497341156006, "learning_rate": 5.300000000000001e-05, "loss": 0.3901, "step": 10606 }, { "epoch": 0.5939634897524919, "grad_norm": 0.9959533214569092, "learning_rate": 5.3005000000000005e-05, "loss": 0.2805, "step": 10607 }, { "epoch": 0.5940194870646209, "grad_norm": 1.1464002132415771, "learning_rate": 5.301e-05, "loss": 0.3531, "step": 10608 }, { "epoch": 0.59407548437675, "grad_norm": 1.2639087438583374, "learning_rate": 5.3015000000000007e-05, "loss": 0.3548, "step": 10609 }, { "epoch": 0.594131481688879, "grad_norm": 1.5469439029693604, "learning_rate": 5.3020000000000004e-05, "loss": 0.6987, "step": 10610 }, { "epoch": 0.594187479001008, "grad_norm": 1.2237581014633179, "learning_rate": 5.3025e-05, "loss": 0.4156, "step": 10611 }, { "epoch": 0.594243476313137, "grad_norm": 1.4172309637069702, 
"learning_rate": 5.303e-05, "loss": 0.5293, "step": 10612 }, { "epoch": 0.594299473625266, "grad_norm": 1.323311686515808, "learning_rate": 5.3035e-05, "loss": 0.3676, "step": 10613 }, { "epoch": 0.594355470937395, "grad_norm": 1.3635905981063843, "learning_rate": 5.304e-05, "loss": 0.4182, "step": 10614 }, { "epoch": 0.5944114682495241, "grad_norm": 1.357318639755249, "learning_rate": 5.3045e-05, "loss": 0.4527, "step": 10615 }, { "epoch": 0.5944674655616531, "grad_norm": 1.4004288911819458, "learning_rate": 5.305e-05, "loss": 0.4716, "step": 10616 }, { "epoch": 0.5945234628737821, "grad_norm": 1.1210845708847046, "learning_rate": 5.3055e-05, "loss": 0.3992, "step": 10617 }, { "epoch": 0.5945794601859111, "grad_norm": 1.0251448154449463, "learning_rate": 5.306e-05, "loss": 0.354, "step": 10618 }, { "epoch": 0.5946354574980401, "grad_norm": 1.2607853412628174, "learning_rate": 5.3065e-05, "loss": 0.453, "step": 10619 }, { "epoch": 0.5946914548101692, "grad_norm": 1.6649878025054932, "learning_rate": 5.307e-05, "loss": 0.5101, "step": 10620 }, { "epoch": 0.5947474521222982, "grad_norm": 1.504562497138977, "learning_rate": 5.307500000000001e-05, "loss": 0.5634, "step": 10621 }, { "epoch": 0.5948034494344272, "grad_norm": 1.3259096145629883, "learning_rate": 5.308000000000001e-05, "loss": 0.4427, "step": 10622 }, { "epoch": 0.5948594467465562, "grad_norm": 1.3511524200439453, "learning_rate": 5.3085000000000004e-05, "loss": 0.475, "step": 10623 }, { "epoch": 0.5949154440586851, "grad_norm": 1.1541205644607544, "learning_rate": 5.309000000000001e-05, "loss": 0.4037, "step": 10624 }, { "epoch": 0.5949714413708141, "grad_norm": 1.1316460371017456, "learning_rate": 5.3095000000000006e-05, "loss": 0.3553, "step": 10625 }, { "epoch": 0.5950274386829432, "grad_norm": 1.2613017559051514, "learning_rate": 5.31e-05, "loss": 0.4714, "step": 10626 }, { "epoch": 0.5950834359950722, "grad_norm": 1.2521265745162964, "learning_rate": 5.310500000000001e-05, "loss": 0.4846, "step": 
10627 }, { "epoch": 0.5951394333072012, "grad_norm": 1.2765040397644043, "learning_rate": 5.3110000000000005e-05, "loss": 0.4363, "step": 10628 }, { "epoch": 0.5951954306193302, "grad_norm": 1.2572119235992432, "learning_rate": 5.3115e-05, "loss": 0.4709, "step": 10629 }, { "epoch": 0.5952514279314592, "grad_norm": 1.1742247343063354, "learning_rate": 5.3120000000000006e-05, "loss": 0.4415, "step": 10630 }, { "epoch": 0.5953074252435883, "grad_norm": 1.5842454433441162, "learning_rate": 5.3125000000000004e-05, "loss": 0.4154, "step": 10631 }, { "epoch": 0.5953634225557173, "grad_norm": 1.1859489679336548, "learning_rate": 5.313e-05, "loss": 0.3858, "step": 10632 }, { "epoch": 0.5954194198678463, "grad_norm": 1.2519792318344116, "learning_rate": 5.3135e-05, "loss": 0.368, "step": 10633 }, { "epoch": 0.5954754171799753, "grad_norm": 1.3703736066818237, "learning_rate": 5.314e-05, "loss": 0.758, "step": 10634 }, { "epoch": 0.5955314144921043, "grad_norm": 1.2715849876403809, "learning_rate": 5.3145e-05, "loss": 0.4004, "step": 10635 }, { "epoch": 0.5955874118042334, "grad_norm": 1.769635558128357, "learning_rate": 5.315e-05, "loss": 0.4241, "step": 10636 }, { "epoch": 0.5956434091163624, "grad_norm": 1.0612351894378662, "learning_rate": 5.3155e-05, "loss": 0.3624, "step": 10637 }, { "epoch": 0.5956994064284914, "grad_norm": 1.1915764808654785, "learning_rate": 5.316e-05, "loss": 0.4014, "step": 10638 }, { "epoch": 0.5957554037406204, "grad_norm": 2.9453630447387695, "learning_rate": 5.3165e-05, "loss": 0.4261, "step": 10639 }, { "epoch": 0.5958114010527494, "grad_norm": 1.4035197496414185, "learning_rate": 5.317e-05, "loss": 0.4261, "step": 10640 }, { "epoch": 0.5958673983648785, "grad_norm": 1.150656819343567, "learning_rate": 5.3175e-05, "loss": 0.4096, "step": 10641 }, { "epoch": 0.5959233956770075, "grad_norm": 1.2750046253204346, "learning_rate": 5.318000000000001e-05, "loss": 0.4525, "step": 10642 }, { "epoch": 0.5959793929891365, "grad_norm": 
1.4290735721588135, "learning_rate": 5.318500000000001e-05, "loss": 0.5292, "step": 10643 }, { "epoch": 0.5960353903012655, "grad_norm": 1.266943335533142, "learning_rate": 5.3190000000000004e-05, "loss": 0.3955, "step": 10644 }, { "epoch": 0.5960913876133945, "grad_norm": 1.522802472114563, "learning_rate": 5.319500000000001e-05, "loss": 0.7771, "step": 10645 }, { "epoch": 0.5961473849255235, "grad_norm": 1.3386750221252441, "learning_rate": 5.3200000000000006e-05, "loss": 0.4152, "step": 10646 }, { "epoch": 0.5962033822376526, "grad_norm": 1.1313281059265137, "learning_rate": 5.3205e-05, "loss": 0.4505, "step": 10647 }, { "epoch": 0.5962593795497816, "grad_norm": 1.1104485988616943, "learning_rate": 5.321000000000001e-05, "loss": 0.4002, "step": 10648 }, { "epoch": 0.5963153768619106, "grad_norm": 1.2520898580551147, "learning_rate": 5.3215000000000005e-05, "loss": 0.3537, "step": 10649 }, { "epoch": 0.5963713741740396, "grad_norm": 1.1591063737869263, "learning_rate": 5.322e-05, "loss": 0.4567, "step": 10650 }, { "epoch": 0.5964273714861686, "grad_norm": 1.056723713874817, "learning_rate": 5.3225e-05, "loss": 0.3858, "step": 10651 }, { "epoch": 0.5964833687982977, "grad_norm": 1.5650540590286255, "learning_rate": 5.3230000000000004e-05, "loss": 0.4986, "step": 10652 }, { "epoch": 0.5965393661104267, "grad_norm": 1.3419244289398193, "learning_rate": 5.3235e-05, "loss": 0.4247, "step": 10653 }, { "epoch": 0.5965953634225557, "grad_norm": 1.10451078414917, "learning_rate": 5.324e-05, "loss": 0.4241, "step": 10654 }, { "epoch": 0.5966513607346847, "grad_norm": 1.2173242568969727, "learning_rate": 5.3245e-05, "loss": 0.5483, "step": 10655 }, { "epoch": 0.5967073580468137, "grad_norm": 1.4182380437850952, "learning_rate": 5.325e-05, "loss": 0.4532, "step": 10656 }, { "epoch": 0.5967633553589428, "grad_norm": 1.1876804828643799, "learning_rate": 5.3255e-05, "loss": 0.3835, "step": 10657 }, { "epoch": 0.5968193526710718, "grad_norm": 1.3040655851364136, "learning_rate": 
5.326e-05, "loss": 0.559, "step": 10658 }, { "epoch": 0.5968753499832008, "grad_norm": 1.5240356922149658, "learning_rate": 5.3265e-05, "loss": 0.456, "step": 10659 }, { "epoch": 0.5969313472953298, "grad_norm": 1.3377245664596558, "learning_rate": 5.3269999999999996e-05, "loss": 0.424, "step": 10660 }, { "epoch": 0.5969873446074588, "grad_norm": 1.2126078605651855, "learning_rate": 5.3274999999999994e-05, "loss": 0.5067, "step": 10661 }, { "epoch": 0.5970433419195879, "grad_norm": 1.19891357421875, "learning_rate": 5.3280000000000005e-05, "loss": 0.3772, "step": 10662 }, { "epoch": 0.5970993392317169, "grad_norm": 1.408841609954834, "learning_rate": 5.328500000000001e-05, "loss": 0.4489, "step": 10663 }, { "epoch": 0.5971553365438459, "grad_norm": 1.191271185874939, "learning_rate": 5.3290000000000006e-05, "loss": 0.375, "step": 10664 }, { "epoch": 0.5972113338559749, "grad_norm": 1.279780387878418, "learning_rate": 5.3295000000000004e-05, "loss": 0.38, "step": 10665 }, { "epoch": 0.5972673311681039, "grad_norm": 1.4989320039749146, "learning_rate": 5.330000000000001e-05, "loss": 0.583, "step": 10666 }, { "epoch": 0.597323328480233, "grad_norm": 1.1831014156341553, "learning_rate": 5.3305000000000005e-05, "loss": 0.3763, "step": 10667 }, { "epoch": 0.597379325792362, "grad_norm": 1.2059293985366821, "learning_rate": 5.331e-05, "loss": 0.4694, "step": 10668 }, { "epoch": 0.597435323104491, "grad_norm": 1.4759252071380615, "learning_rate": 5.331500000000001e-05, "loss": 0.5221, "step": 10669 }, { "epoch": 0.59749132041662, "grad_norm": 2.0236058235168457, "learning_rate": 5.3320000000000004e-05, "loss": 0.9048, "step": 10670 }, { "epoch": 0.597547317728749, "grad_norm": 1.2076748609542847, "learning_rate": 5.3325e-05, "loss": 0.3366, "step": 10671 }, { "epoch": 0.597603315040878, "grad_norm": 3.8239355087280273, "learning_rate": 5.333e-05, "loss": 0.3727, "step": 10672 }, { "epoch": 0.5976593123530071, "grad_norm": 1.216123104095459, "learning_rate": 
5.3335000000000003e-05, "loss": 0.4195, "step": 10673 }, { "epoch": 0.5977153096651361, "grad_norm": 1.2644193172454834, "learning_rate": 5.334e-05, "loss": 0.4543, "step": 10674 }, { "epoch": 0.5977713069772651, "grad_norm": 1.345656394958496, "learning_rate": 5.3345e-05, "loss": 0.3471, "step": 10675 }, { "epoch": 0.5978273042893941, "grad_norm": 1.1759001016616821, "learning_rate": 5.335e-05, "loss": 0.4872, "step": 10676 }, { "epoch": 0.5978833016015231, "grad_norm": 1.7768365144729614, "learning_rate": 5.3355e-05, "loss": 0.5289, "step": 10677 }, { "epoch": 0.5979392989136522, "grad_norm": 1.2655423879623413, "learning_rate": 5.336e-05, "loss": 0.3933, "step": 10678 }, { "epoch": 0.5979952962257812, "grad_norm": 1.3385236263275146, "learning_rate": 5.3365e-05, "loss": 0.4937, "step": 10679 }, { "epoch": 0.5980512935379102, "grad_norm": 1.1813132762908936, "learning_rate": 5.337e-05, "loss": 0.3674, "step": 10680 }, { "epoch": 0.5981072908500392, "grad_norm": 1.2993154525756836, "learning_rate": 5.3374999999999996e-05, "loss": 0.4463, "step": 10681 }, { "epoch": 0.5981632881621682, "grad_norm": 1.3039528131484985, "learning_rate": 5.338000000000001e-05, "loss": 0.4434, "step": 10682 }, { "epoch": 0.5982192854742973, "grad_norm": 1.5547949075698853, "learning_rate": 5.3385000000000005e-05, "loss": 0.4416, "step": 10683 }, { "epoch": 0.5982752827864263, "grad_norm": 1.3940852880477905, "learning_rate": 5.339000000000001e-05, "loss": 0.4646, "step": 10684 }, { "epoch": 0.5983312800985553, "grad_norm": 1.3054916858673096, "learning_rate": 5.3395000000000006e-05, "loss": 0.3423, "step": 10685 }, { "epoch": 0.5983872774106843, "grad_norm": 1.3913575410842896, "learning_rate": 5.3400000000000004e-05, "loss": 0.4526, "step": 10686 }, { "epoch": 0.5984432747228133, "grad_norm": 1.3938846588134766, "learning_rate": 5.340500000000001e-05, "loss": 0.4867, "step": 10687 }, { "epoch": 0.5984992720349424, "grad_norm": 1.3865050077438354, "learning_rate": 
5.3410000000000005e-05, "loss": 0.5223, "step": 10688 }, { "epoch": 0.5985552693470714, "grad_norm": 1.180454969406128, "learning_rate": 5.3415e-05, "loss": 0.3896, "step": 10689 }, { "epoch": 0.5986112666592004, "grad_norm": 1.3324973583221436, "learning_rate": 5.342e-05, "loss": 0.4556, "step": 10690 }, { "epoch": 0.5986672639713294, "grad_norm": 1.3338416814804077, "learning_rate": 5.3425000000000004e-05, "loss": 0.3878, "step": 10691 }, { "epoch": 0.5987232612834584, "grad_norm": 1.3270137310028076, "learning_rate": 5.343e-05, "loss": 0.478, "step": 10692 }, { "epoch": 0.5987792585955874, "grad_norm": 1.1505717039108276, "learning_rate": 5.3435e-05, "loss": 0.389, "step": 10693 }, { "epoch": 0.5988352559077165, "grad_norm": 1.434771180152893, "learning_rate": 5.344e-05, "loss": 0.3462, "step": 10694 }, { "epoch": 0.5988912532198455, "grad_norm": 1.1641889810562134, "learning_rate": 5.3445e-05, "loss": 0.424, "step": 10695 }, { "epoch": 0.5989472505319745, "grad_norm": 1.2255793809890747, "learning_rate": 5.345e-05, "loss": 0.5723, "step": 10696 }, { "epoch": 0.5990032478441035, "grad_norm": 1.0824426412582397, "learning_rate": 5.3455e-05, "loss": 0.4253, "step": 10697 }, { "epoch": 0.5990592451562325, "grad_norm": 1.2578072547912598, "learning_rate": 5.346e-05, "loss": 0.5385, "step": 10698 }, { "epoch": 0.5991152424683616, "grad_norm": 1.221637487411499, "learning_rate": 5.3465e-05, "loss": 0.4711, "step": 10699 }, { "epoch": 0.5991712397804906, "grad_norm": 1.1506383419036865, "learning_rate": 5.3469999999999995e-05, "loss": 0.3996, "step": 10700 }, { "epoch": 0.5992272370926196, "grad_norm": 1.373428463935852, "learning_rate": 5.3475e-05, "loss": 0.3844, "step": 10701 }, { "epoch": 0.5992832344047486, "grad_norm": 1.593705415725708, "learning_rate": 5.348000000000001e-05, "loss": 0.3559, "step": 10702 }, { "epoch": 0.5993392317168776, "grad_norm": 1.0618690252304077, "learning_rate": 5.348500000000001e-05, "loss": 0.4307, "step": 10703 }, { "epoch": 
0.5993952290290067, "grad_norm": 1.1788705587387085, "learning_rate": 5.3490000000000005e-05, "loss": 0.4331, "step": 10704 }, { "epoch": 0.5994512263411357, "grad_norm": 1.2288025617599487, "learning_rate": 5.349500000000001e-05, "loss": 0.3987, "step": 10705 }, { "epoch": 0.5995072236532647, "grad_norm": 1.4503600597381592, "learning_rate": 5.3500000000000006e-05, "loss": 0.4142, "step": 10706 }, { "epoch": 0.5995632209653936, "grad_norm": 1.3712904453277588, "learning_rate": 5.3505000000000004e-05, "loss": 0.5967, "step": 10707 }, { "epoch": 0.5996192182775226, "grad_norm": 1.2276537418365479, "learning_rate": 5.351000000000001e-05, "loss": 0.3684, "step": 10708 }, { "epoch": 0.5996752155896516, "grad_norm": 1.0762742757797241, "learning_rate": 5.3515000000000005e-05, "loss": 0.3283, "step": 10709 }, { "epoch": 0.5997312129017807, "grad_norm": 1.382184624671936, "learning_rate": 5.352e-05, "loss": 0.6539, "step": 10710 }, { "epoch": 0.5997872102139097, "grad_norm": 1.044752836227417, "learning_rate": 5.3525e-05, "loss": 0.3505, "step": 10711 }, { "epoch": 0.5998432075260387, "grad_norm": 1.081892490386963, "learning_rate": 5.3530000000000004e-05, "loss": 0.4271, "step": 10712 }, { "epoch": 0.5998992048381677, "grad_norm": 1.3416136503219604, "learning_rate": 5.3535e-05, "loss": 0.3771, "step": 10713 }, { "epoch": 0.5999552021502967, "grad_norm": 1.536210060119629, "learning_rate": 5.354e-05, "loss": 0.6779, "step": 10714 }, { "epoch": 0.6000111994624258, "grad_norm": 1.3057374954223633, "learning_rate": 5.3545e-05, "loss": 0.4476, "step": 10715 }, { "epoch": 0.6000671967745548, "grad_norm": 1.132282018661499, "learning_rate": 5.355e-05, "loss": 0.3702, "step": 10716 }, { "epoch": 0.6001231940866838, "grad_norm": 1.3011412620544434, "learning_rate": 5.3555e-05, "loss": 0.4967, "step": 10717 }, { "epoch": 0.6001791913988128, "grad_norm": 1.2960222959518433, "learning_rate": 5.356e-05, "loss": 0.3776, "step": 10718 }, { "epoch": 0.6002351887109418, "grad_norm": 
1.1946500539779663, "learning_rate": 5.3565e-05, "loss": 0.3625, "step": 10719 }, { "epoch": 0.6002911860230709, "grad_norm": 1.1649937629699707, "learning_rate": 5.357e-05, "loss": 0.4072, "step": 10720 }, { "epoch": 0.6003471833351999, "grad_norm": 1.5056583881378174, "learning_rate": 5.3574999999999994e-05, "loss": 0.6115, "step": 10721 }, { "epoch": 0.6004031806473289, "grad_norm": 1.3560359477996826, "learning_rate": 5.3580000000000005e-05, "loss": 0.4659, "step": 10722 }, { "epoch": 0.6004591779594579, "grad_norm": 1.4856892824172974, "learning_rate": 5.358500000000001e-05, "loss": 0.5304, "step": 10723 }, { "epoch": 0.6005151752715869, "grad_norm": 1.1554310321807861, "learning_rate": 5.359000000000001e-05, "loss": 0.4317, "step": 10724 }, { "epoch": 0.600571172583716, "grad_norm": 1.2974077463150024, "learning_rate": 5.3595000000000004e-05, "loss": 0.4683, "step": 10725 }, { "epoch": 0.600627169895845, "grad_norm": 1.1984494924545288, "learning_rate": 5.360000000000001e-05, "loss": 0.3942, "step": 10726 }, { "epoch": 0.600683167207974, "grad_norm": 1.267159342765808, "learning_rate": 5.3605000000000006e-05, "loss": 0.3985, "step": 10727 }, { "epoch": 0.600739164520103, "grad_norm": 1.2933393716812134, "learning_rate": 5.3610000000000003e-05, "loss": 0.521, "step": 10728 }, { "epoch": 0.600795161832232, "grad_norm": 1.145164132118225, "learning_rate": 5.3615e-05, "loss": 0.3908, "step": 10729 }, { "epoch": 0.600851159144361, "grad_norm": 1.5170440673828125, "learning_rate": 5.3620000000000005e-05, "loss": 0.4942, "step": 10730 }, { "epoch": 0.6009071564564901, "grad_norm": 1.2621502876281738, "learning_rate": 5.3625e-05, "loss": 0.445, "step": 10731 }, { "epoch": 0.6009631537686191, "grad_norm": 1.3321058750152588, "learning_rate": 5.363e-05, "loss": 0.4805, "step": 10732 }, { "epoch": 0.6010191510807481, "grad_norm": 3.6692733764648438, "learning_rate": 5.3635000000000004e-05, "loss": 0.4295, "step": 10733 }, { "epoch": 0.6010751483928771, "grad_norm": 
1.2317042350769043, "learning_rate": 5.364e-05, "loss": 0.3777, "step": 10734 }, { "epoch": 0.6011311457050061, "grad_norm": 1.2057650089263916, "learning_rate": 5.3645e-05, "loss": 0.3517, "step": 10735 }, { "epoch": 0.6011871430171352, "grad_norm": 1.4327137470245361, "learning_rate": 5.365e-05, "loss": 0.4454, "step": 10736 }, { "epoch": 0.6012431403292642, "grad_norm": 1.1435508728027344, "learning_rate": 5.3655e-05, "loss": 0.5331, "step": 10737 }, { "epoch": 0.6012991376413932, "grad_norm": 1.0889784097671509, "learning_rate": 5.366e-05, "loss": 0.4379, "step": 10738 }, { "epoch": 0.6013551349535222, "grad_norm": 1.335314393043518, "learning_rate": 5.3664999999999995e-05, "loss": 0.4332, "step": 10739 }, { "epoch": 0.6014111322656512, "grad_norm": 1.3137105703353882, "learning_rate": 5.367e-05, "loss": 0.4548, "step": 10740 }, { "epoch": 0.6014671295777803, "grad_norm": 1.2829149961471558, "learning_rate": 5.3675e-05, "loss": 0.4708, "step": 10741 }, { "epoch": 0.6015231268899093, "grad_norm": 1.2563443183898926, "learning_rate": 5.368000000000001e-05, "loss": 0.5871, "step": 10742 }, { "epoch": 0.6015791242020383, "grad_norm": 1.365319013595581, "learning_rate": 5.3685000000000005e-05, "loss": 0.3273, "step": 10743 }, { "epoch": 0.6016351215141673, "grad_norm": 1.3159781694412231, "learning_rate": 5.369000000000001e-05, "loss": 0.4736, "step": 10744 }, { "epoch": 0.6016911188262963, "grad_norm": 1.2725735902786255, "learning_rate": 5.369500000000001e-05, "loss": 0.4324, "step": 10745 }, { "epoch": 0.6017471161384254, "grad_norm": 1.2729079723358154, "learning_rate": 5.3700000000000004e-05, "loss": 0.5507, "step": 10746 }, { "epoch": 0.6018031134505544, "grad_norm": 1.3266091346740723, "learning_rate": 5.370500000000001e-05, "loss": 0.3879, "step": 10747 }, { "epoch": 0.6018591107626834, "grad_norm": 1.4228885173797607, "learning_rate": 5.3710000000000006e-05, "loss": 0.4267, "step": 10748 }, { "epoch": 0.6019151080748124, "grad_norm": 1.3786468505859375, 
"learning_rate": 5.3715e-05, "loss": 0.6096, "step": 10749 }, { "epoch": 0.6019711053869414, "grad_norm": 1.2821747064590454, "learning_rate": 5.372e-05, "loss": 0.3711, "step": 10750 }, { "epoch": 0.6020271026990704, "grad_norm": 1.3442124128341675, "learning_rate": 5.3725000000000005e-05, "loss": 0.5343, "step": 10751 }, { "epoch": 0.6020831000111995, "grad_norm": 1.2129201889038086, "learning_rate": 5.373e-05, "loss": 0.3621, "step": 10752 }, { "epoch": 0.6021390973233285, "grad_norm": 1.8313417434692383, "learning_rate": 5.3735e-05, "loss": 0.3928, "step": 10753 }, { "epoch": 0.6021950946354575, "grad_norm": 1.3189783096313477, "learning_rate": 5.3740000000000004e-05, "loss": 0.4618, "step": 10754 }, { "epoch": 0.6022510919475865, "grad_norm": 1.542063593864441, "learning_rate": 5.3745e-05, "loss": 0.44, "step": 10755 }, { "epoch": 0.6023070892597155, "grad_norm": 1.1043028831481934, "learning_rate": 5.375e-05, "loss": 0.4144, "step": 10756 }, { "epoch": 0.6023630865718446, "grad_norm": 1.2992949485778809, "learning_rate": 5.3755e-05, "loss": 0.5384, "step": 10757 }, { "epoch": 0.6024190838839736, "grad_norm": 1.1901628971099854, "learning_rate": 5.376e-05, "loss": 0.3763, "step": 10758 }, { "epoch": 0.6024750811961026, "grad_norm": 1.3112820386886597, "learning_rate": 5.3765e-05, "loss": 0.4026, "step": 10759 }, { "epoch": 0.6025310785082316, "grad_norm": 1.519964575767517, "learning_rate": 5.3769999999999995e-05, "loss": 0.5919, "step": 10760 }, { "epoch": 0.6025870758203606, "grad_norm": 1.2642871141433716, "learning_rate": 5.3775e-05, "loss": 0.5022, "step": 10761 }, { "epoch": 0.6026430731324897, "grad_norm": 1.2007747888565063, "learning_rate": 5.378e-05, "loss": 0.5792, "step": 10762 }, { "epoch": 0.6026990704446187, "grad_norm": 1.247137427330017, "learning_rate": 5.378500000000001e-05, "loss": 0.3743, "step": 10763 }, { "epoch": 0.6027550677567477, "grad_norm": 1.2913554906845093, "learning_rate": 5.3790000000000005e-05, "loss": 0.4759, "step": 10764 
}, { "epoch": 0.6028110650688767, "grad_norm": 1.1013917922973633, "learning_rate": 5.379500000000001e-05, "loss": 0.4122, "step": 10765 }, { "epoch": 0.6028670623810057, "grad_norm": 2.8226101398468018, "learning_rate": 5.380000000000001e-05, "loss": 0.3754, "step": 10766 }, { "epoch": 0.6029230596931348, "grad_norm": 1.3585474491119385, "learning_rate": 5.3805000000000004e-05, "loss": 0.3925, "step": 10767 }, { "epoch": 0.6029790570052638, "grad_norm": 0.9887531995773315, "learning_rate": 5.381e-05, "loss": 0.2893, "step": 10768 }, { "epoch": 0.6030350543173928, "grad_norm": 1.1428364515304565, "learning_rate": 5.3815000000000006e-05, "loss": 0.3991, "step": 10769 }, { "epoch": 0.6030910516295218, "grad_norm": 1.4529662132263184, "learning_rate": 5.382e-05, "loss": 0.4707, "step": 10770 }, { "epoch": 0.6031470489416508, "grad_norm": 1.2549299001693726, "learning_rate": 5.3825e-05, "loss": 0.3181, "step": 10771 }, { "epoch": 0.6032030462537799, "grad_norm": 1.3555113077163696, "learning_rate": 5.3830000000000005e-05, "loss": 0.4483, "step": 10772 }, { "epoch": 0.6032590435659089, "grad_norm": 1.4491767883300781, "learning_rate": 5.3835e-05, "loss": 0.6002, "step": 10773 }, { "epoch": 0.6033150408780379, "grad_norm": 1.3413119316101074, "learning_rate": 5.384e-05, "loss": 0.4649, "step": 10774 }, { "epoch": 0.6033710381901669, "grad_norm": 1.6401668787002563, "learning_rate": 5.3845000000000004e-05, "loss": 0.3622, "step": 10775 }, { "epoch": 0.6034270355022959, "grad_norm": 1.498465657234192, "learning_rate": 5.385e-05, "loss": 0.5619, "step": 10776 }, { "epoch": 0.603483032814425, "grad_norm": 1.2065180540084839, "learning_rate": 5.3855e-05, "loss": 0.5187, "step": 10777 }, { "epoch": 0.603539030126554, "grad_norm": 1.4181182384490967, "learning_rate": 5.386e-05, "loss": 0.4366, "step": 10778 }, { "epoch": 0.603595027438683, "grad_norm": 1.4053266048431396, "learning_rate": 5.3865e-05, "loss": 0.5172, "step": 10779 }, { "epoch": 0.603651024750812, "grad_norm": 
1.3527300357818604, "learning_rate": 5.387e-05, "loss": 0.5256, "step": 10780 }, { "epoch": 0.603707022062941, "grad_norm": 1.6070576906204224, "learning_rate": 5.3874999999999995e-05, "loss": 0.5341, "step": 10781 }, { "epoch": 0.60376301937507, "grad_norm": 1.2043564319610596, "learning_rate": 5.388e-05, "loss": 0.338, "step": 10782 }, { "epoch": 0.6038190166871991, "grad_norm": 1.2934094667434692, "learning_rate": 5.388500000000001e-05, "loss": 0.3572, "step": 10783 }, { "epoch": 0.6038750139993281, "grad_norm": 1.2597966194152832, "learning_rate": 5.389000000000001e-05, "loss": 0.4674, "step": 10784 }, { "epoch": 0.6039310113114571, "grad_norm": 1.358716607093811, "learning_rate": 5.3895000000000005e-05, "loss": 0.4766, "step": 10785 }, { "epoch": 0.6039870086235861, "grad_norm": 1.2400068044662476, "learning_rate": 5.390000000000001e-05, "loss": 0.4309, "step": 10786 }, { "epoch": 0.6040430059357151, "grad_norm": 1.2446389198303223, "learning_rate": 5.3905000000000007e-05, "loss": 0.4155, "step": 10787 }, { "epoch": 0.6040990032478442, "grad_norm": 1.3253979682922363, "learning_rate": 5.3910000000000004e-05, "loss": 0.4481, "step": 10788 }, { "epoch": 0.6041550005599732, "grad_norm": 1.2350094318389893, "learning_rate": 5.3915e-05, "loss": 0.3623, "step": 10789 }, { "epoch": 0.6042109978721021, "grad_norm": 1.4209998846054077, "learning_rate": 5.3920000000000006e-05, "loss": 0.4583, "step": 10790 }, { "epoch": 0.6042669951842311, "grad_norm": 1.4345864057540894, "learning_rate": 5.3925e-05, "loss": 0.4316, "step": 10791 }, { "epoch": 0.6043229924963601, "grad_norm": 1.2540873289108276, "learning_rate": 5.393e-05, "loss": 0.4195, "step": 10792 }, { "epoch": 0.6043789898084891, "grad_norm": 1.4047833681106567, "learning_rate": 5.3935000000000005e-05, "loss": 0.4012, "step": 10793 }, { "epoch": 0.6044349871206182, "grad_norm": 1.1563358306884766, "learning_rate": 5.394e-05, "loss": 0.3616, "step": 10794 }, { "epoch": 0.6044909844327472, "grad_norm": 
1.5559574365615845, "learning_rate": 5.3945e-05, "loss": 0.4022, "step": 10795 }, { "epoch": 0.6045469817448762, "grad_norm": 1.235590934753418, "learning_rate": 5.3950000000000004e-05, "loss": 0.4277, "step": 10796 }, { "epoch": 0.6046029790570052, "grad_norm": 1.1067087650299072, "learning_rate": 5.3955e-05, "loss": 0.4182, "step": 10797 }, { "epoch": 0.6046589763691342, "grad_norm": 1.5357977151870728, "learning_rate": 5.396e-05, "loss": 0.4248, "step": 10798 }, { "epoch": 0.6047149736812633, "grad_norm": 1.1177622079849243, "learning_rate": 5.3964999999999996e-05, "loss": 0.4271, "step": 10799 }, { "epoch": 0.6047709709933923, "grad_norm": 1.2507884502410889, "learning_rate": 5.397e-05, "loss": 0.4074, "step": 10800 }, { "epoch": 0.6048269683055213, "grad_norm": 1.195744276046753, "learning_rate": 5.3975e-05, "loss": 0.3231, "step": 10801 }, { "epoch": 0.6048829656176503, "grad_norm": 1.3030754327774048, "learning_rate": 5.3979999999999995e-05, "loss": 0.5291, "step": 10802 }, { "epoch": 0.6049389629297793, "grad_norm": 1.6797661781311035, "learning_rate": 5.3985000000000006e-05, "loss": 0.4854, "step": 10803 }, { "epoch": 0.6049949602419084, "grad_norm": 1.1389864683151245, "learning_rate": 5.399000000000001e-05, "loss": 0.4245, "step": 10804 }, { "epoch": 0.6050509575540374, "grad_norm": 4.239365577697754, "learning_rate": 5.399500000000001e-05, "loss": 0.4988, "step": 10805 }, { "epoch": 0.6051069548661664, "grad_norm": 1.2595585584640503, "learning_rate": 5.4000000000000005e-05, "loss": 0.4289, "step": 10806 }, { "epoch": 0.6051629521782954, "grad_norm": 1.268800973892212, "learning_rate": 5.400500000000001e-05, "loss": 0.4455, "step": 10807 }, { "epoch": 0.6052189494904244, "grad_norm": 1.621047019958496, "learning_rate": 5.4010000000000006e-05, "loss": 0.4633, "step": 10808 }, { "epoch": 0.6052749468025534, "grad_norm": 1.2500159740447998, "learning_rate": 5.4015000000000004e-05, "loss": 0.4274, "step": 10809 }, { "epoch": 0.6053309441146825, "grad_norm": 
1.5942906141281128, "learning_rate": 5.402e-05, "loss": 0.4211, "step": 10810 }, { "epoch": 0.6053869414268115, "grad_norm": 1.32368803024292, "learning_rate": 5.4025000000000005e-05, "loss": 0.5392, "step": 10811 }, { "epoch": 0.6054429387389405, "grad_norm": 1.339520812034607, "learning_rate": 5.403e-05, "loss": 0.4374, "step": 10812 }, { "epoch": 0.6054989360510695, "grad_norm": 2.1490044593811035, "learning_rate": 5.4035e-05, "loss": 0.4251, "step": 10813 }, { "epoch": 0.6055549333631985, "grad_norm": 1.3826438188552856, "learning_rate": 5.4040000000000004e-05, "loss": 0.4717, "step": 10814 }, { "epoch": 0.6056109306753276, "grad_norm": 1.2756162881851196, "learning_rate": 5.4045e-05, "loss": 0.3501, "step": 10815 }, { "epoch": 0.6056669279874566, "grad_norm": 1.0150901079177856, "learning_rate": 5.405e-05, "loss": 0.366, "step": 10816 }, { "epoch": 0.6057229252995856, "grad_norm": 1.6545933485031128, "learning_rate": 5.4055e-05, "loss": 0.4843, "step": 10817 }, { "epoch": 0.6057789226117146, "grad_norm": 1.1055387258529663, "learning_rate": 5.406e-05, "loss": 0.38, "step": 10818 }, { "epoch": 0.6058349199238436, "grad_norm": 1.436523199081421, "learning_rate": 5.4065e-05, "loss": 0.4003, "step": 10819 }, { "epoch": 0.6058909172359727, "grad_norm": 1.28406822681427, "learning_rate": 5.4069999999999996e-05, "loss": 0.5015, "step": 10820 }, { "epoch": 0.6059469145481017, "grad_norm": 1.147472620010376, "learning_rate": 5.4075e-05, "loss": 0.2851, "step": 10821 }, { "epoch": 0.6060029118602307, "grad_norm": 1.0317665338516235, "learning_rate": 5.408e-05, "loss": 0.3906, "step": 10822 }, { "epoch": 0.6060589091723597, "grad_norm": 1.2612948417663574, "learning_rate": 5.408500000000001e-05, "loss": 0.429, "step": 10823 }, { "epoch": 0.6061149064844887, "grad_norm": 1.498570203781128, "learning_rate": 5.4090000000000006e-05, "loss": 0.535, "step": 10824 }, { "epoch": 0.6061709037966178, "grad_norm": 1.2321866750717163, "learning_rate": 5.409500000000001e-05, "loss": 
0.4573, "step": 10825 }, { "epoch": 0.6062269011087468, "grad_norm": 1.2394964694976807, "learning_rate": 5.410000000000001e-05, "loss": 0.4547, "step": 10826 }, { "epoch": 0.6062828984208758, "grad_norm": 1.8866934776306152, "learning_rate": 5.4105000000000005e-05, "loss": 0.4295, "step": 10827 }, { "epoch": 0.6063388957330048, "grad_norm": 1.3016738891601562, "learning_rate": 5.411e-05, "loss": 0.418, "step": 10828 }, { "epoch": 0.6063948930451338, "grad_norm": 1.2495415210723877, "learning_rate": 5.4115000000000006e-05, "loss": 0.3469, "step": 10829 }, { "epoch": 0.6064508903572629, "grad_norm": 1.394277811050415, "learning_rate": 5.4120000000000004e-05, "loss": 0.4012, "step": 10830 }, { "epoch": 0.6065068876693919, "grad_norm": 1.276740550994873, "learning_rate": 5.4125e-05, "loss": 0.4102, "step": 10831 }, { "epoch": 0.6065628849815209, "grad_norm": 1.152902364730835, "learning_rate": 5.4130000000000005e-05, "loss": 0.4301, "step": 10832 }, { "epoch": 0.6066188822936499, "grad_norm": 1.3067655563354492, "learning_rate": 5.4135e-05, "loss": 0.5145, "step": 10833 }, { "epoch": 0.6066748796057789, "grad_norm": 1.089138388633728, "learning_rate": 5.414e-05, "loss": 0.371, "step": 10834 }, { "epoch": 0.606730876917908, "grad_norm": 1.266743779182434, "learning_rate": 5.4145000000000004e-05, "loss": 0.4955, "step": 10835 }, { "epoch": 0.606786874230037, "grad_norm": 1.3165127038955688, "learning_rate": 5.415e-05, "loss": 0.4544, "step": 10836 }, { "epoch": 0.606842871542166, "grad_norm": 1.4284101724624634, "learning_rate": 5.4155e-05, "loss": 0.3966, "step": 10837 }, { "epoch": 0.606898868854295, "grad_norm": 1.1028484106063843, "learning_rate": 5.4159999999999996e-05, "loss": 0.3794, "step": 10838 }, { "epoch": 0.606954866166424, "grad_norm": 1.2298623323440552, "learning_rate": 5.4165e-05, "loss": 0.424, "step": 10839 }, { "epoch": 0.607010863478553, "grad_norm": 1.338499903678894, "learning_rate": 5.417e-05, "loss": 0.4441, "step": 10840 }, { "epoch": 
0.6070668607906821, "grad_norm": 1.2862958908081055, "learning_rate": 5.4174999999999995e-05, "loss": 0.4455, "step": 10841 }, { "epoch": 0.6071228581028111, "grad_norm": 1.2241559028625488, "learning_rate": 5.418e-05, "loss": 0.421, "step": 10842 }, { "epoch": 0.6071788554149401, "grad_norm": 1.0324537754058838, "learning_rate": 5.418500000000001e-05, "loss": 0.3749, "step": 10843 }, { "epoch": 0.6072348527270691, "grad_norm": 1.3654017448425293, "learning_rate": 5.419000000000001e-05, "loss": 0.3271, "step": 10844 }, { "epoch": 0.6072908500391981, "grad_norm": 1.1245365142822266, "learning_rate": 5.4195000000000005e-05, "loss": 0.3942, "step": 10845 }, { "epoch": 0.6073468473513272, "grad_norm": 1.579898715019226, "learning_rate": 5.420000000000001e-05, "loss": 0.4232, "step": 10846 }, { "epoch": 0.6074028446634562, "grad_norm": 1.23211669921875, "learning_rate": 5.420500000000001e-05, "loss": 0.4372, "step": 10847 }, { "epoch": 0.6074588419755852, "grad_norm": 1.2611342668533325, "learning_rate": 5.4210000000000004e-05, "loss": 0.5056, "step": 10848 }, { "epoch": 0.6075148392877142, "grad_norm": 1.1216042041778564, "learning_rate": 5.4215e-05, "loss": 0.3703, "step": 10849 }, { "epoch": 0.6075708365998432, "grad_norm": 1.1612191200256348, "learning_rate": 5.4220000000000006e-05, "loss": 0.4331, "step": 10850 }, { "epoch": 0.6076268339119723, "grad_norm": 1.5577188730239868, "learning_rate": 5.4225000000000003e-05, "loss": 0.6184, "step": 10851 }, { "epoch": 0.6076828312241013, "grad_norm": 1.3442915678024292, "learning_rate": 5.423e-05, "loss": 0.3291, "step": 10852 }, { "epoch": 0.6077388285362303, "grad_norm": 1.355039358139038, "learning_rate": 5.4235000000000005e-05, "loss": 0.6839, "step": 10853 }, { "epoch": 0.6077948258483593, "grad_norm": 1.3512647151947021, "learning_rate": 5.424e-05, "loss": 0.4379, "step": 10854 }, { "epoch": 0.6078508231604883, "grad_norm": 1.8527106046676636, "learning_rate": 5.4245e-05, "loss": 0.3735, "step": 10855 }, { "epoch": 
0.6079068204726173, "grad_norm": 1.3146089315414429, "learning_rate": 5.4250000000000004e-05, "loss": 0.3954, "step": 10856 }, { "epoch": 0.6079628177847464, "grad_norm": 1.2463274002075195, "learning_rate": 5.4255e-05, "loss": 0.4262, "step": 10857 }, { "epoch": 0.6080188150968754, "grad_norm": 1.1774721145629883, "learning_rate": 5.426e-05, "loss": 0.3186, "step": 10858 }, { "epoch": 0.6080748124090044, "grad_norm": 1.1638809442520142, "learning_rate": 5.4264999999999996e-05, "loss": 0.3631, "step": 10859 }, { "epoch": 0.6081308097211334, "grad_norm": 1.337945580482483, "learning_rate": 5.427e-05, "loss": 0.3337, "step": 10860 }, { "epoch": 0.6081868070332624, "grad_norm": 1.6546392440795898, "learning_rate": 5.4275e-05, "loss": 0.5638, "step": 10861 }, { "epoch": 0.6082428043453915, "grad_norm": 1.39755117893219, "learning_rate": 5.4279999999999995e-05, "loss": 0.4977, "step": 10862 }, { "epoch": 0.6082988016575205, "grad_norm": 1.1643980741500854, "learning_rate": 5.4285000000000006e-05, "loss": 0.4908, "step": 10863 }, { "epoch": 0.6083547989696495, "grad_norm": 1.3000119924545288, "learning_rate": 5.429000000000001e-05, "loss": 0.5467, "step": 10864 }, { "epoch": 0.6084107962817785, "grad_norm": 1.3411471843719482, "learning_rate": 5.429500000000001e-05, "loss": 0.6696, "step": 10865 }, { "epoch": 0.6084667935939075, "grad_norm": 1.2692595720291138, "learning_rate": 5.4300000000000005e-05, "loss": 0.4051, "step": 10866 }, { "epoch": 0.6085227909060366, "grad_norm": 1.143664836883545, "learning_rate": 5.4305e-05, "loss": 0.3524, "step": 10867 }, { "epoch": 0.6085787882181656, "grad_norm": 1.2592768669128418, "learning_rate": 5.431000000000001e-05, "loss": 0.468, "step": 10868 }, { "epoch": 0.6086347855302946, "grad_norm": 4.257694721221924, "learning_rate": 5.4315000000000004e-05, "loss": 0.375, "step": 10869 }, { "epoch": 0.6086907828424236, "grad_norm": 1.3914134502410889, "learning_rate": 5.432e-05, "loss": 0.4355, "step": 10870 }, { "epoch": 
0.6087467801545526, "grad_norm": 1.4659074544906616, "learning_rate": 5.4325000000000006e-05, "loss": 0.6274, "step": 10871 }, { "epoch": 0.6088027774666815, "grad_norm": 1.2778185606002808, "learning_rate": 5.433e-05, "loss": 0.5666, "step": 10872 }, { "epoch": 0.6088587747788106, "grad_norm": 1.189117193222046, "learning_rate": 5.4335e-05, "loss": 0.4109, "step": 10873 }, { "epoch": 0.6089147720909396, "grad_norm": 1.122721791267395, "learning_rate": 5.4340000000000005e-05, "loss": 0.4035, "step": 10874 }, { "epoch": 0.6089707694030686, "grad_norm": 1.4179353713989258, "learning_rate": 5.4345e-05, "loss": 0.5543, "step": 10875 }, { "epoch": 0.6090267667151976, "grad_norm": 1.0920861959457397, "learning_rate": 5.435e-05, "loss": 0.4486, "step": 10876 }, { "epoch": 0.6090827640273266, "grad_norm": 1.174911618232727, "learning_rate": 5.4355e-05, "loss": 0.4765, "step": 10877 }, { "epoch": 0.6091387613394557, "grad_norm": 1.4525388479232788, "learning_rate": 5.436e-05, "loss": 0.4524, "step": 10878 }, { "epoch": 0.6091947586515847, "grad_norm": 1.2018752098083496, "learning_rate": 5.4365e-05, "loss": 0.3196, "step": 10879 }, { "epoch": 0.6092507559637137, "grad_norm": 1.4582374095916748, "learning_rate": 5.4369999999999996e-05, "loss": 0.4108, "step": 10880 }, { "epoch": 0.6093067532758427, "grad_norm": 1.1760350465774536, "learning_rate": 5.4375e-05, "loss": 0.3301, "step": 10881 }, { "epoch": 0.6093627505879717, "grad_norm": 1.2863696813583374, "learning_rate": 5.438e-05, "loss": 0.4477, "step": 10882 }, { "epoch": 0.6094187479001008, "grad_norm": 1.2450844049453735, "learning_rate": 5.4384999999999995e-05, "loss": 0.4449, "step": 10883 }, { "epoch": 0.6094747452122298, "grad_norm": 1.1178971529006958, "learning_rate": 5.4390000000000006e-05, "loss": 0.3883, "step": 10884 }, { "epoch": 0.6095307425243588, "grad_norm": 1.490931749343872, "learning_rate": 5.439500000000001e-05, "loss": 0.3687, "step": 10885 }, { "epoch": 0.6095867398364878, "grad_norm": 
1.1268008947372437, "learning_rate": 5.440000000000001e-05, "loss": 0.4334, "step": 10886 }, { "epoch": 0.6096427371486168, "grad_norm": 1.184987187385559, "learning_rate": 5.4405000000000005e-05, "loss": 0.4247, "step": 10887 }, { "epoch": 0.6096987344607459, "grad_norm": 1.1118158102035522, "learning_rate": 5.441e-05, "loss": 0.3663, "step": 10888 }, { "epoch": 0.6097547317728749, "grad_norm": 1.3129841089248657, "learning_rate": 5.441500000000001e-05, "loss": 0.2936, "step": 10889 }, { "epoch": 0.6098107290850039, "grad_norm": 1.2024022340774536, "learning_rate": 5.4420000000000004e-05, "loss": 0.3951, "step": 10890 }, { "epoch": 0.6098667263971329, "grad_norm": 1.322929859161377, "learning_rate": 5.4425e-05, "loss": 0.3611, "step": 10891 }, { "epoch": 0.6099227237092619, "grad_norm": 1.272114872932434, "learning_rate": 5.4430000000000006e-05, "loss": 0.4719, "step": 10892 }, { "epoch": 0.609978721021391, "grad_norm": 1.081798791885376, "learning_rate": 5.4435e-05, "loss": 0.4308, "step": 10893 }, { "epoch": 0.61003471833352, "grad_norm": 1.277859091758728, "learning_rate": 5.444e-05, "loss": 0.4554, "step": 10894 }, { "epoch": 0.610090715645649, "grad_norm": 1.3497674465179443, "learning_rate": 5.4445000000000005e-05, "loss": 0.3705, "step": 10895 }, { "epoch": 0.610146712957778, "grad_norm": 1.611820936203003, "learning_rate": 5.445e-05, "loss": 0.457, "step": 10896 }, { "epoch": 0.610202710269907, "grad_norm": 1.240739107131958, "learning_rate": 5.4455e-05, "loss": 0.4434, "step": 10897 }, { "epoch": 0.610258707582036, "grad_norm": 1.3244433403015137, "learning_rate": 5.446e-05, "loss": 0.3844, "step": 10898 }, { "epoch": 0.6103147048941651, "grad_norm": 1.235936164855957, "learning_rate": 5.4465e-05, "loss": 0.363, "step": 10899 }, { "epoch": 0.6103707022062941, "grad_norm": 1.3539587259292603, "learning_rate": 5.447e-05, "loss": 0.4094, "step": 10900 }, { "epoch": 0.6104266995184231, "grad_norm": 1.5547099113464355, "learning_rate": 5.4474999999999996e-05, 
"loss": 0.5054, "step": 10901 }, { "epoch": 0.6104826968305521, "grad_norm": 1.3546676635742188, "learning_rate": 5.448e-05, "loss": 0.5038, "step": 10902 }, { "epoch": 0.6105386941426811, "grad_norm": 1.2518770694732666, "learning_rate": 5.4485e-05, "loss": 0.3849, "step": 10903 }, { "epoch": 0.6105946914548102, "grad_norm": 1.5321272611618042, "learning_rate": 5.449000000000001e-05, "loss": 0.4972, "step": 10904 }, { "epoch": 0.6106506887669392, "grad_norm": 1.3916819095611572, "learning_rate": 5.4495000000000006e-05, "loss": 0.4346, "step": 10905 }, { "epoch": 0.6107066860790682, "grad_norm": 1.4135522842407227, "learning_rate": 5.45e-05, "loss": 0.5123, "step": 10906 }, { "epoch": 0.6107626833911972, "grad_norm": 1.1799620389938354, "learning_rate": 5.450500000000001e-05, "loss": 0.4236, "step": 10907 }, { "epoch": 0.6108186807033262, "grad_norm": 1.6834977865219116, "learning_rate": 5.4510000000000005e-05, "loss": 0.4363, "step": 10908 }, { "epoch": 0.6108746780154553, "grad_norm": 1.3735376596450806, "learning_rate": 5.4515e-05, "loss": 0.5093, "step": 10909 }, { "epoch": 0.6109306753275843, "grad_norm": 1.518600583076477, "learning_rate": 5.4520000000000007e-05, "loss": 0.5383, "step": 10910 }, { "epoch": 0.6109866726397133, "grad_norm": 1.2262818813323975, "learning_rate": 5.4525000000000004e-05, "loss": 0.4242, "step": 10911 }, { "epoch": 0.6110426699518423, "grad_norm": 1.3563529253005981, "learning_rate": 5.453e-05, "loss": 0.5936, "step": 10912 }, { "epoch": 0.6110986672639713, "grad_norm": 1.1710436344146729, "learning_rate": 5.4535000000000006e-05, "loss": 0.4508, "step": 10913 }, { "epoch": 0.6111546645761003, "grad_norm": 1.396704077720642, "learning_rate": 5.454e-05, "loss": 0.4649, "step": 10914 }, { "epoch": 0.6112106618882294, "grad_norm": 1.7950878143310547, "learning_rate": 5.4545e-05, "loss": 0.6378, "step": 10915 }, { "epoch": 0.6112666592003584, "grad_norm": 1.2352250814437866, "learning_rate": 5.455e-05, "loss": 0.4524, "step": 10916 }, { 
"epoch": 0.6113226565124874, "grad_norm": 1.079251766204834, "learning_rate": 5.4555e-05, "loss": 0.3759, "step": 10917 }, { "epoch": 0.6113786538246164, "grad_norm": 1.436574101448059, "learning_rate": 5.456e-05, "loss": 0.3481, "step": 10918 }, { "epoch": 0.6114346511367454, "grad_norm": 1.346972942352295, "learning_rate": 5.4565e-05, "loss": 0.4239, "step": 10919 }, { "epoch": 0.6114906484488745, "grad_norm": 1.3086992502212524, "learning_rate": 5.457e-05, "loss": 0.4323, "step": 10920 }, { "epoch": 0.6115466457610035, "grad_norm": 1.2626492977142334, "learning_rate": 5.4575e-05, "loss": 0.4108, "step": 10921 }, { "epoch": 0.6116026430731325, "grad_norm": 1.3344029188156128, "learning_rate": 5.4579999999999996e-05, "loss": 0.4914, "step": 10922 }, { "epoch": 0.6116586403852615, "grad_norm": 1.2221949100494385, "learning_rate": 5.4585e-05, "loss": 0.3818, "step": 10923 }, { "epoch": 0.6117146376973905, "grad_norm": 1.3945127725601196, "learning_rate": 5.459000000000001e-05, "loss": 0.4025, "step": 10924 }, { "epoch": 0.6117706350095196, "grad_norm": 1.5424026250839233, "learning_rate": 5.459500000000001e-05, "loss": 0.5179, "step": 10925 }, { "epoch": 0.6118266323216486, "grad_norm": 1.215567708015442, "learning_rate": 5.4600000000000006e-05, "loss": 0.4808, "step": 10926 }, { "epoch": 0.6118826296337776, "grad_norm": 1.0344998836517334, "learning_rate": 5.4605e-05, "loss": 0.4033, "step": 10927 }, { "epoch": 0.6119386269459066, "grad_norm": 1.3918633460998535, "learning_rate": 5.461000000000001e-05, "loss": 0.4244, "step": 10928 }, { "epoch": 0.6119946242580356, "grad_norm": 1.1625525951385498, "learning_rate": 5.4615000000000005e-05, "loss": 0.4361, "step": 10929 }, { "epoch": 0.6120506215701647, "grad_norm": 1.3637739419937134, "learning_rate": 5.462e-05, "loss": 0.4096, "step": 10930 }, { "epoch": 0.6121066188822937, "grad_norm": 0.9587275385856628, "learning_rate": 5.4625000000000006e-05, "loss": 0.335, "step": 10931 }, { "epoch": 0.6121626161944227, 
"grad_norm": 1.3482089042663574, "learning_rate": 5.4630000000000004e-05, "loss": 0.4462, "step": 10932 }, { "epoch": 0.6122186135065517, "grad_norm": 1.388649344444275, "learning_rate": 5.4635e-05, "loss": 0.354, "step": 10933 }, { "epoch": 0.6122746108186807, "grad_norm": 1.1378109455108643, "learning_rate": 5.4640000000000005e-05, "loss": 0.316, "step": 10934 }, { "epoch": 0.6123306081308098, "grad_norm": 1.0495285987854004, "learning_rate": 5.4645e-05, "loss": 0.3512, "step": 10935 }, { "epoch": 0.6123866054429388, "grad_norm": 2.3947298526763916, "learning_rate": 5.465e-05, "loss": 0.4391, "step": 10936 }, { "epoch": 0.6124426027550678, "grad_norm": 1.2747169733047485, "learning_rate": 5.4655e-05, "loss": 0.4272, "step": 10937 }, { "epoch": 0.6124986000671968, "grad_norm": 1.2688775062561035, "learning_rate": 5.466e-05, "loss": 0.4662, "step": 10938 }, { "epoch": 0.6125545973793258, "grad_norm": 1.249367594718933, "learning_rate": 5.4665e-05, "loss": 0.3896, "step": 10939 }, { "epoch": 0.6126105946914548, "grad_norm": 1.2491068840026855, "learning_rate": 5.467e-05, "loss": 0.3809, "step": 10940 }, { "epoch": 0.6126665920035839, "grad_norm": 1.1681910753250122, "learning_rate": 5.4675e-05, "loss": 0.3824, "step": 10941 }, { "epoch": 0.6127225893157129, "grad_norm": 1.31464684009552, "learning_rate": 5.468e-05, "loss": 0.4716, "step": 10942 }, { "epoch": 0.6127785866278419, "grad_norm": 1.1723101139068604, "learning_rate": 5.4684999999999996e-05, "loss": 0.4744, "step": 10943 }, { "epoch": 0.6128345839399709, "grad_norm": 1.363568663597107, "learning_rate": 5.469000000000001e-05, "loss": 0.4266, "step": 10944 }, { "epoch": 0.6128905812520999, "grad_norm": 1.425294280052185, "learning_rate": 5.4695000000000004e-05, "loss": 0.6472, "step": 10945 }, { "epoch": 0.612946578564229, "grad_norm": 1.3231925964355469, "learning_rate": 5.470000000000001e-05, "loss": 0.372, "step": 10946 }, { "epoch": 0.613002575876358, "grad_norm": 1.4727872610092163, "learning_rate": 
5.4705000000000006e-05, "loss": 0.3979, "step": 10947 }, { "epoch": 0.613058573188487, "grad_norm": 1.184914469718933, "learning_rate": 5.471e-05, "loss": 0.4542, "step": 10948 }, { "epoch": 0.613114570500616, "grad_norm": 1.3102973699569702, "learning_rate": 5.471500000000001e-05, "loss": 0.4403, "step": 10949 }, { "epoch": 0.613170567812745, "grad_norm": 1.3212430477142334, "learning_rate": 5.4720000000000005e-05, "loss": 0.4557, "step": 10950 }, { "epoch": 0.6132265651248741, "grad_norm": 1.1247193813323975, "learning_rate": 5.4725e-05, "loss": 0.3428, "step": 10951 }, { "epoch": 0.6132825624370031, "grad_norm": 2.253873825073242, "learning_rate": 5.4730000000000006e-05, "loss": 0.547, "step": 10952 }, { "epoch": 0.6133385597491321, "grad_norm": 1.2817833423614502, "learning_rate": 5.4735000000000004e-05, "loss": 0.4066, "step": 10953 }, { "epoch": 0.6133945570612611, "grad_norm": 1.4005568027496338, "learning_rate": 5.474e-05, "loss": 0.3571, "step": 10954 }, { "epoch": 0.61345055437339, "grad_norm": 1.2430723905563354, "learning_rate": 5.4745e-05, "loss": 0.4512, "step": 10955 }, { "epoch": 0.613506551685519, "grad_norm": 1.6329760551452637, "learning_rate": 5.475e-05, "loss": 0.7016, "step": 10956 }, { "epoch": 0.6135625489976481, "grad_norm": 1.369162917137146, "learning_rate": 5.4755e-05, "loss": 0.4148, "step": 10957 }, { "epoch": 0.6136185463097771, "grad_norm": 1.1662013530731201, "learning_rate": 5.476e-05, "loss": 0.4242, "step": 10958 }, { "epoch": 0.6136745436219061, "grad_norm": 1.0382499694824219, "learning_rate": 5.4765e-05, "loss": 0.3375, "step": 10959 }, { "epoch": 0.6137305409340351, "grad_norm": 1.6717263460159302, "learning_rate": 5.477e-05, "loss": 0.4685, "step": 10960 }, { "epoch": 0.6137865382461641, "grad_norm": 1.4468532800674438, "learning_rate": 5.4774999999999996e-05, "loss": 0.4467, "step": 10961 }, { "epoch": 0.6138425355582932, "grad_norm": 1.1400389671325684, "learning_rate": 5.478e-05, "loss": 0.4137, "step": 10962 }, { 
"epoch": 0.6138985328704222, "grad_norm": 1.5730295181274414, "learning_rate": 5.4785e-05, "loss": 0.3277, "step": 10963 }, { "epoch": 0.6139545301825512, "grad_norm": 1.0826725959777832, "learning_rate": 5.479000000000001e-05, "loss": 0.3517, "step": 10964 }, { "epoch": 0.6140105274946802, "grad_norm": 1.3723716735839844, "learning_rate": 5.4795000000000006e-05, "loss": 0.4235, "step": 10965 }, { "epoch": 0.6140665248068092, "grad_norm": 1.3577009439468384, "learning_rate": 5.4800000000000004e-05, "loss": 0.5371, "step": 10966 }, { "epoch": 0.6141225221189383, "grad_norm": 1.2552025318145752, "learning_rate": 5.480500000000001e-05, "loss": 0.4135, "step": 10967 }, { "epoch": 0.6141785194310673, "grad_norm": 1.306849479675293, "learning_rate": 5.4810000000000005e-05, "loss": 0.5673, "step": 10968 }, { "epoch": 0.6142345167431963, "grad_norm": 1.1524357795715332, "learning_rate": 5.4815e-05, "loss": 0.359, "step": 10969 }, { "epoch": 0.6142905140553253, "grad_norm": 1.3059027194976807, "learning_rate": 5.482000000000001e-05, "loss": 0.4656, "step": 10970 }, { "epoch": 0.6143465113674543, "grad_norm": 1.4936234951019287, "learning_rate": 5.4825000000000004e-05, "loss": 0.4255, "step": 10971 }, { "epoch": 0.6144025086795833, "grad_norm": 1.2601217031478882, "learning_rate": 5.483e-05, "loss": 0.6143, "step": 10972 }, { "epoch": 0.6144585059917124, "grad_norm": 1.4112039804458618, "learning_rate": 5.4835000000000006e-05, "loss": 0.5809, "step": 10973 }, { "epoch": 0.6145145033038414, "grad_norm": 1.4576923847198486, "learning_rate": 5.4840000000000003e-05, "loss": 0.3353, "step": 10974 }, { "epoch": 0.6145705006159704, "grad_norm": 1.2597851753234863, "learning_rate": 5.4845e-05, "loss": 0.5477, "step": 10975 }, { "epoch": 0.6146264979280994, "grad_norm": 1.2168810367584229, "learning_rate": 5.485e-05, "loss": 0.4293, "step": 10976 }, { "epoch": 0.6146824952402284, "grad_norm": 1.051815152168274, "learning_rate": 5.4855e-05, "loss": 0.4024, "step": 10977 }, { "epoch": 
0.6147384925523575, "grad_norm": 1.2898094654083252, "learning_rate": 5.486e-05, "loss": 0.4484, "step": 10978 }, { "epoch": 0.6147944898644865, "grad_norm": 1.4077908992767334, "learning_rate": 5.4865e-05, "loss": 0.4834, "step": 10979 }, { "epoch": 0.6148504871766155, "grad_norm": 1.5623785257339478, "learning_rate": 5.487e-05, "loss": 0.625, "step": 10980 }, { "epoch": 0.6149064844887445, "grad_norm": 1.3912885189056396, "learning_rate": 5.4875e-05, "loss": 0.5837, "step": 10981 }, { "epoch": 0.6149624818008735, "grad_norm": 1.4288856983184814, "learning_rate": 5.4879999999999996e-05, "loss": 0.4109, "step": 10982 }, { "epoch": 0.6150184791130026, "grad_norm": 1.4826418161392212, "learning_rate": 5.4885e-05, "loss": 0.5164, "step": 10983 }, { "epoch": 0.6150744764251316, "grad_norm": 1.431363821029663, "learning_rate": 5.4890000000000005e-05, "loss": 0.4435, "step": 10984 }, { "epoch": 0.6151304737372606, "grad_norm": 1.2103575468063354, "learning_rate": 5.489500000000001e-05, "loss": 0.3564, "step": 10985 }, { "epoch": 0.6151864710493896, "grad_norm": 1.0259290933609009, "learning_rate": 5.4900000000000006e-05, "loss": 0.3939, "step": 10986 }, { "epoch": 0.6152424683615186, "grad_norm": 1.5478663444519043, "learning_rate": 5.4905000000000004e-05, "loss": 0.4409, "step": 10987 }, { "epoch": 0.6152984656736477, "grad_norm": 1.2921854257583618, "learning_rate": 5.491000000000001e-05, "loss": 0.4146, "step": 10988 }, { "epoch": 0.6153544629857767, "grad_norm": 1.2508203983306885, "learning_rate": 5.4915000000000005e-05, "loss": 0.4168, "step": 10989 }, { "epoch": 0.6154104602979057, "grad_norm": 1.3484240770339966, "learning_rate": 5.492e-05, "loss": 0.4035, "step": 10990 }, { "epoch": 0.6154664576100347, "grad_norm": 1.207398533821106, "learning_rate": 5.492500000000001e-05, "loss": 0.3209, "step": 10991 }, { "epoch": 0.6155224549221637, "grad_norm": 2.1470868587493896, "learning_rate": 5.4930000000000004e-05, "loss": 0.3565, "step": 10992 }, { "epoch": 
0.6155784522342928, "grad_norm": 1.2360011339187622, "learning_rate": 5.4935e-05, "loss": 0.3835, "step": 10993 }, { "epoch": 0.6156344495464218, "grad_norm": 1.2802917957305908, "learning_rate": 5.4940000000000006e-05, "loss": 0.3776, "step": 10994 }, { "epoch": 0.6156904468585508, "grad_norm": 1.2238057851791382, "learning_rate": 5.4945e-05, "loss": 0.4526, "step": 10995 }, { "epoch": 0.6157464441706798, "grad_norm": 1.3067924976348877, "learning_rate": 5.495e-05, "loss": 0.4185, "step": 10996 }, { "epoch": 0.6158024414828088, "grad_norm": 1.451428771018982, "learning_rate": 5.4955e-05, "loss": 0.6839, "step": 10997 }, { "epoch": 0.6158584387949378, "grad_norm": 3.082087516784668, "learning_rate": 5.496e-05, "loss": 0.4933, "step": 10998 }, { "epoch": 0.6159144361070669, "grad_norm": 1.1497201919555664, "learning_rate": 5.4965e-05, "loss": 0.5335, "step": 10999 }, { "epoch": 0.6159704334191959, "grad_norm": 1.3007779121398926, "learning_rate": 5.497e-05, "loss": 0.4934, "step": 11000 }, { "epoch": 0.6160264307313249, "grad_norm": 1.517802357673645, "learning_rate": 5.4975e-05, "loss": 0.622, "step": 11001 }, { "epoch": 0.6160824280434539, "grad_norm": 1.2755626440048218, "learning_rate": 5.498e-05, "loss": 0.3995, "step": 11002 }, { "epoch": 0.6161384253555829, "grad_norm": 1.2727468013763428, "learning_rate": 5.4984999999999996e-05, "loss": 0.3699, "step": 11003 }, { "epoch": 0.616194422667712, "grad_norm": 1.5111430883407593, "learning_rate": 5.499000000000001e-05, "loss": 0.4512, "step": 11004 }, { "epoch": 0.616250419979841, "grad_norm": 1.1231077909469604, "learning_rate": 5.4995000000000005e-05, "loss": 0.3624, "step": 11005 }, { "epoch": 0.61630641729197, "grad_norm": 1.5678112506866455, "learning_rate": 5.500000000000001e-05, "loss": 0.4726, "step": 11006 }, { "epoch": 0.616362414604099, "grad_norm": 1.218222737312317, "learning_rate": 5.5005000000000006e-05, "loss": 0.4233, "step": 11007 }, { "epoch": 0.616418411916228, "grad_norm": 1.5231025218963623, 
"learning_rate": 5.5010000000000004e-05, "loss": 0.5874, "step": 11008 }, { "epoch": 0.6164744092283571, "grad_norm": 1.3632187843322754, "learning_rate": 5.501500000000001e-05, "loss": 0.5486, "step": 11009 }, { "epoch": 0.6165304065404861, "grad_norm": 1.3238469362258911, "learning_rate": 5.5020000000000005e-05, "loss": 0.5328, "step": 11010 }, { "epoch": 0.6165864038526151, "grad_norm": 1.674149751663208, "learning_rate": 5.5025e-05, "loss": 0.4168, "step": 11011 }, { "epoch": 0.6166424011647441, "grad_norm": 1.2645148038864136, "learning_rate": 5.503000000000001e-05, "loss": 0.5078, "step": 11012 }, { "epoch": 0.6166983984768731, "grad_norm": 1.3339440822601318, "learning_rate": 5.5035000000000004e-05, "loss": 0.4358, "step": 11013 }, { "epoch": 0.6167543957890022, "grad_norm": 1.254732608795166, "learning_rate": 5.504e-05, "loss": 0.5489, "step": 11014 }, { "epoch": 0.6168103931011312, "grad_norm": 1.3222465515136719, "learning_rate": 5.5045e-05, "loss": 0.6669, "step": 11015 }, { "epoch": 0.6168663904132602, "grad_norm": 1.307663083076477, "learning_rate": 5.505e-05, "loss": 0.3796, "step": 11016 }, { "epoch": 0.6169223877253892, "grad_norm": 1.1509522199630737, "learning_rate": 5.5055e-05, "loss": 0.3864, "step": 11017 }, { "epoch": 0.6169783850375182, "grad_norm": 1.4544485807418823, "learning_rate": 5.506e-05, "loss": 0.4906, "step": 11018 }, { "epoch": 0.6170343823496472, "grad_norm": 1.219419002532959, "learning_rate": 5.5065e-05, "loss": 0.493, "step": 11019 }, { "epoch": 0.6170903796617763, "grad_norm": 1.4182944297790527, "learning_rate": 5.507e-05, "loss": 0.4782, "step": 11020 }, { "epoch": 0.6171463769739053, "grad_norm": 1.3153084516525269, "learning_rate": 5.5075e-05, "loss": 0.368, "step": 11021 }, { "epoch": 0.6172023742860343, "grad_norm": 1.4359521865844727, "learning_rate": 5.508e-05, "loss": 0.6702, "step": 11022 }, { "epoch": 0.6172583715981633, "grad_norm": 1.212326169013977, "learning_rate": 5.5085e-05, "loss": 0.4304, "step": 11023 }, { 
"epoch": 0.6173143689102923, "grad_norm": 1.1759984493255615, "learning_rate": 5.5089999999999996e-05, "loss": 0.4048, "step": 11024 }, { "epoch": 0.6173703662224214, "grad_norm": 1.2775005102157593, "learning_rate": 5.509500000000001e-05, "loss": 0.3695, "step": 11025 }, { "epoch": 0.6174263635345504, "grad_norm": 1.2537641525268555, "learning_rate": 5.5100000000000004e-05, "loss": 0.3624, "step": 11026 }, { "epoch": 0.6174823608466794, "grad_norm": 1.345799446105957, "learning_rate": 5.510500000000001e-05, "loss": 0.6128, "step": 11027 }, { "epoch": 0.6175383581588084, "grad_norm": 1.2790555953979492, "learning_rate": 5.5110000000000006e-05, "loss": 0.4091, "step": 11028 }, { "epoch": 0.6175943554709374, "grad_norm": 1.3208950757980347, "learning_rate": 5.5115000000000003e-05, "loss": 0.4776, "step": 11029 }, { "epoch": 0.6176503527830665, "grad_norm": 1.0869702100753784, "learning_rate": 5.512000000000001e-05, "loss": 0.3377, "step": 11030 }, { "epoch": 0.6177063500951955, "grad_norm": 1.5070682764053345, "learning_rate": 5.5125000000000005e-05, "loss": 0.5009, "step": 11031 }, { "epoch": 0.6177623474073245, "grad_norm": 1.4577410221099854, "learning_rate": 5.513e-05, "loss": 0.5479, "step": 11032 }, { "epoch": 0.6178183447194535, "grad_norm": 1.3744590282440186, "learning_rate": 5.5135000000000007e-05, "loss": 0.3575, "step": 11033 }, { "epoch": 0.6178743420315825, "grad_norm": 1.0250931978225708, "learning_rate": 5.5140000000000004e-05, "loss": 0.3366, "step": 11034 }, { "epoch": 0.6179303393437116, "grad_norm": 1.3300193548202515, "learning_rate": 5.5145e-05, "loss": 0.4172, "step": 11035 }, { "epoch": 0.6179863366558406, "grad_norm": 1.2005608081817627, "learning_rate": 5.515e-05, "loss": 0.3565, "step": 11036 }, { "epoch": 0.6180423339679696, "grad_norm": 1.2163310050964355, "learning_rate": 5.5155e-05, "loss": 0.3138, "step": 11037 }, { "epoch": 0.6180983312800985, "grad_norm": 1.210322618484497, "learning_rate": 5.516e-05, "loss": 0.4677, "step": 11038 }, 
{ "epoch": 0.6181543285922275, "grad_norm": 1.21651029586792, "learning_rate": 5.5165e-05, "loss": 0.5215, "step": 11039 }, { "epoch": 0.6182103259043565, "grad_norm": 1.3309255838394165, "learning_rate": 5.517e-05, "loss": 0.4064, "step": 11040 }, { "epoch": 0.6182663232164856, "grad_norm": 1.3202449083328247, "learning_rate": 5.5175e-05, "loss": 0.433, "step": 11041 }, { "epoch": 0.6183223205286146, "grad_norm": 1.4714305400848389, "learning_rate": 5.518e-05, "loss": 0.4863, "step": 11042 }, { "epoch": 0.6183783178407436, "grad_norm": 1.1451250314712524, "learning_rate": 5.5185e-05, "loss": 0.4105, "step": 11043 }, { "epoch": 0.6184343151528726, "grad_norm": 1.2788653373718262, "learning_rate": 5.519e-05, "loss": 0.4258, "step": 11044 }, { "epoch": 0.6184903124650016, "grad_norm": 1.2494900226593018, "learning_rate": 5.519500000000001e-05, "loss": 0.3796, "step": 11045 }, { "epoch": 0.6185463097771307, "grad_norm": 1.2078883647918701, "learning_rate": 5.520000000000001e-05, "loss": 0.4384, "step": 11046 }, { "epoch": 0.6186023070892597, "grad_norm": 1.1643199920654297, "learning_rate": 5.5205000000000004e-05, "loss": 0.3847, "step": 11047 }, { "epoch": 0.6186583044013887, "grad_norm": 2.068957805633545, "learning_rate": 5.521000000000001e-05, "loss": 0.4046, "step": 11048 }, { "epoch": 0.6187143017135177, "grad_norm": 1.1836789846420288, "learning_rate": 5.5215000000000006e-05, "loss": 0.3422, "step": 11049 }, { "epoch": 0.6187702990256467, "grad_norm": 1.6435412168502808, "learning_rate": 5.522e-05, "loss": 0.4077, "step": 11050 }, { "epoch": 0.6188262963377757, "grad_norm": 1.2401477098464966, "learning_rate": 5.522500000000001e-05, "loss": 0.4519, "step": 11051 }, { "epoch": 0.6188822936499048, "grad_norm": 1.4588103294372559, "learning_rate": 5.5230000000000005e-05, "loss": 0.5337, "step": 11052 }, { "epoch": 0.6189382909620338, "grad_norm": 1.5246227979660034, "learning_rate": 5.5235e-05, "loss": 0.5767, "step": 11053 }, { "epoch": 0.6189942882741628, 
"grad_norm": 3.2909200191497803, "learning_rate": 5.524e-05, "loss": 0.3592, "step": 11054 }, { "epoch": 0.6190502855862918, "grad_norm": 1.2052184343338013, "learning_rate": 5.5245000000000004e-05, "loss": 0.4188, "step": 11055 }, { "epoch": 0.6191062828984208, "grad_norm": 1.6031838655471802, "learning_rate": 5.525e-05, "loss": 0.4697, "step": 11056 }, { "epoch": 0.6191622802105499, "grad_norm": 1.0913581848144531, "learning_rate": 5.5255e-05, "loss": 0.3663, "step": 11057 }, { "epoch": 0.6192182775226789, "grad_norm": 1.2913140058517456, "learning_rate": 5.526e-05, "loss": 0.4313, "step": 11058 }, { "epoch": 0.6192742748348079, "grad_norm": 1.3415793180465698, "learning_rate": 5.5265e-05, "loss": 0.4306, "step": 11059 }, { "epoch": 0.6193302721469369, "grad_norm": 1.519203782081604, "learning_rate": 5.527e-05, "loss": 0.4767, "step": 11060 }, { "epoch": 0.6193862694590659, "grad_norm": 1.2701588869094849, "learning_rate": 5.5275e-05, "loss": 0.5083, "step": 11061 }, { "epoch": 0.619442266771195, "grad_norm": 1.0991445779800415, "learning_rate": 5.528e-05, "loss": 0.347, "step": 11062 }, { "epoch": 0.619498264083324, "grad_norm": 1.1720919609069824, "learning_rate": 5.5285e-05, "loss": 0.3684, "step": 11063 }, { "epoch": 0.619554261395453, "grad_norm": 1.3075461387634277, "learning_rate": 5.5289999999999994e-05, "loss": 0.4524, "step": 11064 }, { "epoch": 0.619610258707582, "grad_norm": 1.2739973068237305, "learning_rate": 5.5295000000000005e-05, "loss": 0.5399, "step": 11065 }, { "epoch": 0.619666256019711, "grad_norm": 1.3019740581512451, "learning_rate": 5.530000000000001e-05, "loss": 0.4455, "step": 11066 }, { "epoch": 0.6197222533318401, "grad_norm": 1.2433137893676758, "learning_rate": 5.530500000000001e-05, "loss": 0.3155, "step": 11067 }, { "epoch": 0.6197782506439691, "grad_norm": 1.2786258459091187, "learning_rate": 5.5310000000000004e-05, "loss": 0.5279, "step": 11068 }, { "epoch": 0.6198342479560981, "grad_norm": 1.3097648620605469, "learning_rate": 
5.531500000000001e-05, "loss": 0.5644, "step": 11069 }, { "epoch": 0.6198902452682271, "grad_norm": 1.406480312347412, "learning_rate": 5.5320000000000006e-05, "loss": 0.5045, "step": 11070 }, { "epoch": 0.6199462425803561, "grad_norm": 1.22561776638031, "learning_rate": 5.5325e-05, "loss": 0.3946, "step": 11071 }, { "epoch": 0.6200022398924852, "grad_norm": 1.4362882375717163, "learning_rate": 5.533000000000001e-05, "loss": 0.4382, "step": 11072 }, { "epoch": 0.6200582372046142, "grad_norm": 1.520639181137085, "learning_rate": 5.5335000000000005e-05, "loss": 0.4684, "step": 11073 }, { "epoch": 0.6201142345167432, "grad_norm": 1.1990320682525635, "learning_rate": 5.534e-05, "loss": 0.4649, "step": 11074 }, { "epoch": 0.6201702318288722, "grad_norm": 1.2321856021881104, "learning_rate": 5.5345e-05, "loss": 0.3917, "step": 11075 }, { "epoch": 0.6202262291410012, "grad_norm": 1.2517001628875732, "learning_rate": 5.5350000000000004e-05, "loss": 0.3432, "step": 11076 }, { "epoch": 0.6202822264531302, "grad_norm": 1.116336703300476, "learning_rate": 5.5355e-05, "loss": 0.414, "step": 11077 }, { "epoch": 0.6203382237652593, "grad_norm": 1.4558522701263428, "learning_rate": 5.536e-05, "loss": 0.3346, "step": 11078 }, { "epoch": 0.6203942210773883, "grad_norm": 1.334283709526062, "learning_rate": 5.5365e-05, "loss": 0.4531, "step": 11079 }, { "epoch": 0.6204502183895173, "grad_norm": 1.297268033027649, "learning_rate": 5.537e-05, "loss": 0.5036, "step": 11080 }, { "epoch": 0.6205062157016463, "grad_norm": 1.2266566753387451, "learning_rate": 5.5375e-05, "loss": 0.4021, "step": 11081 }, { "epoch": 0.6205622130137753, "grad_norm": 1.2731362581253052, "learning_rate": 5.538e-05, "loss": 0.4218, "step": 11082 }, { "epoch": 0.6206182103259044, "grad_norm": 1.3133450746536255, "learning_rate": 5.5385e-05, "loss": 0.4175, "step": 11083 }, { "epoch": 0.6206742076380334, "grad_norm": 1.390400767326355, "learning_rate": 5.5389999999999997e-05, "loss": 0.3764, "step": 11084 }, { 
"epoch": 0.6207302049501624, "grad_norm": 1.1527926921844482, "learning_rate": 5.539500000000001e-05, "loss": 0.3754, "step": 11085 }, { "epoch": 0.6207862022622914, "grad_norm": 1.342327356338501, "learning_rate": 5.5400000000000005e-05, "loss": 0.5999, "step": 11086 }, { "epoch": 0.6208421995744204, "grad_norm": 1.1754281520843506, "learning_rate": 5.540500000000001e-05, "loss": 0.4429, "step": 11087 }, { "epoch": 0.6208981968865495, "grad_norm": 1.2577422857284546, "learning_rate": 5.5410000000000007e-05, "loss": 0.4208, "step": 11088 }, { "epoch": 0.6209541941986785, "grad_norm": 1.3542053699493408, "learning_rate": 5.5415000000000004e-05, "loss": 0.4091, "step": 11089 }, { "epoch": 0.6210101915108075, "grad_norm": 1.1279963254928589, "learning_rate": 5.542000000000001e-05, "loss": 0.3579, "step": 11090 }, { "epoch": 0.6210661888229365, "grad_norm": 0.9857292175292969, "learning_rate": 5.5425000000000006e-05, "loss": 0.2692, "step": 11091 }, { "epoch": 0.6211221861350655, "grad_norm": 1.6026922464370728, "learning_rate": 5.543e-05, "loss": 0.5099, "step": 11092 }, { "epoch": 0.6211781834471946, "grad_norm": 1.8746087551116943, "learning_rate": 5.5435e-05, "loss": 0.5977, "step": 11093 }, { "epoch": 0.6212341807593236, "grad_norm": 1.1242713928222656, "learning_rate": 5.5440000000000005e-05, "loss": 0.3337, "step": 11094 }, { "epoch": 0.6212901780714526, "grad_norm": 1.4334536790847778, "learning_rate": 5.5445e-05, "loss": 0.491, "step": 11095 }, { "epoch": 0.6213461753835816, "grad_norm": 1.4200242757797241, "learning_rate": 5.545e-05, "loss": 0.4305, "step": 11096 }, { "epoch": 0.6214021726957106, "grad_norm": 1.2854182720184326, "learning_rate": 5.5455000000000004e-05, "loss": 0.5226, "step": 11097 }, { "epoch": 0.6214581700078396, "grad_norm": 1.1912659406661987, "learning_rate": 5.546e-05, "loss": 0.4192, "step": 11098 }, { "epoch": 0.6215141673199687, "grad_norm": 1.1298226118087769, "learning_rate": 5.5465e-05, "loss": 0.3647, "step": 11099 }, { "epoch": 
0.6215701646320977, "grad_norm": 1.2152447700500488, "learning_rate": 5.547e-05, "loss": 0.311, "step": 11100 }, { "epoch": 0.6216261619442267, "grad_norm": 1.1264703273773193, "learning_rate": 5.5475e-05, "loss": 0.3419, "step": 11101 }, { "epoch": 0.6216821592563557, "grad_norm": 1.2177457809448242, "learning_rate": 5.548e-05, "loss": 0.3906, "step": 11102 }, { "epoch": 0.6217381565684847, "grad_norm": 1.399300217628479, "learning_rate": 5.5484999999999995e-05, "loss": 0.4839, "step": 11103 }, { "epoch": 0.6217941538806138, "grad_norm": 1.3462135791778564, "learning_rate": 5.549e-05, "loss": 0.4196, "step": 11104 }, { "epoch": 0.6218501511927428, "grad_norm": 1.5765787363052368, "learning_rate": 5.549500000000001e-05, "loss": 0.5721, "step": 11105 }, { "epoch": 0.6219061485048718, "grad_norm": 1.2746860980987549, "learning_rate": 5.550000000000001e-05, "loss": 0.4462, "step": 11106 }, { "epoch": 0.6219621458170008, "grad_norm": 1.1315209865570068, "learning_rate": 5.5505000000000005e-05, "loss": 0.396, "step": 11107 }, { "epoch": 0.6220181431291298, "grad_norm": 1.1072025299072266, "learning_rate": 5.551000000000001e-05, "loss": 0.2609, "step": 11108 }, { "epoch": 0.6220741404412589, "grad_norm": 1.4301235675811768, "learning_rate": 5.5515000000000006e-05, "loss": 0.4324, "step": 11109 }, { "epoch": 0.6221301377533879, "grad_norm": 1.4740022420883179, "learning_rate": 5.5520000000000004e-05, "loss": 0.5201, "step": 11110 }, { "epoch": 0.6221861350655169, "grad_norm": 1.1610413789749146, "learning_rate": 5.552500000000001e-05, "loss": 0.3384, "step": 11111 }, { "epoch": 0.6222421323776459, "grad_norm": 1.0692731142044067, "learning_rate": 5.5530000000000005e-05, "loss": 0.36, "step": 11112 }, { "epoch": 0.6222981296897749, "grad_norm": 1.2527985572814941, "learning_rate": 5.5535e-05, "loss": 0.5219, "step": 11113 }, { "epoch": 0.622354127001904, "grad_norm": 1.2130718231201172, "learning_rate": 5.554e-05, "loss": 0.418, "step": 11114 }, { "epoch": 
0.622410124314033, "grad_norm": 1.4109265804290771, "learning_rate": 5.5545000000000004e-05, "loss": 0.4951, "step": 11115 }, { "epoch": 0.622466121626162, "grad_norm": 1.3276960849761963, "learning_rate": 5.555e-05, "loss": 0.4697, "step": 11116 }, { "epoch": 0.622522118938291, "grad_norm": 1.4295916557312012, "learning_rate": 5.5555e-05, "loss": 0.4085, "step": 11117 }, { "epoch": 0.62257811625042, "grad_norm": 1.5808247327804565, "learning_rate": 5.556e-05, "loss": 0.4237, "step": 11118 }, { "epoch": 0.622634113562549, "grad_norm": 1.3355120420455933, "learning_rate": 5.5565e-05, "loss": 0.5678, "step": 11119 }, { "epoch": 0.622690110874678, "grad_norm": 1.452845811843872, "learning_rate": 5.557e-05, "loss": 0.5304, "step": 11120 }, { "epoch": 0.622746108186807, "grad_norm": 1.1395128965377808, "learning_rate": 5.5575e-05, "loss": 0.3455, "step": 11121 }, { "epoch": 0.622802105498936, "grad_norm": 1.3583852052688599, "learning_rate": 5.558e-05, "loss": 0.3351, "step": 11122 }, { "epoch": 0.622858102811065, "grad_norm": 1.1834583282470703, "learning_rate": 5.5585e-05, "loss": 0.4016, "step": 11123 }, { "epoch": 0.622914100123194, "grad_norm": 1.3968842029571533, "learning_rate": 5.5589999999999995e-05, "loss": 0.3693, "step": 11124 }, { "epoch": 0.6229700974353231, "grad_norm": 1.4644670486450195, "learning_rate": 5.5595000000000006e-05, "loss": 0.5647, "step": 11125 }, { "epoch": 0.6230260947474521, "grad_norm": 1.2715789079666138, "learning_rate": 5.560000000000001e-05, "loss": 0.3577, "step": 11126 }, { "epoch": 0.6230820920595811, "grad_norm": 3.06221342086792, "learning_rate": 5.560500000000001e-05, "loss": 0.5486, "step": 11127 }, { "epoch": 0.6231380893717101, "grad_norm": 1.3651798963546753, "learning_rate": 5.5610000000000005e-05, "loss": 0.4616, "step": 11128 }, { "epoch": 0.6231940866838391, "grad_norm": 1.209020972251892, "learning_rate": 5.561500000000001e-05, "loss": 0.3837, "step": 11129 }, { "epoch": 0.6232500839959682, "grad_norm": 
1.3096704483032227, "learning_rate": 5.5620000000000006e-05, "loss": 0.4342, "step": 11130 }, { "epoch": 0.6233060813080972, "grad_norm": 1.4271564483642578, "learning_rate": 5.5625000000000004e-05, "loss": 0.466, "step": 11131 }, { "epoch": 0.6233620786202262, "grad_norm": 1.1934137344360352, "learning_rate": 5.563e-05, "loss": 0.4586, "step": 11132 }, { "epoch": 0.6234180759323552, "grad_norm": 1.1513158082962036, "learning_rate": 5.5635000000000005e-05, "loss": 0.341, "step": 11133 }, { "epoch": 0.6234740732444842, "grad_norm": 1.2885565757751465, "learning_rate": 5.564e-05, "loss": 0.4821, "step": 11134 }, { "epoch": 0.6235300705566132, "grad_norm": 1.2884690761566162, "learning_rate": 5.5645e-05, "loss": 0.4927, "step": 11135 }, { "epoch": 0.6235860678687423, "grad_norm": 1.2023165225982666, "learning_rate": 5.5650000000000004e-05, "loss": 0.3787, "step": 11136 }, { "epoch": 0.6236420651808713, "grad_norm": 1.1008411645889282, "learning_rate": 5.5655e-05, "loss": 0.4443, "step": 11137 }, { "epoch": 0.6236980624930003, "grad_norm": 1.222356915473938, "learning_rate": 5.566e-05, "loss": 0.4743, "step": 11138 }, { "epoch": 0.6237540598051293, "grad_norm": 1.2594475746154785, "learning_rate": 5.5665e-05, "loss": 0.5381, "step": 11139 }, { "epoch": 0.6238100571172583, "grad_norm": 1.3761041164398193, "learning_rate": 5.567e-05, "loss": 0.542, "step": 11140 }, { "epoch": 0.6238660544293874, "grad_norm": 1.3204220533370972, "learning_rate": 5.5675e-05, "loss": 0.47, "step": 11141 }, { "epoch": 0.6239220517415164, "grad_norm": 1.188469648361206, "learning_rate": 5.5679999999999995e-05, "loss": 0.3275, "step": 11142 }, { "epoch": 0.6239780490536454, "grad_norm": 1.4052780866622925, "learning_rate": 5.5685e-05, "loss": 0.4375, "step": 11143 }, { "epoch": 0.6240340463657744, "grad_norm": 1.1389778852462769, "learning_rate": 5.569e-05, "loss": 0.336, "step": 11144 }, { "epoch": 0.6240900436779034, "grad_norm": 1.3461494445800781, "learning_rate": 5.5694999999999994e-05, 
"loss": 0.4599, "step": 11145 }, { "epoch": 0.6241460409900325, "grad_norm": 1.2822725772857666, "learning_rate": 5.5700000000000005e-05, "loss": 0.4538, "step": 11146 }, { "epoch": 0.6242020383021615, "grad_norm": 1.4448376893997192, "learning_rate": 5.570500000000001e-05, "loss": 0.4361, "step": 11147 }, { "epoch": 0.6242580356142905, "grad_norm": 1.0926368236541748, "learning_rate": 5.571000000000001e-05, "loss": 0.3762, "step": 11148 }, { "epoch": 0.6243140329264195, "grad_norm": 1.2508419752120972, "learning_rate": 5.5715000000000004e-05, "loss": 0.2762, "step": 11149 }, { "epoch": 0.6243700302385485, "grad_norm": 1.2052196264266968, "learning_rate": 5.572000000000001e-05, "loss": 0.4175, "step": 11150 }, { "epoch": 0.6244260275506776, "grad_norm": 1.2460700273513794, "learning_rate": 5.5725000000000006e-05, "loss": 0.4135, "step": 11151 }, { "epoch": 0.6244820248628066, "grad_norm": 1.1380853652954102, "learning_rate": 5.5730000000000003e-05, "loss": 0.4291, "step": 11152 }, { "epoch": 0.6245380221749356, "grad_norm": 1.1354602575302124, "learning_rate": 5.5735e-05, "loss": 0.3984, "step": 11153 }, { "epoch": 0.6245940194870646, "grad_norm": 1.2212356328964233, "learning_rate": 5.5740000000000005e-05, "loss": 0.3591, "step": 11154 }, { "epoch": 0.6246500167991936, "grad_norm": 1.7149409055709839, "learning_rate": 5.5745e-05, "loss": 0.4434, "step": 11155 }, { "epoch": 0.6247060141113226, "grad_norm": 1.4858187437057495, "learning_rate": 5.575e-05, "loss": 0.5377, "step": 11156 }, { "epoch": 0.6247620114234517, "grad_norm": 1.2911696434020996, "learning_rate": 5.5755000000000004e-05, "loss": 0.4101, "step": 11157 }, { "epoch": 0.6248180087355807, "grad_norm": 1.328184962272644, "learning_rate": 5.576e-05, "loss": 0.3503, "step": 11158 }, { "epoch": 0.6248740060477097, "grad_norm": 1.7264142036437988, "learning_rate": 5.5765e-05, "loss": 0.4688, "step": 11159 }, { "epoch": 0.6249300033598387, "grad_norm": 1.4469679594039917, "learning_rate": 5.577e-05, "loss": 
0.528, "step": 11160 }, { "epoch": 0.6249860006719677, "grad_norm": 1.1961560249328613, "learning_rate": 5.5775e-05, "loss": 0.2944, "step": 11161 }, { "epoch": 0.6250419979840968, "grad_norm": 1.1152257919311523, "learning_rate": 5.578e-05, "loss": 0.3623, "step": 11162 }, { "epoch": 0.6250979952962258, "grad_norm": 1.3937879800796509, "learning_rate": 5.5784999999999995e-05, "loss": 0.3723, "step": 11163 }, { "epoch": 0.6251539926083548, "grad_norm": 1.5608205795288086, "learning_rate": 5.579e-05, "loss": 0.5245, "step": 11164 }, { "epoch": 0.6252099899204838, "grad_norm": 1.3844794034957886, "learning_rate": 5.5795e-05, "loss": 0.4392, "step": 11165 }, { "epoch": 0.6252659872326128, "grad_norm": 1.4845026731491089, "learning_rate": 5.580000000000001e-05, "loss": 0.5709, "step": 11166 }, { "epoch": 0.6253219845447419, "grad_norm": 1.2369791269302368, "learning_rate": 5.5805000000000005e-05, "loss": 0.4965, "step": 11167 }, { "epoch": 0.6253779818568709, "grad_norm": 1.115130066871643, "learning_rate": 5.581000000000001e-05, "loss": 0.4414, "step": 11168 }, { "epoch": 0.6254339791689999, "grad_norm": 1.6319810152053833, "learning_rate": 5.581500000000001e-05, "loss": 0.5116, "step": 11169 }, { "epoch": 0.6254899764811289, "grad_norm": 1.1978349685668945, "learning_rate": 5.5820000000000004e-05, "loss": 0.4042, "step": 11170 }, { "epoch": 0.6255459737932579, "grad_norm": 1.140435814857483, "learning_rate": 5.582500000000001e-05, "loss": 0.4591, "step": 11171 }, { "epoch": 0.625601971105387, "grad_norm": 1.1942261457443237, "learning_rate": 5.5830000000000006e-05, "loss": 0.5054, "step": 11172 }, { "epoch": 0.625657968417516, "grad_norm": 1.1937553882598877, "learning_rate": 5.5835e-05, "loss": 0.4353, "step": 11173 }, { "epoch": 0.625713965729645, "grad_norm": 1.2794886827468872, "learning_rate": 5.584e-05, "loss": 0.2802, "step": 11174 }, { "epoch": 0.625769963041774, "grad_norm": 1.1961913108825684, "learning_rate": 5.5845000000000005e-05, "loss": 0.464, "step": 
11175 }, { "epoch": 0.625825960353903, "grad_norm": 1.4396162033081055, "learning_rate": 5.585e-05, "loss": 0.5098, "step": 11176 }, { "epoch": 0.625881957666032, "grad_norm": 1.1948219537734985, "learning_rate": 5.5855e-05, "loss": 0.431, "step": 11177 }, { "epoch": 0.6259379549781611, "grad_norm": 2.665365695953369, "learning_rate": 5.5860000000000004e-05, "loss": 0.421, "step": 11178 }, { "epoch": 0.6259939522902901, "grad_norm": 1.2600346803665161, "learning_rate": 5.5865e-05, "loss": 0.4265, "step": 11179 }, { "epoch": 0.6260499496024191, "grad_norm": 1.7787073850631714, "learning_rate": 5.587e-05, "loss": 0.5158, "step": 11180 }, { "epoch": 0.6261059469145481, "grad_norm": 1.2258793115615845, "learning_rate": 5.5875e-05, "loss": 0.5191, "step": 11181 }, { "epoch": 0.6261619442266771, "grad_norm": 1.2070649862289429, "learning_rate": 5.588e-05, "loss": 0.451, "step": 11182 }, { "epoch": 0.6262179415388062, "grad_norm": 4.887661457061768, "learning_rate": 5.5885e-05, "loss": 0.516, "step": 11183 }, { "epoch": 0.6262739388509352, "grad_norm": 1.2355186939239502, "learning_rate": 5.5889999999999995e-05, "loss": 0.4068, "step": 11184 }, { "epoch": 0.6263299361630642, "grad_norm": 1.3576050996780396, "learning_rate": 5.5895e-05, "loss": 0.6321, "step": 11185 }, { "epoch": 0.6263859334751932, "grad_norm": 1.405781865119934, "learning_rate": 5.590000000000001e-05, "loss": 0.4014, "step": 11186 }, { "epoch": 0.6264419307873222, "grad_norm": 1.2649202346801758, "learning_rate": 5.590500000000001e-05, "loss": 0.3719, "step": 11187 }, { "epoch": 0.6264979280994513, "grad_norm": 1.101420521736145, "learning_rate": 5.5910000000000005e-05, "loss": 0.3204, "step": 11188 }, { "epoch": 0.6265539254115803, "grad_norm": 1.2335408926010132, "learning_rate": 5.591500000000001e-05, "loss": 0.4149, "step": 11189 }, { "epoch": 0.6266099227237093, "grad_norm": 1.3392070531845093, "learning_rate": 5.592000000000001e-05, "loss": 0.5609, "step": 11190 }, { "epoch": 0.6266659200358383, 
"grad_norm": 1.339861512184143, "learning_rate": 5.5925000000000004e-05, "loss": 0.4918, "step": 11191 }, { "epoch": 0.6267219173479673, "grad_norm": 1.6837819814682007, "learning_rate": 5.593e-05, "loss": 0.5612, "step": 11192 }, { "epoch": 0.6267779146600964, "grad_norm": 1.3606007099151611, "learning_rate": 5.5935000000000006e-05, "loss": 0.4434, "step": 11193 }, { "epoch": 0.6268339119722254, "grad_norm": 1.2954528331756592, "learning_rate": 5.594e-05, "loss": 0.5468, "step": 11194 }, { "epoch": 0.6268899092843544, "grad_norm": 6.007063865661621, "learning_rate": 5.5945e-05, "loss": 0.4323, "step": 11195 }, { "epoch": 0.6269459065964834, "grad_norm": 1.3410000801086426, "learning_rate": 5.5950000000000005e-05, "loss": 0.3654, "step": 11196 }, { "epoch": 0.6270019039086124, "grad_norm": 1.3862791061401367, "learning_rate": 5.5955e-05, "loss": 0.515, "step": 11197 }, { "epoch": 0.6270579012207415, "grad_norm": 1.2850056886672974, "learning_rate": 5.596e-05, "loss": 0.4258, "step": 11198 }, { "epoch": 0.6271138985328705, "grad_norm": 1.225555658340454, "learning_rate": 5.5965000000000004e-05, "loss": 0.4058, "step": 11199 }, { "epoch": 0.6271698958449995, "grad_norm": 1.2777115106582642, "learning_rate": 5.597e-05, "loss": 0.3095, "step": 11200 }, { "epoch": 0.6272258931571285, "grad_norm": 1.643985629081726, "learning_rate": 5.5975e-05, "loss": 0.4018, "step": 11201 }, { "epoch": 0.6272818904692575, "grad_norm": 1.3758684396743774, "learning_rate": 5.5979999999999996e-05, "loss": 0.5092, "step": 11202 }, { "epoch": 0.6273378877813864, "grad_norm": 1.4489269256591797, "learning_rate": 5.5985e-05, "loss": 0.4493, "step": 11203 }, { "epoch": 0.6273938850935155, "grad_norm": 1.2936818599700928, "learning_rate": 5.599e-05, "loss": 0.4872, "step": 11204 }, { "epoch": 0.6274498824056445, "grad_norm": 1.2241450548171997, "learning_rate": 5.5994999999999995e-05, "loss": 0.3612, "step": 11205 }, { "epoch": 0.6275058797177735, "grad_norm": 1.1195197105407715, 
"learning_rate": 5.6000000000000006e-05, "loss": 0.3864, "step": 11206 }, { "epoch": 0.6275618770299025, "grad_norm": 1.2647725343704224, "learning_rate": 5.600500000000001e-05, "loss": 0.4808, "step": 11207 }, { "epoch": 0.6276178743420315, "grad_norm": 1.250089406967163, "learning_rate": 5.601000000000001e-05, "loss": 0.402, "step": 11208 }, { "epoch": 0.6276738716541606, "grad_norm": 1.9865802526474, "learning_rate": 5.6015000000000005e-05, "loss": 0.4525, "step": 11209 }, { "epoch": 0.6277298689662896, "grad_norm": 1.143970251083374, "learning_rate": 5.602000000000001e-05, "loss": 0.3206, "step": 11210 }, { "epoch": 0.6277858662784186, "grad_norm": 1.2424830198287964, "learning_rate": 5.6025000000000007e-05, "loss": 0.4686, "step": 11211 }, { "epoch": 0.6278418635905476, "grad_norm": 1.3840773105621338, "learning_rate": 5.6030000000000004e-05, "loss": 0.3806, "step": 11212 }, { "epoch": 0.6278978609026766, "grad_norm": 1.2964428663253784, "learning_rate": 5.6035e-05, "loss": 0.4664, "step": 11213 }, { "epoch": 0.6279538582148056, "grad_norm": 1.2423175573349, "learning_rate": 5.6040000000000006e-05, "loss": 0.3617, "step": 11214 }, { "epoch": 0.6280098555269347, "grad_norm": 1.3496135473251343, "learning_rate": 5.6045e-05, "loss": 0.5255, "step": 11215 }, { "epoch": 0.6280658528390637, "grad_norm": 1.2265090942382812, "learning_rate": 5.605e-05, "loss": 0.4631, "step": 11216 }, { "epoch": 0.6281218501511927, "grad_norm": 1.2823275327682495, "learning_rate": 5.6055000000000005e-05, "loss": 0.3997, "step": 11217 }, { "epoch": 0.6281778474633217, "grad_norm": 1.5288941860198975, "learning_rate": 5.606e-05, "loss": 0.5032, "step": 11218 }, { "epoch": 0.6282338447754507, "grad_norm": 1.2193326950073242, "learning_rate": 5.6065e-05, "loss": 0.401, "step": 11219 }, { "epoch": 0.6282898420875798, "grad_norm": 1.213442087173462, "learning_rate": 5.6070000000000004e-05, "loss": 0.4321, "step": 11220 }, { "epoch": 0.6283458393997088, "grad_norm": 1.3099799156188965, 
"learning_rate": 5.6075e-05, "loss": 0.411, "step": 11221 }, { "epoch": 0.6284018367118378, "grad_norm": 1.056294322013855, "learning_rate": 5.608e-05, "loss": 0.2766, "step": 11222 }, { "epoch": 0.6284578340239668, "grad_norm": 1.1698724031448364, "learning_rate": 5.6084999999999996e-05, "loss": 0.413, "step": 11223 }, { "epoch": 0.6285138313360958, "grad_norm": 1.145461916923523, "learning_rate": 5.609e-05, "loss": 0.2676, "step": 11224 }, { "epoch": 0.6285698286482249, "grad_norm": 1.2429014444351196, "learning_rate": 5.6095e-05, "loss": 0.4648, "step": 11225 }, { "epoch": 0.6286258259603539, "grad_norm": 1.3581485748291016, "learning_rate": 5.610000000000001e-05, "loss": 0.4733, "step": 11226 }, { "epoch": 0.6286818232724829, "grad_norm": 1.0891610383987427, "learning_rate": 5.6105000000000006e-05, "loss": 0.3796, "step": 11227 }, { "epoch": 0.6287378205846119, "grad_norm": 1.4651587009429932, "learning_rate": 5.611000000000001e-05, "loss": 0.5484, "step": 11228 }, { "epoch": 0.6287938178967409, "grad_norm": 1.1366677284240723, "learning_rate": 5.611500000000001e-05, "loss": 0.3699, "step": 11229 }, { "epoch": 0.62884981520887, "grad_norm": 1.1697146892547607, "learning_rate": 5.6120000000000005e-05, "loss": 0.4184, "step": 11230 }, { "epoch": 0.628905812520999, "grad_norm": 1.4018419981002808, "learning_rate": 5.6125e-05, "loss": 0.5213, "step": 11231 }, { "epoch": 0.628961809833128, "grad_norm": 1.3317739963531494, "learning_rate": 5.6130000000000006e-05, "loss": 0.4804, "step": 11232 }, { "epoch": 0.629017807145257, "grad_norm": 1.0773656368255615, "learning_rate": 5.6135000000000004e-05, "loss": 0.3926, "step": 11233 }, { "epoch": 0.629073804457386, "grad_norm": 2.1188793182373047, "learning_rate": 5.614e-05, "loss": 0.3593, "step": 11234 }, { "epoch": 0.629129801769515, "grad_norm": 1.1686667203903198, "learning_rate": 5.6145000000000005e-05, "loss": 0.3245, "step": 11235 }, { "epoch": 0.6291857990816441, "grad_norm": 1.5293480157852173, "learning_rate": 
5.615e-05, "loss": 0.501, "step": 11236 }, { "epoch": 0.6292417963937731, "grad_norm": 1.2358907461166382, "learning_rate": 5.6155e-05, "loss": 0.3587, "step": 11237 }, { "epoch": 0.6292977937059021, "grad_norm": 6.005584239959717, "learning_rate": 5.6160000000000004e-05, "loss": 0.4272, "step": 11238 }, { "epoch": 0.6293537910180311, "grad_norm": 1.945157527923584, "learning_rate": 5.6165e-05, "loss": 0.5091, "step": 11239 }, { "epoch": 0.6294097883301601, "grad_norm": 1.199906349182129, "learning_rate": 5.617e-05, "loss": 0.4088, "step": 11240 }, { "epoch": 0.6294657856422892, "grad_norm": 1.1609842777252197, "learning_rate": 5.6175e-05, "loss": 0.37, "step": 11241 }, { "epoch": 0.6295217829544182, "grad_norm": 1.303733229637146, "learning_rate": 5.618e-05, "loss": 0.3684, "step": 11242 }, { "epoch": 0.6295777802665472, "grad_norm": 1.3500808477401733, "learning_rate": 5.6185e-05, "loss": 0.49, "step": 11243 }, { "epoch": 0.6296337775786762, "grad_norm": 1.1155012845993042, "learning_rate": 5.6189999999999996e-05, "loss": 0.3952, "step": 11244 }, { "epoch": 0.6296897748908052, "grad_norm": 1.3555607795715332, "learning_rate": 5.6195e-05, "loss": 0.4157, "step": 11245 }, { "epoch": 0.6297457722029343, "grad_norm": 1.3759735822677612, "learning_rate": 5.620000000000001e-05, "loss": 0.3974, "step": 11246 }, { "epoch": 0.6298017695150633, "grad_norm": 1.5575779676437378, "learning_rate": 5.620500000000001e-05, "loss": 0.3911, "step": 11247 }, { "epoch": 0.6298577668271923, "grad_norm": 1.4618136882781982, "learning_rate": 5.6210000000000006e-05, "loss": 0.4637, "step": 11248 }, { "epoch": 0.6299137641393213, "grad_norm": 1.4149360656738281, "learning_rate": 5.621500000000001e-05, "loss": 0.4352, "step": 11249 }, { "epoch": 0.6299697614514503, "grad_norm": 1.1021887063980103, "learning_rate": 5.622000000000001e-05, "loss": 0.2918, "step": 11250 }, { "epoch": 0.6300257587635794, "grad_norm": 1.0999575853347778, "learning_rate": 5.6225000000000005e-05, "loss": 0.4268, 
"step": 11251 }, { "epoch": 0.6300817560757084, "grad_norm": 1.0819861888885498, "learning_rate": 5.623e-05, "loss": 0.5071, "step": 11252 }, { "epoch": 0.6301377533878374, "grad_norm": 1.3975441455841064, "learning_rate": 5.6235000000000006e-05, "loss": 0.4135, "step": 11253 }, { "epoch": 0.6301937506999664, "grad_norm": 1.3753687143325806, "learning_rate": 5.6240000000000004e-05, "loss": 0.3901, "step": 11254 }, { "epoch": 0.6302497480120954, "grad_norm": 1.2578248977661133, "learning_rate": 5.6245e-05, "loss": 0.4152, "step": 11255 }, { "epoch": 0.6303057453242245, "grad_norm": 1.2870309352874756, "learning_rate": 5.6250000000000005e-05, "loss": 0.4242, "step": 11256 }, { "epoch": 0.6303617426363535, "grad_norm": 1.084806203842163, "learning_rate": 5.6255e-05, "loss": 0.4124, "step": 11257 }, { "epoch": 0.6304177399484825, "grad_norm": 1.4478613138198853, "learning_rate": 5.626e-05, "loss": 0.3683, "step": 11258 }, { "epoch": 0.6304737372606115, "grad_norm": 1.4411780834197998, "learning_rate": 5.6265000000000004e-05, "loss": 0.5716, "step": 11259 }, { "epoch": 0.6305297345727405, "grad_norm": 1.5875511169433594, "learning_rate": 5.627e-05, "loss": 0.4225, "step": 11260 }, { "epoch": 0.6305857318848695, "grad_norm": 1.0330952405929565, "learning_rate": 5.6275e-05, "loss": 0.4313, "step": 11261 }, { "epoch": 0.6306417291969986, "grad_norm": 1.1702361106872559, "learning_rate": 5.6279999999999996e-05, "loss": 0.4511, "step": 11262 }, { "epoch": 0.6306977265091276, "grad_norm": 1.1063364744186401, "learning_rate": 5.6285e-05, "loss": 0.3643, "step": 11263 }, { "epoch": 0.6307537238212566, "grad_norm": 1.007749080657959, "learning_rate": 5.629e-05, "loss": 0.3217, "step": 11264 }, { "epoch": 0.6308097211333856, "grad_norm": 1.3514572381973267, "learning_rate": 5.6294999999999995e-05, "loss": 0.4353, "step": 11265 }, { "epoch": 0.6308657184455146, "grad_norm": 1.3110345602035522, "learning_rate": 5.63e-05, "loss": 0.3841, "step": 11266 }, { "epoch": 
0.6309217157576437, "grad_norm": 1.281907081604004, "learning_rate": 5.630500000000001e-05, "loss": 0.4366, "step": 11267 }, { "epoch": 0.6309777130697727, "grad_norm": 1.4426237344741821, "learning_rate": 5.631000000000001e-05, "loss": 0.4704, "step": 11268 }, { "epoch": 0.6310337103819017, "grad_norm": 1.2236207723617554, "learning_rate": 5.6315000000000005e-05, "loss": 0.381, "step": 11269 }, { "epoch": 0.6310897076940307, "grad_norm": 1.2905206680297852, "learning_rate": 5.632e-05, "loss": 0.4791, "step": 11270 }, { "epoch": 0.6311457050061597, "grad_norm": 1.352844476699829, "learning_rate": 5.632500000000001e-05, "loss": 0.3825, "step": 11271 }, { "epoch": 0.6312017023182888, "grad_norm": 1.462010145187378, "learning_rate": 5.6330000000000004e-05, "loss": 0.7624, "step": 11272 }, { "epoch": 0.6312576996304178, "grad_norm": 1.4606443643569946, "learning_rate": 5.6335e-05, "loss": 0.4573, "step": 11273 }, { "epoch": 0.6313136969425468, "grad_norm": 1.1596461534500122, "learning_rate": 5.6340000000000006e-05, "loss": 0.4066, "step": 11274 }, { "epoch": 0.6313696942546758, "grad_norm": 1.125241994857788, "learning_rate": 5.6345000000000003e-05, "loss": 0.3541, "step": 11275 }, { "epoch": 0.6314256915668048, "grad_norm": 1.3315197229385376, "learning_rate": 5.635e-05, "loss": 0.4487, "step": 11276 }, { "epoch": 0.6314816888789339, "grad_norm": 1.1584787368774414, "learning_rate": 5.6355000000000005e-05, "loss": 0.2897, "step": 11277 }, { "epoch": 0.6315376861910629, "grad_norm": 1.058961272239685, "learning_rate": 5.636e-05, "loss": 0.3676, "step": 11278 }, { "epoch": 0.6315936835031919, "grad_norm": 1.2803348302841187, "learning_rate": 5.6365e-05, "loss": 0.5269, "step": 11279 }, { "epoch": 0.6316496808153209, "grad_norm": 1.4021506309509277, "learning_rate": 5.637e-05, "loss": 0.3149, "step": 11280 }, { "epoch": 0.6317056781274499, "grad_norm": 1.371085286140442, "learning_rate": 5.6375e-05, "loss": 0.5523, "step": 11281 }, { "epoch": 0.631761675439579, 
"grad_norm": 1.7258166074752808, "learning_rate": 5.638e-05, "loss": 0.637, "step": 11282 }, { "epoch": 0.631817672751708, "grad_norm": 1.157315731048584, "learning_rate": 5.6384999999999996e-05, "loss": 0.3429, "step": 11283 }, { "epoch": 0.631873670063837, "grad_norm": 1.417036771774292, "learning_rate": 5.639e-05, "loss": 0.4629, "step": 11284 }, { "epoch": 0.631929667375966, "grad_norm": 1.314966082572937, "learning_rate": 5.6395e-05, "loss": 0.5163, "step": 11285 }, { "epoch": 0.6319856646880949, "grad_norm": 1.2751572132110596, "learning_rate": 5.6399999999999995e-05, "loss": 0.4377, "step": 11286 }, { "epoch": 0.6320416620002239, "grad_norm": 1.1246416568756104, "learning_rate": 5.6405000000000006e-05, "loss": 0.4409, "step": 11287 }, { "epoch": 0.632097659312353, "grad_norm": 1.2444161176681519, "learning_rate": 5.641000000000001e-05, "loss": 0.4737, "step": 11288 }, { "epoch": 0.632153656624482, "grad_norm": 1.0859520435333252, "learning_rate": 5.641500000000001e-05, "loss": 0.4256, "step": 11289 }, { "epoch": 0.632209653936611, "grad_norm": 1.3945276737213135, "learning_rate": 5.6420000000000005e-05, "loss": 0.3861, "step": 11290 }, { "epoch": 0.63226565124874, "grad_norm": 1.2874191999435425, "learning_rate": 5.6425e-05, "loss": 0.4776, "step": 11291 }, { "epoch": 0.632321648560869, "grad_norm": 1.2917265892028809, "learning_rate": 5.643000000000001e-05, "loss": 0.4273, "step": 11292 }, { "epoch": 0.632377645872998, "grad_norm": 1.1397157907485962, "learning_rate": 5.6435000000000004e-05, "loss": 0.3987, "step": 11293 }, { "epoch": 0.6324336431851271, "grad_norm": 1.1848894357681274, "learning_rate": 5.644e-05, "loss": 0.4868, "step": 11294 }, { "epoch": 0.6324896404972561, "grad_norm": 1.2006059885025024, "learning_rate": 5.6445000000000006e-05, "loss": 0.4025, "step": 11295 }, { "epoch": 0.6325456378093851, "grad_norm": 1.505966067314148, "learning_rate": 5.645e-05, "loss": 0.4714, "step": 11296 }, { "epoch": 0.6326016351215141, "grad_norm": 
1.1986751556396484, "learning_rate": 5.6455e-05, "loss": 0.4536, "step": 11297 }, { "epoch": 0.6326576324336431, "grad_norm": 1.4272018671035767, "learning_rate": 5.6460000000000005e-05, "loss": 0.4774, "step": 11298 }, { "epoch": 0.6327136297457722, "grad_norm": 1.498241901397705, "learning_rate": 5.6465e-05, "loss": 0.5556, "step": 11299 }, { "epoch": 0.6327696270579012, "grad_norm": 6.183231830596924, "learning_rate": 5.647e-05, "loss": 0.4907, "step": 11300 }, { "epoch": 0.6328256243700302, "grad_norm": 1.3515992164611816, "learning_rate": 5.6475e-05, "loss": 0.4121, "step": 11301 }, { "epoch": 0.6328816216821592, "grad_norm": 1.2577941417694092, "learning_rate": 5.648e-05, "loss": 0.4679, "step": 11302 }, { "epoch": 0.6329376189942882, "grad_norm": 1.0943795442581177, "learning_rate": 5.6485e-05, "loss": 0.3358, "step": 11303 }, { "epoch": 0.6329936163064173, "grad_norm": 1.1649833917617798, "learning_rate": 5.6489999999999996e-05, "loss": 0.4517, "step": 11304 }, { "epoch": 0.6330496136185463, "grad_norm": 1.3486391305923462, "learning_rate": 5.6495e-05, "loss": 0.3764, "step": 11305 }, { "epoch": 0.6331056109306753, "grad_norm": 1.4467990398406982, "learning_rate": 5.65e-05, "loss": 0.4745, "step": 11306 }, { "epoch": 0.6331616082428043, "grad_norm": 1.2465481758117676, "learning_rate": 5.650500000000001e-05, "loss": 0.4743, "step": 11307 }, { "epoch": 0.6332176055549333, "grad_norm": 1.2886686325073242, "learning_rate": 5.6510000000000006e-05, "loss": 0.4211, "step": 11308 }, { "epoch": 0.6332736028670624, "grad_norm": 1.816685676574707, "learning_rate": 5.6515000000000004e-05, "loss": 0.4136, "step": 11309 }, { "epoch": 0.6333296001791914, "grad_norm": 1.1860828399658203, "learning_rate": 5.652000000000001e-05, "loss": 0.4991, "step": 11310 }, { "epoch": 0.6333855974913204, "grad_norm": 1.1487269401550293, "learning_rate": 5.6525000000000005e-05, "loss": 0.4361, "step": 11311 }, { "epoch": 0.6334415948034494, "grad_norm": 1.3433301448822021, 
"learning_rate": 5.653e-05, "loss": 0.3952, "step": 11312 }, { "epoch": 0.6334975921155784, "grad_norm": 1.2602077722549438, "learning_rate": 5.653500000000001e-05, "loss": 0.5292, "step": 11313 }, { "epoch": 0.6335535894277075, "grad_norm": 1.15339994430542, "learning_rate": 5.6540000000000004e-05, "loss": 0.3208, "step": 11314 }, { "epoch": 0.6336095867398365, "grad_norm": 1.2489097118377686, "learning_rate": 5.6545e-05, "loss": 0.3924, "step": 11315 }, { "epoch": 0.6336655840519655, "grad_norm": 1.2408370971679688, "learning_rate": 5.6550000000000006e-05, "loss": 0.311, "step": 11316 }, { "epoch": 0.6337215813640945, "grad_norm": 1.1563353538513184, "learning_rate": 5.6555e-05, "loss": 0.3385, "step": 11317 }, { "epoch": 0.6337775786762235, "grad_norm": 1.1346092224121094, "learning_rate": 5.656e-05, "loss": 0.2971, "step": 11318 }, { "epoch": 0.6338335759883525, "grad_norm": 1.3084992170333862, "learning_rate": 5.6565e-05, "loss": 0.4301, "step": 11319 }, { "epoch": 0.6338895733004816, "grad_norm": 1.445418357849121, "learning_rate": 5.657e-05, "loss": 0.4382, "step": 11320 }, { "epoch": 0.6339455706126106, "grad_norm": 1.1264489889144897, "learning_rate": 5.6575e-05, "loss": 0.447, "step": 11321 }, { "epoch": 0.6340015679247396, "grad_norm": 1.0811023712158203, "learning_rate": 5.658e-05, "loss": 0.3592, "step": 11322 }, { "epoch": 0.6340575652368686, "grad_norm": 1.1096742153167725, "learning_rate": 5.6585e-05, "loss": 0.3223, "step": 11323 }, { "epoch": 0.6341135625489976, "grad_norm": 1.3272207975387573, "learning_rate": 5.659e-05, "loss": 0.4864, "step": 11324 }, { "epoch": 0.6341695598611267, "grad_norm": 1.095194697380066, "learning_rate": 5.6594999999999996e-05, "loss": 0.3495, "step": 11325 }, { "epoch": 0.6342255571732557, "grad_norm": 1.1297070980072021, "learning_rate": 5.66e-05, "loss": 0.4301, "step": 11326 }, { "epoch": 0.6342815544853847, "grad_norm": 1.390947937965393, "learning_rate": 5.660500000000001e-05, "loss": 0.414, "step": 11327 }, { 
"epoch": 0.6343375517975137, "grad_norm": 1.2035391330718994, "learning_rate": 5.661000000000001e-05, "loss": 0.4177, "step": 11328 }, { "epoch": 0.6343935491096427, "grad_norm": 1.2064096927642822, "learning_rate": 5.6615000000000006e-05, "loss": 0.3505, "step": 11329 }, { "epoch": 0.6344495464217718, "grad_norm": 1.132487177848816, "learning_rate": 5.6620000000000003e-05, "loss": 0.4181, "step": 11330 }, { "epoch": 0.6345055437339008, "grad_norm": 1.305894136428833, "learning_rate": 5.662500000000001e-05, "loss": 0.4296, "step": 11331 }, { "epoch": 0.6345615410460298, "grad_norm": 1.579166054725647, "learning_rate": 5.6630000000000005e-05, "loss": 0.4964, "step": 11332 }, { "epoch": 0.6346175383581588, "grad_norm": 1.2170345783233643, "learning_rate": 5.6635e-05, "loss": 0.4896, "step": 11333 }, { "epoch": 0.6346735356702878, "grad_norm": 1.304287075996399, "learning_rate": 5.6640000000000007e-05, "loss": 0.5304, "step": 11334 }, { "epoch": 0.6347295329824169, "grad_norm": 1.2192530632019043, "learning_rate": 5.6645000000000004e-05, "loss": 0.3684, "step": 11335 }, { "epoch": 0.6347855302945459, "grad_norm": 1.2074230909347534, "learning_rate": 5.665e-05, "loss": 0.4713, "step": 11336 }, { "epoch": 0.6348415276066749, "grad_norm": 1.4461374282836914, "learning_rate": 5.6655000000000006e-05, "loss": 0.476, "step": 11337 }, { "epoch": 0.6348975249188039, "grad_norm": 1.287718415260315, "learning_rate": 5.666e-05, "loss": 0.4544, "step": 11338 }, { "epoch": 0.6349535222309329, "grad_norm": 1.1624178886413574, "learning_rate": 5.6665e-05, "loss": 0.3842, "step": 11339 }, { "epoch": 0.635009519543062, "grad_norm": 1.1182670593261719, "learning_rate": 5.667e-05, "loss": 0.4279, "step": 11340 }, { "epoch": 0.635065516855191, "grad_norm": 1.5850125551223755, "learning_rate": 5.6675e-05, "loss": 0.616, "step": 11341 }, { "epoch": 0.63512151416732, "grad_norm": 1.3708568811416626, "learning_rate": 5.668e-05, "loss": 0.615, "step": 11342 }, { "epoch": 0.635177511479449, 
"grad_norm": 1.1200449466705322, "learning_rate": 5.6685e-05, "loss": 0.35, "step": 11343 }, { "epoch": 0.635233508791578, "grad_norm": 1.2273062467575073, "learning_rate": 5.669e-05, "loss": 0.3545, "step": 11344 }, { "epoch": 0.635289506103707, "grad_norm": 1.147248387336731, "learning_rate": 5.6695e-05, "loss": 0.4342, "step": 11345 }, { "epoch": 0.6353455034158361, "grad_norm": 1.3605453968048096, "learning_rate": 5.6699999999999996e-05, "loss": 0.4196, "step": 11346 }, { "epoch": 0.6354015007279651, "grad_norm": 1.3121367692947388, "learning_rate": 5.670500000000001e-05, "loss": 0.4314, "step": 11347 }, { "epoch": 0.6354574980400941, "grad_norm": 1.303649663925171, "learning_rate": 5.6710000000000004e-05, "loss": 0.4225, "step": 11348 }, { "epoch": 0.6355134953522231, "grad_norm": 1.190080165863037, "learning_rate": 5.671500000000001e-05, "loss": 0.436, "step": 11349 }, { "epoch": 0.6355694926643521, "grad_norm": 1.2896257638931274, "learning_rate": 5.6720000000000006e-05, "loss": 0.3563, "step": 11350 }, { "epoch": 0.6356254899764812, "grad_norm": 1.3367797136306763, "learning_rate": 5.6725e-05, "loss": 0.4411, "step": 11351 }, { "epoch": 0.6356814872886102, "grad_norm": 1.5831478834152222, "learning_rate": 5.673000000000001e-05, "loss": 0.5588, "step": 11352 }, { "epoch": 0.6357374846007392, "grad_norm": 1.2519817352294922, "learning_rate": 5.6735000000000005e-05, "loss": 0.4346, "step": 11353 }, { "epoch": 0.6357934819128682, "grad_norm": 1.526275634765625, "learning_rate": 5.674e-05, "loss": 0.4632, "step": 11354 }, { "epoch": 0.6358494792249972, "grad_norm": 1.3396154642105103, "learning_rate": 5.6745000000000006e-05, "loss": 0.5087, "step": 11355 }, { "epoch": 0.6359054765371263, "grad_norm": 1.5325535535812378, "learning_rate": 5.6750000000000004e-05, "loss": 0.4743, "step": 11356 }, { "epoch": 0.6359614738492553, "grad_norm": 1.4574859142303467, "learning_rate": 5.6755e-05, "loss": 0.4546, "step": 11357 }, { "epoch": 0.6360174711613843, "grad_norm": 
1.2215218544006348, "learning_rate": 5.6760000000000005e-05, "loss": 0.4006, "step": 11358 }, { "epoch": 0.6360734684735133, "grad_norm": 1.2708491086959839, "learning_rate": 5.6765e-05, "loss": 0.575, "step": 11359 }, { "epoch": 0.6361294657856423, "grad_norm": 1.4911993741989136, "learning_rate": 5.677e-05, "loss": 0.4185, "step": 11360 }, { "epoch": 0.6361854630977714, "grad_norm": 1.4460312128067017, "learning_rate": 5.6775e-05, "loss": 0.5021, "step": 11361 }, { "epoch": 0.6362414604099004, "grad_norm": 1.0688856840133667, "learning_rate": 5.678e-05, "loss": 0.3547, "step": 11362 }, { "epoch": 0.6362974577220294, "grad_norm": 1.122283935546875, "learning_rate": 5.6785e-05, "loss": 0.4026, "step": 11363 }, { "epoch": 0.6363534550341584, "grad_norm": 1.3614938259124756, "learning_rate": 5.679e-05, "loss": 0.5103, "step": 11364 }, { "epoch": 0.6364094523462874, "grad_norm": 1.4329949617385864, "learning_rate": 5.6795e-05, "loss": 0.405, "step": 11365 }, { "epoch": 0.6364654496584164, "grad_norm": 1.2382676601409912, "learning_rate": 5.68e-05, "loss": 0.4251, "step": 11366 }, { "epoch": 0.6365214469705455, "grad_norm": 1.3758963346481323, "learning_rate": 5.680500000000001e-05, "loss": 0.5959, "step": 11367 }, { "epoch": 0.6365774442826744, "grad_norm": 1.1326810121536255, "learning_rate": 5.681000000000001e-05, "loss": 0.4424, "step": 11368 }, { "epoch": 0.6366334415948034, "grad_norm": 1.1721724271774292, "learning_rate": 5.6815000000000004e-05, "loss": 0.4848, "step": 11369 }, { "epoch": 0.6366894389069324, "grad_norm": 1.471866250038147, "learning_rate": 5.682000000000001e-05, "loss": 0.3995, "step": 11370 }, { "epoch": 0.6367454362190614, "grad_norm": 1.4628483057022095, "learning_rate": 5.6825000000000006e-05, "loss": 0.4036, "step": 11371 }, { "epoch": 0.6368014335311905, "grad_norm": 1.1856060028076172, "learning_rate": 5.683e-05, "loss": 0.422, "step": 11372 }, { "epoch": 0.6368574308433195, "grad_norm": 1.3349860906600952, "learning_rate": 
5.683500000000001e-05, "loss": 0.349, "step": 11373 }, { "epoch": 0.6369134281554485, "grad_norm": 1.2007198333740234, "learning_rate": 5.6840000000000005e-05, "loss": 0.3308, "step": 11374 }, { "epoch": 0.6369694254675775, "grad_norm": 1.6263080835342407, "learning_rate": 5.6845e-05, "loss": 0.4246, "step": 11375 }, { "epoch": 0.6370254227797065, "grad_norm": 1.3804060220718384, "learning_rate": 5.6850000000000006e-05, "loss": 0.5547, "step": 11376 }, { "epoch": 0.6370814200918355, "grad_norm": 1.197320580482483, "learning_rate": 5.6855000000000004e-05, "loss": 0.4101, "step": 11377 }, { "epoch": 0.6371374174039646, "grad_norm": 1.2640727758407593, "learning_rate": 5.686e-05, "loss": 0.407, "step": 11378 }, { "epoch": 0.6371934147160936, "grad_norm": 1.3745468854904175, "learning_rate": 5.6865e-05, "loss": 0.4645, "step": 11379 }, { "epoch": 0.6372494120282226, "grad_norm": 1.1405552625656128, "learning_rate": 5.687e-05, "loss": 0.3982, "step": 11380 }, { "epoch": 0.6373054093403516, "grad_norm": 1.1586295366287231, "learning_rate": 5.6875e-05, "loss": 0.3587, "step": 11381 }, { "epoch": 0.6373614066524806, "grad_norm": 1.2616404294967651, "learning_rate": 5.688e-05, "loss": 0.4723, "step": 11382 }, { "epoch": 0.6374174039646097, "grad_norm": 1.6011511087417603, "learning_rate": 5.6885e-05, "loss": 0.4597, "step": 11383 }, { "epoch": 0.6374734012767387, "grad_norm": 1.3609460592269897, "learning_rate": 5.689e-05, "loss": 0.4977, "step": 11384 }, { "epoch": 0.6375293985888677, "grad_norm": 1.7835408449172974, "learning_rate": 5.6894999999999997e-05, "loss": 0.5518, "step": 11385 }, { "epoch": 0.6375853959009967, "grad_norm": 1.487677812576294, "learning_rate": 5.69e-05, "loss": 0.5093, "step": 11386 }, { "epoch": 0.6376413932131257, "grad_norm": 1.3706413507461548, "learning_rate": 5.6905e-05, "loss": 0.5073, "step": 11387 }, { "epoch": 0.6376973905252548, "grad_norm": 1.1740338802337646, "learning_rate": 5.691000000000001e-05, "loss": 0.4306, "step": 11388 }, { 
"epoch": 0.6377533878373838, "grad_norm": 1.3596729040145874, "learning_rate": 5.6915000000000006e-05, "loss": 0.5845, "step": 11389 }, { "epoch": 0.6378093851495128, "grad_norm": 1.2662302255630493, "learning_rate": 5.6920000000000004e-05, "loss": 0.4194, "step": 11390 }, { "epoch": 0.6378653824616418, "grad_norm": 1.2910363674163818, "learning_rate": 5.692500000000001e-05, "loss": 0.4155, "step": 11391 }, { "epoch": 0.6379213797737708, "grad_norm": 1.5611884593963623, "learning_rate": 5.6930000000000006e-05, "loss": 0.4015, "step": 11392 }, { "epoch": 0.6379773770858999, "grad_norm": 1.6610698699951172, "learning_rate": 5.6935e-05, "loss": 0.4132, "step": 11393 }, { "epoch": 0.6380333743980289, "grad_norm": 1.3034590482711792, "learning_rate": 5.694000000000001e-05, "loss": 0.4829, "step": 11394 }, { "epoch": 0.6380893717101579, "grad_norm": 1.194251537322998, "learning_rate": 5.6945000000000005e-05, "loss": 0.3595, "step": 11395 }, { "epoch": 0.6381453690222869, "grad_norm": 1.3438389301300049, "learning_rate": 5.695e-05, "loss": 0.4172, "step": 11396 }, { "epoch": 0.6382013663344159, "grad_norm": 1.3963749408721924, "learning_rate": 5.6955000000000006e-05, "loss": 0.6238, "step": 11397 }, { "epoch": 0.638257363646545, "grad_norm": 1.1640512943267822, "learning_rate": 5.6960000000000004e-05, "loss": 0.4674, "step": 11398 }, { "epoch": 0.638313360958674, "grad_norm": 1.4330171346664429, "learning_rate": 5.6965e-05, "loss": 0.3955, "step": 11399 }, { "epoch": 0.638369358270803, "grad_norm": 1.1675355434417725, "learning_rate": 5.697e-05, "loss": 0.3712, "step": 11400 }, { "epoch": 0.638425355582932, "grad_norm": 1.525566577911377, "learning_rate": 5.6975e-05, "loss": 0.4081, "step": 11401 }, { "epoch": 0.638481352895061, "grad_norm": 1.232356309890747, "learning_rate": 5.698e-05, "loss": 0.4272, "step": 11402 }, { "epoch": 0.63853735020719, "grad_norm": 1.3237557411193848, "learning_rate": 5.6985e-05, "loss": 0.4409, "step": 11403 }, { "epoch": 0.6385933475193191, 
"grad_norm": 1.2331622838974, "learning_rate": 5.699e-05, "loss": 0.3428, "step": 11404 }, { "epoch": 0.6386493448314481, "grad_norm": 1.3364495038986206, "learning_rate": 5.6995e-05, "loss": 0.3592, "step": 11405 }, { "epoch": 0.6387053421435771, "grad_norm": 1.2224996089935303, "learning_rate": 5.6999999999999996e-05, "loss": 0.462, "step": 11406 }, { "epoch": 0.6387613394557061, "grad_norm": 1.1252217292785645, "learning_rate": 5.7005e-05, "loss": 0.33, "step": 11407 }, { "epoch": 0.6388173367678351, "grad_norm": 1.2200932502746582, "learning_rate": 5.7010000000000005e-05, "loss": 0.3552, "step": 11408 }, { "epoch": 0.6388733340799642, "grad_norm": 1.2461763620376587, "learning_rate": 5.701500000000001e-05, "loss": 0.3404, "step": 11409 }, { "epoch": 0.6389293313920932, "grad_norm": 1.7498670816421509, "learning_rate": 5.7020000000000006e-05, "loss": 0.576, "step": 11410 }, { "epoch": 0.6389853287042222, "grad_norm": 1.3398674726486206, "learning_rate": 5.7025000000000004e-05, "loss": 0.4954, "step": 11411 }, { "epoch": 0.6390413260163512, "grad_norm": 1.282238245010376, "learning_rate": 5.703000000000001e-05, "loss": 0.4809, "step": 11412 }, { "epoch": 0.6390973233284802, "grad_norm": 1.2816829681396484, "learning_rate": 5.7035000000000005e-05, "loss": 0.3704, "step": 11413 }, { "epoch": 0.6391533206406093, "grad_norm": 1.433122992515564, "learning_rate": 5.704e-05, "loss": 0.5439, "step": 11414 }, { "epoch": 0.6392093179527383, "grad_norm": 3.01277494430542, "learning_rate": 5.704500000000001e-05, "loss": 0.4214, "step": 11415 }, { "epoch": 0.6392653152648673, "grad_norm": 1.63400137424469, "learning_rate": 5.7050000000000004e-05, "loss": 0.3825, "step": 11416 }, { "epoch": 0.6393213125769963, "grad_norm": 1.2651917934417725, "learning_rate": 5.7055e-05, "loss": 0.4381, "step": 11417 }, { "epoch": 0.6393773098891253, "grad_norm": 1.2086280584335327, "learning_rate": 5.706e-05, "loss": 0.4712, "step": 11418 }, { "epoch": 0.6394333072012544, "grad_norm": 
1.4258328676223755, "learning_rate": 5.7065e-05, "loss": 0.4264, "step": 11419 }, { "epoch": 0.6394893045133834, "grad_norm": 1.3295502662658691, "learning_rate": 5.707e-05, "loss": 0.4893, "step": 11420 }, { "epoch": 0.6395453018255124, "grad_norm": 1.505089521408081, "learning_rate": 5.7075e-05, "loss": 0.54, "step": 11421 }, { "epoch": 0.6396012991376414, "grad_norm": 1.2171998023986816, "learning_rate": 5.708e-05, "loss": 0.5263, "step": 11422 }, { "epoch": 0.6396572964497704, "grad_norm": 1.2634024620056152, "learning_rate": 5.7085e-05, "loss": 0.5035, "step": 11423 }, { "epoch": 0.6397132937618994, "grad_norm": 1.3135502338409424, "learning_rate": 5.709e-05, "loss": 0.3592, "step": 11424 }, { "epoch": 0.6397692910740285, "grad_norm": 1.3976722955703735, "learning_rate": 5.7095e-05, "loss": 0.5523, "step": 11425 }, { "epoch": 0.6398252883861575, "grad_norm": 1.4321637153625488, "learning_rate": 5.71e-05, "loss": 0.4316, "step": 11426 }, { "epoch": 0.6398812856982865, "grad_norm": 1.2029895782470703, "learning_rate": 5.7104999999999996e-05, "loss": 0.3667, "step": 11427 }, { "epoch": 0.6399372830104155, "grad_norm": 1.2664986848831177, "learning_rate": 5.711000000000001e-05, "loss": 0.3627, "step": 11428 }, { "epoch": 0.6399932803225445, "grad_norm": 1.3680680990219116, "learning_rate": 5.7115000000000005e-05, "loss": 0.3631, "step": 11429 }, { "epoch": 0.6400492776346736, "grad_norm": 1.1282374858856201, "learning_rate": 5.712000000000001e-05, "loss": 0.399, "step": 11430 }, { "epoch": 0.6401052749468026, "grad_norm": 1.3647115230560303, "learning_rate": 5.7125000000000006e-05, "loss": 0.5315, "step": 11431 }, { "epoch": 0.6401612722589316, "grad_norm": 1.1750375032424927, "learning_rate": 5.7130000000000004e-05, "loss": 0.3252, "step": 11432 }, { "epoch": 0.6402172695710606, "grad_norm": 1.1515816450119019, "learning_rate": 5.713500000000001e-05, "loss": 0.3207, "step": 11433 }, { "epoch": 0.6402732668831896, "grad_norm": 1.2982523441314697, "learning_rate": 
5.7140000000000005e-05, "loss": 0.3753, "step": 11434 }, { "epoch": 0.6403292641953187, "grad_norm": 1.3154433965682983, "learning_rate": 5.7145e-05, "loss": 0.3783, "step": 11435 }, { "epoch": 0.6403852615074477, "grad_norm": 1.3863359689712524, "learning_rate": 5.715000000000001e-05, "loss": 0.4329, "step": 11436 }, { "epoch": 0.6404412588195767, "grad_norm": 1.5635638236999512, "learning_rate": 5.7155000000000004e-05, "loss": 0.3972, "step": 11437 }, { "epoch": 0.6404972561317057, "grad_norm": 1.3954685926437378, "learning_rate": 5.716e-05, "loss": 0.4937, "step": 11438 }, { "epoch": 0.6405532534438347, "grad_norm": 1.2547791004180908, "learning_rate": 5.7165e-05, "loss": 0.4415, "step": 11439 }, { "epoch": 0.6406092507559638, "grad_norm": 1.4040956497192383, "learning_rate": 5.717e-05, "loss": 0.4813, "step": 11440 }, { "epoch": 0.6406652480680928, "grad_norm": 1.5399640798568726, "learning_rate": 5.7175e-05, "loss": 0.3528, "step": 11441 }, { "epoch": 0.6407212453802218, "grad_norm": 1.1662284135818481, "learning_rate": 5.718e-05, "loss": 0.3048, "step": 11442 }, { "epoch": 0.6407772426923508, "grad_norm": 1.4365795850753784, "learning_rate": 5.7185e-05, "loss": 0.5575, "step": 11443 }, { "epoch": 0.6408332400044798, "grad_norm": 1.2736120223999023, "learning_rate": 5.719e-05, "loss": 0.4323, "step": 11444 }, { "epoch": 0.6408892373166089, "grad_norm": 1.616166591644287, "learning_rate": 5.7195e-05, "loss": 0.4377, "step": 11445 }, { "epoch": 0.6409452346287379, "grad_norm": 1.6133275032043457, "learning_rate": 5.72e-05, "loss": 0.469, "step": 11446 }, { "epoch": 0.6410012319408669, "grad_norm": 1.2576003074645996, "learning_rate": 5.7205e-05, "loss": 0.5083, "step": 11447 }, { "epoch": 0.6410572292529959, "grad_norm": 1.3431735038757324, "learning_rate": 5.721000000000001e-05, "loss": 0.4122, "step": 11448 }, { "epoch": 0.6411132265651249, "grad_norm": 1.3998430967330933, "learning_rate": 5.721500000000001e-05, "loss": 0.563, "step": 11449 }, { "epoch": 
0.641169223877254, "grad_norm": 1.4633538722991943, "learning_rate": 5.7220000000000004e-05, "loss": 0.4276, "step": 11450 }, { "epoch": 0.6412252211893829, "grad_norm": 1.1596487760543823, "learning_rate": 5.722500000000001e-05, "loss": 0.3429, "step": 11451 }, { "epoch": 0.6412812185015119, "grad_norm": 1.3280785083770752, "learning_rate": 5.7230000000000006e-05, "loss": 0.4165, "step": 11452 }, { "epoch": 0.6413372158136409, "grad_norm": 1.7387182712554932, "learning_rate": 5.7235000000000003e-05, "loss": 0.3599, "step": 11453 }, { "epoch": 0.6413932131257699, "grad_norm": 1.4629881381988525, "learning_rate": 5.724000000000001e-05, "loss": 0.3769, "step": 11454 }, { "epoch": 0.6414492104378989, "grad_norm": 1.3334035873413086, "learning_rate": 5.7245000000000005e-05, "loss": 0.335, "step": 11455 }, { "epoch": 0.641505207750028, "grad_norm": 1.2458865642547607, "learning_rate": 5.725e-05, "loss": 0.43, "step": 11456 }, { "epoch": 0.641561205062157, "grad_norm": 1.3170214891433716, "learning_rate": 5.7255e-05, "loss": 0.4686, "step": 11457 }, { "epoch": 0.641617202374286, "grad_norm": 1.4576579332351685, "learning_rate": 5.7260000000000004e-05, "loss": 0.4614, "step": 11458 }, { "epoch": 0.641673199686415, "grad_norm": 1.4827557802200317, "learning_rate": 5.7265e-05, "loss": 0.5667, "step": 11459 }, { "epoch": 0.641729196998544, "grad_norm": 1.1222044229507446, "learning_rate": 5.727e-05, "loss": 0.507, "step": 11460 }, { "epoch": 0.641785194310673, "grad_norm": 1.186562418937683, "learning_rate": 5.7275e-05, "loss": 0.3673, "step": 11461 }, { "epoch": 0.6418411916228021, "grad_norm": 1.4117891788482666, "learning_rate": 5.728e-05, "loss": 0.4402, "step": 11462 }, { "epoch": 0.6418971889349311, "grad_norm": 1.2655959129333496, "learning_rate": 5.7285e-05, "loss": 0.347, "step": 11463 }, { "epoch": 0.6419531862470601, "grad_norm": 1.325129508972168, "learning_rate": 5.729e-05, "loss": 0.5245, "step": 11464 }, { "epoch": 0.6420091835591891, "grad_norm": 
1.2996641397476196, "learning_rate": 5.7295e-05, "loss": 0.4196, "step": 11465 }, { "epoch": 0.6420651808713181, "grad_norm": 1.9610968828201294, "learning_rate": 5.73e-05, "loss": 0.4092, "step": 11466 }, { "epoch": 0.6421211781834472, "grad_norm": 1.3521019220352173, "learning_rate": 5.7304999999999994e-05, "loss": 0.3975, "step": 11467 }, { "epoch": 0.6421771754955762, "grad_norm": 1.2128995656967163, "learning_rate": 5.7310000000000005e-05, "loss": 0.3544, "step": 11468 }, { "epoch": 0.6422331728077052, "grad_norm": 1.2328879833221436, "learning_rate": 5.731500000000001e-05, "loss": 0.595, "step": 11469 }, { "epoch": 0.6422891701198342, "grad_norm": 1.2000852823257446, "learning_rate": 5.732000000000001e-05, "loss": 0.325, "step": 11470 }, { "epoch": 0.6423451674319632, "grad_norm": 1.6674840450286865, "learning_rate": 5.7325000000000004e-05, "loss": 0.3719, "step": 11471 }, { "epoch": 0.6424011647440923, "grad_norm": 1.1440792083740234, "learning_rate": 5.733000000000001e-05, "loss": 0.3789, "step": 11472 }, { "epoch": 0.6424571620562213, "grad_norm": 1.2247045040130615, "learning_rate": 5.7335000000000006e-05, "loss": 0.428, "step": 11473 }, { "epoch": 0.6425131593683503, "grad_norm": 1.2148150205612183, "learning_rate": 5.734e-05, "loss": 0.4804, "step": 11474 }, { "epoch": 0.6425691566804793, "grad_norm": 1.5058478116989136, "learning_rate": 5.734500000000001e-05, "loss": 0.4637, "step": 11475 }, { "epoch": 0.6426251539926083, "grad_norm": 1.2872493267059326, "learning_rate": 5.7350000000000005e-05, "loss": 0.3831, "step": 11476 }, { "epoch": 0.6426811513047374, "grad_norm": 1.0541887283325195, "learning_rate": 5.7355e-05, "loss": 0.428, "step": 11477 }, { "epoch": 0.6427371486168664, "grad_norm": 1.1574785709381104, "learning_rate": 5.736e-05, "loss": 0.313, "step": 11478 }, { "epoch": 0.6427931459289954, "grad_norm": 1.2846903800964355, "learning_rate": 5.7365000000000004e-05, "loss": 0.4837, "step": 11479 }, { "epoch": 0.6428491432411244, "grad_norm": 
1.373149037361145, "learning_rate": 5.737e-05, "loss": 0.4994, "step": 11480 }, { "epoch": 0.6429051405532534, "grad_norm": 1.2504630088806152, "learning_rate": 5.7375e-05, "loss": 0.4227, "step": 11481 }, { "epoch": 0.6429611378653824, "grad_norm": 1.0435395240783691, "learning_rate": 5.738e-05, "loss": 0.3611, "step": 11482 }, { "epoch": 0.6430171351775115, "grad_norm": 1.393623948097229, "learning_rate": 5.7385e-05, "loss": 0.4542, "step": 11483 }, { "epoch": 0.6430731324896405, "grad_norm": 1.033274531364441, "learning_rate": 5.739e-05, "loss": 0.2868, "step": 11484 }, { "epoch": 0.6431291298017695, "grad_norm": 1.6692862510681152, "learning_rate": 5.7395e-05, "loss": 0.5322, "step": 11485 }, { "epoch": 0.6431851271138985, "grad_norm": 1.2050962448120117, "learning_rate": 5.74e-05, "loss": 0.3991, "step": 11486 }, { "epoch": 0.6432411244260275, "grad_norm": 1.5836702585220337, "learning_rate": 5.7405e-05, "loss": 0.3882, "step": 11487 }, { "epoch": 0.6432971217381566, "grad_norm": 1.2420437335968018, "learning_rate": 5.741000000000001e-05, "loss": 0.3548, "step": 11488 }, { "epoch": 0.6433531190502856, "grad_norm": 1.2407389879226685, "learning_rate": 5.7415000000000005e-05, "loss": 0.4549, "step": 11489 }, { "epoch": 0.6434091163624146, "grad_norm": 1.330662488937378, "learning_rate": 5.742000000000001e-05, "loss": 0.5312, "step": 11490 }, { "epoch": 0.6434651136745436, "grad_norm": 1.766160249710083, "learning_rate": 5.742500000000001e-05, "loss": 0.5731, "step": 11491 }, { "epoch": 0.6435211109866726, "grad_norm": 1.2884317636489868, "learning_rate": 5.7430000000000004e-05, "loss": 0.5438, "step": 11492 }, { "epoch": 0.6435771082988017, "grad_norm": 1.593859314918518, "learning_rate": 5.743500000000001e-05, "loss": 0.4792, "step": 11493 }, { "epoch": 0.6436331056109307, "grad_norm": 1.1537383794784546, "learning_rate": 5.7440000000000006e-05, "loss": 0.3695, "step": 11494 }, { "epoch": 0.6436891029230597, "grad_norm": 1.2668381929397583, "learning_rate": 
5.7445e-05, "loss": 0.3173, "step": 11495 }, { "epoch": 0.6437451002351887, "grad_norm": 1.3317357301712036, "learning_rate": 5.745e-05, "loss": 0.6053, "step": 11496 }, { "epoch": 0.6438010975473177, "grad_norm": 1.1135140657424927, "learning_rate": 5.7455000000000005e-05, "loss": 0.3324, "step": 11497 }, { "epoch": 0.6438570948594468, "grad_norm": 1.2193281650543213, "learning_rate": 5.746e-05, "loss": 0.378, "step": 11498 }, { "epoch": 0.6439130921715758, "grad_norm": 1.1844638586044312, "learning_rate": 5.7465e-05, "loss": 0.488, "step": 11499 }, { "epoch": 0.6439690894837048, "grad_norm": 1.4308418035507202, "learning_rate": 5.7470000000000004e-05, "loss": 0.5116, "step": 11500 }, { "epoch": 0.6440250867958338, "grad_norm": 1.4272637367248535, "learning_rate": 5.7475e-05, "loss": 0.4804, "step": 11501 }, { "epoch": 0.6440810841079628, "grad_norm": 1.3765759468078613, "learning_rate": 5.748e-05, "loss": 0.4766, "step": 11502 }, { "epoch": 0.6441370814200919, "grad_norm": 1.304327368736267, "learning_rate": 5.7485e-05, "loss": 0.5071, "step": 11503 }, { "epoch": 0.6441930787322209, "grad_norm": 1.2469215393066406, "learning_rate": 5.749e-05, "loss": 0.4449, "step": 11504 }, { "epoch": 0.6442490760443499, "grad_norm": 1.2005549669265747, "learning_rate": 5.7495e-05, "loss": 0.454, "step": 11505 }, { "epoch": 0.6443050733564789, "grad_norm": 1.0989803075790405, "learning_rate": 5.7499999999999995e-05, "loss": 0.4284, "step": 11506 }, { "epoch": 0.6443610706686079, "grad_norm": 1.1446917057037354, "learning_rate": 5.7505e-05, "loss": 0.4705, "step": 11507 }, { "epoch": 0.644417067980737, "grad_norm": 1.1719930171966553, "learning_rate": 5.7509999999999997e-05, "loss": 0.3878, "step": 11508 }, { "epoch": 0.644473065292866, "grad_norm": 1.02593195438385, "learning_rate": 5.751500000000001e-05, "loss": 0.2829, "step": 11509 }, { "epoch": 0.644529062604995, "grad_norm": 1.3297885656356812, "learning_rate": 5.7520000000000005e-05, "loss": 0.611, "step": 11510 }, { 
"epoch": 0.644585059917124, "grad_norm": 1.445859670639038, "learning_rate": 5.752500000000001e-05, "loss": 0.4839, "step": 11511 }, { "epoch": 0.644641057229253, "grad_norm": 1.259381890296936, "learning_rate": 5.7530000000000007e-05, "loss": 0.4401, "step": 11512 }, { "epoch": 0.644697054541382, "grad_norm": 1.2717972993850708, "learning_rate": 5.7535000000000004e-05, "loss": 0.4225, "step": 11513 }, { "epoch": 0.6447530518535111, "grad_norm": 1.3548849821090698, "learning_rate": 5.754000000000001e-05, "loss": 0.3936, "step": 11514 }, { "epoch": 0.6448090491656401, "grad_norm": 1.3008915185928345, "learning_rate": 5.7545000000000006e-05, "loss": 0.4837, "step": 11515 }, { "epoch": 0.6448650464777691, "grad_norm": 1.3620336055755615, "learning_rate": 5.755e-05, "loss": 0.8122, "step": 11516 }, { "epoch": 0.6449210437898981, "grad_norm": 1.2951780557632446, "learning_rate": 5.7555e-05, "loss": 0.4222, "step": 11517 }, { "epoch": 0.6449770411020271, "grad_norm": 1.435787558555603, "learning_rate": 5.7560000000000005e-05, "loss": 0.4281, "step": 11518 }, { "epoch": 0.6450330384141562, "grad_norm": 1.430140495300293, "learning_rate": 5.7565e-05, "loss": 0.3756, "step": 11519 }, { "epoch": 0.6450890357262852, "grad_norm": 1.2110021114349365, "learning_rate": 5.757e-05, "loss": 0.4653, "step": 11520 }, { "epoch": 0.6451450330384142, "grad_norm": 2.7886364459991455, "learning_rate": 5.7575000000000004e-05, "loss": 0.4481, "step": 11521 }, { "epoch": 0.6452010303505432, "grad_norm": 1.1463032960891724, "learning_rate": 5.758e-05, "loss": 0.3699, "step": 11522 }, { "epoch": 0.6452570276626722, "grad_norm": 1.4110069274902344, "learning_rate": 5.7585e-05, "loss": 0.4299, "step": 11523 }, { "epoch": 0.6453130249748013, "grad_norm": 1.5350940227508545, "learning_rate": 5.759e-05, "loss": 0.4105, "step": 11524 }, { "epoch": 0.6453690222869303, "grad_norm": 1.5140190124511719, "learning_rate": 5.7595e-05, "loss": 0.7324, "step": 11525 }, { "epoch": 0.6454250195990593, 
"grad_norm": 1.4884791374206543, "learning_rate": 5.76e-05, "loss": 0.4558, "step": 11526 }, { "epoch": 0.6454810169111883, "grad_norm": 1.242226243019104, "learning_rate": 5.7604999999999995e-05, "loss": 0.5391, "step": 11527 }, { "epoch": 0.6455370142233173, "grad_norm": 1.2266403436660767, "learning_rate": 5.761e-05, "loss": 0.4052, "step": 11528 }, { "epoch": 0.6455930115354463, "grad_norm": 1.0919597148895264, "learning_rate": 5.761500000000001e-05, "loss": 0.3521, "step": 11529 }, { "epoch": 0.6456490088475754, "grad_norm": 1.1650614738464355, "learning_rate": 5.762000000000001e-05, "loss": 0.5479, "step": 11530 }, { "epoch": 0.6457050061597044, "grad_norm": 1.1579889059066772, "learning_rate": 5.7625000000000005e-05, "loss": 0.4455, "step": 11531 }, { "epoch": 0.6457610034718334, "grad_norm": 1.603376865386963, "learning_rate": 5.763000000000001e-05, "loss": 0.4191, "step": 11532 }, { "epoch": 0.6458170007839624, "grad_norm": 1.2230231761932373, "learning_rate": 5.7635000000000006e-05, "loss": 0.4944, "step": 11533 }, { "epoch": 0.6458729980960913, "grad_norm": 1.1598926782608032, "learning_rate": 5.7640000000000004e-05, "loss": 0.374, "step": 11534 }, { "epoch": 0.6459289954082204, "grad_norm": 2.9758739471435547, "learning_rate": 5.7645e-05, "loss": 0.4522, "step": 11535 }, { "epoch": 0.6459849927203494, "grad_norm": 1.1005445718765259, "learning_rate": 5.7650000000000005e-05, "loss": 0.3054, "step": 11536 }, { "epoch": 0.6460409900324784, "grad_norm": 1.3561359643936157, "learning_rate": 5.7655e-05, "loss": 0.4538, "step": 11537 }, { "epoch": 0.6460969873446074, "grad_norm": 1.477579951286316, "learning_rate": 5.766e-05, "loss": 0.3823, "step": 11538 }, { "epoch": 0.6461529846567364, "grad_norm": 1.068372130393982, "learning_rate": 5.7665000000000004e-05, "loss": 0.2992, "step": 11539 }, { "epoch": 0.6462089819688654, "grad_norm": 1.4765825271606445, "learning_rate": 5.767e-05, "loss": 0.5325, "step": 11540 }, { "epoch": 0.6462649792809945, "grad_norm": 
1.3367769718170166, "learning_rate": 5.7675e-05, "loss": 0.3413, "step": 11541 }, { "epoch": 0.6463209765931235, "grad_norm": 1.3945248126983643, "learning_rate": 5.7680000000000003e-05, "loss": 0.4743, "step": 11542 }, { "epoch": 0.6463769739052525, "grad_norm": 1.352189540863037, "learning_rate": 5.7685e-05, "loss": 0.5408, "step": 11543 }, { "epoch": 0.6464329712173815, "grad_norm": 1.473279595375061, "learning_rate": 5.769e-05, "loss": 0.5688, "step": 11544 }, { "epoch": 0.6464889685295105, "grad_norm": 1.2186814546585083, "learning_rate": 5.7695e-05, "loss": 0.3343, "step": 11545 }, { "epoch": 0.6465449658416396, "grad_norm": 1.340122103691101, "learning_rate": 5.77e-05, "loss": 0.4962, "step": 11546 }, { "epoch": 0.6466009631537686, "grad_norm": 1.6914175748825073, "learning_rate": 5.7705e-05, "loss": 0.5216, "step": 11547 }, { "epoch": 0.6466569604658976, "grad_norm": 1.2420464754104614, "learning_rate": 5.7709999999999995e-05, "loss": 0.4441, "step": 11548 }, { "epoch": 0.6467129577780266, "grad_norm": 1.2060019969940186, "learning_rate": 5.7715000000000006e-05, "loss": 0.3787, "step": 11549 }, { "epoch": 0.6467689550901556, "grad_norm": 1.010940432548523, "learning_rate": 5.772000000000001e-05, "loss": 0.3713, "step": 11550 }, { "epoch": 0.6468249524022847, "grad_norm": 1.2166849374771118, "learning_rate": 5.772500000000001e-05, "loss": 0.3891, "step": 11551 }, { "epoch": 0.6468809497144137, "grad_norm": 1.2105660438537598, "learning_rate": 5.7730000000000005e-05, "loss": 0.2847, "step": 11552 }, { "epoch": 0.6469369470265427, "grad_norm": 1.383593201637268, "learning_rate": 5.773500000000001e-05, "loss": 0.5505, "step": 11553 }, { "epoch": 0.6469929443386717, "grad_norm": 1.187862515449524, "learning_rate": 5.7740000000000006e-05, "loss": 0.3641, "step": 11554 }, { "epoch": 0.6470489416508007, "grad_norm": 1.1482435464859009, "learning_rate": 5.7745000000000004e-05, "loss": 0.5302, "step": 11555 }, { "epoch": 0.6471049389629298, "grad_norm": 
1.2438193559646606, "learning_rate": 5.775e-05, "loss": 0.4347, "step": 11556 }, { "epoch": 0.6471609362750588, "grad_norm": 1.4243003129959106, "learning_rate": 5.7755000000000005e-05, "loss": 0.4023, "step": 11557 }, { "epoch": 0.6472169335871878, "grad_norm": 1.2398462295532227, "learning_rate": 5.776e-05, "loss": 0.4243, "step": 11558 }, { "epoch": 0.6472729308993168, "grad_norm": 1.0839929580688477, "learning_rate": 5.7765e-05, "loss": 0.2921, "step": 11559 }, { "epoch": 0.6473289282114458, "grad_norm": 1.2982513904571533, "learning_rate": 5.7770000000000004e-05, "loss": 0.3389, "step": 11560 }, { "epoch": 0.6473849255235749, "grad_norm": 1.5267226696014404, "learning_rate": 5.7775e-05, "loss": 0.4404, "step": 11561 }, { "epoch": 0.6474409228357039, "grad_norm": 1.4346492290496826, "learning_rate": 5.778e-05, "loss": 0.4076, "step": 11562 }, { "epoch": 0.6474969201478329, "grad_norm": 1.395491123199463, "learning_rate": 5.7785e-05, "loss": 0.4529, "step": 11563 }, { "epoch": 0.6475529174599619, "grad_norm": 1.6427137851715088, "learning_rate": 5.779e-05, "loss": 0.5369, "step": 11564 }, { "epoch": 0.6476089147720909, "grad_norm": 1.3966608047485352, "learning_rate": 5.7795e-05, "loss": 0.3765, "step": 11565 }, { "epoch": 0.64766491208422, "grad_norm": 1.2554833889007568, "learning_rate": 5.7799999999999995e-05, "loss": 0.5485, "step": 11566 }, { "epoch": 0.647720909396349, "grad_norm": 1.5577747821807861, "learning_rate": 5.7805e-05, "loss": 0.7601, "step": 11567 }, { "epoch": 0.647776906708478, "grad_norm": 1.2530993223190308, "learning_rate": 5.781e-05, "loss": 0.3431, "step": 11568 }, { "epoch": 0.647832904020607, "grad_norm": 1.155864953994751, "learning_rate": 5.781500000000001e-05, "loss": 0.468, "step": 11569 }, { "epoch": 0.647888901332736, "grad_norm": 1.0874488353729248, "learning_rate": 5.7820000000000005e-05, "loss": 0.4228, "step": 11570 }, { "epoch": 0.647944898644865, "grad_norm": 1.3322001695632935, "learning_rate": 5.782500000000001e-05, 
"loss": 0.5208, "step": 11571 }, { "epoch": 0.6480008959569941, "grad_norm": 1.3388378620147705, "learning_rate": 5.783000000000001e-05, "loss": 0.4283, "step": 11572 }, { "epoch": 0.6480568932691231, "grad_norm": 1.1866655349731445, "learning_rate": 5.7835000000000004e-05, "loss": 0.3104, "step": 11573 }, { "epoch": 0.6481128905812521, "grad_norm": 1.391587495803833, "learning_rate": 5.784000000000001e-05, "loss": 0.6146, "step": 11574 }, { "epoch": 0.6481688878933811, "grad_norm": 1.344538688659668, "learning_rate": 5.7845000000000006e-05, "loss": 0.3789, "step": 11575 }, { "epoch": 0.6482248852055101, "grad_norm": 1.2578099966049194, "learning_rate": 5.7850000000000003e-05, "loss": 0.5257, "step": 11576 }, { "epoch": 0.6482808825176392, "grad_norm": 1.385211706161499, "learning_rate": 5.7855e-05, "loss": 0.3098, "step": 11577 }, { "epoch": 0.6483368798297682, "grad_norm": 1.5441373586654663, "learning_rate": 5.7860000000000005e-05, "loss": 0.5056, "step": 11578 }, { "epoch": 0.6483928771418972, "grad_norm": 1.4079526662826538, "learning_rate": 5.7865e-05, "loss": 0.3969, "step": 11579 }, { "epoch": 0.6484488744540262, "grad_norm": 1.1507244110107422, "learning_rate": 5.787e-05, "loss": 0.4326, "step": 11580 }, { "epoch": 0.6485048717661552, "grad_norm": 1.1137008666992188, "learning_rate": 5.7875000000000004e-05, "loss": 0.4333, "step": 11581 }, { "epoch": 0.6485608690782843, "grad_norm": 1.256856083869934, "learning_rate": 5.788e-05, "loss": 0.495, "step": 11582 }, { "epoch": 0.6486168663904133, "grad_norm": 1.2716187238693237, "learning_rate": 5.7885e-05, "loss": 0.3649, "step": 11583 }, { "epoch": 0.6486728637025423, "grad_norm": 1.5647202730178833, "learning_rate": 5.789e-05, "loss": 0.5961, "step": 11584 }, { "epoch": 0.6487288610146713, "grad_norm": 1.144774317741394, "learning_rate": 5.7895e-05, "loss": 0.3296, "step": 11585 }, { "epoch": 0.6487848583268003, "grad_norm": 1.1031969785690308, "learning_rate": 5.79e-05, "loss": 0.3849, "step": 11586 }, { 
"epoch": 0.6488408556389293, "grad_norm": 1.2824819087982178, "learning_rate": 5.7904999999999995e-05, "loss": 0.3177, "step": 11587 }, { "epoch": 0.6488968529510584, "grad_norm": 1.555246353149414, "learning_rate": 5.791e-05, "loss": 0.4973, "step": 11588 }, { "epoch": 0.6489528502631874, "grad_norm": 1.6722393035888672, "learning_rate": 5.791500000000001e-05, "loss": 0.4526, "step": 11589 }, { "epoch": 0.6490088475753164, "grad_norm": 1.4631997346878052, "learning_rate": 5.792000000000001e-05, "loss": 0.4488, "step": 11590 }, { "epoch": 0.6490648448874454, "grad_norm": 1.3110997676849365, "learning_rate": 5.7925000000000005e-05, "loss": 0.434, "step": 11591 }, { "epoch": 0.6491208421995744, "grad_norm": 1.4031312465667725, "learning_rate": 5.793000000000001e-05, "loss": 0.3872, "step": 11592 }, { "epoch": 0.6491768395117035, "grad_norm": 2.0797858238220215, "learning_rate": 5.793500000000001e-05, "loss": 0.3397, "step": 11593 }, { "epoch": 0.6492328368238325, "grad_norm": 1.397301435470581, "learning_rate": 5.7940000000000004e-05, "loss": 0.4672, "step": 11594 }, { "epoch": 0.6492888341359615, "grad_norm": 1.1421433687210083, "learning_rate": 5.7945e-05, "loss": 0.3781, "step": 11595 }, { "epoch": 0.6493448314480905, "grad_norm": 1.278132438659668, "learning_rate": 5.7950000000000006e-05, "loss": 0.4038, "step": 11596 }, { "epoch": 0.6494008287602195, "grad_norm": 1.68009614944458, "learning_rate": 5.7955e-05, "loss": 0.433, "step": 11597 }, { "epoch": 0.6494568260723486, "grad_norm": 1.222383737564087, "learning_rate": 5.796e-05, "loss": 0.4574, "step": 11598 }, { "epoch": 0.6495128233844776, "grad_norm": 1.118330955505371, "learning_rate": 5.7965000000000005e-05, "loss": 0.3929, "step": 11599 }, { "epoch": 0.6495688206966066, "grad_norm": 1.2085684537887573, "learning_rate": 5.797e-05, "loss": 0.5143, "step": 11600 }, { "epoch": 0.6496248180087356, "grad_norm": 1.231821894645691, "learning_rate": 5.7975e-05, "loss": 0.4428, "step": 11601 }, { "epoch": 
0.6496808153208646, "grad_norm": 1.723815679550171, "learning_rate": 5.7980000000000004e-05, "loss": 0.4408, "step": 11602 }, { "epoch": 0.6497368126329937, "grad_norm": 1.2709662914276123, "learning_rate": 5.7985e-05, "loss": 0.3881, "step": 11603 }, { "epoch": 0.6497928099451227, "grad_norm": 1.2329301834106445, "learning_rate": 5.799e-05, "loss": 0.4592, "step": 11604 }, { "epoch": 0.6498488072572517, "grad_norm": 1.4668264389038086, "learning_rate": 5.7994999999999996e-05, "loss": 0.5234, "step": 11605 }, { "epoch": 0.6499048045693807, "grad_norm": 1.1704156398773193, "learning_rate": 5.8e-05, "loss": 0.412, "step": 11606 }, { "epoch": 0.6499608018815097, "grad_norm": 1.2018678188323975, "learning_rate": 5.8005e-05, "loss": 0.4805, "step": 11607 }, { "epoch": 0.6500167991936388, "grad_norm": 1.0939933061599731, "learning_rate": 5.8009999999999995e-05, "loss": 0.4825, "step": 11608 }, { "epoch": 0.6500727965057678, "grad_norm": 1.0713413953781128, "learning_rate": 5.8015000000000006e-05, "loss": 0.3814, "step": 11609 }, { "epoch": 0.6501287938178968, "grad_norm": 1.1035113334655762, "learning_rate": 5.802000000000001e-05, "loss": 0.4044, "step": 11610 }, { "epoch": 0.6501847911300258, "grad_norm": 1.2724945545196533, "learning_rate": 5.802500000000001e-05, "loss": 0.4172, "step": 11611 }, { "epoch": 0.6502407884421548, "grad_norm": 15.259182929992676, "learning_rate": 5.8030000000000005e-05, "loss": 0.5308, "step": 11612 }, { "epoch": 0.6502967857542838, "grad_norm": 1.1987518072128296, "learning_rate": 5.803500000000001e-05, "loss": 0.5636, "step": 11613 }, { "epoch": 0.6503527830664129, "grad_norm": 1.400138258934021, "learning_rate": 5.804000000000001e-05, "loss": 0.5606, "step": 11614 }, { "epoch": 0.6504087803785419, "grad_norm": 1.3241719007492065, "learning_rate": 5.8045000000000004e-05, "loss": 0.4243, "step": 11615 }, { "epoch": 0.6504647776906708, "grad_norm": 1.204487919807434, "learning_rate": 5.805e-05, "loss": 0.4068, "step": 11616 }, { "epoch": 
0.6505207750027998, "grad_norm": 1.2731162309646606, "learning_rate": 5.8055000000000006e-05, "loss": 0.404, "step": 11617 }, { "epoch": 0.6505767723149288, "grad_norm": 1.1496412754058838, "learning_rate": 5.806e-05, "loss": 0.413, "step": 11618 }, { "epoch": 0.6506327696270578, "grad_norm": 1.4973740577697754, "learning_rate": 5.8065e-05, "loss": 0.3583, "step": 11619 }, { "epoch": 0.6506887669391869, "grad_norm": 1.2994894981384277, "learning_rate": 5.8070000000000005e-05, "loss": 0.3506, "step": 11620 }, { "epoch": 0.6507447642513159, "grad_norm": 1.2027066946029663, "learning_rate": 5.8075e-05, "loss": 0.4193, "step": 11621 }, { "epoch": 0.6508007615634449, "grad_norm": 1.14454984664917, "learning_rate": 5.808e-05, "loss": 0.4292, "step": 11622 }, { "epoch": 0.6508567588755739, "grad_norm": 1.2749015092849731, "learning_rate": 5.8085000000000004e-05, "loss": 0.3946, "step": 11623 }, { "epoch": 0.650912756187703, "grad_norm": 1.0972893238067627, "learning_rate": 5.809e-05, "loss": 0.3939, "step": 11624 }, { "epoch": 0.650968753499832, "grad_norm": 1.0439338684082031, "learning_rate": 5.8095e-05, "loss": 0.4078, "step": 11625 }, { "epoch": 0.651024750811961, "grad_norm": 1.4483518600463867, "learning_rate": 5.8099999999999996e-05, "loss": 0.3705, "step": 11626 }, { "epoch": 0.65108074812409, "grad_norm": 1.3294535875320435, "learning_rate": 5.8105e-05, "loss": 0.3375, "step": 11627 }, { "epoch": 0.651136745436219, "grad_norm": 1.417929768562317, "learning_rate": 5.811e-05, "loss": 0.5627, "step": 11628 }, { "epoch": 0.651192742748348, "grad_norm": 1.3771790266036987, "learning_rate": 5.811500000000001e-05, "loss": 0.7373, "step": 11629 }, { "epoch": 0.6512487400604771, "grad_norm": 1.1689010858535767, "learning_rate": 5.8120000000000006e-05, "loss": 0.3993, "step": 11630 }, { "epoch": 0.6513047373726061, "grad_norm": 1.2407886981964111, "learning_rate": 5.812500000000001e-05, "loss": 0.4528, "step": 11631 }, { "epoch": 0.6513607346847351, "grad_norm": 
1.2653611898422241, "learning_rate": 5.813000000000001e-05, "loss": 0.3802, "step": 11632 }, { "epoch": 0.6514167319968641, "grad_norm": 1.2242566347122192, "learning_rate": 5.8135000000000005e-05, "loss": 0.4715, "step": 11633 }, { "epoch": 0.6514727293089931, "grad_norm": 1.299102783203125, "learning_rate": 5.814e-05, "loss": 0.4909, "step": 11634 }, { "epoch": 0.6515287266211222, "grad_norm": 1.1339411735534668, "learning_rate": 5.8145000000000007e-05, "loss": 0.4823, "step": 11635 }, { "epoch": 0.6515847239332512, "grad_norm": 1.212303638458252, "learning_rate": 5.8150000000000004e-05, "loss": 0.4722, "step": 11636 }, { "epoch": 0.6516407212453802, "grad_norm": 1.3442906141281128, "learning_rate": 5.8155e-05, "loss": 0.4407, "step": 11637 }, { "epoch": 0.6516967185575092, "grad_norm": 5.252134323120117, "learning_rate": 5.8160000000000006e-05, "loss": 0.4684, "step": 11638 }, { "epoch": 0.6517527158696382, "grad_norm": 1.1816720962524414, "learning_rate": 5.8165e-05, "loss": 0.375, "step": 11639 }, { "epoch": 0.6518087131817673, "grad_norm": 1.3227847814559937, "learning_rate": 5.817e-05, "loss": 0.6434, "step": 11640 }, { "epoch": 0.6518647104938963, "grad_norm": 1.3233706951141357, "learning_rate": 5.8175000000000005e-05, "loss": 0.6012, "step": 11641 }, { "epoch": 0.6519207078060253, "grad_norm": 1.1149864196777344, "learning_rate": 5.818e-05, "loss": 0.4763, "step": 11642 }, { "epoch": 0.6519767051181543, "grad_norm": 1.2007625102996826, "learning_rate": 5.8185e-05, "loss": 0.4474, "step": 11643 }, { "epoch": 0.6520327024302833, "grad_norm": 1.3394529819488525, "learning_rate": 5.819e-05, "loss": 0.5211, "step": 11644 }, { "epoch": 0.6520886997424123, "grad_norm": 1.2217689752578735, "learning_rate": 5.8195e-05, "loss": 0.4672, "step": 11645 }, { "epoch": 0.6521446970545414, "grad_norm": 1.3266834020614624, "learning_rate": 5.82e-05, "loss": 0.4283, "step": 11646 }, { "epoch": 0.6522006943666704, "grad_norm": 1.2351205348968506, "learning_rate": 
5.8204999999999996e-05, "loss": 0.5023, "step": 11647 }, { "epoch": 0.6522566916787994, "grad_norm": 1.3728914260864258, "learning_rate": 5.821e-05, "loss": 0.4334, "step": 11648 }, { "epoch": 0.6523126889909284, "grad_norm": 1.0195527076721191, "learning_rate": 5.8215e-05, "loss": 0.3195, "step": 11649 }, { "epoch": 0.6523686863030574, "grad_norm": 1.3558344841003418, "learning_rate": 5.822000000000001e-05, "loss": 0.3579, "step": 11650 }, { "epoch": 0.6524246836151865, "grad_norm": 1.289319634437561, "learning_rate": 5.8225000000000006e-05, "loss": 0.4538, "step": 11651 }, { "epoch": 0.6524806809273155, "grad_norm": 1.212921142578125, "learning_rate": 5.823000000000001e-05, "loss": 0.5243, "step": 11652 }, { "epoch": 0.6525366782394445, "grad_norm": 1.3482457399368286, "learning_rate": 5.823500000000001e-05, "loss": 0.4709, "step": 11653 }, { "epoch": 0.6525926755515735, "grad_norm": 1.23513662815094, "learning_rate": 5.8240000000000005e-05, "loss": 0.3948, "step": 11654 }, { "epoch": 0.6526486728637025, "grad_norm": 1.2374106645584106, "learning_rate": 5.8245e-05, "loss": 0.4464, "step": 11655 }, { "epoch": 0.6527046701758316, "grad_norm": 1.2403322458267212, "learning_rate": 5.8250000000000006e-05, "loss": 0.4029, "step": 11656 }, { "epoch": 0.6527606674879606, "grad_norm": 1.2212584018707275, "learning_rate": 5.8255000000000004e-05, "loss": 0.3104, "step": 11657 }, { "epoch": 0.6528166648000896, "grad_norm": 1.8834686279296875, "learning_rate": 5.826e-05, "loss": 0.3212, "step": 11658 }, { "epoch": 0.6528726621122186, "grad_norm": 1.3160979747772217, "learning_rate": 5.8265000000000005e-05, "loss": 0.4347, "step": 11659 }, { "epoch": 0.6529286594243476, "grad_norm": 1.1711444854736328, "learning_rate": 5.827e-05, "loss": 0.3997, "step": 11660 }, { "epoch": 0.6529846567364767, "grad_norm": 1.2330718040466309, "learning_rate": 5.8275e-05, "loss": 0.4108, "step": 11661 }, { "epoch": 0.6530406540486057, "grad_norm": 1.1606751680374146, "learning_rate": 
5.8280000000000004e-05, "loss": 0.3931, "step": 11662 }, { "epoch": 0.6530966513607347, "grad_norm": 1.2084978818893433, "learning_rate": 5.8285e-05, "loss": 0.3705, "step": 11663 }, { "epoch": 0.6531526486728637, "grad_norm": 1.577621340751648, "learning_rate": 5.829e-05, "loss": 0.5828, "step": 11664 }, { "epoch": 0.6532086459849927, "grad_norm": 1.4541676044464111, "learning_rate": 5.8295e-05, "loss": 0.5594, "step": 11665 }, { "epoch": 0.6532646432971217, "grad_norm": 1.2026711702346802, "learning_rate": 5.83e-05, "loss": 0.392, "step": 11666 }, { "epoch": 0.6533206406092508, "grad_norm": 1.5014997720718384, "learning_rate": 5.8305e-05, "loss": 0.4563, "step": 11667 }, { "epoch": 0.6533766379213798, "grad_norm": 1.4592264890670776, "learning_rate": 5.8309999999999996e-05, "loss": 0.4314, "step": 11668 }, { "epoch": 0.6534326352335088, "grad_norm": 1.177983045578003, "learning_rate": 5.8315e-05, "loss": 0.4513, "step": 11669 }, { "epoch": 0.6534886325456378, "grad_norm": 1.5815566778182983, "learning_rate": 5.832000000000001e-05, "loss": 0.4594, "step": 11670 }, { "epoch": 0.6535446298577668, "grad_norm": 1.4919463396072388, "learning_rate": 5.832500000000001e-05, "loss": 0.5935, "step": 11671 }, { "epoch": 0.6536006271698959, "grad_norm": 1.1126666069030762, "learning_rate": 5.8330000000000006e-05, "loss": 0.5506, "step": 11672 }, { "epoch": 0.6536566244820249, "grad_norm": 1.260085105895996, "learning_rate": 5.8335e-05, "loss": 0.5312, "step": 11673 }, { "epoch": 0.6537126217941539, "grad_norm": 1.4133983850479126, "learning_rate": 5.834000000000001e-05, "loss": 0.459, "step": 11674 }, { "epoch": 0.6537686191062829, "grad_norm": 1.1627137660980225, "learning_rate": 5.8345000000000005e-05, "loss": 0.4538, "step": 11675 }, { "epoch": 0.6538246164184119, "grad_norm": 1.4787089824676514, "learning_rate": 5.835e-05, "loss": 0.4798, "step": 11676 }, { "epoch": 0.653880613730541, "grad_norm": 1.3909518718719482, "learning_rate": 5.8355000000000006e-05, "loss": 
0.4618, "step": 11677 }, { "epoch": 0.65393661104267, "grad_norm": 1.359830617904663, "learning_rate": 5.8360000000000004e-05, "loss": 0.4567, "step": 11678 }, { "epoch": 0.653992608354799, "grad_norm": 1.7665683031082153, "learning_rate": 5.8365e-05, "loss": 0.5119, "step": 11679 }, { "epoch": 0.654048605666928, "grad_norm": 1.4605189561843872, "learning_rate": 5.8370000000000005e-05, "loss": 0.6055, "step": 11680 }, { "epoch": 0.654104602979057, "grad_norm": 1.4210529327392578, "learning_rate": 5.8375e-05, "loss": 0.3897, "step": 11681 }, { "epoch": 0.6541606002911861, "grad_norm": 0.9742177128791809, "learning_rate": 5.838e-05, "loss": 0.2776, "step": 11682 }, { "epoch": 0.6542165976033151, "grad_norm": 1.2380565404891968, "learning_rate": 5.8385e-05, "loss": 0.4494, "step": 11683 }, { "epoch": 0.6542725949154441, "grad_norm": 1.3207238912582397, "learning_rate": 5.839e-05, "loss": 0.5514, "step": 11684 }, { "epoch": 0.6543285922275731, "grad_norm": 1.5492300987243652, "learning_rate": 5.8395e-05, "loss": 0.3392, "step": 11685 }, { "epoch": 0.6543845895397021, "grad_norm": 1.3901665210723877, "learning_rate": 5.8399999999999997e-05, "loss": 0.4178, "step": 11686 }, { "epoch": 0.6544405868518312, "grad_norm": 1.3766168355941772, "learning_rate": 5.8405e-05, "loss": 0.4283, "step": 11687 }, { "epoch": 0.6544965841639602, "grad_norm": 1.496849536895752, "learning_rate": 5.841e-05, "loss": 0.5418, "step": 11688 }, { "epoch": 0.6545525814760892, "grad_norm": 1.4288110733032227, "learning_rate": 5.8414999999999996e-05, "loss": 0.4658, "step": 11689 }, { "epoch": 0.6546085787882182, "grad_norm": 1.2487083673477173, "learning_rate": 5.8420000000000006e-05, "loss": 0.4451, "step": 11690 }, { "epoch": 0.6546645761003472, "grad_norm": 1.3596370220184326, "learning_rate": 5.842500000000001e-05, "loss": 0.3989, "step": 11691 }, { "epoch": 0.6547205734124762, "grad_norm": 1.147562026977539, "learning_rate": 5.843000000000001e-05, "loss": 0.4082, "step": 11692 }, { "epoch": 
0.6547765707246053, "grad_norm": 1.269890308380127, "learning_rate": 5.8435000000000005e-05, "loss": 0.3645, "step": 11693 }, { "epoch": 0.6548325680367343, "grad_norm": 1.1032383441925049, "learning_rate": 5.844e-05, "loss": 0.3172, "step": 11694 }, { "epoch": 0.6548885653488633, "grad_norm": 1.4504663944244385, "learning_rate": 5.844500000000001e-05, "loss": 0.4008, "step": 11695 }, { "epoch": 0.6549445626609923, "grad_norm": 1.4644497632980347, "learning_rate": 5.8450000000000005e-05, "loss": 0.465, "step": 11696 }, { "epoch": 0.6550005599731213, "grad_norm": 1.419569492340088, "learning_rate": 5.8455e-05, "loss": 0.4715, "step": 11697 }, { "epoch": 0.6550565572852504, "grad_norm": 1.3105918169021606, "learning_rate": 5.8460000000000006e-05, "loss": 0.412, "step": 11698 }, { "epoch": 0.6551125545973793, "grad_norm": 1.656390905380249, "learning_rate": 5.8465000000000004e-05, "loss": 0.6538, "step": 11699 }, { "epoch": 0.6551685519095083, "grad_norm": 1.3274263143539429, "learning_rate": 5.847e-05, "loss": 0.4924, "step": 11700 }, { "epoch": 0.6552245492216373, "grad_norm": 1.4678207635879517, "learning_rate": 5.8475000000000005e-05, "loss": 0.6668, "step": 11701 }, { "epoch": 0.6552805465337663, "grad_norm": 1.424195647239685, "learning_rate": 5.848e-05, "loss": 0.562, "step": 11702 }, { "epoch": 0.6553365438458953, "grad_norm": 1.2749199867248535, "learning_rate": 5.8485e-05, "loss": 0.4165, "step": 11703 }, { "epoch": 0.6553925411580244, "grad_norm": 1.5304670333862305, "learning_rate": 5.849e-05, "loss": 0.5117, "step": 11704 }, { "epoch": 0.6554485384701534, "grad_norm": 1.12393319606781, "learning_rate": 5.8495e-05, "loss": 0.4185, "step": 11705 }, { "epoch": 0.6555045357822824, "grad_norm": 1.443090796470642, "learning_rate": 5.85e-05, "loss": 0.3894, "step": 11706 }, { "epoch": 0.6555605330944114, "grad_norm": 1.3098422288894653, "learning_rate": 5.8504999999999996e-05, "loss": 0.499, "step": 11707 }, { "epoch": 0.6556165304065404, "grad_norm": 
1.2031232118606567, "learning_rate": 5.851e-05, "loss": 0.3625, "step": 11708 }, { "epoch": 0.6556725277186695, "grad_norm": 1.471382737159729, "learning_rate": 5.8515e-05, "loss": 0.3973, "step": 11709 }, { "epoch": 0.6557285250307985, "grad_norm": 1.3180073499679565, "learning_rate": 5.852000000000001e-05, "loss": 0.4428, "step": 11710 }, { "epoch": 0.6557845223429275, "grad_norm": 1.5244274139404297, "learning_rate": 5.8525000000000006e-05, "loss": 0.5903, "step": 11711 }, { "epoch": 0.6558405196550565, "grad_norm": 1.1666597127914429, "learning_rate": 5.8530000000000004e-05, "loss": 0.363, "step": 11712 }, { "epoch": 0.6558965169671855, "grad_norm": 1.1757006645202637, "learning_rate": 5.853500000000001e-05, "loss": 0.41, "step": 11713 }, { "epoch": 0.6559525142793146, "grad_norm": 1.4187425374984741, "learning_rate": 5.8540000000000005e-05, "loss": 0.3809, "step": 11714 }, { "epoch": 0.6560085115914436, "grad_norm": 1.2364821434020996, "learning_rate": 5.8545e-05, "loss": 0.3692, "step": 11715 }, { "epoch": 0.6560645089035726, "grad_norm": 1.5956910848617554, "learning_rate": 5.855000000000001e-05, "loss": 0.4042, "step": 11716 }, { "epoch": 0.6561205062157016, "grad_norm": 1.5447666645050049, "learning_rate": 5.8555000000000004e-05, "loss": 0.5662, "step": 11717 }, { "epoch": 0.6561765035278306, "grad_norm": 1.403235912322998, "learning_rate": 5.856e-05, "loss": 0.5386, "step": 11718 }, { "epoch": 0.6562325008399597, "grad_norm": 1.3280143737792969, "learning_rate": 5.8565000000000006e-05, "loss": 0.3795, "step": 11719 }, { "epoch": 0.6562884981520887, "grad_norm": 1.3676313161849976, "learning_rate": 5.857e-05, "loss": 0.4279, "step": 11720 }, { "epoch": 0.6563444954642177, "grad_norm": 1.336786150932312, "learning_rate": 5.8575e-05, "loss": 0.4123, "step": 11721 }, { "epoch": 0.6564004927763467, "grad_norm": 1.216882348060608, "learning_rate": 5.858e-05, "loss": 0.4441, "step": 11722 }, { "epoch": 0.6564564900884757, "grad_norm": 12.355265617370605, 
"learning_rate": 5.8585e-05, "loss": 0.4049, "step": 11723 }, { "epoch": 0.6565124874006047, "grad_norm": 1.1024394035339355, "learning_rate": 5.859e-05, "loss": 0.4176, "step": 11724 }, { "epoch": 0.6565684847127338, "grad_norm": 1.2378270626068115, "learning_rate": 5.8595e-05, "loss": 0.3146, "step": 11725 }, { "epoch": 0.6566244820248628, "grad_norm": 1.1719168424606323, "learning_rate": 5.86e-05, "loss": 0.3656, "step": 11726 }, { "epoch": 0.6566804793369918, "grad_norm": 1.1853828430175781, "learning_rate": 5.8605e-05, "loss": 0.3465, "step": 11727 }, { "epoch": 0.6567364766491208, "grad_norm": 1.3977810144424438, "learning_rate": 5.8609999999999996e-05, "loss": 0.436, "step": 11728 }, { "epoch": 0.6567924739612498, "grad_norm": 1.3636177778244019, "learning_rate": 5.8615e-05, "loss": 0.4208, "step": 11729 }, { "epoch": 0.6568484712733789, "grad_norm": 1.3164868354797363, "learning_rate": 5.862000000000001e-05, "loss": 0.4447, "step": 11730 }, { "epoch": 0.6569044685855079, "grad_norm": 1.3972855806350708, "learning_rate": 5.862500000000001e-05, "loss": 0.4463, "step": 11731 }, { "epoch": 0.6569604658976369, "grad_norm": 1.3859697580337524, "learning_rate": 5.8630000000000006e-05, "loss": 0.5974, "step": 11732 }, { "epoch": 0.6570164632097659, "grad_norm": 1.2899953126907349, "learning_rate": 5.8635000000000004e-05, "loss": 0.3652, "step": 11733 }, { "epoch": 0.6570724605218949, "grad_norm": 1.3799649477005005, "learning_rate": 5.864000000000001e-05, "loss": 0.4831, "step": 11734 }, { "epoch": 0.657128457834024, "grad_norm": 1.995896339416504, "learning_rate": 5.8645000000000005e-05, "loss": 0.5063, "step": 11735 }, { "epoch": 0.657184455146153, "grad_norm": 1.4469842910766602, "learning_rate": 5.865e-05, "loss": 0.4502, "step": 11736 }, { "epoch": 0.657240452458282, "grad_norm": 1.168785572052002, "learning_rate": 5.865500000000001e-05, "loss": 0.3616, "step": 11737 }, { "epoch": 0.657296449770411, "grad_norm": 1.3240330219268799, "learning_rate": 
5.8660000000000004e-05, "loss": 0.4383, "step": 11738 }, { "epoch": 0.65735244708254, "grad_norm": 1.1602659225463867, "learning_rate": 5.8665e-05, "loss": 0.3743, "step": 11739 }, { "epoch": 0.6574084443946691, "grad_norm": 1.331207513809204, "learning_rate": 5.8670000000000006e-05, "loss": 0.3482, "step": 11740 }, { "epoch": 0.6574644417067981, "grad_norm": 1.1242907047271729, "learning_rate": 5.8675e-05, "loss": 0.4016, "step": 11741 }, { "epoch": 0.6575204390189271, "grad_norm": 1.4794058799743652, "learning_rate": 5.868e-05, "loss": 0.6036, "step": 11742 }, { "epoch": 0.6575764363310561, "grad_norm": 1.4201059341430664, "learning_rate": 5.8685e-05, "loss": 0.5238, "step": 11743 }, { "epoch": 0.6576324336431851, "grad_norm": 1.3712326288223267, "learning_rate": 5.869e-05, "loss": 0.4391, "step": 11744 }, { "epoch": 0.6576884309553142, "grad_norm": 1.6325640678405762, "learning_rate": 5.8695e-05, "loss": 0.4827, "step": 11745 }, { "epoch": 0.6577444282674432, "grad_norm": 1.094815969467163, "learning_rate": 5.87e-05, "loss": 0.4478, "step": 11746 }, { "epoch": 0.6578004255795722, "grad_norm": 1.4926972389221191, "learning_rate": 5.8705e-05, "loss": 0.4445, "step": 11747 }, { "epoch": 0.6578564228917012, "grad_norm": 1.0443830490112305, "learning_rate": 5.871e-05, "loss": 0.3351, "step": 11748 }, { "epoch": 0.6579124202038302, "grad_norm": 1.129218339920044, "learning_rate": 5.8714999999999996e-05, "loss": 0.3688, "step": 11749 }, { "epoch": 0.6579684175159592, "grad_norm": 1.281672477722168, "learning_rate": 5.872000000000001e-05, "loss": 0.5401, "step": 11750 }, { "epoch": 0.6580244148280883, "grad_norm": 1.2422080039978027, "learning_rate": 5.8725000000000004e-05, "loss": 0.3815, "step": 11751 }, { "epoch": 0.6580804121402173, "grad_norm": 1.2020775079727173, "learning_rate": 5.873000000000001e-05, "loss": 0.376, "step": 11752 }, { "epoch": 0.6581364094523463, "grad_norm": 1.1493568420410156, "learning_rate": 5.8735000000000006e-05, "loss": 0.4852, "step": 
11753 }, { "epoch": 0.6581924067644753, "grad_norm": 1.532774806022644, "learning_rate": 5.8740000000000003e-05, "loss": 0.3621, "step": 11754 }, { "epoch": 0.6582484040766043, "grad_norm": 1.2717229127883911, "learning_rate": 5.874500000000001e-05, "loss": 0.4511, "step": 11755 }, { "epoch": 0.6583044013887334, "grad_norm": 1.4544858932495117, "learning_rate": 5.8750000000000005e-05, "loss": 0.5239, "step": 11756 }, { "epoch": 0.6583603987008624, "grad_norm": 1.1929380893707275, "learning_rate": 5.8755e-05, "loss": 0.4486, "step": 11757 }, { "epoch": 0.6584163960129914, "grad_norm": 1.1866029500961304, "learning_rate": 5.876000000000001e-05, "loss": 0.3347, "step": 11758 }, { "epoch": 0.6584723933251204, "grad_norm": 1.4016965627670288, "learning_rate": 5.8765000000000004e-05, "loss": 0.5889, "step": 11759 }, { "epoch": 0.6585283906372494, "grad_norm": 1.2263785600662231, "learning_rate": 5.877e-05, "loss": 0.3747, "step": 11760 }, { "epoch": 0.6585843879493785, "grad_norm": 1.1574546098709106, "learning_rate": 5.8775000000000006e-05, "loss": 0.4193, "step": 11761 }, { "epoch": 0.6586403852615075, "grad_norm": 1.2843056917190552, "learning_rate": 5.878e-05, "loss": 0.4564, "step": 11762 }, { "epoch": 0.6586963825736365, "grad_norm": 1.2758581638336182, "learning_rate": 5.8785e-05, "loss": 0.3833, "step": 11763 }, { "epoch": 0.6587523798857655, "grad_norm": 1.386577844619751, "learning_rate": 5.879e-05, "loss": 0.481, "step": 11764 }, { "epoch": 0.6588083771978945, "grad_norm": 1.3712282180786133, "learning_rate": 5.8795e-05, "loss": 0.5142, "step": 11765 }, { "epoch": 0.6588643745100236, "grad_norm": 1.4404501914978027, "learning_rate": 5.88e-05, "loss": 0.467, "step": 11766 }, { "epoch": 0.6589203718221526, "grad_norm": 1.213527798652649, "learning_rate": 5.8805e-05, "loss": 0.3757, "step": 11767 }, { "epoch": 0.6589763691342816, "grad_norm": 1.4828687906265259, "learning_rate": 5.881e-05, "loss": 0.4814, "step": 11768 }, { "epoch": 0.6590323664464106, 
"grad_norm": 1.2644912004470825, "learning_rate": 5.8815e-05, "loss": 0.4162, "step": 11769 }, { "epoch": 0.6590883637585396, "grad_norm": 1.4774798154830933, "learning_rate": 5.8819999999999996e-05, "loss": 0.6192, "step": 11770 }, { "epoch": 0.6591443610706686, "grad_norm": 2.14587140083313, "learning_rate": 5.882500000000001e-05, "loss": 0.479, "step": 11771 }, { "epoch": 0.6592003583827977, "grad_norm": 1.6047451496124268, "learning_rate": 5.8830000000000004e-05, "loss": 0.4926, "step": 11772 }, { "epoch": 0.6592563556949267, "grad_norm": 1.5371648073196411, "learning_rate": 5.883500000000001e-05, "loss": 0.3083, "step": 11773 }, { "epoch": 0.6593123530070557, "grad_norm": 1.403256893157959, "learning_rate": 5.8840000000000006e-05, "loss": 0.5472, "step": 11774 }, { "epoch": 0.6593683503191847, "grad_norm": 1.0998344421386719, "learning_rate": 5.8845e-05, "loss": 0.3273, "step": 11775 }, { "epoch": 0.6594243476313137, "grad_norm": 1.3494468927383423, "learning_rate": 5.885000000000001e-05, "loss": 0.706, "step": 11776 }, { "epoch": 0.6594803449434428, "grad_norm": 1.1570298671722412, "learning_rate": 5.8855000000000005e-05, "loss": 0.3141, "step": 11777 }, { "epoch": 0.6595363422555718, "grad_norm": 1.5827627182006836, "learning_rate": 5.886e-05, "loss": 0.5053, "step": 11778 }, { "epoch": 0.6595923395677008, "grad_norm": 1.2346322536468506, "learning_rate": 5.8865000000000006e-05, "loss": 0.3815, "step": 11779 }, { "epoch": 0.6596483368798298, "grad_norm": 1.3112822771072388, "learning_rate": 5.8870000000000004e-05, "loss": 0.4709, "step": 11780 }, { "epoch": 0.6597043341919588, "grad_norm": 1.2978674173355103, "learning_rate": 5.8875e-05, "loss": 0.3935, "step": 11781 }, { "epoch": 0.6597603315040877, "grad_norm": 1.329642653465271, "learning_rate": 5.888e-05, "loss": 0.4252, "step": 11782 }, { "epoch": 0.6598163288162168, "grad_norm": 1.0972132682800293, "learning_rate": 5.8885e-05, "loss": 0.3462, "step": 11783 }, { "epoch": 0.6598723261283458, "grad_norm": 
1.3023643493652344, "learning_rate": 5.889e-05, "loss": 0.4253, "step": 11784 }, { "epoch": 0.6599283234404748, "grad_norm": 1.5814366340637207, "learning_rate": 5.8895e-05, "loss": 0.4563, "step": 11785 }, { "epoch": 0.6599843207526038, "grad_norm": 1.316964864730835, "learning_rate": 5.89e-05, "loss": 0.4273, "step": 11786 }, { "epoch": 0.6600403180647328, "grad_norm": 1.2883378267288208, "learning_rate": 5.8905e-05, "loss": 0.5418, "step": 11787 }, { "epoch": 0.6600963153768619, "grad_norm": 1.5526007413864136, "learning_rate": 5.891e-05, "loss": 0.4849, "step": 11788 }, { "epoch": 0.6601523126889909, "grad_norm": 12.64220905303955, "learning_rate": 5.8915e-05, "loss": 0.4976, "step": 11789 }, { "epoch": 0.6602083100011199, "grad_norm": 1.3078986406326294, "learning_rate": 5.892e-05, "loss": 0.5009, "step": 11790 }, { "epoch": 0.6602643073132489, "grad_norm": 1.2736436128616333, "learning_rate": 5.892500000000001e-05, "loss": 0.4243, "step": 11791 }, { "epoch": 0.6603203046253779, "grad_norm": 1.6053931713104248, "learning_rate": 5.893000000000001e-05, "loss": 0.5212, "step": 11792 }, { "epoch": 0.660376301937507, "grad_norm": 1.125852108001709, "learning_rate": 5.8935000000000004e-05, "loss": 0.3243, "step": 11793 }, { "epoch": 0.660432299249636, "grad_norm": 1.2537152767181396, "learning_rate": 5.894000000000001e-05, "loss": 0.3947, "step": 11794 }, { "epoch": 0.660488296561765, "grad_norm": 1.3903148174285889, "learning_rate": 5.8945000000000006e-05, "loss": 0.3863, "step": 11795 }, { "epoch": 0.660544293873894, "grad_norm": 1.2561471462249756, "learning_rate": 5.895e-05, "loss": 0.4033, "step": 11796 }, { "epoch": 0.660600291186023, "grad_norm": 1.3503178358078003, "learning_rate": 5.895500000000001e-05, "loss": 0.5024, "step": 11797 }, { "epoch": 0.6606562884981521, "grad_norm": 1.8672709465026855, "learning_rate": 5.8960000000000005e-05, "loss": 0.3454, "step": 11798 }, { "epoch": 0.6607122858102811, "grad_norm": 1.1492935419082642, "learning_rate": 
5.8965e-05, "loss": 0.3286, "step": 11799 }, { "epoch": 0.6607682831224101, "grad_norm": 1.6859663724899292, "learning_rate": 5.8970000000000006e-05, "loss": 0.4703, "step": 11800 }, { "epoch": 0.6608242804345391, "grad_norm": 1.3863939046859741, "learning_rate": 5.8975000000000004e-05, "loss": 0.3802, "step": 11801 }, { "epoch": 0.6608802777466681, "grad_norm": 1.2479121685028076, "learning_rate": 5.898e-05, "loss": 0.428, "step": 11802 }, { "epoch": 0.6609362750587972, "grad_norm": 1.3120057582855225, "learning_rate": 5.8985e-05, "loss": 0.5067, "step": 11803 }, { "epoch": 0.6609922723709262, "grad_norm": 1.1948027610778809, "learning_rate": 5.899e-05, "loss": 0.3471, "step": 11804 }, { "epoch": 0.6610482696830552, "grad_norm": 1.3826274871826172, "learning_rate": 5.8995e-05, "loss": 0.5424, "step": 11805 }, { "epoch": 0.6611042669951842, "grad_norm": 1.0666022300720215, "learning_rate": 5.9e-05, "loss": 0.3621, "step": 11806 }, { "epoch": 0.6611602643073132, "grad_norm": 1.216614007949829, "learning_rate": 5.9005e-05, "loss": 0.3731, "step": 11807 }, { "epoch": 0.6612162616194422, "grad_norm": 1.5249807834625244, "learning_rate": 5.901e-05, "loss": 0.5525, "step": 11808 }, { "epoch": 0.6612722589315713, "grad_norm": 1.3568562269210815, "learning_rate": 5.9014999999999997e-05, "loss": 0.435, "step": 11809 }, { "epoch": 0.6613282562437003, "grad_norm": 1.2439298629760742, "learning_rate": 5.902e-05, "loss": 0.423, "step": 11810 }, { "epoch": 0.6613842535558293, "grad_norm": 1.246422290802002, "learning_rate": 5.9025000000000005e-05, "loss": 0.321, "step": 11811 }, { "epoch": 0.6614402508679583, "grad_norm": 1.2262554168701172, "learning_rate": 5.903000000000001e-05, "loss": 0.4811, "step": 11812 }, { "epoch": 0.6614962481800873, "grad_norm": 1.3477610349655151, "learning_rate": 5.9035000000000007e-05, "loss": 0.4632, "step": 11813 }, { "epoch": 0.6615522454922164, "grad_norm": 1.1747660636901855, "learning_rate": 5.9040000000000004e-05, "loss": 0.374, "step": 
11814 }, { "epoch": 0.6616082428043454, "grad_norm": 1.1574561595916748, "learning_rate": 5.904500000000001e-05, "loss": 0.317, "step": 11815 }, { "epoch": 0.6616642401164744, "grad_norm": 1.3460285663604736, "learning_rate": 5.9050000000000006e-05, "loss": 0.5394, "step": 11816 }, { "epoch": 0.6617202374286034, "grad_norm": 1.1897482872009277, "learning_rate": 5.9055e-05, "loss": 0.353, "step": 11817 }, { "epoch": 0.6617762347407324, "grad_norm": 1.5638235807418823, "learning_rate": 5.906000000000001e-05, "loss": 0.4266, "step": 11818 }, { "epoch": 0.6618322320528615, "grad_norm": 1.418049931526184, "learning_rate": 5.9065000000000005e-05, "loss": 0.5869, "step": 11819 }, { "epoch": 0.6618882293649905, "grad_norm": 1.3842191696166992, "learning_rate": 5.907e-05, "loss": 0.4286, "step": 11820 }, { "epoch": 0.6619442266771195, "grad_norm": 1.5180695056915283, "learning_rate": 5.9075e-05, "loss": 0.5938, "step": 11821 }, { "epoch": 0.6620002239892485, "grad_norm": 1.3481963872909546, "learning_rate": 5.9080000000000004e-05, "loss": 0.4343, "step": 11822 }, { "epoch": 0.6620562213013775, "grad_norm": 1.3385549783706665, "learning_rate": 5.9085e-05, "loss": 0.3347, "step": 11823 }, { "epoch": 0.6621122186135066, "grad_norm": 1.5572330951690674, "learning_rate": 5.909e-05, "loss": 0.4192, "step": 11824 }, { "epoch": 0.6621682159256356, "grad_norm": 1.4639410972595215, "learning_rate": 5.9095e-05, "loss": 0.5353, "step": 11825 }, { "epoch": 0.6622242132377646, "grad_norm": 1.4320757389068604, "learning_rate": 5.91e-05, "loss": 0.473, "step": 11826 }, { "epoch": 0.6622802105498936, "grad_norm": 1.3239295482635498, "learning_rate": 5.9105e-05, "loss": 0.4496, "step": 11827 }, { "epoch": 0.6623362078620226, "grad_norm": 1.1210360527038574, "learning_rate": 5.911e-05, "loss": 0.418, "step": 11828 }, { "epoch": 0.6623922051741516, "grad_norm": 2.27600359916687, "learning_rate": 5.9115e-05, "loss": 0.4041, "step": 11829 }, { "epoch": 0.6624482024862807, "grad_norm": 
1.2792423963546753, "learning_rate": 5.9119999999999996e-05, "loss": 0.36, "step": 11830 }, { "epoch": 0.6625041997984097, "grad_norm": 1.1539493799209595, "learning_rate": 5.912500000000001e-05, "loss": 0.3898, "step": 11831 }, { "epoch": 0.6625601971105387, "grad_norm": 1.31495201587677, "learning_rate": 5.9130000000000005e-05, "loss": 0.4026, "step": 11832 }, { "epoch": 0.6626161944226677, "grad_norm": 1.0842198133468628, "learning_rate": 5.913500000000001e-05, "loss": 0.3748, "step": 11833 }, { "epoch": 0.6626721917347967, "grad_norm": 1.3249589204788208, "learning_rate": 5.9140000000000006e-05, "loss": 0.5073, "step": 11834 }, { "epoch": 0.6627281890469258, "grad_norm": 1.3649876117706299, "learning_rate": 5.9145000000000004e-05, "loss": 0.43, "step": 11835 }, { "epoch": 0.6627841863590548, "grad_norm": 1.2828794717788696, "learning_rate": 5.915000000000001e-05, "loss": 0.417, "step": 11836 }, { "epoch": 0.6628401836711838, "grad_norm": 1.9186866283416748, "learning_rate": 5.9155000000000005e-05, "loss": 0.4914, "step": 11837 }, { "epoch": 0.6628961809833128, "grad_norm": 1.1696540117263794, "learning_rate": 5.916e-05, "loss": 0.3673, "step": 11838 }, { "epoch": 0.6629521782954418, "grad_norm": 1.321453332901001, "learning_rate": 5.916500000000001e-05, "loss": 0.4073, "step": 11839 }, { "epoch": 0.6630081756075709, "grad_norm": 1.1427853107452393, "learning_rate": 5.9170000000000004e-05, "loss": 0.4659, "step": 11840 }, { "epoch": 0.6630641729196999, "grad_norm": 1.3836218118667603, "learning_rate": 5.9175e-05, "loss": 0.3893, "step": 11841 }, { "epoch": 0.6631201702318289, "grad_norm": 1.653493881225586, "learning_rate": 5.918e-05, "loss": 0.431, "step": 11842 }, { "epoch": 0.6631761675439579, "grad_norm": 1.120078444480896, "learning_rate": 5.9185000000000003e-05, "loss": 0.3408, "step": 11843 }, { "epoch": 0.6632321648560869, "grad_norm": 1.3832314014434814, "learning_rate": 5.919e-05, "loss": 0.4999, "step": 11844 }, { "epoch": 0.663288162168216, 
"grad_norm": 1.2807661294937134, "learning_rate": 5.9195e-05, "loss": 0.3549, "step": 11845 }, { "epoch": 0.663344159480345, "grad_norm": 1.1888970136642456, "learning_rate": 5.92e-05, "loss": 0.3814, "step": 11846 }, { "epoch": 0.663400156792474, "grad_norm": 1.893478274345398, "learning_rate": 5.9205e-05, "loss": 0.4226, "step": 11847 }, { "epoch": 0.663456154104603, "grad_norm": 1.1474511623382568, "learning_rate": 5.921e-05, "loss": 0.3579, "step": 11848 }, { "epoch": 0.663512151416732, "grad_norm": 1.5900508165359497, "learning_rate": 5.9215e-05, "loss": 0.6149, "step": 11849 }, { "epoch": 0.663568148728861, "grad_norm": 1.3894587755203247, "learning_rate": 5.922e-05, "loss": 0.3685, "step": 11850 }, { "epoch": 0.6636241460409901, "grad_norm": 1.6272608041763306, "learning_rate": 5.922500000000001e-05, "loss": 0.5183, "step": 11851 }, { "epoch": 0.6636801433531191, "grad_norm": 1.393485188484192, "learning_rate": 5.923000000000001e-05, "loss": 0.5789, "step": 11852 }, { "epoch": 0.6637361406652481, "grad_norm": 1.3365730047225952, "learning_rate": 5.9235000000000005e-05, "loss": 0.3492, "step": 11853 }, { "epoch": 0.6637921379773771, "grad_norm": 1.1424380540847778, "learning_rate": 5.924000000000001e-05, "loss": 0.314, "step": 11854 }, { "epoch": 0.6638481352895061, "grad_norm": 1.5570404529571533, "learning_rate": 5.9245000000000006e-05, "loss": 0.4278, "step": 11855 }, { "epoch": 0.6639041326016352, "grad_norm": 1.0318195819854736, "learning_rate": 5.9250000000000004e-05, "loss": 0.3839, "step": 11856 }, { "epoch": 0.6639601299137642, "grad_norm": 1.310918927192688, "learning_rate": 5.925500000000001e-05, "loss": 0.4464, "step": 11857 }, { "epoch": 0.6640161272258932, "grad_norm": 1.6875386238098145, "learning_rate": 5.9260000000000005e-05, "loss": 0.557, "step": 11858 }, { "epoch": 0.6640721245380222, "grad_norm": 1.0900417566299438, "learning_rate": 5.9265e-05, "loss": 0.3614, "step": 11859 }, { "epoch": 0.6641281218501512, "grad_norm": 1.054628849029541, 
"learning_rate": 5.927e-05, "loss": 0.3527, "step": 11860 }, { "epoch": 0.6641841191622803, "grad_norm": 1.3811562061309814, "learning_rate": 5.9275000000000004e-05, "loss": 0.4464, "step": 11861 }, { "epoch": 0.6642401164744093, "grad_norm": 1.394370675086975, "learning_rate": 5.928e-05, "loss": 0.6263, "step": 11862 }, { "epoch": 0.6642961137865383, "grad_norm": 1.579593539237976, "learning_rate": 5.9285e-05, "loss": 0.5119, "step": 11863 }, { "epoch": 0.6643521110986672, "grad_norm": 1.3653138875961304, "learning_rate": 5.929e-05, "loss": 0.4603, "step": 11864 }, { "epoch": 0.6644081084107962, "grad_norm": 1.4746373891830444, "learning_rate": 5.9295e-05, "loss": 0.4037, "step": 11865 }, { "epoch": 0.6644641057229252, "grad_norm": 1.2131935358047485, "learning_rate": 5.93e-05, "loss": 0.3738, "step": 11866 }, { "epoch": 0.6645201030350543, "grad_norm": 1.1263573169708252, "learning_rate": 5.9305e-05, "loss": 0.3515, "step": 11867 }, { "epoch": 0.6645761003471833, "grad_norm": 1.3177452087402344, "learning_rate": 5.931e-05, "loss": 0.3774, "step": 11868 }, { "epoch": 0.6646320976593123, "grad_norm": 1.270390272140503, "learning_rate": 5.9315e-05, "loss": 0.4753, "step": 11869 }, { "epoch": 0.6646880949714413, "grad_norm": 1.494642734527588, "learning_rate": 5.9319999999999994e-05, "loss": 0.3979, "step": 11870 }, { "epoch": 0.6647440922835703, "grad_norm": 1.6839993000030518, "learning_rate": 5.9325000000000005e-05, "loss": 0.5764, "step": 11871 }, { "epoch": 0.6648000895956994, "grad_norm": 1.3141297101974487, "learning_rate": 5.933000000000001e-05, "loss": 0.3978, "step": 11872 }, { "epoch": 0.6648560869078284, "grad_norm": 1.3168694972991943, "learning_rate": 5.933500000000001e-05, "loss": 0.469, "step": 11873 }, { "epoch": 0.6649120842199574, "grad_norm": 1.372299313545227, "learning_rate": 5.9340000000000004e-05, "loss": 0.4825, "step": 11874 }, { "epoch": 0.6649680815320864, "grad_norm": 2.188124895095825, "learning_rate": 5.934500000000001e-05, "loss": 
0.5163, "step": 11875 }, { "epoch": 0.6650240788442154, "grad_norm": 1.4128270149230957, "learning_rate": 5.9350000000000006e-05, "loss": 0.4864, "step": 11876 }, { "epoch": 0.6650800761563445, "grad_norm": 1.202697992324829, "learning_rate": 5.9355000000000003e-05, "loss": 0.4417, "step": 11877 }, { "epoch": 0.6651360734684735, "grad_norm": 1.3172637224197388, "learning_rate": 5.936000000000001e-05, "loss": 0.4383, "step": 11878 }, { "epoch": 0.6651920707806025, "grad_norm": 1.202485203742981, "learning_rate": 5.9365000000000005e-05, "loss": 0.3311, "step": 11879 }, { "epoch": 0.6652480680927315, "grad_norm": 1.5256835222244263, "learning_rate": 5.937e-05, "loss": 0.3707, "step": 11880 }, { "epoch": 0.6653040654048605, "grad_norm": 1.5215630531311035, "learning_rate": 5.9375e-05, "loss": 0.4309, "step": 11881 }, { "epoch": 0.6653600627169896, "grad_norm": 1.437851071357727, "learning_rate": 5.9380000000000004e-05, "loss": 0.3928, "step": 11882 }, { "epoch": 0.6654160600291186, "grad_norm": 1.1500213146209717, "learning_rate": 5.9385e-05, "loss": 0.3762, "step": 11883 }, { "epoch": 0.6654720573412476, "grad_norm": 1.196663737297058, "learning_rate": 5.939e-05, "loss": 0.3458, "step": 11884 }, { "epoch": 0.6655280546533766, "grad_norm": 1.365906000137329, "learning_rate": 5.9395e-05, "loss": 0.4867, "step": 11885 }, { "epoch": 0.6655840519655056, "grad_norm": 1.3021512031555176, "learning_rate": 5.94e-05, "loss": 0.45, "step": 11886 }, { "epoch": 0.6656400492776346, "grad_norm": 1.5049903392791748, "learning_rate": 5.9405e-05, "loss": 0.4254, "step": 11887 }, { "epoch": 0.6656960465897637, "grad_norm": 1.4015634059906006, "learning_rate": 5.941e-05, "loss": 0.4781, "step": 11888 }, { "epoch": 0.6657520439018927, "grad_norm": 1.026158332824707, "learning_rate": 5.9415e-05, "loss": 0.3566, "step": 11889 }, { "epoch": 0.6658080412140217, "grad_norm": 1.1147710084915161, "learning_rate": 5.942e-05, "loss": 0.4144, "step": 11890 }, { "epoch": 0.6658640385261507, 
"grad_norm": 1.2159868478775024, "learning_rate": 5.9424999999999994e-05, "loss": 0.4443, "step": 11891 }, { "epoch": 0.6659200358382797, "grad_norm": 1.2928882837295532, "learning_rate": 5.9430000000000005e-05, "loss": 0.4125, "step": 11892 }, { "epoch": 0.6659760331504088, "grad_norm": 1.3846945762634277, "learning_rate": 5.943500000000001e-05, "loss": 0.4197, "step": 11893 }, { "epoch": 0.6660320304625378, "grad_norm": 1.3825461864471436, "learning_rate": 5.944000000000001e-05, "loss": 0.5739, "step": 11894 }, { "epoch": 0.6660880277746668, "grad_norm": 1.2266974449157715, "learning_rate": 5.9445000000000004e-05, "loss": 0.375, "step": 11895 }, { "epoch": 0.6661440250867958, "grad_norm": 1.3153057098388672, "learning_rate": 5.945000000000001e-05, "loss": 0.4659, "step": 11896 }, { "epoch": 0.6662000223989248, "grad_norm": 1.5280402898788452, "learning_rate": 5.9455000000000006e-05, "loss": 0.4301, "step": 11897 }, { "epoch": 0.6662560197110539, "grad_norm": 1.302307367324829, "learning_rate": 5.946e-05, "loss": 0.5529, "step": 11898 }, { "epoch": 0.6663120170231829, "grad_norm": 1.386218547821045, "learning_rate": 5.9465e-05, "loss": 0.4544, "step": 11899 }, { "epoch": 0.6663680143353119, "grad_norm": 1.0509182214736938, "learning_rate": 5.9470000000000005e-05, "loss": 0.3286, "step": 11900 }, { "epoch": 0.6664240116474409, "grad_norm": 1.423601508140564, "learning_rate": 5.9475e-05, "loss": 0.4119, "step": 11901 }, { "epoch": 0.6664800089595699, "grad_norm": 1.2592089176177979, "learning_rate": 5.948e-05, "loss": 0.6676, "step": 11902 }, { "epoch": 0.666536006271699, "grad_norm": 1.3460267782211304, "learning_rate": 5.9485000000000004e-05, "loss": 0.4699, "step": 11903 }, { "epoch": 0.666592003583828, "grad_norm": 1.334141731262207, "learning_rate": 5.949e-05, "loss": 0.5719, "step": 11904 }, { "epoch": 0.666648000895957, "grad_norm": 1.3642629384994507, "learning_rate": 5.9495e-05, "loss": 0.5086, "step": 11905 }, { "epoch": 0.666703998208086, "grad_norm": 
1.1799554824829102, "learning_rate": 5.95e-05, "loss": 0.4032, "step": 11906 }, { "epoch": 0.666759995520215, "grad_norm": 1.4249459505081177, "learning_rate": 5.9505e-05, "loss": 0.4316, "step": 11907 }, { "epoch": 0.666815992832344, "grad_norm": 1.4522744417190552, "learning_rate": 5.951e-05, "loss": 0.4173, "step": 11908 }, { "epoch": 0.6668719901444731, "grad_norm": 1.1182037591934204, "learning_rate": 5.9514999999999995e-05, "loss": 0.3313, "step": 11909 }, { "epoch": 0.6669279874566021, "grad_norm": 1.2711259126663208, "learning_rate": 5.952e-05, "loss": 0.3685, "step": 11910 }, { "epoch": 0.6669839847687311, "grad_norm": 5.938239574432373, "learning_rate": 5.9525e-05, "loss": 0.4984, "step": 11911 }, { "epoch": 0.6670399820808601, "grad_norm": 1.4067604541778564, "learning_rate": 5.953000000000001e-05, "loss": 0.4658, "step": 11912 }, { "epoch": 0.6670959793929891, "grad_norm": 1.7628065347671509, "learning_rate": 5.9535000000000005e-05, "loss": 0.3645, "step": 11913 }, { "epoch": 0.6671519767051182, "grad_norm": 1.2518030405044556, "learning_rate": 5.954000000000001e-05, "loss": 0.4882, "step": 11914 }, { "epoch": 0.6672079740172472, "grad_norm": 1.567389965057373, "learning_rate": 5.954500000000001e-05, "loss": 0.4946, "step": 11915 }, { "epoch": 0.6672639713293762, "grad_norm": 1.301295280456543, "learning_rate": 5.9550000000000004e-05, "loss": 0.3501, "step": 11916 }, { "epoch": 0.6673199686415052, "grad_norm": 1.160940170288086, "learning_rate": 5.955500000000001e-05, "loss": 0.347, "step": 11917 }, { "epoch": 0.6673759659536342, "grad_norm": 1.2055591344833374, "learning_rate": 5.9560000000000006e-05, "loss": 0.3748, "step": 11918 }, { "epoch": 0.6674319632657633, "grad_norm": 1.1648057699203491, "learning_rate": 5.9565e-05, "loss": 0.3992, "step": 11919 }, { "epoch": 0.6674879605778923, "grad_norm": 1.2689573764801025, "learning_rate": 5.957e-05, "loss": 0.3886, "step": 11920 }, { "epoch": 0.6675439578900213, "grad_norm": 1.2126526832580566, 
"learning_rate": 5.9575000000000005e-05, "loss": 0.3961, "step": 11921 }, { "epoch": 0.6675999552021503, "grad_norm": 1.1302536725997925, "learning_rate": 5.958e-05, "loss": 0.3949, "step": 11922 }, { "epoch": 0.6676559525142793, "grad_norm": 1.5190685987472534, "learning_rate": 5.9585e-05, "loss": 0.63, "step": 11923 }, { "epoch": 0.6677119498264084, "grad_norm": 1.2824280261993408, "learning_rate": 5.9590000000000004e-05, "loss": 0.434, "step": 11924 }, { "epoch": 0.6677679471385374, "grad_norm": 1.348188042640686, "learning_rate": 5.9595e-05, "loss": 0.368, "step": 11925 }, { "epoch": 0.6678239444506664, "grad_norm": 1.1867979764938354, "learning_rate": 5.96e-05, "loss": 0.4094, "step": 11926 }, { "epoch": 0.6678799417627954, "grad_norm": 1.3989386558532715, "learning_rate": 5.9605e-05, "loss": 0.5733, "step": 11927 }, { "epoch": 0.6679359390749244, "grad_norm": 1.3534163236618042, "learning_rate": 5.961e-05, "loss": 0.4177, "step": 11928 }, { "epoch": 0.6679919363870535, "grad_norm": 1.5746853351593018, "learning_rate": 5.9615e-05, "loss": 0.4174, "step": 11929 }, { "epoch": 0.6680479336991825, "grad_norm": 1.2192744016647339, "learning_rate": 5.9619999999999995e-05, "loss": 0.4422, "step": 11930 }, { "epoch": 0.6681039310113115, "grad_norm": 2.8761403560638428, "learning_rate": 5.9625e-05, "loss": 0.5961, "step": 11931 }, { "epoch": 0.6681599283234405, "grad_norm": 1.376819372177124, "learning_rate": 5.963000000000001e-05, "loss": 0.5999, "step": 11932 }, { "epoch": 0.6682159256355695, "grad_norm": 1.293157696723938, "learning_rate": 5.963500000000001e-05, "loss": 0.3914, "step": 11933 }, { "epoch": 0.6682719229476985, "grad_norm": 1.537792444229126, "learning_rate": 5.9640000000000005e-05, "loss": 0.5856, "step": 11934 }, { "epoch": 0.6683279202598276, "grad_norm": 2.8495757579803467, "learning_rate": 5.964500000000001e-05, "loss": 0.788, "step": 11935 }, { "epoch": 0.6683839175719566, "grad_norm": 1.2567057609558105, "learning_rate": 5.9650000000000007e-05, 
"loss": 0.4586, "step": 11936 }, { "epoch": 0.6684399148840856, "grad_norm": 1.1390223503112793, "learning_rate": 5.9655000000000004e-05, "loss": 0.3774, "step": 11937 }, { "epoch": 0.6684959121962146, "grad_norm": 1.2039321660995483, "learning_rate": 5.966000000000001e-05, "loss": 0.3559, "step": 11938 }, { "epoch": 0.6685519095083436, "grad_norm": 1.622532606124878, "learning_rate": 5.9665000000000006e-05, "loss": 0.3729, "step": 11939 }, { "epoch": 0.6686079068204727, "grad_norm": 1.2902191877365112, "learning_rate": 5.967e-05, "loss": 0.4087, "step": 11940 }, { "epoch": 0.6686639041326017, "grad_norm": 1.5999029874801636, "learning_rate": 5.9675e-05, "loss": 0.4653, "step": 11941 }, { "epoch": 0.6687199014447307, "grad_norm": 1.2554268836975098, "learning_rate": 5.9680000000000005e-05, "loss": 0.4276, "step": 11942 }, { "epoch": 0.6687758987568597, "grad_norm": 1.4497487545013428, "learning_rate": 5.9685e-05, "loss": 0.4258, "step": 11943 }, { "epoch": 0.6688318960689887, "grad_norm": 1.1641937494277954, "learning_rate": 5.969e-05, "loss": 0.3416, "step": 11944 }, { "epoch": 0.6688878933811178, "grad_norm": 1.5143848657608032, "learning_rate": 5.9695000000000004e-05, "loss": 0.5738, "step": 11945 }, { "epoch": 0.6689438906932468, "grad_norm": 1.2914464473724365, "learning_rate": 5.97e-05, "loss": 0.3678, "step": 11946 }, { "epoch": 0.6689998880053757, "grad_norm": 1.261762022972107, "learning_rate": 5.9705e-05, "loss": 0.5169, "step": 11947 }, { "epoch": 0.6690558853175047, "grad_norm": 1.398228406906128, "learning_rate": 5.971e-05, "loss": 0.4794, "step": 11948 }, { "epoch": 0.6691118826296337, "grad_norm": 2.0928797721862793, "learning_rate": 5.9715e-05, "loss": 0.4342, "step": 11949 }, { "epoch": 0.6691678799417627, "grad_norm": 1.274298071861267, "learning_rate": 5.972e-05, "loss": 0.3518, "step": 11950 }, { "epoch": 0.6692238772538918, "grad_norm": 1.3640156984329224, "learning_rate": 5.9724999999999995e-05, "loss": 0.4479, "step": 11951 }, { "epoch": 
0.6692798745660208, "grad_norm": 1.148178219795227, "learning_rate": 5.9730000000000006e-05, "loss": 0.3329, "step": 11952 }, { "epoch": 0.6693358718781498, "grad_norm": 1.2451313734054565, "learning_rate": 5.973500000000001e-05, "loss": 0.3687, "step": 11953 }, { "epoch": 0.6693918691902788, "grad_norm": 1.5776304006576538, "learning_rate": 5.974000000000001e-05, "loss": 0.3967, "step": 11954 }, { "epoch": 0.6694478665024078, "grad_norm": 1.358848214149475, "learning_rate": 5.9745000000000005e-05, "loss": 0.416, "step": 11955 }, { "epoch": 0.6695038638145369, "grad_norm": 1.2372437715530396, "learning_rate": 5.975000000000001e-05, "loss": 0.4196, "step": 11956 }, { "epoch": 0.6695598611266659, "grad_norm": 1.1792287826538086, "learning_rate": 5.9755000000000006e-05, "loss": 0.4522, "step": 11957 }, { "epoch": 0.6696158584387949, "grad_norm": 1.178227186203003, "learning_rate": 5.9760000000000004e-05, "loss": 0.3618, "step": 11958 }, { "epoch": 0.6696718557509239, "grad_norm": 1.2957943677902222, "learning_rate": 5.9765e-05, "loss": 0.5151, "step": 11959 }, { "epoch": 0.6697278530630529, "grad_norm": 1.2186528444290161, "learning_rate": 5.9770000000000005e-05, "loss": 0.5495, "step": 11960 }, { "epoch": 0.669783850375182, "grad_norm": 1.2566514015197754, "learning_rate": 5.9775e-05, "loss": 0.4227, "step": 11961 }, { "epoch": 0.669839847687311, "grad_norm": 1.2219884395599365, "learning_rate": 5.978e-05, "loss": 0.319, "step": 11962 }, { "epoch": 0.66989584499944, "grad_norm": 1.4038097858428955, "learning_rate": 5.9785000000000004e-05, "loss": 0.4801, "step": 11963 }, { "epoch": 0.669951842311569, "grad_norm": 1.5065792798995972, "learning_rate": 5.979e-05, "loss": 0.421, "step": 11964 }, { "epoch": 0.670007839623698, "grad_norm": 1.3518203496932983, "learning_rate": 5.9795e-05, "loss": 0.3793, "step": 11965 }, { "epoch": 0.670063836935827, "grad_norm": 1.4339953660964966, "learning_rate": 5.9800000000000003e-05, "loss": 0.4307, "step": 11966 }, { "epoch": 
0.6701198342479561, "grad_norm": 1.2827121019363403, "learning_rate": 5.9805e-05, "loss": 0.3806, "step": 11967 }, { "epoch": 0.6701758315600851, "grad_norm": null, "learning_rate": 5.9805e-05, "loss": 0.4681, "step": 11968 }, { "epoch": 0.6702318288722141, "grad_norm": 1.5028961896896362, "learning_rate": 5.981e-05, "loss": 0.4211, "step": 11969 }, { "epoch": 0.6702878261843431, "grad_norm": 1.3298301696777344, "learning_rate": 5.9814999999999996e-05, "loss": 0.303, "step": 11970 }, { "epoch": 0.6703438234964721, "grad_norm": 1.2868156433105469, "learning_rate": 5.982e-05, "loss": 0.5618, "step": 11971 }, { "epoch": 0.6703998208086012, "grad_norm": 1.4557560682296753, "learning_rate": 5.9825e-05, "loss": 0.5228, "step": 11972 }, { "epoch": 0.6704558181207302, "grad_norm": 1.225768804550171, "learning_rate": 5.983000000000001e-05, "loss": 0.4383, "step": 11973 }, { "epoch": 0.6705118154328592, "grad_norm": 1.3572560548782349, "learning_rate": 5.9835000000000006e-05, "loss": 0.4428, "step": 11974 }, { "epoch": 0.6705678127449882, "grad_norm": 2.91058349609375, "learning_rate": 5.984000000000001e-05, "loss": 0.3326, "step": 11975 }, { "epoch": 0.6706238100571172, "grad_norm": 1.3695838451385498, "learning_rate": 5.984500000000001e-05, "loss": 0.3944, "step": 11976 }, { "epoch": 0.6706798073692463, "grad_norm": 1.2205719947814941, "learning_rate": 5.9850000000000005e-05, "loss": 0.3951, "step": 11977 }, { "epoch": 0.6707358046813753, "grad_norm": 1.076446294784546, "learning_rate": 5.985500000000001e-05, "loss": 0.3704, "step": 11978 }, { "epoch": 0.6707918019935043, "grad_norm": 1.4320250749588013, "learning_rate": 5.9860000000000006e-05, "loss": 0.5801, "step": 11979 }, { "epoch": 0.6708477993056333, "grad_norm": 1.3300652503967285, "learning_rate": 5.9865000000000004e-05, "loss": 0.3682, "step": 11980 }, { "epoch": 0.6709037966177623, "grad_norm": 1.949277400970459, "learning_rate": 5.987e-05, "loss": 0.4824, "step": 11981 }, { "epoch": 0.6709597939298914, 
"grad_norm": 1.1415592432022095, "learning_rate": 5.9875000000000005e-05, "loss": 0.4576, "step": 11982 }, { "epoch": 0.6710157912420204, "grad_norm": 1.2905715703964233, "learning_rate": 5.988e-05, "loss": 0.425, "step": 11983 }, { "epoch": 0.6710717885541494, "grad_norm": 1.4143505096435547, "learning_rate": 5.9885e-05, "loss": 0.4279, "step": 11984 }, { "epoch": 0.6711277858662784, "grad_norm": 1.546417474746704, "learning_rate": 5.9890000000000004e-05, "loss": 0.4881, "step": 11985 }, { "epoch": 0.6711837831784074, "grad_norm": 1.5658057928085327, "learning_rate": 5.9895e-05, "loss": 0.4668, "step": 11986 }, { "epoch": 0.6712397804905365, "grad_norm": 1.1546103954315186, "learning_rate": 5.99e-05, "loss": 0.4428, "step": 11987 }, { "epoch": 0.6712957778026655, "grad_norm": 1.3035434484481812, "learning_rate": 5.9905e-05, "loss": 0.4807, "step": 11988 }, { "epoch": 0.6713517751147945, "grad_norm": 1.1266698837280273, "learning_rate": 5.991e-05, "loss": 0.4117, "step": 11989 }, { "epoch": 0.6714077724269235, "grad_norm": 1.3537970781326294, "learning_rate": 5.9915e-05, "loss": 0.4936, "step": 11990 }, { "epoch": 0.6714637697390525, "grad_norm": 1.3205552101135254, "learning_rate": 5.9919999999999996e-05, "loss": 0.4109, "step": 11991 }, { "epoch": 0.6715197670511815, "grad_norm": 1.755386471748352, "learning_rate": 5.9925e-05, "loss": 0.5293, "step": 11992 }, { "epoch": 0.6715757643633106, "grad_norm": 1.450015902519226, "learning_rate": 5.993000000000001e-05, "loss": 0.4599, "step": 11993 }, { "epoch": 0.6716317616754396, "grad_norm": 1.3256274461746216, "learning_rate": 5.993500000000001e-05, "loss": 0.4843, "step": 11994 }, { "epoch": 0.6716877589875686, "grad_norm": 1.287933349609375, "learning_rate": 5.9940000000000005e-05, "loss": 0.36, "step": 11995 }, { "epoch": 0.6717437562996976, "grad_norm": 1.2362840175628662, "learning_rate": 5.994500000000001e-05, "loss": 0.5034, "step": 11996 }, { "epoch": 0.6717997536118266, "grad_norm": 1.6679575443267822, 
"learning_rate": 5.995000000000001e-05, "loss": 0.4632, "step": 11997 }, { "epoch": 0.6718557509239557, "grad_norm": 1.4457893371582031, "learning_rate": 5.9955000000000004e-05, "loss": 0.6138, "step": 11998 }, { "epoch": 0.6719117482360847, "grad_norm": 1.1589275598526, "learning_rate": 5.996e-05, "loss": 0.3804, "step": 11999 }, { "epoch": 0.6719677455482137, "grad_norm": 1.1779927015304565, "learning_rate": 5.9965000000000006e-05, "loss": 0.4292, "step": 12000 }, { "epoch": 0.6720237428603427, "grad_norm": 1.5580350160598755, "learning_rate": 5.9970000000000004e-05, "loss": 0.3699, "step": 12001 }, { "epoch": 0.6720797401724717, "grad_norm": 1.4929412603378296, "learning_rate": 5.9975e-05, "loss": 0.4857, "step": 12002 }, { "epoch": 0.6721357374846008, "grad_norm": 1.3599839210510254, "learning_rate": 5.9980000000000005e-05, "loss": 0.5874, "step": 12003 }, { "epoch": 0.6721917347967298, "grad_norm": 1.2680851221084595, "learning_rate": 5.9985e-05, "loss": 0.4742, "step": 12004 }, { "epoch": 0.6722477321088588, "grad_norm": 1.1076149940490723, "learning_rate": 5.999e-05, "loss": 0.3154, "step": 12005 }, { "epoch": 0.6723037294209878, "grad_norm": 1.2600069046020508, "learning_rate": 5.9995000000000004e-05, "loss": 0.342, "step": 12006 }, { "epoch": 0.6723597267331168, "grad_norm": 1.2424794435501099, "learning_rate": 6e-05, "loss": 0.4882, "step": 12007 }, { "epoch": 0.6724157240452459, "grad_norm": 1.4509917497634888, "learning_rate": 6.0005e-05, "loss": 0.3048, "step": 12008 }, { "epoch": 0.6724717213573749, "grad_norm": 1.324674367904663, "learning_rate": 6.0009999999999996e-05, "loss": 0.395, "step": 12009 }, { "epoch": 0.6725277186695039, "grad_norm": 1.259440541267395, "learning_rate": 6.0015e-05, "loss": 0.4889, "step": 12010 }, { "epoch": 0.6725837159816329, "grad_norm": 1.3536385297775269, "learning_rate": 6.002e-05, "loss": 0.498, "step": 12011 }, { "epoch": 0.6726397132937619, "grad_norm": 1.317745566368103, "learning_rate": 6.0024999999999995e-05, 
"loss": 0.4705, "step": 12012 }, { "epoch": 0.672695710605891, "grad_norm": 1.2811329364776611, "learning_rate": 6.003e-05, "loss": 0.4175, "step": 12013 }, { "epoch": 0.67275170791802, "grad_norm": 1.3161181211471558, "learning_rate": 6.003500000000001e-05, "loss": 0.5964, "step": 12014 }, { "epoch": 0.672807705230149, "grad_norm": 1.2640495300292969, "learning_rate": 6.004000000000001e-05, "loss": 0.4612, "step": 12015 }, { "epoch": 0.672863702542278, "grad_norm": 1.3181904554367065, "learning_rate": 6.0045000000000005e-05, "loss": 0.3851, "step": 12016 }, { "epoch": 0.672919699854407, "grad_norm": 1.3061329126358032, "learning_rate": 6.005000000000001e-05, "loss": 0.5407, "step": 12017 }, { "epoch": 0.672975697166536, "grad_norm": 1.2655748128890991, "learning_rate": 6.005500000000001e-05, "loss": 0.3327, "step": 12018 }, { "epoch": 0.6730316944786651, "grad_norm": 1.140716552734375, "learning_rate": 6.0060000000000004e-05, "loss": 0.4395, "step": 12019 }, { "epoch": 0.6730876917907941, "grad_norm": 1.422457218170166, "learning_rate": 6.0065e-05, "loss": 0.6095, "step": 12020 }, { "epoch": 0.6731436891029231, "grad_norm": 1.1209276914596558, "learning_rate": 6.0070000000000006e-05, "loss": 0.3438, "step": 12021 }, { "epoch": 0.6731996864150521, "grad_norm": 1.7475590705871582, "learning_rate": 6.0075e-05, "loss": 0.5939, "step": 12022 }, { "epoch": 0.6732556837271811, "grad_norm": 1.2284224033355713, "learning_rate": 6.008e-05, "loss": 0.3576, "step": 12023 }, { "epoch": 0.6733116810393102, "grad_norm": 1.2152773141860962, "learning_rate": 6.0085000000000005e-05, "loss": 0.432, "step": 12024 }, { "epoch": 0.6733676783514392, "grad_norm": 1.2505340576171875, "learning_rate": 6.009e-05, "loss": 0.3708, "step": 12025 }, { "epoch": 0.6734236756635682, "grad_norm": 1.2982544898986816, "learning_rate": 6.0095e-05, "loss": 0.5278, "step": 12026 }, { "epoch": 0.6734796729756972, "grad_norm": 1.1628079414367676, "learning_rate": 6.0100000000000004e-05, "loss": 0.3659, 
"step": 12027 }, { "epoch": 0.6735356702878262, "grad_norm": 1.8002924919128418, "learning_rate": 6.0105e-05, "loss": 0.8369, "step": 12028 }, { "epoch": 0.6735916675999553, "grad_norm": 1.223206877708435, "learning_rate": 6.011e-05, "loss": 0.5043, "step": 12029 }, { "epoch": 0.6736476649120842, "grad_norm": 1.3584004640579224, "learning_rate": 6.0114999999999996e-05, "loss": 0.4154, "step": 12030 }, { "epoch": 0.6737036622242132, "grad_norm": 1.5717308521270752, "learning_rate": 6.012e-05, "loss": 0.4392, "step": 12031 }, { "epoch": 0.6737596595363422, "grad_norm": 1.321874737739563, "learning_rate": 6.0125e-05, "loss": 0.3455, "step": 12032 }, { "epoch": 0.6738156568484712, "grad_norm": 1.0951192378997803, "learning_rate": 6.0129999999999995e-05, "loss": 0.3107, "step": 12033 }, { "epoch": 0.6738716541606002, "grad_norm": 1.5254805088043213, "learning_rate": 6.0135000000000006e-05, "loss": 0.3985, "step": 12034 }, { "epoch": 0.6739276514727293, "grad_norm": 1.4667788743972778, "learning_rate": 6.014000000000001e-05, "loss": 0.4162, "step": 12035 }, { "epoch": 0.6739836487848583, "grad_norm": 1.4369940757751465, "learning_rate": 6.014500000000001e-05, "loss": 0.6691, "step": 12036 }, { "epoch": 0.6740396460969873, "grad_norm": 1.646856427192688, "learning_rate": 6.0150000000000005e-05, "loss": 0.392, "step": 12037 }, { "epoch": 0.6740956434091163, "grad_norm": 1.3220666646957397, "learning_rate": 6.0155e-05, "loss": 0.4313, "step": 12038 }, { "epoch": 0.6741516407212453, "grad_norm": 1.4972738027572632, "learning_rate": 6.016000000000001e-05, "loss": 0.492, "step": 12039 }, { "epoch": 0.6742076380333744, "grad_norm": 1.316117763519287, "learning_rate": 6.0165000000000004e-05, "loss": 0.5395, "step": 12040 }, { "epoch": 0.6742636353455034, "grad_norm": 1.4296866655349731, "learning_rate": 6.017e-05, "loss": 0.4829, "step": 12041 }, { "epoch": 0.6743196326576324, "grad_norm": 1.4425750970840454, "learning_rate": 6.0175000000000006e-05, "loss": 0.3922, "step": 12042 
}, { "epoch": 0.6743756299697614, "grad_norm": 1.1359617710113525, "learning_rate": 6.018e-05, "loss": 0.4163, "step": 12043 }, { "epoch": 0.6744316272818904, "grad_norm": 1.3443559408187866, "learning_rate": 6.0185e-05, "loss": 0.4645, "step": 12044 }, { "epoch": 0.6744876245940195, "grad_norm": 1.6530473232269287, "learning_rate": 6.0190000000000005e-05, "loss": 0.4853, "step": 12045 }, { "epoch": 0.6745436219061485, "grad_norm": 1.2223633527755737, "learning_rate": 6.0195e-05, "loss": 0.4603, "step": 12046 }, { "epoch": 0.6745996192182775, "grad_norm": 1.292128086090088, "learning_rate": 6.02e-05, "loss": 0.4506, "step": 12047 }, { "epoch": 0.6746556165304065, "grad_norm": 1.9163436889648438, "learning_rate": 6.0205e-05, "loss": 0.6832, "step": 12048 }, { "epoch": 0.6747116138425355, "grad_norm": 1.1957800388336182, "learning_rate": 6.021e-05, "loss": 0.327, "step": 12049 }, { "epoch": 0.6747676111546645, "grad_norm": 1.2962968349456787, "learning_rate": 6.0215e-05, "loss": 0.5793, "step": 12050 }, { "epoch": 0.6748236084667936, "grad_norm": 1.2703187465667725, "learning_rate": 6.0219999999999996e-05, "loss": 0.3853, "step": 12051 }, { "epoch": 0.6748796057789226, "grad_norm": 1.4209612607955933, "learning_rate": 6.0225e-05, "loss": 0.5524, "step": 12052 }, { "epoch": 0.6749356030910516, "grad_norm": 1.231265902519226, "learning_rate": 6.023e-05, "loss": 0.5274, "step": 12053 }, { "epoch": 0.6749916004031806, "grad_norm": 1.2571219205856323, "learning_rate": 6.023500000000001e-05, "loss": 0.422, "step": 12054 }, { "epoch": 0.6750475977153096, "grad_norm": 1.330564022064209, "learning_rate": 6.0240000000000006e-05, "loss": 0.4988, "step": 12055 }, { "epoch": 0.6751035950274387, "grad_norm": 1.2424752712249756, "learning_rate": 6.024500000000001e-05, "loss": 0.5119, "step": 12056 }, { "epoch": 0.6751595923395677, "grad_norm": 1.1252355575561523, "learning_rate": 6.025000000000001e-05, "loss": 0.3737, "step": 12057 }, { "epoch": 0.6752155896516967, "grad_norm": 
1.6888601779937744, "learning_rate": 6.0255000000000005e-05, "loss": 0.5231, "step": 12058 }, { "epoch": 0.6752715869638257, "grad_norm": 1.2207615375518799, "learning_rate": 6.026e-05, "loss": 0.3587, "step": 12059 }, { "epoch": 0.6753275842759547, "grad_norm": 1.3332748413085938, "learning_rate": 6.0265000000000007e-05, "loss": 0.4438, "step": 12060 }, { "epoch": 0.6753835815880838, "grad_norm": 1.7057472467422485, "learning_rate": 6.0270000000000004e-05, "loss": 0.6373, "step": 12061 }, { "epoch": 0.6754395789002128, "grad_norm": 1.2983251810073853, "learning_rate": 6.0275e-05, "loss": 0.5587, "step": 12062 }, { "epoch": 0.6754955762123418, "grad_norm": 1.5316787958145142, "learning_rate": 6.0280000000000006e-05, "loss": 0.4676, "step": 12063 }, { "epoch": 0.6755515735244708, "grad_norm": 1.215063214302063, "learning_rate": 6.0285e-05, "loss": 0.4541, "step": 12064 }, { "epoch": 0.6756075708365998, "grad_norm": 1.2608975172042847, "learning_rate": 6.029e-05, "loss": 0.5397, "step": 12065 }, { "epoch": 0.6756635681487289, "grad_norm": 1.618356704711914, "learning_rate": 6.0295000000000005e-05, "loss": 0.5113, "step": 12066 }, { "epoch": 0.6757195654608579, "grad_norm": 1.4438884258270264, "learning_rate": 6.03e-05, "loss": 0.4424, "step": 12067 }, { "epoch": 0.6757755627729869, "grad_norm": 1.1770647764205933, "learning_rate": 6.0305e-05, "loss": 0.3894, "step": 12068 }, { "epoch": 0.6758315600851159, "grad_norm": 1.245314121246338, "learning_rate": 6.031e-05, "loss": 0.4122, "step": 12069 }, { "epoch": 0.6758875573972449, "grad_norm": 1.1871123313903809, "learning_rate": 6.0315e-05, "loss": 0.4421, "step": 12070 }, { "epoch": 0.675943554709374, "grad_norm": 1.7059030532836914, "learning_rate": 6.032e-05, "loss": 0.4171, "step": 12071 }, { "epoch": 0.675999552021503, "grad_norm": 1.3483079671859741, "learning_rate": 6.0324999999999996e-05, "loss": 0.4056, "step": 12072 }, { "epoch": 0.676055549333632, "grad_norm": 1.364949345588684, "learning_rate": 6.033e-05, 
"loss": 0.3944, "step": 12073 }, { "epoch": 0.676111546645761, "grad_norm": 1.3981837034225464, "learning_rate": 6.033500000000001e-05, "loss": 0.398, "step": 12074 }, { "epoch": 0.67616754395789, "grad_norm": 1.3125524520874023, "learning_rate": 6.034000000000001e-05, "loss": 0.4499, "step": 12075 }, { "epoch": 0.676223541270019, "grad_norm": 1.201357364654541, "learning_rate": 6.0345000000000006e-05, "loss": 0.4868, "step": 12076 }, { "epoch": 0.6762795385821481, "grad_norm": 1.6828097105026245, "learning_rate": 6.035e-05, "loss": 0.5972, "step": 12077 }, { "epoch": 0.6763355358942771, "grad_norm": 1.5219569206237793, "learning_rate": 6.035500000000001e-05, "loss": 0.4371, "step": 12078 }, { "epoch": 0.6763915332064061, "grad_norm": 1.4877394437789917, "learning_rate": 6.0360000000000005e-05, "loss": 0.4413, "step": 12079 }, { "epoch": 0.6764475305185351, "grad_norm": 1.6040446758270264, "learning_rate": 6.0365e-05, "loss": 0.4621, "step": 12080 }, { "epoch": 0.6765035278306641, "grad_norm": 1.199930191040039, "learning_rate": 6.0370000000000006e-05, "loss": 0.4529, "step": 12081 }, { "epoch": 0.6765595251427932, "grad_norm": 1.2376455068588257, "learning_rate": 6.0375000000000004e-05, "loss": 0.4784, "step": 12082 }, { "epoch": 0.6766155224549222, "grad_norm": 1.2274621725082397, "learning_rate": 6.038e-05, "loss": 0.3986, "step": 12083 }, { "epoch": 0.6766715197670512, "grad_norm": 1.4890775680541992, "learning_rate": 6.0385000000000005e-05, "loss": 0.5813, "step": 12084 }, { "epoch": 0.6767275170791802, "grad_norm": 1.232393741607666, "learning_rate": 6.039e-05, "loss": 0.4071, "step": 12085 }, { "epoch": 0.6767835143913092, "grad_norm": 1.3618180751800537, "learning_rate": 6.0395e-05, "loss": 0.4631, "step": 12086 }, { "epoch": 0.6768395117034383, "grad_norm": 1.5815465450286865, "learning_rate": 6.04e-05, "loss": 0.6354, "step": 12087 }, { "epoch": 0.6768955090155673, "grad_norm": 1.2645756006240845, "learning_rate": 6.0405e-05, "loss": 0.427, "step": 12088 
}, { "epoch": 0.6769515063276963, "grad_norm": 1.0445808172225952, "learning_rate": 6.041e-05, "loss": 0.332, "step": 12089 }, { "epoch": 0.6770075036398253, "grad_norm": 1.3043770790100098, "learning_rate": 6.0415e-05, "loss": 0.6544, "step": 12090 }, { "epoch": 0.6770635009519543, "grad_norm": 1.4303696155548096, "learning_rate": 6.042e-05, "loss": 0.4022, "step": 12091 }, { "epoch": 0.6771194982640834, "grad_norm": 1.2758795022964478, "learning_rate": 6.0425e-05, "loss": 0.4598, "step": 12092 }, { "epoch": 0.6771754955762124, "grad_norm": 1.3888673782348633, "learning_rate": 6.0429999999999996e-05, "loss": 0.4979, "step": 12093 }, { "epoch": 0.6772314928883414, "grad_norm": 1.3244187831878662, "learning_rate": 6.043500000000001e-05, "loss": 0.4616, "step": 12094 }, { "epoch": 0.6772874902004704, "grad_norm": 1.1622129678726196, "learning_rate": 6.044000000000001e-05, "loss": 0.4378, "step": 12095 }, { "epoch": 0.6773434875125994, "grad_norm": 1.3151376247406006, "learning_rate": 6.044500000000001e-05, "loss": 0.3957, "step": 12096 }, { "epoch": 0.6773994848247284, "grad_norm": 4.065365314483643, "learning_rate": 6.0450000000000006e-05, "loss": 0.4165, "step": 12097 }, { "epoch": 0.6774554821368575, "grad_norm": 1.9255039691925049, "learning_rate": 6.0455e-05, "loss": 0.5765, "step": 12098 }, { "epoch": 0.6775114794489865, "grad_norm": 1.1353352069854736, "learning_rate": 6.046000000000001e-05, "loss": 0.507, "step": 12099 }, { "epoch": 0.6775674767611155, "grad_norm": 1.3141318559646606, "learning_rate": 6.0465000000000005e-05, "loss": 0.4241, "step": 12100 }, { "epoch": 0.6776234740732445, "grad_norm": 1.657446265220642, "learning_rate": 6.047e-05, "loss": 0.4933, "step": 12101 }, { "epoch": 0.6776794713853735, "grad_norm": 1.1255770921707153, "learning_rate": 6.0475000000000006e-05, "loss": 0.3442, "step": 12102 }, { "epoch": 0.6777354686975026, "grad_norm": 1.3445061445236206, "learning_rate": 6.0480000000000004e-05, "loss": 0.4012, "step": 12103 }, { 
"epoch": 0.6777914660096316, "grad_norm": 1.5071191787719727, "learning_rate": 6.0485e-05, "loss": 0.431, "step": 12104 }, { "epoch": 0.6778474633217606, "grad_norm": 1.302628517150879, "learning_rate": 6.0490000000000005e-05, "loss": 0.3635, "step": 12105 }, { "epoch": 0.6779034606338896, "grad_norm": 1.3854845762252808, "learning_rate": 6.0495e-05, "loss": 0.5189, "step": 12106 }, { "epoch": 0.6779594579460186, "grad_norm": 1.2401306629180908, "learning_rate": 6.05e-05, "loss": 0.4829, "step": 12107 }, { "epoch": 0.6780154552581477, "grad_norm": 1.5215063095092773, "learning_rate": 6.0505e-05, "loss": 0.5669, "step": 12108 }, { "epoch": 0.6780714525702767, "grad_norm": 1.374367594718933, "learning_rate": 6.051e-05, "loss": 0.5544, "step": 12109 }, { "epoch": 0.6781274498824057, "grad_norm": 1.4461313486099243, "learning_rate": 6.0515e-05, "loss": 0.5488, "step": 12110 }, { "epoch": 0.6781834471945347, "grad_norm": 1.4633358716964722, "learning_rate": 6.0519999999999997e-05, "loss": 0.594, "step": 12111 }, { "epoch": 0.6782394445066637, "grad_norm": 1.371083378791809, "learning_rate": 6.0525e-05, "loss": 0.4453, "step": 12112 }, { "epoch": 0.6782954418187926, "grad_norm": 1.2095295190811157, "learning_rate": 6.053e-05, "loss": 0.34, "step": 12113 }, { "epoch": 0.6783514391309217, "grad_norm": 1.2679212093353271, "learning_rate": 6.053500000000001e-05, "loss": 0.4288, "step": 12114 }, { "epoch": 0.6784074364430507, "grad_norm": 1.5355103015899658, "learning_rate": 6.0540000000000007e-05, "loss": 0.5017, "step": 12115 }, { "epoch": 0.6784634337551797, "grad_norm": 1.0712206363677979, "learning_rate": 6.0545000000000004e-05, "loss": 0.3312, "step": 12116 }, { "epoch": 0.6785194310673087, "grad_norm": 1.1691625118255615, "learning_rate": 6.055000000000001e-05, "loss": 0.4556, "step": 12117 }, { "epoch": 0.6785754283794377, "grad_norm": 1.2647243738174438, "learning_rate": 6.0555000000000006e-05, "loss": 0.3252, "step": 12118 }, { "epoch": 0.6786314256915668, 
"grad_norm": 1.303964614868164, "learning_rate": 6.056e-05, "loss": 0.5992, "step": 12119 }, { "epoch": 0.6786874230036958, "grad_norm": 1.539599895477295, "learning_rate": 6.056500000000001e-05, "loss": 0.5647, "step": 12120 }, { "epoch": 0.6787434203158248, "grad_norm": 1.6583836078643799, "learning_rate": 6.0570000000000005e-05, "loss": 0.5323, "step": 12121 }, { "epoch": 0.6787994176279538, "grad_norm": 1.4732065200805664, "learning_rate": 6.0575e-05, "loss": 0.4887, "step": 12122 }, { "epoch": 0.6788554149400828, "grad_norm": 1.8859001398086548, "learning_rate": 6.0580000000000006e-05, "loss": 0.6046, "step": 12123 }, { "epoch": 0.6789114122522119, "grad_norm": 1.1522440910339355, "learning_rate": 6.0585000000000004e-05, "loss": 0.385, "step": 12124 }, { "epoch": 0.6789674095643409, "grad_norm": 1.0091975927352905, "learning_rate": 6.059e-05, "loss": 0.3164, "step": 12125 }, { "epoch": 0.6790234068764699, "grad_norm": 1.124294638633728, "learning_rate": 6.0595000000000005e-05, "loss": 0.3647, "step": 12126 }, { "epoch": 0.6790794041885989, "grad_norm": 1.23323392868042, "learning_rate": 6.06e-05, "loss": 0.4144, "step": 12127 }, { "epoch": 0.6791354015007279, "grad_norm": 1.3646094799041748, "learning_rate": 6.0605e-05, "loss": 0.7289, "step": 12128 }, { "epoch": 0.679191398812857, "grad_norm": 1.2699626684188843, "learning_rate": 6.061e-05, "loss": 0.429, "step": 12129 }, { "epoch": 0.679247396124986, "grad_norm": 1.2049400806427002, "learning_rate": 6.0615e-05, "loss": 0.4621, "step": 12130 }, { "epoch": 0.679303393437115, "grad_norm": 1.487858533859253, "learning_rate": 6.062e-05, "loss": 0.4598, "step": 12131 }, { "epoch": 0.679359390749244, "grad_norm": 1.4971121549606323, "learning_rate": 6.0624999999999996e-05, "loss": 0.508, "step": 12132 }, { "epoch": 0.679415388061373, "grad_norm": 1.1997946500778198, "learning_rate": 6.063e-05, "loss": 0.4751, "step": 12133 }, { "epoch": 0.679471385373502, "grad_norm": 1.2344303131103516, "learning_rate": 
6.0635e-05, "loss": 0.4005, "step": 12134 }, { "epoch": 0.6795273826856311, "grad_norm": 1.1131056547164917, "learning_rate": 6.064000000000001e-05, "loss": 0.3827, "step": 12135 }, { "epoch": 0.6795833799977601, "grad_norm": 1.1705071926116943, "learning_rate": 6.0645000000000006e-05, "loss": 0.3733, "step": 12136 }, { "epoch": 0.6796393773098891, "grad_norm": 1.2471826076507568, "learning_rate": 6.0650000000000004e-05, "loss": 0.4472, "step": 12137 }, { "epoch": 0.6796953746220181, "grad_norm": 1.3577433824539185, "learning_rate": 6.065500000000001e-05, "loss": 0.4631, "step": 12138 }, { "epoch": 0.6797513719341471, "grad_norm": 1.1097514629364014, "learning_rate": 6.0660000000000005e-05, "loss": 0.3606, "step": 12139 }, { "epoch": 0.6798073692462762, "grad_norm": 1.6002696752548218, "learning_rate": 6.0665e-05, "loss": 0.4334, "step": 12140 }, { "epoch": 0.6798633665584052, "grad_norm": 1.2866092920303345, "learning_rate": 6.067000000000001e-05, "loss": 0.4569, "step": 12141 }, { "epoch": 0.6799193638705342, "grad_norm": 1.1357052326202393, "learning_rate": 6.0675000000000004e-05, "loss": 0.4529, "step": 12142 }, { "epoch": 0.6799753611826632, "grad_norm": 1.271215796470642, "learning_rate": 6.068e-05, "loss": 0.3355, "step": 12143 }, { "epoch": 0.6800313584947922, "grad_norm": 1.1626436710357666, "learning_rate": 6.0685000000000006e-05, "loss": 0.4175, "step": 12144 }, { "epoch": 0.6800873558069213, "grad_norm": 1.1848807334899902, "learning_rate": 6.069e-05, "loss": 0.4359, "step": 12145 }, { "epoch": 0.6801433531190503, "grad_norm": 1.495087742805481, "learning_rate": 6.0695e-05, "loss": 0.4236, "step": 12146 }, { "epoch": 0.6801993504311793, "grad_norm": 1.203644037246704, "learning_rate": 6.07e-05, "loss": 0.4306, "step": 12147 }, { "epoch": 0.6802553477433083, "grad_norm": 1.1470236778259277, "learning_rate": 6.0705e-05, "loss": 0.4808, "step": 12148 }, { "epoch": 0.6803113450554373, "grad_norm": 1.2754034996032715, "learning_rate": 6.071e-05, "loss": 
0.496, "step": 12149 }, { "epoch": 0.6803673423675664, "grad_norm": 1.3479679822921753, "learning_rate": 6.0715e-05, "loss": 0.6041, "step": 12150 }, { "epoch": 0.6804233396796954, "grad_norm": 1.2230275869369507, "learning_rate": 6.072e-05, "loss": 0.3986, "step": 12151 }, { "epoch": 0.6804793369918244, "grad_norm": 1.4950847625732422, "learning_rate": 6.0725e-05, "loss": 0.4485, "step": 12152 }, { "epoch": 0.6805353343039534, "grad_norm": 1.4482049942016602, "learning_rate": 6.0729999999999996e-05, "loss": 0.4365, "step": 12153 }, { "epoch": 0.6805913316160824, "grad_norm": 1.2511510848999023, "learning_rate": 6.0735e-05, "loss": 0.5161, "step": 12154 }, { "epoch": 0.6806473289282114, "grad_norm": 1.4707467555999756, "learning_rate": 6.074000000000001e-05, "loss": 0.4518, "step": 12155 }, { "epoch": 0.6807033262403405, "grad_norm": 1.0941394567489624, "learning_rate": 6.074500000000001e-05, "loss": 0.3309, "step": 12156 }, { "epoch": 0.6807593235524695, "grad_norm": 1.1759692430496216, "learning_rate": 6.0750000000000006e-05, "loss": 0.3841, "step": 12157 }, { "epoch": 0.6808153208645985, "grad_norm": 1.0564043521881104, "learning_rate": 6.0755000000000004e-05, "loss": 0.4067, "step": 12158 }, { "epoch": 0.6808713181767275, "grad_norm": 1.4290287494659424, "learning_rate": 6.076000000000001e-05, "loss": 0.4944, "step": 12159 }, { "epoch": 0.6809273154888565, "grad_norm": 1.222448468208313, "learning_rate": 6.0765000000000005e-05, "loss": 0.3964, "step": 12160 }, { "epoch": 0.6809833128009856, "grad_norm": 1.0129196643829346, "learning_rate": 6.077e-05, "loss": 0.4556, "step": 12161 }, { "epoch": 0.6810393101131146, "grad_norm": 1.4554996490478516, "learning_rate": 6.077500000000001e-05, "loss": 0.3385, "step": 12162 }, { "epoch": 0.6810953074252436, "grad_norm": 1.183143973350525, "learning_rate": 6.0780000000000004e-05, "loss": 0.4086, "step": 12163 }, { "epoch": 0.6811513047373726, "grad_norm": 1.2586287260055542, "learning_rate": 6.0785e-05, "loss": 0.3804, 
"step": 12164 }, { "epoch": 0.6812073020495016, "grad_norm": 1.4544435739517212, "learning_rate": 6.0790000000000006e-05, "loss": 0.3895, "step": 12165 }, { "epoch": 0.6812632993616307, "grad_norm": 1.2856827974319458, "learning_rate": 6.0795e-05, "loss": 0.4406, "step": 12166 }, { "epoch": 0.6813192966737597, "grad_norm": 1.5319968461990356, "learning_rate": 6.08e-05, "loss": 0.4159, "step": 12167 }, { "epoch": 0.6813752939858887, "grad_norm": 1.0890932083129883, "learning_rate": 6.0805e-05, "loss": 0.3229, "step": 12168 }, { "epoch": 0.6814312912980177, "grad_norm": 1.1322203874588013, "learning_rate": 6.081e-05, "loss": 0.3951, "step": 12169 }, { "epoch": 0.6814872886101467, "grad_norm": 1.456730604171753, "learning_rate": 6.0815e-05, "loss": 0.459, "step": 12170 }, { "epoch": 0.6815432859222758, "grad_norm": 1.2333000898361206, "learning_rate": 6.082e-05, "loss": 0.3485, "step": 12171 }, { "epoch": 0.6815992832344048, "grad_norm": 1.2088539600372314, "learning_rate": 6.0825e-05, "loss": 0.4149, "step": 12172 }, { "epoch": 0.6816552805465338, "grad_norm": 1.328800916671753, "learning_rate": 6.083e-05, "loss": 0.5339, "step": 12173 }, { "epoch": 0.6817112778586628, "grad_norm": 1.2763824462890625, "learning_rate": 6.0834999999999996e-05, "loss": 0.4658, "step": 12174 }, { "epoch": 0.6817672751707918, "grad_norm": 1.3026294708251953, "learning_rate": 6.084000000000001e-05, "loss": 0.4155, "step": 12175 }, { "epoch": 0.6818232724829209, "grad_norm": 1.4607728719711304, "learning_rate": 6.0845000000000004e-05, "loss": 0.5532, "step": 12176 }, { "epoch": 0.6818792697950499, "grad_norm": 1.214138150215149, "learning_rate": 6.085000000000001e-05, "loss": 0.4987, "step": 12177 }, { "epoch": 0.6819352671071789, "grad_norm": 1.393033504486084, "learning_rate": 6.0855000000000006e-05, "loss": 0.3502, "step": 12178 }, { "epoch": 0.6819912644193079, "grad_norm": 1.378004550933838, "learning_rate": 6.0860000000000003e-05, "loss": 0.4423, "step": 12179 }, { "epoch": 
0.6820472617314369, "grad_norm": 1.2123223543167114, "learning_rate": 6.086500000000001e-05, "loss": 0.3355, "step": 12180 }, { "epoch": 0.682103259043566, "grad_norm": 1.2966917753219604, "learning_rate": 6.0870000000000005e-05, "loss": 0.4241, "step": 12181 }, { "epoch": 0.682159256355695, "grad_norm": 1.3020904064178467, "learning_rate": 6.0875e-05, "loss": 0.4779, "step": 12182 }, { "epoch": 0.682215253667824, "grad_norm": 1.3528094291687012, "learning_rate": 6.088000000000001e-05, "loss": 0.427, "step": 12183 }, { "epoch": 0.682271250979953, "grad_norm": 1.0275715589523315, "learning_rate": 6.0885000000000004e-05, "loss": 0.3687, "step": 12184 }, { "epoch": 0.682327248292082, "grad_norm": 1.3559116125106812, "learning_rate": 6.089e-05, "loss": 0.4377, "step": 12185 }, { "epoch": 0.682383245604211, "grad_norm": 2.2175986766815186, "learning_rate": 6.0895e-05, "loss": 0.4603, "step": 12186 }, { "epoch": 0.6824392429163401, "grad_norm": 1.3152580261230469, "learning_rate": 6.09e-05, "loss": 0.4857, "step": 12187 }, { "epoch": 0.6824952402284691, "grad_norm": 1.3260953426361084, "learning_rate": 6.0905e-05, "loss": 0.4299, "step": 12188 }, { "epoch": 0.6825512375405981, "grad_norm": 1.5903900861740112, "learning_rate": 6.091e-05, "loss": 0.6495, "step": 12189 }, { "epoch": 0.6826072348527271, "grad_norm": 1.2022602558135986, "learning_rate": 6.0915e-05, "loss": 0.3198, "step": 12190 }, { "epoch": 0.6826632321648561, "grad_norm": 1.2887011766433716, "learning_rate": 6.092e-05, "loss": 0.409, "step": 12191 }, { "epoch": 0.6827192294769852, "grad_norm": 1.371610164642334, "learning_rate": 6.0925e-05, "loss": 0.4759, "step": 12192 }, { "epoch": 0.6827752267891142, "grad_norm": 1.2783170938491821, "learning_rate": 6.093e-05, "loss": 0.4126, "step": 12193 }, { "epoch": 0.6828312241012432, "grad_norm": 1.5165284872055054, "learning_rate": 6.0935e-05, "loss": 0.5952, "step": 12194 }, { "epoch": 0.6828872214133721, "grad_norm": 1.315489411354065, "learning_rate": 
6.094000000000001e-05, "loss": 0.4228, "step": 12195 }, { "epoch": 0.6829432187255011, "grad_norm": 1.4239274263381958, "learning_rate": 6.094500000000001e-05, "loss": 0.5519, "step": 12196 }, { "epoch": 0.6829992160376301, "grad_norm": 1.2774584293365479, "learning_rate": 6.0950000000000004e-05, "loss": 0.3841, "step": 12197 }, { "epoch": 0.6830552133497592, "grad_norm": 1.2198424339294434, "learning_rate": 6.095500000000001e-05, "loss": 0.4392, "step": 12198 }, { "epoch": 0.6831112106618882, "grad_norm": 1.5404493808746338, "learning_rate": 6.0960000000000006e-05, "loss": 0.5669, "step": 12199 }, { "epoch": 0.6831672079740172, "grad_norm": 1.1638553142547607, "learning_rate": 6.0965e-05, "loss": 0.3687, "step": 12200 }, { "epoch": 0.6832232052861462, "grad_norm": 1.316155195236206, "learning_rate": 6.097000000000001e-05, "loss": 0.5432, "step": 12201 }, { "epoch": 0.6832792025982752, "grad_norm": 1.062378168106079, "learning_rate": 6.0975000000000005e-05, "loss": 0.3574, "step": 12202 }, { "epoch": 0.6833351999104043, "grad_norm": 1.0928003787994385, "learning_rate": 6.098e-05, "loss": 0.3067, "step": 12203 }, { "epoch": 0.6833911972225333, "grad_norm": 1.4202882051467896, "learning_rate": 6.0985000000000006e-05, "loss": 0.4275, "step": 12204 }, { "epoch": 0.6834471945346623, "grad_norm": 1.2420756816864014, "learning_rate": 6.0990000000000004e-05, "loss": 0.407, "step": 12205 }, { "epoch": 0.6835031918467913, "grad_norm": 1.6382828950881958, "learning_rate": 6.0995e-05, "loss": 0.8044, "step": 12206 }, { "epoch": 0.6835591891589203, "grad_norm": 1.3947349786758423, "learning_rate": 6.1e-05, "loss": 0.4024, "step": 12207 }, { "epoch": 0.6836151864710494, "grad_norm": 1.2955342531204224, "learning_rate": 6.1005e-05, "loss": 0.5735, "step": 12208 }, { "epoch": 0.6836711837831784, "grad_norm": 1.34746515750885, "learning_rate": 6.101e-05, "loss": 0.3859, "step": 12209 }, { "epoch": 0.6837271810953074, "grad_norm": 1.415920376777649, "learning_rate": 6.1015e-05, 
"loss": 0.5651, "step": 12210 }, { "epoch": 0.6837831784074364, "grad_norm": 1.3192853927612305, "learning_rate": 6.102e-05, "loss": 0.4725, "step": 12211 }, { "epoch": 0.6838391757195654, "grad_norm": 1.2164310216903687, "learning_rate": 6.1025e-05, "loss": 0.4347, "step": 12212 }, { "epoch": 0.6838951730316944, "grad_norm": 1.2607762813568115, "learning_rate": 6.103e-05, "loss": 0.4457, "step": 12213 }, { "epoch": 0.6839511703438235, "grad_norm": 1.4990837574005127, "learning_rate": 6.1035e-05, "loss": 0.5186, "step": 12214 }, { "epoch": 0.6840071676559525, "grad_norm": 1.424031376838684, "learning_rate": 6.104000000000001e-05, "loss": 0.4896, "step": 12215 }, { "epoch": 0.6840631649680815, "grad_norm": 1.4935369491577148, "learning_rate": 6.104500000000001e-05, "loss": 0.4456, "step": 12216 }, { "epoch": 0.6841191622802105, "grad_norm": 1.1517411470413208, "learning_rate": 6.105e-05, "loss": 0.4245, "step": 12217 }, { "epoch": 0.6841751595923395, "grad_norm": 1.3826349973678589, "learning_rate": 6.1055e-05, "loss": 0.5045, "step": 12218 }, { "epoch": 0.6842311569044686, "grad_norm": 2.0401902198791504, "learning_rate": 6.106e-05, "loss": 0.5802, "step": 12219 }, { "epoch": 0.6842871542165976, "grad_norm": 1.386125922203064, "learning_rate": 6.1065e-05, "loss": 0.3533, "step": 12220 }, { "epoch": 0.6843431515287266, "grad_norm": 1.3331139087677002, "learning_rate": 6.107000000000001e-05, "loss": 0.5166, "step": 12221 }, { "epoch": 0.6843991488408556, "grad_norm": 1.130191683769226, "learning_rate": 6.107500000000001e-05, "loss": 0.4323, "step": 12222 }, { "epoch": 0.6844551461529846, "grad_norm": 1.1950807571411133, "learning_rate": 6.108e-05, "loss": 0.4142, "step": 12223 }, { "epoch": 0.6845111434651137, "grad_norm": 1.4747769832611084, "learning_rate": 6.1085e-05, "loss": 0.505, "step": 12224 }, { "epoch": 0.6845671407772427, "grad_norm": 2.184282064437866, "learning_rate": 6.109e-05, "loss": 0.426, "step": 12225 }, { "epoch": 0.6846231380893717, "grad_norm": 
1.0928298234939575, "learning_rate": 6.1095e-05, "loss": 0.3717, "step": 12226 }, { "epoch": 0.6846791354015007, "grad_norm": 1.3561042547225952, "learning_rate": 6.110000000000001e-05, "loss": 0.4217, "step": 12227 }, { "epoch": 0.6847351327136297, "grad_norm": 1.2690937519073486, "learning_rate": 6.1105e-05, "loss": 0.5044, "step": 12228 }, { "epoch": 0.6847911300257588, "grad_norm": 1.5557212829589844, "learning_rate": 6.111e-05, "loss": 0.4117, "step": 12229 }, { "epoch": 0.6848471273378878, "grad_norm": 1.4569201469421387, "learning_rate": 6.1115e-05, "loss": 0.5277, "step": 12230 }, { "epoch": 0.6849031246500168, "grad_norm": 1.5207160711288452, "learning_rate": 6.112e-05, "loss": 0.5777, "step": 12231 }, { "epoch": 0.6849591219621458, "grad_norm": 1.2555395364761353, "learning_rate": 6.1125e-05, "loss": 0.3153, "step": 12232 }, { "epoch": 0.6850151192742748, "grad_norm": 1.2674345970153809, "learning_rate": 6.112999999999999e-05, "loss": 0.4369, "step": 12233 }, { "epoch": 0.6850711165864038, "grad_norm": 1.3939169645309448, "learning_rate": 6.1135e-05, "loss": 0.5536, "step": 12234 }, { "epoch": 0.6851271138985329, "grad_norm": 1.5421286821365356, "learning_rate": 6.114000000000001e-05, "loss": 0.6619, "step": 12235 }, { "epoch": 0.6851831112106619, "grad_norm": 1.2183438539505005, "learning_rate": 6.114500000000001e-05, "loss": 0.412, "step": 12236 }, { "epoch": 0.6852391085227909, "grad_norm": 1.5787529945373535, "learning_rate": 6.115000000000001e-05, "loss": 0.4965, "step": 12237 }, { "epoch": 0.6852951058349199, "grad_norm": 1.439435362815857, "learning_rate": 6.1155e-05, "loss": 0.546, "step": 12238 }, { "epoch": 0.685351103147049, "grad_norm": 1.4464318752288818, "learning_rate": 6.116e-05, "loss": 0.4975, "step": 12239 }, { "epoch": 0.685407100459178, "grad_norm": 1.4630446434020996, "learning_rate": 6.1165e-05, "loss": 0.4319, "step": 12240 }, { "epoch": 0.685463097771307, "grad_norm": 1.3636311292648315, "learning_rate": 6.117e-05, "loss": 0.3872, 
"step": 12241 }, { "epoch": 0.685519095083436, "grad_norm": 1.2092517614364624, "learning_rate": 6.117500000000001e-05, "loss": 0.4466, "step": 12242 }, { "epoch": 0.685575092395565, "grad_norm": 1.2505483627319336, "learning_rate": 6.118000000000001e-05, "loss": 0.463, "step": 12243 }, { "epoch": 0.685631089707694, "grad_norm": 1.0488157272338867, "learning_rate": 6.1185e-05, "loss": 0.4061, "step": 12244 }, { "epoch": 0.6856870870198231, "grad_norm": 1.2378517389297485, "learning_rate": 6.119e-05, "loss": 0.4247, "step": 12245 }, { "epoch": 0.6857430843319521, "grad_norm": 1.3138020038604736, "learning_rate": 6.1195e-05, "loss": 0.4243, "step": 12246 }, { "epoch": 0.6857990816440811, "grad_norm": 1.1253302097320557, "learning_rate": 6.12e-05, "loss": 0.449, "step": 12247 }, { "epoch": 0.6858550789562101, "grad_norm": 1.4977877140045166, "learning_rate": 6.120500000000001e-05, "loss": 0.5941, "step": 12248 }, { "epoch": 0.6859110762683391, "grad_norm": 1.7441450357437134, "learning_rate": 6.121e-05, "loss": 0.4848, "step": 12249 }, { "epoch": 0.6859670735804682, "grad_norm": 1.2781704664230347, "learning_rate": 6.1215e-05, "loss": 0.5405, "step": 12250 }, { "epoch": 0.6860230708925972, "grad_norm": 1.3380956649780273, "learning_rate": 6.122e-05, "loss": 0.446, "step": 12251 }, { "epoch": 0.6860790682047262, "grad_norm": 1.4156084060668945, "learning_rate": 6.1225e-05, "loss": 0.6244, "step": 12252 }, { "epoch": 0.6861350655168552, "grad_norm": 1.3929370641708374, "learning_rate": 6.123e-05, "loss": 0.3996, "step": 12253 }, { "epoch": 0.6861910628289842, "grad_norm": 1.4578877687454224, "learning_rate": 6.123499999999999e-05, "loss": 0.5644, "step": 12254 }, { "epoch": 0.6862470601411133, "grad_norm": 1.2098445892333984, "learning_rate": 6.124e-05, "loss": 0.4117, "step": 12255 }, { "epoch": 0.6863030574532423, "grad_norm": 0.9289770722389221, "learning_rate": 6.124500000000001e-05, "loss": 0.2874, "step": 12256 }, { "epoch": 0.6863590547653713, "grad_norm": 
1.2058159112930298, "learning_rate": 6.125000000000001e-05, "loss": 0.4046, "step": 12257 }, { "epoch": 0.6864150520775003, "grad_norm": 1.042585015296936, "learning_rate": 6.125500000000001e-05, "loss": 0.4132, "step": 12258 }, { "epoch": 0.6864710493896293, "grad_norm": 1.7090892791748047, "learning_rate": 6.126e-05, "loss": 0.4091, "step": 12259 }, { "epoch": 0.6865270467017583, "grad_norm": 1.4218131303787231, "learning_rate": 6.1265e-05, "loss": 0.4224, "step": 12260 }, { "epoch": 0.6865830440138874, "grad_norm": 1.2966798543930054, "learning_rate": 6.127e-05, "loss": 0.3678, "step": 12261 }, { "epoch": 0.6866390413260164, "grad_norm": 1.028228759765625, "learning_rate": 6.1275e-05, "loss": 0.3057, "step": 12262 }, { "epoch": 0.6866950386381454, "grad_norm": 1.1287555694580078, "learning_rate": 6.128000000000001e-05, "loss": 0.3543, "step": 12263 }, { "epoch": 0.6867510359502744, "grad_norm": 1.1865333318710327, "learning_rate": 6.128500000000001e-05, "loss": 0.5076, "step": 12264 }, { "epoch": 0.6868070332624034, "grad_norm": 1.2991281747817993, "learning_rate": 6.129e-05, "loss": 0.3995, "step": 12265 }, { "epoch": 0.6868630305745325, "grad_norm": 1.4616847038269043, "learning_rate": 6.1295e-05, "loss": 0.4717, "step": 12266 }, { "epoch": 0.6869190278866615, "grad_norm": 1.5509908199310303, "learning_rate": 6.13e-05, "loss": 0.4697, "step": 12267 }, { "epoch": 0.6869750251987905, "grad_norm": 1.568916916847229, "learning_rate": 6.1305e-05, "loss": 0.6279, "step": 12268 }, { "epoch": 0.6870310225109195, "grad_norm": 1.1783875226974487, "learning_rate": 6.131e-05, "loss": 0.3793, "step": 12269 }, { "epoch": 0.6870870198230485, "grad_norm": 1.2726467847824097, "learning_rate": 6.1315e-05, "loss": 0.5254, "step": 12270 }, { "epoch": 0.6871430171351776, "grad_norm": 1.444631576538086, "learning_rate": 6.132e-05, "loss": 0.4486, "step": 12271 }, { "epoch": 0.6871990144473066, "grad_norm": 32.72178649902344, "learning_rate": 6.1325e-05, "loss": 0.5434, "step": 
12272 }, { "epoch": 0.6872550117594356, "grad_norm": 1.4250762462615967, "learning_rate": 6.133e-05, "loss": 0.4798, "step": 12273 }, { "epoch": 0.6873110090715646, "grad_norm": 1.2289494276046753, "learning_rate": 6.1335e-05, "loss": 0.4453, "step": 12274 }, { "epoch": 0.6873670063836936, "grad_norm": 1.4113556146621704, "learning_rate": 6.133999999999999e-05, "loss": 0.4163, "step": 12275 }, { "epoch": 0.6874230036958227, "grad_norm": 1.1996521949768066, "learning_rate": 6.1345e-05, "loss": 0.3363, "step": 12276 }, { "epoch": 0.6874790010079517, "grad_norm": 1.3179504871368408, "learning_rate": 6.135000000000001e-05, "loss": 0.4021, "step": 12277 }, { "epoch": 0.6875349983200806, "grad_norm": 1.270889163017273, "learning_rate": 6.135500000000001e-05, "loss": 0.4603, "step": 12278 }, { "epoch": 0.6875909956322096, "grad_norm": 1.40614652633667, "learning_rate": 6.136000000000001e-05, "loss": 0.5777, "step": 12279 }, { "epoch": 0.6876469929443386, "grad_norm": 1.3910115957260132, "learning_rate": 6.1365e-05, "loss": 0.4963, "step": 12280 }, { "epoch": 0.6877029902564676, "grad_norm": 1.5937877893447876, "learning_rate": 6.137e-05, "loss": 0.4653, "step": 12281 }, { "epoch": 0.6877589875685967, "grad_norm": 1.1973016262054443, "learning_rate": 6.1375e-05, "loss": 0.4001, "step": 12282 }, { "epoch": 0.6878149848807257, "grad_norm": 1.4405215978622437, "learning_rate": 6.138e-05, "loss": 0.5029, "step": 12283 }, { "epoch": 0.6878709821928547, "grad_norm": 1.4870195388793945, "learning_rate": 6.138500000000001e-05, "loss": 0.5531, "step": 12284 }, { "epoch": 0.6879269795049837, "grad_norm": 1.3453369140625, "learning_rate": 6.139000000000001e-05, "loss": 0.4839, "step": 12285 }, { "epoch": 0.6879829768171127, "grad_norm": 1.1019184589385986, "learning_rate": 6.1395e-05, "loss": 0.338, "step": 12286 }, { "epoch": 0.6880389741292418, "grad_norm": 1.1243878602981567, "learning_rate": 6.14e-05, "loss": 0.3211, "step": 12287 }, { "epoch": 0.6880949714413708, "grad_norm": 
1.2816708087921143, "learning_rate": 6.1405e-05, "loss": 0.468, "step": 12288 }, { "epoch": 0.6881509687534998, "grad_norm": 1.57627534866333, "learning_rate": 6.141e-05, "loss": 0.6315, "step": 12289 }, { "epoch": 0.6882069660656288, "grad_norm": 1.2261550426483154, "learning_rate": 6.1415e-05, "loss": 0.3907, "step": 12290 }, { "epoch": 0.6882629633777578, "grad_norm": 1.2663525342941284, "learning_rate": 6.142e-05, "loss": 0.3551, "step": 12291 }, { "epoch": 0.6883189606898868, "grad_norm": 1.3640536069869995, "learning_rate": 6.1425e-05, "loss": 0.4819, "step": 12292 }, { "epoch": 0.6883749580020159, "grad_norm": 1.5169423818588257, "learning_rate": 6.143e-05, "loss": 0.4374, "step": 12293 }, { "epoch": 0.6884309553141449, "grad_norm": 1.093767523765564, "learning_rate": 6.1435e-05, "loss": 0.4196, "step": 12294 }, { "epoch": 0.6884869526262739, "grad_norm": 2.810868501663208, "learning_rate": 6.144e-05, "loss": 0.3948, "step": 12295 }, { "epoch": 0.6885429499384029, "grad_norm": 1.4540945291519165, "learning_rate": 6.1445e-05, "loss": 0.4799, "step": 12296 }, { "epoch": 0.688598947250532, "grad_norm": 1.3927223682403564, "learning_rate": 6.145e-05, "loss": 0.3329, "step": 12297 }, { "epoch": 0.688654944562661, "grad_norm": 1.3568576574325562, "learning_rate": 6.1455e-05, "loss": 0.42, "step": 12298 }, { "epoch": 0.68871094187479, "grad_norm": 1.440532922744751, "learning_rate": 6.146000000000001e-05, "loss": 0.6115, "step": 12299 }, { "epoch": 0.688766939186919, "grad_norm": 1.3961313962936401, "learning_rate": 6.146500000000001e-05, "loss": 0.4438, "step": 12300 }, { "epoch": 0.688822936499048, "grad_norm": 1.181216835975647, "learning_rate": 6.147e-05, "loss": 0.4751, "step": 12301 }, { "epoch": 0.688878933811177, "grad_norm": 1.2675639390945435, "learning_rate": 6.1475e-05, "loss": 0.3858, "step": 12302 }, { "epoch": 0.6889349311233061, "grad_norm": 1.2192527055740356, "learning_rate": 6.148e-05, "loss": 0.375, "step": 12303 }, { "epoch": 
0.6889909284354351, "grad_norm": 1.0635168552398682, "learning_rate": 6.1485e-05, "loss": 0.4258, "step": 12304 }, { "epoch": 0.6890469257475641, "grad_norm": 1.2519820928573608, "learning_rate": 6.149000000000001e-05, "loss": 0.414, "step": 12305 }, { "epoch": 0.6891029230596931, "grad_norm": 1.3190950155258179, "learning_rate": 6.1495e-05, "loss": 0.386, "step": 12306 }, { "epoch": 0.6891589203718221, "grad_norm": 1.805222988128662, "learning_rate": 6.15e-05, "loss": 0.5352, "step": 12307 }, { "epoch": 0.6892149176839512, "grad_norm": 1.282267451286316, "learning_rate": 6.1505e-05, "loss": 0.3987, "step": 12308 }, { "epoch": 0.6892709149960802, "grad_norm": 1.27260160446167, "learning_rate": 6.151e-05, "loss": 0.5293, "step": 12309 }, { "epoch": 0.6893269123082092, "grad_norm": 1.2897119522094727, "learning_rate": 6.1515e-05, "loss": 0.3931, "step": 12310 }, { "epoch": 0.6893829096203382, "grad_norm": 1.5478432178497314, "learning_rate": 6.152e-05, "loss": 0.5605, "step": 12311 }, { "epoch": 0.6894389069324672, "grad_norm": 1.4531878232955933, "learning_rate": 6.1525e-05, "loss": 0.4144, "step": 12312 }, { "epoch": 0.6894949042445963, "grad_norm": 1.2213512659072876, "learning_rate": 6.153e-05, "loss": 0.3277, "step": 12313 }, { "epoch": 0.6895509015567253, "grad_norm": 1.3131664991378784, "learning_rate": 6.1535e-05, "loss": 0.4082, "step": 12314 }, { "epoch": 0.6896068988688543, "grad_norm": 1.3242067098617554, "learning_rate": 6.154e-05, "loss": 0.3955, "step": 12315 }, { "epoch": 0.6896628961809833, "grad_norm": 1.5959453582763672, "learning_rate": 6.154500000000001e-05, "loss": 0.5431, "step": 12316 }, { "epoch": 0.6897188934931123, "grad_norm": 1.1596336364746094, "learning_rate": 6.155e-05, "loss": 0.5232, "step": 12317 }, { "epoch": 0.6897748908052413, "grad_norm": 1.3719199895858765, "learning_rate": 6.1555e-05, "loss": 0.5622, "step": 12318 }, { "epoch": 0.6898308881173704, "grad_norm": 1.2654657363891602, "learning_rate": 6.156e-05, "loss": 0.4272, 
"step": 12319 }, { "epoch": 0.6898868854294994, "grad_norm": 1.224621295928955, "learning_rate": 6.156500000000001e-05, "loss": 0.3971, "step": 12320 }, { "epoch": 0.6899428827416284, "grad_norm": 1.293892502784729, "learning_rate": 6.157000000000001e-05, "loss": 0.5648, "step": 12321 }, { "epoch": 0.6899988800537574, "grad_norm": 1.255736231803894, "learning_rate": 6.1575e-05, "loss": 0.4034, "step": 12322 }, { "epoch": 0.6900548773658864, "grad_norm": 1.3798484802246094, "learning_rate": 6.158e-05, "loss": 0.3675, "step": 12323 }, { "epoch": 0.6901108746780155, "grad_norm": 1.1086690425872803, "learning_rate": 6.1585e-05, "loss": 0.3527, "step": 12324 }, { "epoch": 0.6901668719901445, "grad_norm": 1.802785873413086, "learning_rate": 6.159e-05, "loss": 0.3634, "step": 12325 }, { "epoch": 0.6902228693022735, "grad_norm": 1.3808271884918213, "learning_rate": 6.159500000000001e-05, "loss": 0.4957, "step": 12326 }, { "epoch": 0.6902788666144025, "grad_norm": 1.555474877357483, "learning_rate": 6.16e-05, "loss": 0.5661, "step": 12327 }, { "epoch": 0.6903348639265315, "grad_norm": 7.169306755065918, "learning_rate": 6.1605e-05, "loss": 0.4269, "step": 12328 }, { "epoch": 0.6903908612386606, "grad_norm": 1.2660406827926636, "learning_rate": 6.161e-05, "loss": 0.4906, "step": 12329 }, { "epoch": 0.6904468585507896, "grad_norm": 1.4176205396652222, "learning_rate": 6.1615e-05, "loss": 0.5224, "step": 12330 }, { "epoch": 0.6905028558629186, "grad_norm": 1.3572801351547241, "learning_rate": 6.162e-05, "loss": 0.483, "step": 12331 }, { "epoch": 0.6905588531750476, "grad_norm": 1.1732454299926758, "learning_rate": 6.1625e-05, "loss": 0.3916, "step": 12332 }, { "epoch": 0.6906148504871766, "grad_norm": 1.292445182800293, "learning_rate": 6.163e-05, "loss": 0.4665, "step": 12333 }, { "epoch": 0.6906708477993057, "grad_norm": 1.5300066471099854, "learning_rate": 6.1635e-05, "loss": 0.607, "step": 12334 }, { "epoch": 0.6907268451114347, "grad_norm": 1.125780701637268, 
"learning_rate": 6.164e-05, "loss": 0.3816, "step": 12335 }, { "epoch": 0.6907828424235637, "grad_norm": 1.141910433769226, "learning_rate": 6.164500000000001e-05, "loss": 0.4099, "step": 12336 }, { "epoch": 0.6908388397356927, "grad_norm": 1.2341314554214478, "learning_rate": 6.165000000000001e-05, "loss": 0.4981, "step": 12337 }, { "epoch": 0.6908948370478217, "grad_norm": 1.4774041175842285, "learning_rate": 6.1655e-05, "loss": 0.5521, "step": 12338 }, { "epoch": 0.6909508343599507, "grad_norm": 1.1159309148788452, "learning_rate": 6.166e-05, "loss": 0.3608, "step": 12339 }, { "epoch": 0.6910068316720798, "grad_norm": 1.3648525476455688, "learning_rate": 6.1665e-05, "loss": 0.4344, "step": 12340 }, { "epoch": 0.6910628289842088, "grad_norm": 1.148815631866455, "learning_rate": 6.167000000000001e-05, "loss": 0.4819, "step": 12341 }, { "epoch": 0.6911188262963378, "grad_norm": 1.500679612159729, "learning_rate": 6.167500000000001e-05, "loss": 0.6123, "step": 12342 }, { "epoch": 0.6911748236084668, "grad_norm": 1.464145541191101, "learning_rate": 6.168e-05, "loss": 0.5013, "step": 12343 }, { "epoch": 0.6912308209205958, "grad_norm": 1.5020862817764282, "learning_rate": 6.1685e-05, "loss": 0.4593, "step": 12344 }, { "epoch": 0.6912868182327249, "grad_norm": 1.1385457515716553, "learning_rate": 6.169e-05, "loss": 0.387, "step": 12345 }, { "epoch": 0.6913428155448539, "grad_norm": 1.4682669639587402, "learning_rate": 6.1695e-05, "loss": 0.432, "step": 12346 }, { "epoch": 0.6913988128569829, "grad_norm": 1.1639906167984009, "learning_rate": 6.170000000000001e-05, "loss": 0.3979, "step": 12347 }, { "epoch": 0.6914548101691119, "grad_norm": 2.324922561645508, "learning_rate": 6.1705e-05, "loss": 0.4675, "step": 12348 }, { "epoch": 0.6915108074812409, "grad_norm": 1.420125126838684, "learning_rate": 6.171e-05, "loss": 0.5791, "step": 12349 }, { "epoch": 0.69156680479337, "grad_norm": 1.4984275102615356, "learning_rate": 6.1715e-05, "loss": 0.5217, "step": 12350 }, { 
"epoch": 0.691622802105499, "grad_norm": 1.2690491676330566, "learning_rate": 6.172e-05, "loss": 0.4017, "step": 12351 }, { "epoch": 0.691678799417628, "grad_norm": 1.1910409927368164, "learning_rate": 6.1725e-05, "loss": 0.3263, "step": 12352 }, { "epoch": 0.691734796729757, "grad_norm": 3.2725179195404053, "learning_rate": 6.173e-05, "loss": 0.6993, "step": 12353 }, { "epoch": 0.691790794041886, "grad_norm": 1.2835434675216675, "learning_rate": 6.1735e-05, "loss": 0.467, "step": 12354 }, { "epoch": 0.6918467913540151, "grad_norm": 1.5669128894805908, "learning_rate": 6.174e-05, "loss": 0.4122, "step": 12355 }, { "epoch": 0.6919027886661441, "grad_norm": 1.442619800567627, "learning_rate": 6.174500000000001e-05, "loss": 0.4024, "step": 12356 }, { "epoch": 0.6919587859782731, "grad_norm": 1.4724701642990112, "learning_rate": 6.175000000000001e-05, "loss": 0.4798, "step": 12357 }, { "epoch": 0.6920147832904021, "grad_norm": 1.193954348564148, "learning_rate": 6.175500000000001e-05, "loss": 0.3632, "step": 12358 }, { "epoch": 0.6920707806025311, "grad_norm": 1.225625991821289, "learning_rate": 6.176e-05, "loss": 0.3221, "step": 12359 }, { "epoch": 0.6921267779146602, "grad_norm": 1.286931037902832, "learning_rate": 6.1765e-05, "loss": 0.417, "step": 12360 }, { "epoch": 0.6921827752267891, "grad_norm": 1.1550440788269043, "learning_rate": 6.177e-05, "loss": 0.3877, "step": 12361 }, { "epoch": 0.6922387725389181, "grad_norm": 1.173991084098816, "learning_rate": 6.177500000000001e-05, "loss": 0.4283, "step": 12362 }, { "epoch": 0.6922947698510471, "grad_norm": 1.178902268409729, "learning_rate": 6.178000000000001e-05, "loss": 0.4929, "step": 12363 }, { "epoch": 0.6923507671631761, "grad_norm": 1.4234992265701294, "learning_rate": 6.1785e-05, "loss": 0.4473, "step": 12364 }, { "epoch": 0.6924067644753051, "grad_norm": 1.3138865232467651, "learning_rate": 6.179e-05, "loss": 0.4019, "step": 12365 }, { "epoch": 0.6924627617874342, "grad_norm": 1.3836005926132202, 
"learning_rate": 6.1795e-05, "loss": 0.4197, "step": 12366 }, { "epoch": 0.6925187590995632, "grad_norm": 1.3653817176818848, "learning_rate": 6.18e-05, "loss": 0.422, "step": 12367 }, { "epoch": 0.6925747564116922, "grad_norm": 5.243404865264893, "learning_rate": 6.1805e-05, "loss": 0.462, "step": 12368 }, { "epoch": 0.6926307537238212, "grad_norm": 1.4292103052139282, "learning_rate": 6.181e-05, "loss": 0.5952, "step": 12369 }, { "epoch": 0.6926867510359502, "grad_norm": 1.1659621000289917, "learning_rate": 6.1815e-05, "loss": 0.4945, "step": 12370 }, { "epoch": 0.6927427483480793, "grad_norm": 1.1917778253555298, "learning_rate": 6.182e-05, "loss": 0.3642, "step": 12371 }, { "epoch": 0.6927987456602083, "grad_norm": 0.9862884283065796, "learning_rate": 6.1825e-05, "loss": 0.3606, "step": 12372 }, { "epoch": 0.6928547429723373, "grad_norm": 1.9535349607467651, "learning_rate": 6.183e-05, "loss": 0.4091, "step": 12373 }, { "epoch": 0.6929107402844663, "grad_norm": 1.199156403541565, "learning_rate": 6.1835e-05, "loss": 0.3657, "step": 12374 }, { "epoch": 0.6929667375965953, "grad_norm": 1.1949660778045654, "learning_rate": 6.184e-05, "loss": 0.4283, "step": 12375 }, { "epoch": 0.6930227349087243, "grad_norm": 1.4052292108535767, "learning_rate": 6.184500000000002e-05, "loss": 0.3788, "step": 12376 }, { "epoch": 0.6930787322208534, "grad_norm": 1.4408352375030518, "learning_rate": 6.185000000000001e-05, "loss": 0.4129, "step": 12377 }, { "epoch": 0.6931347295329824, "grad_norm": 1.9941606521606445, "learning_rate": 6.185500000000001e-05, "loss": 0.5218, "step": 12378 }, { "epoch": 0.6931907268451114, "grad_norm": 1.2464509010314941, "learning_rate": 6.186000000000001e-05, "loss": 0.4011, "step": 12379 }, { "epoch": 0.6932467241572404, "grad_norm": 1.2264740467071533, "learning_rate": 6.1865e-05, "loss": 0.3415, "step": 12380 }, { "epoch": 0.6933027214693694, "grad_norm": 1.244165062904358, "learning_rate": 6.187e-05, "loss": 0.4134, "step": 12381 }, { "epoch": 
0.6933587187814985, "grad_norm": 1.3536137342453003, "learning_rate": 6.1875e-05, "loss": 0.5228, "step": 12382 }, { "epoch": 0.6934147160936275, "grad_norm": 1.158705472946167, "learning_rate": 6.188000000000001e-05, "loss": 0.3152, "step": 12383 }, { "epoch": 0.6934707134057565, "grad_norm": 1.3614567518234253, "learning_rate": 6.188500000000001e-05, "loss": 0.4177, "step": 12384 }, { "epoch": 0.6935267107178855, "grad_norm": 1.2631393671035767, "learning_rate": 6.189e-05, "loss": 0.3932, "step": 12385 }, { "epoch": 0.6935827080300145, "grad_norm": 1.2913925647735596, "learning_rate": 6.1895e-05, "loss": 0.4944, "step": 12386 }, { "epoch": 0.6936387053421436, "grad_norm": 1.3278769254684448, "learning_rate": 6.19e-05, "loss": 0.4233, "step": 12387 }, { "epoch": 0.6936947026542726, "grad_norm": 1.3033205270767212, "learning_rate": 6.1905e-05, "loss": 0.4485, "step": 12388 }, { "epoch": 0.6937506999664016, "grad_norm": 1.3875821828842163, "learning_rate": 6.191e-05, "loss": 0.5035, "step": 12389 }, { "epoch": 0.6938066972785306, "grad_norm": 1.3573859930038452, "learning_rate": 6.1915e-05, "loss": 0.4555, "step": 12390 }, { "epoch": 0.6938626945906596, "grad_norm": 1.2488489151000977, "learning_rate": 6.192e-05, "loss": 0.4228, "step": 12391 }, { "epoch": 0.6939186919027887, "grad_norm": 1.43985915184021, "learning_rate": 6.1925e-05, "loss": 0.5246, "step": 12392 }, { "epoch": 0.6939746892149177, "grad_norm": 1.0981749296188354, "learning_rate": 6.193e-05, "loss": 0.377, "step": 12393 }, { "epoch": 0.6940306865270467, "grad_norm": 1.3188191652297974, "learning_rate": 6.1935e-05, "loss": 0.3934, "step": 12394 }, { "epoch": 0.6940866838391757, "grad_norm": 1.3046042919158936, "learning_rate": 6.193999999999999e-05, "loss": 0.5209, "step": 12395 }, { "epoch": 0.6941426811513047, "grad_norm": 1.3684697151184082, "learning_rate": 6.1945e-05, "loss": 0.529, "step": 12396 }, { "epoch": 0.6941986784634337, "grad_norm": 49.712005615234375, "learning_rate": 6.195e-05, 
"loss": 0.5506, "step": 12397 }, { "epoch": 0.6942546757755628, "grad_norm": 1.3110227584838867, "learning_rate": 6.195500000000001e-05, "loss": 0.4521, "step": 12398 }, { "epoch": 0.6943106730876918, "grad_norm": 1.4124492406845093, "learning_rate": 6.196000000000001e-05, "loss": 0.6361, "step": 12399 }, { "epoch": 0.6943666703998208, "grad_norm": 1.3802233934402466, "learning_rate": 6.196500000000001e-05, "loss": 0.5113, "step": 12400 }, { "epoch": 0.6944226677119498, "grad_norm": 1.4428071975708008, "learning_rate": 6.197e-05, "loss": 0.5623, "step": 12401 }, { "epoch": 0.6944786650240788, "grad_norm": 1.30404531955719, "learning_rate": 6.1975e-05, "loss": 0.5918, "step": 12402 }, { "epoch": 0.6945346623362079, "grad_norm": 1.4471380710601807, "learning_rate": 6.198e-05, "loss": 0.4736, "step": 12403 }, { "epoch": 0.6945906596483369, "grad_norm": 1.5136996507644653, "learning_rate": 6.198500000000001e-05, "loss": 0.5243, "step": 12404 }, { "epoch": 0.6946466569604659, "grad_norm": 1.5524667501449585, "learning_rate": 6.199000000000001e-05, "loss": 0.4637, "step": 12405 }, { "epoch": 0.6947026542725949, "grad_norm": 10.976086616516113, "learning_rate": 6.1995e-05, "loss": 0.3784, "step": 12406 }, { "epoch": 0.6947586515847239, "grad_norm": 1.4833440780639648, "learning_rate": 6.2e-05, "loss": 0.6505, "step": 12407 }, { "epoch": 0.694814648896853, "grad_norm": 1.3871524333953857, "learning_rate": 6.2005e-05, "loss": 0.4584, "step": 12408 }, { "epoch": 0.694870646208982, "grad_norm": 1.3627489805221558, "learning_rate": 6.201e-05, "loss": 0.3724, "step": 12409 }, { "epoch": 0.694926643521111, "grad_norm": 1.328230381011963, "learning_rate": 6.2015e-05, "loss": 0.3918, "step": 12410 }, { "epoch": 0.69498264083324, "grad_norm": 1.4259014129638672, "learning_rate": 6.202e-05, "loss": 0.3789, "step": 12411 }, { "epoch": 0.695038638145369, "grad_norm": 1.0605638027191162, "learning_rate": 6.2025e-05, "loss": 0.3887, "step": 12412 }, { "epoch": 0.6950946354574981, 
"grad_norm": 1.4853936433792114, "learning_rate": 6.203e-05, "loss": 0.5887, "step": 12413 }, { "epoch": 0.6951506327696271, "grad_norm": 1.384651780128479, "learning_rate": 6.2035e-05, "loss": 0.6094, "step": 12414 }, { "epoch": 0.6952066300817561, "grad_norm": 1.2497036457061768, "learning_rate": 6.204e-05, "loss": 0.3074, "step": 12415 }, { "epoch": 0.6952626273938851, "grad_norm": 1.0918927192687988, "learning_rate": 6.204499999999999e-05, "loss": 0.3625, "step": 12416 }, { "epoch": 0.6953186247060141, "grad_norm": 1.1047202348709106, "learning_rate": 6.205e-05, "loss": 0.3787, "step": 12417 }, { "epoch": 0.6953746220181432, "grad_norm": 1.1791718006134033, "learning_rate": 6.2055e-05, "loss": 0.4311, "step": 12418 }, { "epoch": 0.6954306193302722, "grad_norm": 1.9835759401321411, "learning_rate": 6.206000000000001e-05, "loss": 0.5259, "step": 12419 }, { "epoch": 0.6954866166424012, "grad_norm": 1.4982942342758179, "learning_rate": 6.206500000000001e-05, "loss": 0.4543, "step": 12420 }, { "epoch": 0.6955426139545302, "grad_norm": 1.2310798168182373, "learning_rate": 6.207000000000001e-05, "loss": 0.4151, "step": 12421 }, { "epoch": 0.6955986112666592, "grad_norm": 1.3420618772506714, "learning_rate": 6.2075e-05, "loss": 0.4868, "step": 12422 }, { "epoch": 0.6956546085787882, "grad_norm": 1.1320631504058838, "learning_rate": 6.208e-05, "loss": 0.3478, "step": 12423 }, { "epoch": 0.6957106058909173, "grad_norm": 1.4887796640396118, "learning_rate": 6.2085e-05, "loss": 0.613, "step": 12424 }, { "epoch": 0.6957666032030463, "grad_norm": 1.5807126760482788, "learning_rate": 6.209000000000001e-05, "loss": 0.4946, "step": 12425 }, { "epoch": 0.6958226005151753, "grad_norm": 1.3650822639465332, "learning_rate": 6.209500000000001e-05, "loss": 0.4914, "step": 12426 }, { "epoch": 0.6958785978273043, "grad_norm": 1.8704893589019775, "learning_rate": 6.21e-05, "loss": 0.3987, "step": 12427 }, { "epoch": 0.6959345951394333, "grad_norm": 1.4728385210037231, "learning_rate": 
6.2105e-05, "loss": 0.5204, "step": 12428 }, { "epoch": 0.6959905924515624, "grad_norm": 1.3263381719589233, "learning_rate": 6.211e-05, "loss": 0.4777, "step": 12429 }, { "epoch": 0.6960465897636914, "grad_norm": 1.2977200746536255, "learning_rate": 6.2115e-05, "loss": 0.3269, "step": 12430 }, { "epoch": 0.6961025870758204, "grad_norm": 1.1716684103012085, "learning_rate": 6.212e-05, "loss": 0.4632, "step": 12431 }, { "epoch": 0.6961585843879494, "grad_norm": 1.213047981262207, "learning_rate": 6.2125e-05, "loss": 0.5351, "step": 12432 }, { "epoch": 0.6962145817000784, "grad_norm": 1.2444779872894287, "learning_rate": 6.213e-05, "loss": 0.3439, "step": 12433 }, { "epoch": 0.6962705790122075, "grad_norm": 1.1619069576263428, "learning_rate": 6.2135e-05, "loss": 0.4513, "step": 12434 }, { "epoch": 0.6963265763243365, "grad_norm": 1.0866138935089111, "learning_rate": 6.214e-05, "loss": 0.3634, "step": 12435 }, { "epoch": 0.6963825736364655, "grad_norm": 1.5795514583587646, "learning_rate": 6.2145e-05, "loss": 0.6332, "step": 12436 }, { "epoch": 0.6964385709485945, "grad_norm": 1.434275507926941, "learning_rate": 6.215e-05, "loss": 0.3181, "step": 12437 }, { "epoch": 0.6964945682607235, "grad_norm": 1.4398802518844604, "learning_rate": 6.2155e-05, "loss": 0.4559, "step": 12438 }, { "epoch": 0.6965505655728526, "grad_norm": 1.1475560665130615, "learning_rate": 6.216e-05, "loss": 0.3817, "step": 12439 }, { "epoch": 0.6966065628849816, "grad_norm": 1.1969971656799316, "learning_rate": 6.216500000000001e-05, "loss": 0.4641, "step": 12440 }, { "epoch": 0.6966625601971106, "grad_norm": 1.2024345397949219, "learning_rate": 6.217000000000001e-05, "loss": 0.4557, "step": 12441 }, { "epoch": 0.6967185575092396, "grad_norm": 1.4936630725860596, "learning_rate": 6.217500000000001e-05, "loss": 0.4337, "step": 12442 }, { "epoch": 0.6967745548213685, "grad_norm": 1.4987143278121948, "learning_rate": 6.218e-05, "loss": 0.4671, "step": 12443 }, { "epoch": 0.6968305521334975, 
"grad_norm": 1.2767128944396973, "learning_rate": 6.2185e-05, "loss": 0.4376, "step": 12444 }, { "epoch": 0.6968865494456266, "grad_norm": 1.389953851699829, "learning_rate": 6.219e-05, "loss": 0.3997, "step": 12445 }, { "epoch": 0.6969425467577556, "grad_norm": 1.530099868774414, "learning_rate": 6.2195e-05, "loss": 0.3824, "step": 12446 }, { "epoch": 0.6969985440698846, "grad_norm": 1.2455320358276367, "learning_rate": 6.220000000000001e-05, "loss": 0.5118, "step": 12447 }, { "epoch": 0.6970545413820136, "grad_norm": 1.564921259880066, "learning_rate": 6.2205e-05, "loss": 0.5814, "step": 12448 }, { "epoch": 0.6971105386941426, "grad_norm": 1.4247167110443115, "learning_rate": 6.221e-05, "loss": 0.35, "step": 12449 }, { "epoch": 0.6971665360062717, "grad_norm": 1.2676891088485718, "learning_rate": 6.2215e-05, "loss": 0.2909, "step": 12450 }, { "epoch": 0.6972225333184007, "grad_norm": 1.6524531841278076, "learning_rate": 6.222e-05, "loss": 0.4867, "step": 12451 }, { "epoch": 0.6972785306305297, "grad_norm": 1.3316035270690918, "learning_rate": 6.2225e-05, "loss": 0.383, "step": 12452 }, { "epoch": 0.6973345279426587, "grad_norm": 1.362134337425232, "learning_rate": 6.223e-05, "loss": 0.4233, "step": 12453 }, { "epoch": 0.6973905252547877, "grad_norm": 1.4702951908111572, "learning_rate": 6.2235e-05, "loss": 0.4932, "step": 12454 }, { "epoch": 0.6974465225669167, "grad_norm": 1.5142104625701904, "learning_rate": 6.224e-05, "loss": 0.4368, "step": 12455 }, { "epoch": 0.6975025198790458, "grad_norm": 1.3793469667434692, "learning_rate": 6.2245e-05, "loss": 0.3769, "step": 12456 }, { "epoch": 0.6975585171911748, "grad_norm": 1.3909077644348145, "learning_rate": 6.225000000000001e-05, "loss": 0.4614, "step": 12457 }, { "epoch": 0.6976145145033038, "grad_norm": 1.0924532413482666, "learning_rate": 6.2255e-05, "loss": 0.4464, "step": 12458 }, { "epoch": 0.6976705118154328, "grad_norm": 1.3595489263534546, "learning_rate": 6.226e-05, "loss": 0.3865, "step": 12459 }, { 
"epoch": 0.6977265091275618, "grad_norm": 1.1982531547546387, "learning_rate": 6.2265e-05, "loss": 0.3582, "step": 12460 }, { "epoch": 0.6977825064396909, "grad_norm": 1.8145815134048462, "learning_rate": 6.227000000000001e-05, "loss": 0.5622, "step": 12461 }, { "epoch": 0.6978385037518199, "grad_norm": 1.2581560611724854, "learning_rate": 6.227500000000001e-05, "loss": 0.3973, "step": 12462 }, { "epoch": 0.6978945010639489, "grad_norm": 1.8066720962524414, "learning_rate": 6.228000000000001e-05, "loss": 0.4065, "step": 12463 }, { "epoch": 0.6979504983760779, "grad_norm": 1.3089772462844849, "learning_rate": 6.2285e-05, "loss": 0.5027, "step": 12464 }, { "epoch": 0.6980064956882069, "grad_norm": 1.2001739740371704, "learning_rate": 6.229e-05, "loss": 0.4057, "step": 12465 }, { "epoch": 0.698062493000336, "grad_norm": 1.4888426065444946, "learning_rate": 6.2295e-05, "loss": 0.5365, "step": 12466 }, { "epoch": 0.698118490312465, "grad_norm": 1.1244386434555054, "learning_rate": 6.23e-05, "loss": 0.4614, "step": 12467 }, { "epoch": 0.698174487624594, "grad_norm": 1.2285563945770264, "learning_rate": 6.230500000000001e-05, "loss": 0.4636, "step": 12468 }, { "epoch": 0.698230484936723, "grad_norm": 1.3267616033554077, "learning_rate": 6.231e-05, "loss": 0.4245, "step": 12469 }, { "epoch": 0.698286482248852, "grad_norm": 1.3063656091690063, "learning_rate": 6.2315e-05, "loss": 0.3789, "step": 12470 }, { "epoch": 0.698342479560981, "grad_norm": 1.3477351665496826, "learning_rate": 6.232e-05, "loss": 0.5748, "step": 12471 }, { "epoch": 0.6983984768731101, "grad_norm": 1.2163971662521362, "learning_rate": 6.2325e-05, "loss": 0.4104, "step": 12472 }, { "epoch": 0.6984544741852391, "grad_norm": 1.1639045476913452, "learning_rate": 6.233e-05, "loss": 0.4309, "step": 12473 }, { "epoch": 0.6985104714973681, "grad_norm": 1.8191872835159302, "learning_rate": 6.2335e-05, "loss": 0.478, "step": 12474 }, { "epoch": 0.6985664688094971, "grad_norm": 1.1949455738067627, "learning_rate": 
6.234e-05, "loss": 0.3786, "step": 12475 }, { "epoch": 0.6986224661216262, "grad_norm": 1.2190887928009033, "learning_rate": 6.2345e-05, "loss": 0.5706, "step": 12476 }, { "epoch": 0.6986784634337552, "grad_norm": 1.1594059467315674, "learning_rate": 6.235000000000001e-05, "loss": 0.4136, "step": 12477 }, { "epoch": 0.6987344607458842, "grad_norm": 1.2436951398849487, "learning_rate": 6.235500000000001e-05, "loss": 0.4618, "step": 12478 }, { "epoch": 0.6987904580580132, "grad_norm": 1.2693654298782349, "learning_rate": 6.236e-05, "loss": 0.4969, "step": 12479 }, { "epoch": 0.6988464553701422, "grad_norm": 1.348768949508667, "learning_rate": 6.2365e-05, "loss": 0.518, "step": 12480 }, { "epoch": 0.6989024526822712, "grad_norm": 1.1391533613204956, "learning_rate": 6.237e-05, "loss": 0.4162, "step": 12481 }, { "epoch": 0.6989584499944003, "grad_norm": 1.248640537261963, "learning_rate": 6.237500000000001e-05, "loss": 0.3715, "step": 12482 }, { "epoch": 0.6990144473065293, "grad_norm": 1.3822144269943237, "learning_rate": 6.238000000000001e-05, "loss": 0.4397, "step": 12483 }, { "epoch": 0.6990704446186583, "grad_norm": 1.16531503200531, "learning_rate": 6.2385e-05, "loss": 0.4137, "step": 12484 }, { "epoch": 0.6991264419307873, "grad_norm": 1.2543193101882935, "learning_rate": 6.239e-05, "loss": 0.5052, "step": 12485 }, { "epoch": 0.6991824392429163, "grad_norm": 2.1988441944122314, "learning_rate": 6.2395e-05, "loss": 0.6602, "step": 12486 }, { "epoch": 0.6992384365550454, "grad_norm": 1.329607367515564, "learning_rate": 6.24e-05, "loss": 0.3931, "step": 12487 }, { "epoch": 0.6992944338671744, "grad_norm": 1.3688476085662842, "learning_rate": 6.2405e-05, "loss": 0.4576, "step": 12488 }, { "epoch": 0.6993504311793034, "grad_norm": 1.3245147466659546, "learning_rate": 6.241000000000001e-05, "loss": 0.5143, "step": 12489 }, { "epoch": 0.6994064284914324, "grad_norm": 1.143288016319275, "learning_rate": 6.2415e-05, "loss": 0.5049, "step": 12490 }, { "epoch": 
0.6994624258035614, "grad_norm": 1.5038130283355713, "learning_rate": 6.242e-05, "loss": 0.6721, "step": 12491 }, { "epoch": 0.6995184231156905, "grad_norm": 1.218773365020752, "learning_rate": 6.2425e-05, "loss": 0.475, "step": 12492 }, { "epoch": 0.6995744204278195, "grad_norm": 1.2821155786514282, "learning_rate": 6.243e-05, "loss": 0.396, "step": 12493 }, { "epoch": 0.6996304177399485, "grad_norm": 1.1763544082641602, "learning_rate": 6.2435e-05, "loss": 0.4884, "step": 12494 }, { "epoch": 0.6996864150520775, "grad_norm": 1.1693071126937866, "learning_rate": 6.244e-05, "loss": 0.3576, "step": 12495 }, { "epoch": 0.6997424123642065, "grad_norm": 1.5130113363265991, "learning_rate": 6.2445e-05, "loss": 0.4434, "step": 12496 }, { "epoch": 0.6997984096763356, "grad_norm": 1.4724360704421997, "learning_rate": 6.245000000000001e-05, "loss": 0.4186, "step": 12497 }, { "epoch": 0.6998544069884646, "grad_norm": 1.420949935913086, "learning_rate": 6.245500000000001e-05, "loss": 0.4128, "step": 12498 }, { "epoch": 0.6999104043005936, "grad_norm": 1.2526235580444336, "learning_rate": 6.246000000000001e-05, "loss": 0.3879, "step": 12499 }, { "epoch": 0.6999664016127226, "grad_norm": 1.2274563312530518, "learning_rate": 6.2465e-05, "loss": 0.3422, "step": 12500 }, { "epoch": 0.7000223989248516, "grad_norm": 1.237396240234375, "learning_rate": 6.247e-05, "loss": 0.3423, "step": 12501 }, { "epoch": 0.7000783962369806, "grad_norm": 1.1403456926345825, "learning_rate": 6.2475e-05, "loss": 0.4302, "step": 12502 }, { "epoch": 0.7001343935491097, "grad_norm": 1.3087342977523804, "learning_rate": 6.248000000000001e-05, "loss": 0.3942, "step": 12503 }, { "epoch": 0.7001903908612387, "grad_norm": 1.552927851676941, "learning_rate": 6.248500000000001e-05, "loss": 0.6173, "step": 12504 }, { "epoch": 0.7002463881733677, "grad_norm": 1.3391687870025635, "learning_rate": 6.249e-05, "loss": 0.5829, "step": 12505 }, { "epoch": 0.7003023854854967, "grad_norm": 1.5082244873046875, 
"learning_rate": 6.2495e-05, "loss": 0.4968, "step": 12506 }, { "epoch": 0.7003583827976257, "grad_norm": 1.4176688194274902, "learning_rate": 6.25e-05, "loss": 0.4616, "step": 12507 }, { "epoch": 0.7004143801097548, "grad_norm": 1.3099764585494995, "learning_rate": 6.2505e-05, "loss": 0.437, "step": 12508 }, { "epoch": 0.7004703774218838, "grad_norm": 1.3941704034805298, "learning_rate": 6.251e-05, "loss": 0.3963, "step": 12509 }, { "epoch": 0.7005263747340128, "grad_norm": 1.2004023790359497, "learning_rate": 6.251500000000001e-05, "loss": 0.3655, "step": 12510 }, { "epoch": 0.7005823720461418, "grad_norm": 1.379607081413269, "learning_rate": 6.252e-05, "loss": 0.5685, "step": 12511 }, { "epoch": 0.7006383693582708, "grad_norm": 1.3818296194076538, "learning_rate": 6.2525e-05, "loss": 0.3594, "step": 12512 }, { "epoch": 0.7006943666703999, "grad_norm": 1.1807204484939575, "learning_rate": 6.253e-05, "loss": 0.3541, "step": 12513 }, { "epoch": 0.7007503639825289, "grad_norm": 1.0599114894866943, "learning_rate": 6.2535e-05, "loss": 0.3048, "step": 12514 }, { "epoch": 0.7008063612946579, "grad_norm": 1.2263143062591553, "learning_rate": 6.254e-05, "loss": 0.4669, "step": 12515 }, { "epoch": 0.7008623586067869, "grad_norm": 2.299558639526367, "learning_rate": 6.254499999999999e-05, "loss": 0.5666, "step": 12516 }, { "epoch": 0.7009183559189159, "grad_norm": 1.2690045833587646, "learning_rate": 6.255e-05, "loss": 0.3804, "step": 12517 }, { "epoch": 0.700974353231045, "grad_norm": 1.2532697916030884, "learning_rate": 6.255500000000001e-05, "loss": 0.498, "step": 12518 }, { "epoch": 0.701030350543174, "grad_norm": 1.5119946002960205, "learning_rate": 6.256000000000001e-05, "loss": 0.5361, "step": 12519 }, { "epoch": 0.701086347855303, "grad_norm": 1.0479772090911865, "learning_rate": 6.256500000000001e-05, "loss": 0.3354, "step": 12520 }, { "epoch": 0.701142345167432, "grad_norm": 1.426035761833191, "learning_rate": 6.257e-05, "loss": 0.329, "step": 12521 }, { "epoch": 
0.701198342479561, "grad_norm": 1.311549425125122, "learning_rate": 6.2575e-05, "loss": 0.4817, "step": 12522 }, { "epoch": 0.70125433979169, "grad_norm": 1.2965736389160156, "learning_rate": 6.258e-05, "loss": 0.6261, "step": 12523 }, { "epoch": 0.7013103371038191, "grad_norm": 1.3294415473937988, "learning_rate": 6.258500000000001e-05, "loss": 0.4701, "step": 12524 }, { "epoch": 0.7013663344159481, "grad_norm": 1.2690424919128418, "learning_rate": 6.259000000000001e-05, "loss": 0.3445, "step": 12525 }, { "epoch": 0.701422331728077, "grad_norm": 1.2250972986221313, "learning_rate": 6.2595e-05, "loss": 0.3364, "step": 12526 }, { "epoch": 0.701478329040206, "grad_norm": 1.245665431022644, "learning_rate": 6.26e-05, "loss": 0.4073, "step": 12527 }, { "epoch": 0.701534326352335, "grad_norm": 2.446558713912964, "learning_rate": 6.2605e-05, "loss": 0.4662, "step": 12528 }, { "epoch": 0.701590323664464, "grad_norm": 1.2567524909973145, "learning_rate": 6.261e-05, "loss": 0.3178, "step": 12529 }, { "epoch": 0.7016463209765931, "grad_norm": 1.420933485031128, "learning_rate": 6.2615e-05, "loss": 0.387, "step": 12530 }, { "epoch": 0.7017023182887221, "grad_norm": 1.4116603136062622, "learning_rate": 6.262000000000001e-05, "loss": 0.4641, "step": 12531 }, { "epoch": 0.7017583156008511, "grad_norm": 1.1638530492782593, "learning_rate": 6.2625e-05, "loss": 0.4309, "step": 12532 }, { "epoch": 0.7018143129129801, "grad_norm": 1.426966667175293, "learning_rate": 6.263e-05, "loss": 0.4635, "step": 12533 }, { "epoch": 0.7018703102251092, "grad_norm": 1.309417963027954, "learning_rate": 6.2635e-05, "loss": 0.452, "step": 12534 }, { "epoch": 0.7019263075372382, "grad_norm": 1.2854201793670654, "learning_rate": 6.264e-05, "loss": 0.4823, "step": 12535 }, { "epoch": 0.7019823048493672, "grad_norm": 1.50555419921875, "learning_rate": 6.2645e-05, "loss": 0.4936, "step": 12536 }, { "epoch": 0.7020383021614962, "grad_norm": 1.2499947547912598, "learning_rate": 6.264999999999999e-05, 
"loss": 0.4279, "step": 12537 }, { "epoch": 0.7020942994736252, "grad_norm": 1.2926799058914185, "learning_rate": 6.2655e-05, "loss": 0.2997, "step": 12538 }, { "epoch": 0.7021502967857542, "grad_norm": 1.2959471940994263, "learning_rate": 6.266000000000001e-05, "loss": 0.3984, "step": 12539 }, { "epoch": 0.7022062940978833, "grad_norm": 1.2488656044006348, "learning_rate": 6.266500000000001e-05, "loss": 0.4622, "step": 12540 }, { "epoch": 0.7022622914100123, "grad_norm": 1.310998797416687, "learning_rate": 6.267000000000001e-05, "loss": 0.4812, "step": 12541 }, { "epoch": 0.7023182887221413, "grad_norm": 1.280368685722351, "learning_rate": 6.2675e-05, "loss": 0.4793, "step": 12542 }, { "epoch": 0.7023742860342703, "grad_norm": 1.4046286344528198, "learning_rate": 6.268e-05, "loss": 0.3768, "step": 12543 }, { "epoch": 0.7024302833463993, "grad_norm": 1.514461636543274, "learning_rate": 6.2685e-05, "loss": 0.3981, "step": 12544 }, { "epoch": 0.7024862806585284, "grad_norm": 1.2564232349395752, "learning_rate": 6.269e-05, "loss": 0.4399, "step": 12545 }, { "epoch": 0.7025422779706574, "grad_norm": 1.1517951488494873, "learning_rate": 6.269500000000001e-05, "loss": 0.424, "step": 12546 }, { "epoch": 0.7025982752827864, "grad_norm": 1.3670238256454468, "learning_rate": 6.27e-05, "loss": 0.5629, "step": 12547 }, { "epoch": 0.7026542725949154, "grad_norm": 1.3139759302139282, "learning_rate": 6.2705e-05, "loss": 0.4085, "step": 12548 }, { "epoch": 0.7027102699070444, "grad_norm": 1.2447679042816162, "learning_rate": 6.271e-05, "loss": 0.5478, "step": 12549 }, { "epoch": 0.7027662672191735, "grad_norm": 1.2819429636001587, "learning_rate": 6.2715e-05, "loss": 0.3848, "step": 12550 }, { "epoch": 0.7028222645313025, "grad_norm": 3.032064437866211, "learning_rate": 6.272e-05, "loss": 0.4426, "step": 12551 }, { "epoch": 0.7028782618434315, "grad_norm": 1.4964185953140259, "learning_rate": 6.2725e-05, "loss": 0.516, "step": 12552 }, { "epoch": 0.7029342591555605, "grad_norm": 
1.7251659631729126, "learning_rate": 6.273e-05, "loss": 0.4446, "step": 12553 }, { "epoch": 0.7029902564676895, "grad_norm": 1.5610597133636475, "learning_rate": 6.2735e-05, "loss": 0.5465, "step": 12554 }, { "epoch": 0.7030462537798186, "grad_norm": 1.3260618448257446, "learning_rate": 6.274e-05, "loss": 0.49, "step": 12555 }, { "epoch": 0.7031022510919476, "grad_norm": 1.0873066186904907, "learning_rate": 6.2745e-05, "loss": 0.3162, "step": 12556 }, { "epoch": 0.7031582484040766, "grad_norm": 1.1887412071228027, "learning_rate": 6.275e-05, "loss": 0.3976, "step": 12557 }, { "epoch": 0.7032142457162056, "grad_norm": 1.362644076347351, "learning_rate": 6.2755e-05, "loss": 0.4904, "step": 12558 }, { "epoch": 0.7032702430283346, "grad_norm": 1.2175319194793701, "learning_rate": 6.276e-05, "loss": 0.4208, "step": 12559 }, { "epoch": 0.7033262403404636, "grad_norm": 1.2674423456192017, "learning_rate": 6.276500000000001e-05, "loss": 0.3864, "step": 12560 }, { "epoch": 0.7033822376525927, "grad_norm": 1.1710255146026611, "learning_rate": 6.277000000000001e-05, "loss": 0.4219, "step": 12561 }, { "epoch": 0.7034382349647217, "grad_norm": 1.1940264701843262, "learning_rate": 6.277500000000001e-05, "loss": 0.3946, "step": 12562 }, { "epoch": 0.7034942322768507, "grad_norm": 1.3122938871383667, "learning_rate": 6.278e-05, "loss": 0.3898, "step": 12563 }, { "epoch": 0.7035502295889797, "grad_norm": 1.266348958015442, "learning_rate": 6.2785e-05, "loss": 0.5555, "step": 12564 }, { "epoch": 0.7036062269011087, "grad_norm": 1.386495590209961, "learning_rate": 6.279e-05, "loss": 0.4076, "step": 12565 }, { "epoch": 0.7036622242132378, "grad_norm": 1.2832168340682983, "learning_rate": 6.2795e-05, "loss": 0.4575, "step": 12566 }, { "epoch": 0.7037182215253668, "grad_norm": 1.2073702812194824, "learning_rate": 6.280000000000001e-05, "loss": 0.3843, "step": 12567 }, { "epoch": 0.7037742188374958, "grad_norm": 1.2366464138031006, "learning_rate": 6.2805e-05, "loss": 0.4584, "step": 
12568 }, { "epoch": 0.7038302161496248, "grad_norm": 1.237715482711792, "learning_rate": 6.281e-05, "loss": 0.446, "step": 12569 }, { "epoch": 0.7038862134617538, "grad_norm": 1.3560377359390259, "learning_rate": 6.2815e-05, "loss": 0.376, "step": 12570 }, { "epoch": 0.7039422107738829, "grad_norm": 1.4787386655807495, "learning_rate": 6.282e-05, "loss": 0.4276, "step": 12571 }, { "epoch": 0.7039982080860119, "grad_norm": 1.650567889213562, "learning_rate": 6.2825e-05, "loss": 0.5232, "step": 12572 }, { "epoch": 0.7040542053981409, "grad_norm": 1.2303379774093628, "learning_rate": 6.283e-05, "loss": 0.3249, "step": 12573 }, { "epoch": 0.7041102027102699, "grad_norm": 1.3153318166732788, "learning_rate": 6.2835e-05, "loss": 0.5754, "step": 12574 }, { "epoch": 0.7041662000223989, "grad_norm": 1.261511206626892, "learning_rate": 6.284e-05, "loss": 0.3882, "step": 12575 }, { "epoch": 0.704222197334528, "grad_norm": 1.7436730861663818, "learning_rate": 6.2845e-05, "loss": 0.5699, "step": 12576 }, { "epoch": 0.704278194646657, "grad_norm": 1.4731782674789429, "learning_rate": 6.285e-05, "loss": 0.4778, "step": 12577 }, { "epoch": 0.704334191958786, "grad_norm": 1.2351558208465576, "learning_rate": 6.285500000000001e-05, "loss": 0.4492, "step": 12578 }, { "epoch": 0.704390189270915, "grad_norm": 1.2724463939666748, "learning_rate": 6.286e-05, "loss": 0.3817, "step": 12579 }, { "epoch": 0.704446186583044, "grad_norm": 1.1775709390640259, "learning_rate": 6.2865e-05, "loss": 0.4378, "step": 12580 }, { "epoch": 0.704502183895173, "grad_norm": 1.4919737577438354, "learning_rate": 6.287000000000001e-05, "loss": 0.4648, "step": 12581 }, { "epoch": 0.7045581812073021, "grad_norm": 1.245802402496338, "learning_rate": 6.287500000000001e-05, "loss": 0.373, "step": 12582 }, { "epoch": 0.7046141785194311, "grad_norm": 1.3589507341384888, "learning_rate": 6.288000000000001e-05, "loss": 0.5279, "step": 12583 }, { "epoch": 0.7046701758315601, "grad_norm": 1.2483769655227661, 
"learning_rate": 6.2885e-05, "loss": 0.3568, "step": 12584 }, { "epoch": 0.7047261731436891, "grad_norm": 1.0464344024658203, "learning_rate": 6.289e-05, "loss": 0.4032, "step": 12585 }, { "epoch": 0.7047821704558181, "grad_norm": 1.4985077381134033, "learning_rate": 6.2895e-05, "loss": 0.4843, "step": 12586 }, { "epoch": 0.7048381677679472, "grad_norm": 1.618809461593628, "learning_rate": 6.29e-05, "loss": 0.4632, "step": 12587 }, { "epoch": 0.7048941650800762, "grad_norm": 1.4173755645751953, "learning_rate": 6.290500000000001e-05, "loss": 0.4738, "step": 12588 }, { "epoch": 0.7049501623922052, "grad_norm": 1.209714412689209, "learning_rate": 6.291e-05, "loss": 0.3928, "step": 12589 }, { "epoch": 0.7050061597043342, "grad_norm": 1.331560730934143, "learning_rate": 6.2915e-05, "loss": 0.4719, "step": 12590 }, { "epoch": 0.7050621570164632, "grad_norm": 1.475605845451355, "learning_rate": 6.292e-05, "loss": 0.5298, "step": 12591 }, { "epoch": 0.7051181543285923, "grad_norm": 1.38764488697052, "learning_rate": 6.2925e-05, "loss": 0.4586, "step": 12592 }, { "epoch": 0.7051741516407213, "grad_norm": 1.2886463403701782, "learning_rate": 6.293e-05, "loss": 0.4298, "step": 12593 }, { "epoch": 0.7052301489528503, "grad_norm": 1.1384798288345337, "learning_rate": 6.293499999999999e-05, "loss": 0.4246, "step": 12594 }, { "epoch": 0.7052861462649793, "grad_norm": 1.3754823207855225, "learning_rate": 6.294e-05, "loss": 0.4228, "step": 12595 }, { "epoch": 0.7053421435771083, "grad_norm": 1.4134275913238525, "learning_rate": 6.2945e-05, "loss": 0.597, "step": 12596 }, { "epoch": 0.7053981408892374, "grad_norm": 1.2388256788253784, "learning_rate": 6.295e-05, "loss": 0.4314, "step": 12597 }, { "epoch": 0.7054541382013664, "grad_norm": 1.4481347799301147, "learning_rate": 6.295500000000001e-05, "loss": 0.4934, "step": 12598 }, { "epoch": 0.7055101355134954, "grad_norm": 1.091850757598877, "learning_rate": 6.296000000000001e-05, "loss": 0.3545, "step": 12599 }, { "epoch": 
0.7055661328256244, "grad_norm": 1.3457605838775635, "learning_rate": 6.2965e-05, "loss": 0.5096, "step": 12600 }, { "epoch": 0.7056221301377534, "grad_norm": 1.4634277820587158, "learning_rate": 6.297e-05, "loss": 0.4189, "step": 12601 }, { "epoch": 0.7056781274498825, "grad_norm": 1.5778405666351318, "learning_rate": 6.297500000000001e-05, "loss": 0.492, "step": 12602 }, { "epoch": 0.7057341247620115, "grad_norm": 1.2554309368133545, "learning_rate": 6.298000000000001e-05, "loss": 0.3473, "step": 12603 }, { "epoch": 0.7057901220741405, "grad_norm": 1.4002500772476196, "learning_rate": 6.298500000000001e-05, "loss": 0.5078, "step": 12604 }, { "epoch": 0.7058461193862695, "grad_norm": 1.460457444190979, "learning_rate": 6.299e-05, "loss": 0.3477, "step": 12605 }, { "epoch": 0.7059021166983985, "grad_norm": 1.3724795579910278, "learning_rate": 6.2995e-05, "loss": 0.4048, "step": 12606 }, { "epoch": 0.7059581140105275, "grad_norm": 1.3621898889541626, "learning_rate": 6.3e-05, "loss": 0.5058, "step": 12607 }, { "epoch": 0.7060141113226566, "grad_norm": 1.2200628519058228, "learning_rate": 6.3005e-05, "loss": 0.4472, "step": 12608 }, { "epoch": 0.7060701086347855, "grad_norm": 1.37726628780365, "learning_rate": 6.301000000000001e-05, "loss": 0.4492, "step": 12609 }, { "epoch": 0.7061261059469145, "grad_norm": 1.4450502395629883, "learning_rate": 6.3015e-05, "loss": 0.4134, "step": 12610 }, { "epoch": 0.7061821032590435, "grad_norm": 1.2746467590332031, "learning_rate": 6.302e-05, "loss": 0.4434, "step": 12611 }, { "epoch": 0.7062381005711725, "grad_norm": 1.3004050254821777, "learning_rate": 6.3025e-05, "loss": 0.4678, "step": 12612 }, { "epoch": 0.7062940978833016, "grad_norm": 1.4135793447494507, "learning_rate": 6.303e-05, "loss": 0.505, "step": 12613 }, { "epoch": 0.7063500951954306, "grad_norm": 1.072414755821228, "learning_rate": 6.3035e-05, "loss": 0.3845, "step": 12614 }, { "epoch": 0.7064060925075596, "grad_norm": 1.2727646827697754, "learning_rate": 
6.303999999999999e-05, "loss": 0.5119, "step": 12615 }, { "epoch": 0.7064620898196886, "grad_norm": 1.1109548807144165, "learning_rate": 6.3045e-05, "loss": 0.3195, "step": 12616 }, { "epoch": 0.7065180871318176, "grad_norm": 1.3027209043502808, "learning_rate": 6.305e-05, "loss": 0.5456, "step": 12617 }, { "epoch": 0.7065740844439466, "grad_norm": 1.3179601430892944, "learning_rate": 6.305500000000001e-05, "loss": 0.4321, "step": 12618 }, { "epoch": 0.7066300817560757, "grad_norm": 1.138843059539795, "learning_rate": 6.306000000000001e-05, "loss": 0.4206, "step": 12619 }, { "epoch": 0.7066860790682047, "grad_norm": 1.6393864154815674, "learning_rate": 6.306500000000001e-05, "loss": 0.6926, "step": 12620 }, { "epoch": 0.7067420763803337, "grad_norm": 1.7525949478149414, "learning_rate": 6.307e-05, "loss": 0.4402, "step": 12621 }, { "epoch": 0.7067980736924627, "grad_norm": 1.4333950281143188, "learning_rate": 6.3075e-05, "loss": 0.4057, "step": 12622 }, { "epoch": 0.7068540710045917, "grad_norm": 1.2789214849472046, "learning_rate": 6.308e-05, "loss": 0.3135, "step": 12623 }, { "epoch": 0.7069100683167208, "grad_norm": 1.5111428499221802, "learning_rate": 6.308500000000001e-05, "loss": 0.3997, "step": 12624 }, { "epoch": 0.7069660656288498, "grad_norm": 1.579714298248291, "learning_rate": 6.309000000000001e-05, "loss": 0.4526, "step": 12625 }, { "epoch": 0.7070220629409788, "grad_norm": 1.551098346710205, "learning_rate": 6.3095e-05, "loss": 0.4695, "step": 12626 }, { "epoch": 0.7070780602531078, "grad_norm": 1.2486560344696045, "learning_rate": 6.31e-05, "loss": 0.4226, "step": 12627 }, { "epoch": 0.7071340575652368, "grad_norm": 1.126120924949646, "learning_rate": 6.3105e-05, "loss": 0.3612, "step": 12628 }, { "epoch": 0.7071900548773659, "grad_norm": 1.2583171129226685, "learning_rate": 6.311e-05, "loss": 0.3793, "step": 12629 }, { "epoch": 0.7072460521894949, "grad_norm": 1.390459656715393, "learning_rate": 6.311500000000001e-05, "loss": 0.4242, "step": 12630 
}, { "epoch": 0.7073020495016239, "grad_norm": 1.1530574560165405, "learning_rate": 6.312e-05, "loss": 0.4057, "step": 12631 }, { "epoch": 0.7073580468137529, "grad_norm": 1.2216347455978394, "learning_rate": 6.3125e-05, "loss": 0.3678, "step": 12632 }, { "epoch": 0.7074140441258819, "grad_norm": 1.5343226194381714, "learning_rate": 6.313e-05, "loss": 0.5646, "step": 12633 }, { "epoch": 0.707470041438011, "grad_norm": 1.2741063833236694, "learning_rate": 6.3135e-05, "loss": 0.4095, "step": 12634 }, { "epoch": 0.70752603875014, "grad_norm": 1.3892006874084473, "learning_rate": 6.314e-05, "loss": 0.4059, "step": 12635 }, { "epoch": 0.707582036062269, "grad_norm": 1.618849754333496, "learning_rate": 6.314499999999999e-05, "loss": 0.4831, "step": 12636 }, { "epoch": 0.707638033374398, "grad_norm": 1.3106848001480103, "learning_rate": 6.315e-05, "loss": 0.4332, "step": 12637 }, { "epoch": 0.707694030686527, "grad_norm": 1.281575083732605, "learning_rate": 6.3155e-05, "loss": 0.4828, "step": 12638 }, { "epoch": 0.707750027998656, "grad_norm": 1.260123610496521, "learning_rate": 6.316000000000001e-05, "loss": 0.4523, "step": 12639 }, { "epoch": 0.7078060253107851, "grad_norm": 1.2649827003479004, "learning_rate": 6.316500000000001e-05, "loss": 0.5126, "step": 12640 }, { "epoch": 0.7078620226229141, "grad_norm": 1.6030091047286987, "learning_rate": 6.317e-05, "loss": 0.4465, "step": 12641 }, { "epoch": 0.7079180199350431, "grad_norm": 1.3819293975830078, "learning_rate": 6.3175e-05, "loss": 0.4599, "step": 12642 }, { "epoch": 0.7079740172471721, "grad_norm": 1.3676986694335938, "learning_rate": 6.318e-05, "loss": 0.4164, "step": 12643 }, { "epoch": 0.7080300145593011, "grad_norm": 1.3112050294876099, "learning_rate": 6.3185e-05, "loss": 0.4768, "step": 12644 }, { "epoch": 0.7080860118714302, "grad_norm": 1.3456107378005981, "learning_rate": 6.319000000000001e-05, "loss": 0.4973, "step": 12645 }, { "epoch": 0.7081420091835592, "grad_norm": 1.1713309288024902, 
"learning_rate": 6.319500000000001e-05, "loss": 0.3567, "step": 12646 }, { "epoch": 0.7081980064956882, "grad_norm": 1.4542640447616577, "learning_rate": 6.32e-05, "loss": 0.5801, "step": 12647 }, { "epoch": 0.7082540038078172, "grad_norm": 1.784436583518982, "learning_rate": 6.3205e-05, "loss": 0.5084, "step": 12648 }, { "epoch": 0.7083100011199462, "grad_norm": 1.2475916147232056, "learning_rate": 6.321e-05, "loss": 0.4811, "step": 12649 }, { "epoch": 0.7083659984320753, "grad_norm": 1.2328693866729736, "learning_rate": 6.3215e-05, "loss": 0.4592, "step": 12650 }, { "epoch": 0.7084219957442043, "grad_norm": 1.145531177520752, "learning_rate": 6.322000000000001e-05, "loss": 0.424, "step": 12651 }, { "epoch": 0.7084779930563333, "grad_norm": 1.4359947443008423, "learning_rate": 6.3225e-05, "loss": 0.4709, "step": 12652 }, { "epoch": 0.7085339903684623, "grad_norm": 1.411778211593628, "learning_rate": 6.323e-05, "loss": 0.4565, "step": 12653 }, { "epoch": 0.7085899876805913, "grad_norm": 1.3843156099319458, "learning_rate": 6.3235e-05, "loss": 0.485, "step": 12654 }, { "epoch": 0.7086459849927204, "grad_norm": 1.4193692207336426, "learning_rate": 6.324e-05, "loss": 0.3932, "step": 12655 }, { "epoch": 0.7087019823048494, "grad_norm": 1.2704010009765625, "learning_rate": 6.3245e-05, "loss": 0.4651, "step": 12656 }, { "epoch": 0.7087579796169784, "grad_norm": 1.1232733726501465, "learning_rate": 6.324999999999999e-05, "loss": 0.3719, "step": 12657 }, { "epoch": 0.7088139769291074, "grad_norm": 1.3058933019638062, "learning_rate": 6.3255e-05, "loss": 0.3575, "step": 12658 }, { "epoch": 0.7088699742412364, "grad_norm": 1.5541898012161255, "learning_rate": 6.326000000000001e-05, "loss": 0.6517, "step": 12659 }, { "epoch": 0.7089259715533655, "grad_norm": 1.325561761856079, "learning_rate": 6.326500000000001e-05, "loss": 0.3889, "step": 12660 }, { "epoch": 0.7089819688654945, "grad_norm": 1.5662405490875244, "learning_rate": 6.327000000000001e-05, "loss": 0.405, "step": 
12661 }, { "epoch": 0.7090379661776235, "grad_norm": 1.0807271003723145, "learning_rate": 6.3275e-05, "loss": 0.3512, "step": 12662 }, { "epoch": 0.7090939634897525, "grad_norm": 1.5273305177688599, "learning_rate": 6.328e-05, "loss": 0.6008, "step": 12663 }, { "epoch": 0.7091499608018815, "grad_norm": 1.6548724174499512, "learning_rate": 6.3285e-05, "loss": 0.5224, "step": 12664 }, { "epoch": 0.7092059581140105, "grad_norm": 1.264316201210022, "learning_rate": 6.329e-05, "loss": 0.5297, "step": 12665 }, { "epoch": 0.7092619554261396, "grad_norm": 1.1656309366226196, "learning_rate": 6.329500000000001e-05, "loss": 0.3994, "step": 12666 }, { "epoch": 0.7093179527382686, "grad_norm": 2.9174137115478516, "learning_rate": 6.330000000000001e-05, "loss": 0.4231, "step": 12667 }, { "epoch": 0.7093739500503976, "grad_norm": 1.241214632987976, "learning_rate": 6.3305e-05, "loss": 0.4536, "step": 12668 }, { "epoch": 0.7094299473625266, "grad_norm": 1.5645548105239868, "learning_rate": 6.331e-05, "loss": 0.5548, "step": 12669 }, { "epoch": 0.7094859446746556, "grad_norm": 1.4205975532531738, "learning_rate": 6.3315e-05, "loss": 0.6061, "step": 12670 }, { "epoch": 0.7095419419867847, "grad_norm": 1.774970531463623, "learning_rate": 6.332e-05, "loss": 0.4486, "step": 12671 }, { "epoch": 0.7095979392989137, "grad_norm": 1.1116825342178345, "learning_rate": 6.3325e-05, "loss": 0.3824, "step": 12672 }, { "epoch": 0.7096539366110427, "grad_norm": 1.1717495918273926, "learning_rate": 6.333e-05, "loss": 0.5045, "step": 12673 }, { "epoch": 0.7097099339231717, "grad_norm": 1.1295884847640991, "learning_rate": 6.3335e-05, "loss": 0.3551, "step": 12674 }, { "epoch": 0.7097659312353007, "grad_norm": 1.3893578052520752, "learning_rate": 6.334e-05, "loss": 0.5575, "step": 12675 }, { "epoch": 0.7098219285474298, "grad_norm": 1.4093233346939087, "learning_rate": 6.3345e-05, "loss": 0.4796, "step": 12676 }, { "epoch": 0.7098779258595588, "grad_norm": 1.3685671091079712, "learning_rate": 
6.335e-05, "loss": 0.4079, "step": 12677 }, { "epoch": 0.7099339231716878, "grad_norm": 1.190311074256897, "learning_rate": 6.335499999999999e-05, "loss": 0.4145, "step": 12678 }, { "epoch": 0.7099899204838168, "grad_norm": 1.2564928531646729, "learning_rate": 6.336e-05, "loss": 0.4877, "step": 12679 }, { "epoch": 0.7100459177959458, "grad_norm": 1.264681100845337, "learning_rate": 6.336500000000001e-05, "loss": 0.4781, "step": 12680 }, { "epoch": 0.7101019151080749, "grad_norm": 1.2790048122406006, "learning_rate": 6.337000000000001e-05, "loss": 0.4392, "step": 12681 }, { "epoch": 0.7101579124202039, "grad_norm": 1.1290431022644043, "learning_rate": 6.337500000000001e-05, "loss": 0.4491, "step": 12682 }, { "epoch": 0.7102139097323329, "grad_norm": 1.465829610824585, "learning_rate": 6.338e-05, "loss": 0.6806, "step": 12683 }, { "epoch": 0.7102699070444619, "grad_norm": 1.6494287252426147, "learning_rate": 6.3385e-05, "loss": 0.486, "step": 12684 }, { "epoch": 0.7103259043565909, "grad_norm": 1.357857584953308, "learning_rate": 6.339e-05, "loss": 0.4848, "step": 12685 }, { "epoch": 0.71038190166872, "grad_norm": 1.3813881874084473, "learning_rate": 6.3395e-05, "loss": 0.4979, "step": 12686 }, { "epoch": 0.710437898980849, "grad_norm": 1.551567792892456, "learning_rate": 6.340000000000001e-05, "loss": 0.3673, "step": 12687 }, { "epoch": 0.710493896292978, "grad_norm": 1.2936947345733643, "learning_rate": 6.340500000000001e-05, "loss": 0.4462, "step": 12688 }, { "epoch": 0.710549893605107, "grad_norm": 1.3695658445358276, "learning_rate": 6.341e-05, "loss": 0.4734, "step": 12689 }, { "epoch": 0.710605890917236, "grad_norm": 1.2387371063232422, "learning_rate": 6.3415e-05, "loss": 0.3892, "step": 12690 }, { "epoch": 0.7106618882293649, "grad_norm": 1.2142603397369385, "learning_rate": 6.342e-05, "loss": 0.4328, "step": 12691 }, { "epoch": 0.710717885541494, "grad_norm": 1.2077988386154175, "learning_rate": 6.3425e-05, "loss": 0.415, "step": 12692 }, { "epoch": 
0.710773882853623, "grad_norm": 1.2667608261108398, "learning_rate": 6.343e-05, "loss": 0.4881, "step": 12693 }, { "epoch": 0.710829880165752, "grad_norm": 1.4840319156646729, "learning_rate": 6.3435e-05, "loss": 0.48, "step": 12694 }, { "epoch": 0.710885877477881, "grad_norm": 1.4201076030731201, "learning_rate": 6.344e-05, "loss": 0.5295, "step": 12695 }, { "epoch": 0.71094187479001, "grad_norm": 1.192383885383606, "learning_rate": 6.3445e-05, "loss": 0.3654, "step": 12696 }, { "epoch": 0.710997872102139, "grad_norm": 1.3051652908325195, "learning_rate": 6.345e-05, "loss": 0.4585, "step": 12697 }, { "epoch": 0.7110538694142681, "grad_norm": 1.7367881536483765, "learning_rate": 6.3455e-05, "loss": 0.4431, "step": 12698 }, { "epoch": 0.7111098667263971, "grad_norm": 1.2035497426986694, "learning_rate": 6.346e-05, "loss": 0.4178, "step": 12699 }, { "epoch": 0.7111658640385261, "grad_norm": 1.2499315738677979, "learning_rate": 6.3465e-05, "loss": 0.4182, "step": 12700 }, { "epoch": 0.7112218613506551, "grad_norm": 1.4131886959075928, "learning_rate": 6.347e-05, "loss": 0.4649, "step": 12701 }, { "epoch": 0.7112778586627841, "grad_norm": 1.190421223640442, "learning_rate": 6.347500000000001e-05, "loss": 0.4147, "step": 12702 }, { "epoch": 0.7113338559749132, "grad_norm": 1.193487286567688, "learning_rate": 6.348000000000001e-05, "loss": 0.4261, "step": 12703 }, { "epoch": 0.7113898532870422, "grad_norm": 1.1812646389007568, "learning_rate": 6.3485e-05, "loss": 0.4381, "step": 12704 }, { "epoch": 0.7114458505991712, "grad_norm": 1.2294243574142456, "learning_rate": 6.349e-05, "loss": 0.3583, "step": 12705 }, { "epoch": 0.7115018479113002, "grad_norm": 2.127265691757202, "learning_rate": 6.3495e-05, "loss": 0.5528, "step": 12706 }, { "epoch": 0.7115578452234292, "grad_norm": 1.3275443315505981, "learning_rate": 6.35e-05, "loss": 0.3748, "step": 12707 }, { "epoch": 0.7116138425355583, "grad_norm": 1.2263245582580566, "learning_rate": 6.350500000000001e-05, "loss": 
0.4958, "step": 12708 }, { "epoch": 0.7116698398476873, "grad_norm": 1.5293418169021606, "learning_rate": 6.351000000000001e-05, "loss": 0.3799, "step": 12709 }, { "epoch": 0.7117258371598163, "grad_norm": 1.5596578121185303, "learning_rate": 6.3515e-05, "loss": 0.4192, "step": 12710 }, { "epoch": 0.7117818344719453, "grad_norm": 1.153295874595642, "learning_rate": 6.352e-05, "loss": 0.4549, "step": 12711 }, { "epoch": 0.7118378317840743, "grad_norm": 1.3420997858047485, "learning_rate": 6.3525e-05, "loss": 0.4939, "step": 12712 }, { "epoch": 0.7118938290962034, "grad_norm": 1.3926844596862793, "learning_rate": 6.353e-05, "loss": 0.3718, "step": 12713 }, { "epoch": 0.7119498264083324, "grad_norm": 1.3157209157943726, "learning_rate": 6.3535e-05, "loss": 0.5368, "step": 12714 }, { "epoch": 0.7120058237204614, "grad_norm": 1.580293893814087, "learning_rate": 6.354e-05, "loss": 0.5469, "step": 12715 }, { "epoch": 0.7120618210325904, "grad_norm": 1.5111911296844482, "learning_rate": 6.3545e-05, "loss": 0.3721, "step": 12716 }, { "epoch": 0.7121178183447194, "grad_norm": 1.2916908264160156, "learning_rate": 6.355e-05, "loss": 0.3981, "step": 12717 }, { "epoch": 0.7121738156568485, "grad_norm": 1.370994210243225, "learning_rate": 6.3555e-05, "loss": 0.3765, "step": 12718 }, { "epoch": 0.7122298129689775, "grad_norm": 1.1901644468307495, "learning_rate": 6.356000000000001e-05, "loss": 0.3658, "step": 12719 }, { "epoch": 0.7122858102811065, "grad_norm": 1.3271005153656006, "learning_rate": 6.3565e-05, "loss": 0.5493, "step": 12720 }, { "epoch": 0.7123418075932355, "grad_norm": 1.1498123407363892, "learning_rate": 6.357e-05, "loss": 0.4468, "step": 12721 }, { "epoch": 0.7123978049053645, "grad_norm": 1.1788270473480225, "learning_rate": 6.3575e-05, "loss": 0.3985, "step": 12722 }, { "epoch": 0.7124538022174935, "grad_norm": 1.8106350898742676, "learning_rate": 6.358000000000001e-05, "loss": 0.6286, "step": 12723 }, { "epoch": 0.7125097995296226, "grad_norm": 
1.4803918600082397, "learning_rate": 6.358500000000001e-05, "loss": 0.4002, "step": 12724 }, { "epoch": 0.7125657968417516, "grad_norm": 1.188391923904419, "learning_rate": 6.359e-05, "loss": 0.3289, "step": 12725 }, { "epoch": 0.7126217941538806, "grad_norm": 1.2638044357299805, "learning_rate": 6.3595e-05, "loss": 0.3745, "step": 12726 }, { "epoch": 0.7126777914660096, "grad_norm": 1.017531156539917, "learning_rate": 6.36e-05, "loss": 0.337, "step": 12727 }, { "epoch": 0.7127337887781386, "grad_norm": 1.3558027744293213, "learning_rate": 6.3605e-05, "loss": 0.3277, "step": 12728 }, { "epoch": 0.7127897860902677, "grad_norm": 1.1347521543502808, "learning_rate": 6.361000000000001e-05, "loss": 0.4376, "step": 12729 }, { "epoch": 0.7128457834023967, "grad_norm": 1.4346020221710205, "learning_rate": 6.3615e-05, "loss": 0.5466, "step": 12730 }, { "epoch": 0.7129017807145257, "grad_norm": 1.54099702835083, "learning_rate": 6.362e-05, "loss": 0.517, "step": 12731 }, { "epoch": 0.7129577780266547, "grad_norm": 1.1725080013275146, "learning_rate": 6.3625e-05, "loss": 0.3622, "step": 12732 }, { "epoch": 0.7130137753387837, "grad_norm": 1.2479534149169922, "learning_rate": 6.363e-05, "loss": 0.3899, "step": 12733 }, { "epoch": 0.7130697726509128, "grad_norm": 1.2837066650390625, "learning_rate": 6.3635e-05, "loss": 0.3856, "step": 12734 }, { "epoch": 0.7131257699630418, "grad_norm": 2.2546517848968506, "learning_rate": 6.364e-05, "loss": 0.5325, "step": 12735 }, { "epoch": 0.7131817672751708, "grad_norm": 1.216312289237976, "learning_rate": 6.3645e-05, "loss": 0.4332, "step": 12736 }, { "epoch": 0.7132377645872998, "grad_norm": 1.3184088468551636, "learning_rate": 6.365e-05, "loss": 0.4355, "step": 12737 }, { "epoch": 0.7132937618994288, "grad_norm": 1.1263552904129028, "learning_rate": 6.3655e-05, "loss": 0.3072, "step": 12738 }, { "epoch": 0.7133497592115579, "grad_norm": 1.165421962738037, "learning_rate": 6.366000000000001e-05, "loss": 0.4261, "step": 12739 }, { 
"epoch": 0.7134057565236869, "grad_norm": 1.4257819652557373, "learning_rate": 6.366500000000001e-05, "loss": 0.5089, "step": 12740 }, { "epoch": 0.7134617538358159, "grad_norm": 1.3485167026519775, "learning_rate": 6.367e-05, "loss": 0.4479, "step": 12741 }, { "epoch": 0.7135177511479449, "grad_norm": 1.4713619947433472, "learning_rate": 6.3675e-05, "loss": 0.4504, "step": 12742 }, { "epoch": 0.7135737484600739, "grad_norm": 1.085688591003418, "learning_rate": 6.368e-05, "loss": 0.4326, "step": 12743 }, { "epoch": 0.713629745772203, "grad_norm": 1.2095509767532349, "learning_rate": 6.368500000000001e-05, "loss": 0.3957, "step": 12744 }, { "epoch": 0.713685743084332, "grad_norm": 1.5142602920532227, "learning_rate": 6.369000000000001e-05, "loss": 0.3968, "step": 12745 }, { "epoch": 0.713741740396461, "grad_norm": 1.1609373092651367, "learning_rate": 6.3695e-05, "loss": 0.5362, "step": 12746 }, { "epoch": 0.71379773770859, "grad_norm": 1.6774296760559082, "learning_rate": 6.37e-05, "loss": 0.5318, "step": 12747 }, { "epoch": 0.713853735020719, "grad_norm": 1.3412060737609863, "learning_rate": 6.3705e-05, "loss": 0.3999, "step": 12748 }, { "epoch": 0.713909732332848, "grad_norm": 1.1753593683242798, "learning_rate": 6.371e-05, "loss": 0.3468, "step": 12749 }, { "epoch": 0.7139657296449771, "grad_norm": 1.4292224645614624, "learning_rate": 6.371500000000001e-05, "loss": 0.5395, "step": 12750 }, { "epoch": 0.7140217269571061, "grad_norm": 1.2235126495361328, "learning_rate": 6.372e-05, "loss": 0.3857, "step": 12751 }, { "epoch": 0.7140777242692351, "grad_norm": 1.4244022369384766, "learning_rate": 6.3725e-05, "loss": 0.4213, "step": 12752 }, { "epoch": 0.7141337215813641, "grad_norm": 1.2730166912078857, "learning_rate": 6.373e-05, "loss": 0.4058, "step": 12753 }, { "epoch": 0.7141897188934931, "grad_norm": 1.4534602165222168, "learning_rate": 6.3735e-05, "loss": 0.4449, "step": 12754 }, { "epoch": 0.7142457162056222, "grad_norm": 1.2470502853393555, "learning_rate": 
6.374e-05, "loss": 0.5016, "step": 12755 }, { "epoch": 0.7143017135177512, "grad_norm": 1.146397352218628, "learning_rate": 6.3745e-05, "loss": 0.3615, "step": 12756 }, { "epoch": 0.7143577108298802, "grad_norm": 1.190064549446106, "learning_rate": 6.375e-05, "loss": 0.4055, "step": 12757 }, { "epoch": 0.7144137081420092, "grad_norm": 1.2497453689575195, "learning_rate": 6.3755e-05, "loss": 0.4397, "step": 12758 }, { "epoch": 0.7144697054541382, "grad_norm": 2.066960096359253, "learning_rate": 6.376e-05, "loss": 0.357, "step": 12759 }, { "epoch": 0.7145257027662673, "grad_norm": 1.6281381845474243, "learning_rate": 6.376500000000001e-05, "loss": 0.5837, "step": 12760 }, { "epoch": 0.7145817000783963, "grad_norm": 1.5029911994934082, "learning_rate": 6.377000000000001e-05, "loss": 0.4694, "step": 12761 }, { "epoch": 0.7146376973905253, "grad_norm": 1.3037540912628174, "learning_rate": 6.3775e-05, "loss": 0.3326, "step": 12762 }, { "epoch": 0.7146936947026543, "grad_norm": 1.1242607831954956, "learning_rate": 6.378e-05, "loss": 0.4072, "step": 12763 }, { "epoch": 0.7147496920147833, "grad_norm": 1.3720523118972778, "learning_rate": 6.3785e-05, "loss": 0.4946, "step": 12764 }, { "epoch": 0.7148056893269124, "grad_norm": 1.3609472513198853, "learning_rate": 6.379000000000001e-05, "loss": 0.4443, "step": 12765 }, { "epoch": 0.7148616866390414, "grad_norm": 1.4723418951034546, "learning_rate": 6.379500000000001e-05, "loss": 0.4779, "step": 12766 }, { "epoch": 0.7149176839511704, "grad_norm": 1.2363005876541138, "learning_rate": 6.38e-05, "loss": 0.3756, "step": 12767 }, { "epoch": 0.7149736812632994, "grad_norm": 1.1192009449005127, "learning_rate": 6.3805e-05, "loss": 0.4275, "step": 12768 }, { "epoch": 0.7150296785754284, "grad_norm": 1.8555961847305298, "learning_rate": 6.381e-05, "loss": 0.5432, "step": 12769 }, { "epoch": 0.7150856758875574, "grad_norm": 1.238851547241211, "learning_rate": 6.3815e-05, "loss": 0.364, "step": 12770 }, { "epoch": 0.7151416731996865, 
"grad_norm": 1.2626633644104004, "learning_rate": 6.382e-05, "loss": 0.4722, "step": 12771 }, { "epoch": 0.7151976705118155, "grad_norm": 2.2175941467285156, "learning_rate": 6.3825e-05, "loss": 0.3666, "step": 12772 }, { "epoch": 0.7152536678239445, "grad_norm": 1.3716325759887695, "learning_rate": 6.383e-05, "loss": 0.383, "step": 12773 }, { "epoch": 0.7153096651360734, "grad_norm": 1.627503752708435, "learning_rate": 6.3835e-05, "loss": 0.4719, "step": 12774 }, { "epoch": 0.7153656624482024, "grad_norm": 1.6553013324737549, "learning_rate": 6.384e-05, "loss": 0.4163, "step": 12775 }, { "epoch": 0.7154216597603315, "grad_norm": 1.3761839866638184, "learning_rate": 6.3845e-05, "loss": 0.5103, "step": 12776 }, { "epoch": 0.7154776570724605, "grad_norm": 1.385082483291626, "learning_rate": 6.385e-05, "loss": 0.5036, "step": 12777 }, { "epoch": 0.7155336543845895, "grad_norm": 1.3864158391952515, "learning_rate": 6.3855e-05, "loss": 0.5604, "step": 12778 }, { "epoch": 0.7155896516967185, "grad_norm": 1.4270308017730713, "learning_rate": 6.386e-05, "loss": 0.5372, "step": 12779 }, { "epoch": 0.7156456490088475, "grad_norm": 1.5776902437210083, "learning_rate": 6.386500000000001e-05, "loss": 0.6883, "step": 12780 }, { "epoch": 0.7157016463209765, "grad_norm": 1.3400368690490723, "learning_rate": 6.387000000000001e-05, "loss": 0.4332, "step": 12781 }, { "epoch": 0.7157576436331056, "grad_norm": 1.2608451843261719, "learning_rate": 6.387500000000001e-05, "loss": 0.3786, "step": 12782 }, { "epoch": 0.7158136409452346, "grad_norm": 1.2663267850875854, "learning_rate": 6.388e-05, "loss": 0.4651, "step": 12783 }, { "epoch": 0.7158696382573636, "grad_norm": 1.5927826166152954, "learning_rate": 6.3885e-05, "loss": 0.4652, "step": 12784 }, { "epoch": 0.7159256355694926, "grad_norm": 1.1670840978622437, "learning_rate": 6.389e-05, "loss": 0.4082, "step": 12785 }, { "epoch": 0.7159816328816216, "grad_norm": 1.3903788328170776, "learning_rate": 6.389500000000001e-05, "loss": 
0.5638, "step": 12786 }, { "epoch": 0.7160376301937507, "grad_norm": 1.4020166397094727, "learning_rate": 6.390000000000001e-05, "loss": 0.4935, "step": 12787 }, { "epoch": 0.7160936275058797, "grad_norm": 1.471501111984253, "learning_rate": 6.3905e-05, "loss": 0.4349, "step": 12788 }, { "epoch": 0.7161496248180087, "grad_norm": 1.266884207725525, "learning_rate": 6.391e-05, "loss": 0.3633, "step": 12789 }, { "epoch": 0.7162056221301377, "grad_norm": 1.3894003629684448, "learning_rate": 6.3915e-05, "loss": 0.4857, "step": 12790 }, { "epoch": 0.7162616194422667, "grad_norm": 1.3955460786819458, "learning_rate": 6.392e-05, "loss": 0.4931, "step": 12791 }, { "epoch": 0.7163176167543958, "grad_norm": 1.221116065979004, "learning_rate": 6.3925e-05, "loss": 0.3948, "step": 12792 }, { "epoch": 0.7163736140665248, "grad_norm": 3.43851900100708, "learning_rate": 6.393e-05, "loss": 0.4225, "step": 12793 }, { "epoch": 0.7164296113786538, "grad_norm": 1.3714956045150757, "learning_rate": 6.3935e-05, "loss": 0.4307, "step": 12794 }, { "epoch": 0.7164856086907828, "grad_norm": 1.4571897983551025, "learning_rate": 6.394e-05, "loss": 0.4694, "step": 12795 }, { "epoch": 0.7165416060029118, "grad_norm": 1.2747544050216675, "learning_rate": 6.3945e-05, "loss": 0.4313, "step": 12796 }, { "epoch": 0.7165976033150409, "grad_norm": 1.3819502592086792, "learning_rate": 6.395e-05, "loss": 0.4512, "step": 12797 }, { "epoch": 0.7166536006271699, "grad_norm": 1.1545897722244263, "learning_rate": 6.3955e-05, "loss": 0.3672, "step": 12798 }, { "epoch": 0.7167095979392989, "grad_norm": 1.3497068881988525, "learning_rate": 6.396e-05, "loss": 0.4123, "step": 12799 }, { "epoch": 0.7167655952514279, "grad_norm": 1.1514780521392822, "learning_rate": 6.3965e-05, "loss": 0.422, "step": 12800 }, { "epoch": 0.7168215925635569, "grad_norm": 1.310577154159546, "learning_rate": 6.397000000000001e-05, "loss": 0.3955, "step": 12801 }, { "epoch": 0.716877589875686, "grad_norm": 1.2854505777359009, 
"learning_rate": 6.397500000000001e-05, "loss": 0.521, "step": 12802 }, { "epoch": 0.716933587187815, "grad_norm": 1.0573272705078125, "learning_rate": 6.398000000000001e-05, "loss": 0.3111, "step": 12803 }, { "epoch": 0.716989584499944, "grad_norm": 12.162367820739746, "learning_rate": 6.3985e-05, "loss": 0.6242, "step": 12804 }, { "epoch": 0.717045581812073, "grad_norm": 1.1641361713409424, "learning_rate": 6.399e-05, "loss": 0.4572, "step": 12805 }, { "epoch": 0.717101579124202, "grad_norm": 1.853898525238037, "learning_rate": 6.3995e-05, "loss": 0.4984, "step": 12806 }, { "epoch": 0.717157576436331, "grad_norm": 1.2919056415557861, "learning_rate": 6.400000000000001e-05, "loss": 0.4071, "step": 12807 }, { "epoch": 0.7172135737484601, "grad_norm": 1.2785226106643677, "learning_rate": 6.400500000000001e-05, "loss": 0.35, "step": 12808 }, { "epoch": 0.7172695710605891, "grad_norm": 1.0631603002548218, "learning_rate": 6.401e-05, "loss": 0.389, "step": 12809 }, { "epoch": 0.7173255683727181, "grad_norm": 1.295305848121643, "learning_rate": 6.4015e-05, "loss": 0.4316, "step": 12810 }, { "epoch": 0.7173815656848471, "grad_norm": 1.3592380285263062, "learning_rate": 6.402e-05, "loss": 0.4869, "step": 12811 }, { "epoch": 0.7174375629969761, "grad_norm": 1.337204933166504, "learning_rate": 6.4025e-05, "loss": 0.3931, "step": 12812 }, { "epoch": 0.7174935603091052, "grad_norm": 1.4174797534942627, "learning_rate": 6.403e-05, "loss": 0.3998, "step": 12813 }, { "epoch": 0.7175495576212342, "grad_norm": 1.4384292364120483, "learning_rate": 6.4035e-05, "loss": 0.442, "step": 12814 }, { "epoch": 0.7176055549333632, "grad_norm": 1.1505624055862427, "learning_rate": 6.404e-05, "loss": 0.5024, "step": 12815 }, { "epoch": 0.7176615522454922, "grad_norm": 1.3162777423858643, "learning_rate": 6.4045e-05, "loss": 0.5222, "step": 12816 }, { "epoch": 0.7177175495576212, "grad_norm": 1.2605162858963013, "learning_rate": 6.405e-05, "loss": 0.2991, "step": 12817 }, { "epoch": 
0.7177735468697503, "grad_norm": 1.4509625434875488, "learning_rate": 6.4055e-05, "loss": 0.4539, "step": 12818 }, { "epoch": 0.7178295441818793, "grad_norm": 1.4207115173339844, "learning_rate": 6.405999999999999e-05, "loss": 0.4775, "step": 12819 }, { "epoch": 0.7178855414940083, "grad_norm": 1.4416894912719727, "learning_rate": 6.4065e-05, "loss": 0.4061, "step": 12820 }, { "epoch": 0.7179415388061373, "grad_norm": 1.3681820631027222, "learning_rate": 6.407e-05, "loss": 0.5323, "step": 12821 }, { "epoch": 0.7179975361182663, "grad_norm": 1.2461649179458618, "learning_rate": 6.407500000000001e-05, "loss": 0.4439, "step": 12822 }, { "epoch": 0.7180535334303954, "grad_norm": 1.3961012363433838, "learning_rate": 6.408000000000001e-05, "loss": 0.5067, "step": 12823 }, { "epoch": 0.7181095307425244, "grad_norm": 1.3075144290924072, "learning_rate": 6.408500000000001e-05, "loss": 0.35, "step": 12824 }, { "epoch": 0.7181655280546534, "grad_norm": 1.199385166168213, "learning_rate": 6.409e-05, "loss": 0.5217, "step": 12825 }, { "epoch": 0.7182215253667824, "grad_norm": 1.7142282724380493, "learning_rate": 6.4095e-05, "loss": 0.5228, "step": 12826 }, { "epoch": 0.7182775226789114, "grad_norm": 1.2804813385009766, "learning_rate": 6.41e-05, "loss": 0.4752, "step": 12827 }, { "epoch": 0.7183335199910404, "grad_norm": 1.6355879306793213, "learning_rate": 6.410500000000001e-05, "loss": 0.3663, "step": 12828 }, { "epoch": 0.7183895173031695, "grad_norm": 2.1395740509033203, "learning_rate": 6.411000000000001e-05, "loss": 0.4131, "step": 12829 }, { "epoch": 0.7184455146152985, "grad_norm": 1.2016159296035767, "learning_rate": 6.4115e-05, "loss": 0.4172, "step": 12830 }, { "epoch": 0.7185015119274275, "grad_norm": 4.299350261688232, "learning_rate": 6.412e-05, "loss": 0.5124, "step": 12831 }, { "epoch": 0.7185575092395565, "grad_norm": 1.3119045495986938, "learning_rate": 6.4125e-05, "loss": 0.5367, "step": 12832 }, { "epoch": 0.7186135065516855, "grad_norm": 1.2507914304733276, 
"learning_rate": 6.413e-05, "loss": 0.4462, "step": 12833 }, { "epoch": 0.7186695038638146, "grad_norm": 1.6498754024505615, "learning_rate": 6.4135e-05, "loss": 0.4533, "step": 12834 }, { "epoch": 0.7187255011759436, "grad_norm": 1.2412898540496826, "learning_rate": 6.414e-05, "loss": 0.4036, "step": 12835 }, { "epoch": 0.7187814984880726, "grad_norm": 1.275437593460083, "learning_rate": 6.4145e-05, "loss": 0.5085, "step": 12836 }, { "epoch": 0.7188374958002016, "grad_norm": 1.2589842081069946, "learning_rate": 6.415e-05, "loss": 0.4849, "step": 12837 }, { "epoch": 0.7188934931123306, "grad_norm": 1.271705985069275, "learning_rate": 6.4155e-05, "loss": 0.5544, "step": 12838 }, { "epoch": 0.7189494904244597, "grad_norm": 1.4079164266586304, "learning_rate": 6.416e-05, "loss": 0.5601, "step": 12839 }, { "epoch": 0.7190054877365887, "grad_norm": 1.3210337162017822, "learning_rate": 6.4165e-05, "loss": 0.5112, "step": 12840 }, { "epoch": 0.7190614850487177, "grad_norm": 1.2597122192382812, "learning_rate": 6.417e-05, "loss": 0.4337, "step": 12841 }, { "epoch": 0.7191174823608467, "grad_norm": 1.3668289184570312, "learning_rate": 6.4175e-05, "loss": 0.443, "step": 12842 }, { "epoch": 0.7191734796729757, "grad_norm": 1.4604369401931763, "learning_rate": 6.418000000000001e-05, "loss": 0.4761, "step": 12843 }, { "epoch": 0.7192294769851048, "grad_norm": 1.1284401416778564, "learning_rate": 6.418500000000001e-05, "loss": 0.3613, "step": 12844 }, { "epoch": 0.7192854742972338, "grad_norm": 1.336661458015442, "learning_rate": 6.419000000000001e-05, "loss": 0.3995, "step": 12845 }, { "epoch": 0.7193414716093628, "grad_norm": 1.4011123180389404, "learning_rate": 6.4195e-05, "loss": 0.4359, "step": 12846 }, { "epoch": 0.7193974689214918, "grad_norm": 1.2431459426879883, "learning_rate": 6.42e-05, "loss": 0.4012, "step": 12847 }, { "epoch": 0.7194534662336208, "grad_norm": 1.1655406951904297, "learning_rate": 6.4205e-05, "loss": 0.4497, "step": 12848 }, { "epoch": 
0.7195094635457498, "grad_norm": 1.292120099067688, "learning_rate": 6.421e-05, "loss": 0.5142, "step": 12849 }, { "epoch": 0.7195654608578789, "grad_norm": 1.2695152759552002, "learning_rate": 6.421500000000001e-05, "loss": 0.4447, "step": 12850 }, { "epoch": 0.7196214581700079, "grad_norm": 1.2664965391159058, "learning_rate": 6.422e-05, "loss": 0.4946, "step": 12851 }, { "epoch": 0.7196774554821369, "grad_norm": 1.2135577201843262, "learning_rate": 6.4225e-05, "loss": 0.3936, "step": 12852 }, { "epoch": 0.7197334527942659, "grad_norm": 1.182833194732666, "learning_rate": 6.423e-05, "loss": 0.3167, "step": 12853 }, { "epoch": 0.719789450106395, "grad_norm": 1.412680983543396, "learning_rate": 6.4235e-05, "loss": 0.4906, "step": 12854 }, { "epoch": 0.719845447418524, "grad_norm": 1.304890751838684, "learning_rate": 6.424e-05, "loss": 0.3311, "step": 12855 }, { "epoch": 0.719901444730653, "grad_norm": 1.1928796768188477, "learning_rate": 6.4245e-05, "loss": 0.4449, "step": 12856 }, { "epoch": 0.7199574420427819, "grad_norm": 2.066993236541748, "learning_rate": 6.425e-05, "loss": 0.3949, "step": 12857 }, { "epoch": 0.7200134393549109, "grad_norm": 1.359500527381897, "learning_rate": 6.4255e-05, "loss": 0.4686, "step": 12858 }, { "epoch": 0.7200694366670399, "grad_norm": 1.3475192785263062, "learning_rate": 6.426e-05, "loss": 0.482, "step": 12859 }, { "epoch": 0.720125433979169, "grad_norm": 1.0215314626693726, "learning_rate": 6.426500000000001e-05, "loss": 0.3383, "step": 12860 }, { "epoch": 0.720181431291298, "grad_norm": 1.4091228246688843, "learning_rate": 6.427e-05, "loss": 0.4618, "step": 12861 }, { "epoch": 0.720237428603427, "grad_norm": 1.5251009464263916, "learning_rate": 6.4275e-05, "loss": 0.4077, "step": 12862 }, { "epoch": 0.720293425915556, "grad_norm": 1.6396993398666382, "learning_rate": 6.428e-05, "loss": 0.5588, "step": 12863 }, { "epoch": 0.720349423227685, "grad_norm": 3.3280935287475586, "learning_rate": 6.428500000000001e-05, "loss": 0.5493, 
"step": 12864 }, { "epoch": 0.720405420539814, "grad_norm": 1.4691766500473022, "learning_rate": 6.429000000000001e-05, "loss": 0.5274, "step": 12865 }, { "epoch": 0.7204614178519431, "grad_norm": 1.2549264430999756, "learning_rate": 6.429500000000001e-05, "loss": 0.5649, "step": 12866 }, { "epoch": 0.7205174151640721, "grad_norm": 1.3790887594223022, "learning_rate": 6.43e-05, "loss": 0.4306, "step": 12867 }, { "epoch": 0.7205734124762011, "grad_norm": 1.192781925201416, "learning_rate": 6.4305e-05, "loss": 0.44, "step": 12868 }, { "epoch": 0.7206294097883301, "grad_norm": 1.3899825811386108, "learning_rate": 6.431e-05, "loss": 0.4697, "step": 12869 }, { "epoch": 0.7206854071004591, "grad_norm": 1.4914478063583374, "learning_rate": 6.4315e-05, "loss": 0.3433, "step": 12870 }, { "epoch": 0.7207414044125882, "grad_norm": 1.2485928535461426, "learning_rate": 6.432000000000001e-05, "loss": 0.3903, "step": 12871 }, { "epoch": 0.7207974017247172, "grad_norm": 1.469435691833496, "learning_rate": 6.4325e-05, "loss": 0.4088, "step": 12872 }, { "epoch": 0.7208533990368462, "grad_norm": 1.2373088598251343, "learning_rate": 6.433e-05, "loss": 0.3797, "step": 12873 }, { "epoch": 0.7209093963489752, "grad_norm": 1.1302622556686401, "learning_rate": 6.4335e-05, "loss": 0.4827, "step": 12874 }, { "epoch": 0.7209653936611042, "grad_norm": 1.3318990468978882, "learning_rate": 6.434e-05, "loss": 0.3497, "step": 12875 }, { "epoch": 0.7210213909732333, "grad_norm": 1.1196736097335815, "learning_rate": 6.4345e-05, "loss": 0.4114, "step": 12876 }, { "epoch": 0.7210773882853623, "grad_norm": 1.4482390880584717, "learning_rate": 6.435e-05, "loss": 0.4194, "step": 12877 }, { "epoch": 0.7211333855974913, "grad_norm": 1.2796387672424316, "learning_rate": 6.4355e-05, "loss": 0.4891, "step": 12878 }, { "epoch": 0.7211893829096203, "grad_norm": 1.1463838815689087, "learning_rate": 6.436e-05, "loss": 0.4692, "step": 12879 }, { "epoch": 0.7212453802217493, "grad_norm": 6.655930995941162, 
"learning_rate": 6.436500000000001e-05, "loss": 0.4129, "step": 12880 }, { "epoch": 0.7213013775338784, "grad_norm": 3.030787706375122, "learning_rate": 6.437000000000001e-05, "loss": 0.3954, "step": 12881 }, { "epoch": 0.7213573748460074, "grad_norm": 1.4530386924743652, "learning_rate": 6.4375e-05, "loss": 0.612, "step": 12882 }, { "epoch": 0.7214133721581364, "grad_norm": 1.3220127820968628, "learning_rate": 6.438e-05, "loss": 0.4964, "step": 12883 }, { "epoch": 0.7214693694702654, "grad_norm": 1.7192901372909546, "learning_rate": 6.4385e-05, "loss": 0.4847, "step": 12884 }, { "epoch": 0.7215253667823944, "grad_norm": 1.3402737379074097, "learning_rate": 6.439000000000001e-05, "loss": 0.4283, "step": 12885 }, { "epoch": 0.7215813640945234, "grad_norm": 1.1397950649261475, "learning_rate": 6.439500000000001e-05, "loss": 0.3931, "step": 12886 }, { "epoch": 0.7216373614066525, "grad_norm": 1.5136781930923462, "learning_rate": 6.440000000000001e-05, "loss": 0.581, "step": 12887 }, { "epoch": 0.7216933587187815, "grad_norm": 1.2671502828598022, "learning_rate": 6.4405e-05, "loss": 0.537, "step": 12888 }, { "epoch": 0.7217493560309105, "grad_norm": 1.1760938167572021, "learning_rate": 6.441e-05, "loss": 0.4935, "step": 12889 }, { "epoch": 0.7218053533430395, "grad_norm": 1.2217832803726196, "learning_rate": 6.4415e-05, "loss": 0.3571, "step": 12890 }, { "epoch": 0.7218613506551685, "grad_norm": 1.1983466148376465, "learning_rate": 6.442e-05, "loss": 0.3651, "step": 12891 }, { "epoch": 0.7219173479672976, "grad_norm": 1.4146442413330078, "learning_rate": 6.442500000000001e-05, "loss": 0.4483, "step": 12892 }, { "epoch": 0.7219733452794266, "grad_norm": 1.398146152496338, "learning_rate": 6.443e-05, "loss": 0.6141, "step": 12893 }, { "epoch": 0.7220293425915556, "grad_norm": 1.4189565181732178, "learning_rate": 6.4435e-05, "loss": 0.3798, "step": 12894 }, { "epoch": 0.7220853399036846, "grad_norm": 1.2826626300811768, "learning_rate": 6.444e-05, "loss": 0.5178, "step": 
12895 }, { "epoch": 0.7221413372158136, "grad_norm": 1.3823177814483643, "learning_rate": 6.4445e-05, "loss": 0.4631, "step": 12896 }, { "epoch": 0.7221973345279427, "grad_norm": 1.345367670059204, "learning_rate": 6.445e-05, "loss": 0.437, "step": 12897 }, { "epoch": 0.7222533318400717, "grad_norm": 1.2119885683059692, "learning_rate": 6.4455e-05, "loss": 0.4164, "step": 12898 }, { "epoch": 0.7223093291522007, "grad_norm": 1.1886770725250244, "learning_rate": 6.446e-05, "loss": 0.355, "step": 12899 }, { "epoch": 0.7223653264643297, "grad_norm": 1.188043475151062, "learning_rate": 6.4465e-05, "loss": 0.4286, "step": 12900 }, { "epoch": 0.7224213237764587, "grad_norm": 1.4560166597366333, "learning_rate": 6.447000000000001e-05, "loss": 0.4311, "step": 12901 }, { "epoch": 0.7224773210885878, "grad_norm": 1.4028239250183105, "learning_rate": 6.447500000000001e-05, "loss": 0.4554, "step": 12902 }, { "epoch": 0.7225333184007168, "grad_norm": 1.1268011331558228, "learning_rate": 6.448e-05, "loss": 0.4178, "step": 12903 }, { "epoch": 0.7225893157128458, "grad_norm": 1.1890791654586792, "learning_rate": 6.4485e-05, "loss": 0.3923, "step": 12904 }, { "epoch": 0.7226453130249748, "grad_norm": 1.1738241910934448, "learning_rate": 6.449e-05, "loss": 0.3756, "step": 12905 }, { "epoch": 0.7227013103371038, "grad_norm": 1.6105811595916748, "learning_rate": 6.449500000000001e-05, "loss": 0.3918, "step": 12906 }, { "epoch": 0.7227573076492328, "grad_norm": 1.286133050918579, "learning_rate": 6.450000000000001e-05, "loss": 0.3927, "step": 12907 }, { "epoch": 0.7228133049613619, "grad_norm": 1.4088801145553589, "learning_rate": 6.4505e-05, "loss": 0.6302, "step": 12908 }, { "epoch": 0.7228693022734909, "grad_norm": 1.468022108078003, "learning_rate": 6.451e-05, "loss": 0.4253, "step": 12909 }, { "epoch": 0.7229252995856199, "grad_norm": 1.3817226886749268, "learning_rate": 6.4515e-05, "loss": 0.5141, "step": 12910 }, { "epoch": 0.7229812968977489, "grad_norm": 1.8205647468566895, 
"learning_rate": 6.452e-05, "loss": 0.4257, "step": 12911 }, { "epoch": 0.723037294209878, "grad_norm": 1.2383532524108887, "learning_rate": 6.4525e-05, "loss": 0.3925, "step": 12912 }, { "epoch": 0.723093291522007, "grad_norm": 1.3751835823059082, "learning_rate": 6.453000000000001e-05, "loss": 0.5789, "step": 12913 }, { "epoch": 0.723149288834136, "grad_norm": 1.4135364294052124, "learning_rate": 6.4535e-05, "loss": 0.402, "step": 12914 }, { "epoch": 0.723205286146265, "grad_norm": 1.4469807147979736, "learning_rate": 6.454e-05, "loss": 0.4442, "step": 12915 }, { "epoch": 0.723261283458394, "grad_norm": 1.4926486015319824, "learning_rate": 6.4545e-05, "loss": 0.5421, "step": 12916 }, { "epoch": 0.723317280770523, "grad_norm": 4.189614772796631, "learning_rate": 6.455e-05, "loss": 0.4641, "step": 12917 }, { "epoch": 0.7233732780826521, "grad_norm": 1.3739595413208008, "learning_rate": 6.4555e-05, "loss": 0.3941, "step": 12918 }, { "epoch": 0.7234292753947811, "grad_norm": 1.3635294437408447, "learning_rate": 6.455999999999999e-05, "loss": 0.5162, "step": 12919 }, { "epoch": 0.7234852727069101, "grad_norm": 1.2362215518951416, "learning_rate": 6.4565e-05, "loss": 0.3528, "step": 12920 }, { "epoch": 0.7235412700190391, "grad_norm": 1.1968994140625, "learning_rate": 6.457000000000001e-05, "loss": 0.4326, "step": 12921 }, { "epoch": 0.7235972673311681, "grad_norm": 1.122292160987854, "learning_rate": 6.457500000000001e-05, "loss": 0.401, "step": 12922 }, { "epoch": 0.7236532646432972, "grad_norm": 1.3467565774917603, "learning_rate": 6.458000000000001e-05, "loss": 0.4402, "step": 12923 }, { "epoch": 0.7237092619554262, "grad_norm": 1.0921175479888916, "learning_rate": 6.4585e-05, "loss": 0.392, "step": 12924 }, { "epoch": 0.7237652592675552, "grad_norm": 1.3415946960449219, "learning_rate": 6.459e-05, "loss": 0.3967, "step": 12925 }, { "epoch": 0.7238212565796842, "grad_norm": 2.375540018081665, "learning_rate": 6.4595e-05, "loss": 0.4013, "step": 12926 }, { "epoch": 
0.7238772538918132, "grad_norm": 1.3974196910858154, "learning_rate": 6.460000000000001e-05, "loss": 0.5083, "step": 12927 }, { "epoch": 0.7239332512039423, "grad_norm": 1.1457161903381348, "learning_rate": 6.460500000000001e-05, "loss": 0.351, "step": 12928 }, { "epoch": 0.7239892485160713, "grad_norm": 1.063173532485962, "learning_rate": 6.461e-05, "loss": 0.3628, "step": 12929 }, { "epoch": 0.7240452458282003, "grad_norm": 1.3780083656311035, "learning_rate": 6.4615e-05, "loss": 0.4839, "step": 12930 }, { "epoch": 0.7241012431403293, "grad_norm": 1.3097578287124634, "learning_rate": 6.462e-05, "loss": 0.4928, "step": 12931 }, { "epoch": 0.7241572404524583, "grad_norm": 1.202901840209961, "learning_rate": 6.4625e-05, "loss": 0.4396, "step": 12932 }, { "epoch": 0.7242132377645873, "grad_norm": 1.4199342727661133, "learning_rate": 6.463e-05, "loss": 0.4244, "step": 12933 }, { "epoch": 0.7242692350767164, "grad_norm": 1.2170206308364868, "learning_rate": 6.463500000000001e-05, "loss": 0.421, "step": 12934 }, { "epoch": 0.7243252323888454, "grad_norm": 1.0460795164108276, "learning_rate": 6.464e-05, "loss": 0.3263, "step": 12935 }, { "epoch": 0.7243812297009744, "grad_norm": 1.2562450170516968, "learning_rate": 6.4645e-05, "loss": 0.3932, "step": 12936 }, { "epoch": 0.7244372270131034, "grad_norm": 1.3742763996124268, "learning_rate": 6.465e-05, "loss": 0.444, "step": 12937 }, { "epoch": 0.7244932243252324, "grad_norm": 1.135968804359436, "learning_rate": 6.4655e-05, "loss": 0.3864, "step": 12938 }, { "epoch": 0.7245492216373614, "grad_norm": 1.221862554550171, "learning_rate": 6.466e-05, "loss": 0.3532, "step": 12939 }, { "epoch": 0.7246052189494904, "grad_norm": 1.5764732360839844, "learning_rate": 6.466499999999999e-05, "loss": 0.4656, "step": 12940 }, { "epoch": 0.7246612162616194, "grad_norm": 1.252284288406372, "learning_rate": 6.467e-05, "loss": 0.4488, "step": 12941 }, { "epoch": 0.7247172135737484, "grad_norm": 1.1536979675292969, "learning_rate": 
6.467500000000001e-05, "loss": 0.4659, "step": 12942 }, { "epoch": 0.7247732108858774, "grad_norm": 1.3067823648452759, "learning_rate": 6.468000000000001e-05, "loss": 0.514, "step": 12943 }, { "epoch": 0.7248292081980064, "grad_norm": 1.3132990598678589, "learning_rate": 6.468500000000001e-05, "loss": 0.4565, "step": 12944 }, { "epoch": 0.7248852055101355, "grad_norm": 1.3057440519332886, "learning_rate": 6.469e-05, "loss": 0.5572, "step": 12945 }, { "epoch": 0.7249412028222645, "grad_norm": 1.1999316215515137, "learning_rate": 6.4695e-05, "loss": 0.375, "step": 12946 }, { "epoch": 0.7249972001343935, "grad_norm": 1.336259365081787, "learning_rate": 6.47e-05, "loss": 0.5179, "step": 12947 }, { "epoch": 0.7250531974465225, "grad_norm": 1.6089376211166382, "learning_rate": 6.4705e-05, "loss": 0.4512, "step": 12948 }, { "epoch": 0.7251091947586515, "grad_norm": 1.5541924238204956, "learning_rate": 6.471000000000001e-05, "loss": 0.5098, "step": 12949 }, { "epoch": 0.7251651920707806, "grad_norm": 1.271735429763794, "learning_rate": 6.4715e-05, "loss": 0.5747, "step": 12950 }, { "epoch": 0.7252211893829096, "grad_norm": 1.406443476676941, "learning_rate": 6.472e-05, "loss": 0.3573, "step": 12951 }, { "epoch": 0.7252771866950386, "grad_norm": 1.480695366859436, "learning_rate": 6.4725e-05, "loss": 0.5508, "step": 12952 }, { "epoch": 0.7253331840071676, "grad_norm": 1.1610902547836304, "learning_rate": 6.473e-05, "loss": 0.4652, "step": 12953 }, { "epoch": 0.7253891813192966, "grad_norm": 1.2197450399398804, "learning_rate": 6.4735e-05, "loss": 0.3855, "step": 12954 }, { "epoch": 0.7254451786314257, "grad_norm": 1.2905586957931519, "learning_rate": 6.474000000000001e-05, "loss": 0.5056, "step": 12955 }, { "epoch": 0.7255011759435547, "grad_norm": 1.1793146133422852, "learning_rate": 6.4745e-05, "loss": 0.3365, "step": 12956 }, { "epoch": 0.7255571732556837, "grad_norm": 1.4936169385910034, "learning_rate": 6.475e-05, "loss": 0.4332, "step": 12957 }, { "epoch": 
0.7256131705678127, "grad_norm": 1.0383176803588867, "learning_rate": 6.4755e-05, "loss": 0.2899, "step": 12958 }, { "epoch": 0.7256691678799417, "grad_norm": 1.3028063774108887, "learning_rate": 6.476e-05, "loss": 0.4425, "step": 12959 }, { "epoch": 0.7257251651920708, "grad_norm": 1.3707900047302246, "learning_rate": 6.4765e-05, "loss": 0.4871, "step": 12960 }, { "epoch": 0.7257811625041998, "grad_norm": 1.2507411241531372, "learning_rate": 6.477e-05, "loss": 0.4744, "step": 12961 }, { "epoch": 0.7258371598163288, "grad_norm": 1.4732862710952759, "learning_rate": 6.4775e-05, "loss": 0.5197, "step": 12962 }, { "epoch": 0.7258931571284578, "grad_norm": 1.2263737916946411, "learning_rate": 6.478000000000001e-05, "loss": 0.4142, "step": 12963 }, { "epoch": 0.7259491544405868, "grad_norm": 1.123755693435669, "learning_rate": 6.478500000000001e-05, "loss": 0.4705, "step": 12964 }, { "epoch": 0.7260051517527158, "grad_norm": 1.5642553567886353, "learning_rate": 6.479000000000001e-05, "loss": 0.3635, "step": 12965 }, { "epoch": 0.7260611490648449, "grad_norm": 3.230133295059204, "learning_rate": 6.4795e-05, "loss": 0.5285, "step": 12966 }, { "epoch": 0.7261171463769739, "grad_norm": 1.2619363069534302, "learning_rate": 6.48e-05, "loss": 0.4288, "step": 12967 }, { "epoch": 0.7261731436891029, "grad_norm": 1.3217151165008545, "learning_rate": 6.4805e-05, "loss": 0.3963, "step": 12968 }, { "epoch": 0.7262291410012319, "grad_norm": 1.244097113609314, "learning_rate": 6.481e-05, "loss": 0.4191, "step": 12969 }, { "epoch": 0.726285138313361, "grad_norm": 1.190354824066162, "learning_rate": 6.481500000000001e-05, "loss": 0.4705, "step": 12970 }, { "epoch": 0.72634113562549, "grad_norm": 1.2711552381515503, "learning_rate": 6.482e-05, "loss": 0.3871, "step": 12971 }, { "epoch": 0.726397132937619, "grad_norm": 1.2860972881317139, "learning_rate": 6.4825e-05, "loss": 0.5483, "step": 12972 }, { "epoch": 0.726453130249748, "grad_norm": 1.2053567171096802, "learning_rate": 6.483e-05, 
"loss": 0.3243, "step": 12973 }, { "epoch": 0.726509127561877, "grad_norm": 1.2540823221206665, "learning_rate": 6.4835e-05, "loss": 0.4828, "step": 12974 }, { "epoch": 0.726565124874006, "grad_norm": 1.240599274635315, "learning_rate": 6.484e-05, "loss": 0.466, "step": 12975 }, { "epoch": 0.7266211221861351, "grad_norm": 1.5479391813278198, "learning_rate": 6.484500000000001e-05, "loss": 0.5901, "step": 12976 }, { "epoch": 0.7266771194982641, "grad_norm": 1.4428088665008545, "learning_rate": 6.485e-05, "loss": 0.4771, "step": 12977 }, { "epoch": 0.7267331168103931, "grad_norm": 1.6157454252243042, "learning_rate": 6.4855e-05, "loss": 0.5049, "step": 12978 }, { "epoch": 0.7267891141225221, "grad_norm": 1.34217369556427, "learning_rate": 6.486e-05, "loss": 0.4706, "step": 12979 }, { "epoch": 0.7268451114346511, "grad_norm": 1.2550715208053589, "learning_rate": 6.4865e-05, "loss": 0.3991, "step": 12980 }, { "epoch": 0.7269011087467802, "grad_norm": 1.307175874710083, "learning_rate": 6.487000000000001e-05, "loss": 0.5009, "step": 12981 }, { "epoch": 0.7269571060589092, "grad_norm": 1.1628906726837158, "learning_rate": 6.4875e-05, "loss": 0.3632, "step": 12982 }, { "epoch": 0.7270131033710382, "grad_norm": 1.2792617082595825, "learning_rate": 6.488e-05, "loss": 0.4168, "step": 12983 }, { "epoch": 0.7270691006831672, "grad_norm": 1.4946825504302979, "learning_rate": 6.488500000000001e-05, "loss": 0.486, "step": 12984 }, { "epoch": 0.7271250979952962, "grad_norm": 1.4570118188858032, "learning_rate": 6.489000000000001e-05, "loss": 0.5857, "step": 12985 }, { "epoch": 0.7271810953074253, "grad_norm": 1.5580264329910278, "learning_rate": 6.489500000000001e-05, "loss": 0.353, "step": 12986 }, { "epoch": 0.7272370926195543, "grad_norm": 1.3818445205688477, "learning_rate": 6.49e-05, "loss": 0.3925, "step": 12987 }, { "epoch": 0.7272930899316833, "grad_norm": 1.2081820964813232, "learning_rate": 6.4905e-05, "loss": 0.4304, "step": 12988 }, { "epoch": 0.7273490872438123, 
"grad_norm": 1.2454428672790527, "learning_rate": 6.491e-05, "loss": 0.477, "step": 12989 }, { "epoch": 0.7274050845559413, "grad_norm": 1.2278811931610107, "learning_rate": 6.4915e-05, "loss": 0.3489, "step": 12990 }, { "epoch": 0.7274610818680703, "grad_norm": 1.3734474182128906, "learning_rate": 6.492000000000001e-05, "loss": 0.448, "step": 12991 }, { "epoch": 0.7275170791801994, "grad_norm": 1.1877390146255493, "learning_rate": 6.4925e-05, "loss": 0.3798, "step": 12992 }, { "epoch": 0.7275730764923284, "grad_norm": 1.2094286680221558, "learning_rate": 6.493e-05, "loss": 0.4001, "step": 12993 }, { "epoch": 0.7276290738044574, "grad_norm": 1.2303513288497925, "learning_rate": 6.4935e-05, "loss": 0.3729, "step": 12994 }, { "epoch": 0.7276850711165864, "grad_norm": 1.6889305114746094, "learning_rate": 6.494e-05, "loss": 0.4747, "step": 12995 }, { "epoch": 0.7277410684287154, "grad_norm": 1.3299304246902466, "learning_rate": 6.4945e-05, "loss": 0.5057, "step": 12996 }, { "epoch": 0.7277970657408445, "grad_norm": 1.2950594425201416, "learning_rate": 6.494999999999999e-05, "loss": 0.4396, "step": 12997 }, { "epoch": 0.7278530630529735, "grad_norm": 1.956496238708496, "learning_rate": 6.4955e-05, "loss": 0.5345, "step": 12998 }, { "epoch": 0.7279090603651025, "grad_norm": 1.3873339891433716, "learning_rate": 6.496e-05, "loss": 0.405, "step": 12999 }, { "epoch": 0.7279650576772315, "grad_norm": 1.2748873233795166, "learning_rate": 6.4965e-05, "loss": 0.5211, "step": 13000 }, { "epoch": 0.7280210549893605, "grad_norm": 1.6707043647766113, "learning_rate": 6.497000000000001e-05, "loss": 0.5523, "step": 13001 }, { "epoch": 0.7280770523014896, "grad_norm": 1.3483299016952515, "learning_rate": 6.497500000000001e-05, "loss": 0.4982, "step": 13002 }, { "epoch": 0.7281330496136186, "grad_norm": 1.3101677894592285, "learning_rate": 6.498e-05, "loss": 0.4722, "step": 13003 }, { "epoch": 0.7281890469257476, "grad_norm": 1.118424892425537, "learning_rate": 6.4985e-05, "loss": 
0.3566, "step": 13004 }, { "epoch": 0.7282450442378766, "grad_norm": 1.1747689247131348, "learning_rate": 6.499000000000001e-05, "loss": 0.4689, "step": 13005 }, { "epoch": 0.7283010415500056, "grad_norm": 1.495073676109314, "learning_rate": 6.499500000000001e-05, "loss": 0.45, "step": 13006 }, { "epoch": 0.7283570388621347, "grad_norm": 1.3751918077468872, "learning_rate": 6.500000000000001e-05, "loss": 0.4043, "step": 13007 }, { "epoch": 0.7284130361742637, "grad_norm": 1.296090841293335, "learning_rate": 6.5005e-05, "loss": 0.4572, "step": 13008 }, { "epoch": 0.7284690334863927, "grad_norm": 1.2352558374404907, "learning_rate": 6.501e-05, "loss": 0.3579, "step": 13009 }, { "epoch": 0.7285250307985217, "grad_norm": 1.1969058513641357, "learning_rate": 6.5015e-05, "loss": 0.4123, "step": 13010 }, { "epoch": 0.7285810281106507, "grad_norm": 1.451724648475647, "learning_rate": 6.502e-05, "loss": 0.5631, "step": 13011 }, { "epoch": 0.7286370254227797, "grad_norm": 1.25950288772583, "learning_rate": 6.502500000000001e-05, "loss": 0.4782, "step": 13012 }, { "epoch": 0.7286930227349088, "grad_norm": 1.3958215713500977, "learning_rate": 6.503e-05, "loss": 0.5441, "step": 13013 }, { "epoch": 0.7287490200470378, "grad_norm": 1.6094831228256226, "learning_rate": 6.5035e-05, "loss": 0.5775, "step": 13014 }, { "epoch": 0.7288050173591668, "grad_norm": 1.5766825675964355, "learning_rate": 6.504e-05, "loss": 0.5992, "step": 13015 }, { "epoch": 0.7288610146712958, "grad_norm": 1.4682761430740356, "learning_rate": 6.5045e-05, "loss": 0.5773, "step": 13016 }, { "epoch": 0.7289170119834248, "grad_norm": 1.3241996765136719, "learning_rate": 6.505e-05, "loss": 0.4616, "step": 13017 }, { "epoch": 0.7289730092955539, "grad_norm": 1.1775118112564087, "learning_rate": 6.505499999999999e-05, "loss": 0.3069, "step": 13018 }, { "epoch": 0.7290290066076829, "grad_norm": 1.3832868337631226, "learning_rate": 6.506e-05, "loss": 0.5166, "step": 13019 }, { "epoch": 0.7290850039198119, 
"grad_norm": 1.269210934638977, "learning_rate": 6.5065e-05, "loss": 0.426, "step": 13020 }, { "epoch": 0.7291410012319409, "grad_norm": 1.6523929834365845, "learning_rate": 6.507e-05, "loss": 0.5015, "step": 13021 }, { "epoch": 0.7291969985440698, "grad_norm": 1.3177670240402222, "learning_rate": 6.507500000000001e-05, "loss": 0.4026, "step": 13022 }, { "epoch": 0.7292529958561988, "grad_norm": 1.1591671705245972, "learning_rate": 6.508000000000001e-05, "loss": 0.3467, "step": 13023 }, { "epoch": 0.7293089931683279, "grad_norm": 1.3172328472137451, "learning_rate": 6.5085e-05, "loss": 0.4742, "step": 13024 }, { "epoch": 0.7293649904804569, "grad_norm": 1.0297353267669678, "learning_rate": 6.509e-05, "loss": 0.3457, "step": 13025 }, { "epoch": 0.7294209877925859, "grad_norm": 1.4599758386611938, "learning_rate": 6.5095e-05, "loss": 0.5066, "step": 13026 }, { "epoch": 0.7294769851047149, "grad_norm": 1.2901194095611572, "learning_rate": 6.510000000000001e-05, "loss": 0.4009, "step": 13027 }, { "epoch": 0.729532982416844, "grad_norm": 1.7665644884109497, "learning_rate": 6.510500000000001e-05, "loss": 0.3786, "step": 13028 }, { "epoch": 0.729588979728973, "grad_norm": 1.3884735107421875, "learning_rate": 6.511e-05, "loss": 0.4049, "step": 13029 }, { "epoch": 0.729644977041102, "grad_norm": 1.4180721044540405, "learning_rate": 6.5115e-05, "loss": 0.4338, "step": 13030 }, { "epoch": 0.729700974353231, "grad_norm": 1.2886587381362915, "learning_rate": 6.512e-05, "loss": 0.4717, "step": 13031 }, { "epoch": 0.72975697166536, "grad_norm": 1.4126027822494507, "learning_rate": 6.5125e-05, "loss": 0.5387, "step": 13032 }, { "epoch": 0.729812968977489, "grad_norm": 1.5100982189178467, "learning_rate": 6.513000000000001e-05, "loss": 0.6658, "step": 13033 }, { "epoch": 0.7298689662896181, "grad_norm": 5.311023235321045, "learning_rate": 6.5135e-05, "loss": 0.4693, "step": 13034 }, { "epoch": 0.7299249636017471, "grad_norm": 1.2274329662322998, "learning_rate": 6.514e-05, "loss": 
0.4763, "step": 13035 }, { "epoch": 0.7299809609138761, "grad_norm": 1.3793895244598389, "learning_rate": 6.5145e-05, "loss": 0.3569, "step": 13036 }, { "epoch": 0.7300369582260051, "grad_norm": 1.3775551319122314, "learning_rate": 6.515e-05, "loss": 0.6268, "step": 13037 }, { "epoch": 0.7300929555381341, "grad_norm": 1.4914031028747559, "learning_rate": 6.5155e-05, "loss": 0.6025, "step": 13038 }, { "epoch": 0.7301489528502632, "grad_norm": 1.4676573276519775, "learning_rate": 6.515999999999999e-05, "loss": 0.4987, "step": 13039 }, { "epoch": 0.7302049501623922, "grad_norm": 1.2466171979904175, "learning_rate": 6.5165e-05, "loss": 0.3876, "step": 13040 }, { "epoch": 0.7302609474745212, "grad_norm": 1.5200568437576294, "learning_rate": 6.517e-05, "loss": 0.4826, "step": 13041 }, { "epoch": 0.7303169447866502, "grad_norm": 1.2348226308822632, "learning_rate": 6.517500000000001e-05, "loss": 0.3823, "step": 13042 }, { "epoch": 0.7303729420987792, "grad_norm": 1.1773552894592285, "learning_rate": 6.518000000000001e-05, "loss": 0.4015, "step": 13043 }, { "epoch": 0.7304289394109083, "grad_norm": 1.558884620666504, "learning_rate": 6.518500000000001e-05, "loss": 0.4081, "step": 13044 }, { "epoch": 0.7304849367230373, "grad_norm": 1.3640022277832031, "learning_rate": 6.519e-05, "loss": 0.4403, "step": 13045 }, { "epoch": 0.7305409340351663, "grad_norm": 1.130803108215332, "learning_rate": 6.5195e-05, "loss": 0.3317, "step": 13046 }, { "epoch": 0.7305969313472953, "grad_norm": 1.2308452129364014, "learning_rate": 6.52e-05, "loss": 0.4061, "step": 13047 }, { "epoch": 0.7306529286594243, "grad_norm": 1.3676975965499878, "learning_rate": 6.520500000000001e-05, "loss": 0.4609, "step": 13048 }, { "epoch": 0.7307089259715533, "grad_norm": 1.168544054031372, "learning_rate": 6.521000000000001e-05, "loss": 0.374, "step": 13049 }, { "epoch": 0.7307649232836824, "grad_norm": 1.2402279376983643, "learning_rate": 6.5215e-05, "loss": 0.4881, "step": 13050 }, { "epoch": 
0.7308209205958114, "grad_norm": 1.1424239873886108, "learning_rate": 6.522e-05, "loss": 0.3579, "step": 13051 }, { "epoch": 0.7308769179079404, "grad_norm": 1.6257461309432983, "learning_rate": 6.5225e-05, "loss": 0.5411, "step": 13052 }, { "epoch": 0.7309329152200694, "grad_norm": 1.138053059577942, "learning_rate": 6.523e-05, "loss": 0.3891, "step": 13053 }, { "epoch": 0.7309889125321984, "grad_norm": 1.118177056312561, "learning_rate": 6.523500000000001e-05, "loss": 0.3239, "step": 13054 }, { "epoch": 0.7310449098443275, "grad_norm": 1.5924105644226074, "learning_rate": 6.524e-05, "loss": 0.5878, "step": 13055 }, { "epoch": 0.7311009071564565, "grad_norm": 1.3991609811782837, "learning_rate": 6.5245e-05, "loss": 0.3695, "step": 13056 }, { "epoch": 0.7311569044685855, "grad_norm": 1.188379168510437, "learning_rate": 6.525e-05, "loss": 0.3401, "step": 13057 }, { "epoch": 0.7312129017807145, "grad_norm": 1.364189863204956, "learning_rate": 6.5255e-05, "loss": 0.351, "step": 13058 }, { "epoch": 0.7312688990928435, "grad_norm": 1.245988368988037, "learning_rate": 6.526e-05, "loss": 0.4417, "step": 13059 }, { "epoch": 0.7313248964049726, "grad_norm": 1.2236649990081787, "learning_rate": 6.526499999999999e-05, "loss": 0.3758, "step": 13060 }, { "epoch": 0.7313808937171016, "grad_norm": 1.0870695114135742, "learning_rate": 6.527e-05, "loss": 0.2935, "step": 13061 }, { "epoch": 0.7314368910292306, "grad_norm": 1.4593501091003418, "learning_rate": 6.527500000000001e-05, "loss": 0.4677, "step": 13062 }, { "epoch": 0.7314928883413596, "grad_norm": 1.3815888166427612, "learning_rate": 6.528000000000001e-05, "loss": 0.3849, "step": 13063 }, { "epoch": 0.7315488856534886, "grad_norm": 1.5010366439819336, "learning_rate": 6.528500000000001e-05, "loss": 0.416, "step": 13064 }, { "epoch": 0.7316048829656177, "grad_norm": 1.7675964832305908, "learning_rate": 6.529e-05, "loss": 0.3649, "step": 13065 }, { "epoch": 0.7316608802777467, "grad_norm": 1.5239472389221191, 
"learning_rate": 6.5295e-05, "loss": 0.4546, "step": 13066 }, { "epoch": 0.7317168775898757, "grad_norm": 1.4678200483322144, "learning_rate": 6.53e-05, "loss": 0.4545, "step": 13067 }, { "epoch": 0.7317728749020047, "grad_norm": 1.3831298351287842, "learning_rate": 6.5305e-05, "loss": 0.4052, "step": 13068 }, { "epoch": 0.7318288722141337, "grad_norm": 1.2077752351760864, "learning_rate": 6.531000000000001e-05, "loss": 0.4256, "step": 13069 }, { "epoch": 0.7318848695262627, "grad_norm": 1.3360260725021362, "learning_rate": 6.531500000000001e-05, "loss": 0.4347, "step": 13070 }, { "epoch": 0.7319408668383918, "grad_norm": 1.3099371194839478, "learning_rate": 6.532e-05, "loss": 0.4831, "step": 13071 }, { "epoch": 0.7319968641505208, "grad_norm": 1.2227438688278198, "learning_rate": 6.5325e-05, "loss": 0.4765, "step": 13072 }, { "epoch": 0.7320528614626498, "grad_norm": 1.2485935688018799, "learning_rate": 6.533e-05, "loss": 0.3406, "step": 13073 }, { "epoch": 0.7321088587747788, "grad_norm": 1.3343572616577148, "learning_rate": 6.5335e-05, "loss": 0.4625, "step": 13074 }, { "epoch": 0.7321648560869078, "grad_norm": 1.1251615285873413, "learning_rate": 6.534e-05, "loss": 0.3521, "step": 13075 }, { "epoch": 0.7322208533990369, "grad_norm": 1.5101428031921387, "learning_rate": 6.5345e-05, "loss": 0.5716, "step": 13076 }, { "epoch": 0.7322768507111659, "grad_norm": 1.3580188751220703, "learning_rate": 6.535e-05, "loss": 0.4463, "step": 13077 }, { "epoch": 0.7323328480232949, "grad_norm": 1.3270171880722046, "learning_rate": 6.5355e-05, "loss": 0.5816, "step": 13078 }, { "epoch": 0.7323888453354239, "grad_norm": 1.1969938278198242, "learning_rate": 6.536e-05, "loss": 0.354, "step": 13079 }, { "epoch": 0.7324448426475529, "grad_norm": 1.433491587638855, "learning_rate": 6.5365e-05, "loss": 0.3844, "step": 13080 }, { "epoch": 0.732500839959682, "grad_norm": 1.302200198173523, "learning_rate": 6.536999999999999e-05, "loss": 0.4395, "step": 13081 }, { "epoch": 
0.732556837271811, "grad_norm": 1.4608463048934937, "learning_rate": 6.5375e-05, "loss": 0.4483, "step": 13082 }, { "epoch": 0.73261283458394, "grad_norm": 1.3989903926849365, "learning_rate": 6.538000000000001e-05, "loss": 0.4813, "step": 13083 }, { "epoch": 0.732668831896069, "grad_norm": 1.2609761953353882, "learning_rate": 6.538500000000001e-05, "loss": 0.39, "step": 13084 }, { "epoch": 0.732724829208198, "grad_norm": 1.1653172969818115, "learning_rate": 6.539000000000001e-05, "loss": 0.4204, "step": 13085 }, { "epoch": 0.732780826520327, "grad_norm": 1.3216698169708252, "learning_rate": 6.5395e-05, "loss": 0.4472, "step": 13086 }, { "epoch": 0.7328368238324561, "grad_norm": 1.3972324132919312, "learning_rate": 6.54e-05, "loss": 0.5031, "step": 13087 }, { "epoch": 0.7328928211445851, "grad_norm": 1.205294132232666, "learning_rate": 6.5405e-05, "loss": 0.4253, "step": 13088 }, { "epoch": 0.7329488184567141, "grad_norm": 1.3671629428863525, "learning_rate": 6.541e-05, "loss": 0.4748, "step": 13089 }, { "epoch": 0.7330048157688431, "grad_norm": 1.2196563482284546, "learning_rate": 6.541500000000001e-05, "loss": 0.4939, "step": 13090 }, { "epoch": 0.7330608130809722, "grad_norm": 1.2559672594070435, "learning_rate": 6.542000000000001e-05, "loss": 0.3385, "step": 13091 }, { "epoch": 0.7331168103931012, "grad_norm": 1.2398109436035156, "learning_rate": 6.5425e-05, "loss": 0.4241, "step": 13092 }, { "epoch": 0.7331728077052302, "grad_norm": 1.8966413736343384, "learning_rate": 6.543e-05, "loss": 0.5796, "step": 13093 }, { "epoch": 0.7332288050173592, "grad_norm": 1.4565420150756836, "learning_rate": 6.5435e-05, "loss": 0.4427, "step": 13094 }, { "epoch": 0.7332848023294882, "grad_norm": 1.4604127407073975, "learning_rate": 6.544e-05, "loss": 0.5573, "step": 13095 }, { "epoch": 0.7333407996416172, "grad_norm": 1.456725001335144, "learning_rate": 6.5445e-05, "loss": 0.459, "step": 13096 }, { "epoch": 0.7333967969537463, "grad_norm": 1.8771783113479614, "learning_rate": 
6.545e-05, "loss": 0.4563, "step": 13097 }, { "epoch": 0.7334527942658753, "grad_norm": 1.2887972593307495, "learning_rate": 6.5455e-05, "loss": 0.5581, "step": 13098 }, { "epoch": 0.7335087915780043, "grad_norm": 1.593860387802124, "learning_rate": 6.546e-05, "loss": 0.4394, "step": 13099 }, { "epoch": 0.7335647888901333, "grad_norm": 1.411238431930542, "learning_rate": 6.5465e-05, "loss": 0.4793, "step": 13100 }, { "epoch": 0.7336207862022623, "grad_norm": 1.213128924369812, "learning_rate": 6.547e-05, "loss": 0.4316, "step": 13101 }, { "epoch": 0.7336767835143914, "grad_norm": 1.4428856372833252, "learning_rate": 6.5475e-05, "loss": 0.4737, "step": 13102 }, { "epoch": 0.7337327808265204, "grad_norm": 1.2088133096694946, "learning_rate": 6.548e-05, "loss": 0.4506, "step": 13103 }, { "epoch": 0.7337887781386494, "grad_norm": 1.3256062269210815, "learning_rate": 6.548500000000001e-05, "loss": 0.5224, "step": 13104 }, { "epoch": 0.7338447754507783, "grad_norm": 1.1555911302566528, "learning_rate": 6.549000000000001e-05, "loss": 0.3475, "step": 13105 }, { "epoch": 0.7339007727629073, "grad_norm": 1.248432993888855, "learning_rate": 6.549500000000001e-05, "loss": 0.3861, "step": 13106 }, { "epoch": 0.7339567700750363, "grad_norm": 1.095834732055664, "learning_rate": 6.55e-05, "loss": 0.3302, "step": 13107 }, { "epoch": 0.7340127673871654, "grad_norm": 0.9473075866699219, "learning_rate": 6.5505e-05, "loss": 0.2724, "step": 13108 }, { "epoch": 0.7340687646992944, "grad_norm": null, "learning_rate": 6.5505e-05, "loss": 0.4543, "step": 13109 }, { "epoch": 0.7341247620114234, "grad_norm": 1.1974743604660034, "learning_rate": 6.551e-05, "loss": 0.3362, "step": 13110 }, { "epoch": 0.7341807593235524, "grad_norm": 1.695594072341919, "learning_rate": 6.5515e-05, "loss": 0.4449, "step": 13111 }, { "epoch": 0.7342367566356814, "grad_norm": 1.3305492401123047, "learning_rate": 6.552000000000001e-05, "loss": 0.3226, "step": 13112 }, { "epoch": 0.7342927539478105, "grad_norm": 
1.1246436834335327, "learning_rate": 6.552500000000001e-05, "loss": 0.3647, "step": 13113 }, { "epoch": 0.7343487512599395, "grad_norm": 1.2378196716308594, "learning_rate": 6.553e-05, "loss": 0.4981, "step": 13114 }, { "epoch": 0.7344047485720685, "grad_norm": 2.289897918701172, "learning_rate": 6.5535e-05, "loss": 0.5156, "step": 13115 }, { "epoch": 0.7344607458841975, "grad_norm": 1.1310378313064575, "learning_rate": 6.554e-05, "loss": 0.4595, "step": 13116 }, { "epoch": 0.7345167431963265, "grad_norm": 1.167104959487915, "learning_rate": 6.5545e-05, "loss": 0.3827, "step": 13117 }, { "epoch": 0.7345727405084556, "grad_norm": 1.4493050575256348, "learning_rate": 6.555e-05, "loss": 0.4118, "step": 13118 }, { "epoch": 0.7346287378205846, "grad_norm": 1.5088547468185425, "learning_rate": 6.5555e-05, "loss": 0.4831, "step": 13119 }, { "epoch": 0.7346847351327136, "grad_norm": 1.1988805532455444, "learning_rate": 6.556e-05, "loss": 0.419, "step": 13120 }, { "epoch": 0.7347407324448426, "grad_norm": 1.561507225036621, "learning_rate": 6.5565e-05, "loss": 0.5169, "step": 13121 }, { "epoch": 0.7347967297569716, "grad_norm": 1.3324174880981445, "learning_rate": 6.557e-05, "loss": 0.4567, "step": 13122 }, { "epoch": 0.7348527270691007, "grad_norm": 1.2111668586730957, "learning_rate": 6.557500000000001e-05, "loss": 0.4117, "step": 13123 }, { "epoch": 0.7349087243812297, "grad_norm": 1.8720570802688599, "learning_rate": 6.558e-05, "loss": 0.5808, "step": 13124 }, { "epoch": 0.7349647216933587, "grad_norm": 1.7627100944519043, "learning_rate": 6.5585e-05, "loss": 0.4777, "step": 13125 }, { "epoch": 0.7350207190054877, "grad_norm": 1.185268759727478, "learning_rate": 6.559e-05, "loss": 0.3688, "step": 13126 }, { "epoch": 0.7350767163176167, "grad_norm": 1.5869851112365723, "learning_rate": 6.559500000000001e-05, "loss": 0.5782, "step": 13127 }, { "epoch": 0.7351327136297457, "grad_norm": 1.3997811079025269, "learning_rate": 6.560000000000001e-05, "loss": 0.3659, "step": 
13128 }, { "epoch": 0.7351887109418748, "grad_norm": 1.2208737134933472, "learning_rate": 6.5605e-05, "loss": 0.4605, "step": 13129 }, { "epoch": 0.7352447082540038, "grad_norm": 1.28058922290802, "learning_rate": 6.561e-05, "loss": 0.537, "step": 13130 }, { "epoch": 0.7353007055661328, "grad_norm": 1.0700160264968872, "learning_rate": 6.5615e-05, "loss": 0.4376, "step": 13131 }, { "epoch": 0.7353567028782618, "grad_norm": 1.3895633220672607, "learning_rate": 6.562e-05, "loss": 0.4032, "step": 13132 }, { "epoch": 0.7354127001903908, "grad_norm": 1.2850769758224487, "learning_rate": 6.562500000000001e-05, "loss": 0.5439, "step": 13133 }, { "epoch": 0.7354686975025199, "grad_norm": 1.3515318632125854, "learning_rate": 6.563000000000001e-05, "loss": 0.5074, "step": 13134 }, { "epoch": 0.7355246948146489, "grad_norm": 1.4849662780761719, "learning_rate": 6.5635e-05, "loss": 0.6059, "step": 13135 }, { "epoch": 0.7355806921267779, "grad_norm": 1.2736537456512451, "learning_rate": 6.564e-05, "loss": 0.5221, "step": 13136 }, { "epoch": 0.7356366894389069, "grad_norm": 1.1367994546890259, "learning_rate": 6.5645e-05, "loss": 0.4085, "step": 13137 }, { "epoch": 0.7356926867510359, "grad_norm": 1.3408769369125366, "learning_rate": 6.565e-05, "loss": 0.4669, "step": 13138 }, { "epoch": 0.735748684063165, "grad_norm": 1.2693918943405151, "learning_rate": 6.5655e-05, "loss": 0.4661, "step": 13139 }, { "epoch": 0.735804681375294, "grad_norm": 1.6538734436035156, "learning_rate": 6.566e-05, "loss": 0.4861, "step": 13140 }, { "epoch": 0.735860678687423, "grad_norm": 1.1471478939056396, "learning_rate": 6.5665e-05, "loss": 0.4333, "step": 13141 }, { "epoch": 0.735916675999552, "grad_norm": 1.5495682954788208, "learning_rate": 6.567e-05, "loss": 0.4726, "step": 13142 }, { "epoch": 0.735972673311681, "grad_norm": 1.4408315420150757, "learning_rate": 6.5675e-05, "loss": 0.5122, "step": 13143 }, { "epoch": 0.73602867062381, "grad_norm": 1.115729808807373, "learning_rate": 
6.568000000000001e-05, "loss": 0.3786, "step": 13144 }, { "epoch": 0.7360846679359391, "grad_norm": 1.2041523456573486, "learning_rate": 6.5685e-05, "loss": 0.3107, "step": 13145 }, { "epoch": 0.7361406652480681, "grad_norm": 1.2265079021453857, "learning_rate": 6.569e-05, "loss": 0.3852, "step": 13146 }, { "epoch": 0.7361966625601971, "grad_norm": 1.2132033109664917, "learning_rate": 6.5695e-05, "loss": 0.475, "step": 13147 }, { "epoch": 0.7362526598723261, "grad_norm": 1.082369089126587, "learning_rate": 6.570000000000001e-05, "loss": 0.3402, "step": 13148 }, { "epoch": 0.7363086571844552, "grad_norm": 1.333266019821167, "learning_rate": 6.570500000000001e-05, "loss": 0.4125, "step": 13149 }, { "epoch": 0.7363646544965842, "grad_norm": 1.2038555145263672, "learning_rate": 6.571e-05, "loss": 0.3986, "step": 13150 }, { "epoch": 0.7364206518087132, "grad_norm": 1.3272336721420288, "learning_rate": 6.5715e-05, "loss": 0.4539, "step": 13151 }, { "epoch": 0.7364766491208422, "grad_norm": 1.3587881326675415, "learning_rate": 6.572e-05, "loss": 0.4755, "step": 13152 }, { "epoch": 0.7365326464329712, "grad_norm": 1.3513455390930176, "learning_rate": 6.5725e-05, "loss": 0.4184, "step": 13153 }, { "epoch": 0.7365886437451002, "grad_norm": 1.403626561164856, "learning_rate": 6.573000000000001e-05, "loss": 0.5434, "step": 13154 }, { "epoch": 0.7366446410572293, "grad_norm": 1.3098336458206177, "learning_rate": 6.5735e-05, "loss": 0.4434, "step": 13155 }, { "epoch": 0.7367006383693583, "grad_norm": 1.129221796989441, "learning_rate": 6.574e-05, "loss": 0.3664, "step": 13156 }, { "epoch": 0.7367566356814873, "grad_norm": 1.4928340911865234, "learning_rate": 6.5745e-05, "loss": 0.4269, "step": 13157 }, { "epoch": 0.7368126329936163, "grad_norm": 1.458243727684021, "learning_rate": 6.575e-05, "loss": 0.44, "step": 13158 }, { "epoch": 0.7368686303057453, "grad_norm": 1.3369060754776, "learning_rate": 6.5755e-05, "loss": 0.5347, "step": 13159 }, { "epoch": 0.7369246276178744, 
"grad_norm": 1.359553337097168, "learning_rate": 6.576e-05, "loss": 0.3842, "step": 13160 }, { "epoch": 0.7369806249300034, "grad_norm": 1.1033194065093994, "learning_rate": 6.5765e-05, "loss": 0.3966, "step": 13161 }, { "epoch": 0.7370366222421324, "grad_norm": 1.2944777011871338, "learning_rate": 6.577e-05, "loss": 0.5503, "step": 13162 }, { "epoch": 0.7370926195542614, "grad_norm": 1.1972578763961792, "learning_rate": 6.5775e-05, "loss": 0.4032, "step": 13163 }, { "epoch": 0.7371486168663904, "grad_norm": 1.368035078048706, "learning_rate": 6.578000000000001e-05, "loss": 0.4314, "step": 13164 }, { "epoch": 0.7372046141785195, "grad_norm": 1.3839082717895508, "learning_rate": 6.578500000000001e-05, "loss": 0.3676, "step": 13165 }, { "epoch": 0.7372606114906485, "grad_norm": 1.2854747772216797, "learning_rate": 6.579e-05, "loss": 0.4638, "step": 13166 }, { "epoch": 0.7373166088027775, "grad_norm": 1.4696745872497559, "learning_rate": 6.5795e-05, "loss": 0.4465, "step": 13167 }, { "epoch": 0.7373726061149065, "grad_norm": 1.368017315864563, "learning_rate": 6.58e-05, "loss": 0.4276, "step": 13168 }, { "epoch": 0.7374286034270355, "grad_norm": 1.272612452507019, "learning_rate": 6.580500000000001e-05, "loss": 0.4243, "step": 13169 }, { "epoch": 0.7374846007391646, "grad_norm": 1.2280429601669312, "learning_rate": 6.581000000000001e-05, "loss": 0.5592, "step": 13170 }, { "epoch": 0.7375405980512936, "grad_norm": 1.3454099893569946, "learning_rate": 6.5815e-05, "loss": 0.4519, "step": 13171 }, { "epoch": 0.7375965953634226, "grad_norm": 1.195507526397705, "learning_rate": 6.582e-05, "loss": 0.384, "step": 13172 }, { "epoch": 0.7376525926755516, "grad_norm": 0.985457718372345, "learning_rate": 6.5825e-05, "loss": 0.3247, "step": 13173 }, { "epoch": 0.7377085899876806, "grad_norm": 1.602728247642517, "learning_rate": 6.583e-05, "loss": 0.4885, "step": 13174 }, { "epoch": 0.7377645872998096, "grad_norm": 1.2869107723236084, "learning_rate": 6.5835e-05, "loss": 0.3641, 
"step": 13175 }, { "epoch": 0.7378205846119387, "grad_norm": 1.3212437629699707, "learning_rate": 6.584e-05, "loss": 0.4423, "step": 13176 }, { "epoch": 0.7378765819240677, "grad_norm": 2.345132350921631, "learning_rate": 6.5845e-05, "loss": 0.3675, "step": 13177 }, { "epoch": 0.7379325792361967, "grad_norm": 1.5399130582809448, "learning_rate": 6.585e-05, "loss": 0.6307, "step": 13178 }, { "epoch": 0.7379885765483257, "grad_norm": 1.4886460304260254, "learning_rate": 6.5855e-05, "loss": 0.3838, "step": 13179 }, { "epoch": 0.7380445738604547, "grad_norm": 1.2662973403930664, "learning_rate": 6.586e-05, "loss": 0.4451, "step": 13180 }, { "epoch": 0.7381005711725838, "grad_norm": 1.113808035850525, "learning_rate": 6.5865e-05, "loss": 0.4122, "step": 13181 }, { "epoch": 0.7381565684847128, "grad_norm": 1.5719285011291504, "learning_rate": 6.587e-05, "loss": 0.4248, "step": 13182 }, { "epoch": 0.7382125657968418, "grad_norm": 1.3040238618850708, "learning_rate": 6.5875e-05, "loss": 0.4513, "step": 13183 }, { "epoch": 0.7382685631089708, "grad_norm": 1.3562145233154297, "learning_rate": 6.588000000000001e-05, "loss": 0.4359, "step": 13184 }, { "epoch": 0.7383245604210998, "grad_norm": 1.2630589008331299, "learning_rate": 6.588500000000001e-05, "loss": 0.4191, "step": 13185 }, { "epoch": 0.7383805577332289, "grad_norm": 1.2360169887542725, "learning_rate": 6.589000000000001e-05, "loss": 0.4591, "step": 13186 }, { "epoch": 0.7384365550453578, "grad_norm": 1.296000361442566, "learning_rate": 6.5895e-05, "loss": 0.4327, "step": 13187 }, { "epoch": 0.7384925523574868, "grad_norm": 1.523987054824829, "learning_rate": 6.59e-05, "loss": 0.456, "step": 13188 }, { "epoch": 0.7385485496696158, "grad_norm": 1.7142254114151, "learning_rate": 6.5905e-05, "loss": 0.5521, "step": 13189 }, { "epoch": 0.7386045469817448, "grad_norm": 1.0339454412460327, "learning_rate": 6.591000000000001e-05, "loss": 0.3673, "step": 13190 }, { "epoch": 0.7386605442938738, "grad_norm": 
1.3455902338027954, "learning_rate": 6.591500000000001e-05, "loss": 0.4764, "step": 13191 }, { "epoch": 0.7387165416060029, "grad_norm": 1.2935000658035278, "learning_rate": 6.592e-05, "loss": 0.4256, "step": 13192 }, { "epoch": 0.7387725389181319, "grad_norm": 1.2083535194396973, "learning_rate": 6.5925e-05, "loss": 0.3604, "step": 13193 }, { "epoch": 0.7388285362302609, "grad_norm": 1.4803552627563477, "learning_rate": 6.593e-05, "loss": 0.4738, "step": 13194 }, { "epoch": 0.7388845335423899, "grad_norm": 1.3232859373092651, "learning_rate": 6.5935e-05, "loss": 0.4574, "step": 13195 }, { "epoch": 0.7389405308545189, "grad_norm": 1.2311434745788574, "learning_rate": 6.594e-05, "loss": 0.5001, "step": 13196 }, { "epoch": 0.738996528166648, "grad_norm": 1.3587087392807007, "learning_rate": 6.5945e-05, "loss": 0.5302, "step": 13197 }, { "epoch": 0.739052525478777, "grad_norm": 1.3923612833023071, "learning_rate": 6.595e-05, "loss": 0.3617, "step": 13198 }, { "epoch": 0.739108522790906, "grad_norm": 1.3061606884002686, "learning_rate": 6.5955e-05, "loss": 0.5975, "step": 13199 }, { "epoch": 0.739164520103035, "grad_norm": 1.2932555675506592, "learning_rate": 6.596e-05, "loss": 0.4979, "step": 13200 }, { "epoch": 0.739220517415164, "grad_norm": 2.5977580547332764, "learning_rate": 6.5965e-05, "loss": 0.3575, "step": 13201 }, { "epoch": 0.739276514727293, "grad_norm": 1.2210155725479126, "learning_rate": 6.597e-05, "loss": 0.5616, "step": 13202 }, { "epoch": 0.7393325120394221, "grad_norm": 1.3751977682113647, "learning_rate": 6.5975e-05, "loss": 0.4162, "step": 13203 }, { "epoch": 0.7393885093515511, "grad_norm": 1.316551685333252, "learning_rate": 6.598e-05, "loss": 0.5675, "step": 13204 }, { "epoch": 0.7394445066636801, "grad_norm": 1.4882951974868774, "learning_rate": 6.598500000000001e-05, "loss": 0.5544, "step": 13205 }, { "epoch": 0.7395005039758091, "grad_norm": 1.2412019968032837, "learning_rate": 6.599000000000001e-05, "loss": 0.3857, "step": 13206 }, { 
"epoch": 0.7395565012879382, "grad_norm": 1.1741154193878174, "learning_rate": 6.599500000000001e-05, "loss": 0.4972, "step": 13207 }, { "epoch": 0.7396124986000672, "grad_norm": 1.460400938987732, "learning_rate": 6.6e-05, "loss": 0.4688, "step": 13208 }, { "epoch": 0.7396684959121962, "grad_norm": 1.3373973369598389, "learning_rate": 6.6005e-05, "loss": 0.3796, "step": 13209 }, { "epoch": 0.7397244932243252, "grad_norm": 1.4087990522384644, "learning_rate": 6.601e-05, "loss": 0.7111, "step": 13210 }, { "epoch": 0.7397804905364542, "grad_norm": 1.1778781414031982, "learning_rate": 6.601500000000001e-05, "loss": 0.2816, "step": 13211 }, { "epoch": 0.7398364878485832, "grad_norm": 1.573437213897705, "learning_rate": 6.602000000000001e-05, "loss": 0.5817, "step": 13212 }, { "epoch": 0.7398924851607123, "grad_norm": 1.4049535989761353, "learning_rate": 6.6025e-05, "loss": 0.3513, "step": 13213 }, { "epoch": 0.7399484824728413, "grad_norm": 1.3372673988342285, "learning_rate": 6.603e-05, "loss": 0.523, "step": 13214 }, { "epoch": 0.7400044797849703, "grad_norm": 1.2721821069717407, "learning_rate": 6.6035e-05, "loss": 0.5244, "step": 13215 }, { "epoch": 0.7400604770970993, "grad_norm": 1.2308804988861084, "learning_rate": 6.604e-05, "loss": 0.3764, "step": 13216 }, { "epoch": 0.7401164744092283, "grad_norm": 1.28029465675354, "learning_rate": 6.6045e-05, "loss": 0.4245, "step": 13217 }, { "epoch": 0.7401724717213574, "grad_norm": 1.2959840297698975, "learning_rate": 6.605e-05, "loss": 0.5164, "step": 13218 }, { "epoch": 0.7402284690334864, "grad_norm": 1.1691176891326904, "learning_rate": 6.6055e-05, "loss": 0.4248, "step": 13219 }, { "epoch": 0.7402844663456154, "grad_norm": 1.4101749658584595, "learning_rate": 6.606e-05, "loss": 0.4106, "step": 13220 }, { "epoch": 0.7403404636577444, "grad_norm": 1.2446681261062622, "learning_rate": 6.6065e-05, "loss": 0.5375, "step": 13221 }, { "epoch": 0.7403964609698734, "grad_norm": 1.48183274269104, "learning_rate": 6.607e-05, 
"loss": 0.543, "step": 13222 }, { "epoch": 0.7404524582820025, "grad_norm": 1.2818819284439087, "learning_rate": 6.6075e-05, "loss": 0.3969, "step": 13223 }, { "epoch": 0.7405084555941315, "grad_norm": 1.3402365446090698, "learning_rate": 6.608e-05, "loss": 0.3758, "step": 13224 }, { "epoch": 0.7405644529062605, "grad_norm": 1.1508859395980835, "learning_rate": 6.6085e-05, "loss": 0.4746, "step": 13225 }, { "epoch": 0.7406204502183895, "grad_norm": 1.318395733833313, "learning_rate": 6.609000000000001e-05, "loss": 0.4383, "step": 13226 }, { "epoch": 0.7406764475305185, "grad_norm": 1.2404429912567139, "learning_rate": 6.609500000000001e-05, "loss": 0.4597, "step": 13227 }, { "epoch": 0.7407324448426476, "grad_norm": 1.0450571775436401, "learning_rate": 6.610000000000001e-05, "loss": 0.3557, "step": 13228 }, { "epoch": 0.7407884421547766, "grad_norm": 1.453265905380249, "learning_rate": 6.6105e-05, "loss": 0.4221, "step": 13229 }, { "epoch": 0.7408444394669056, "grad_norm": 1.6890655755996704, "learning_rate": 6.611e-05, "loss": 0.4914, "step": 13230 }, { "epoch": 0.7409004367790346, "grad_norm": 1.4456104040145874, "learning_rate": 6.6115e-05, "loss": 0.343, "step": 13231 }, { "epoch": 0.7409564340911636, "grad_norm": 1.3714569807052612, "learning_rate": 6.612000000000001e-05, "loss": 0.365, "step": 13232 }, { "epoch": 0.7410124314032926, "grad_norm": 1.425466537475586, "learning_rate": 6.612500000000001e-05, "loss": 0.431, "step": 13233 }, { "epoch": 0.7410684287154217, "grad_norm": 1.1059119701385498, "learning_rate": 6.613e-05, "loss": 0.4517, "step": 13234 }, { "epoch": 0.7411244260275507, "grad_norm": 1.4817248582839966, "learning_rate": 6.6135e-05, "loss": 0.456, "step": 13235 }, { "epoch": 0.7411804233396797, "grad_norm": 1.1598478555679321, "learning_rate": 6.614e-05, "loss": 0.3313, "step": 13236 }, { "epoch": 0.7412364206518087, "grad_norm": 1.3717542886734009, "learning_rate": 6.6145e-05, "loss": 0.5052, "step": 13237 }, { "epoch": 0.7412924179639377, 
"grad_norm": 1.4055230617523193, "learning_rate": 6.615e-05, "loss": 0.4718, "step": 13238 }, { "epoch": 0.7413484152760668, "grad_norm": 2.4605820178985596, "learning_rate": 6.6155e-05, "loss": 0.4463, "step": 13239 }, { "epoch": 0.7414044125881958, "grad_norm": 1.2312734127044678, "learning_rate": 6.616e-05, "loss": 0.4432, "step": 13240 }, { "epoch": 0.7414604099003248, "grad_norm": 1.5527117252349854, "learning_rate": 6.6165e-05, "loss": 0.7414, "step": 13241 }, { "epoch": 0.7415164072124538, "grad_norm": 1.4419389963150024, "learning_rate": 6.617e-05, "loss": 0.4566, "step": 13242 }, { "epoch": 0.7415724045245828, "grad_norm": 1.1105155944824219, "learning_rate": 6.6175e-05, "loss": 0.3224, "step": 13243 }, { "epoch": 0.7416284018367119, "grad_norm": 1.355978012084961, "learning_rate": 6.618e-05, "loss": 0.5827, "step": 13244 }, { "epoch": 0.7416843991488409, "grad_norm": 1.4871820211410522, "learning_rate": 6.6185e-05, "loss": 0.5677, "step": 13245 }, { "epoch": 0.7417403964609699, "grad_norm": 1.2730991840362549, "learning_rate": 6.619e-05, "loss": 0.4863, "step": 13246 }, { "epoch": 0.7417963937730989, "grad_norm": 6.5206217765808105, "learning_rate": 6.619500000000001e-05, "loss": 0.4476, "step": 13247 }, { "epoch": 0.7418523910852279, "grad_norm": 1.3759405612945557, "learning_rate": 6.620000000000001e-05, "loss": 0.4373, "step": 13248 }, { "epoch": 0.741908388397357, "grad_norm": 1.4291212558746338, "learning_rate": 6.620500000000001e-05, "loss": 0.4196, "step": 13249 }, { "epoch": 0.741964385709486, "grad_norm": 1.3058536052703857, "learning_rate": 6.621e-05, "loss": 0.4366, "step": 13250 }, { "epoch": 0.742020383021615, "grad_norm": 1.2478581666946411, "learning_rate": 6.6215e-05, "loss": 0.3752, "step": 13251 }, { "epoch": 0.742076380333744, "grad_norm": 1.4263951778411865, "learning_rate": 6.622e-05, "loss": 0.5217, "step": 13252 }, { "epoch": 0.742132377645873, "grad_norm": 1.5010628700256348, "learning_rate": 6.6225e-05, "loss": 0.5169, "step": 
13253 }, { "epoch": 0.742188374958002, "grad_norm": 1.0944013595581055, "learning_rate": 6.623000000000001e-05, "loss": 0.3406, "step": 13254 }, { "epoch": 0.7422443722701311, "grad_norm": 1.07964289188385, "learning_rate": 6.6235e-05, "loss": 0.3515, "step": 13255 }, { "epoch": 0.7423003695822601, "grad_norm": 1.139193058013916, "learning_rate": 6.624e-05, "loss": 0.4078, "step": 13256 }, { "epoch": 0.7423563668943891, "grad_norm": 1.1736921072006226, "learning_rate": 6.6245e-05, "loss": 0.359, "step": 13257 }, { "epoch": 0.7424123642065181, "grad_norm": 1.059420108795166, "learning_rate": 6.625e-05, "loss": 0.4044, "step": 13258 }, { "epoch": 0.7424683615186471, "grad_norm": 1.1916396617889404, "learning_rate": 6.6255e-05, "loss": 0.3729, "step": 13259 }, { "epoch": 0.7425243588307762, "grad_norm": 1.566210150718689, "learning_rate": 6.626e-05, "loss": 0.4848, "step": 13260 }, { "epoch": 0.7425803561429052, "grad_norm": 1.3561042547225952, "learning_rate": 6.6265e-05, "loss": 0.437, "step": 13261 }, { "epoch": 0.7426363534550342, "grad_norm": 1.1792902946472168, "learning_rate": 6.627e-05, "loss": 0.3632, "step": 13262 }, { "epoch": 0.7426923507671632, "grad_norm": 1.3329291343688965, "learning_rate": 6.6275e-05, "loss": 0.4487, "step": 13263 }, { "epoch": 0.7427483480792922, "grad_norm": 1.4227898120880127, "learning_rate": 6.628e-05, "loss": 0.5119, "step": 13264 }, { "epoch": 0.7428043453914213, "grad_norm": 1.191625714302063, "learning_rate": 6.6285e-05, "loss": 0.4121, "step": 13265 }, { "epoch": 0.7428603427035503, "grad_norm": 1.4005130529403687, "learning_rate": 6.629e-05, "loss": 0.5107, "step": 13266 }, { "epoch": 0.7429163400156793, "grad_norm": 1.187508225440979, "learning_rate": 6.6295e-05, "loss": 0.4321, "step": 13267 }, { "epoch": 0.7429723373278083, "grad_norm": 1.2492766380310059, "learning_rate": 6.630000000000001e-05, "loss": 0.4224, "step": 13268 }, { "epoch": 0.7430283346399373, "grad_norm": 1.3098195791244507, "learning_rate": 
6.630500000000001e-05, "loss": 0.3745, "step": 13269 }, { "epoch": 0.7430843319520662, "grad_norm": 1.9010365009307861, "learning_rate": 6.631000000000001e-05, "loss": 0.5458, "step": 13270 }, { "epoch": 0.7431403292641953, "grad_norm": 1.2563695907592773, "learning_rate": 6.6315e-05, "loss": 0.3183, "step": 13271 }, { "epoch": 0.7431963265763243, "grad_norm": 1.824275255203247, "learning_rate": 6.632e-05, "loss": 0.5482, "step": 13272 }, { "epoch": 0.7432523238884533, "grad_norm": 1.6714365482330322, "learning_rate": 6.6325e-05, "loss": 0.392, "step": 13273 }, { "epoch": 0.7433083212005823, "grad_norm": 1.7867680788040161, "learning_rate": 6.633e-05, "loss": 0.5196, "step": 13274 }, { "epoch": 0.7433643185127113, "grad_norm": 1.5641555786132812, "learning_rate": 6.633500000000001e-05, "loss": 0.5168, "step": 13275 }, { "epoch": 0.7434203158248404, "grad_norm": 1.3071430921554565, "learning_rate": 6.634e-05, "loss": 0.6255, "step": 13276 }, { "epoch": 0.7434763131369694, "grad_norm": 1.3810094594955444, "learning_rate": 6.6345e-05, "loss": 0.5977, "step": 13277 }, { "epoch": 0.7435323104490984, "grad_norm": 1.140381932258606, "learning_rate": 6.635e-05, "loss": 0.3518, "step": 13278 }, { "epoch": 0.7435883077612274, "grad_norm": 1.3058286905288696, "learning_rate": 6.6355e-05, "loss": 0.5564, "step": 13279 }, { "epoch": 0.7436443050733564, "grad_norm": 1.2812680006027222, "learning_rate": 6.636e-05, "loss": 0.391, "step": 13280 }, { "epoch": 0.7437003023854855, "grad_norm": 1.4396165609359741, "learning_rate": 6.6365e-05, "loss": 0.4724, "step": 13281 }, { "epoch": 0.7437562996976145, "grad_norm": 1.5894254446029663, "learning_rate": 6.637e-05, "loss": 0.4654, "step": 13282 }, { "epoch": 0.7438122970097435, "grad_norm": 1.2542455196380615, "learning_rate": 6.6375e-05, "loss": 0.3891, "step": 13283 }, { "epoch": 0.7438682943218725, "grad_norm": 1.3622913360595703, "learning_rate": 6.638e-05, "loss": 0.4856, "step": 13284 }, { "epoch": 0.7439242916340015, 
"grad_norm": 1.3187345266342163, "learning_rate": 6.638500000000001e-05, "loss": 0.4348, "step": 13285 }, { "epoch": 0.7439802889461306, "grad_norm": 1.3023624420166016, "learning_rate": 6.639e-05, "loss": 0.548, "step": 13286 }, { "epoch": 0.7440362862582596, "grad_norm": 1.7420628070831299, "learning_rate": 6.6395e-05, "loss": 0.5877, "step": 13287 }, { "epoch": 0.7440922835703886, "grad_norm": 1.394727349281311, "learning_rate": 6.64e-05, "loss": 0.4225, "step": 13288 }, { "epoch": 0.7441482808825176, "grad_norm": 1.368857741355896, "learning_rate": 6.640500000000001e-05, "loss": 0.3973, "step": 13289 }, { "epoch": 0.7442042781946466, "grad_norm": 1.4397590160369873, "learning_rate": 6.641000000000001e-05, "loss": 0.4232, "step": 13290 }, { "epoch": 0.7442602755067756, "grad_norm": 1.462835669517517, "learning_rate": 6.641500000000001e-05, "loss": 0.408, "step": 13291 }, { "epoch": 0.7443162728189047, "grad_norm": 1.2053711414337158, "learning_rate": 6.642e-05, "loss": 0.3666, "step": 13292 }, { "epoch": 0.7443722701310337, "grad_norm": 1.3954273462295532, "learning_rate": 6.6425e-05, "loss": 0.4062, "step": 13293 }, { "epoch": 0.7444282674431627, "grad_norm": 1.2463713884353638, "learning_rate": 6.643e-05, "loss": 0.4379, "step": 13294 }, { "epoch": 0.7444842647552917, "grad_norm": 1.265592098236084, "learning_rate": 6.6435e-05, "loss": 0.4643, "step": 13295 }, { "epoch": 0.7445402620674207, "grad_norm": 1.3765982389450073, "learning_rate": 6.644000000000001e-05, "loss": 0.4802, "step": 13296 }, { "epoch": 0.7445962593795498, "grad_norm": 1.4257162809371948, "learning_rate": 6.6445e-05, "loss": 0.4158, "step": 13297 }, { "epoch": 0.7446522566916788, "grad_norm": 1.2696559429168701, "learning_rate": 6.645e-05, "loss": 0.4479, "step": 13298 }, { "epoch": 0.7447082540038078, "grad_norm": 1.3374682664871216, "learning_rate": 6.6455e-05, "loss": 0.3887, "step": 13299 }, { "epoch": 0.7447642513159368, "grad_norm": 1.3666870594024658, "learning_rate": 6.646e-05, 
"loss": 0.4426, "step": 13300 }, { "epoch": 0.7448202486280658, "grad_norm": 1.2497977018356323, "learning_rate": 6.6465e-05, "loss": 0.5023, "step": 13301 }, { "epoch": 0.7448762459401949, "grad_norm": 1.2170549631118774, "learning_rate": 6.647e-05, "loss": 0.4167, "step": 13302 }, { "epoch": 0.7449322432523239, "grad_norm": 1.234202265739441, "learning_rate": 6.6475e-05, "loss": 0.4098, "step": 13303 }, { "epoch": 0.7449882405644529, "grad_norm": 1.3242830038070679, "learning_rate": 6.648e-05, "loss": 0.4357, "step": 13304 }, { "epoch": 0.7450442378765819, "grad_norm": 1.7305229902267456, "learning_rate": 6.648500000000001e-05, "loss": 0.4228, "step": 13305 }, { "epoch": 0.7451002351887109, "grad_norm": 1.6236753463745117, "learning_rate": 6.649000000000001e-05, "loss": 0.5207, "step": 13306 }, { "epoch": 0.74515623250084, "grad_norm": 1.654104471206665, "learning_rate": 6.6495e-05, "loss": 0.5178, "step": 13307 }, { "epoch": 0.745212229812969, "grad_norm": 1.4475064277648926, "learning_rate": 6.65e-05, "loss": 0.5265, "step": 13308 }, { "epoch": 0.745268227125098, "grad_norm": 1.9489260911941528, "learning_rate": 6.6505e-05, "loss": 0.4266, "step": 13309 }, { "epoch": 0.745324224437227, "grad_norm": 1.3361420631408691, "learning_rate": 6.651000000000001e-05, "loss": 0.3858, "step": 13310 }, { "epoch": 0.745380221749356, "grad_norm": 1.3716928958892822, "learning_rate": 6.651500000000001e-05, "loss": 0.4142, "step": 13311 }, { "epoch": 0.745436219061485, "grad_norm": 1.0638846158981323, "learning_rate": 6.652000000000001e-05, "loss": 0.3401, "step": 13312 }, { "epoch": 0.7454922163736141, "grad_norm": 1.6768540143966675, "learning_rate": 6.6525e-05, "loss": 0.4647, "step": 13313 }, { "epoch": 0.7455482136857431, "grad_norm": 1.8939396142959595, "learning_rate": 6.653e-05, "loss": 0.4594, "step": 13314 }, { "epoch": 0.7456042109978721, "grad_norm": 1.3508588075637817, "learning_rate": 6.6535e-05, "loss": 0.345, "step": 13315 }, { "epoch": 0.7456602083100011, 
"grad_norm": 1.226591944694519, "learning_rate": 6.654e-05, "loss": 0.3969, "step": 13316 }, { "epoch": 0.7457162056221301, "grad_norm": 1.1925842761993408, "learning_rate": 6.654500000000001e-05, "loss": 0.5444, "step": 13317 }, { "epoch": 0.7457722029342592, "grad_norm": 1.6634101867675781, "learning_rate": 6.655e-05, "loss": 0.5147, "step": 13318 }, { "epoch": 0.7458282002463882, "grad_norm": 1.3468554019927979, "learning_rate": 6.6555e-05, "loss": 0.4423, "step": 13319 }, { "epoch": 0.7458841975585172, "grad_norm": 1.2828065156936646, "learning_rate": 6.656e-05, "loss": 0.3592, "step": 13320 }, { "epoch": 0.7459401948706462, "grad_norm": 1.5055537223815918, "learning_rate": 6.6565e-05, "loss": 0.445, "step": 13321 }, { "epoch": 0.7459961921827752, "grad_norm": 1.1748046875, "learning_rate": 6.657e-05, "loss": 0.3662, "step": 13322 }, { "epoch": 0.7460521894949043, "grad_norm": 1.1813485622406006, "learning_rate": 6.657499999999999e-05, "loss": 0.3375, "step": 13323 }, { "epoch": 0.7461081868070333, "grad_norm": 1.186800241470337, "learning_rate": 6.658e-05, "loss": 0.4368, "step": 13324 }, { "epoch": 0.7461641841191623, "grad_norm": 1.5023611783981323, "learning_rate": 6.658500000000001e-05, "loss": 0.4546, "step": 13325 }, { "epoch": 0.7462201814312913, "grad_norm": 1.4181703329086304, "learning_rate": 6.659000000000001e-05, "loss": 0.4254, "step": 13326 }, { "epoch": 0.7462761787434203, "grad_norm": 1.2328901290893555, "learning_rate": 6.659500000000001e-05, "loss": 0.37, "step": 13327 }, { "epoch": 0.7463321760555494, "grad_norm": 2.5455312728881836, "learning_rate": 6.66e-05, "loss": 0.5461, "step": 13328 }, { "epoch": 0.7463881733676784, "grad_norm": 1.4440556764602661, "learning_rate": 6.6605e-05, "loss": 0.5065, "step": 13329 }, { "epoch": 0.7464441706798074, "grad_norm": 1.3674674034118652, "learning_rate": 6.661e-05, "loss": 0.4871, "step": 13330 }, { "epoch": 0.7465001679919364, "grad_norm": 1.402139663696289, "learning_rate": 6.661500000000001e-05, 
"loss": 0.4866, "step": 13331 }, { "epoch": 0.7465561653040654, "grad_norm": 1.5644489526748657, "learning_rate": 6.662000000000001e-05, "loss": 0.4805, "step": 13332 }, { "epoch": 0.7466121626161945, "grad_norm": 1.9279708862304688, "learning_rate": 6.6625e-05, "loss": 0.6546, "step": 13333 }, { "epoch": 0.7466681599283235, "grad_norm": 1.2155046463012695, "learning_rate": 6.663e-05, "loss": 0.3641, "step": 13334 }, { "epoch": 0.7467241572404525, "grad_norm": 1.3035589456558228, "learning_rate": 6.6635e-05, "loss": 0.3426, "step": 13335 }, { "epoch": 0.7467801545525815, "grad_norm": 1.1661807298660278, "learning_rate": 6.664e-05, "loss": 0.4449, "step": 13336 }, { "epoch": 0.7468361518647105, "grad_norm": 1.3552536964416504, "learning_rate": 6.6645e-05, "loss": 0.3868, "step": 13337 }, { "epoch": 0.7468921491768395, "grad_norm": 1.4642640352249146, "learning_rate": 6.665000000000001e-05, "loss": 0.3162, "step": 13338 }, { "epoch": 0.7469481464889686, "grad_norm": 1.3704559803009033, "learning_rate": 6.6655e-05, "loss": 0.5632, "step": 13339 }, { "epoch": 0.7470041438010976, "grad_norm": 1.122069239616394, "learning_rate": 6.666e-05, "loss": 0.5401, "step": 13340 }, { "epoch": 0.7470601411132266, "grad_norm": 1.4768750667572021, "learning_rate": 6.6665e-05, "loss": 0.782, "step": 13341 }, { "epoch": 0.7471161384253556, "grad_norm": 1.275857925415039, "learning_rate": 6.667e-05, "loss": 0.4492, "step": 13342 }, { "epoch": 0.7471721357374846, "grad_norm": 1.7961218357086182, "learning_rate": 6.6675e-05, "loss": 0.4196, "step": 13343 }, { "epoch": 0.7472281330496137, "grad_norm": 1.3244459629058838, "learning_rate": 6.667999999999999e-05, "loss": 0.3435, "step": 13344 }, { "epoch": 0.7472841303617427, "grad_norm": 1.2896100282669067, "learning_rate": 6.6685e-05, "loss": 0.4248, "step": 13345 }, { "epoch": 0.7473401276738717, "grad_norm": 1.4216582775115967, "learning_rate": 6.669000000000001e-05, "loss": 0.4556, "step": 13346 }, { "epoch": 0.7473961249860007, 
"grad_norm": 1.4091801643371582, "learning_rate": 6.669500000000001e-05, "loss": 0.6151, "step": 13347 }, { "epoch": 0.7474521222981297, "grad_norm": 1.1799017190933228, "learning_rate": 6.670000000000001e-05, "loss": 0.4962, "step": 13348 }, { "epoch": 0.7475081196102588, "grad_norm": 1.501054286956787, "learning_rate": 6.6705e-05, "loss": 0.6032, "step": 13349 }, { "epoch": 0.7475641169223878, "grad_norm": 1.2588993310928345, "learning_rate": 6.671e-05, "loss": 0.3253, "step": 13350 }, { "epoch": 0.7476201142345168, "grad_norm": 1.1493582725524902, "learning_rate": 6.6715e-05, "loss": 0.3031, "step": 13351 }, { "epoch": 0.7476761115466458, "grad_norm": 1.250685691833496, "learning_rate": 6.672e-05, "loss": 0.5526, "step": 13352 }, { "epoch": 0.7477321088587747, "grad_norm": 1.6470725536346436, "learning_rate": 6.672500000000001e-05, "loss": 0.6043, "step": 13353 }, { "epoch": 0.7477881061709037, "grad_norm": 1.3246068954467773, "learning_rate": 6.673e-05, "loss": 0.3791, "step": 13354 }, { "epoch": 0.7478441034830328, "grad_norm": 1.1623932123184204, "learning_rate": 6.6735e-05, "loss": 0.4429, "step": 13355 }, { "epoch": 0.7479001007951618, "grad_norm": 1.8283212184906006, "learning_rate": 6.674e-05, "loss": 0.7647, "step": 13356 }, { "epoch": 0.7479560981072908, "grad_norm": 1.244064211845398, "learning_rate": 6.6745e-05, "loss": 0.5447, "step": 13357 }, { "epoch": 0.7480120954194198, "grad_norm": 1.3712365627288818, "learning_rate": 6.675e-05, "loss": 0.5122, "step": 13358 }, { "epoch": 0.7480680927315488, "grad_norm": 1.1770358085632324, "learning_rate": 6.675500000000001e-05, "loss": 0.3591, "step": 13359 }, { "epoch": 0.7481240900436779, "grad_norm": 1.14165198802948, "learning_rate": 6.676e-05, "loss": 0.3627, "step": 13360 }, { "epoch": 0.7481800873558069, "grad_norm": 1.3313946723937988, "learning_rate": 6.6765e-05, "loss": 0.4086, "step": 13361 }, { "epoch": 0.7482360846679359, "grad_norm": 1.3255826234817505, "learning_rate": 6.677e-05, "loss": 0.4672, 
"step": 13362 }, { "epoch": 0.7482920819800649, "grad_norm": 1.2458029985427856, "learning_rate": 6.6775e-05, "loss": 0.4414, "step": 13363 }, { "epoch": 0.7483480792921939, "grad_norm": 1.2485015392303467, "learning_rate": 6.678e-05, "loss": 0.4208, "step": 13364 }, { "epoch": 0.748404076604323, "grad_norm": 1.3235641717910767, "learning_rate": 6.6785e-05, "loss": 0.5394, "step": 13365 }, { "epoch": 0.748460073916452, "grad_norm": 1.4212559461593628, "learning_rate": 6.679e-05, "loss": 0.4528, "step": 13366 }, { "epoch": 0.748516071228581, "grad_norm": 1.4086921215057373, "learning_rate": 6.679500000000001e-05, "loss": 0.524, "step": 13367 }, { "epoch": 0.74857206854071, "grad_norm": 0.9677045941352844, "learning_rate": 6.680000000000001e-05, "loss": 0.3381, "step": 13368 }, { "epoch": 0.748628065852839, "grad_norm": 1.6913973093032837, "learning_rate": 6.680500000000001e-05, "loss": 0.4431, "step": 13369 }, { "epoch": 0.748684063164968, "grad_norm": 1.1492834091186523, "learning_rate": 6.681e-05, "loss": 0.3583, "step": 13370 }, { "epoch": 0.7487400604770971, "grad_norm": 1.4888323545455933, "learning_rate": 6.6815e-05, "loss": 0.5411, "step": 13371 }, { "epoch": 0.7487960577892261, "grad_norm": 1.3886725902557373, "learning_rate": 6.682e-05, "loss": 0.6303, "step": 13372 }, { "epoch": 0.7488520551013551, "grad_norm": 1.3541219234466553, "learning_rate": 6.6825e-05, "loss": 0.348, "step": 13373 }, { "epoch": 0.7489080524134841, "grad_norm": 1.4529330730438232, "learning_rate": 6.683000000000001e-05, "loss": 0.4437, "step": 13374 }, { "epoch": 0.7489640497256131, "grad_norm": 2.124105930328369, "learning_rate": 6.6835e-05, "loss": 0.5812, "step": 13375 }, { "epoch": 0.7490200470377422, "grad_norm": 1.2463494539260864, "learning_rate": 6.684e-05, "loss": 0.4733, "step": 13376 }, { "epoch": 0.7490760443498712, "grad_norm": 1.0593266487121582, "learning_rate": 6.6845e-05, "loss": 0.3661, "step": 13377 }, { "epoch": 0.7491320416620002, "grad_norm": 1.3837532997131348, 
"learning_rate": 6.685e-05, "loss": 0.4814, "step": 13378 }, { "epoch": 0.7491880389741292, "grad_norm": 2.01131010055542, "learning_rate": 6.6855e-05, "loss": 0.4869, "step": 13379 }, { "epoch": 0.7492440362862582, "grad_norm": 1.162909746170044, "learning_rate": 6.686000000000001e-05, "loss": 0.4443, "step": 13380 }, { "epoch": 0.7493000335983873, "grad_norm": 1.2567288875579834, "learning_rate": 6.6865e-05, "loss": 0.4941, "step": 13381 }, { "epoch": 0.7493560309105163, "grad_norm": 1.3693965673446655, "learning_rate": 6.687e-05, "loss": 0.5171, "step": 13382 }, { "epoch": 0.7494120282226453, "grad_norm": 1.385277509689331, "learning_rate": 6.6875e-05, "loss": 0.36, "step": 13383 }, { "epoch": 0.7494680255347743, "grad_norm": 1.4657427072525024, "learning_rate": 6.688e-05, "loss": 0.3572, "step": 13384 }, { "epoch": 0.7495240228469033, "grad_norm": 1.1794122457504272, "learning_rate": 6.6885e-05, "loss": 0.4101, "step": 13385 }, { "epoch": 0.7495800201590324, "grad_norm": 1.3354967832565308, "learning_rate": 6.689e-05, "loss": 0.4576, "step": 13386 }, { "epoch": 0.7496360174711614, "grad_norm": 1.1651242971420288, "learning_rate": 6.6895e-05, "loss": 0.3619, "step": 13387 }, { "epoch": 0.7496920147832904, "grad_norm": 1.4015265703201294, "learning_rate": 6.690000000000001e-05, "loss": 0.4246, "step": 13388 }, { "epoch": 0.7497480120954194, "grad_norm": 1.1027058362960815, "learning_rate": 6.690500000000001e-05, "loss": 0.2979, "step": 13389 }, { "epoch": 0.7498040094075484, "grad_norm": 1.2594717741012573, "learning_rate": 6.691000000000001e-05, "loss": 0.4395, "step": 13390 }, { "epoch": 0.7498600067196775, "grad_norm": 1.2497899532318115, "learning_rate": 6.6915e-05, "loss": 0.4734, "step": 13391 }, { "epoch": 0.7499160040318065, "grad_norm": 1.5366617441177368, "learning_rate": 6.692e-05, "loss": 0.4336, "step": 13392 }, { "epoch": 0.7499720013439355, "grad_norm": 1.5138517618179321, "learning_rate": 6.6925e-05, "loss": 0.6008, "step": 13393 }, { "epoch": 
0.7500279986560645, "grad_norm": 1.1237903833389282, "learning_rate": 6.693e-05, "loss": 0.4135, "step": 13394 }, { "epoch": 0.7500839959681935, "grad_norm": 1.1958673000335693, "learning_rate": 6.693500000000001e-05, "loss": 0.3744, "step": 13395 }, { "epoch": 0.7501399932803225, "grad_norm": 1.316476821899414, "learning_rate": 6.694e-05, "loss": 0.4043, "step": 13396 }, { "epoch": 0.7501959905924516, "grad_norm": 1.3259350061416626, "learning_rate": 6.6945e-05, "loss": 0.4395, "step": 13397 }, { "epoch": 0.7502519879045806, "grad_norm": 1.565314769744873, "learning_rate": 6.695e-05, "loss": 0.4311, "step": 13398 }, { "epoch": 0.7503079852167096, "grad_norm": 1.2138341665267944, "learning_rate": 6.6955e-05, "loss": 0.3549, "step": 13399 }, { "epoch": 0.7503639825288386, "grad_norm": 1.5309661626815796, "learning_rate": 6.696e-05, "loss": 0.6146, "step": 13400 }, { "epoch": 0.7504199798409676, "grad_norm": 1.2558754682540894, "learning_rate": 6.6965e-05, "loss": 0.4875, "step": 13401 }, { "epoch": 0.7504759771530967, "grad_norm": 1.4507876634597778, "learning_rate": 6.697e-05, "loss": 0.6295, "step": 13402 }, { "epoch": 0.7505319744652257, "grad_norm": 1.652795433998108, "learning_rate": 6.6975e-05, "loss": 0.3718, "step": 13403 }, { "epoch": 0.7505879717773547, "grad_norm": 1.3381702899932861, "learning_rate": 6.698e-05, "loss": 0.4106, "step": 13404 }, { "epoch": 0.7506439690894837, "grad_norm": 1.7910661697387695, "learning_rate": 6.6985e-05, "loss": 0.4129, "step": 13405 }, { "epoch": 0.7506999664016127, "grad_norm": 1.3057547807693481, "learning_rate": 6.699000000000001e-05, "loss": 0.3802, "step": 13406 }, { "epoch": 0.7507559637137418, "grad_norm": 1.3384629487991333, "learning_rate": 6.6995e-05, "loss": 0.5628, "step": 13407 }, { "epoch": 0.7508119610258708, "grad_norm": 1.34722101688385, "learning_rate": 6.7e-05, "loss": 0.4655, "step": 13408 }, { "epoch": 0.7508679583379998, "grad_norm": 1.3082317113876343, "learning_rate": 6.700500000000001e-05, "loss": 
0.4847, "step": 13409 }, { "epoch": 0.7509239556501288, "grad_norm": 1.2084414958953857, "learning_rate": 6.701000000000001e-05, "loss": 0.5166, "step": 13410 }, { "epoch": 0.7509799529622578, "grad_norm": 1.056401252746582, "learning_rate": 6.701500000000001e-05, "loss": 0.4287, "step": 13411 }, { "epoch": 0.7510359502743869, "grad_norm": 1.2451685667037964, "learning_rate": 6.702e-05, "loss": 0.3245, "step": 13412 }, { "epoch": 0.7510919475865159, "grad_norm": 1.2550543546676636, "learning_rate": 6.7025e-05, "loss": 0.415, "step": 13413 }, { "epoch": 0.7511479448986449, "grad_norm": 1.3785005807876587, "learning_rate": 6.703e-05, "loss": 0.352, "step": 13414 }, { "epoch": 0.7512039422107739, "grad_norm": 1.3353605270385742, "learning_rate": 6.7035e-05, "loss": 0.445, "step": 13415 }, { "epoch": 0.7512599395229029, "grad_norm": 1.0877524614334106, "learning_rate": 6.704000000000001e-05, "loss": 0.3922, "step": 13416 }, { "epoch": 0.751315936835032, "grad_norm": 1.2942429780960083, "learning_rate": 6.7045e-05, "loss": 0.3311, "step": 13417 }, { "epoch": 0.751371934147161, "grad_norm": 1.3884506225585938, "learning_rate": 6.705e-05, "loss": 0.4471, "step": 13418 }, { "epoch": 0.75142793145929, "grad_norm": 1.247389554977417, "learning_rate": 6.7055e-05, "loss": 0.3752, "step": 13419 }, { "epoch": 0.751483928771419, "grad_norm": 1.2123414278030396, "learning_rate": 6.706e-05, "loss": 0.4752, "step": 13420 }, { "epoch": 0.751539926083548, "grad_norm": 1.7403959035873413, "learning_rate": 6.7065e-05, "loss": 0.5192, "step": 13421 }, { "epoch": 0.751595923395677, "grad_norm": 1.4864706993103027, "learning_rate": 6.706999999999999e-05, "loss": 0.3862, "step": 13422 }, { "epoch": 0.7516519207078061, "grad_norm": 1.4361765384674072, "learning_rate": 6.7075e-05, "loss": 0.4101, "step": 13423 }, { "epoch": 0.7517079180199351, "grad_norm": 1.5567173957824707, "learning_rate": 6.708e-05, "loss": 0.5029, "step": 13424 }, { "epoch": 0.7517639153320641, "grad_norm": 
1.390423059463501, "learning_rate": 6.7085e-05, "loss": 0.3613, "step": 13425 }, { "epoch": 0.7518199126441931, "grad_norm": 1.4552497863769531, "learning_rate": 6.709000000000001e-05, "loss": 0.7664, "step": 13426 }, { "epoch": 0.7518759099563221, "grad_norm": 1.305458426475525, "learning_rate": 6.709500000000001e-05, "loss": 0.5114, "step": 13427 }, { "epoch": 0.7519319072684512, "grad_norm": 1.2647517919540405, "learning_rate": 6.71e-05, "loss": 0.3717, "step": 13428 }, { "epoch": 0.7519879045805802, "grad_norm": 1.1407110691070557, "learning_rate": 6.7105e-05, "loss": 0.3953, "step": 13429 }, { "epoch": 0.7520439018927092, "grad_norm": 1.303435206413269, "learning_rate": 6.711e-05, "loss": 0.4151, "step": 13430 }, { "epoch": 0.7520998992048382, "grad_norm": 1.5761874914169312, "learning_rate": 6.711500000000001e-05, "loss": 0.3881, "step": 13431 }, { "epoch": 0.7521558965169672, "grad_norm": 1.368018388748169, "learning_rate": 6.712000000000001e-05, "loss": 0.3724, "step": 13432 }, { "epoch": 0.7522118938290963, "grad_norm": 1.412675142288208, "learning_rate": 6.7125e-05, "loss": 0.5171, "step": 13433 }, { "epoch": 0.7522678911412253, "grad_norm": 1.142570972442627, "learning_rate": 6.713e-05, "loss": 0.3583, "step": 13434 }, { "epoch": 0.7523238884533542, "grad_norm": 1.1345243453979492, "learning_rate": 6.7135e-05, "loss": 0.3614, "step": 13435 }, { "epoch": 0.7523798857654832, "grad_norm": 1.4500483274459839, "learning_rate": 6.714e-05, "loss": 0.4051, "step": 13436 }, { "epoch": 0.7524358830776122, "grad_norm": 1.1398577690124512, "learning_rate": 6.714500000000001e-05, "loss": 0.3196, "step": 13437 }, { "epoch": 0.7524918803897412, "grad_norm": 1.4100042581558228, "learning_rate": 6.715e-05, "loss": 0.4972, "step": 13438 }, { "epoch": 0.7525478777018703, "grad_norm": 1.1147397756576538, "learning_rate": 6.7155e-05, "loss": 0.4089, "step": 13439 }, { "epoch": 0.7526038750139993, "grad_norm": 1.1418505907058716, "learning_rate": 6.716e-05, "loss": 0.3055, 
"step": 13440 }, { "epoch": 0.7526598723261283, "grad_norm": 1.6036012172698975, "learning_rate": 6.7165e-05, "loss": 0.476, "step": 13441 }, { "epoch": 0.7527158696382573, "grad_norm": 1.084713101387024, "learning_rate": 6.717e-05, "loss": 0.3097, "step": 13442 }, { "epoch": 0.7527718669503863, "grad_norm": 1.3821686506271362, "learning_rate": 6.717499999999999e-05, "loss": 0.5434, "step": 13443 }, { "epoch": 0.7528278642625154, "grad_norm": 1.393151879310608, "learning_rate": 6.718e-05, "loss": 0.4664, "step": 13444 }, { "epoch": 0.7528838615746444, "grad_norm": 1.1680833101272583, "learning_rate": 6.7185e-05, "loss": 0.4461, "step": 13445 }, { "epoch": 0.7529398588867734, "grad_norm": 1.2255024909973145, "learning_rate": 6.719000000000001e-05, "loss": 0.4013, "step": 13446 }, { "epoch": 0.7529958561989024, "grad_norm": 1.1132417917251587, "learning_rate": 6.719500000000001e-05, "loss": 0.3616, "step": 13447 }, { "epoch": 0.7530518535110314, "grad_norm": 1.598866581916809, "learning_rate": 6.720000000000001e-05, "loss": 0.4688, "step": 13448 }, { "epoch": 0.7531078508231605, "grad_norm": 1.1186206340789795, "learning_rate": 6.7205e-05, "loss": 0.3437, "step": 13449 }, { "epoch": 0.7531638481352895, "grad_norm": 1.6304622888565063, "learning_rate": 6.721e-05, "loss": 0.6735, "step": 13450 }, { "epoch": 0.7532198454474185, "grad_norm": 1.4465316534042358, "learning_rate": 6.7215e-05, "loss": 0.4097, "step": 13451 }, { "epoch": 0.7532758427595475, "grad_norm": 1.664799451828003, "learning_rate": 6.722000000000001e-05, "loss": 0.4707, "step": 13452 }, { "epoch": 0.7533318400716765, "grad_norm": 1.2107914686203003, "learning_rate": 6.722500000000001e-05, "loss": 0.3472, "step": 13453 }, { "epoch": 0.7533878373838055, "grad_norm": 1.3441057205200195, "learning_rate": 6.723e-05, "loss": 0.4359, "step": 13454 }, { "epoch": 0.7534438346959346, "grad_norm": 1.2599084377288818, "learning_rate": 6.7235e-05, "loss": 0.4591, "step": 13455 }, { "epoch": 0.7534998320080636, 
"grad_norm": 1.185228705406189, "learning_rate": 6.724e-05, "loss": 0.3497, "step": 13456 }, { "epoch": 0.7535558293201926, "grad_norm": 1.2232575416564941, "learning_rate": 6.7245e-05, "loss": 0.4625, "step": 13457 }, { "epoch": 0.7536118266323216, "grad_norm": 1.4489551782608032, "learning_rate": 6.725000000000001e-05, "loss": 0.5915, "step": 13458 }, { "epoch": 0.7536678239444506, "grad_norm": 1.467835783958435, "learning_rate": 6.7255e-05, "loss": 0.6399, "step": 13459 }, { "epoch": 0.7537238212565797, "grad_norm": 1.36167311668396, "learning_rate": 6.726e-05, "loss": 0.4153, "step": 13460 }, { "epoch": 0.7537798185687087, "grad_norm": 1.2901496887207031, "learning_rate": 6.7265e-05, "loss": 0.3864, "step": 13461 }, { "epoch": 0.7538358158808377, "grad_norm": 1.549117922782898, "learning_rate": 6.727e-05, "loss": 0.4436, "step": 13462 }, { "epoch": 0.7538918131929667, "grad_norm": 1.1801596879959106, "learning_rate": 6.7275e-05, "loss": 0.3789, "step": 13463 }, { "epoch": 0.7539478105050957, "grad_norm": 1.5706390142440796, "learning_rate": 6.727999999999999e-05, "loss": 0.3809, "step": 13464 }, { "epoch": 0.7540038078172248, "grad_norm": 1.2169467210769653, "learning_rate": 6.7285e-05, "loss": 0.4053, "step": 13465 }, { "epoch": 0.7540598051293538, "grad_norm": 1.3880095481872559, "learning_rate": 6.729000000000001e-05, "loss": 0.5238, "step": 13466 }, { "epoch": 0.7541158024414828, "grad_norm": 1.2271162271499634, "learning_rate": 6.729500000000001e-05, "loss": 0.386, "step": 13467 }, { "epoch": 0.7541717997536118, "grad_norm": 1.463864803314209, "learning_rate": 6.730000000000001e-05, "loss": 0.4024, "step": 13468 }, { "epoch": 0.7542277970657408, "grad_norm": 1.4349846839904785, "learning_rate": 6.730500000000001e-05, "loss": 0.5111, "step": 13469 }, { "epoch": 0.7542837943778699, "grad_norm": 1.2246228456497192, "learning_rate": 6.731e-05, "loss": 0.3641, "step": 13470 }, { "epoch": 0.7543397916899989, "grad_norm": 1.2935744524002075, "learning_rate": 
6.7315e-05, "loss": 0.5459, "step": 13471 }, { "epoch": 0.7543957890021279, "grad_norm": 1.280429482460022, "learning_rate": 6.732e-05, "loss": 0.4411, "step": 13472 }, { "epoch": 0.7544517863142569, "grad_norm": 1.3651620149612427, "learning_rate": 6.732500000000001e-05, "loss": 0.4289, "step": 13473 }, { "epoch": 0.7545077836263859, "grad_norm": 1.0948057174682617, "learning_rate": 6.733000000000001e-05, "loss": 0.3642, "step": 13474 }, { "epoch": 0.754563780938515, "grad_norm": 1.3790465593338013, "learning_rate": 6.7335e-05, "loss": 0.4769, "step": 13475 }, { "epoch": 0.754619778250644, "grad_norm": 1.553052544593811, "learning_rate": 6.734e-05, "loss": 0.5529, "step": 13476 }, { "epoch": 0.754675775562773, "grad_norm": 1.7630292177200317, "learning_rate": 6.7345e-05, "loss": 0.6346, "step": 13477 }, { "epoch": 0.754731772874902, "grad_norm": 1.2925057411193848, "learning_rate": 6.735e-05, "loss": 0.4822, "step": 13478 }, { "epoch": 0.754787770187031, "grad_norm": 1.9060975313186646, "learning_rate": 6.735500000000001e-05, "loss": 0.4757, "step": 13479 }, { "epoch": 0.75484376749916, "grad_norm": 1.1911603212356567, "learning_rate": 6.736e-05, "loss": 0.4558, "step": 13480 }, { "epoch": 0.7548997648112891, "grad_norm": 1.3044967651367188, "learning_rate": 6.7365e-05, "loss": 0.4002, "step": 13481 }, { "epoch": 0.7549557621234181, "grad_norm": 1.0621124505996704, "learning_rate": 6.737e-05, "loss": 0.3953, "step": 13482 }, { "epoch": 0.7550117594355471, "grad_norm": 1.3665282726287842, "learning_rate": 6.7375e-05, "loss": 0.4164, "step": 13483 }, { "epoch": 0.7550677567476761, "grad_norm": 1.1974210739135742, "learning_rate": 6.738e-05, "loss": 0.4412, "step": 13484 }, { "epoch": 0.7551237540598051, "grad_norm": 1.2153085470199585, "learning_rate": 6.738499999999999e-05, "loss": 0.3828, "step": 13485 }, { "epoch": 0.7551797513719342, "grad_norm": 1.622483730316162, "learning_rate": 6.739e-05, "loss": 0.4713, "step": 13486 }, { "epoch": 0.7552357486840632, 
"grad_norm": 1.2181439399719238, "learning_rate": 6.739500000000001e-05, "loss": 0.4743, "step": 13487 }, { "epoch": 0.7552917459961922, "grad_norm": 1.6612908840179443, "learning_rate": 6.740000000000001e-05, "loss": 0.3973, "step": 13488 }, { "epoch": 0.7553477433083212, "grad_norm": 1.333847165107727, "learning_rate": 6.740500000000001e-05, "loss": 0.3932, "step": 13489 }, { "epoch": 0.7554037406204502, "grad_norm": 1.3070459365844727, "learning_rate": 6.741000000000001e-05, "loss": 0.431, "step": 13490 }, { "epoch": 0.7554597379325793, "grad_norm": 1.1635133028030396, "learning_rate": 6.7415e-05, "loss": 0.3468, "step": 13491 }, { "epoch": 0.7555157352447083, "grad_norm": 1.4309470653533936, "learning_rate": 6.742e-05, "loss": 0.4128, "step": 13492 }, { "epoch": 0.7555717325568373, "grad_norm": 1.3312102556228638, "learning_rate": 6.7425e-05, "loss": 0.4404, "step": 13493 }, { "epoch": 0.7556277298689663, "grad_norm": 1.4434672594070435, "learning_rate": 6.743000000000001e-05, "loss": 0.5623, "step": 13494 }, { "epoch": 0.7556837271810953, "grad_norm": 1.1383618116378784, "learning_rate": 6.743500000000001e-05, "loss": 0.3618, "step": 13495 }, { "epoch": 0.7557397244932244, "grad_norm": 1.1406939029693604, "learning_rate": 6.744e-05, "loss": 0.3949, "step": 13496 }, { "epoch": 0.7557957218053534, "grad_norm": 1.3334400653839111, "learning_rate": 6.7445e-05, "loss": 0.4681, "step": 13497 }, { "epoch": 0.7558517191174824, "grad_norm": 1.2865227460861206, "learning_rate": 6.745e-05, "loss": 0.4014, "step": 13498 }, { "epoch": 0.7559077164296114, "grad_norm": 1.1794874668121338, "learning_rate": 6.7455e-05, "loss": 0.4435, "step": 13499 }, { "epoch": 0.7559637137417404, "grad_norm": 1.5550849437713623, "learning_rate": 6.746e-05, "loss": 0.6375, "step": 13500 }, { "epoch": 0.7560197110538694, "grad_norm": 1.6652644872665405, "learning_rate": 6.7465e-05, "loss": 0.5682, "step": 13501 }, { "epoch": 0.7560757083659985, "grad_norm": 1.117836594581604, "learning_rate": 
6.747e-05, "loss": 0.3993, "step": 13502 }, { "epoch": 0.7561317056781275, "grad_norm": 1.3415014743804932, "learning_rate": 6.7475e-05, "loss": 0.4238, "step": 13503 }, { "epoch": 0.7561877029902565, "grad_norm": 1.3984713554382324, "learning_rate": 6.748e-05, "loss": 0.4271, "step": 13504 }, { "epoch": 0.7562437003023855, "grad_norm": 1.6960017681121826, "learning_rate": 6.7485e-05, "loss": 0.3231, "step": 13505 }, { "epoch": 0.7562996976145145, "grad_norm": 1.6180530786514282, "learning_rate": 6.749e-05, "loss": 0.477, "step": 13506 }, { "epoch": 0.7563556949266436, "grad_norm": 1.4171092510223389, "learning_rate": 6.7495e-05, "loss": 0.4831, "step": 13507 }, { "epoch": 0.7564116922387726, "grad_norm": 1.1733931303024292, "learning_rate": 6.750000000000001e-05, "loss": 0.4208, "step": 13508 }, { "epoch": 0.7564676895509016, "grad_norm": 1.4384032487869263, "learning_rate": 6.750500000000001e-05, "loss": 0.5576, "step": 13509 }, { "epoch": 0.7565236868630306, "grad_norm": 1.3238472938537598, "learning_rate": 6.751000000000001e-05, "loss": 0.4113, "step": 13510 }, { "epoch": 0.7565796841751596, "grad_norm": 1.176927924156189, "learning_rate": 6.7515e-05, "loss": 0.3882, "step": 13511 }, { "epoch": 0.7566356814872887, "grad_norm": 1.1933873891830444, "learning_rate": 6.752e-05, "loss": 0.496, "step": 13512 }, { "epoch": 0.7566916787994177, "grad_norm": 1.1905460357666016, "learning_rate": 6.7525e-05, "loss": 0.3585, "step": 13513 }, { "epoch": 0.7567476761115467, "grad_norm": 1.154370665550232, "learning_rate": 6.753e-05, "loss": 0.3804, "step": 13514 }, { "epoch": 0.7568036734236757, "grad_norm": 1.5908572673797607, "learning_rate": 6.753500000000001e-05, "loss": 0.5003, "step": 13515 }, { "epoch": 0.7568596707358047, "grad_norm": 2.966088056564331, "learning_rate": 6.754000000000001e-05, "loss": 0.4471, "step": 13516 }, { "epoch": 0.7569156680479338, "grad_norm": 1.2605220079421997, "learning_rate": 6.7545e-05, "loss": 0.3171, "step": 13517 }, { "epoch": 
0.7569716653600627, "grad_norm": 1.172694444656372, "learning_rate": 6.755e-05, "loss": 0.4792, "step": 13518 }, { "epoch": 0.7570276626721917, "grad_norm": 1.4007148742675781, "learning_rate": 6.7555e-05, "loss": 0.5274, "step": 13519 }, { "epoch": 0.7570836599843207, "grad_norm": 1.4705430269241333, "learning_rate": 6.756e-05, "loss": 0.5258, "step": 13520 }, { "epoch": 0.7571396572964497, "grad_norm": 1.2992222309112549, "learning_rate": 6.7565e-05, "loss": 0.4533, "step": 13521 }, { "epoch": 0.7571956546085787, "grad_norm": 1.5344949960708618, "learning_rate": 6.757e-05, "loss": 0.5269, "step": 13522 }, { "epoch": 0.7572516519207078, "grad_norm": 1.204139232635498, "learning_rate": 6.7575e-05, "loss": 0.5333, "step": 13523 }, { "epoch": 0.7573076492328368, "grad_norm": 1.3827623128890991, "learning_rate": 6.758e-05, "loss": 0.548, "step": 13524 }, { "epoch": 0.7573636465449658, "grad_norm": 1.5382299423217773, "learning_rate": 6.7585e-05, "loss": 0.5404, "step": 13525 }, { "epoch": 0.7574196438570948, "grad_norm": 1.6641407012939453, "learning_rate": 6.759e-05, "loss": 0.6407, "step": 13526 }, { "epoch": 0.7574756411692238, "grad_norm": 1.459222674369812, "learning_rate": 6.7595e-05, "loss": 0.4695, "step": 13527 }, { "epoch": 0.7575316384813529, "grad_norm": 1.2386016845703125, "learning_rate": 6.76e-05, "loss": 0.5066, "step": 13528 }, { "epoch": 0.7575876357934819, "grad_norm": 1.3396717309951782, "learning_rate": 6.7605e-05, "loss": 0.3974, "step": 13529 }, { "epoch": 0.7576436331056109, "grad_norm": 3.966275691986084, "learning_rate": 6.761000000000001e-05, "loss": 0.4284, "step": 13530 }, { "epoch": 0.7576996304177399, "grad_norm": 1.2423146963119507, "learning_rate": 6.761500000000001e-05, "loss": 0.4615, "step": 13531 }, { "epoch": 0.7577556277298689, "grad_norm": 1.0985716581344604, "learning_rate": 6.762e-05, "loss": 0.3827, "step": 13532 }, { "epoch": 0.757811625041998, "grad_norm": 1.3467158079147339, "learning_rate": 6.7625e-05, "loss": 0.4836, 
"step": 13533 }, { "epoch": 0.757867622354127, "grad_norm": 1.2831975221633911, "learning_rate": 6.763e-05, "loss": 0.4462, "step": 13534 }, { "epoch": 0.757923619666256, "grad_norm": 1.3927476406097412, "learning_rate": 6.7635e-05, "loss": 0.4237, "step": 13535 }, { "epoch": 0.757979616978385, "grad_norm": 1.3788520097732544, "learning_rate": 6.764000000000001e-05, "loss": 0.4037, "step": 13536 }, { "epoch": 0.758035614290514, "grad_norm": 1.3232665061950684, "learning_rate": 6.764500000000001e-05, "loss": 0.5126, "step": 13537 }, { "epoch": 0.758091611602643, "grad_norm": 1.4086594581604004, "learning_rate": 6.765e-05, "loss": 0.4718, "step": 13538 }, { "epoch": 0.7581476089147721, "grad_norm": 1.207565188407898, "learning_rate": 6.7655e-05, "loss": 0.3503, "step": 13539 }, { "epoch": 0.7582036062269011, "grad_norm": 1.3686875104904175, "learning_rate": 6.766e-05, "loss": 0.3985, "step": 13540 }, { "epoch": 0.7582596035390301, "grad_norm": 1.418041706085205, "learning_rate": 6.7665e-05, "loss": 0.4116, "step": 13541 }, { "epoch": 0.7583156008511591, "grad_norm": 1.3729579448699951, "learning_rate": 6.767e-05, "loss": 0.5298, "step": 13542 }, { "epoch": 0.7583715981632881, "grad_norm": 1.4995180368423462, "learning_rate": 6.7675e-05, "loss": 0.5458, "step": 13543 }, { "epoch": 0.7584275954754172, "grad_norm": 1.378974199295044, "learning_rate": 6.768e-05, "loss": 0.3962, "step": 13544 }, { "epoch": 0.7584835927875462, "grad_norm": 1.2189967632293701, "learning_rate": 6.7685e-05, "loss": 0.5196, "step": 13545 }, { "epoch": 0.7585395900996752, "grad_norm": 1.2721692323684692, "learning_rate": 6.769e-05, "loss": 0.38, "step": 13546 }, { "epoch": 0.7585955874118042, "grad_norm": 1.3236334323883057, "learning_rate": 6.769500000000001e-05, "loss": 0.4855, "step": 13547 }, { "epoch": 0.7586515847239332, "grad_norm": 1.087719202041626, "learning_rate": 6.77e-05, "loss": 0.3678, "step": 13548 }, { "epoch": 0.7587075820360623, "grad_norm": 1.3718489408493042, 
"learning_rate": 6.7705e-05, "loss": 0.4305, "step": 13549 }, { "epoch": 0.7587635793481913, "grad_norm": 1.2739298343658447, "learning_rate": 6.771e-05, "loss": 0.4258, "step": 13550 }, { "epoch": 0.7588195766603203, "grad_norm": 1.6008694171905518, "learning_rate": 6.771500000000001e-05, "loss": 0.5363, "step": 13551 }, { "epoch": 0.7588755739724493, "grad_norm": 1.3075045347213745, "learning_rate": 6.772000000000001e-05, "loss": 0.4086, "step": 13552 }, { "epoch": 0.7589315712845783, "grad_norm": 1.3609967231750488, "learning_rate": 6.7725e-05, "loss": 0.4109, "step": 13553 }, { "epoch": 0.7589875685967074, "grad_norm": 1.360772967338562, "learning_rate": 6.773e-05, "loss": 0.4327, "step": 13554 }, { "epoch": 0.7590435659088364, "grad_norm": 1.3756797313690186, "learning_rate": 6.7735e-05, "loss": 0.4837, "step": 13555 }, { "epoch": 0.7590995632209654, "grad_norm": 1.610328197479248, "learning_rate": 6.774e-05, "loss": 0.6, "step": 13556 }, { "epoch": 0.7591555605330944, "grad_norm": 1.6325621604919434, "learning_rate": 6.774500000000001e-05, "loss": 0.5773, "step": 13557 }, { "epoch": 0.7592115578452234, "grad_norm": 1.2873172760009766, "learning_rate": 6.775000000000001e-05, "loss": 0.4575, "step": 13558 }, { "epoch": 0.7592675551573524, "grad_norm": 1.4128810167312622, "learning_rate": 6.7755e-05, "loss": 0.3474, "step": 13559 }, { "epoch": 0.7593235524694815, "grad_norm": 1.3517918586730957, "learning_rate": 6.776e-05, "loss": 0.4743, "step": 13560 }, { "epoch": 0.7593795497816105, "grad_norm": 1.365537166595459, "learning_rate": 6.7765e-05, "loss": 0.5071, "step": 13561 }, { "epoch": 0.7594355470937395, "grad_norm": 1.3406610488891602, "learning_rate": 6.777e-05, "loss": 0.3936, "step": 13562 }, { "epoch": 0.7594915444058685, "grad_norm": 1.4251781702041626, "learning_rate": 6.7775e-05, "loss": 0.5897, "step": 13563 }, { "epoch": 0.7595475417179975, "grad_norm": 1.5039125680923462, "learning_rate": 6.778e-05, "loss": 0.5221, "step": 13564 }, { "epoch": 
0.7596035390301266, "grad_norm": 1.3651732206344604, "learning_rate": 6.7785e-05, "loss": 0.4698, "step": 13565 }, { "epoch": 0.7596595363422556, "grad_norm": 1.6269944906234741, "learning_rate": 6.779e-05, "loss": 0.5133, "step": 13566 }, { "epoch": 0.7597155336543846, "grad_norm": 1.255621075630188, "learning_rate": 6.779500000000001e-05, "loss": 0.4111, "step": 13567 }, { "epoch": 0.7597715309665136, "grad_norm": 1.2345309257507324, "learning_rate": 6.780000000000001e-05, "loss": 0.3922, "step": 13568 }, { "epoch": 0.7598275282786426, "grad_norm": 1.3083750009536743, "learning_rate": 6.7805e-05, "loss": 0.421, "step": 13569 }, { "epoch": 0.7598835255907717, "grad_norm": 1.4425714015960693, "learning_rate": 6.781e-05, "loss": 0.7085, "step": 13570 }, { "epoch": 0.7599395229029007, "grad_norm": 1.5089733600616455, "learning_rate": 6.7815e-05, "loss": 0.4205, "step": 13571 }, { "epoch": 0.7599955202150297, "grad_norm": 1.9460489749908447, "learning_rate": 6.782000000000001e-05, "loss": 0.5807, "step": 13572 }, { "epoch": 0.7600515175271587, "grad_norm": 1.3572713136672974, "learning_rate": 6.782500000000001e-05, "loss": 0.551, "step": 13573 }, { "epoch": 0.7601075148392877, "grad_norm": 1.2764432430267334, "learning_rate": 6.783e-05, "loss": 0.5627, "step": 13574 }, { "epoch": 0.7601635121514168, "grad_norm": 1.253567099571228, "learning_rate": 6.7835e-05, "loss": 0.3933, "step": 13575 }, { "epoch": 0.7602195094635458, "grad_norm": 1.4845340251922607, "learning_rate": 6.784e-05, "loss": 0.4841, "step": 13576 }, { "epoch": 0.7602755067756748, "grad_norm": 1.1086807250976562, "learning_rate": 6.7845e-05, "loss": 0.3112, "step": 13577 }, { "epoch": 0.7603315040878038, "grad_norm": 1.3579661846160889, "learning_rate": 6.785e-05, "loss": 0.4462, "step": 13578 }, { "epoch": 0.7603875013999328, "grad_norm": 1.42841374874115, "learning_rate": 6.785500000000001e-05, "loss": 0.4167, "step": 13579 }, { "epoch": 0.7604434987120618, "grad_norm": 1.4315341711044312, 
"learning_rate": 6.786e-05, "loss": 0.4883, "step": 13580 }, { "epoch": 0.7604994960241909, "grad_norm": 1.3810499906539917, "learning_rate": 6.7865e-05, "loss": 0.5526, "step": 13581 }, { "epoch": 0.7605554933363199, "grad_norm": 1.5452520847320557, "learning_rate": 6.787e-05, "loss": 0.294, "step": 13582 }, { "epoch": 0.7606114906484489, "grad_norm": 1.2834396362304688, "learning_rate": 6.7875e-05, "loss": 0.5102, "step": 13583 }, { "epoch": 0.7606674879605779, "grad_norm": 1.6737674474716187, "learning_rate": 6.788e-05, "loss": 0.6717, "step": 13584 }, { "epoch": 0.760723485272707, "grad_norm": 1.3482942581176758, "learning_rate": 6.7885e-05, "loss": 0.4505, "step": 13585 }, { "epoch": 0.760779482584836, "grad_norm": 1.315356731414795, "learning_rate": 6.789e-05, "loss": 0.345, "step": 13586 }, { "epoch": 0.760835479896965, "grad_norm": 1.2866826057434082, "learning_rate": 6.789500000000001e-05, "loss": 0.3925, "step": 13587 }, { "epoch": 0.760891477209094, "grad_norm": 1.3313685655593872, "learning_rate": 6.790000000000001e-05, "loss": 0.4995, "step": 13588 }, { "epoch": 0.760947474521223, "grad_norm": 1.3239092826843262, "learning_rate": 6.790500000000001e-05, "loss": 0.5177, "step": 13589 }, { "epoch": 0.761003471833352, "grad_norm": 1.577412724494934, "learning_rate": 6.791e-05, "loss": 0.6218, "step": 13590 }, { "epoch": 0.7610594691454811, "grad_norm": 1.3489168882369995, "learning_rate": 6.7915e-05, "loss": 0.5704, "step": 13591 }, { "epoch": 0.7611154664576101, "grad_norm": 1.1966787576675415, "learning_rate": 6.792e-05, "loss": 0.4038, "step": 13592 }, { "epoch": 0.7611714637697391, "grad_norm": 1.236101746559143, "learning_rate": 6.792500000000001e-05, "loss": 0.392, "step": 13593 }, { "epoch": 0.7612274610818681, "grad_norm": 1.2937026023864746, "learning_rate": 6.793000000000001e-05, "loss": 0.39, "step": 13594 }, { "epoch": 0.7612834583939971, "grad_norm": 1.1674257516860962, "learning_rate": 6.7935e-05, "loss": 0.3888, "step": 13595 }, { "epoch": 
0.7613394557061262, "grad_norm": 1.514306664466858, "learning_rate": 6.794e-05, "loss": 0.6035, "step": 13596 }, { "epoch": 0.7613954530182552, "grad_norm": 1.2262016534805298, "learning_rate": 6.7945e-05, "loss": 0.3332, "step": 13597 }, { "epoch": 0.7614514503303842, "grad_norm": 1.750225305557251, "learning_rate": 6.795e-05, "loss": 0.4764, "step": 13598 }, { "epoch": 0.7615074476425132, "grad_norm": 1.3644682168960571, "learning_rate": 6.7955e-05, "loss": 0.5144, "step": 13599 }, { "epoch": 0.7615634449546422, "grad_norm": 1.456863522529602, "learning_rate": 6.796e-05, "loss": 0.5256, "step": 13600 }, { "epoch": 0.7616194422667711, "grad_norm": 0.9988186359405518, "learning_rate": 6.7965e-05, "loss": 0.2737, "step": 13601 }, { "epoch": 0.7616754395789002, "grad_norm": 1.4243215322494507, "learning_rate": 6.797e-05, "loss": 0.4492, "step": 13602 }, { "epoch": 0.7617314368910292, "grad_norm": 1.331834316253662, "learning_rate": 6.7975e-05, "loss": 0.3962, "step": 13603 }, { "epoch": 0.7617874342031582, "grad_norm": 1.409032940864563, "learning_rate": 6.798e-05, "loss": 0.4875, "step": 13604 }, { "epoch": 0.7618434315152872, "grad_norm": 1.417380452156067, "learning_rate": 6.7985e-05, "loss": 0.4, "step": 13605 }, { "epoch": 0.7618994288274162, "grad_norm": 1.4346762895584106, "learning_rate": 6.799e-05, "loss": 0.3644, "step": 13606 }, { "epoch": 0.7619554261395453, "grad_norm": 1.5617364645004272, "learning_rate": 6.7995e-05, "loss": 0.6251, "step": 13607 }, { "epoch": 0.7620114234516743, "grad_norm": 1.664028525352478, "learning_rate": 6.800000000000001e-05, "loss": 0.4658, "step": 13608 }, { "epoch": 0.7620674207638033, "grad_norm": 1.2303657531738281, "learning_rate": 6.800500000000001e-05, "loss": 0.3691, "step": 13609 }, { "epoch": 0.7621234180759323, "grad_norm": 1.4195746183395386, "learning_rate": 6.801000000000001e-05, "loss": 0.4241, "step": 13610 }, { "epoch": 0.7621794153880613, "grad_norm": 1.0718209743499756, "learning_rate": 6.8015e-05, "loss": 
0.4468, "step": 13611 }, { "epoch": 0.7622354127001904, "grad_norm": 1.1220693588256836, "learning_rate": 6.802e-05, "loss": 0.3926, "step": 13612 }, { "epoch": 0.7622914100123194, "grad_norm": 1.277579426765442, "learning_rate": 6.8025e-05, "loss": 0.3934, "step": 13613 }, { "epoch": 0.7623474073244484, "grad_norm": 1.3952373266220093, "learning_rate": 6.803000000000001e-05, "loss": 0.5871, "step": 13614 }, { "epoch": 0.7624034046365774, "grad_norm": 1.304663062095642, "learning_rate": 6.803500000000001e-05, "loss": 0.4321, "step": 13615 }, { "epoch": 0.7624594019487064, "grad_norm": 1.3302100896835327, "learning_rate": 6.804e-05, "loss": 0.4849, "step": 13616 }, { "epoch": 0.7625153992608354, "grad_norm": 1.1536314487457275, "learning_rate": 6.8045e-05, "loss": 0.4198, "step": 13617 }, { "epoch": 0.7625713965729645, "grad_norm": 1.2717252969741821, "learning_rate": 6.805e-05, "loss": 0.4946, "step": 13618 }, { "epoch": 0.7626273938850935, "grad_norm": 1.3787095546722412, "learning_rate": 6.8055e-05, "loss": 0.5078, "step": 13619 }, { "epoch": 0.7626833911972225, "grad_norm": 1.3335047960281372, "learning_rate": 6.806e-05, "loss": 0.5409, "step": 13620 }, { "epoch": 0.7627393885093515, "grad_norm": 1.2705175876617432, "learning_rate": 6.8065e-05, "loss": 0.4512, "step": 13621 }, { "epoch": 0.7627953858214805, "grad_norm": 1.4200292825698853, "learning_rate": 6.807e-05, "loss": 0.3421, "step": 13622 }, { "epoch": 0.7628513831336096, "grad_norm": 1.419069766998291, "learning_rate": 6.8075e-05, "loss": 0.4204, "step": 13623 }, { "epoch": 0.7629073804457386, "grad_norm": 1.2540982961654663, "learning_rate": 6.808e-05, "loss": 0.5944, "step": 13624 }, { "epoch": 0.7629633777578676, "grad_norm": 1.294551134109497, "learning_rate": 6.8085e-05, "loss": 0.4136, "step": 13625 }, { "epoch": 0.7630193750699966, "grad_norm": 1.2086056470870972, "learning_rate": 6.809e-05, "loss": 0.4379, "step": 13626 }, { "epoch": 0.7630753723821256, "grad_norm": 1.7807788848876953, 
"learning_rate": 6.8095e-05, "loss": 0.5198, "step": 13627 }, { "epoch": 0.7631313696942547, "grad_norm": 1.272684931755066, "learning_rate": 6.81e-05, "loss": 0.4241, "step": 13628 }, { "epoch": 0.7631873670063837, "grad_norm": 1.271483063697815, "learning_rate": 6.810500000000001e-05, "loss": 0.3404, "step": 13629 }, { "epoch": 0.7632433643185127, "grad_norm": 1.1285747289657593, "learning_rate": 6.811000000000001e-05, "loss": 0.4228, "step": 13630 }, { "epoch": 0.7632993616306417, "grad_norm": 1.5059736967086792, "learning_rate": 6.811500000000001e-05, "loss": 0.5092, "step": 13631 }, { "epoch": 0.7633553589427707, "grad_norm": 1.2974300384521484, "learning_rate": 6.812e-05, "loss": 0.462, "step": 13632 }, { "epoch": 0.7634113562548998, "grad_norm": 1.4447135925292969, "learning_rate": 6.8125e-05, "loss": 0.3697, "step": 13633 }, { "epoch": 0.7634673535670288, "grad_norm": 1.396356225013733, "learning_rate": 6.813e-05, "loss": 0.454, "step": 13634 }, { "epoch": 0.7635233508791578, "grad_norm": 1.2440048456192017, "learning_rate": 6.813500000000001e-05, "loss": 0.4406, "step": 13635 }, { "epoch": 0.7635793481912868, "grad_norm": 1.2469977140426636, "learning_rate": 6.814000000000001e-05, "loss": 0.4061, "step": 13636 }, { "epoch": 0.7636353455034158, "grad_norm": 1.319535255432129, "learning_rate": 6.8145e-05, "loss": 0.4064, "step": 13637 }, { "epoch": 0.7636913428155448, "grad_norm": 1.4810253381729126, "learning_rate": 6.815e-05, "loss": 0.4953, "step": 13638 }, { "epoch": 0.7637473401276739, "grad_norm": 1.311285376548767, "learning_rate": 6.8155e-05, "loss": 0.395, "step": 13639 }, { "epoch": 0.7638033374398029, "grad_norm": 1.3299849033355713, "learning_rate": 6.816e-05, "loss": 0.476, "step": 13640 }, { "epoch": 0.7638593347519319, "grad_norm": 1.3407683372497559, "learning_rate": 6.8165e-05, "loss": 0.386, "step": 13641 }, { "epoch": 0.7639153320640609, "grad_norm": 1.4075672626495361, "learning_rate": 6.817e-05, "loss": 0.4974, "step": 13642 }, { 
"epoch": 0.76397132937619, "grad_norm": 1.3753952980041504, "learning_rate": 6.8175e-05, "loss": 0.4195, "step": 13643 }, { "epoch": 0.764027326688319, "grad_norm": 1.2342287302017212, "learning_rate": 6.818e-05, "loss": 0.4695, "step": 13644 }, { "epoch": 0.764083324000448, "grad_norm": 1.3846280574798584, "learning_rate": 6.8185e-05, "loss": 0.3887, "step": 13645 }, { "epoch": 0.764139321312577, "grad_norm": 1.123100996017456, "learning_rate": 6.819e-05, "loss": 0.452, "step": 13646 }, { "epoch": 0.764195318624706, "grad_norm": 1.3150691986083984, "learning_rate": 6.8195e-05, "loss": 0.5618, "step": 13647 }, { "epoch": 0.764251315936835, "grad_norm": 1.6028003692626953, "learning_rate": 6.82e-05, "loss": 0.5873, "step": 13648 }, { "epoch": 0.7643073132489641, "grad_norm": 1.3623460531234741, "learning_rate": 6.8205e-05, "loss": 0.4849, "step": 13649 }, { "epoch": 0.7643633105610931, "grad_norm": 1.2319543361663818, "learning_rate": 6.821000000000001e-05, "loss": 0.4245, "step": 13650 }, { "epoch": 0.7644193078732221, "grad_norm": 1.5644632577896118, "learning_rate": 6.821500000000001e-05, "loss": 0.6767, "step": 13651 }, { "epoch": 0.7644753051853511, "grad_norm": 1.3389331102371216, "learning_rate": 6.822000000000001e-05, "loss": 0.4127, "step": 13652 }, { "epoch": 0.7645313024974801, "grad_norm": 1.673305630683899, "learning_rate": 6.8225e-05, "loss": 0.4908, "step": 13653 }, { "epoch": 0.7645872998096092, "grad_norm": 1.2320618629455566, "learning_rate": 6.823e-05, "loss": 0.4999, "step": 13654 }, { "epoch": 0.7646432971217382, "grad_norm": 1.3539533615112305, "learning_rate": 6.8235e-05, "loss": 0.4486, "step": 13655 }, { "epoch": 0.7646992944338672, "grad_norm": 1.133371114730835, "learning_rate": 6.824e-05, "loss": 0.329, "step": 13656 }, { "epoch": 0.7647552917459962, "grad_norm": 1.3948217630386353, "learning_rate": 6.824500000000001e-05, "loss": 0.4525, "step": 13657 }, { "epoch": 0.7648112890581252, "grad_norm": 1.3602687120437622, "learning_rate": 
6.825e-05, "loss": 0.4447, "step": 13658 }, { "epoch": 0.7648672863702543, "grad_norm": 1.3339569568634033, "learning_rate": 6.8255e-05, "loss": 0.5464, "step": 13659 }, { "epoch": 0.7649232836823833, "grad_norm": 1.3197356462478638, "learning_rate": 6.826e-05, "loss": 0.3726, "step": 13660 }, { "epoch": 0.7649792809945123, "grad_norm": 1.3283066749572754, "learning_rate": 6.8265e-05, "loss": 0.5509, "step": 13661 }, { "epoch": 0.7650352783066413, "grad_norm": 1.8376636505126953, "learning_rate": 6.827e-05, "loss": 0.6377, "step": 13662 }, { "epoch": 0.7650912756187703, "grad_norm": 1.1652458906173706, "learning_rate": 6.8275e-05, "loss": 0.4488, "step": 13663 }, { "epoch": 0.7651472729308993, "grad_norm": 1.3074110746383667, "learning_rate": 6.828e-05, "loss": 0.4817, "step": 13664 }, { "epoch": 0.7652032702430284, "grad_norm": 1.4740490913391113, "learning_rate": 6.8285e-05, "loss": 0.4235, "step": 13665 }, { "epoch": 0.7652592675551574, "grad_norm": 1.4304113388061523, "learning_rate": 6.829e-05, "loss": 0.5427, "step": 13666 }, { "epoch": 0.7653152648672864, "grad_norm": 1.463375210762024, "learning_rate": 6.8295e-05, "loss": 0.4777, "step": 13667 }, { "epoch": 0.7653712621794154, "grad_norm": 1.1754728555679321, "learning_rate": 6.83e-05, "loss": 0.394, "step": 13668 }, { "epoch": 0.7654272594915444, "grad_norm": 1.3912222385406494, "learning_rate": 6.8305e-05, "loss": 0.5556, "step": 13669 }, { "epoch": 0.7654832568036735, "grad_norm": 1.3535871505737305, "learning_rate": 6.831e-05, "loss": 0.5625, "step": 13670 }, { "epoch": 0.7655392541158025, "grad_norm": 1.1886454820632935, "learning_rate": 6.831500000000001e-05, "loss": 0.4734, "step": 13671 }, { "epoch": 0.7655952514279315, "grad_norm": 2.200488567352295, "learning_rate": 6.832000000000001e-05, "loss": 0.5001, "step": 13672 }, { "epoch": 0.7656512487400605, "grad_norm": 1.2043800354003906, "learning_rate": 6.832500000000001e-05, "loss": 0.4501, "step": 13673 }, { "epoch": 0.7657072460521895, 
"grad_norm": 1.228277325630188, "learning_rate": 6.833e-05, "loss": 0.4027, "step": 13674 }, { "epoch": 0.7657632433643186, "grad_norm": 1.2629377841949463, "learning_rate": 6.8335e-05, "loss": 0.4552, "step": 13675 }, { "epoch": 0.7658192406764476, "grad_norm": 1.2324979305267334, "learning_rate": 6.834e-05, "loss": 0.3709, "step": 13676 }, { "epoch": 0.7658752379885766, "grad_norm": 1.163761854171753, "learning_rate": 6.8345e-05, "loss": 0.5439, "step": 13677 }, { "epoch": 0.7659312353007056, "grad_norm": 1.2088426351547241, "learning_rate": 6.835000000000001e-05, "loss": 0.4621, "step": 13678 }, { "epoch": 0.7659872326128346, "grad_norm": 1.2792140245437622, "learning_rate": 6.8355e-05, "loss": 0.4059, "step": 13679 }, { "epoch": 0.7660432299249637, "grad_norm": 1.3498033285140991, "learning_rate": 6.836e-05, "loss": 0.4016, "step": 13680 }, { "epoch": 0.7660992272370927, "grad_norm": 1.7404643297195435, "learning_rate": 6.8365e-05, "loss": 0.5288, "step": 13681 }, { "epoch": 0.7661552245492217, "grad_norm": 1.2052669525146484, "learning_rate": 6.837e-05, "loss": 0.4172, "step": 13682 }, { "epoch": 0.7662112218613506, "grad_norm": 1.1906694173812866, "learning_rate": 6.8375e-05, "loss": 0.3374, "step": 13683 }, { "epoch": 0.7662672191734796, "grad_norm": 1.4606155157089233, "learning_rate": 6.838e-05, "loss": 0.4478, "step": 13684 }, { "epoch": 0.7663232164856086, "grad_norm": 1.5571273565292358, "learning_rate": 6.8385e-05, "loss": 0.4997, "step": 13685 }, { "epoch": 0.7663792137977377, "grad_norm": 1.4292577505111694, "learning_rate": 6.839e-05, "loss": 0.4201, "step": 13686 }, { "epoch": 0.7664352111098667, "grad_norm": 1.3577619791030884, "learning_rate": 6.8395e-05, "loss": 0.393, "step": 13687 }, { "epoch": 0.7664912084219957, "grad_norm": 1.3015681505203247, "learning_rate": 6.840000000000001e-05, "loss": 0.3905, "step": 13688 }, { "epoch": 0.7665472057341247, "grad_norm": 1.588666558265686, "learning_rate": 6.8405e-05, "loss": 0.4982, "step": 13689 }, { 
"epoch": 0.7666032030462537, "grad_norm": 1.0863832235336304, "learning_rate": 6.841e-05, "loss": 0.3979, "step": 13690 }, { "epoch": 0.7666592003583828, "grad_norm": 1.5667450428009033, "learning_rate": 6.8415e-05, "loss": 0.6567, "step": 13691 }, { "epoch": 0.7667151976705118, "grad_norm": 1.1819483041763306, "learning_rate": 6.842000000000001e-05, "loss": 0.5536, "step": 13692 }, { "epoch": 0.7667711949826408, "grad_norm": 1.4729474782943726, "learning_rate": 6.842500000000001e-05, "loss": 0.5742, "step": 13693 }, { "epoch": 0.7668271922947698, "grad_norm": 1.5023722648620605, "learning_rate": 6.843000000000001e-05, "loss": 0.4246, "step": 13694 }, { "epoch": 0.7668831896068988, "grad_norm": 1.6079078912734985, "learning_rate": 6.8435e-05, "loss": 0.4576, "step": 13695 }, { "epoch": 0.7669391869190278, "grad_norm": 1.3467698097229004, "learning_rate": 6.844e-05, "loss": 0.4981, "step": 13696 }, { "epoch": 0.7669951842311569, "grad_norm": 1.4072273969650269, "learning_rate": 6.8445e-05, "loss": 0.3155, "step": 13697 }, { "epoch": 0.7670511815432859, "grad_norm": 1.3815951347351074, "learning_rate": 6.845e-05, "loss": 0.376, "step": 13698 }, { "epoch": 0.7671071788554149, "grad_norm": 1.4208966493606567, "learning_rate": 6.845500000000001e-05, "loss": 0.5671, "step": 13699 }, { "epoch": 0.7671631761675439, "grad_norm": 1.2100359201431274, "learning_rate": 6.846e-05, "loss": 0.4106, "step": 13700 }, { "epoch": 0.767219173479673, "grad_norm": 1.3779208660125732, "learning_rate": 6.8465e-05, "loss": 0.5216, "step": 13701 }, { "epoch": 0.767275170791802, "grad_norm": 1.2398920059204102, "learning_rate": 6.847e-05, "loss": 0.3448, "step": 13702 }, { "epoch": 0.767331168103931, "grad_norm": 1.2889721393585205, "learning_rate": 6.8475e-05, "loss": 0.4105, "step": 13703 }, { "epoch": 0.76738716541606, "grad_norm": 1.327928066253662, "learning_rate": 6.848e-05, "loss": 0.5157, "step": 13704 }, { "epoch": 0.767443162728189, "grad_norm": 1.3076730966567993, "learning_rate": 
6.8485e-05, "loss": 0.3998, "step": 13705 }, { "epoch": 0.767499160040318, "grad_norm": 1.2470765113830566, "learning_rate": 6.849e-05, "loss": 0.3305, "step": 13706 }, { "epoch": 0.7675551573524471, "grad_norm": 1.3026156425476074, "learning_rate": 6.8495e-05, "loss": 0.4167, "step": 13707 }, { "epoch": 0.7676111546645761, "grad_norm": 1.1424435377120972, "learning_rate": 6.850000000000001e-05, "loss": 0.4479, "step": 13708 }, { "epoch": 0.7676671519767051, "grad_norm": 1.2309578657150269, "learning_rate": 6.850500000000001e-05, "loss": 0.4619, "step": 13709 }, { "epoch": 0.7677231492888341, "grad_norm": 1.623602032661438, "learning_rate": 6.851e-05, "loss": 0.4734, "step": 13710 }, { "epoch": 0.7677791466009631, "grad_norm": 1.4080479145050049, "learning_rate": 6.8515e-05, "loss": 0.421, "step": 13711 }, { "epoch": 0.7678351439130922, "grad_norm": 1.8655375242233276, "learning_rate": 6.852e-05, "loss": 0.3954, "step": 13712 }, { "epoch": 0.7678911412252212, "grad_norm": 1.8153020143508911, "learning_rate": 6.852500000000001e-05, "loss": 0.5158, "step": 13713 }, { "epoch": 0.7679471385373502, "grad_norm": 1.4256774187088013, "learning_rate": 6.853000000000001e-05, "loss": 0.423, "step": 13714 }, { "epoch": 0.7680031358494792, "grad_norm": 1.6449263095855713, "learning_rate": 6.853500000000001e-05, "loss": 0.5707, "step": 13715 }, { "epoch": 0.7680591331616082, "grad_norm": 1.398479700088501, "learning_rate": 6.854e-05, "loss": 0.4827, "step": 13716 }, { "epoch": 0.7681151304737373, "grad_norm": 1.153760313987732, "learning_rate": 6.8545e-05, "loss": 0.4057, "step": 13717 }, { "epoch": 0.7681711277858663, "grad_norm": 1.3056362867355347, "learning_rate": 6.855e-05, "loss": 0.4192, "step": 13718 }, { "epoch": 0.7682271250979953, "grad_norm": 1.4091055393218994, "learning_rate": 6.8555e-05, "loss": 0.5524, "step": 13719 }, { "epoch": 0.7682831224101243, "grad_norm": 1.2711598873138428, "learning_rate": 6.856000000000001e-05, "loss": 0.4829, "step": 13720 }, { 
"epoch": 0.7683391197222533, "grad_norm": 1.2869224548339844, "learning_rate": 6.8565e-05, "loss": 0.6806, "step": 13721 }, { "epoch": 0.7683951170343823, "grad_norm": 1.2931668758392334, "learning_rate": 6.857e-05, "loss": 0.4188, "step": 13722 }, { "epoch": 0.7684511143465114, "grad_norm": 1.210431456565857, "learning_rate": 6.8575e-05, "loss": 0.3685, "step": 13723 }, { "epoch": 0.7685071116586404, "grad_norm": 1.2958704233169556, "learning_rate": 6.858e-05, "loss": 0.4263, "step": 13724 }, { "epoch": 0.7685631089707694, "grad_norm": 1.4035395383834839, "learning_rate": 6.8585e-05, "loss": 0.4423, "step": 13725 }, { "epoch": 0.7686191062828984, "grad_norm": 1.3132297992706299, "learning_rate": 6.858999999999999e-05, "loss": 0.4727, "step": 13726 }, { "epoch": 0.7686751035950274, "grad_norm": 1.3928653001785278, "learning_rate": 6.8595e-05, "loss": 0.3781, "step": 13727 }, { "epoch": 0.7687311009071565, "grad_norm": 1.354992389678955, "learning_rate": 6.860000000000001e-05, "loss": 0.4253, "step": 13728 }, { "epoch": 0.7687870982192855, "grad_norm": 1.6718236207962036, "learning_rate": 6.860500000000001e-05, "loss": 0.6554, "step": 13729 }, { "epoch": 0.7688430955314145, "grad_norm": 1.3217116594314575, "learning_rate": 6.861000000000001e-05, "loss": 0.4524, "step": 13730 }, { "epoch": 0.7688990928435435, "grad_norm": 1.318081259727478, "learning_rate": 6.8615e-05, "loss": 0.4761, "step": 13731 }, { "epoch": 0.7689550901556725, "grad_norm": 0.9450418949127197, "learning_rate": 6.862e-05, "loss": 0.3411, "step": 13732 }, { "epoch": 0.7690110874678016, "grad_norm": 1.217575192451477, "learning_rate": 6.8625e-05, "loss": 0.4743, "step": 13733 }, { "epoch": 0.7690670847799306, "grad_norm": 1.4697747230529785, "learning_rate": 6.863000000000001e-05, "loss": 0.573, "step": 13734 }, { "epoch": 0.7691230820920596, "grad_norm": 1.4503259658813477, "learning_rate": 6.863500000000001e-05, "loss": 0.4563, "step": 13735 }, { "epoch": 0.7691790794041886, "grad_norm": 
1.291875958442688, "learning_rate": 6.864000000000001e-05, "loss": 0.5397, "step": 13736 }, { "epoch": 0.7692350767163176, "grad_norm": 1.412310242652893, "learning_rate": 6.8645e-05, "loss": 0.5399, "step": 13737 }, { "epoch": 0.7692910740284467, "grad_norm": 1.4714233875274658, "learning_rate": 6.865e-05, "loss": 0.6858, "step": 13738 }, { "epoch": 0.7693470713405757, "grad_norm": 1.0596532821655273, "learning_rate": 6.8655e-05, "loss": 0.2782, "step": 13739 }, { "epoch": 0.7694030686527047, "grad_norm": 1.4776426553726196, "learning_rate": 6.866e-05, "loss": 0.4573, "step": 13740 }, { "epoch": 0.7694590659648337, "grad_norm": 1.279836893081665, "learning_rate": 6.866500000000001e-05, "loss": 0.3918, "step": 13741 }, { "epoch": 0.7695150632769627, "grad_norm": 1.4288512468338013, "learning_rate": 6.867e-05, "loss": 0.3375, "step": 13742 }, { "epoch": 0.7695710605890917, "grad_norm": 1.5765928030014038, "learning_rate": 6.8675e-05, "loss": 0.4134, "step": 13743 }, { "epoch": 0.7696270579012208, "grad_norm": 1.4100160598754883, "learning_rate": 6.868e-05, "loss": 0.4122, "step": 13744 }, { "epoch": 0.7696830552133498, "grad_norm": 1.3476622104644775, "learning_rate": 6.8685e-05, "loss": 0.4397, "step": 13745 }, { "epoch": 0.7697390525254788, "grad_norm": 1.3707756996154785, "learning_rate": 6.869e-05, "loss": 0.4645, "step": 13746 }, { "epoch": 0.7697950498376078, "grad_norm": 1.6237640380859375, "learning_rate": 6.869499999999999e-05, "loss": 0.5476, "step": 13747 }, { "epoch": 0.7698510471497368, "grad_norm": 1.3181239366531372, "learning_rate": 6.87e-05, "loss": 0.3942, "step": 13748 }, { "epoch": 0.7699070444618659, "grad_norm": 1.353237509727478, "learning_rate": 6.870500000000001e-05, "loss": 0.4023, "step": 13749 }, { "epoch": 0.7699630417739949, "grad_norm": 1.4873833656311035, "learning_rate": 6.871000000000001e-05, "loss": 0.4376, "step": 13750 }, { "epoch": 0.7700190390861239, "grad_norm": 1.360853672027588, "learning_rate": 6.871500000000001e-05, 
"loss": 0.4512, "step": 13751 }, { "epoch": 0.7700750363982529, "grad_norm": 1.18434476852417, "learning_rate": 6.872e-05, "loss": 0.3869, "step": 13752 }, { "epoch": 0.7701310337103819, "grad_norm": 1.1423625946044922, "learning_rate": 6.8725e-05, "loss": 0.3605, "step": 13753 }, { "epoch": 0.770187031022511, "grad_norm": 1.325973391532898, "learning_rate": 6.873e-05, "loss": 0.2949, "step": 13754 }, { "epoch": 0.77024302833464, "grad_norm": 1.4096765518188477, "learning_rate": 6.8735e-05, "loss": 0.3872, "step": 13755 }, { "epoch": 0.770299025646769, "grad_norm": 1.3526920080184937, "learning_rate": 6.874000000000001e-05, "loss": 0.4204, "step": 13756 }, { "epoch": 0.770355022958898, "grad_norm": 1.1253491640090942, "learning_rate": 6.8745e-05, "loss": 0.3993, "step": 13757 }, { "epoch": 0.770411020271027, "grad_norm": 1.1818180084228516, "learning_rate": 6.875e-05, "loss": 0.4489, "step": 13758 }, { "epoch": 0.770467017583156, "grad_norm": 1.7405974864959717, "learning_rate": 6.8755e-05, "loss": 0.5501, "step": 13759 }, { "epoch": 0.7705230148952851, "grad_norm": 1.3856478929519653, "learning_rate": 6.876e-05, "loss": 0.6071, "step": 13760 }, { "epoch": 0.7705790122074141, "grad_norm": 1.6659204959869385, "learning_rate": 6.8765e-05, "loss": 0.6156, "step": 13761 }, { "epoch": 0.7706350095195431, "grad_norm": 1.35427987575531, "learning_rate": 6.877000000000001e-05, "loss": 0.4552, "step": 13762 }, { "epoch": 0.7706910068316721, "grad_norm": 1.2575783729553223, "learning_rate": 6.8775e-05, "loss": 0.423, "step": 13763 }, { "epoch": 0.7707470041438012, "grad_norm": 1.2999074459075928, "learning_rate": 6.878e-05, "loss": 0.5113, "step": 13764 }, { "epoch": 0.7708030014559302, "grad_norm": 1.3737356662750244, "learning_rate": 6.8785e-05, "loss": 0.4653, "step": 13765 }, { "epoch": 0.7708589987680591, "grad_norm": 1.4483639001846313, "learning_rate": 6.879e-05, "loss": 0.5, "step": 13766 }, { "epoch": 0.7709149960801881, "grad_norm": 1.2880616188049316, 
"learning_rate": 6.8795e-05, "loss": 0.3674, "step": 13767 }, { "epoch": 0.7709709933923171, "grad_norm": 1.4078574180603027, "learning_rate": 6.879999999999999e-05, "loss": 0.5425, "step": 13768 }, { "epoch": 0.7710269907044461, "grad_norm": 1.2571057081222534, "learning_rate": 6.8805e-05, "loss": 0.3848, "step": 13769 }, { "epoch": 0.7710829880165752, "grad_norm": 1.589552640914917, "learning_rate": 6.881000000000001e-05, "loss": 0.6412, "step": 13770 }, { "epoch": 0.7711389853287042, "grad_norm": 1.9987590312957764, "learning_rate": 6.881500000000001e-05, "loss": 0.5261, "step": 13771 }, { "epoch": 0.7711949826408332, "grad_norm": 1.2986613512039185, "learning_rate": 6.882000000000001e-05, "loss": 0.4911, "step": 13772 }, { "epoch": 0.7712509799529622, "grad_norm": 1.2847800254821777, "learning_rate": 6.8825e-05, "loss": 0.4441, "step": 13773 }, { "epoch": 0.7713069772650912, "grad_norm": 1.485494613647461, "learning_rate": 6.883e-05, "loss": 0.6333, "step": 13774 }, { "epoch": 0.7713629745772203, "grad_norm": 1.34844970703125, "learning_rate": 6.8835e-05, "loss": 0.3288, "step": 13775 }, { "epoch": 0.7714189718893493, "grad_norm": 1.1112004518508911, "learning_rate": 6.884e-05, "loss": 0.3564, "step": 13776 }, { "epoch": 0.7714749692014783, "grad_norm": 1.1809488534927368, "learning_rate": 6.884500000000001e-05, "loss": 0.3969, "step": 13777 }, { "epoch": 0.7715309665136073, "grad_norm": 1.8307031393051147, "learning_rate": 6.885e-05, "loss": 0.4789, "step": 13778 }, { "epoch": 0.7715869638257363, "grad_norm": 1.2297064065933228, "learning_rate": 6.8855e-05, "loss": 0.4582, "step": 13779 }, { "epoch": 0.7716429611378653, "grad_norm": 1.400884747505188, "learning_rate": 6.886e-05, "loss": 0.4653, "step": 13780 }, { "epoch": 0.7716989584499944, "grad_norm": 1.321446180343628, "learning_rate": 6.8865e-05, "loss": 0.3972, "step": 13781 }, { "epoch": 0.7717549557621234, "grad_norm": 1.7086085081100464, "learning_rate": 6.887e-05, "loss": 0.6464, "step": 13782 }, { 
"epoch": 0.7718109530742524, "grad_norm": 1.319392204284668, "learning_rate": 6.887500000000001e-05, "loss": 0.3759, "step": 13783 }, { "epoch": 0.7718669503863814, "grad_norm": 1.6760494709014893, "learning_rate": 6.888e-05, "loss": 0.5043, "step": 13784 }, { "epoch": 0.7719229476985104, "grad_norm": 1.1474812030792236, "learning_rate": 6.8885e-05, "loss": 0.3287, "step": 13785 }, { "epoch": 0.7719789450106395, "grad_norm": 1.2620575428009033, "learning_rate": 6.889e-05, "loss": 0.4179, "step": 13786 }, { "epoch": 0.7720349423227685, "grad_norm": 1.1926451921463013, "learning_rate": 6.8895e-05, "loss": 0.3944, "step": 13787 }, { "epoch": 0.7720909396348975, "grad_norm": 1.2984777688980103, "learning_rate": 6.89e-05, "loss": 0.4094, "step": 13788 }, { "epoch": 0.7721469369470265, "grad_norm": 1.5784839391708374, "learning_rate": 6.8905e-05, "loss": 0.5894, "step": 13789 }, { "epoch": 0.7722029342591555, "grad_norm": 1.2304500341415405, "learning_rate": 6.891e-05, "loss": 0.3704, "step": 13790 }, { "epoch": 0.7722589315712846, "grad_norm": 1.6105252504348755, "learning_rate": 6.891500000000001e-05, "loss": 0.5236, "step": 13791 }, { "epoch": 0.7723149288834136, "grad_norm": 1.457632303237915, "learning_rate": 6.892000000000001e-05, "loss": 0.4524, "step": 13792 }, { "epoch": 0.7723709261955426, "grad_norm": 1.252092719078064, "learning_rate": 6.892500000000001e-05, "loss": 0.382, "step": 13793 }, { "epoch": 0.7724269235076716, "grad_norm": 1.1379681825637817, "learning_rate": 6.893e-05, "loss": 0.3817, "step": 13794 }, { "epoch": 0.7724829208198006, "grad_norm": 1.3736088275909424, "learning_rate": 6.8935e-05, "loss": 0.4011, "step": 13795 }, { "epoch": 0.7725389181319297, "grad_norm": 1.4902105331420898, "learning_rate": 6.894e-05, "loss": 0.4807, "step": 13796 }, { "epoch": 0.7725949154440587, "grad_norm": 1.2289371490478516, "learning_rate": 6.8945e-05, "loss": 0.3919, "step": 13797 }, { "epoch": 0.7726509127561877, "grad_norm": 1.515181541442871, 
"learning_rate": 6.895000000000001e-05, "loss": 0.4261, "step": 13798 }, { "epoch": 0.7727069100683167, "grad_norm": 1.2557302713394165, "learning_rate": 6.8955e-05, "loss": 0.518, "step": 13799 }, { "epoch": 0.7727629073804457, "grad_norm": 1.485304832458496, "learning_rate": 6.896e-05, "loss": 0.4222, "step": 13800 }, { "epoch": 0.7728189046925747, "grad_norm": 1.253737211227417, "learning_rate": 6.8965e-05, "loss": 0.4721, "step": 13801 }, { "epoch": 0.7728749020047038, "grad_norm": 1.5144121646881104, "learning_rate": 6.897e-05, "loss": 0.4277, "step": 13802 }, { "epoch": 0.7729308993168328, "grad_norm": 1.363663673400879, "learning_rate": 6.8975e-05, "loss": 0.3993, "step": 13803 }, { "epoch": 0.7729868966289618, "grad_norm": 1.209930181503296, "learning_rate": 6.898e-05, "loss": 0.3794, "step": 13804 }, { "epoch": 0.7730428939410908, "grad_norm": 1.2857134342193604, "learning_rate": 6.8985e-05, "loss": 0.4527, "step": 13805 }, { "epoch": 0.7730988912532198, "grad_norm": 2.3742313385009766, "learning_rate": 6.899e-05, "loss": 0.5023, "step": 13806 }, { "epoch": 0.7731548885653489, "grad_norm": 1.337531328201294, "learning_rate": 6.8995e-05, "loss": 0.4598, "step": 13807 }, { "epoch": 0.7732108858774779, "grad_norm": 1.3381874561309814, "learning_rate": 6.9e-05, "loss": 0.452, "step": 13808 }, { "epoch": 0.7732668831896069, "grad_norm": 1.3037018775939941, "learning_rate": 6.900500000000001e-05, "loss": 0.3577, "step": 13809 }, { "epoch": 0.7733228805017359, "grad_norm": 1.2436840534210205, "learning_rate": 6.901e-05, "loss": 0.3838, "step": 13810 }, { "epoch": 0.7733788778138649, "grad_norm": 1.198750376701355, "learning_rate": 6.9015e-05, "loss": 0.3625, "step": 13811 }, { "epoch": 0.773434875125994, "grad_norm": 1.8446670770645142, "learning_rate": 6.902000000000001e-05, "loss": 0.4405, "step": 13812 }, { "epoch": 0.773490872438123, "grad_norm": 1.359084963798523, "learning_rate": 6.902500000000001e-05, "loss": 0.4634, "step": 13813 }, { "epoch": 
0.773546869750252, "grad_norm": 1.36271333694458, "learning_rate": 6.903000000000001e-05, "loss": 0.5752, "step": 13814 }, { "epoch": 0.773602867062381, "grad_norm": 1.6614081859588623, "learning_rate": 6.9035e-05, "loss": 0.51, "step": 13815 }, { "epoch": 0.77365886437451, "grad_norm": 1.3628937005996704, "learning_rate": 6.904e-05, "loss": 0.4572, "step": 13816 }, { "epoch": 0.773714861686639, "grad_norm": 1.6391777992248535, "learning_rate": 6.9045e-05, "loss": 0.6078, "step": 13817 }, { "epoch": 0.7737708589987681, "grad_norm": 1.3059842586517334, "learning_rate": 6.905e-05, "loss": 0.3356, "step": 13818 }, { "epoch": 0.7738268563108971, "grad_norm": 1.2347959280014038, "learning_rate": 6.905500000000001e-05, "loss": 0.3631, "step": 13819 }, { "epoch": 0.7738828536230261, "grad_norm": 1.3058514595031738, "learning_rate": 6.906e-05, "loss": 0.4657, "step": 13820 }, { "epoch": 0.7739388509351551, "grad_norm": 1.529215693473816, "learning_rate": 6.9065e-05, "loss": 0.4599, "step": 13821 }, { "epoch": 0.7739948482472842, "grad_norm": 1.2192203998565674, "learning_rate": 6.907e-05, "loss": 0.4931, "step": 13822 }, { "epoch": 0.7740508455594132, "grad_norm": 1.276862621307373, "learning_rate": 6.9075e-05, "loss": 0.5643, "step": 13823 }, { "epoch": 0.7741068428715422, "grad_norm": 1.3896090984344482, "learning_rate": 6.908e-05, "loss": 0.4071, "step": 13824 }, { "epoch": 0.7741628401836712, "grad_norm": 1.2592624425888062, "learning_rate": 6.9085e-05, "loss": 0.4726, "step": 13825 }, { "epoch": 0.7742188374958002, "grad_norm": 1.6384234428405762, "learning_rate": 6.909e-05, "loss": 0.4709, "step": 13826 }, { "epoch": 0.7742748348079292, "grad_norm": 1.4587829113006592, "learning_rate": 6.9095e-05, "loss": 0.3398, "step": 13827 }, { "epoch": 0.7743308321200583, "grad_norm": 1.1166255474090576, "learning_rate": 6.91e-05, "loss": 0.402, "step": 13828 }, { "epoch": 0.7743868294321873, "grad_norm": 1.5124624967575073, "learning_rate": 6.910500000000001e-05, "loss": 
0.4448, "step": 13829 }, { "epoch": 0.7744428267443163, "grad_norm": 1.1770051717758179, "learning_rate": 6.911000000000001e-05, "loss": 0.3488, "step": 13830 }, { "epoch": 0.7744988240564453, "grad_norm": 1.3130956888198853, "learning_rate": 6.9115e-05, "loss": 0.4856, "step": 13831 }, { "epoch": 0.7745548213685743, "grad_norm": 1.3943486213684082, "learning_rate": 6.912e-05, "loss": 0.4011, "step": 13832 }, { "epoch": 0.7746108186807034, "grad_norm": 1.4471852779388428, "learning_rate": 6.9125e-05, "loss": 0.3976, "step": 13833 }, { "epoch": 0.7746668159928324, "grad_norm": 1.2625128030776978, "learning_rate": 6.913000000000001e-05, "loss": 0.4263, "step": 13834 }, { "epoch": 0.7747228133049614, "grad_norm": 1.5544123649597168, "learning_rate": 6.913500000000001e-05, "loss": 0.4449, "step": 13835 }, { "epoch": 0.7747788106170904, "grad_norm": 1.125077247619629, "learning_rate": 6.914e-05, "loss": 0.4424, "step": 13836 }, { "epoch": 0.7748348079292194, "grad_norm": 1.201973557472229, "learning_rate": 6.9145e-05, "loss": 0.4998, "step": 13837 }, { "epoch": 0.7748908052413485, "grad_norm": 1.315532922744751, "learning_rate": 6.915e-05, "loss": 0.3769, "step": 13838 }, { "epoch": 0.7749468025534775, "grad_norm": 2.161365032196045, "learning_rate": 6.9155e-05, "loss": 0.4246, "step": 13839 }, { "epoch": 0.7750027998656065, "grad_norm": 1.1771349906921387, "learning_rate": 6.916000000000001e-05, "loss": 0.4635, "step": 13840 }, { "epoch": 0.7750587971777355, "grad_norm": 1.7514574527740479, "learning_rate": 6.9165e-05, "loss": 0.5846, "step": 13841 }, { "epoch": 0.7751147944898645, "grad_norm": 1.6608920097351074, "learning_rate": 6.917e-05, "loss": 0.5855, "step": 13842 }, { "epoch": 0.7751707918019936, "grad_norm": 1.6512091159820557, "learning_rate": 6.9175e-05, "loss": 0.5509, "step": 13843 }, { "epoch": 0.7752267891141226, "grad_norm": 1.455511450767517, "learning_rate": 6.918e-05, "loss": 0.4465, "step": 13844 }, { "epoch": 0.7752827864262516, "grad_norm": 
1.0359824895858765, "learning_rate": 6.9185e-05, "loss": 0.3096, "step": 13845 }, { "epoch": 0.7753387837383806, "grad_norm": 1.3309375047683716, "learning_rate": 6.918999999999999e-05, "loss": 0.4897, "step": 13846 }, { "epoch": 0.7753947810505096, "grad_norm": 1.2151447534561157, "learning_rate": 6.9195e-05, "loss": 0.4117, "step": 13847 }, { "epoch": 0.7754507783626386, "grad_norm": 1.316588282585144, "learning_rate": 6.92e-05, "loss": 0.3778, "step": 13848 }, { "epoch": 0.7755067756747676, "grad_norm": 1.5426836013793945, "learning_rate": 6.920500000000001e-05, "loss": 0.5271, "step": 13849 }, { "epoch": 0.7755627729868966, "grad_norm": 1.4366511106491089, "learning_rate": 6.921000000000001e-05, "loss": 0.4839, "step": 13850 }, { "epoch": 0.7756187702990256, "grad_norm": 1.266235113143921, "learning_rate": 6.921500000000001e-05, "loss": 0.3653, "step": 13851 }, { "epoch": 0.7756747676111546, "grad_norm": 1.1202263832092285, "learning_rate": 6.922e-05, "loss": 0.3637, "step": 13852 }, { "epoch": 0.7757307649232836, "grad_norm": 1.158772349357605, "learning_rate": 6.9225e-05, "loss": 0.3914, "step": 13853 }, { "epoch": 0.7757867622354127, "grad_norm": 1.7575604915618896, "learning_rate": 6.923e-05, "loss": 0.4897, "step": 13854 }, { "epoch": 0.7758427595475417, "grad_norm": 1.211358904838562, "learning_rate": 6.923500000000001e-05, "loss": 0.4437, "step": 13855 }, { "epoch": 0.7758987568596707, "grad_norm": 1.5842890739440918, "learning_rate": 6.924000000000001e-05, "loss": 0.6314, "step": 13856 }, { "epoch": 0.7759547541717997, "grad_norm": 1.2781901359558105, "learning_rate": 6.9245e-05, "loss": 0.4226, "step": 13857 }, { "epoch": 0.7760107514839287, "grad_norm": 1.6134580373764038, "learning_rate": 6.925e-05, "loss": 0.4942, "step": 13858 }, { "epoch": 0.7760667487960577, "grad_norm": 1.2906938791275024, "learning_rate": 6.9255e-05, "loss": 0.3801, "step": 13859 }, { "epoch": 0.7761227461081868, "grad_norm": 1.284714937210083, "learning_rate": 6.926e-05, 
"loss": 0.5016, "step": 13860 }, { "epoch": 0.7761787434203158, "grad_norm": 1.2326958179473877, "learning_rate": 6.926500000000001e-05, "loss": 0.4594, "step": 13861 }, { "epoch": 0.7762347407324448, "grad_norm": 1.385058045387268, "learning_rate": 6.927e-05, "loss": 0.4893, "step": 13862 }, { "epoch": 0.7762907380445738, "grad_norm": 1.1791129112243652, "learning_rate": 6.9275e-05, "loss": 0.4252, "step": 13863 }, { "epoch": 0.7763467353567028, "grad_norm": 1.4034109115600586, "learning_rate": 6.928e-05, "loss": 0.4095, "step": 13864 }, { "epoch": 0.7764027326688319, "grad_norm": 1.4608080387115479, "learning_rate": 6.9285e-05, "loss": 0.6999, "step": 13865 }, { "epoch": 0.7764587299809609, "grad_norm": 1.1830004453659058, "learning_rate": 6.929e-05, "loss": 0.5181, "step": 13866 }, { "epoch": 0.7765147272930899, "grad_norm": 1.135304570198059, "learning_rate": 6.929499999999999e-05, "loss": 0.4598, "step": 13867 }, { "epoch": 0.7765707246052189, "grad_norm": 1.295250415802002, "learning_rate": 6.93e-05, "loss": 0.3736, "step": 13868 }, { "epoch": 0.7766267219173479, "grad_norm": 1.9412102699279785, "learning_rate": 6.930500000000001e-05, "loss": 0.5801, "step": 13869 }, { "epoch": 0.776682719229477, "grad_norm": 1.6821688413619995, "learning_rate": 6.931000000000001e-05, "loss": 0.4209, "step": 13870 }, { "epoch": 0.776738716541606, "grad_norm": 1.1794018745422363, "learning_rate": 6.931500000000001e-05, "loss": 0.3961, "step": 13871 }, { "epoch": 0.776794713853735, "grad_norm": 1.363509178161621, "learning_rate": 6.932000000000001e-05, "loss": 0.4314, "step": 13872 }, { "epoch": 0.776850711165864, "grad_norm": 1.2927194833755493, "learning_rate": 6.9325e-05, "loss": 0.3952, "step": 13873 }, { "epoch": 0.776906708477993, "grad_norm": 1.4138803482055664, "learning_rate": 6.933e-05, "loss": 0.4139, "step": 13874 }, { "epoch": 0.776962705790122, "grad_norm": 1.3323150873184204, "learning_rate": 6.9335e-05, "loss": 0.4429, "step": 13875 }, { "epoch": 
0.7770187031022511, "grad_norm": 1.705682635307312, "learning_rate": 6.934000000000001e-05, "loss": 0.4408, "step": 13876 }, { "epoch": 0.7770747004143801, "grad_norm": 1.4556496143341064, "learning_rate": 6.934500000000001e-05, "loss": 0.3778, "step": 13877 }, { "epoch": 0.7771306977265091, "grad_norm": 1.2827378511428833, "learning_rate": 6.935e-05, "loss": 0.4104, "step": 13878 }, { "epoch": 0.7771866950386381, "grad_norm": 1.4275895357131958, "learning_rate": 6.9355e-05, "loss": 0.4003, "step": 13879 }, { "epoch": 0.7772426923507672, "grad_norm": 1.1352436542510986, "learning_rate": 6.936e-05, "loss": 0.3701, "step": 13880 }, { "epoch": 0.7772986896628962, "grad_norm": 1.2699083089828491, "learning_rate": 6.9365e-05, "loss": 0.4275, "step": 13881 }, { "epoch": 0.7773546869750252, "grad_norm": 1.5980900526046753, "learning_rate": 6.937000000000001e-05, "loss": 0.3992, "step": 13882 }, { "epoch": 0.7774106842871542, "grad_norm": 1.665728211402893, "learning_rate": 6.9375e-05, "loss": 0.4682, "step": 13883 }, { "epoch": 0.7774666815992832, "grad_norm": 1.3462138175964355, "learning_rate": 6.938e-05, "loss": 0.4193, "step": 13884 }, { "epoch": 0.7775226789114122, "grad_norm": 0.9974067211151123, "learning_rate": 6.9385e-05, "loss": 0.3148, "step": 13885 }, { "epoch": 0.7775786762235413, "grad_norm": 1.3148261308670044, "learning_rate": 6.939e-05, "loss": 0.4683, "step": 13886 }, { "epoch": 0.7776346735356703, "grad_norm": 1.506017804145813, "learning_rate": 6.9395e-05, "loss": 0.5044, "step": 13887 }, { "epoch": 0.7776906708477993, "grad_norm": 1.2350032329559326, "learning_rate": 6.939999999999999e-05, "loss": 0.4377, "step": 13888 }, { "epoch": 0.7777466681599283, "grad_norm": 1.1359614133834839, "learning_rate": 6.9405e-05, "loss": 0.4154, "step": 13889 }, { "epoch": 0.7778026654720573, "grad_norm": 1.454512119293213, "learning_rate": 6.941000000000001e-05, "loss": 0.673, "step": 13890 }, { "epoch": 0.7778586627841864, "grad_norm": 1.3806427717208862, 
"learning_rate": 6.941500000000001e-05, "loss": 0.4112, "step": 13891 }, { "epoch": 0.7779146600963154, "grad_norm": 1.360846757888794, "learning_rate": 6.942000000000001e-05, "loss": 0.3939, "step": 13892 }, { "epoch": 0.7779706574084444, "grad_norm": 1.238250970840454, "learning_rate": 6.942500000000001e-05, "loss": 0.3914, "step": 13893 }, { "epoch": 0.7780266547205734, "grad_norm": 4.463929176330566, "learning_rate": 6.943e-05, "loss": 0.4305, "step": 13894 }, { "epoch": 0.7780826520327024, "grad_norm": 1.5247230529785156, "learning_rate": 6.9435e-05, "loss": 0.5265, "step": 13895 }, { "epoch": 0.7781386493448315, "grad_norm": 1.4591853618621826, "learning_rate": 6.944e-05, "loss": 0.4177, "step": 13896 }, { "epoch": 0.7781946466569605, "grad_norm": 1.3938111066818237, "learning_rate": 6.944500000000001e-05, "loss": 0.3971, "step": 13897 }, { "epoch": 0.7782506439690895, "grad_norm": 1.2990190982818604, "learning_rate": 6.945000000000001e-05, "loss": 0.4432, "step": 13898 }, { "epoch": 0.7783066412812185, "grad_norm": 1.4695848226547241, "learning_rate": 6.9455e-05, "loss": 0.4576, "step": 13899 }, { "epoch": 0.7783626385933475, "grad_norm": 1.2041194438934326, "learning_rate": 6.946e-05, "loss": 0.4222, "step": 13900 }, { "epoch": 0.7784186359054766, "grad_norm": 1.2224990129470825, "learning_rate": 6.9465e-05, "loss": 0.5282, "step": 13901 }, { "epoch": 0.7784746332176056, "grad_norm": 1.1354583501815796, "learning_rate": 6.947e-05, "loss": 0.3981, "step": 13902 }, { "epoch": 0.7785306305297346, "grad_norm": 1.738417387008667, "learning_rate": 6.9475e-05, "loss": 0.6282, "step": 13903 }, { "epoch": 0.7785866278418636, "grad_norm": 1.2167282104492188, "learning_rate": 6.948e-05, "loss": 0.3819, "step": 13904 }, { "epoch": 0.7786426251539926, "grad_norm": 1.361893653869629, "learning_rate": 6.9485e-05, "loss": 0.4069, "step": 13905 }, { "epoch": 0.7786986224661216, "grad_norm": 1.4380555152893066, "learning_rate": 6.949e-05, "loss": 0.4445, "step": 13906 }, { 
"epoch": 0.7787546197782507, "grad_norm": 1.24867582321167, "learning_rate": 6.9495e-05, "loss": 0.4167, "step": 13907 }, { "epoch": 0.7788106170903797, "grad_norm": 1.3571693897247314, "learning_rate": 6.95e-05, "loss": 0.4471, "step": 13908 }, { "epoch": 0.7788666144025087, "grad_norm": 1.3829060792922974, "learning_rate": 6.950499999999999e-05, "loss": 0.3551, "step": 13909 }, { "epoch": 0.7789226117146377, "grad_norm": 1.2228182554244995, "learning_rate": 6.951e-05, "loss": 0.5215, "step": 13910 }, { "epoch": 0.7789786090267667, "grad_norm": 1.6797186136245728, "learning_rate": 6.951500000000001e-05, "loss": 0.4942, "step": 13911 }, { "epoch": 0.7790346063388958, "grad_norm": 1.561423420906067, "learning_rate": 6.952000000000001e-05, "loss": 0.5206, "step": 13912 }, { "epoch": 0.7790906036510248, "grad_norm": 1.3722482919692993, "learning_rate": 6.952500000000001e-05, "loss": 0.4192, "step": 13913 }, { "epoch": 0.7791466009631538, "grad_norm": 1.2754452228546143, "learning_rate": 6.953000000000001e-05, "loss": 0.5925, "step": 13914 }, { "epoch": 0.7792025982752828, "grad_norm": 1.2323298454284668, "learning_rate": 6.9535e-05, "loss": 0.4066, "step": 13915 }, { "epoch": 0.7792585955874118, "grad_norm": 1.2997041940689087, "learning_rate": 6.954e-05, "loss": 0.4002, "step": 13916 }, { "epoch": 0.7793145928995409, "grad_norm": 1.1204379796981812, "learning_rate": 6.9545e-05, "loss": 0.3701, "step": 13917 }, { "epoch": 0.7793705902116699, "grad_norm": 1.3768155574798584, "learning_rate": 6.955000000000001e-05, "loss": 0.3913, "step": 13918 }, { "epoch": 0.7794265875237989, "grad_norm": 1.28208327293396, "learning_rate": 6.955500000000001e-05, "loss": 0.4146, "step": 13919 }, { "epoch": 0.7794825848359279, "grad_norm": 1.2160152196884155, "learning_rate": 6.956e-05, "loss": 0.397, "step": 13920 }, { "epoch": 0.7795385821480569, "grad_norm": 1.4384526014328003, "learning_rate": 6.9565e-05, "loss": 0.5679, "step": 13921 }, { "epoch": 0.779594579460186, "grad_norm": 
1.3922152519226074, "learning_rate": 6.957e-05, "loss": 0.4086, "step": 13922 }, { "epoch": 0.779650576772315, "grad_norm": 1.5517241954803467, "learning_rate": 6.9575e-05, "loss": 0.4071, "step": 13923 }, { "epoch": 0.779706574084444, "grad_norm": 1.2017390727996826, "learning_rate": 6.958e-05, "loss": 0.4027, "step": 13924 }, { "epoch": 0.779762571396573, "grad_norm": 1.3809328079223633, "learning_rate": 6.9585e-05, "loss": 0.4484, "step": 13925 }, { "epoch": 0.779818568708702, "grad_norm": 1.2521413564682007, "learning_rate": 6.959e-05, "loss": 0.3402, "step": 13926 }, { "epoch": 0.779874566020831, "grad_norm": 1.704215407371521, "learning_rate": 6.9595e-05, "loss": 0.4422, "step": 13927 }, { "epoch": 0.7799305633329601, "grad_norm": 1.3313181400299072, "learning_rate": 6.96e-05, "loss": 0.4553, "step": 13928 }, { "epoch": 0.7799865606450891, "grad_norm": 1.4314274787902832, "learning_rate": 6.9605e-05, "loss": 0.3606, "step": 13929 }, { "epoch": 0.7800425579572181, "grad_norm": 1.0212676525115967, "learning_rate": 6.961e-05, "loss": 0.3565, "step": 13930 }, { "epoch": 0.780098555269347, "grad_norm": 1.203959345817566, "learning_rate": 6.9615e-05, "loss": 0.4249, "step": 13931 }, { "epoch": 0.780154552581476, "grad_norm": 1.0592491626739502, "learning_rate": 6.962e-05, "loss": 0.423, "step": 13932 }, { "epoch": 0.780210549893605, "grad_norm": 1.5868914127349854, "learning_rate": 6.962500000000001e-05, "loss": 0.348, "step": 13933 }, { "epoch": 0.7802665472057341, "grad_norm": 5.164170742034912, "learning_rate": 6.963000000000001e-05, "loss": 0.4902, "step": 13934 }, { "epoch": 0.7803225445178631, "grad_norm": 1.4760080575942993, "learning_rate": 6.9635e-05, "loss": 0.5056, "step": 13935 }, { "epoch": 0.7803785418299921, "grad_norm": 1.3616795539855957, "learning_rate": 6.964e-05, "loss": 0.372, "step": 13936 }, { "epoch": 0.7804345391421211, "grad_norm": 1.3553009033203125, "learning_rate": 6.9645e-05, "loss": 0.4697, "step": 13937 }, { "epoch": 
0.7804905364542502, "grad_norm": 1.2548644542694092, "learning_rate": 6.965e-05, "loss": 0.4852, "step": 13938 }, { "epoch": 0.7805465337663792, "grad_norm": 1.1524333953857422, "learning_rate": 6.965500000000001e-05, "loss": 0.4164, "step": 13939 }, { "epoch": 0.7806025310785082, "grad_norm": 1.3469394445419312, "learning_rate": 6.966000000000001e-05, "loss": 0.3957, "step": 13940 }, { "epoch": 0.7806585283906372, "grad_norm": 1.2270981073379517, "learning_rate": 6.9665e-05, "loss": 0.4294, "step": 13941 }, { "epoch": 0.7807145257027662, "grad_norm": 1.1579169034957886, "learning_rate": 6.967e-05, "loss": 0.3281, "step": 13942 }, { "epoch": 0.7807705230148952, "grad_norm": 1.1557217836380005, "learning_rate": 6.9675e-05, "loss": 0.3436, "step": 13943 }, { "epoch": 0.7808265203270243, "grad_norm": 1.1874650716781616, "learning_rate": 6.968e-05, "loss": 0.3193, "step": 13944 }, { "epoch": 0.7808825176391533, "grad_norm": 1.2706297636032104, "learning_rate": 6.9685e-05, "loss": 0.5212, "step": 13945 }, { "epoch": 0.7809385149512823, "grad_norm": 1.314117431640625, "learning_rate": 6.969e-05, "loss": 0.4, "step": 13946 }, { "epoch": 0.7809945122634113, "grad_norm": 1.414960265159607, "learning_rate": 6.9695e-05, "loss": 0.4314, "step": 13947 }, { "epoch": 0.7810505095755403, "grad_norm": 1.3312770128250122, "learning_rate": 6.97e-05, "loss": 0.3932, "step": 13948 }, { "epoch": 0.7811065068876694, "grad_norm": 1.2104490995407104, "learning_rate": 6.9705e-05, "loss": 0.371, "step": 13949 }, { "epoch": 0.7811625041997984, "grad_norm": 1.268545389175415, "learning_rate": 6.971000000000001e-05, "loss": 0.4738, "step": 13950 }, { "epoch": 0.7812185015119274, "grad_norm": 1.1525225639343262, "learning_rate": 6.9715e-05, "loss": 0.4031, "step": 13951 }, { "epoch": 0.7812744988240564, "grad_norm": 1.3071669340133667, "learning_rate": 6.972e-05, "loss": 0.401, "step": 13952 }, { "epoch": 0.7813304961361854, "grad_norm": 1.194106936454773, "learning_rate": 6.9725e-05, "loss": 
0.3375, "step": 13953 }, { "epoch": 0.7813864934483145, "grad_norm": 1.4018372297286987, "learning_rate": 6.973000000000001e-05, "loss": 0.4572, "step": 13954 }, { "epoch": 0.7814424907604435, "grad_norm": 1.109297275543213, "learning_rate": 6.973500000000001e-05, "loss": 0.4132, "step": 13955 }, { "epoch": 0.7814984880725725, "grad_norm": 1.2089378833770752, "learning_rate": 6.974e-05, "loss": 0.5602, "step": 13956 }, { "epoch": 0.7815544853847015, "grad_norm": 1.5532406568527222, "learning_rate": 6.9745e-05, "loss": 0.471, "step": 13957 }, { "epoch": 0.7816104826968305, "grad_norm": 1.2478727102279663, "learning_rate": 6.975e-05, "loss": 0.4428, "step": 13958 }, { "epoch": 0.7816664800089596, "grad_norm": 1.18569815158844, "learning_rate": 6.9755e-05, "loss": 0.3875, "step": 13959 }, { "epoch": 0.7817224773210886, "grad_norm": 1.3408461809158325, "learning_rate": 6.976000000000001e-05, "loss": 0.5038, "step": 13960 }, { "epoch": 0.7817784746332176, "grad_norm": 1.3232312202453613, "learning_rate": 6.976500000000001e-05, "loss": 0.4234, "step": 13961 }, { "epoch": 0.7818344719453466, "grad_norm": 1.516841173171997, "learning_rate": 6.977e-05, "loss": 0.6285, "step": 13962 }, { "epoch": 0.7818904692574756, "grad_norm": 1.223027229309082, "learning_rate": 6.9775e-05, "loss": 0.4015, "step": 13963 }, { "epoch": 0.7819464665696046, "grad_norm": 1.454139232635498, "learning_rate": 6.978e-05, "loss": 0.5237, "step": 13964 }, { "epoch": 0.7820024638817337, "grad_norm": 1.4948914051055908, "learning_rate": 6.9785e-05, "loss": 0.4537, "step": 13965 }, { "epoch": 0.7820584611938627, "grad_norm": 1.4777477979660034, "learning_rate": 6.979e-05, "loss": 0.4717, "step": 13966 }, { "epoch": 0.7821144585059917, "grad_norm": 1.4510791301727295, "learning_rate": 6.9795e-05, "loss": 0.5227, "step": 13967 }, { "epoch": 0.7821704558181207, "grad_norm": 1.7112095355987549, "learning_rate": 6.98e-05, "loss": 0.4873, "step": 13968 }, { "epoch": 0.7822264531302497, "grad_norm": 
1.3013123273849487, "learning_rate": 6.9805e-05, "loss": 0.352, "step": 13969 }, { "epoch": 0.7822824504423788, "grad_norm": 1.1731970310211182, "learning_rate": 6.981000000000001e-05, "loss": 0.4815, "step": 13970 }, { "epoch": 0.7823384477545078, "grad_norm": 1.0791093111038208, "learning_rate": 6.981500000000001e-05, "loss": 0.3778, "step": 13971 }, { "epoch": 0.7823944450666368, "grad_norm": 1.2413989305496216, "learning_rate": 6.982e-05, "loss": 0.3655, "step": 13972 }, { "epoch": 0.7824504423787658, "grad_norm": 1.4981213808059692, "learning_rate": 6.9825e-05, "loss": 0.3967, "step": 13973 }, { "epoch": 0.7825064396908948, "grad_norm": 1.2649314403533936, "learning_rate": 6.983e-05, "loss": 0.4468, "step": 13974 }, { "epoch": 0.7825624370030239, "grad_norm": 1.3142908811569214, "learning_rate": 6.983500000000001e-05, "loss": 0.4962, "step": 13975 }, { "epoch": 0.7826184343151529, "grad_norm": 1.1367021799087524, "learning_rate": 6.984000000000001e-05, "loss": 0.3033, "step": 13976 }, { "epoch": 0.7826744316272819, "grad_norm": 1.2410575151443481, "learning_rate": 6.9845e-05, "loss": 0.4894, "step": 13977 }, { "epoch": 0.7827304289394109, "grad_norm": 1.424251675605774, "learning_rate": 6.985e-05, "loss": 0.365, "step": 13978 }, { "epoch": 0.7827864262515399, "grad_norm": 1.9061392545700073, "learning_rate": 6.9855e-05, "loss": 0.5065, "step": 13979 }, { "epoch": 0.782842423563669, "grad_norm": 1.4891753196716309, "learning_rate": 6.986e-05, "loss": 0.4166, "step": 13980 }, { "epoch": 0.782898420875798, "grad_norm": 1.5439541339874268, "learning_rate": 6.9865e-05, "loss": 0.4611, "step": 13981 }, { "epoch": 0.782954418187927, "grad_norm": 1.2894482612609863, "learning_rate": 6.987000000000001e-05, "loss": 0.4427, "step": 13982 }, { "epoch": 0.783010415500056, "grad_norm": 1.278134822845459, "learning_rate": 6.9875e-05, "loss": 0.5133, "step": 13983 }, { "epoch": 0.783066412812185, "grad_norm": 1.510360598564148, "learning_rate": 6.988e-05, "loss": 0.4293, 
"step": 13984 }, { "epoch": 0.783122410124314, "grad_norm": 1.4213038682937622, "learning_rate": 6.9885e-05, "loss": 0.4431, "step": 13985 }, { "epoch": 0.7831784074364431, "grad_norm": 1.2273017168045044, "learning_rate": 6.989e-05, "loss": 0.4495, "step": 13986 }, { "epoch": 0.7832344047485721, "grad_norm": 1.3446868658065796, "learning_rate": 6.9895e-05, "loss": 0.4086, "step": 13987 }, { "epoch": 0.7832904020607011, "grad_norm": 1.4655638933181763, "learning_rate": 6.99e-05, "loss": 0.4838, "step": 13988 }, { "epoch": 0.7833463993728301, "grad_norm": 1.3987774848937988, "learning_rate": 6.9905e-05, "loss": 0.493, "step": 13989 }, { "epoch": 0.7834023966849591, "grad_norm": 1.6490641832351685, "learning_rate": 6.991000000000001e-05, "loss": 0.5507, "step": 13990 }, { "epoch": 0.7834583939970882, "grad_norm": 1.5870490074157715, "learning_rate": 6.991500000000001e-05, "loss": 0.5309, "step": 13991 }, { "epoch": 0.7835143913092172, "grad_norm": 1.0920445919036865, "learning_rate": 6.992000000000001e-05, "loss": 0.4331, "step": 13992 }, { "epoch": 0.7835703886213462, "grad_norm": 1.8054163455963135, "learning_rate": 6.9925e-05, "loss": 0.3573, "step": 13993 }, { "epoch": 0.7836263859334752, "grad_norm": 1.3015691041946411, "learning_rate": 6.993e-05, "loss": 0.5052, "step": 13994 }, { "epoch": 0.7836823832456042, "grad_norm": 1.448464035987854, "learning_rate": 6.9935e-05, "loss": 0.3574, "step": 13995 }, { "epoch": 0.7837383805577333, "grad_norm": 1.2324483394622803, "learning_rate": 6.994000000000001e-05, "loss": 0.4016, "step": 13996 }, { "epoch": 0.7837943778698623, "grad_norm": 1.290958046913147, "learning_rate": 6.994500000000001e-05, "loss": 0.4793, "step": 13997 }, { "epoch": 0.7838503751819913, "grad_norm": 1.1485828161239624, "learning_rate": 6.995e-05, "loss": 0.3982, "step": 13998 }, { "epoch": 0.7839063724941203, "grad_norm": 1.4403445720672607, "learning_rate": 6.9955e-05, "loss": 0.3792, "step": 13999 }, { "epoch": 0.7839623698062493, "grad_norm": 
1.1863340139389038, "learning_rate": 6.996e-05, "loss": 0.4099, "step": 14000 }, { "epoch": 0.7840183671183784, "grad_norm": 1.294132113456726, "learning_rate": 6.9965e-05, "loss": 0.4635, "step": 14001 }, { "epoch": 0.7840743644305074, "grad_norm": 1.403907060623169, "learning_rate": 6.997e-05, "loss": 0.4105, "step": 14002 }, { "epoch": 0.7841303617426364, "grad_norm": 1.434434413909912, "learning_rate": 6.997500000000001e-05, "loss": 0.4805, "step": 14003 }, { "epoch": 0.7841863590547654, "grad_norm": 1.1685349941253662, "learning_rate": 6.998e-05, "loss": 0.5337, "step": 14004 }, { "epoch": 0.7842423563668944, "grad_norm": 2.138056516647339, "learning_rate": 6.9985e-05, "loss": 0.4324, "step": 14005 }, { "epoch": 0.7842983536790235, "grad_norm": 1.5927270650863647, "learning_rate": 6.999e-05, "loss": 0.4646, "step": 14006 }, { "epoch": 0.7843543509911525, "grad_norm": 2.062490463256836, "learning_rate": 6.9995e-05, "loss": 0.6349, "step": 14007 }, { "epoch": 0.7844103483032815, "grad_norm": 1.1932083368301392, "learning_rate": 7e-05, "loss": 0.3989, "step": 14008 }, { "epoch": 0.7844663456154105, "grad_norm": 1.3754913806915283, "learning_rate": 7.0005e-05, "loss": 0.4343, "step": 14009 }, { "epoch": 0.7845223429275395, "grad_norm": 1.5349290370941162, "learning_rate": 7.001e-05, "loss": 0.4089, "step": 14010 }, { "epoch": 0.7845783402396685, "grad_norm": 1.2927325963974, "learning_rate": 7.001500000000001e-05, "loss": 0.5192, "step": 14011 }, { "epoch": 0.7846343375517976, "grad_norm": 2.542348861694336, "learning_rate": 7.002000000000001e-05, "loss": 0.5494, "step": 14012 }, { "epoch": 0.7846903348639266, "grad_norm": 1.211527943611145, "learning_rate": 7.002500000000001e-05, "loss": 0.3993, "step": 14013 }, { "epoch": 0.7847463321760555, "grad_norm": 1.3985072374343872, "learning_rate": 7.003e-05, "loss": 0.4081, "step": 14014 }, { "epoch": 0.7848023294881845, "grad_norm": 1.479193091392517, "learning_rate": 7.0035e-05, "loss": 0.569, "step": 14015 }, { 
"epoch": 0.7848583268003135, "grad_norm": 1.100593090057373, "learning_rate": 7.004e-05, "loss": 0.3533, "step": 14016 }, { "epoch": 0.7849143241124426, "grad_norm": 1.3223495483398438, "learning_rate": 7.004500000000001e-05, "loss": 0.3749, "step": 14017 }, { "epoch": 0.7849703214245716, "grad_norm": 1.162502408027649, "learning_rate": 7.005000000000001e-05, "loss": 0.4615, "step": 14018 }, { "epoch": 0.7850263187367006, "grad_norm": 1.402768611907959, "learning_rate": 7.0055e-05, "loss": 0.5245, "step": 14019 }, { "epoch": 0.7850823160488296, "grad_norm": 1.5840893983840942, "learning_rate": 7.006e-05, "loss": 0.5788, "step": 14020 }, { "epoch": 0.7851383133609586, "grad_norm": 1.3359031677246094, "learning_rate": 7.0065e-05, "loss": 0.4687, "step": 14021 }, { "epoch": 0.7851943106730876, "grad_norm": 1.5367978811264038, "learning_rate": 7.007e-05, "loss": 0.5541, "step": 14022 }, { "epoch": 0.7852503079852167, "grad_norm": 1.2812166213989258, "learning_rate": 7.0075e-05, "loss": 0.4377, "step": 14023 }, { "epoch": 0.7853063052973457, "grad_norm": 1.3251447677612305, "learning_rate": 7.008e-05, "loss": 0.405, "step": 14024 }, { "epoch": 0.7853623026094747, "grad_norm": 1.500702142715454, "learning_rate": 7.0085e-05, "loss": 0.5783, "step": 14025 }, { "epoch": 0.7854182999216037, "grad_norm": 1.5428801774978638, "learning_rate": 7.009e-05, "loss": 0.588, "step": 14026 }, { "epoch": 0.7854742972337327, "grad_norm": 1.34929358959198, "learning_rate": 7.0095e-05, "loss": 0.5225, "step": 14027 }, { "epoch": 0.7855302945458618, "grad_norm": 1.4423037767410278, "learning_rate": 7.01e-05, "loss": 0.522, "step": 14028 }, { "epoch": 0.7855862918579908, "grad_norm": 1.3482623100280762, "learning_rate": 7.0105e-05, "loss": 0.5341, "step": 14029 }, { "epoch": 0.7856422891701198, "grad_norm": 1.7001186609268188, "learning_rate": 7.011e-05, "loss": 0.5013, "step": 14030 }, { "epoch": 0.7856982864822488, "grad_norm": 1.2402490377426147, "learning_rate": 7.0115e-05, "loss": 
0.5603, "step": 14031 }, { "epoch": 0.7857542837943778, "grad_norm": 1.2932721376419067, "learning_rate": 7.012000000000001e-05, "loss": 0.4235, "step": 14032 }, { "epoch": 0.7858102811065069, "grad_norm": 1.446073293685913, "learning_rate": 7.012500000000001e-05, "loss": 0.4654, "step": 14033 }, { "epoch": 0.7858662784186359, "grad_norm": 1.7081900835037231, "learning_rate": 7.013000000000001e-05, "loss": 0.4237, "step": 14034 }, { "epoch": 0.7859222757307649, "grad_norm": 1.3166462182998657, "learning_rate": 7.0135e-05, "loss": 0.4698, "step": 14035 }, { "epoch": 0.7859782730428939, "grad_norm": 1.2614250183105469, "learning_rate": 7.014e-05, "loss": 0.4302, "step": 14036 }, { "epoch": 0.7860342703550229, "grad_norm": 1.2707799673080444, "learning_rate": 7.0145e-05, "loss": 0.4416, "step": 14037 }, { "epoch": 0.786090267667152, "grad_norm": 1.3705872297286987, "learning_rate": 7.015000000000001e-05, "loss": 0.429, "step": 14038 }, { "epoch": 0.786146264979281, "grad_norm": 1.3415343761444092, "learning_rate": 7.015500000000001e-05, "loss": 0.5315, "step": 14039 }, { "epoch": 0.78620226229141, "grad_norm": 1.2904845476150513, "learning_rate": 7.016e-05, "loss": 0.4053, "step": 14040 }, { "epoch": 0.786258259603539, "grad_norm": 1.431028127670288, "learning_rate": 7.0165e-05, "loss": 0.5252, "step": 14041 }, { "epoch": 0.786314256915668, "grad_norm": 9.211777687072754, "learning_rate": 7.017e-05, "loss": 0.4757, "step": 14042 }, { "epoch": 0.786370254227797, "grad_norm": 1.9496930837631226, "learning_rate": 7.0175e-05, "loss": 0.6311, "step": 14043 }, { "epoch": 0.7864262515399261, "grad_norm": 1.450932502746582, "learning_rate": 7.018e-05, "loss": 0.712, "step": 14044 }, { "epoch": 0.7864822488520551, "grad_norm": 1.3744438886642456, "learning_rate": 7.0185e-05, "loss": 0.4701, "step": 14045 }, { "epoch": 0.7865382461641841, "grad_norm": 1.468982219696045, "learning_rate": 7.019e-05, "loss": 0.508, "step": 14046 }, { "epoch": 0.7865942434763131, "grad_norm": 
1.406665563583374, "learning_rate": 7.0195e-05, "loss": 0.4218, "step": 14047 }, { "epoch": 0.7866502407884421, "grad_norm": 1.1718058586120605, "learning_rate": 7.02e-05, "loss": 0.3639, "step": 14048 }, { "epoch": 0.7867062381005712, "grad_norm": 1.7525266408920288, "learning_rate": 7.0205e-05, "loss": 0.4124, "step": 14049 }, { "epoch": 0.7867622354127002, "grad_norm": 1.2831170558929443, "learning_rate": 7.021e-05, "loss": 0.4299, "step": 14050 }, { "epoch": 0.7868182327248292, "grad_norm": 1.143329381942749, "learning_rate": 7.0215e-05, "loss": 0.386, "step": 14051 }, { "epoch": 0.7868742300369582, "grad_norm": 1.5027421712875366, "learning_rate": 7.022e-05, "loss": 0.4731, "step": 14052 }, { "epoch": 0.7869302273490872, "grad_norm": 1.4526931047439575, "learning_rate": 7.022500000000001e-05, "loss": 0.5402, "step": 14053 }, { "epoch": 0.7869862246612163, "grad_norm": 1.1389609575271606, "learning_rate": 7.023000000000001e-05, "loss": 0.3371, "step": 14054 }, { "epoch": 0.7870422219733453, "grad_norm": 1.5279737710952759, "learning_rate": 7.023500000000001e-05, "loss": 0.408, "step": 14055 }, { "epoch": 0.7870982192854743, "grad_norm": 1.5773791074752808, "learning_rate": 7.024e-05, "loss": 0.5432, "step": 14056 }, { "epoch": 0.7871542165976033, "grad_norm": 2.4261014461517334, "learning_rate": 7.0245e-05, "loss": 0.471, "step": 14057 }, { "epoch": 0.7872102139097323, "grad_norm": 1.2080491781234741, "learning_rate": 7.025e-05, "loss": 0.3309, "step": 14058 }, { "epoch": 0.7872662112218614, "grad_norm": 1.195871353149414, "learning_rate": 7.025500000000001e-05, "loss": 0.4542, "step": 14059 }, { "epoch": 0.7873222085339904, "grad_norm": 1.3698365688323975, "learning_rate": 7.026000000000001e-05, "loss": 0.5356, "step": 14060 }, { "epoch": 0.7873782058461194, "grad_norm": 1.4013510942459106, "learning_rate": 7.0265e-05, "loss": 0.5224, "step": 14061 }, { "epoch": 0.7874342031582484, "grad_norm": 1.2595869302749634, "learning_rate": 7.027e-05, "loss": 0.5053, 
"step": 14062 }, { "epoch": 0.7874902004703774, "grad_norm": 1.0256685018539429, "learning_rate": 7.0275e-05, "loss": 0.4091, "step": 14063 }, { "epoch": 0.7875461977825065, "grad_norm": 1.2182501554489136, "learning_rate": 7.028e-05, "loss": 0.4386, "step": 14064 }, { "epoch": 0.7876021950946355, "grad_norm": 1.3985791206359863, "learning_rate": 7.0285e-05, "loss": 0.4712, "step": 14065 }, { "epoch": 0.7876581924067645, "grad_norm": 1.2584521770477295, "learning_rate": 7.029e-05, "loss": 0.4653, "step": 14066 }, { "epoch": 0.7877141897188935, "grad_norm": 1.3298203945159912, "learning_rate": 7.0295e-05, "loss": 0.576, "step": 14067 }, { "epoch": 0.7877701870310225, "grad_norm": 1.344087839126587, "learning_rate": 7.03e-05, "loss": 0.442, "step": 14068 }, { "epoch": 0.7878261843431515, "grad_norm": 1.3564358949661255, "learning_rate": 7.0305e-05, "loss": 0.449, "step": 14069 }, { "epoch": 0.7878821816552806, "grad_norm": 1.4164124727249146, "learning_rate": 7.031e-05, "loss": 0.4238, "step": 14070 }, { "epoch": 0.7879381789674096, "grad_norm": 1.408128261566162, "learning_rate": 7.031500000000001e-05, "loss": 0.4914, "step": 14071 }, { "epoch": 0.7879941762795386, "grad_norm": 1.1966450214385986, "learning_rate": 7.032e-05, "loss": 0.3067, "step": 14072 }, { "epoch": 0.7880501735916676, "grad_norm": 1.1928439140319824, "learning_rate": 7.0325e-05, "loss": 0.3834, "step": 14073 }, { "epoch": 0.7881061709037966, "grad_norm": 1.3703701496124268, "learning_rate": 7.033000000000001e-05, "loss": 0.5714, "step": 14074 }, { "epoch": 0.7881621682159257, "grad_norm": 1.4749839305877686, "learning_rate": 7.033500000000001e-05, "loss": 0.4225, "step": 14075 }, { "epoch": 0.7882181655280547, "grad_norm": 1.267511010169983, "learning_rate": 7.034000000000001e-05, "loss": 0.511, "step": 14076 }, { "epoch": 0.7882741628401837, "grad_norm": 1.5715607404708862, "learning_rate": 7.0345e-05, "loss": 0.4602, "step": 14077 }, { "epoch": 0.7883301601523127, "grad_norm": 
1.459873080253601, "learning_rate": 7.035e-05, "loss": 0.578, "step": 14078 }, { "epoch": 0.7883861574644417, "grad_norm": 1.2043129205703735, "learning_rate": 7.0355e-05, "loss": 0.3964, "step": 14079 }, { "epoch": 0.7884421547765708, "grad_norm": 1.171628713607788, "learning_rate": 7.036e-05, "loss": 0.4532, "step": 14080 }, { "epoch": 0.7884981520886998, "grad_norm": 1.5888665914535522, "learning_rate": 7.036500000000001e-05, "loss": 0.4802, "step": 14081 }, { "epoch": 0.7885541494008288, "grad_norm": 1.1802852153778076, "learning_rate": 7.037e-05, "loss": 0.3877, "step": 14082 }, { "epoch": 0.7886101467129578, "grad_norm": 1.3492436408996582, "learning_rate": 7.0375e-05, "loss": 0.4503, "step": 14083 }, { "epoch": 0.7886661440250868, "grad_norm": 3.586104393005371, "learning_rate": 7.038e-05, "loss": 0.3421, "step": 14084 }, { "epoch": 0.7887221413372159, "grad_norm": 1.3017324209213257, "learning_rate": 7.0385e-05, "loss": 0.367, "step": 14085 }, { "epoch": 0.7887781386493449, "grad_norm": 1.4720596075057983, "learning_rate": 7.039e-05, "loss": 0.47, "step": 14086 }, { "epoch": 0.7888341359614739, "grad_norm": 1.2844620943069458, "learning_rate": 7.0395e-05, "loss": 0.3905, "step": 14087 }, { "epoch": 0.7888901332736029, "grad_norm": 1.5005204677581787, "learning_rate": 7.04e-05, "loss": 0.6698, "step": 14088 }, { "epoch": 0.7889461305857319, "grad_norm": 1.1908706426620483, "learning_rate": 7.0405e-05, "loss": 0.4621, "step": 14089 }, { "epoch": 0.789002127897861, "grad_norm": 1.265588641166687, "learning_rate": 7.041e-05, "loss": 0.422, "step": 14090 }, { "epoch": 0.78905812520999, "grad_norm": 1.56885826587677, "learning_rate": 7.041500000000001e-05, "loss": 0.4852, "step": 14091 }, { "epoch": 0.789114122522119, "grad_norm": 1.1783219575881958, "learning_rate": 7.042000000000001e-05, "loss": 0.4621, "step": 14092 }, { "epoch": 0.789170119834248, "grad_norm": 1.3888453245162964, "learning_rate": 7.0425e-05, "loss": 0.3956, "step": 14093 }, { "epoch": 
0.789226117146377, "grad_norm": 1.3621768951416016, "learning_rate": 7.043e-05, "loss": 0.6413, "step": 14094 }, { "epoch": 0.789282114458506, "grad_norm": 1.1520731449127197, "learning_rate": 7.043500000000001e-05, "loss": 0.353, "step": 14095 }, { "epoch": 0.7893381117706351, "grad_norm": 1.5501999855041504, "learning_rate": 7.044000000000001e-05, "loss": 0.5938, "step": 14096 }, { "epoch": 0.789394109082764, "grad_norm": 1.583893895149231, "learning_rate": 7.044500000000001e-05, "loss": 0.3894, "step": 14097 }, { "epoch": 0.789450106394893, "grad_norm": 1.5419281721115112, "learning_rate": 7.045e-05, "loss": 0.6311, "step": 14098 }, { "epoch": 0.789506103707022, "grad_norm": 1.2712949514389038, "learning_rate": 7.0455e-05, "loss": 0.5394, "step": 14099 }, { "epoch": 0.789562101019151, "grad_norm": 1.4368669986724854, "learning_rate": 7.046e-05, "loss": 0.4303, "step": 14100 }, { "epoch": 0.78961809833128, "grad_norm": 1.144955039024353, "learning_rate": 7.0465e-05, "loss": 0.4121, "step": 14101 }, { "epoch": 0.7896740956434091, "grad_norm": 7.278684139251709, "learning_rate": 7.047000000000001e-05, "loss": 0.3975, "step": 14102 }, { "epoch": 0.7897300929555381, "grad_norm": 1.2687106132507324, "learning_rate": 7.0475e-05, "loss": 0.3977, "step": 14103 }, { "epoch": 0.7897860902676671, "grad_norm": 1.5099725723266602, "learning_rate": 7.048e-05, "loss": 0.4205, "step": 14104 }, { "epoch": 0.7898420875797961, "grad_norm": 1.4072988033294678, "learning_rate": 7.0485e-05, "loss": 0.406, "step": 14105 }, { "epoch": 0.7898980848919251, "grad_norm": 1.2100664377212524, "learning_rate": 7.049e-05, "loss": 0.318, "step": 14106 }, { "epoch": 0.7899540822040542, "grad_norm": 1.2987401485443115, "learning_rate": 7.0495e-05, "loss": 0.4051, "step": 14107 }, { "epoch": 0.7900100795161832, "grad_norm": 1.3227951526641846, "learning_rate": 7.05e-05, "loss": 0.4309, "step": 14108 }, { "epoch": 0.7900660768283122, "grad_norm": 1.254512071609497, "learning_rate": 7.0505e-05, 
"loss": 0.4531, "step": 14109 }, { "epoch": 0.7901220741404412, "grad_norm": 1.2160489559173584, "learning_rate": 7.051e-05, "loss": 0.5015, "step": 14110 }, { "epoch": 0.7901780714525702, "grad_norm": 1.265352725982666, "learning_rate": 7.051500000000001e-05, "loss": 0.3967, "step": 14111 }, { "epoch": 0.7902340687646993, "grad_norm": 1.3767404556274414, "learning_rate": 7.052000000000001e-05, "loss": 0.4449, "step": 14112 }, { "epoch": 0.7902900660768283, "grad_norm": 1.5246636867523193, "learning_rate": 7.0525e-05, "loss": 0.5072, "step": 14113 }, { "epoch": 0.7903460633889573, "grad_norm": 1.4679063558578491, "learning_rate": 7.053e-05, "loss": 0.5503, "step": 14114 }, { "epoch": 0.7904020607010863, "grad_norm": 1.1133171319961548, "learning_rate": 7.0535e-05, "loss": 0.3749, "step": 14115 }, { "epoch": 0.7904580580132153, "grad_norm": 1.1903791427612305, "learning_rate": 7.054000000000001e-05, "loss": 0.4349, "step": 14116 }, { "epoch": 0.7905140553253444, "grad_norm": 1.286184310913086, "learning_rate": 7.054500000000001e-05, "loss": 0.3713, "step": 14117 }, { "epoch": 0.7905700526374734, "grad_norm": 1.1068525314331055, "learning_rate": 7.055000000000001e-05, "loss": 0.3936, "step": 14118 }, { "epoch": 0.7906260499496024, "grad_norm": 1.2849111557006836, "learning_rate": 7.0555e-05, "loss": 0.4914, "step": 14119 }, { "epoch": 0.7906820472617314, "grad_norm": 1.794370174407959, "learning_rate": 7.056e-05, "loss": 0.5837, "step": 14120 }, { "epoch": 0.7907380445738604, "grad_norm": 1.2527025938034058, "learning_rate": 7.0565e-05, "loss": 0.3589, "step": 14121 }, { "epoch": 0.7907940418859895, "grad_norm": 1.3123115301132202, "learning_rate": 7.057e-05, "loss": 0.4757, "step": 14122 }, { "epoch": 0.7908500391981185, "grad_norm": 1.499796986579895, "learning_rate": 7.057500000000001e-05, "loss": 0.5368, "step": 14123 }, { "epoch": 0.7909060365102475, "grad_norm": 1.1614887714385986, "learning_rate": 7.058e-05, "loss": 0.5316, "step": 14124 }, { "epoch": 
0.7909620338223765, "grad_norm": 1.4116251468658447, "learning_rate": 7.0585e-05, "loss": 0.5434, "step": 14125 }, { "epoch": 0.7910180311345055, "grad_norm": 1.3411842584609985, "learning_rate": 7.059e-05, "loss": 0.407, "step": 14126 }, { "epoch": 0.7910740284466345, "grad_norm": 1.2646327018737793, "learning_rate": 7.0595e-05, "loss": 0.4557, "step": 14127 }, { "epoch": 0.7911300257587636, "grad_norm": 1.333034634590149, "learning_rate": 7.06e-05, "loss": 0.3658, "step": 14128 }, { "epoch": 0.7911860230708926, "grad_norm": 1.3084169626235962, "learning_rate": 7.060499999999999e-05, "loss": 0.4472, "step": 14129 }, { "epoch": 0.7912420203830216, "grad_norm": 1.3331811428070068, "learning_rate": 7.061e-05, "loss": 0.4248, "step": 14130 }, { "epoch": 0.7912980176951506, "grad_norm": 1.2928749322891235, "learning_rate": 7.061500000000001e-05, "loss": 0.3972, "step": 14131 }, { "epoch": 0.7913540150072796, "grad_norm": 1.325481653213501, "learning_rate": 7.062000000000001e-05, "loss": 0.3963, "step": 14132 }, { "epoch": 0.7914100123194087, "grad_norm": 1.6766866445541382, "learning_rate": 7.062500000000001e-05, "loss": 0.5129, "step": 14133 }, { "epoch": 0.7914660096315377, "grad_norm": 1.099776268005371, "learning_rate": 7.063e-05, "loss": 0.4562, "step": 14134 }, { "epoch": 0.7915220069436667, "grad_norm": 1.5759435892105103, "learning_rate": 7.0635e-05, "loss": 0.4211, "step": 14135 }, { "epoch": 0.7915780042557957, "grad_norm": 1.4366590976715088, "learning_rate": 7.064e-05, "loss": 0.5245, "step": 14136 }, { "epoch": 0.7916340015679247, "grad_norm": 1.5051610469818115, "learning_rate": 7.064500000000001e-05, "loss": 0.6648, "step": 14137 }, { "epoch": 0.7916899988800538, "grad_norm": 1.4565407037734985, "learning_rate": 7.065000000000001e-05, "loss": 0.4574, "step": 14138 }, { "epoch": 0.7917459961921828, "grad_norm": 1.1879507303237915, "learning_rate": 7.065500000000001e-05, "loss": 0.4324, "step": 14139 }, { "epoch": 0.7918019935043118, "grad_norm": 
1.3431316614151, "learning_rate": 7.066e-05, "loss": 0.5131, "step": 14140 }, { "epoch": 0.7918579908164408, "grad_norm": 1.2967579364776611, "learning_rate": 7.0665e-05, "loss": 0.4321, "step": 14141 }, { "epoch": 0.7919139881285698, "grad_norm": 1.5854921340942383, "learning_rate": 7.067e-05, "loss": 0.5023, "step": 14142 }, { "epoch": 0.7919699854406989, "grad_norm": 1.1517387628555298, "learning_rate": 7.0675e-05, "loss": 0.3641, "step": 14143 }, { "epoch": 0.7920259827528279, "grad_norm": 1.1762385368347168, "learning_rate": 7.068000000000001e-05, "loss": 0.4396, "step": 14144 }, { "epoch": 0.7920819800649569, "grad_norm": 1.2819037437438965, "learning_rate": 7.0685e-05, "loss": 0.4133, "step": 14145 }, { "epoch": 0.7921379773770859, "grad_norm": 1.4618840217590332, "learning_rate": 7.069e-05, "loss": 0.3764, "step": 14146 }, { "epoch": 0.7921939746892149, "grad_norm": 1.475507140159607, "learning_rate": 7.0695e-05, "loss": 0.4936, "step": 14147 }, { "epoch": 0.792249972001344, "grad_norm": 1.2078789472579956, "learning_rate": 7.07e-05, "loss": 0.3813, "step": 14148 }, { "epoch": 0.792305969313473, "grad_norm": 1.1872726678848267, "learning_rate": 7.0705e-05, "loss": 0.3415, "step": 14149 }, { "epoch": 0.792361966625602, "grad_norm": 1.275092601776123, "learning_rate": 7.070999999999999e-05, "loss": 0.5457, "step": 14150 }, { "epoch": 0.792417963937731, "grad_norm": 1.3667881488800049, "learning_rate": 7.0715e-05, "loss": 0.5445, "step": 14151 }, { "epoch": 0.79247396124986, "grad_norm": 1.5850286483764648, "learning_rate": 7.072000000000001e-05, "loss": 0.7943, "step": 14152 }, { "epoch": 0.792529958561989, "grad_norm": 1.4312115907669067, "learning_rate": 7.072500000000001e-05, "loss": 0.4663, "step": 14153 }, { "epoch": 0.7925859558741181, "grad_norm": 1.304882526397705, "learning_rate": 7.073000000000001e-05, "loss": 0.4723, "step": 14154 }, { "epoch": 0.7926419531862471, "grad_norm": 1.721720576286316, "learning_rate": 7.0735e-05, "loss": 0.4387, "step": 
14155 }, { "epoch": 0.7926979504983761, "grad_norm": 1.597560167312622, "learning_rate": 7.074e-05, "loss": 0.4141, "step": 14156 }, { "epoch": 0.7927539478105051, "grad_norm": 1.3952064514160156, "learning_rate": 7.0745e-05, "loss": 0.4149, "step": 14157 }, { "epoch": 0.7928099451226341, "grad_norm": 1.1094516515731812, "learning_rate": 7.075e-05, "loss": 0.318, "step": 14158 }, { "epoch": 0.7928659424347632, "grad_norm": 1.298769235610962, "learning_rate": 7.075500000000001e-05, "loss": 0.3279, "step": 14159 }, { "epoch": 0.7929219397468922, "grad_norm": 1.2508598566055298, "learning_rate": 7.076000000000001e-05, "loss": 0.401, "step": 14160 }, { "epoch": 0.7929779370590212, "grad_norm": 1.1670008897781372, "learning_rate": 7.0765e-05, "loss": 0.355, "step": 14161 }, { "epoch": 0.7930339343711502, "grad_norm": 1.7155007123947144, "learning_rate": 7.077e-05, "loss": 0.3922, "step": 14162 }, { "epoch": 0.7930899316832792, "grad_norm": 1.4616706371307373, "learning_rate": 7.0775e-05, "loss": 0.4847, "step": 14163 }, { "epoch": 0.7931459289954083, "grad_norm": 1.3160438537597656, "learning_rate": 7.078e-05, "loss": 0.4121, "step": 14164 }, { "epoch": 0.7932019263075373, "grad_norm": 1.2417327165603638, "learning_rate": 7.078500000000001e-05, "loss": 0.5756, "step": 14165 }, { "epoch": 0.7932579236196663, "grad_norm": 1.224461555480957, "learning_rate": 7.079e-05, "loss": 0.451, "step": 14166 }, { "epoch": 0.7933139209317953, "grad_norm": 1.3468503952026367, "learning_rate": 7.0795e-05, "loss": 0.473, "step": 14167 }, { "epoch": 0.7933699182439243, "grad_norm": 1.8887944221496582, "learning_rate": 7.08e-05, "loss": 0.4912, "step": 14168 }, { "epoch": 0.7934259155560534, "grad_norm": 1.1624226570129395, "learning_rate": 7.0805e-05, "loss": 0.4854, "step": 14169 }, { "epoch": 0.7934819128681824, "grad_norm": 1.4265356063842773, "learning_rate": 7.081e-05, "loss": 0.4913, "step": 14170 }, { "epoch": 0.7935379101803114, "grad_norm": 1.379022240638733, "learning_rate": 
7.081499999999999e-05, "loss": 0.5303, "step": 14171 }, { "epoch": 0.7935939074924404, "grad_norm": 1.1742662191390991, "learning_rate": 7.082e-05, "loss": 0.3983, "step": 14172 }, { "epoch": 0.7936499048045694, "grad_norm": 1.4051592350006104, "learning_rate": 7.082500000000001e-05, "loss": 0.3314, "step": 14173 }, { "epoch": 0.7937059021166984, "grad_norm": 1.246253252029419, "learning_rate": 7.083000000000001e-05, "loss": 0.3705, "step": 14174 }, { "epoch": 0.7937618994288275, "grad_norm": 1.3388421535491943, "learning_rate": 7.083500000000001e-05, "loss": 0.5268, "step": 14175 }, { "epoch": 0.7938178967409565, "grad_norm": 1.4494574069976807, "learning_rate": 7.084e-05, "loss": 0.5604, "step": 14176 }, { "epoch": 0.7938738940530855, "grad_norm": 1.3175182342529297, "learning_rate": 7.0845e-05, "loss": 0.4475, "step": 14177 }, { "epoch": 0.7939298913652145, "grad_norm": 1.9641207456588745, "learning_rate": 7.085e-05, "loss": 0.391, "step": 14178 }, { "epoch": 0.7939858886773434, "grad_norm": 1.4529749155044556, "learning_rate": 7.0855e-05, "loss": 0.3616, "step": 14179 }, { "epoch": 0.7940418859894725, "grad_norm": 1.5091392993927002, "learning_rate": 7.086000000000001e-05, "loss": 0.6272, "step": 14180 }, { "epoch": 0.7940978833016015, "grad_norm": 1.480791687965393, "learning_rate": 7.0865e-05, "loss": 0.5521, "step": 14181 }, { "epoch": 0.7941538806137305, "grad_norm": 1.2192174196243286, "learning_rate": 7.087e-05, "loss": 0.4446, "step": 14182 }, { "epoch": 0.7942098779258595, "grad_norm": 1.1119846105575562, "learning_rate": 7.0875e-05, "loss": 0.432, "step": 14183 }, { "epoch": 0.7942658752379885, "grad_norm": 1.2087913751602173, "learning_rate": 7.088e-05, "loss": 0.4761, "step": 14184 }, { "epoch": 0.7943218725501175, "grad_norm": 1.1879780292510986, "learning_rate": 7.0885e-05, "loss": 0.4461, "step": 14185 }, { "epoch": 0.7943778698622466, "grad_norm": 2.624023914337158, "learning_rate": 7.089000000000001e-05, "loss": 0.5263, "step": 14186 }, { 
"epoch": 0.7944338671743756, "grad_norm": 1.4163124561309814, "learning_rate": 7.0895e-05, "loss": 0.4879, "step": 14187 }, { "epoch": 0.7944898644865046, "grad_norm": 1.2269207239151, "learning_rate": 7.09e-05, "loss": 0.4756, "step": 14188 }, { "epoch": 0.7945458617986336, "grad_norm": 1.2559641599655151, "learning_rate": 7.0905e-05, "loss": 0.6148, "step": 14189 }, { "epoch": 0.7946018591107626, "grad_norm": 1.4977549314498901, "learning_rate": 7.091e-05, "loss": 0.594, "step": 14190 }, { "epoch": 0.7946578564228917, "grad_norm": 1.6066761016845703, "learning_rate": 7.0915e-05, "loss": 0.3459, "step": 14191 }, { "epoch": 0.7947138537350207, "grad_norm": 1.1700197458267212, "learning_rate": 7.092e-05, "loss": 0.4241, "step": 14192 }, { "epoch": 0.7947698510471497, "grad_norm": 1.5863022804260254, "learning_rate": 7.0925e-05, "loss": 0.7132, "step": 14193 }, { "epoch": 0.7948258483592787, "grad_norm": 1.3558669090270996, "learning_rate": 7.093000000000001e-05, "loss": 0.3699, "step": 14194 }, { "epoch": 0.7948818456714077, "grad_norm": 1.8490558862686157, "learning_rate": 7.093500000000001e-05, "loss": 0.6204, "step": 14195 }, { "epoch": 0.7949378429835368, "grad_norm": 1.3741960525512695, "learning_rate": 7.094000000000001e-05, "loss": 0.4074, "step": 14196 }, { "epoch": 0.7949938402956658, "grad_norm": 1.4264682531356812, "learning_rate": 7.0945e-05, "loss": 0.4239, "step": 14197 }, { "epoch": 0.7950498376077948, "grad_norm": 1.9793999195098877, "learning_rate": 7.095e-05, "loss": 0.4577, "step": 14198 }, { "epoch": 0.7951058349199238, "grad_norm": 1.2810734510421753, "learning_rate": 7.0955e-05, "loss": 0.4092, "step": 14199 }, { "epoch": 0.7951618322320528, "grad_norm": 1.3208116292953491, "learning_rate": 7.096e-05, "loss": 0.4284, "step": 14200 }, { "epoch": 0.7952178295441819, "grad_norm": 1.63396418094635, "learning_rate": 7.096500000000001e-05, "loss": 0.5604, "step": 14201 }, { "epoch": 0.7952738268563109, "grad_norm": 1.3054780960083008, 
"learning_rate": 7.097e-05, "loss": 0.4291, "step": 14202 }, { "epoch": 0.7953298241684399, "grad_norm": 1.2806179523468018, "learning_rate": 7.0975e-05, "loss": 0.4277, "step": 14203 }, { "epoch": 0.7953858214805689, "grad_norm": 1.4290262460708618, "learning_rate": 7.098e-05, "loss": 0.5616, "step": 14204 }, { "epoch": 0.7954418187926979, "grad_norm": 1.2974903583526611, "learning_rate": 7.0985e-05, "loss": 0.3513, "step": 14205 }, { "epoch": 0.795497816104827, "grad_norm": 1.2196741104125977, "learning_rate": 7.099e-05, "loss": 0.4768, "step": 14206 }, { "epoch": 0.795553813416956, "grad_norm": 2.9551949501037598, "learning_rate": 7.0995e-05, "loss": 0.6393, "step": 14207 }, { "epoch": 0.795609810729085, "grad_norm": 1.1885533332824707, "learning_rate": 7.1e-05, "loss": 0.4344, "step": 14208 }, { "epoch": 0.795665808041214, "grad_norm": 1.4281381368637085, "learning_rate": 7.1005e-05, "loss": 0.4298, "step": 14209 }, { "epoch": 0.795721805353343, "grad_norm": 1.1432418823242188, "learning_rate": 7.101e-05, "loss": 0.4367, "step": 14210 }, { "epoch": 0.795777802665472, "grad_norm": 1.2060902118682861, "learning_rate": 7.1015e-05, "loss": 0.4498, "step": 14211 }, { "epoch": 0.7958337999776011, "grad_norm": 1.2184255123138428, "learning_rate": 7.102000000000001e-05, "loss": 0.4082, "step": 14212 }, { "epoch": 0.7958897972897301, "grad_norm": 1.3108534812927246, "learning_rate": 7.1025e-05, "loss": 0.4099, "step": 14213 }, { "epoch": 0.7959457946018591, "grad_norm": 1.232239007949829, "learning_rate": 7.103e-05, "loss": 0.6333, "step": 14214 }, { "epoch": 0.7960017919139881, "grad_norm": 1.3570442199707031, "learning_rate": 7.103500000000001e-05, "loss": 0.5427, "step": 14215 }, { "epoch": 0.7960577892261171, "grad_norm": 1.299333930015564, "learning_rate": 7.104000000000001e-05, "loss": 0.4296, "step": 14216 }, { "epoch": 0.7961137865382462, "grad_norm": 1.7934577465057373, "learning_rate": 7.104500000000001e-05, "loss": 0.6227, "step": 14217 }, { "epoch": 
0.7961697838503752, "grad_norm": 1.5457813739776611, "learning_rate": 7.105e-05, "loss": 0.4957, "step": 14218 }, { "epoch": 0.7962257811625042, "grad_norm": 1.2532612085342407, "learning_rate": 7.1055e-05, "loss": 0.3346, "step": 14219 }, { "epoch": 0.7962817784746332, "grad_norm": 2.023634910583496, "learning_rate": 7.106e-05, "loss": 0.44, "step": 14220 }, { "epoch": 0.7963377757867622, "grad_norm": 6.002133369445801, "learning_rate": 7.1065e-05, "loss": 0.4516, "step": 14221 }, { "epoch": 0.7963937730988913, "grad_norm": 1.5097990036010742, "learning_rate": 7.107000000000001e-05, "loss": 0.5253, "step": 14222 }, { "epoch": 0.7964497704110203, "grad_norm": 1.0850706100463867, "learning_rate": 7.1075e-05, "loss": 0.3218, "step": 14223 }, { "epoch": 0.7965057677231493, "grad_norm": 1.238458514213562, "learning_rate": 7.108e-05, "loss": 0.4333, "step": 14224 }, { "epoch": 0.7965617650352783, "grad_norm": 1.1130365133285522, "learning_rate": 7.1085e-05, "loss": 0.4513, "step": 14225 }, { "epoch": 0.7966177623474073, "grad_norm": 1.3274425268173218, "learning_rate": 7.109e-05, "loss": 0.4381, "step": 14226 }, { "epoch": 0.7966737596595364, "grad_norm": 1.3055750131607056, "learning_rate": 7.1095e-05, "loss": 0.4583, "step": 14227 }, { "epoch": 0.7967297569716654, "grad_norm": 1.180130958557129, "learning_rate": 7.11e-05, "loss": 0.4189, "step": 14228 }, { "epoch": 0.7967857542837944, "grad_norm": 1.506287932395935, "learning_rate": 7.1105e-05, "loss": 0.5207, "step": 14229 }, { "epoch": 0.7968417515959234, "grad_norm": 1.1442244052886963, "learning_rate": 7.111e-05, "loss": 0.5126, "step": 14230 }, { "epoch": 0.7968977489080524, "grad_norm": 1.3099751472473145, "learning_rate": 7.1115e-05, "loss": 0.566, "step": 14231 }, { "epoch": 0.7969537462201814, "grad_norm": 1.828741192817688, "learning_rate": 7.112000000000001e-05, "loss": 0.3673, "step": 14232 }, { "epoch": 0.7970097435323105, "grad_norm": 1.4499508142471313, "learning_rate": 7.112500000000001e-05, "loss": 
0.5663, "step": 14233 }, { "epoch": 0.7970657408444395, "grad_norm": 1.4727813005447388, "learning_rate": 7.113e-05, "loss": 0.5223, "step": 14234 }, { "epoch": 0.7971217381565685, "grad_norm": 1.4278827905654907, "learning_rate": 7.1135e-05, "loss": 0.5051, "step": 14235 }, { "epoch": 0.7971777354686975, "grad_norm": 2.509204387664795, "learning_rate": 7.114e-05, "loss": 0.5877, "step": 14236 }, { "epoch": 0.7972337327808265, "grad_norm": 1.3308311700820923, "learning_rate": 7.114500000000001e-05, "loss": 0.4522, "step": 14237 }, { "epoch": 0.7972897300929556, "grad_norm": 1.5752297639846802, "learning_rate": 7.115000000000001e-05, "loss": 0.43, "step": 14238 }, { "epoch": 0.7973457274050846, "grad_norm": 2.0403895378112793, "learning_rate": 7.1155e-05, "loss": 0.5288, "step": 14239 }, { "epoch": 0.7974017247172136, "grad_norm": 1.2725965976715088, "learning_rate": 7.116e-05, "loss": 0.5799, "step": 14240 }, { "epoch": 0.7974577220293426, "grad_norm": 1.2783291339874268, "learning_rate": 7.1165e-05, "loss": 0.4713, "step": 14241 }, { "epoch": 0.7975137193414716, "grad_norm": 1.2267341613769531, "learning_rate": 7.117e-05, "loss": 0.3125, "step": 14242 }, { "epoch": 0.7975697166536007, "grad_norm": 1.5969126224517822, "learning_rate": 7.117500000000001e-05, "loss": 0.4726, "step": 14243 }, { "epoch": 0.7976257139657297, "grad_norm": 1.3979538679122925, "learning_rate": 7.118e-05, "loss": 0.3592, "step": 14244 }, { "epoch": 0.7976817112778587, "grad_norm": 1.6626211404800415, "learning_rate": 7.1185e-05, "loss": 0.4393, "step": 14245 }, { "epoch": 0.7977377085899877, "grad_norm": 1.2637313604354858, "learning_rate": 7.119e-05, "loss": 0.3928, "step": 14246 }, { "epoch": 0.7977937059021167, "grad_norm": 1.3991094827651978, "learning_rate": 7.1195e-05, "loss": 0.452, "step": 14247 }, { "epoch": 0.7978497032142458, "grad_norm": 1.4132726192474365, "learning_rate": 7.12e-05, "loss": 0.4239, "step": 14248 }, { "epoch": 0.7979057005263748, "grad_norm": 1.7055772542953491, 
"learning_rate": 7.1205e-05, "loss": 0.6003, "step": 14249 }, { "epoch": 0.7979616978385038, "grad_norm": 1.3924280405044556, "learning_rate": 7.121e-05, "loss": 0.5031, "step": 14250 }, { "epoch": 0.7980176951506328, "grad_norm": 1.4206241369247437, "learning_rate": 7.1215e-05, "loss": 0.4005, "step": 14251 }, { "epoch": 0.7980736924627618, "grad_norm": 1.3712095022201538, "learning_rate": 7.122000000000001e-05, "loss": 0.4737, "step": 14252 }, { "epoch": 0.7981296897748908, "grad_norm": 1.3633575439453125, "learning_rate": 7.122500000000001e-05, "loss": 0.6029, "step": 14253 }, { "epoch": 0.7981856870870199, "grad_norm": 1.168293833732605, "learning_rate": 7.123000000000001e-05, "loss": 0.5213, "step": 14254 }, { "epoch": 0.7982416843991489, "grad_norm": 1.3674473762512207, "learning_rate": 7.1235e-05, "loss": 0.4504, "step": 14255 }, { "epoch": 0.7982976817112779, "grad_norm": 1.1504228115081787, "learning_rate": 7.124e-05, "loss": 0.431, "step": 14256 }, { "epoch": 0.7983536790234069, "grad_norm": 1.3997844457626343, "learning_rate": 7.1245e-05, "loss": 0.5008, "step": 14257 }, { "epoch": 0.798409676335536, "grad_norm": 1.2385599613189697, "learning_rate": 7.125000000000001e-05, "loss": 0.3936, "step": 14258 }, { "epoch": 0.798465673647665, "grad_norm": 1.5361371040344238, "learning_rate": 7.125500000000001e-05, "loss": 0.3703, "step": 14259 }, { "epoch": 0.798521670959794, "grad_norm": 1.2596443891525269, "learning_rate": 7.126e-05, "loss": 0.4015, "step": 14260 }, { "epoch": 0.798577668271923, "grad_norm": 1.484503149986267, "learning_rate": 7.1265e-05, "loss": 0.4397, "step": 14261 }, { "epoch": 0.7986336655840519, "grad_norm": 1.2304725646972656, "learning_rate": 7.127e-05, "loss": 0.3993, "step": 14262 }, { "epoch": 0.7986896628961809, "grad_norm": 1.341511607170105, "learning_rate": 7.1275e-05, "loss": 0.4711, "step": 14263 }, { "epoch": 0.79874566020831, "grad_norm": 1.4946208000183105, "learning_rate": 7.128000000000001e-05, "loss": 0.475, "step": 14264 
}, { "epoch": 0.798801657520439, "grad_norm": 1.28714919090271, "learning_rate": 7.1285e-05, "loss": 0.4342, "step": 14265 }, { "epoch": 0.798857654832568, "grad_norm": 1.2437433004379272, "learning_rate": 7.129e-05, "loss": 0.5012, "step": 14266 }, { "epoch": 0.798913652144697, "grad_norm": 1.260019302368164, "learning_rate": 7.1295e-05, "loss": 0.3945, "step": 14267 }, { "epoch": 0.798969649456826, "grad_norm": 1.257361888885498, "learning_rate": 7.13e-05, "loss": 0.4328, "step": 14268 }, { "epoch": 0.799025646768955, "grad_norm": 1.4107636213302612, "learning_rate": 7.1305e-05, "loss": 0.5142, "step": 14269 }, { "epoch": 0.7990816440810841, "grad_norm": 1.4554567337036133, "learning_rate": 7.130999999999999e-05, "loss": 0.4655, "step": 14270 }, { "epoch": 0.7991376413932131, "grad_norm": 1.271363615989685, "learning_rate": 7.1315e-05, "loss": 0.4211, "step": 14271 }, { "epoch": 0.7991936387053421, "grad_norm": 1.533981204032898, "learning_rate": 7.132e-05, "loss": 0.3774, "step": 14272 }, { "epoch": 0.7992496360174711, "grad_norm": 1.338004231452942, "learning_rate": 7.132500000000001e-05, "loss": 0.4101, "step": 14273 }, { "epoch": 0.7993056333296001, "grad_norm": 1.4239612817764282, "learning_rate": 7.133000000000001e-05, "loss": 0.4688, "step": 14274 }, { "epoch": 0.7993616306417292, "grad_norm": 1.2443686723709106, "learning_rate": 7.133500000000001e-05, "loss": 0.452, "step": 14275 }, { "epoch": 0.7994176279538582, "grad_norm": 2.020554780960083, "learning_rate": 7.134e-05, "loss": 0.5314, "step": 14276 }, { "epoch": 0.7994736252659872, "grad_norm": 1.0896626710891724, "learning_rate": 7.1345e-05, "loss": 0.4325, "step": 14277 }, { "epoch": 0.7995296225781162, "grad_norm": 1.2125056982040405, "learning_rate": 7.135e-05, "loss": 0.3837, "step": 14278 }, { "epoch": 0.7995856198902452, "grad_norm": 1.316235065460205, "learning_rate": 7.135500000000001e-05, "loss": 0.5725, "step": 14279 }, { "epoch": 0.7996416172023743, "grad_norm": 1.1320780515670776, 
"learning_rate": 7.136000000000001e-05, "loss": 0.3444, "step": 14280 }, { "epoch": 0.7996976145145033, "grad_norm": 1.5037559270858765, "learning_rate": 7.1365e-05, "loss": 0.6402, "step": 14281 }, { "epoch": 0.7997536118266323, "grad_norm": 1.3142647743225098, "learning_rate": 7.137e-05, "loss": 0.4549, "step": 14282 }, { "epoch": 0.7998096091387613, "grad_norm": 1.2375869750976562, "learning_rate": 7.1375e-05, "loss": 0.3238, "step": 14283 }, { "epoch": 0.7998656064508903, "grad_norm": 1.1469448804855347, "learning_rate": 7.138e-05, "loss": 0.418, "step": 14284 }, { "epoch": 0.7999216037630194, "grad_norm": 1.3749374151229858, "learning_rate": 7.138500000000001e-05, "loss": 0.5568, "step": 14285 }, { "epoch": 0.7999776010751484, "grad_norm": 1.19504976272583, "learning_rate": 7.139e-05, "loss": 0.3862, "step": 14286 }, { "epoch": 0.8000335983872774, "grad_norm": 5.354552745819092, "learning_rate": 7.1395e-05, "loss": 0.4797, "step": 14287 }, { "epoch": 0.8000895956994064, "grad_norm": 1.2926965951919556, "learning_rate": 7.14e-05, "loss": 0.3903, "step": 14288 }, { "epoch": 0.8001455930115354, "grad_norm": 1.509556770324707, "learning_rate": 7.1405e-05, "loss": 0.4057, "step": 14289 }, { "epoch": 0.8002015903236644, "grad_norm": 1.476244330406189, "learning_rate": 7.141e-05, "loss": 0.5082, "step": 14290 }, { "epoch": 0.8002575876357935, "grad_norm": 1.268317699432373, "learning_rate": 7.141499999999999e-05, "loss": 0.4666, "step": 14291 }, { "epoch": 0.8003135849479225, "grad_norm": 1.8188954591751099, "learning_rate": 7.142e-05, "loss": 0.4773, "step": 14292 }, { "epoch": 0.8003695822600515, "grad_norm": 1.49734628200531, "learning_rate": 7.142500000000001e-05, "loss": 0.4786, "step": 14293 }, { "epoch": 0.8004255795721805, "grad_norm": 2.688525438308716, "learning_rate": 7.143000000000001e-05, "loss": 0.4933, "step": 14294 }, { "epoch": 0.8004815768843095, "grad_norm": 1.2296005487442017, "learning_rate": 7.143500000000001e-05, "loss": 0.3708, "step": 14295 
}, { "epoch": 0.8005375741964386, "grad_norm": 1.2410680055618286, "learning_rate": 7.144000000000001e-05, "loss": 0.4211, "step": 14296 }, { "epoch": 0.8005935715085676, "grad_norm": 1.3349018096923828, "learning_rate": 7.1445e-05, "loss": 0.5248, "step": 14297 }, { "epoch": 0.8006495688206966, "grad_norm": 1.2120205163955688, "learning_rate": 7.145e-05, "loss": 0.4427, "step": 14298 }, { "epoch": 0.8007055661328256, "grad_norm": 1.176427960395813, "learning_rate": 7.1455e-05, "loss": 0.5057, "step": 14299 }, { "epoch": 0.8007615634449546, "grad_norm": 1.1894642114639282, "learning_rate": 7.146000000000001e-05, "loss": 0.3401, "step": 14300 }, { "epoch": 0.8008175607570837, "grad_norm": 1.3587576150894165, "learning_rate": 7.146500000000001e-05, "loss": 0.5311, "step": 14301 }, { "epoch": 0.8008735580692127, "grad_norm": 1.4819519519805908, "learning_rate": 7.147e-05, "loss": 0.6611, "step": 14302 }, { "epoch": 0.8009295553813417, "grad_norm": 1.2229000329971313, "learning_rate": 7.1475e-05, "loss": 0.42, "step": 14303 }, { "epoch": 0.8009855526934707, "grad_norm": 1.2849124670028687, "learning_rate": 7.148e-05, "loss": 0.4447, "step": 14304 }, { "epoch": 0.8010415500055997, "grad_norm": 1.5820670127868652, "learning_rate": 7.1485e-05, "loss": 0.5244, "step": 14305 }, { "epoch": 0.8010975473177288, "grad_norm": 1.4715663194656372, "learning_rate": 7.149e-05, "loss": 0.4639, "step": 14306 }, { "epoch": 0.8011535446298578, "grad_norm": 1.313481330871582, "learning_rate": 7.1495e-05, "loss": 0.341, "step": 14307 }, { "epoch": 0.8012095419419868, "grad_norm": 3.2087135314941406, "learning_rate": 7.15e-05, "loss": 0.544, "step": 14308 }, { "epoch": 0.8012655392541158, "grad_norm": 1.0600281953811646, "learning_rate": 7.1505e-05, "loss": 0.3455, "step": 14309 }, { "epoch": 0.8013215365662448, "grad_norm": 1.1321611404418945, "learning_rate": 7.151e-05, "loss": 0.4154, "step": 14310 }, { "epoch": 0.8013775338783738, "grad_norm": 1.4016467332839966, "learning_rate": 
7.1515e-05, "loss": 0.4927, "step": 14311 }, { "epoch": 0.8014335311905029, "grad_norm": 1.1130542755126953, "learning_rate": 7.151999999999999e-05, "loss": 0.3972, "step": 14312 }, { "epoch": 0.8014895285026319, "grad_norm": 1.4507575035095215, "learning_rate": 7.1525e-05, "loss": 0.399, "step": 14313 }, { "epoch": 0.8015455258147609, "grad_norm": 1.2351462841033936, "learning_rate": 7.153000000000001e-05, "loss": 0.3804, "step": 14314 }, { "epoch": 0.8016015231268899, "grad_norm": 1.241011381149292, "learning_rate": 7.153500000000001e-05, "loss": 0.4423, "step": 14315 }, { "epoch": 0.801657520439019, "grad_norm": 1.063472032546997, "learning_rate": 7.154000000000001e-05, "loss": 0.2882, "step": 14316 }, { "epoch": 0.801713517751148, "grad_norm": 1.2055721282958984, "learning_rate": 7.154500000000001e-05, "loss": 0.4239, "step": 14317 }, { "epoch": 0.801769515063277, "grad_norm": 1.5081279277801514, "learning_rate": 7.155e-05, "loss": 0.413, "step": 14318 }, { "epoch": 0.801825512375406, "grad_norm": 1.3589757680892944, "learning_rate": 7.1555e-05, "loss": 0.407, "step": 14319 }, { "epoch": 0.801881509687535, "grad_norm": 1.2902450561523438, "learning_rate": 7.156e-05, "loss": 0.3748, "step": 14320 }, { "epoch": 0.801937506999664, "grad_norm": 1.3496792316436768, "learning_rate": 7.156500000000001e-05, "loss": 0.3717, "step": 14321 }, { "epoch": 0.8019935043117931, "grad_norm": 1.3816170692443848, "learning_rate": 7.157000000000001e-05, "loss": 0.3834, "step": 14322 }, { "epoch": 0.8020495016239221, "grad_norm": 1.2812403440475464, "learning_rate": 7.1575e-05, "loss": 0.4117, "step": 14323 }, { "epoch": 0.8021054989360511, "grad_norm": 1.4883240461349487, "learning_rate": 7.158e-05, "loss": 0.4894, "step": 14324 }, { "epoch": 0.8021614962481801, "grad_norm": 1.299131989479065, "learning_rate": 7.1585e-05, "loss": 0.5771, "step": 14325 }, { "epoch": 0.8022174935603091, "grad_norm": 1.1620566844940186, "learning_rate": 7.159e-05, "loss": 0.5, "step": 14326 }, { 
"epoch": 0.8022734908724382, "grad_norm": 1.3025226593017578, "learning_rate": 7.1595e-05, "loss": 0.4366, "step": 14327 }, { "epoch": 0.8023294881845672, "grad_norm": 1.404446005821228, "learning_rate": 7.16e-05, "loss": 0.7016, "step": 14328 }, { "epoch": 0.8023854854966962, "grad_norm": 1.3371330499649048, "learning_rate": 7.1605e-05, "loss": 0.5923, "step": 14329 }, { "epoch": 0.8024414828088252, "grad_norm": 1.2328046560287476, "learning_rate": 7.161e-05, "loss": 0.4832, "step": 14330 }, { "epoch": 0.8024974801209542, "grad_norm": 1.2135400772094727, "learning_rate": 7.1615e-05, "loss": 0.39, "step": 14331 }, { "epoch": 0.8025534774330833, "grad_norm": 1.1489354372024536, "learning_rate": 7.162e-05, "loss": 0.4436, "step": 14332 }, { "epoch": 0.8026094747452123, "grad_norm": 1.3100652694702148, "learning_rate": 7.1625e-05, "loss": 0.6064, "step": 14333 }, { "epoch": 0.8026654720573413, "grad_norm": 1.2295804023742676, "learning_rate": 7.163e-05, "loss": 0.3946, "step": 14334 }, { "epoch": 0.8027214693694703, "grad_norm": 3.268707513809204, "learning_rate": 7.1635e-05, "loss": 0.4619, "step": 14335 }, { "epoch": 0.8027774666815993, "grad_norm": 1.4066771268844604, "learning_rate": 7.164000000000001e-05, "loss": 0.5977, "step": 14336 }, { "epoch": 0.8028334639937283, "grad_norm": 1.3116049766540527, "learning_rate": 7.164500000000001e-05, "loss": 0.4666, "step": 14337 }, { "epoch": 0.8028894613058574, "grad_norm": 1.5711578130722046, "learning_rate": 7.165000000000001e-05, "loss": 0.4854, "step": 14338 }, { "epoch": 0.8029454586179864, "grad_norm": 1.2447172403335571, "learning_rate": 7.1655e-05, "loss": 0.4169, "step": 14339 }, { "epoch": 0.8030014559301154, "grad_norm": 1.405240535736084, "learning_rate": 7.166e-05, "loss": 0.7066, "step": 14340 }, { "epoch": 0.8030574532422444, "grad_norm": 1.3452883958816528, "learning_rate": 7.1665e-05, "loss": 0.4707, "step": 14341 }, { "epoch": 0.8031134505543734, "grad_norm": 1.3635317087173462, "learning_rate": 
7.167000000000001e-05, "loss": 0.448, "step": 14342 }, { "epoch": 0.8031694478665025, "grad_norm": 1.2283295392990112, "learning_rate": 7.167500000000001e-05, "loss": 0.4203, "step": 14343 }, { "epoch": 0.8032254451786315, "grad_norm": 1.4867265224456787, "learning_rate": 7.168e-05, "loss": 0.4884, "step": 14344 }, { "epoch": 0.8032814424907604, "grad_norm": 1.251662015914917, "learning_rate": 7.1685e-05, "loss": 0.4322, "step": 14345 }, { "epoch": 0.8033374398028894, "grad_norm": 1.4250129461288452, "learning_rate": 7.169e-05, "loss": 0.5191, "step": 14346 }, { "epoch": 0.8033934371150184, "grad_norm": 1.3150770664215088, "learning_rate": 7.1695e-05, "loss": 0.4644, "step": 14347 }, { "epoch": 0.8034494344271474, "grad_norm": 1.645158052444458, "learning_rate": 7.17e-05, "loss": 0.4428, "step": 14348 }, { "epoch": 0.8035054317392765, "grad_norm": 1.4586005210876465, "learning_rate": 7.1705e-05, "loss": 0.5313, "step": 14349 }, { "epoch": 0.8035614290514055, "grad_norm": 1.369107961654663, "learning_rate": 7.171e-05, "loss": 0.4005, "step": 14350 }, { "epoch": 0.8036174263635345, "grad_norm": 1.3012665510177612, "learning_rate": 7.1715e-05, "loss": 0.5529, "step": 14351 }, { "epoch": 0.8036734236756635, "grad_norm": 1.325852632522583, "learning_rate": 7.172e-05, "loss": 0.3732, "step": 14352 }, { "epoch": 0.8037294209877925, "grad_norm": 1.499667763710022, "learning_rate": 7.172500000000001e-05, "loss": 0.4682, "step": 14353 }, { "epoch": 0.8037854182999216, "grad_norm": 1.3996325731277466, "learning_rate": 7.173e-05, "loss": 0.4142, "step": 14354 }, { "epoch": 0.8038414156120506, "grad_norm": 1.36122465133667, "learning_rate": 7.1735e-05, "loss": 0.4767, "step": 14355 }, { "epoch": 0.8038974129241796, "grad_norm": 1.3188210725784302, "learning_rate": 7.174e-05, "loss": 0.5409, "step": 14356 }, { "epoch": 0.8039534102363086, "grad_norm": 1.2389452457427979, "learning_rate": 7.174500000000001e-05, "loss": 0.4215, "step": 14357 }, { "epoch": 0.8040094075484376, 
"grad_norm": 1.310027003288269, "learning_rate": 7.175000000000001e-05, "loss": 0.4265, "step": 14358 }, { "epoch": 0.8040654048605667, "grad_norm": 1.260506272315979, "learning_rate": 7.1755e-05, "loss": 0.398, "step": 14359 }, { "epoch": 0.8041214021726957, "grad_norm": 1.1169949769973755, "learning_rate": 7.176e-05, "loss": 0.4912, "step": 14360 }, { "epoch": 0.8041773994848247, "grad_norm": 1.4652414321899414, "learning_rate": 7.1765e-05, "loss": 0.4644, "step": 14361 }, { "epoch": 0.8042333967969537, "grad_norm": 1.5234915018081665, "learning_rate": 7.177e-05, "loss": 0.5104, "step": 14362 }, { "epoch": 0.8042893941090827, "grad_norm": 1.2871246337890625, "learning_rate": 7.177500000000001e-05, "loss": 0.3849, "step": 14363 }, { "epoch": 0.8043453914212118, "grad_norm": 1.1576374769210815, "learning_rate": 7.178000000000001e-05, "loss": 0.4857, "step": 14364 }, { "epoch": 0.8044013887333408, "grad_norm": 1.2182807922363281, "learning_rate": 7.1785e-05, "loss": 0.4108, "step": 14365 }, { "epoch": 0.8044573860454698, "grad_norm": 1.3278043270111084, "learning_rate": 7.179e-05, "loss": 0.379, "step": 14366 }, { "epoch": 0.8045133833575988, "grad_norm": 1.2018606662750244, "learning_rate": 7.1795e-05, "loss": 0.4943, "step": 14367 }, { "epoch": 0.8045693806697278, "grad_norm": 1.2104520797729492, "learning_rate": 7.18e-05, "loss": 0.4814, "step": 14368 }, { "epoch": 0.8046253779818568, "grad_norm": 1.061835765838623, "learning_rate": 7.1805e-05, "loss": 0.3731, "step": 14369 }, { "epoch": 0.8046813752939859, "grad_norm": 1.4871950149536133, "learning_rate": 7.181e-05, "loss": 0.5726, "step": 14370 }, { "epoch": 0.8047373726061149, "grad_norm": 1.4260612726211548, "learning_rate": 7.1815e-05, "loss": 0.4471, "step": 14371 }, { "epoch": 0.8047933699182439, "grad_norm": 1.37614107131958, "learning_rate": 7.182e-05, "loss": 0.4413, "step": 14372 }, { "epoch": 0.8048493672303729, "grad_norm": 1.2057019472122192, "learning_rate": 7.182500000000001e-05, "loss": 0.4537, 
"step": 14373 }, { "epoch": 0.8049053645425019, "grad_norm": 1.2680689096450806, "learning_rate": 7.183000000000001e-05, "loss": 0.5552, "step": 14374 }, { "epoch": 0.804961361854631, "grad_norm": 1.451594591140747, "learning_rate": 7.1835e-05, "loss": 0.5571, "step": 14375 }, { "epoch": 0.80501735916676, "grad_norm": 1.197312355041504, "learning_rate": 7.184e-05, "loss": 0.3676, "step": 14376 }, { "epoch": 0.805073356478889, "grad_norm": 1.3863509893417358, "learning_rate": 7.1845e-05, "loss": 0.4757, "step": 14377 }, { "epoch": 0.805129353791018, "grad_norm": 1.2120007276535034, "learning_rate": 7.185000000000001e-05, "loss": 0.4002, "step": 14378 }, { "epoch": 0.805185351103147, "grad_norm": 1.3701173067092896, "learning_rate": 7.185500000000001e-05, "loss": 0.4783, "step": 14379 }, { "epoch": 0.8052413484152761, "grad_norm": 1.5542365312576294, "learning_rate": 7.186e-05, "loss": 0.3459, "step": 14380 }, { "epoch": 0.8052973457274051, "grad_norm": 1.4818055629730225, "learning_rate": 7.1865e-05, "loss": 0.5042, "step": 14381 }, { "epoch": 0.8053533430395341, "grad_norm": 1.4172481298446655, "learning_rate": 7.187e-05, "loss": 0.4043, "step": 14382 }, { "epoch": 0.8054093403516631, "grad_norm": 1.235385775566101, "learning_rate": 7.1875e-05, "loss": 0.3711, "step": 14383 }, { "epoch": 0.8054653376637921, "grad_norm": 1.370848536491394, "learning_rate": 7.188e-05, "loss": 0.3752, "step": 14384 }, { "epoch": 0.8055213349759212, "grad_norm": 1.4222896099090576, "learning_rate": 7.188500000000001e-05, "loss": 0.4127, "step": 14385 }, { "epoch": 0.8055773322880502, "grad_norm": 1.5399459600448608, "learning_rate": 7.189e-05, "loss": 0.4703, "step": 14386 }, { "epoch": 0.8056333296001792, "grad_norm": 1.208827018737793, "learning_rate": 7.1895e-05, "loss": 0.4682, "step": 14387 }, { "epoch": 0.8056893269123082, "grad_norm": 1.3453835248947144, "learning_rate": 7.19e-05, "loss": 0.4047, "step": 14388 }, { "epoch": 0.8057453242244372, "grad_norm": 1.356747031211853, 
"learning_rate": 7.1905e-05, "loss": 0.6252, "step": 14389 }, { "epoch": 0.8058013215365663, "grad_norm": 1.1763570308685303, "learning_rate": 7.191e-05, "loss": 0.4417, "step": 14390 }, { "epoch": 0.8058573188486953, "grad_norm": 1.4643850326538086, "learning_rate": 7.1915e-05, "loss": 0.6276, "step": 14391 }, { "epoch": 0.8059133161608243, "grad_norm": 1.474872350692749, "learning_rate": 7.192e-05, "loss": 0.4014, "step": 14392 }, { "epoch": 0.8059693134729533, "grad_norm": 1.3869831562042236, "learning_rate": 7.1925e-05, "loss": 0.4205, "step": 14393 }, { "epoch": 0.8060253107850823, "grad_norm": 1.3573460578918457, "learning_rate": 7.193000000000001e-05, "loss": 0.4208, "step": 14394 }, { "epoch": 0.8060813080972113, "grad_norm": 1.432698130607605, "learning_rate": 7.193500000000001e-05, "loss": 0.4238, "step": 14395 }, { "epoch": 0.8061373054093404, "grad_norm": 1.7275192737579346, "learning_rate": 7.194e-05, "loss": 0.4974, "step": 14396 }, { "epoch": 0.8061933027214694, "grad_norm": 1.3129981756210327, "learning_rate": 7.1945e-05, "loss": 0.4118, "step": 14397 }, { "epoch": 0.8062493000335984, "grad_norm": 1.3502404689788818, "learning_rate": 7.195e-05, "loss": 0.4085, "step": 14398 }, { "epoch": 0.8063052973457274, "grad_norm": 1.3459618091583252, "learning_rate": 7.195500000000001e-05, "loss": 0.4328, "step": 14399 }, { "epoch": 0.8063612946578564, "grad_norm": 1.3390995264053345, "learning_rate": 7.196000000000001e-05, "loss": 0.3985, "step": 14400 }, { "epoch": 0.8064172919699855, "grad_norm": 1.316535472869873, "learning_rate": 7.1965e-05, "loss": 0.4415, "step": 14401 }, { "epoch": 0.8064732892821145, "grad_norm": 1.365905523300171, "learning_rate": 7.197e-05, "loss": 0.4783, "step": 14402 }, { "epoch": 0.8065292865942435, "grad_norm": 1.3806086778640747, "learning_rate": 7.1975e-05, "loss": 0.4347, "step": 14403 }, { "epoch": 0.8065852839063725, "grad_norm": 1.2446452379226685, "learning_rate": 7.198e-05, "loss": 0.5143, "step": 14404 }, { "epoch": 
0.8066412812185015, "grad_norm": 1.1861881017684937, "learning_rate": 7.1985e-05, "loss": 0.3499, "step": 14405 }, { "epoch": 0.8066972785306306, "grad_norm": 1.2290480136871338, "learning_rate": 7.199000000000001e-05, "loss": 0.397, "step": 14406 }, { "epoch": 0.8067532758427596, "grad_norm": 1.3085788488388062, "learning_rate": 7.1995e-05, "loss": 0.3467, "step": 14407 }, { "epoch": 0.8068092731548886, "grad_norm": 1.5729981660842896, "learning_rate": 7.2e-05, "loss": 0.4784, "step": 14408 }, { "epoch": 0.8068652704670176, "grad_norm": 1.4126801490783691, "learning_rate": 7.2005e-05, "loss": 0.4939, "step": 14409 }, { "epoch": 0.8069212677791466, "grad_norm": 1.1600823402404785, "learning_rate": 7.201e-05, "loss": 0.4275, "step": 14410 }, { "epoch": 0.8069772650912757, "grad_norm": 1.2501877546310425, "learning_rate": 7.2015e-05, "loss": 0.3635, "step": 14411 }, { "epoch": 0.8070332624034047, "grad_norm": 1.3730149269104004, "learning_rate": 7.202e-05, "loss": 0.4545, "step": 14412 }, { "epoch": 0.8070892597155337, "grad_norm": 1.0997185707092285, "learning_rate": 7.2025e-05, "loss": 0.3805, "step": 14413 }, { "epoch": 0.8071452570276627, "grad_norm": 1.1816157102584839, "learning_rate": 7.203000000000001e-05, "loss": 0.4126, "step": 14414 }, { "epoch": 0.8072012543397917, "grad_norm": 1.4850237369537354, "learning_rate": 7.203500000000001e-05, "loss": 0.4057, "step": 14415 }, { "epoch": 0.8072572516519207, "grad_norm": 1.332733154296875, "learning_rate": 7.204000000000001e-05, "loss": 0.6524, "step": 14416 }, { "epoch": 0.8073132489640498, "grad_norm": 1.2679888010025024, "learning_rate": 7.2045e-05, "loss": 0.6952, "step": 14417 }, { "epoch": 0.8073692462761788, "grad_norm": 1.5113410949707031, "learning_rate": 7.205e-05, "loss": 0.5025, "step": 14418 }, { "epoch": 0.8074252435883078, "grad_norm": 1.4489210844039917, "learning_rate": 7.2055e-05, "loss": 0.5082, "step": 14419 }, { "epoch": 0.8074812409004368, "grad_norm": 2.1691977977752686, "learning_rate": 
7.206000000000001e-05, "loss": 0.6638, "step": 14420 }, { "epoch": 0.8075372382125658, "grad_norm": 1.3792576789855957, "learning_rate": 7.206500000000001e-05, "loss": 0.4289, "step": 14421 }, { "epoch": 0.8075932355246949, "grad_norm": 3.897068500518799, "learning_rate": 7.207e-05, "loss": 0.417, "step": 14422 }, { "epoch": 0.8076492328368239, "grad_norm": 1.4487427473068237, "learning_rate": 7.2075e-05, "loss": 0.5249, "step": 14423 }, { "epoch": 0.8077052301489529, "grad_norm": 1.3798766136169434, "learning_rate": 7.208e-05, "loss": 0.4358, "step": 14424 }, { "epoch": 0.8077612274610819, "grad_norm": 1.737931251525879, "learning_rate": 7.2085e-05, "loss": 0.4121, "step": 14425 }, { "epoch": 0.8078172247732109, "grad_norm": 1.1489999294281006, "learning_rate": 7.209e-05, "loss": 0.4085, "step": 14426 }, { "epoch": 0.8078732220853398, "grad_norm": 1.190069556236267, "learning_rate": 7.209500000000001e-05, "loss": 0.5369, "step": 14427 }, { "epoch": 0.8079292193974689, "grad_norm": 1.5137462615966797, "learning_rate": 7.21e-05, "loss": 0.7515, "step": 14428 }, { "epoch": 0.8079852167095979, "grad_norm": 1.2904857397079468, "learning_rate": 7.2105e-05, "loss": 0.4236, "step": 14429 }, { "epoch": 0.8080412140217269, "grad_norm": 1.3198213577270508, "learning_rate": 7.211e-05, "loss": 0.4046, "step": 14430 }, { "epoch": 0.8080972113338559, "grad_norm": 1.3352689743041992, "learning_rate": 7.2115e-05, "loss": 0.4601, "step": 14431 }, { "epoch": 0.8081532086459849, "grad_norm": 1.275355577468872, "learning_rate": 7.212e-05, "loss": 0.4128, "step": 14432 }, { "epoch": 0.808209205958114, "grad_norm": 1.2645851373672485, "learning_rate": 7.2125e-05, "loss": 0.403, "step": 14433 }, { "epoch": 0.808265203270243, "grad_norm": 1.3488813638687134, "learning_rate": 7.213e-05, "loss": 0.5088, "step": 14434 }, { "epoch": 0.808321200582372, "grad_norm": 1.4230356216430664, "learning_rate": 7.213500000000001e-05, "loss": 0.6918, "step": 14435 }, { "epoch": 0.808377197894501, 
"grad_norm": 1.0565516948699951, "learning_rate": 7.214000000000001e-05, "loss": 0.3467, "step": 14436 }, { "epoch": 0.80843319520663, "grad_norm": 1.523375153541565, "learning_rate": 7.214500000000001e-05, "loss": 0.3884, "step": 14437 }, { "epoch": 0.8084891925187591, "grad_norm": 1.429703950881958, "learning_rate": 7.215e-05, "loss": 0.4894, "step": 14438 }, { "epoch": 0.8085451898308881, "grad_norm": 1.278806447982788, "learning_rate": 7.2155e-05, "loss": 0.4793, "step": 14439 }, { "epoch": 0.8086011871430171, "grad_norm": 1.5889099836349487, "learning_rate": 7.216e-05, "loss": 0.5262, "step": 14440 }, { "epoch": 0.8086571844551461, "grad_norm": 1.4965639114379883, "learning_rate": 7.216500000000001e-05, "loss": 0.4079, "step": 14441 }, { "epoch": 0.8087131817672751, "grad_norm": 1.2740325927734375, "learning_rate": 7.217000000000001e-05, "loss": 0.3079, "step": 14442 }, { "epoch": 0.8087691790794042, "grad_norm": 1.7930535078048706, "learning_rate": 7.2175e-05, "loss": 0.4116, "step": 14443 }, { "epoch": 0.8088251763915332, "grad_norm": 1.1276098489761353, "learning_rate": 7.218e-05, "loss": 0.3954, "step": 14444 }, { "epoch": 0.8088811737036622, "grad_norm": 1.5948141813278198, "learning_rate": 7.2185e-05, "loss": 0.5563, "step": 14445 }, { "epoch": 0.8089371710157912, "grad_norm": 2.2157845497131348, "learning_rate": 7.219e-05, "loss": 0.5056, "step": 14446 }, { "epoch": 0.8089931683279202, "grad_norm": 1.2125484943389893, "learning_rate": 7.2195e-05, "loss": 0.3818, "step": 14447 }, { "epoch": 0.8090491656400493, "grad_norm": 1.572281837463379, "learning_rate": 7.22e-05, "loss": 0.4074, "step": 14448 }, { "epoch": 0.8091051629521783, "grad_norm": 1.3786903619766235, "learning_rate": 7.2205e-05, "loss": 0.4561, "step": 14449 }, { "epoch": 0.8091611602643073, "grad_norm": 1.3767876625061035, "learning_rate": 7.221e-05, "loss": 0.4368, "step": 14450 }, { "epoch": 0.8092171575764363, "grad_norm": 1.33573579788208, "learning_rate": 7.2215e-05, "loss": 0.3953, 
"step": 14451 }, { "epoch": 0.8092731548885653, "grad_norm": 1.474442720413208, "learning_rate": 7.222e-05, "loss": 0.6192, "step": 14452 }, { "epoch": 0.8093291522006943, "grad_norm": 1.7762633562088013, "learning_rate": 7.2225e-05, "loss": 0.3859, "step": 14453 }, { "epoch": 0.8093851495128234, "grad_norm": 1.1925138235092163, "learning_rate": 7.223e-05, "loss": 0.4591, "step": 14454 }, { "epoch": 0.8094411468249524, "grad_norm": 1.2057689428329468, "learning_rate": 7.2235e-05, "loss": 0.4817, "step": 14455 }, { "epoch": 0.8094971441370814, "grad_norm": 1.4963434934616089, "learning_rate": 7.224000000000001e-05, "loss": 0.4412, "step": 14456 }, { "epoch": 0.8095531414492104, "grad_norm": 1.4457064867019653, "learning_rate": 7.224500000000001e-05, "loss": 0.3906, "step": 14457 }, { "epoch": 0.8096091387613394, "grad_norm": 1.3031740188598633, "learning_rate": 7.225000000000001e-05, "loss": 0.3942, "step": 14458 }, { "epoch": 0.8096651360734685, "grad_norm": 1.4185705184936523, "learning_rate": 7.2255e-05, "loss": 0.5671, "step": 14459 }, { "epoch": 0.8097211333855975, "grad_norm": 1.244755744934082, "learning_rate": 7.226e-05, "loss": 0.5442, "step": 14460 }, { "epoch": 0.8097771306977265, "grad_norm": 2.351407766342163, "learning_rate": 7.2265e-05, "loss": 0.414, "step": 14461 }, { "epoch": 0.8098331280098555, "grad_norm": 1.2181895971298218, "learning_rate": 7.227000000000001e-05, "loss": 0.4345, "step": 14462 }, { "epoch": 0.8098891253219845, "grad_norm": 1.3296558856964111, "learning_rate": 7.227500000000001e-05, "loss": 0.4265, "step": 14463 }, { "epoch": 0.8099451226341136, "grad_norm": 1.4543119668960571, "learning_rate": 7.228e-05, "loss": 0.4575, "step": 14464 }, { "epoch": 0.8100011199462426, "grad_norm": 1.1617560386657715, "learning_rate": 7.2285e-05, "loss": 0.4275, "step": 14465 }, { "epoch": 0.8100571172583716, "grad_norm": 1.3625268936157227, "learning_rate": 7.229e-05, "loss": 0.5049, "step": 14466 }, { "epoch": 0.8101131145705006, "grad_norm": 
1.1242477893829346, "learning_rate": 7.2295e-05, "loss": 0.3465, "step": 14467 }, { "epoch": 0.8101691118826296, "grad_norm": 1.2985645532608032, "learning_rate": 7.23e-05, "loss": 0.5279, "step": 14468 }, { "epoch": 0.8102251091947587, "grad_norm": 1.2515453100204468, "learning_rate": 7.2305e-05, "loss": 0.4025, "step": 14469 }, { "epoch": 0.8102811065068877, "grad_norm": 1.2598460912704468, "learning_rate": 7.231e-05, "loss": 0.4909, "step": 14470 }, { "epoch": 0.8103371038190167, "grad_norm": 1.250108003616333, "learning_rate": 7.2315e-05, "loss": 0.4534, "step": 14471 }, { "epoch": 0.8103931011311457, "grad_norm": 1.2872648239135742, "learning_rate": 7.232e-05, "loss": 0.3846, "step": 14472 }, { "epoch": 0.8104490984432747, "grad_norm": 2.0495991706848145, "learning_rate": 7.2325e-05, "loss": 0.462, "step": 14473 }, { "epoch": 0.8105050957554037, "grad_norm": 1.2422711849212646, "learning_rate": 7.233000000000001e-05, "loss": 0.3957, "step": 14474 }, { "epoch": 0.8105610930675328, "grad_norm": 1.7333513498306274, "learning_rate": 7.2335e-05, "loss": 0.4894, "step": 14475 }, { "epoch": 0.8106170903796618, "grad_norm": 1.6531480550765991, "learning_rate": 7.234e-05, "loss": 0.5644, "step": 14476 }, { "epoch": 0.8106730876917908, "grad_norm": 1.3454701900482178, "learning_rate": 7.234500000000001e-05, "loss": 0.4615, "step": 14477 }, { "epoch": 0.8107290850039198, "grad_norm": 1.1700000762939453, "learning_rate": 7.235000000000001e-05, "loss": 0.303, "step": 14478 }, { "epoch": 0.8107850823160488, "grad_norm": 1.2846060991287231, "learning_rate": 7.235500000000001e-05, "loss": 0.5067, "step": 14479 }, { "epoch": 0.8108410796281779, "grad_norm": 1.861580729484558, "learning_rate": 7.236e-05, "loss": 0.4014, "step": 14480 }, { "epoch": 0.8108970769403069, "grad_norm": 1.2220993041992188, "learning_rate": 7.2365e-05, "loss": 0.4058, "step": 14481 }, { "epoch": 0.8109530742524359, "grad_norm": 1.3710349798202515, "learning_rate": 7.237e-05, "loss": 0.4855, "step": 
14482 }, { "epoch": 0.8110090715645649, "grad_norm": 2.1424448490142822, "learning_rate": 7.2375e-05, "loss": 0.3967, "step": 14483 }, { "epoch": 0.8110650688766939, "grad_norm": 1.3352257013320923, "learning_rate": 7.238000000000001e-05, "loss": 0.3667, "step": 14484 }, { "epoch": 0.811121066188823, "grad_norm": 1.3583341836929321, "learning_rate": 7.2385e-05, "loss": 0.3491, "step": 14485 }, { "epoch": 0.811177063500952, "grad_norm": 1.2811883687973022, "learning_rate": 7.239e-05, "loss": 0.3825, "step": 14486 }, { "epoch": 0.811233060813081, "grad_norm": 1.3344007730484009, "learning_rate": 7.2395e-05, "loss": 0.4904, "step": 14487 }, { "epoch": 0.81128905812521, "grad_norm": 1.446425437927246, "learning_rate": 7.24e-05, "loss": 0.5377, "step": 14488 }, { "epoch": 0.811345055437339, "grad_norm": 1.0479023456573486, "learning_rate": 7.2405e-05, "loss": 0.3216, "step": 14489 }, { "epoch": 0.811401052749468, "grad_norm": 1.2680141925811768, "learning_rate": 7.241e-05, "loss": 0.4654, "step": 14490 }, { "epoch": 0.8114570500615971, "grad_norm": 1.3281927108764648, "learning_rate": 7.2415e-05, "loss": 0.4584, "step": 14491 }, { "epoch": 0.8115130473737261, "grad_norm": 1.3772964477539062, "learning_rate": 7.242e-05, "loss": 0.375, "step": 14492 }, { "epoch": 0.8115690446858551, "grad_norm": 1.623201847076416, "learning_rate": 7.2425e-05, "loss": 0.4131, "step": 14493 }, { "epoch": 0.8116250419979841, "grad_norm": 1.2621368169784546, "learning_rate": 7.243000000000001e-05, "loss": 0.3265, "step": 14494 }, { "epoch": 0.8116810393101132, "grad_norm": 1.3126702308654785, "learning_rate": 7.243500000000001e-05, "loss": 0.3985, "step": 14495 }, { "epoch": 0.8117370366222422, "grad_norm": 1.4472118616104126, "learning_rate": 7.244e-05, "loss": 0.3782, "step": 14496 }, { "epoch": 0.8117930339343712, "grad_norm": 1.2617502212524414, "learning_rate": 7.2445e-05, "loss": 0.3656, "step": 14497 }, { "epoch": 0.8118490312465002, "grad_norm": 1.3361226320266724, "learning_rate": 
7.245000000000001e-05, "loss": 0.4149, "step": 14498 }, { "epoch": 0.8119050285586292, "grad_norm": 1.5286263227462769, "learning_rate": 7.245500000000001e-05, "loss": 0.5432, "step": 14499 }, { "epoch": 0.8119610258707582, "grad_norm": 1.36464262008667, "learning_rate": 7.246000000000001e-05, "loss": 0.3893, "step": 14500 }, { "epoch": 0.8120170231828873, "grad_norm": 1.3603463172912598, "learning_rate": 7.2465e-05, "loss": 0.3698, "step": 14501 }, { "epoch": 0.8120730204950163, "grad_norm": 1.6051450967788696, "learning_rate": 7.247e-05, "loss": 0.5028, "step": 14502 }, { "epoch": 0.8121290178071453, "grad_norm": 1.5369080305099487, "learning_rate": 7.2475e-05, "loss": 0.4999, "step": 14503 }, { "epoch": 0.8121850151192743, "grad_norm": 1.2656768560409546, "learning_rate": 7.248e-05, "loss": 0.3857, "step": 14504 }, { "epoch": 0.8122410124314033, "grad_norm": 1.2804858684539795, "learning_rate": 7.248500000000001e-05, "loss": 0.3913, "step": 14505 }, { "epoch": 0.8122970097435324, "grad_norm": 1.3018038272857666, "learning_rate": 7.249e-05, "loss": 0.3833, "step": 14506 }, { "epoch": 0.8123530070556614, "grad_norm": 1.3546369075775146, "learning_rate": 7.2495e-05, "loss": 0.4575, "step": 14507 }, { "epoch": 0.8124090043677904, "grad_norm": 1.5813671350479126, "learning_rate": 7.25e-05, "loss": 0.4787, "step": 14508 }, { "epoch": 0.8124650016799194, "grad_norm": 1.3506122827529907, "learning_rate": 7.2505e-05, "loss": 0.4252, "step": 14509 }, { "epoch": 0.8125209989920483, "grad_norm": 1.5459997653961182, "learning_rate": 7.251e-05, "loss": 0.479, "step": 14510 }, { "epoch": 0.8125769963041773, "grad_norm": 1.3185906410217285, "learning_rate": 7.2515e-05, "loss": 0.3693, "step": 14511 }, { "epoch": 0.8126329936163064, "grad_norm": 1.6713619232177734, "learning_rate": 7.252e-05, "loss": 0.4934, "step": 14512 }, { "epoch": 0.8126889909284354, "grad_norm": 1.160532832145691, "learning_rate": 7.2525e-05, "loss": 0.4034, "step": 14513 }, { "epoch": 0.8127449882405644, 
"grad_norm": 1.6146104335784912, "learning_rate": 7.253e-05, "loss": 0.3794, "step": 14514 }, { "epoch": 0.8128009855526934, "grad_norm": 1.3655707836151123, "learning_rate": 7.253500000000001e-05, "loss": 0.5995, "step": 14515 }, { "epoch": 0.8128569828648224, "grad_norm": 1.1747153997421265, "learning_rate": 7.254000000000001e-05, "loss": 0.4118, "step": 14516 }, { "epoch": 0.8129129801769515, "grad_norm": 1.23712158203125, "learning_rate": 7.2545e-05, "loss": 0.2912, "step": 14517 }, { "epoch": 0.8129689774890805, "grad_norm": 1.6816219091415405, "learning_rate": 7.255e-05, "loss": 0.4653, "step": 14518 }, { "epoch": 0.8130249748012095, "grad_norm": 1.5153015851974487, "learning_rate": 7.255500000000001e-05, "loss": 0.4508, "step": 14519 }, { "epoch": 0.8130809721133385, "grad_norm": 1.2474896907806396, "learning_rate": 7.256000000000001e-05, "loss": 0.4264, "step": 14520 }, { "epoch": 0.8131369694254675, "grad_norm": 1.249538540840149, "learning_rate": 7.256500000000001e-05, "loss": 0.4027, "step": 14521 }, { "epoch": 0.8131929667375966, "grad_norm": 1.1467176675796509, "learning_rate": 7.257e-05, "loss": 0.3656, "step": 14522 }, { "epoch": 0.8132489640497256, "grad_norm": 1.2952277660369873, "learning_rate": 7.2575e-05, "loss": 0.3393, "step": 14523 }, { "epoch": 0.8133049613618546, "grad_norm": 1.2148288488388062, "learning_rate": 7.258e-05, "loss": 0.4045, "step": 14524 }, { "epoch": 0.8133609586739836, "grad_norm": 1.3868563175201416, "learning_rate": 7.2585e-05, "loss": 0.5312, "step": 14525 }, { "epoch": 0.8134169559861126, "grad_norm": 1.3878198862075806, "learning_rate": 7.259000000000001e-05, "loss": 0.5242, "step": 14526 }, { "epoch": 0.8134729532982417, "grad_norm": 1.3271074295043945, "learning_rate": 7.2595e-05, "loss": 0.3862, "step": 14527 }, { "epoch": 0.8135289506103707, "grad_norm": 1.4036977291107178, "learning_rate": 7.26e-05, "loss": 0.5575, "step": 14528 }, { "epoch": 0.8135849479224997, "grad_norm": 1.1951898336410522, "learning_rate": 
7.2605e-05, "loss": 0.3614, "step": 14529 }, { "epoch": 0.8136409452346287, "grad_norm": 1.2342334985733032, "learning_rate": 7.261e-05, "loss": 0.4671, "step": 14530 }, { "epoch": 0.8136969425467577, "grad_norm": 1.5239843130111694, "learning_rate": 7.2615e-05, "loss": 0.5482, "step": 14531 }, { "epoch": 0.8137529398588867, "grad_norm": 1.4657853841781616, "learning_rate": 7.261999999999999e-05, "loss": 0.4421, "step": 14532 }, { "epoch": 0.8138089371710158, "grad_norm": 1.2028380632400513, "learning_rate": 7.2625e-05, "loss": 0.4187, "step": 14533 }, { "epoch": 0.8138649344831448, "grad_norm": 1.6086699962615967, "learning_rate": 7.263e-05, "loss": 0.5446, "step": 14534 }, { "epoch": 0.8139209317952738, "grad_norm": 1.3743810653686523, "learning_rate": 7.263500000000001e-05, "loss": 0.4005, "step": 14535 }, { "epoch": 0.8139769291074028, "grad_norm": 1.5481847524642944, "learning_rate": 7.264000000000001e-05, "loss": 0.5506, "step": 14536 }, { "epoch": 0.8140329264195318, "grad_norm": 1.3572007417678833, "learning_rate": 7.2645e-05, "loss": 0.3101, "step": 14537 }, { "epoch": 0.8140889237316609, "grad_norm": 1.2318710088729858, "learning_rate": 7.265e-05, "loss": 0.3469, "step": 14538 }, { "epoch": 0.8141449210437899, "grad_norm": 1.2858757972717285, "learning_rate": 7.2655e-05, "loss": 0.3661, "step": 14539 }, { "epoch": 0.8142009183559189, "grad_norm": 1.3333518505096436, "learning_rate": 7.266000000000001e-05, "loss": 0.4861, "step": 14540 }, { "epoch": 0.8142569156680479, "grad_norm": 1.3598148822784424, "learning_rate": 7.266500000000001e-05, "loss": 0.4595, "step": 14541 }, { "epoch": 0.8143129129801769, "grad_norm": 1.4633821249008179, "learning_rate": 7.267000000000001e-05, "loss": 0.4354, "step": 14542 }, { "epoch": 0.814368910292306, "grad_norm": 1.4942946434020996, "learning_rate": 7.2675e-05, "loss": 0.5181, "step": 14543 }, { "epoch": 0.814424907604435, "grad_norm": 1.449614405632019, "learning_rate": 7.268e-05, "loss": 0.4668, "step": 14544 }, { 
"epoch": 0.814480904916564, "grad_norm": 1.1244574785232544, "learning_rate": 7.2685e-05, "loss": 0.4041, "step": 14545 }, { "epoch": 0.814536902228693, "grad_norm": 1.3382452726364136, "learning_rate": 7.269e-05, "loss": 0.4061, "step": 14546 }, { "epoch": 0.814592899540822, "grad_norm": 1.4895119667053223, "learning_rate": 7.269500000000001e-05, "loss": 0.4328, "step": 14547 }, { "epoch": 0.814648896852951, "grad_norm": 1.4116780757904053, "learning_rate": 7.27e-05, "loss": 0.5754, "step": 14548 }, { "epoch": 0.8147048941650801, "grad_norm": 1.4497257471084595, "learning_rate": 7.2705e-05, "loss": 0.5019, "step": 14549 }, { "epoch": 0.8147608914772091, "grad_norm": 1.269838571548462, "learning_rate": 7.271e-05, "loss": 0.4255, "step": 14550 }, { "epoch": 0.8148168887893381, "grad_norm": 1.9430867433547974, "learning_rate": 7.2715e-05, "loss": 0.5007, "step": 14551 }, { "epoch": 0.8148728861014671, "grad_norm": 1.705452561378479, "learning_rate": 7.272e-05, "loss": 0.5779, "step": 14552 }, { "epoch": 0.8149288834135962, "grad_norm": 1.1430672407150269, "learning_rate": 7.272499999999999e-05, "loss": 0.4957, "step": 14553 }, { "epoch": 0.8149848807257252, "grad_norm": 1.5716427564620972, "learning_rate": 7.273e-05, "loss": 0.5149, "step": 14554 }, { "epoch": 0.8150408780378542, "grad_norm": 1.5491863489151, "learning_rate": 7.273500000000001e-05, "loss": 0.4984, "step": 14555 }, { "epoch": 0.8150968753499832, "grad_norm": 1.7216907739639282, "learning_rate": 7.274000000000001e-05, "loss": 0.4223, "step": 14556 }, { "epoch": 0.8151528726621122, "grad_norm": 1.058103322982788, "learning_rate": 7.274500000000001e-05, "loss": 0.3283, "step": 14557 }, { "epoch": 0.8152088699742412, "grad_norm": 1.297997236251831, "learning_rate": 7.275e-05, "loss": 0.4349, "step": 14558 }, { "epoch": 0.8152648672863703, "grad_norm": 1.323997139930725, "learning_rate": 7.2755e-05, "loss": 0.4357, "step": 14559 }, { "epoch": 0.8153208645984993, "grad_norm": 1.1607613563537598, 
"learning_rate": 7.276e-05, "loss": 0.3407, "step": 14560 }, { "epoch": 0.8153768619106283, "grad_norm": 2.034667730331421, "learning_rate": 7.2765e-05, "loss": 0.5754, "step": 14561 }, { "epoch": 0.8154328592227573, "grad_norm": 1.323005199432373, "learning_rate": 7.277000000000001e-05, "loss": 0.5629, "step": 14562 }, { "epoch": 0.8154888565348863, "grad_norm": 1.2635260820388794, "learning_rate": 7.277500000000001e-05, "loss": 0.4229, "step": 14563 }, { "epoch": 0.8155448538470154, "grad_norm": 1.1667816638946533, "learning_rate": 7.278e-05, "loss": 0.3357, "step": 14564 }, { "epoch": 0.8156008511591444, "grad_norm": 1.0956239700317383, "learning_rate": 7.2785e-05, "loss": 0.3747, "step": 14565 }, { "epoch": 0.8156568484712734, "grad_norm": 3.721742868423462, "learning_rate": 7.279e-05, "loss": 0.452, "step": 14566 }, { "epoch": 0.8157128457834024, "grad_norm": 1.572638750076294, "learning_rate": 7.2795e-05, "loss": 0.5118, "step": 14567 }, { "epoch": 0.8157688430955314, "grad_norm": 1.690908432006836, "learning_rate": 7.280000000000001e-05, "loss": 0.6523, "step": 14568 }, { "epoch": 0.8158248404076605, "grad_norm": 1.463591456413269, "learning_rate": 7.2805e-05, "loss": 0.5766, "step": 14569 }, { "epoch": 0.8158808377197895, "grad_norm": 1.3467861413955688, "learning_rate": 7.281e-05, "loss": 0.4124, "step": 14570 }, { "epoch": 0.8159368350319185, "grad_norm": 1.3160570859909058, "learning_rate": 7.2815e-05, "loss": 0.4452, "step": 14571 }, { "epoch": 0.8159928323440475, "grad_norm": 1.3122376203536987, "learning_rate": 7.282e-05, "loss": 0.4212, "step": 14572 }, { "epoch": 0.8160488296561765, "grad_norm": 1.2728044986724854, "learning_rate": 7.2825e-05, "loss": 0.4936, "step": 14573 }, { "epoch": 0.8161048269683056, "grad_norm": 1.4045751094818115, "learning_rate": 7.282999999999999e-05, "loss": 0.4088, "step": 14574 }, { "epoch": 0.8161608242804346, "grad_norm": 1.1426284313201904, "learning_rate": 7.2835e-05, "loss": 0.3795, "step": 14575 }, { "epoch": 
0.8162168215925636, "grad_norm": 1.2386523485183716, "learning_rate": 7.284000000000001e-05, "loss": 0.4168, "step": 14576 }, { "epoch": 0.8162728189046926, "grad_norm": 1.2142409086227417, "learning_rate": 7.284500000000001e-05, "loss": 0.398, "step": 14577 }, { "epoch": 0.8163288162168216, "grad_norm": 1.250333547592163, "learning_rate": 7.285000000000001e-05, "loss": 0.324, "step": 14578 }, { "epoch": 0.8163848135289506, "grad_norm": 1.2744383811950684, "learning_rate": 7.2855e-05, "loss": 0.4283, "step": 14579 }, { "epoch": 0.8164408108410797, "grad_norm": 1.6865350008010864, "learning_rate": 7.286e-05, "loss": 0.4068, "step": 14580 }, { "epoch": 0.8164968081532087, "grad_norm": 1.2743353843688965, "learning_rate": 7.2865e-05, "loss": 0.3827, "step": 14581 }, { "epoch": 0.8165528054653377, "grad_norm": 1.450026035308838, "learning_rate": 7.287e-05, "loss": 0.5665, "step": 14582 }, { "epoch": 0.8166088027774667, "grad_norm": 1.5260260105133057, "learning_rate": 7.287500000000001e-05, "loss": 0.3995, "step": 14583 }, { "epoch": 0.8166648000895957, "grad_norm": 2.230234146118164, "learning_rate": 7.288000000000001e-05, "loss": 0.5908, "step": 14584 }, { "epoch": 0.8167207974017248, "grad_norm": 1.2641435861587524, "learning_rate": 7.2885e-05, "loss": 0.3928, "step": 14585 }, { "epoch": 0.8167767947138538, "grad_norm": 1.340026617050171, "learning_rate": 7.289e-05, "loss": 0.4319, "step": 14586 }, { "epoch": 0.8168327920259828, "grad_norm": 1.2149425745010376, "learning_rate": 7.2895e-05, "loss": 0.3706, "step": 14587 }, { "epoch": 0.8168887893381118, "grad_norm": 1.1634408235549927, "learning_rate": 7.29e-05, "loss": 0.5095, "step": 14588 }, { "epoch": 0.8169447866502408, "grad_norm": 1.4276528358459473, "learning_rate": 7.290500000000001e-05, "loss": 0.4456, "step": 14589 }, { "epoch": 0.8170007839623699, "grad_norm": 1.5272024869918823, "learning_rate": 7.291e-05, "loss": 0.4431, "step": 14590 }, { "epoch": 0.8170567812744989, "grad_norm": 1.952351450920105, 
"learning_rate": 7.2915e-05, "loss": 0.611, "step": 14591 }, { "epoch": 0.8171127785866279, "grad_norm": 1.099005103111267, "learning_rate": 7.292e-05, "loss": 0.3382, "step": 14592 }, { "epoch": 0.8171687758987568, "grad_norm": 1.6788164377212524, "learning_rate": 7.2925e-05, "loss": 0.5231, "step": 14593 }, { "epoch": 0.8172247732108858, "grad_norm": 1.1630085706710815, "learning_rate": 7.293e-05, "loss": 0.3644, "step": 14594 }, { "epoch": 0.8172807705230148, "grad_norm": 1.2604320049285889, "learning_rate": 7.2935e-05, "loss": 0.519, "step": 14595 }, { "epoch": 0.8173367678351439, "grad_norm": 5.7034831047058105, "learning_rate": 7.294e-05, "loss": 0.4025, "step": 14596 }, { "epoch": 0.8173927651472729, "grad_norm": 1.554646372795105, "learning_rate": 7.294500000000001e-05, "loss": 0.4582, "step": 14597 }, { "epoch": 0.8174487624594019, "grad_norm": 1.5256388187408447, "learning_rate": 7.295000000000001e-05, "loss": 0.4781, "step": 14598 }, { "epoch": 0.8175047597715309, "grad_norm": 1.668744683265686, "learning_rate": 7.295500000000001e-05, "loss": 0.7531, "step": 14599 }, { "epoch": 0.8175607570836599, "grad_norm": 1.4327677488327026, "learning_rate": 7.296e-05, "loss": 0.7833, "step": 14600 }, { "epoch": 0.817616754395789, "grad_norm": 1.1870002746582031, "learning_rate": 7.2965e-05, "loss": 0.4161, "step": 14601 }, { "epoch": 0.817672751707918, "grad_norm": 1.5035710334777832, "learning_rate": 7.297e-05, "loss": 0.3995, "step": 14602 }, { "epoch": 0.817728749020047, "grad_norm": 1.594335913658142, "learning_rate": 7.2975e-05, "loss": 0.52, "step": 14603 }, { "epoch": 0.817784746332176, "grad_norm": 1.2316128015518188, "learning_rate": 7.298000000000001e-05, "loss": 0.424, "step": 14604 }, { "epoch": 0.817840743644305, "grad_norm": 1.5011049509048462, "learning_rate": 7.298500000000001e-05, "loss": 0.4467, "step": 14605 }, { "epoch": 0.817896740956434, "grad_norm": 1.3760048151016235, "learning_rate": 7.299e-05, "loss": 0.3854, "step": 14606 }, { "epoch": 
0.8179527382685631, "grad_norm": 1.1981438398361206, "learning_rate": 7.2995e-05, "loss": 0.5182, "step": 14607 }, { "epoch": 0.8180087355806921, "grad_norm": 1.446954369544983, "learning_rate": 7.3e-05, "loss": 0.5173, "step": 14608 }, { "epoch": 0.8180647328928211, "grad_norm": 1.686732530593872, "learning_rate": 7.3005e-05, "loss": 0.56, "step": 14609 }, { "epoch": 0.8181207302049501, "grad_norm": 1.051320195198059, "learning_rate": 7.301e-05, "loss": 0.3697, "step": 14610 }, { "epoch": 0.8181767275170791, "grad_norm": 1.1930115222930908, "learning_rate": 7.3015e-05, "loss": 0.4837, "step": 14611 }, { "epoch": 0.8182327248292082, "grad_norm": 1.5659046173095703, "learning_rate": 7.302e-05, "loss": 0.4421, "step": 14612 }, { "epoch": 0.8182887221413372, "grad_norm": 1.5599424839019775, "learning_rate": 7.3025e-05, "loss": 0.4769, "step": 14613 }, { "epoch": 0.8183447194534662, "grad_norm": 1.2555923461914062, "learning_rate": 7.303e-05, "loss": 0.4743, "step": 14614 }, { "epoch": 0.8184007167655952, "grad_norm": 1.4767690896987915, "learning_rate": 7.303500000000001e-05, "loss": 0.4749, "step": 14615 }, { "epoch": 0.8184567140777242, "grad_norm": 1.3402944803237915, "learning_rate": 7.304e-05, "loss": 0.5187, "step": 14616 }, { "epoch": 0.8185127113898533, "grad_norm": 1.3602509498596191, "learning_rate": 7.3045e-05, "loss": 0.4312, "step": 14617 }, { "epoch": 0.8185687087019823, "grad_norm": 1.3979710340499878, "learning_rate": 7.305000000000001e-05, "loss": 0.4078, "step": 14618 }, { "epoch": 0.8186247060141113, "grad_norm": 1.2349612712860107, "learning_rate": 7.305500000000001e-05, "loss": 0.3946, "step": 14619 }, { "epoch": 0.8186807033262403, "grad_norm": 1.2803958654403687, "learning_rate": 7.306000000000001e-05, "loss": 0.3911, "step": 14620 }, { "epoch": 0.8187367006383693, "grad_norm": 1.5141361951828003, "learning_rate": 7.3065e-05, "loss": 0.4704, "step": 14621 }, { "epoch": 0.8187926979504984, "grad_norm": 1.7877403497695923, "learning_rate": 
7.307e-05, "loss": 0.5888, "step": 14622 }, { "epoch": 0.8188486952626274, "grad_norm": 1.3140099048614502, "learning_rate": 7.3075e-05, "loss": 0.3834, "step": 14623 }, { "epoch": 0.8189046925747564, "grad_norm": 1.3405500650405884, "learning_rate": 7.308e-05, "loss": 0.4656, "step": 14624 }, { "epoch": 0.8189606898868854, "grad_norm": 1.3182367086410522, "learning_rate": 7.308500000000001e-05, "loss": 0.3299, "step": 14625 }, { "epoch": 0.8190166871990144, "grad_norm": 1.8528672456741333, "learning_rate": 7.309e-05, "loss": 0.5492, "step": 14626 }, { "epoch": 0.8190726845111435, "grad_norm": 1.2025448083877563, "learning_rate": 7.3095e-05, "loss": 0.4631, "step": 14627 }, { "epoch": 0.8191286818232725, "grad_norm": 1.1996395587921143, "learning_rate": 7.31e-05, "loss": 0.4278, "step": 14628 }, { "epoch": 0.8191846791354015, "grad_norm": 1.288961410522461, "learning_rate": 7.3105e-05, "loss": 0.4867, "step": 14629 }, { "epoch": 0.8192406764475305, "grad_norm": 1.4086191654205322, "learning_rate": 7.311e-05, "loss": 0.463, "step": 14630 }, { "epoch": 0.8192966737596595, "grad_norm": 1.3368200063705444, "learning_rate": 7.3115e-05, "loss": 0.4777, "step": 14631 }, { "epoch": 0.8193526710717886, "grad_norm": 0.9781476855278015, "learning_rate": 7.312e-05, "loss": 0.3705, "step": 14632 }, { "epoch": 0.8194086683839176, "grad_norm": 1.185136318206787, "learning_rate": 7.3125e-05, "loss": 0.5119, "step": 14633 }, { "epoch": 0.8194646656960466, "grad_norm": 1.1368544101715088, "learning_rate": 7.313e-05, "loss": 0.4276, "step": 14634 }, { "epoch": 0.8195206630081756, "grad_norm": 1.0748132467269897, "learning_rate": 7.3135e-05, "loss": 0.3838, "step": 14635 }, { "epoch": 0.8195766603203046, "grad_norm": 1.304819941520691, "learning_rate": 7.314000000000001e-05, "loss": 0.4756, "step": 14636 }, { "epoch": 0.8196326576324336, "grad_norm": 1.168513298034668, "learning_rate": 7.3145e-05, "loss": 0.3447, "step": 14637 }, { "epoch": 0.8196886549445627, "grad_norm": 
1.4905389547348022, "learning_rate": 7.315e-05, "loss": 0.5992, "step": 14638 }, { "epoch": 0.8197446522566917, "grad_norm": 1.1134607791900635, "learning_rate": 7.315500000000001e-05, "loss": 0.4236, "step": 14639 }, { "epoch": 0.8198006495688207, "grad_norm": 1.1270670890808105, "learning_rate": 7.316000000000001e-05, "loss": 0.354, "step": 14640 }, { "epoch": 0.8198566468809497, "grad_norm": 1.2572729587554932, "learning_rate": 7.316500000000001e-05, "loss": 0.3315, "step": 14641 }, { "epoch": 0.8199126441930787, "grad_norm": 1.289846420288086, "learning_rate": 7.317e-05, "loss": 0.3378, "step": 14642 }, { "epoch": 0.8199686415052078, "grad_norm": 1.543160080909729, "learning_rate": 7.3175e-05, "loss": 0.5779, "step": 14643 }, { "epoch": 0.8200246388173368, "grad_norm": 1.4106050729751587, "learning_rate": 7.318e-05, "loss": 0.4014, "step": 14644 }, { "epoch": 0.8200806361294658, "grad_norm": 1.1911691427230835, "learning_rate": 7.3185e-05, "loss": 0.4199, "step": 14645 }, { "epoch": 0.8201366334415948, "grad_norm": 1.0628832578659058, "learning_rate": 7.319000000000001e-05, "loss": 0.4342, "step": 14646 }, { "epoch": 0.8201926307537238, "grad_norm": 1.2558344602584839, "learning_rate": 7.3195e-05, "loss": 0.4735, "step": 14647 }, { "epoch": 0.8202486280658529, "grad_norm": 1.5138523578643799, "learning_rate": 7.32e-05, "loss": 0.6924, "step": 14648 }, { "epoch": 0.8203046253779819, "grad_norm": 1.438155174255371, "learning_rate": 7.3205e-05, "loss": 0.4288, "step": 14649 }, { "epoch": 0.8203606226901109, "grad_norm": 1.302852749824524, "learning_rate": 7.321e-05, "loss": 0.4153, "step": 14650 }, { "epoch": 0.8204166200022399, "grad_norm": 1.169129729270935, "learning_rate": 7.3215e-05, "loss": 0.3858, "step": 14651 }, { "epoch": 0.8204726173143689, "grad_norm": 1.8877336978912354, "learning_rate": 7.322e-05, "loss": 0.5608, "step": 14652 }, { "epoch": 0.820528614626498, "grad_norm": 1.320983648300171, "learning_rate": 7.3225e-05, "loss": 0.4959, "step": 14653 
}, { "epoch": 0.820584611938627, "grad_norm": 1.4541881084442139, "learning_rate": 7.323e-05, "loss": 0.5543, "step": 14654 }, { "epoch": 0.820640609250756, "grad_norm": 1.3759751319885254, "learning_rate": 7.3235e-05, "loss": 0.4852, "step": 14655 }, { "epoch": 0.820696606562885, "grad_norm": 1.4528175592422485, "learning_rate": 7.324000000000001e-05, "loss": 0.478, "step": 14656 }, { "epoch": 0.820752603875014, "grad_norm": 1.3313637971878052, "learning_rate": 7.324500000000001e-05, "loss": 0.4429, "step": 14657 }, { "epoch": 0.820808601187143, "grad_norm": 1.0332139730453491, "learning_rate": 7.325e-05, "loss": 0.2853, "step": 14658 }, { "epoch": 0.8208645984992721, "grad_norm": 1.4055781364440918, "learning_rate": 7.3255e-05, "loss": 0.4461, "step": 14659 }, { "epoch": 0.8209205958114011, "grad_norm": 1.1871622800827026, "learning_rate": 7.326e-05, "loss": 0.5183, "step": 14660 }, { "epoch": 0.8209765931235301, "grad_norm": 1.436928391456604, "learning_rate": 7.326500000000001e-05, "loss": 0.4143, "step": 14661 }, { "epoch": 0.8210325904356591, "grad_norm": 1.2425044775009155, "learning_rate": 7.327000000000001e-05, "loss": 0.4609, "step": 14662 }, { "epoch": 0.8210885877477881, "grad_norm": 1.4125325679779053, "learning_rate": 7.3275e-05, "loss": 0.4595, "step": 14663 }, { "epoch": 0.8211445850599172, "grad_norm": 1.0556050539016724, "learning_rate": 7.328e-05, "loss": 0.3873, "step": 14664 }, { "epoch": 0.8212005823720462, "grad_norm": 1.2359877824783325, "learning_rate": 7.3285e-05, "loss": 0.3886, "step": 14665 }, { "epoch": 0.8212565796841752, "grad_norm": 1.4855868816375732, "learning_rate": 7.329e-05, "loss": 0.5341, "step": 14666 }, { "epoch": 0.8213125769963042, "grad_norm": 3.715852975845337, "learning_rate": 7.329500000000001e-05, "loss": 0.4834, "step": 14667 }, { "epoch": 0.8213685743084332, "grad_norm": 1.4016079902648926, "learning_rate": 7.33e-05, "loss": 0.502, "step": 14668 }, { "epoch": 0.8214245716205623, "grad_norm": 1.375775694847107, 
"learning_rate": 7.3305e-05, "loss": 0.3553, "step": 14669 }, { "epoch": 0.8214805689326913, "grad_norm": 1.2052714824676514, "learning_rate": 7.331e-05, "loss": 0.3535, "step": 14670 }, { "epoch": 0.8215365662448203, "grad_norm": 1.445199966430664, "learning_rate": 7.3315e-05, "loss": 0.5214, "step": 14671 }, { "epoch": 0.8215925635569493, "grad_norm": 1.3662270307540894, "learning_rate": 7.332e-05, "loss": 0.4388, "step": 14672 }, { "epoch": 0.8216485608690783, "grad_norm": 1.2618768215179443, "learning_rate": 7.3325e-05, "loss": 0.6232, "step": 14673 }, { "epoch": 0.8217045581812074, "grad_norm": 1.0770246982574463, "learning_rate": 7.333e-05, "loss": 0.3527, "step": 14674 }, { "epoch": 0.8217605554933363, "grad_norm": 1.4090774059295654, "learning_rate": 7.3335e-05, "loss": 0.4901, "step": 14675 }, { "epoch": 0.8218165528054653, "grad_norm": 1.4112480878829956, "learning_rate": 7.334000000000001e-05, "loss": 0.4592, "step": 14676 }, { "epoch": 0.8218725501175943, "grad_norm": 1.2244633436203003, "learning_rate": 7.334500000000001e-05, "loss": 0.3995, "step": 14677 }, { "epoch": 0.8219285474297233, "grad_norm": 1.3836400508880615, "learning_rate": 7.335000000000001e-05, "loss": 0.4217, "step": 14678 }, { "epoch": 0.8219845447418523, "grad_norm": 1.394639015197754, "learning_rate": 7.3355e-05, "loss": 0.3731, "step": 14679 }, { "epoch": 0.8220405420539814, "grad_norm": 1.2811203002929688, "learning_rate": 7.336e-05, "loss": 0.4087, "step": 14680 }, { "epoch": 0.8220965393661104, "grad_norm": 1.214834213256836, "learning_rate": 7.3365e-05, "loss": 0.467, "step": 14681 }, { "epoch": 0.8221525366782394, "grad_norm": 1.3365193605422974, "learning_rate": 7.337000000000001e-05, "loss": 0.6025, "step": 14682 }, { "epoch": 0.8222085339903684, "grad_norm": 1.3410736322402954, "learning_rate": 7.337500000000001e-05, "loss": 0.4645, "step": 14683 }, { "epoch": 0.8222645313024974, "grad_norm": 1.3538296222686768, "learning_rate": 7.338e-05, "loss": 0.4959, "step": 14684 }, { 
"epoch": 0.8223205286146265, "grad_norm": 1.3033719062805176, "learning_rate": 7.3385e-05, "loss": 0.4946, "step": 14685 }, { "epoch": 0.8223765259267555, "grad_norm": 1.2510207891464233, "learning_rate": 7.339e-05, "loss": 0.5141, "step": 14686 }, { "epoch": 0.8224325232388845, "grad_norm": 1.329649567604065, "learning_rate": 7.3395e-05, "loss": 0.4308, "step": 14687 }, { "epoch": 0.8224885205510135, "grad_norm": 1.3454447984695435, "learning_rate": 7.340000000000001e-05, "loss": 0.4015, "step": 14688 }, { "epoch": 0.8225445178631425, "grad_norm": 1.414605975151062, "learning_rate": 7.3405e-05, "loss": 0.4734, "step": 14689 }, { "epoch": 0.8226005151752716, "grad_norm": 1.2809250354766846, "learning_rate": 7.341e-05, "loss": 0.2809, "step": 14690 }, { "epoch": 0.8226565124874006, "grad_norm": 1.1174598932266235, "learning_rate": 7.3415e-05, "loss": 0.402, "step": 14691 }, { "epoch": 0.8227125097995296, "grad_norm": 1.1026337146759033, "learning_rate": 7.342e-05, "loss": 0.5006, "step": 14692 }, { "epoch": 0.8227685071116586, "grad_norm": 1.4404120445251465, "learning_rate": 7.3425e-05, "loss": 0.5915, "step": 14693 }, { "epoch": 0.8228245044237876, "grad_norm": 1.2131141424179077, "learning_rate": 7.342999999999999e-05, "loss": 0.4807, "step": 14694 }, { "epoch": 0.8228805017359166, "grad_norm": 2.460108757019043, "learning_rate": 7.3435e-05, "loss": 0.4563, "step": 14695 }, { "epoch": 0.8229364990480457, "grad_norm": 1.672230839729309, "learning_rate": 7.344000000000002e-05, "loss": 0.5001, "step": 14696 }, { "epoch": 0.8229924963601747, "grad_norm": 1.2148809432983398, "learning_rate": 7.344500000000001e-05, "loss": 0.4161, "step": 14697 }, { "epoch": 0.8230484936723037, "grad_norm": 1.3984878063201904, "learning_rate": 7.345000000000001e-05, "loss": 0.4249, "step": 14698 }, { "epoch": 0.8231044909844327, "grad_norm": 1.5426331758499146, "learning_rate": 7.345500000000001e-05, "loss": 0.6712, "step": 14699 }, { "epoch": 0.8231604882965617, "grad_norm": 
1.246315836906433, "learning_rate": 7.346e-05, "loss": 0.5127, "step": 14700 }, { "epoch": 0.8232164856086908, "grad_norm": 1.3528317213058472, "learning_rate": 7.3465e-05, "loss": 0.4185, "step": 14701 }, { "epoch": 0.8232724829208198, "grad_norm": 2.795318841934204, "learning_rate": 7.347e-05, "loss": 0.3799, "step": 14702 }, { "epoch": 0.8233284802329488, "grad_norm": 1.1334058046340942, "learning_rate": 7.347500000000001e-05, "loss": 0.4266, "step": 14703 }, { "epoch": 0.8233844775450778, "grad_norm": 1.4261080026626587, "learning_rate": 7.348000000000001e-05, "loss": 0.4662, "step": 14704 }, { "epoch": 0.8234404748572068, "grad_norm": 1.3351155519485474, "learning_rate": 7.3485e-05, "loss": 0.4606, "step": 14705 }, { "epoch": 0.8234964721693359, "grad_norm": 1.4264384508132935, "learning_rate": 7.349e-05, "loss": 0.4275, "step": 14706 }, { "epoch": 0.8235524694814649, "grad_norm": 1.2684037685394287, "learning_rate": 7.3495e-05, "loss": 0.4041, "step": 14707 }, { "epoch": 0.8236084667935939, "grad_norm": 1.1517019271850586, "learning_rate": 7.35e-05, "loss": 0.3418, "step": 14708 }, { "epoch": 0.8236644641057229, "grad_norm": 1.2840877771377563, "learning_rate": 7.3505e-05, "loss": 0.4765, "step": 14709 }, { "epoch": 0.8237204614178519, "grad_norm": 1.1933214664459229, "learning_rate": 7.351e-05, "loss": 0.4337, "step": 14710 }, { "epoch": 0.823776458729981, "grad_norm": 1.3279502391815186, "learning_rate": 7.3515e-05, "loss": 0.3734, "step": 14711 }, { "epoch": 0.82383245604211, "grad_norm": 1.504241943359375, "learning_rate": 7.352e-05, "loss": 0.3383, "step": 14712 }, { "epoch": 0.823888453354239, "grad_norm": 1.501292109489441, "learning_rate": 7.3525e-05, "loss": 0.4715, "step": 14713 }, { "epoch": 0.823944450666368, "grad_norm": 1.2501533031463623, "learning_rate": 7.353e-05, "loss": 0.3866, "step": 14714 }, { "epoch": 0.824000447978497, "grad_norm": 1.207091212272644, "learning_rate": 7.353499999999999e-05, "loss": 0.4442, "step": 14715 }, { "epoch": 
0.824056445290626, "grad_norm": 1.4631602764129639, "learning_rate": 7.354e-05, "loss": 0.445, "step": 14716 }, { "epoch": 0.8241124426027551, "grad_norm": 1.2972570657730103, "learning_rate": 7.354500000000001e-05, "loss": 0.4775, "step": 14717 }, { "epoch": 0.8241684399148841, "grad_norm": 1.363891363143921, "learning_rate": 7.355000000000001e-05, "loss": 0.3694, "step": 14718 }, { "epoch": 0.8242244372270131, "grad_norm": 1.2534159421920776, "learning_rate": 7.355500000000001e-05, "loss": 0.3984, "step": 14719 }, { "epoch": 0.8242804345391421, "grad_norm": 2.2425355911254883, "learning_rate": 7.356000000000001e-05, "loss": 0.4672, "step": 14720 }, { "epoch": 0.8243364318512711, "grad_norm": 1.1252158880233765, "learning_rate": 7.3565e-05, "loss": 0.3602, "step": 14721 }, { "epoch": 0.8243924291634002, "grad_norm": 1.5778850317001343, "learning_rate": 7.357e-05, "loss": 0.5639, "step": 14722 }, { "epoch": 0.8244484264755292, "grad_norm": 1.4036636352539062, "learning_rate": 7.3575e-05, "loss": 0.5913, "step": 14723 }, { "epoch": 0.8245044237876582, "grad_norm": 1.3537404537200928, "learning_rate": 7.358000000000001e-05, "loss": 0.4174, "step": 14724 }, { "epoch": 0.8245604210997872, "grad_norm": 1.240222692489624, "learning_rate": 7.358500000000001e-05, "loss": 0.5879, "step": 14725 }, { "epoch": 0.8246164184119162, "grad_norm": 1.2327338457107544, "learning_rate": 7.359e-05, "loss": 0.4506, "step": 14726 }, { "epoch": 0.8246724157240453, "grad_norm": 1.5453565120697021, "learning_rate": 7.3595e-05, "loss": 0.4951, "step": 14727 }, { "epoch": 0.8247284130361743, "grad_norm": 1.184607744216919, "learning_rate": 7.36e-05, "loss": 0.3828, "step": 14728 }, { "epoch": 0.8247844103483033, "grad_norm": 1.2312238216400146, "learning_rate": 7.3605e-05, "loss": 0.4146, "step": 14729 }, { "epoch": 0.8248404076604323, "grad_norm": 1.4124467372894287, "learning_rate": 7.361e-05, "loss": 0.5357, "step": 14730 }, { "epoch": 0.8248964049725613, "grad_norm": 1.378571629524231, 
"learning_rate": 7.3615e-05, "loss": 0.4415, "step": 14731 }, { "epoch": 0.8249524022846904, "grad_norm": 1.1218804121017456, "learning_rate": 7.362e-05, "loss": 0.3804, "step": 14732 }, { "epoch": 0.8250083995968194, "grad_norm": 1.1442683935165405, "learning_rate": 7.3625e-05, "loss": 0.3825, "step": 14733 }, { "epoch": 0.8250643969089484, "grad_norm": 1.2137649059295654, "learning_rate": 7.363e-05, "loss": 0.3702, "step": 14734 }, { "epoch": 0.8251203942210774, "grad_norm": 1.3204543590545654, "learning_rate": 7.3635e-05, "loss": 0.4778, "step": 14735 }, { "epoch": 0.8251763915332064, "grad_norm": 1.361220359802246, "learning_rate": 7.364e-05, "loss": 0.3699, "step": 14736 }, { "epoch": 0.8252323888453355, "grad_norm": 1.2736396789550781, "learning_rate": 7.3645e-05, "loss": 0.4897, "step": 14737 }, { "epoch": 0.8252883861574645, "grad_norm": 1.2590088844299316, "learning_rate": 7.365e-05, "loss": 0.4012, "step": 14738 }, { "epoch": 0.8253443834695935, "grad_norm": 1.4426013231277466, "learning_rate": 7.365500000000001e-05, "loss": 0.5248, "step": 14739 }, { "epoch": 0.8254003807817225, "grad_norm": 1.2324867248535156, "learning_rate": 7.366000000000001e-05, "loss": 0.3447, "step": 14740 }, { "epoch": 0.8254563780938515, "grad_norm": 1.4885910749435425, "learning_rate": 7.366500000000001e-05, "loss": 0.3961, "step": 14741 }, { "epoch": 0.8255123754059805, "grad_norm": 1.3118256330490112, "learning_rate": 7.367e-05, "loss": 0.428, "step": 14742 }, { "epoch": 0.8255683727181096, "grad_norm": 1.4559918642044067, "learning_rate": 7.3675e-05, "loss": 0.5029, "step": 14743 }, { "epoch": 0.8256243700302386, "grad_norm": 1.3392595052719116, "learning_rate": 7.368e-05, "loss": 0.4989, "step": 14744 }, { "epoch": 0.8256803673423676, "grad_norm": 1.6044344902038574, "learning_rate": 7.368500000000001e-05, "loss": 0.5751, "step": 14745 }, { "epoch": 0.8257363646544966, "grad_norm": 1.5788519382476807, "learning_rate": 7.369000000000001e-05, "loss": 0.4325, "step": 14746 }, 
{ "epoch": 0.8257923619666256, "grad_norm": 1.4509316682815552, "learning_rate": 7.3695e-05, "loss": 0.5399, "step": 14747 }, { "epoch": 0.8258483592787547, "grad_norm": 1.5302283763885498, "learning_rate": 7.37e-05, "loss": 0.4131, "step": 14748 }, { "epoch": 0.8259043565908837, "grad_norm": 1.2519524097442627, "learning_rate": 7.3705e-05, "loss": 0.523, "step": 14749 }, { "epoch": 0.8259603539030127, "grad_norm": 0.9893085360527039, "learning_rate": 7.371e-05, "loss": 0.3256, "step": 14750 }, { "epoch": 0.8260163512151417, "grad_norm": 1.1434038877487183, "learning_rate": 7.3715e-05, "loss": 0.318, "step": 14751 }, { "epoch": 0.8260723485272707, "grad_norm": 1.723732352256775, "learning_rate": 7.372e-05, "loss": 0.5284, "step": 14752 }, { "epoch": 0.8261283458393998, "grad_norm": 1.2246155738830566, "learning_rate": 7.3725e-05, "loss": 0.4872, "step": 14753 }, { "epoch": 0.8261843431515288, "grad_norm": 1.3805774450302124, "learning_rate": 7.373e-05, "loss": 0.4494, "step": 14754 }, { "epoch": 0.8262403404636578, "grad_norm": 1.4114630222320557, "learning_rate": 7.3735e-05, "loss": 0.484, "step": 14755 }, { "epoch": 0.8262963377757868, "grad_norm": 1.6827677488327026, "learning_rate": 7.374000000000001e-05, "loss": 0.5552, "step": 14756 }, { "epoch": 0.8263523350879158, "grad_norm": 1.3255912065505981, "learning_rate": 7.3745e-05, "loss": 0.4747, "step": 14757 }, { "epoch": 0.8264083324000447, "grad_norm": 1.1039899587631226, "learning_rate": 7.375e-05, "loss": 0.4505, "step": 14758 }, { "epoch": 0.8264643297121738, "grad_norm": 1.1913259029388428, "learning_rate": 7.3755e-05, "loss": 0.4721, "step": 14759 }, { "epoch": 0.8265203270243028, "grad_norm": 1.2081841230392456, "learning_rate": 7.376000000000001e-05, "loss": 0.3813, "step": 14760 }, { "epoch": 0.8265763243364318, "grad_norm": 1.3080998659133911, "learning_rate": 7.376500000000001e-05, "loss": 0.4688, "step": 14761 }, { "epoch": 0.8266323216485608, "grad_norm": 1.1383557319641113, "learning_rate": 
7.377000000000001e-05, "loss": 0.3989, "step": 14762 }, { "epoch": 0.8266883189606898, "grad_norm": 1.406874179840088, "learning_rate": 7.3775e-05, "loss": 0.5117, "step": 14763 }, { "epoch": 0.8267443162728189, "grad_norm": 1.470577597618103, "learning_rate": 7.378e-05, "loss": 0.4923, "step": 14764 }, { "epoch": 0.8268003135849479, "grad_norm": 1.519362449645996, "learning_rate": 7.3785e-05, "loss": 0.4208, "step": 14765 }, { "epoch": 0.8268563108970769, "grad_norm": 1.3467808961868286, "learning_rate": 7.379000000000001e-05, "loss": 0.4447, "step": 14766 }, { "epoch": 0.8269123082092059, "grad_norm": 1.2084959745407104, "learning_rate": 7.379500000000001e-05, "loss": 0.4171, "step": 14767 }, { "epoch": 0.8269683055213349, "grad_norm": 1.4759771823883057, "learning_rate": 7.38e-05, "loss": 0.3606, "step": 14768 }, { "epoch": 0.827024302833464, "grad_norm": 1.2212929725646973, "learning_rate": 7.3805e-05, "loss": 0.4007, "step": 14769 }, { "epoch": 0.827080300145593, "grad_norm": 1.5924476385116577, "learning_rate": 7.381e-05, "loss": 0.3969, "step": 14770 }, { "epoch": 0.827136297457722, "grad_norm": 1.227035641670227, "learning_rate": 7.3815e-05, "loss": 0.5113, "step": 14771 }, { "epoch": 0.827192294769851, "grad_norm": 1.3617016077041626, "learning_rate": 7.382e-05, "loss": 0.4139, "step": 14772 }, { "epoch": 0.82724829208198, "grad_norm": 1.984702706336975, "learning_rate": 7.3825e-05, "loss": 0.5963, "step": 14773 }, { "epoch": 0.827304289394109, "grad_norm": 1.2086858749389648, "learning_rate": 7.383e-05, "loss": 0.4192, "step": 14774 }, { "epoch": 0.8273602867062381, "grad_norm": 1.24480140209198, "learning_rate": 7.3835e-05, "loss": 0.4102, "step": 14775 }, { "epoch": 0.8274162840183671, "grad_norm": 1.2117775678634644, "learning_rate": 7.384e-05, "loss": 0.5034, "step": 14776 }, { "epoch": 0.8274722813304961, "grad_norm": 1.4940330982208252, "learning_rate": 7.384500000000001e-05, "loss": 0.4258, "step": 14777 }, { "epoch": 0.8275282786426251, 
"grad_norm": 1.0816054344177246, "learning_rate": 7.385e-05, "loss": 0.3952, "step": 14778 }, { "epoch": 0.8275842759547541, "grad_norm": 1.1663109064102173, "learning_rate": 7.3855e-05, "loss": 0.3236, "step": 14779 }, { "epoch": 0.8276402732668832, "grad_norm": 1.3746048212051392, "learning_rate": 7.386e-05, "loss": 0.3799, "step": 14780 }, { "epoch": 0.8276962705790122, "grad_norm": 1.4794203042984009, "learning_rate": 7.386500000000001e-05, "loss": 0.4392, "step": 14781 }, { "epoch": 0.8277522678911412, "grad_norm": 1.4070956707000732, "learning_rate": 7.387000000000001e-05, "loss": 0.4524, "step": 14782 }, { "epoch": 0.8278082652032702, "grad_norm": 1.380784511566162, "learning_rate": 7.3875e-05, "loss": 0.3083, "step": 14783 }, { "epoch": 0.8278642625153992, "grad_norm": 1.4081639051437378, "learning_rate": 7.388e-05, "loss": 0.5004, "step": 14784 }, { "epoch": 0.8279202598275283, "grad_norm": 1.2172077894210815, "learning_rate": 7.3885e-05, "loss": 0.4313, "step": 14785 }, { "epoch": 0.8279762571396573, "grad_norm": 1.530921459197998, "learning_rate": 7.389e-05, "loss": 0.6821, "step": 14786 }, { "epoch": 0.8280322544517863, "grad_norm": 1.1566896438598633, "learning_rate": 7.3895e-05, "loss": 0.4739, "step": 14787 }, { "epoch": 0.8280882517639153, "grad_norm": 1.351818561553955, "learning_rate": 7.390000000000001e-05, "loss": 0.4297, "step": 14788 }, { "epoch": 0.8281442490760443, "grad_norm": 1.5123695135116577, "learning_rate": 7.3905e-05, "loss": 0.6097, "step": 14789 }, { "epoch": 0.8282002463881734, "grad_norm": 1.398011565208435, "learning_rate": 7.391e-05, "loss": 0.4429, "step": 14790 }, { "epoch": 0.8282562437003024, "grad_norm": 1.2961584329605103, "learning_rate": 7.3915e-05, "loss": 0.4447, "step": 14791 }, { "epoch": 0.8283122410124314, "grad_norm": 1.4222519397735596, "learning_rate": 7.392e-05, "loss": 0.5819, "step": 14792 }, { "epoch": 0.8283682383245604, "grad_norm": 1.1378669738769531, "learning_rate": 7.3925e-05, "loss": 0.3585, "step": 
14793 }, { "epoch": 0.8284242356366894, "grad_norm": 1.3640563488006592, "learning_rate": 7.393e-05, "loss": 0.3788, "step": 14794 }, { "epoch": 0.8284802329488185, "grad_norm": 1.566734790802002, "learning_rate": 7.3935e-05, "loss": 0.3884, "step": 14795 }, { "epoch": 0.8285362302609475, "grad_norm": 1.1821701526641846, "learning_rate": 7.394e-05, "loss": 0.4079, "step": 14796 }, { "epoch": 0.8285922275730765, "grad_norm": 1.4555513858795166, "learning_rate": 7.394500000000001e-05, "loss": 0.4253, "step": 14797 }, { "epoch": 0.8286482248852055, "grad_norm": 1.151355266571045, "learning_rate": 7.395000000000001e-05, "loss": 0.5326, "step": 14798 }, { "epoch": 0.8287042221973345, "grad_norm": 1.3573161363601685, "learning_rate": 7.3955e-05, "loss": 0.4826, "step": 14799 }, { "epoch": 0.8287602195094635, "grad_norm": 1.3706074953079224, "learning_rate": 7.396e-05, "loss": 0.4708, "step": 14800 }, { "epoch": 0.8288162168215926, "grad_norm": 1.7695225477218628, "learning_rate": 7.3965e-05, "loss": 0.5706, "step": 14801 }, { "epoch": 0.8288722141337216, "grad_norm": 1.3132703304290771, "learning_rate": 7.397000000000001e-05, "loss": 0.5201, "step": 14802 }, { "epoch": 0.8289282114458506, "grad_norm": 1.317012071609497, "learning_rate": 7.397500000000001e-05, "loss": 0.3876, "step": 14803 }, { "epoch": 0.8289842087579796, "grad_norm": 1.2215791940689087, "learning_rate": 7.398e-05, "loss": 0.4146, "step": 14804 }, { "epoch": 0.8290402060701086, "grad_norm": 1.2587159872055054, "learning_rate": 7.3985e-05, "loss": 0.5045, "step": 14805 }, { "epoch": 0.8290962033822377, "grad_norm": 16.493318557739258, "learning_rate": 7.399e-05, "loss": 0.3607, "step": 14806 }, { "epoch": 0.8291522006943667, "grad_norm": 1.1195287704467773, "learning_rate": 7.3995e-05, "loss": 0.3974, "step": 14807 }, { "epoch": 0.8292081980064957, "grad_norm": 1.345656394958496, "learning_rate": 7.4e-05, "loss": 0.4637, "step": 14808 }, { "epoch": 0.8292641953186247, "grad_norm": 1.370466947555542, 
"learning_rate": 7.400500000000001e-05, "loss": 0.4364, "step": 14809 }, { "epoch": 0.8293201926307537, "grad_norm": 1.2453478574752808, "learning_rate": 7.401e-05, "loss": 0.3906, "step": 14810 }, { "epoch": 0.8293761899428828, "grad_norm": 1.512919306755066, "learning_rate": 7.4015e-05, "loss": 0.4683, "step": 14811 }, { "epoch": 0.8294321872550118, "grad_norm": 1.2530479431152344, "learning_rate": 7.402e-05, "loss": 0.5021, "step": 14812 }, { "epoch": 0.8294881845671408, "grad_norm": 1.4404563903808594, "learning_rate": 7.4025e-05, "loss": 0.4307, "step": 14813 }, { "epoch": 0.8295441818792698, "grad_norm": 1.1528865098953247, "learning_rate": 7.403e-05, "loss": 0.4487, "step": 14814 }, { "epoch": 0.8296001791913988, "grad_norm": 1.3025373220443726, "learning_rate": 7.4035e-05, "loss": 0.4264, "step": 14815 }, { "epoch": 0.8296561765035279, "grad_norm": 1.4294639825820923, "learning_rate": 7.404e-05, "loss": 0.4792, "step": 14816 }, { "epoch": 0.8297121738156569, "grad_norm": 1.3184431791305542, "learning_rate": 7.404500000000001e-05, "loss": 0.4518, "step": 14817 }, { "epoch": 0.8297681711277859, "grad_norm": 1.3912808895111084, "learning_rate": 7.405000000000001e-05, "loss": 0.4099, "step": 14818 }, { "epoch": 0.8298241684399149, "grad_norm": 1.3266398906707764, "learning_rate": 7.405500000000001e-05, "loss": 0.4376, "step": 14819 }, { "epoch": 0.8298801657520439, "grad_norm": 1.2280153036117554, "learning_rate": 7.406e-05, "loss": 0.4315, "step": 14820 }, { "epoch": 0.829936163064173, "grad_norm": 1.4548487663269043, "learning_rate": 7.4065e-05, "loss": 0.5355, "step": 14821 }, { "epoch": 0.829992160376302, "grad_norm": 1.5744315385818481, "learning_rate": 7.407e-05, "loss": 0.4781, "step": 14822 }, { "epoch": 0.830048157688431, "grad_norm": 1.4761401414871216, "learning_rate": 7.407500000000001e-05, "loss": 0.4804, "step": 14823 }, { "epoch": 0.83010415500056, "grad_norm": 1.4220203161239624, "learning_rate": 7.408000000000001e-05, "loss": 0.4612, "step": 
14824 }, { "epoch": 0.830160152312689, "grad_norm": 1.3385324478149414, "learning_rate": 7.4085e-05, "loss": 0.4546, "step": 14825 }, { "epoch": 0.830216149624818, "grad_norm": 1.2253103256225586, "learning_rate": 7.409e-05, "loss": 0.3805, "step": 14826 }, { "epoch": 0.8302721469369471, "grad_norm": 1.189700961112976, "learning_rate": 7.4095e-05, "loss": 0.3957, "step": 14827 }, { "epoch": 0.8303281442490761, "grad_norm": 1.3408607244491577, "learning_rate": 7.41e-05, "loss": 0.4735, "step": 14828 }, { "epoch": 0.8303841415612051, "grad_norm": 1.4462240934371948, "learning_rate": 7.4105e-05, "loss": 0.4593, "step": 14829 }, { "epoch": 0.8304401388733341, "grad_norm": 1.2908709049224854, "learning_rate": 7.411000000000001e-05, "loss": 0.4502, "step": 14830 }, { "epoch": 0.8304961361854631, "grad_norm": 1.2498122453689575, "learning_rate": 7.4115e-05, "loss": 0.3867, "step": 14831 }, { "epoch": 0.8305521334975922, "grad_norm": 1.355921983718872, "learning_rate": 7.412e-05, "loss": 0.4423, "step": 14832 }, { "epoch": 0.8306081308097212, "grad_norm": 1.0706628561019897, "learning_rate": 7.4125e-05, "loss": 0.3468, "step": 14833 }, { "epoch": 0.8306641281218502, "grad_norm": 1.3038891553878784, "learning_rate": 7.413e-05, "loss": 0.3944, "step": 14834 }, { "epoch": 0.8307201254339792, "grad_norm": 1.3403329849243164, "learning_rate": 7.4135e-05, "loss": 0.4666, "step": 14835 }, { "epoch": 0.8307761227461082, "grad_norm": 11.348949432373047, "learning_rate": 7.414e-05, "loss": 0.502, "step": 14836 }, { "epoch": 0.8308321200582373, "grad_norm": 2.9683735370635986, "learning_rate": 7.4145e-05, "loss": 0.3808, "step": 14837 }, { "epoch": 0.8308881173703663, "grad_norm": 1.7411690950393677, "learning_rate": 7.415000000000001e-05, "loss": 0.6224, "step": 14838 }, { "epoch": 0.8309441146824953, "grad_norm": 1.753604769706726, "learning_rate": 7.415500000000001e-05, "loss": 0.5623, "step": 14839 }, { "epoch": 0.8310001119946243, "grad_norm": 1.29805588722229, "learning_rate": 
7.416000000000001e-05, "loss": 0.4957, "step": 14840 }, { "epoch": 0.8310561093067532, "grad_norm": 1.355239987373352, "learning_rate": 7.4165e-05, "loss": 0.4066, "step": 14841 }, { "epoch": 0.8311121066188822, "grad_norm": 1.1854850053787231, "learning_rate": 7.417e-05, "loss": 0.4089, "step": 14842 }, { "epoch": 0.8311681039310113, "grad_norm": 1.3505926132202148, "learning_rate": 7.4175e-05, "loss": 0.4427, "step": 14843 }, { "epoch": 0.8312241012431403, "grad_norm": 1.7071194648742676, "learning_rate": 7.418000000000001e-05, "loss": 0.5477, "step": 14844 }, { "epoch": 0.8312800985552693, "grad_norm": 1.307734727859497, "learning_rate": 7.418500000000001e-05, "loss": 0.3538, "step": 14845 }, { "epoch": 0.8313360958673983, "grad_norm": 1.3063827753067017, "learning_rate": 7.419e-05, "loss": 0.4007, "step": 14846 }, { "epoch": 0.8313920931795273, "grad_norm": 1.40926194190979, "learning_rate": 7.4195e-05, "loss": 0.4593, "step": 14847 }, { "epoch": 0.8314480904916564, "grad_norm": 1.2591607570648193, "learning_rate": 7.42e-05, "loss": 0.4995, "step": 14848 }, { "epoch": 0.8315040878037854, "grad_norm": 1.5662548542022705, "learning_rate": 7.4205e-05, "loss": 0.5592, "step": 14849 }, { "epoch": 0.8315600851159144, "grad_norm": 1.2944042682647705, "learning_rate": 7.421e-05, "loss": 0.4518, "step": 14850 }, { "epoch": 0.8316160824280434, "grad_norm": 2.1192898750305176, "learning_rate": 7.421500000000001e-05, "loss": 0.4295, "step": 14851 }, { "epoch": 0.8316720797401724, "grad_norm": 1.2583346366882324, "learning_rate": 7.422e-05, "loss": 0.4781, "step": 14852 }, { "epoch": 0.8317280770523015, "grad_norm": 1.570609450340271, "learning_rate": 7.4225e-05, "loss": 0.6466, "step": 14853 }, { "epoch": 0.8317840743644305, "grad_norm": 1.2126221656799316, "learning_rate": 7.423e-05, "loss": 0.4425, "step": 14854 }, { "epoch": 0.8318400716765595, "grad_norm": 1.2975512742996216, "learning_rate": 7.4235e-05, "loss": 0.3918, "step": 14855 }, { "epoch": 0.8318960689886885, 
"grad_norm": 1.341474175453186, "learning_rate": 7.424e-05, "loss": 0.4896, "step": 14856 }, { "epoch": 0.8319520663008175, "grad_norm": 1.5290112495422363, "learning_rate": 7.4245e-05, "loss": 0.4209, "step": 14857 }, { "epoch": 0.8320080636129465, "grad_norm": 1.3061316013336182, "learning_rate": 7.425e-05, "loss": 0.4874, "step": 14858 }, { "epoch": 0.8320640609250756, "grad_norm": 1.4568842649459839, "learning_rate": 7.425500000000001e-05, "loss": 0.4203, "step": 14859 }, { "epoch": 0.8321200582372046, "grad_norm": 1.3135074377059937, "learning_rate": 7.426000000000001e-05, "loss": 0.4105, "step": 14860 }, { "epoch": 0.8321760555493336, "grad_norm": 1.4716944694519043, "learning_rate": 7.426500000000001e-05, "loss": 0.5895, "step": 14861 }, { "epoch": 0.8322320528614626, "grad_norm": 1.6510661840438843, "learning_rate": 7.427e-05, "loss": 0.476, "step": 14862 }, { "epoch": 0.8322880501735916, "grad_norm": 1.1166248321533203, "learning_rate": 7.4275e-05, "loss": 0.3829, "step": 14863 }, { "epoch": 0.8323440474857207, "grad_norm": 1.3078899383544922, "learning_rate": 7.428e-05, "loss": 0.419, "step": 14864 }, { "epoch": 0.8324000447978497, "grad_norm": 1.235997200012207, "learning_rate": 7.428500000000001e-05, "loss": 0.4324, "step": 14865 }, { "epoch": 0.8324560421099787, "grad_norm": 1.4174054861068726, "learning_rate": 7.429000000000001e-05, "loss": 0.4429, "step": 14866 }, { "epoch": 0.8325120394221077, "grad_norm": 1.2512991428375244, "learning_rate": 7.4295e-05, "loss": 0.5797, "step": 14867 }, { "epoch": 0.8325680367342367, "grad_norm": 1.4484134912490845, "learning_rate": 7.43e-05, "loss": 0.4633, "step": 14868 }, { "epoch": 0.8326240340463658, "grad_norm": 1.1724870204925537, "learning_rate": 7.4305e-05, "loss": 0.4174, "step": 14869 }, { "epoch": 0.8326800313584948, "grad_norm": 1.6425628662109375, "learning_rate": 7.431e-05, "loss": 0.4574, "step": 14870 }, { "epoch": 0.8327360286706238, "grad_norm": 1.5310993194580078, "learning_rate": 7.4315e-05, 
"loss": 0.4455, "step": 14871 }, { "epoch": 0.8327920259827528, "grad_norm": 1.3307411670684814, "learning_rate": 7.432e-05, "loss": 0.3753, "step": 14872 }, { "epoch": 0.8328480232948818, "grad_norm": 1.6938661336898804, "learning_rate": 7.4325e-05, "loss": 0.3857, "step": 14873 }, { "epoch": 0.8329040206070109, "grad_norm": 1.1644052267074585, "learning_rate": 7.433e-05, "loss": 0.37, "step": 14874 }, { "epoch": 0.8329600179191399, "grad_norm": 1.5414255857467651, "learning_rate": 7.4335e-05, "loss": 0.7109, "step": 14875 }, { "epoch": 0.8330160152312689, "grad_norm": 1.226073145866394, "learning_rate": 7.434e-05, "loss": 0.4286, "step": 14876 }, { "epoch": 0.8330720125433979, "grad_norm": 1.476332187652588, "learning_rate": 7.434500000000001e-05, "loss": 0.5368, "step": 14877 }, { "epoch": 0.8331280098555269, "grad_norm": 1.1078234910964966, "learning_rate": 7.435e-05, "loss": 0.3977, "step": 14878 }, { "epoch": 0.833184007167656, "grad_norm": 1.0875986814498901, "learning_rate": 7.4355e-05, "loss": 0.4219, "step": 14879 }, { "epoch": 0.833240004479785, "grad_norm": 1.0852160453796387, "learning_rate": 7.436000000000001e-05, "loss": 0.3652, "step": 14880 }, { "epoch": 0.833296001791914, "grad_norm": 1.3678065538406372, "learning_rate": 7.436500000000001e-05, "loss": 0.4264, "step": 14881 }, { "epoch": 0.833351999104043, "grad_norm": 1.3367291688919067, "learning_rate": 7.437000000000001e-05, "loss": 0.4543, "step": 14882 }, { "epoch": 0.833407996416172, "grad_norm": 1.0837889909744263, "learning_rate": 7.4375e-05, "loss": 0.3629, "step": 14883 }, { "epoch": 0.833463993728301, "grad_norm": 1.1942131519317627, "learning_rate": 7.438e-05, "loss": 0.4346, "step": 14884 }, { "epoch": 0.8335199910404301, "grad_norm": 1.2060596942901611, "learning_rate": 7.4385e-05, "loss": 0.4946, "step": 14885 }, { "epoch": 0.8335759883525591, "grad_norm": 1.5555088520050049, "learning_rate": 7.439e-05, "loss": 0.4262, "step": 14886 }, { "epoch": 0.8336319856646881, "grad_norm": 
1.3655695915222168, "learning_rate": 7.439500000000001e-05, "loss": 0.4072, "step": 14887 }, { "epoch": 0.8336879829768171, "grad_norm": 1.4177491664886475, "learning_rate": 7.44e-05, "loss": 0.3854, "step": 14888 }, { "epoch": 0.8337439802889461, "grad_norm": 1.4612762928009033, "learning_rate": 7.4405e-05, "loss": 0.5113, "step": 14889 }, { "epoch": 0.8337999776010752, "grad_norm": 2.1552562713623047, "learning_rate": 7.441e-05, "loss": 0.4914, "step": 14890 }, { "epoch": 0.8338559749132042, "grad_norm": 1.421797752380371, "learning_rate": 7.4415e-05, "loss": 0.4435, "step": 14891 }, { "epoch": 0.8339119722253332, "grad_norm": 28.51599884033203, "learning_rate": 7.442e-05, "loss": 0.4078, "step": 14892 }, { "epoch": 0.8339679695374622, "grad_norm": 1.7614316940307617, "learning_rate": 7.4425e-05, "loss": 0.4645, "step": 14893 }, { "epoch": 0.8340239668495912, "grad_norm": 1.2875900268554688, "learning_rate": 7.443e-05, "loss": 0.3867, "step": 14894 }, { "epoch": 0.8340799641617203, "grad_norm": 1.457829236984253, "learning_rate": 7.4435e-05, "loss": 0.5773, "step": 14895 }, { "epoch": 0.8341359614738493, "grad_norm": 1.4923008680343628, "learning_rate": 7.444e-05, "loss": 0.5008, "step": 14896 }, { "epoch": 0.8341919587859783, "grad_norm": 1.265728235244751, "learning_rate": 7.4445e-05, "loss": 0.4255, "step": 14897 }, { "epoch": 0.8342479560981073, "grad_norm": 1.3114256858825684, "learning_rate": 7.445000000000001e-05, "loss": 0.4625, "step": 14898 }, { "epoch": 0.8343039534102363, "grad_norm": 1.353049874305725, "learning_rate": 7.4455e-05, "loss": 0.4348, "step": 14899 }, { "epoch": 0.8343599507223654, "grad_norm": 1.4858341217041016, "learning_rate": 7.446e-05, "loss": 0.5839, "step": 14900 }, { "epoch": 0.8344159480344944, "grad_norm": 1.201106071472168, "learning_rate": 7.446500000000001e-05, "loss": 0.5097, "step": 14901 }, { "epoch": 0.8344719453466234, "grad_norm": 1.4960027933120728, "learning_rate": 7.447000000000001e-05, "loss": 0.3893, "step": 14902 
}, { "epoch": 0.8345279426587524, "grad_norm": 1.461047887802124, "learning_rate": 7.447500000000001e-05, "loss": 0.5777, "step": 14903 }, { "epoch": 0.8345839399708814, "grad_norm": 1.317950963973999, "learning_rate": 7.448e-05, "loss": 0.3558, "step": 14904 }, { "epoch": 0.8346399372830104, "grad_norm": 1.1693061590194702, "learning_rate": 7.4485e-05, "loss": 0.3988, "step": 14905 }, { "epoch": 0.8346959345951395, "grad_norm": 1.4293357133865356, "learning_rate": 7.449e-05, "loss": 0.3711, "step": 14906 }, { "epoch": 0.8347519319072685, "grad_norm": 1.3089927434921265, "learning_rate": 7.4495e-05, "loss": 0.4544, "step": 14907 }, { "epoch": 0.8348079292193975, "grad_norm": 1.3952184915542603, "learning_rate": 7.450000000000001e-05, "loss": 0.5343, "step": 14908 }, { "epoch": 0.8348639265315265, "grad_norm": 1.7103863954544067, "learning_rate": 7.4505e-05, "loss": 0.4547, "step": 14909 }, { "epoch": 0.8349199238436555, "grad_norm": 1.2795830965042114, "learning_rate": 7.451e-05, "loss": 0.4001, "step": 14910 }, { "epoch": 0.8349759211557846, "grad_norm": 1.4509633779525757, "learning_rate": 7.4515e-05, "loss": 0.5354, "step": 14911 }, { "epoch": 0.8350319184679136, "grad_norm": 1.3074486255645752, "learning_rate": 7.452e-05, "loss": 0.3619, "step": 14912 }, { "epoch": 0.8350879157800426, "grad_norm": 1.2817742824554443, "learning_rate": 7.4525e-05, "loss": 0.4242, "step": 14913 }, { "epoch": 0.8351439130921716, "grad_norm": 1.4056373834609985, "learning_rate": 7.453e-05, "loss": 0.546, "step": 14914 }, { "epoch": 0.8351999104043006, "grad_norm": 1.1839416027069092, "learning_rate": 7.4535e-05, "loss": 0.4322, "step": 14915 }, { "epoch": 0.8352559077164297, "grad_norm": 1.1238529682159424, "learning_rate": 7.454e-05, "loss": 0.3937, "step": 14916 }, { "epoch": 0.8353119050285587, "grad_norm": 1.6225190162658691, "learning_rate": 7.4545e-05, "loss": 0.5391, "step": 14917 }, { "epoch": 0.8353679023406877, "grad_norm": 1.2570643424987793, "learning_rate": 
7.455000000000001e-05, "loss": 0.5262, "step": 14918 }, { "epoch": 0.8354238996528167, "grad_norm": 1.3023627996444702, "learning_rate": 7.455500000000001e-05, "loss": 0.5817, "step": 14919 }, { "epoch": 0.8354798969649457, "grad_norm": 1.1538527011871338, "learning_rate": 7.456e-05, "loss": 0.3635, "step": 14920 }, { "epoch": 0.8355358942770748, "grad_norm": 1.3623279333114624, "learning_rate": 7.4565e-05, "loss": 0.45, "step": 14921 }, { "epoch": 0.8355918915892038, "grad_norm": 1.2881258726119995, "learning_rate": 7.457000000000001e-05, "loss": 0.4108, "step": 14922 }, { "epoch": 0.8356478889013328, "grad_norm": 1.0918225049972534, "learning_rate": 7.457500000000001e-05, "loss": 0.3451, "step": 14923 }, { "epoch": 0.8357038862134617, "grad_norm": 1.4319071769714355, "learning_rate": 7.458000000000001e-05, "loss": 0.5995, "step": 14924 }, { "epoch": 0.8357598835255907, "grad_norm": 1.2443747520446777, "learning_rate": 7.4585e-05, "loss": 0.4229, "step": 14925 }, { "epoch": 0.8358158808377197, "grad_norm": 1.3476805686950684, "learning_rate": 7.459e-05, "loss": 0.3255, "step": 14926 }, { "epoch": 0.8358718781498488, "grad_norm": 1.546398401260376, "learning_rate": 7.4595e-05, "loss": 0.4513, "step": 14927 }, { "epoch": 0.8359278754619778, "grad_norm": 1.3001435995101929, "learning_rate": 7.46e-05, "loss": 0.4581, "step": 14928 }, { "epoch": 0.8359838727741068, "grad_norm": 1.1330454349517822, "learning_rate": 7.460500000000001e-05, "loss": 0.3801, "step": 14929 }, { "epoch": 0.8360398700862358, "grad_norm": 1.2715420722961426, "learning_rate": 7.461e-05, "loss": 0.3954, "step": 14930 }, { "epoch": 0.8360958673983648, "grad_norm": 1.3216328620910645, "learning_rate": 7.4615e-05, "loss": 0.4547, "step": 14931 }, { "epoch": 0.8361518647104939, "grad_norm": 1.295122742652893, "learning_rate": 7.462e-05, "loss": 0.5081, "step": 14932 }, { "epoch": 0.8362078620226229, "grad_norm": 1.2410364151000977, "learning_rate": 7.4625e-05, "loss": 0.3546, "step": 14933 }, { 
"epoch": 0.8362638593347519, "grad_norm": 1.381272554397583, "learning_rate": 7.463e-05, "loss": 0.4826, "step": 14934 }, { "epoch": 0.8363198566468809, "grad_norm": 1.2284221649169922, "learning_rate": 7.463499999999999e-05, "loss": 0.4441, "step": 14935 }, { "epoch": 0.8363758539590099, "grad_norm": 1.4489960670471191, "learning_rate": 7.464e-05, "loss": 0.4896, "step": 14936 }, { "epoch": 0.836431851271139, "grad_norm": 1.1005908250808716, "learning_rate": 7.4645e-05, "loss": 0.3141, "step": 14937 }, { "epoch": 0.836487848583268, "grad_norm": 1.5796070098876953, "learning_rate": 7.465000000000001e-05, "loss": 0.5244, "step": 14938 }, { "epoch": 0.836543845895397, "grad_norm": 1.3035825490951538, "learning_rate": 7.465500000000001e-05, "loss": 0.3867, "step": 14939 }, { "epoch": 0.836599843207526, "grad_norm": 1.3744882345199585, "learning_rate": 7.466000000000001e-05, "loss": 0.4519, "step": 14940 }, { "epoch": 0.836655840519655, "grad_norm": 1.293918490409851, "learning_rate": 7.4665e-05, "loss": 0.4301, "step": 14941 }, { "epoch": 0.836711837831784, "grad_norm": 1.397953987121582, "learning_rate": 7.467e-05, "loss": 0.3475, "step": 14942 }, { "epoch": 0.8367678351439131, "grad_norm": 1.3367972373962402, "learning_rate": 7.467500000000001e-05, "loss": 0.4344, "step": 14943 }, { "epoch": 0.8368238324560421, "grad_norm": 1.3484013080596924, "learning_rate": 7.468000000000001e-05, "loss": 0.4412, "step": 14944 }, { "epoch": 0.8368798297681711, "grad_norm": 1.146432638168335, "learning_rate": 7.468500000000001e-05, "loss": 0.3475, "step": 14945 }, { "epoch": 0.8369358270803001, "grad_norm": 1.3615920543670654, "learning_rate": 7.469e-05, "loss": 0.4965, "step": 14946 }, { "epoch": 0.8369918243924291, "grad_norm": 1.2948979139328003, "learning_rate": 7.4695e-05, "loss": 0.4753, "step": 14947 }, { "epoch": 0.8370478217045582, "grad_norm": 1.672621726989746, "learning_rate": 7.47e-05, "loss": 0.3665, "step": 14948 }, { "epoch": 0.8371038190166872, "grad_norm": 
1.4697469472885132, "learning_rate": 7.4705e-05, "loss": 0.5146, "step": 14949 }, { "epoch": 0.8371598163288162, "grad_norm": 1.4369267225265503, "learning_rate": 7.471000000000001e-05, "loss": 0.4534, "step": 14950 }, { "epoch": 0.8372158136409452, "grad_norm": 1.263033390045166, "learning_rate": 7.4715e-05, "loss": 0.3747, "step": 14951 }, { "epoch": 0.8372718109530742, "grad_norm": 1.2970012426376343, "learning_rate": 7.472e-05, "loss": 0.4123, "step": 14952 }, { "epoch": 0.8373278082652033, "grad_norm": 1.3430755138397217, "learning_rate": 7.4725e-05, "loss": 0.5154, "step": 14953 }, { "epoch": 0.8373838055773323, "grad_norm": 1.4384161233901978, "learning_rate": 7.473e-05, "loss": 0.4757, "step": 14954 }, { "epoch": 0.8374398028894613, "grad_norm": 1.372983694076538, "learning_rate": 7.4735e-05, "loss": 0.5302, "step": 14955 }, { "epoch": 0.8374958002015903, "grad_norm": 1.2524195909500122, "learning_rate": 7.473999999999999e-05, "loss": 0.3936, "step": 14956 }, { "epoch": 0.8375517975137193, "grad_norm": 1.3363986015319824, "learning_rate": 7.4745e-05, "loss": 0.5856, "step": 14957 }, { "epoch": 0.8376077948258484, "grad_norm": 1.2847176790237427, "learning_rate": 7.475000000000001e-05, "loss": 0.4496, "step": 14958 }, { "epoch": 0.8376637921379774, "grad_norm": 1.5691107511520386, "learning_rate": 7.475500000000001e-05, "loss": 0.8147, "step": 14959 }, { "epoch": 0.8377197894501064, "grad_norm": 1.3995745182037354, "learning_rate": 7.476000000000001e-05, "loss": 0.4678, "step": 14960 }, { "epoch": 0.8377757867622354, "grad_norm": 1.3471348285675049, "learning_rate": 7.4765e-05, "loss": 0.4194, "step": 14961 }, { "epoch": 0.8378317840743644, "grad_norm": 1.2455815076828003, "learning_rate": 7.477e-05, "loss": 0.3867, "step": 14962 }, { "epoch": 0.8378877813864934, "grad_norm": 1.4678806066513062, "learning_rate": 7.4775e-05, "loss": 0.4391, "step": 14963 }, { "epoch": 0.8379437786986225, "grad_norm": 1.3188345432281494, "learning_rate": 7.478e-05, "loss": 
0.4947, "step": 14964 }, { "epoch": 0.8379997760107515, "grad_norm": 1.3664520978927612, "learning_rate": 7.478500000000001e-05, "loss": 0.4572, "step": 14965 }, { "epoch": 0.8380557733228805, "grad_norm": 1.267961025238037, "learning_rate": 7.479000000000001e-05, "loss": 0.3737, "step": 14966 }, { "epoch": 0.8381117706350095, "grad_norm": 1.320089340209961, "learning_rate": 7.4795e-05, "loss": 0.4796, "step": 14967 }, { "epoch": 0.8381677679471385, "grad_norm": 1.3131184577941895, "learning_rate": 7.48e-05, "loss": 0.4372, "step": 14968 }, { "epoch": 0.8382237652592676, "grad_norm": 1.4215002059936523, "learning_rate": 7.4805e-05, "loss": 0.4411, "step": 14969 }, { "epoch": 0.8382797625713966, "grad_norm": 1.3251869678497314, "learning_rate": 7.481e-05, "loss": 0.4582, "step": 14970 }, { "epoch": 0.8383357598835256, "grad_norm": 1.3846755027770996, "learning_rate": 7.481500000000001e-05, "loss": 0.5226, "step": 14971 }, { "epoch": 0.8383917571956546, "grad_norm": 1.3473392724990845, "learning_rate": 7.482e-05, "loss": 0.5661, "step": 14972 }, { "epoch": 0.8384477545077836, "grad_norm": 1.5047401189804077, "learning_rate": 7.4825e-05, "loss": 0.5873, "step": 14973 }, { "epoch": 0.8385037518199127, "grad_norm": 1.4454703330993652, "learning_rate": 7.483e-05, "loss": 0.6128, "step": 14974 }, { "epoch": 0.8385597491320417, "grad_norm": 1.2447439432144165, "learning_rate": 7.4835e-05, "loss": 0.3712, "step": 14975 }, { "epoch": 0.8386157464441707, "grad_norm": 1.2866668701171875, "learning_rate": 7.484e-05, "loss": 0.3689, "step": 14976 }, { "epoch": 0.8386717437562997, "grad_norm": 1.4072551727294922, "learning_rate": 7.484499999999999e-05, "loss": 0.4984, "step": 14977 }, { "epoch": 0.8387277410684287, "grad_norm": 1.4915257692337036, "learning_rate": 7.485e-05, "loss": 0.3813, "step": 14978 }, { "epoch": 0.8387837383805578, "grad_norm": 1.3246511220932007, "learning_rate": 7.485500000000001e-05, "loss": 0.4371, "step": 14979 }, { "epoch": 0.8388397356926868, 
"grad_norm": 1.0489832162857056, "learning_rate": 7.486000000000001e-05, "loss": 0.3958, "step": 14980 }, { "epoch": 0.8388957330048158, "grad_norm": 1.1089919805526733, "learning_rate": 7.486500000000001e-05, "loss": 0.3411, "step": 14981 }, { "epoch": 0.8389517303169448, "grad_norm": 1.6093395948410034, "learning_rate": 7.487e-05, "loss": 0.4069, "step": 14982 }, { "epoch": 0.8390077276290738, "grad_norm": 1.4661673307418823, "learning_rate": 7.4875e-05, "loss": 0.5336, "step": 14983 }, { "epoch": 0.8390637249412028, "grad_norm": 1.3630739450454712, "learning_rate": 7.488e-05, "loss": 0.4706, "step": 14984 }, { "epoch": 0.8391197222533319, "grad_norm": 1.1160573959350586, "learning_rate": 7.4885e-05, "loss": 0.3812, "step": 14985 }, { "epoch": 0.8391757195654609, "grad_norm": 1.460625410079956, "learning_rate": 7.489000000000001e-05, "loss": 0.615, "step": 14986 }, { "epoch": 0.8392317168775899, "grad_norm": 1.4919368028640747, "learning_rate": 7.489500000000001e-05, "loss": 0.6785, "step": 14987 }, { "epoch": 0.8392877141897189, "grad_norm": 1.2921801805496216, "learning_rate": 7.49e-05, "loss": 0.3843, "step": 14988 }, { "epoch": 0.8393437115018479, "grad_norm": 1.3943148851394653, "learning_rate": 7.4905e-05, "loss": 0.5812, "step": 14989 }, { "epoch": 0.839399708813977, "grad_norm": 1.2905611991882324, "learning_rate": 7.491e-05, "loss": 0.4547, "step": 14990 }, { "epoch": 0.839455706126106, "grad_norm": 1.599746823310852, "learning_rate": 7.4915e-05, "loss": 0.4198, "step": 14991 }, { "epoch": 0.839511703438235, "grad_norm": 1.3307349681854248, "learning_rate": 7.492000000000001e-05, "loss": 0.4406, "step": 14992 }, { "epoch": 0.839567700750364, "grad_norm": 1.2734228372573853, "learning_rate": 7.4925e-05, "loss": 0.5028, "step": 14993 }, { "epoch": 0.839623698062493, "grad_norm": 1.4966728687286377, "learning_rate": 7.493e-05, "loss": 0.4875, "step": 14994 }, { "epoch": 0.8396796953746221, "grad_norm": 1.447335958480835, "learning_rate": 7.4935e-05, "loss": 
0.4251, "step": 14995 }, { "epoch": 0.8397356926867511, "grad_norm": 1.4132452011108398, "learning_rate": 7.494e-05, "loss": 0.5464, "step": 14996 }, { "epoch": 0.8397916899988801, "grad_norm": 1.608242154121399, "learning_rate": 7.4945e-05, "loss": 0.3826, "step": 14997 }, { "epoch": 0.8398476873110091, "grad_norm": 1.1616274118423462, "learning_rate": 7.495e-05, "loss": 0.3667, "step": 14998 }, { "epoch": 0.8399036846231381, "grad_norm": 1.2818100452423096, "learning_rate": 7.4955e-05, "loss": 0.4147, "step": 14999 }, { "epoch": 0.8399596819352672, "grad_norm": 1.1615766286849976, "learning_rate": 7.496000000000001e-05, "loss": 0.3461, "step": 15000 }, { "epoch": 0.8400156792473962, "grad_norm": 1.5612907409667969, "learning_rate": 7.496500000000001e-05, "loss": 0.4007, "step": 15001 }, { "epoch": 0.8400716765595252, "grad_norm": 1.4862791299819946, "learning_rate": 7.497000000000001e-05, "loss": 0.4315, "step": 15002 }, { "epoch": 0.8401276738716542, "grad_norm": 1.3636221885681152, "learning_rate": 7.4975e-05, "loss": 0.5253, "step": 15003 }, { "epoch": 0.8401836711837832, "grad_norm": 1.2589805126190186, "learning_rate": 7.498e-05, "loss": 0.3413, "step": 15004 }, { "epoch": 0.8402396684959123, "grad_norm": 1.2923073768615723, "learning_rate": 7.4985e-05, "loss": 0.3613, "step": 15005 }, { "epoch": 0.8402956658080412, "grad_norm": 1.5071004629135132, "learning_rate": 7.499e-05, "loss": 0.385, "step": 15006 }, { "epoch": 0.8403516631201702, "grad_norm": 1.355750560760498, "learning_rate": 7.499500000000001e-05, "loss": 0.3953, "step": 15007 }, { "epoch": 0.8404076604322992, "grad_norm": 1.3409547805786133, "learning_rate": 7.500000000000001e-05, "loss": 0.3501, "step": 15008 }, { "epoch": 0.8404636577444282, "grad_norm": 1.5930057764053345, "learning_rate": 7.5005e-05, "loss": 0.4344, "step": 15009 }, { "epoch": 0.8405196550565572, "grad_norm": 1.585240364074707, "learning_rate": 7.501e-05, "loss": 0.7247, "step": 15010 }, { "epoch": 0.8405756523686863, 
"grad_norm": 1.1724711656570435, "learning_rate": 7.5015e-05, "loss": 0.4556, "step": 15011 }, { "epoch": 0.8406316496808153, "grad_norm": 1.4890203475952148, "learning_rate": 7.502e-05, "loss": 0.5924, "step": 15012 }, { "epoch": 0.8406876469929443, "grad_norm": 1.615662932395935, "learning_rate": 7.502500000000001e-05, "loss": 0.4121, "step": 15013 }, { "epoch": 0.8407436443050733, "grad_norm": 1.2852932214736938, "learning_rate": 7.503e-05, "loss": 0.393, "step": 15014 }, { "epoch": 0.8407996416172023, "grad_norm": 1.6026692390441895, "learning_rate": 7.5035e-05, "loss": 0.4125, "step": 15015 }, { "epoch": 0.8408556389293314, "grad_norm": 1.4187570810317993, "learning_rate": 7.504e-05, "loss": 0.4448, "step": 15016 }, { "epoch": 0.8409116362414604, "grad_norm": 1.3563053607940674, "learning_rate": 7.5045e-05, "loss": 0.4576, "step": 15017 }, { "epoch": 0.8409676335535894, "grad_norm": 1.2016352415084839, "learning_rate": 7.505e-05, "loss": 0.4305, "step": 15018 }, { "epoch": 0.8410236308657184, "grad_norm": 1.245765209197998, "learning_rate": 7.5055e-05, "loss": 0.3862, "step": 15019 }, { "epoch": 0.8410796281778474, "grad_norm": 2.2996387481689453, "learning_rate": 7.506e-05, "loss": 0.5382, "step": 15020 }, { "epoch": 0.8411356254899764, "grad_norm": 1.327736735343933, "learning_rate": 7.506500000000001e-05, "loss": 0.5347, "step": 15021 }, { "epoch": 0.8411916228021055, "grad_norm": 1.5099338293075562, "learning_rate": 7.507000000000001e-05, "loss": 0.5358, "step": 15022 }, { "epoch": 0.8412476201142345, "grad_norm": 1.208007574081421, "learning_rate": 7.507500000000001e-05, "loss": 0.5175, "step": 15023 }, { "epoch": 0.8413036174263635, "grad_norm": 1.517074465751648, "learning_rate": 7.508e-05, "loss": 0.5982, "step": 15024 }, { "epoch": 0.8413596147384925, "grad_norm": 1.3720316886901855, "learning_rate": 7.5085e-05, "loss": 0.474, "step": 15025 }, { "epoch": 0.8414156120506215, "grad_norm": 1.3069417476654053, "learning_rate": 7.509e-05, "loss": 0.4922, 
"step": 15026 }, { "epoch": 0.8414716093627506, "grad_norm": 1.3628120422363281, "learning_rate": 7.5095e-05, "loss": 0.4216, "step": 15027 }, { "epoch": 0.8415276066748796, "grad_norm": 1.2434688806533813, "learning_rate": 7.510000000000001e-05, "loss": 0.629, "step": 15028 }, { "epoch": 0.8415836039870086, "grad_norm": 1.5129505395889282, "learning_rate": 7.510500000000001e-05, "loss": 0.422, "step": 15029 }, { "epoch": 0.8416396012991376, "grad_norm": 1.1953586339950562, "learning_rate": 7.511e-05, "loss": 0.4075, "step": 15030 }, { "epoch": 0.8416955986112666, "grad_norm": 2.293485403060913, "learning_rate": 7.5115e-05, "loss": 0.6196, "step": 15031 }, { "epoch": 0.8417515959233957, "grad_norm": 1.3731416463851929, "learning_rate": 7.512e-05, "loss": 0.4824, "step": 15032 }, { "epoch": 0.8418075932355247, "grad_norm": 1.2921339273452759, "learning_rate": 7.5125e-05, "loss": 0.4374, "step": 15033 }, { "epoch": 0.8418635905476537, "grad_norm": 1.2573421001434326, "learning_rate": 7.513e-05, "loss": 0.3117, "step": 15034 }, { "epoch": 0.8419195878597827, "grad_norm": 1.2236018180847168, "learning_rate": 7.5135e-05, "loss": 0.4238, "step": 15035 }, { "epoch": 0.8419755851719117, "grad_norm": 1.2526265382766724, "learning_rate": 7.514e-05, "loss": 0.3968, "step": 15036 }, { "epoch": 0.8420315824840408, "grad_norm": 1.4436498880386353, "learning_rate": 7.5145e-05, "loss": 0.4659, "step": 15037 }, { "epoch": 0.8420875797961698, "grad_norm": 1.447879433631897, "learning_rate": 7.515e-05, "loss": 0.4036, "step": 15038 }, { "epoch": 0.8421435771082988, "grad_norm": 1.3573596477508545, "learning_rate": 7.515500000000001e-05, "loss": 0.3824, "step": 15039 }, { "epoch": 0.8421995744204278, "grad_norm": 1.569237470626831, "learning_rate": 7.516e-05, "loss": 0.395, "step": 15040 }, { "epoch": 0.8422555717325568, "grad_norm": 1.4944884777069092, "learning_rate": 7.5165e-05, "loss": 0.4338, "step": 15041 }, { "epoch": 0.8423115690446858, "grad_norm": 1.386260747909546, 
"learning_rate": 7.517000000000001e-05, "loss": 0.5996, "step": 15042 }, { "epoch": 0.8423675663568149, "grad_norm": 2.475252389907837, "learning_rate": 7.517500000000001e-05, "loss": 0.5768, "step": 15043 }, { "epoch": 0.8424235636689439, "grad_norm": 1.4581243991851807, "learning_rate": 7.518000000000001e-05, "loss": 0.3723, "step": 15044 }, { "epoch": 0.8424795609810729, "grad_norm": 1.2736772298812866, "learning_rate": 7.5185e-05, "loss": 0.5126, "step": 15045 }, { "epoch": 0.8425355582932019, "grad_norm": 1.3051806688308716, "learning_rate": 7.519e-05, "loss": 0.4082, "step": 15046 }, { "epoch": 0.8425915556053309, "grad_norm": 1.5884802341461182, "learning_rate": 7.5195e-05, "loss": 0.4608, "step": 15047 }, { "epoch": 0.84264755291746, "grad_norm": 1.7127121686935425, "learning_rate": 7.52e-05, "loss": 0.5517, "step": 15048 }, { "epoch": 0.842703550229589, "grad_norm": 1.2679857015609741, "learning_rate": 7.520500000000001e-05, "loss": 0.4228, "step": 15049 }, { "epoch": 0.842759547541718, "grad_norm": 1.5982797145843506, "learning_rate": 7.521e-05, "loss": 0.5136, "step": 15050 }, { "epoch": 0.842815544853847, "grad_norm": 1.4622713327407837, "learning_rate": 7.5215e-05, "loss": 0.4572, "step": 15051 }, { "epoch": 0.842871542165976, "grad_norm": 1.3255765438079834, "learning_rate": 7.522e-05, "loss": 0.4086, "step": 15052 }, { "epoch": 0.8429275394781051, "grad_norm": 1.319079041481018, "learning_rate": 7.5225e-05, "loss": 0.4433, "step": 15053 }, { "epoch": 0.8429835367902341, "grad_norm": 1.2004789113998413, "learning_rate": 7.523e-05, "loss": 0.3553, "step": 15054 }, { "epoch": 0.8430395341023631, "grad_norm": 1.1800658702850342, "learning_rate": 7.5235e-05, "loss": 0.5275, "step": 15055 }, { "epoch": 0.8430955314144921, "grad_norm": 1.158860206604004, "learning_rate": 7.524e-05, "loss": 0.3951, "step": 15056 }, { "epoch": 0.8431515287266211, "grad_norm": 1.3727558851242065, "learning_rate": 7.5245e-05, "loss": 0.5408, "step": 15057 }, { "epoch": 
0.8432075260387502, "grad_norm": 1.2245267629623413, "learning_rate": 7.525e-05, "loss": 0.4581, "step": 15058 }, { "epoch": 0.8432635233508792, "grad_norm": 1.5283687114715576, "learning_rate": 7.525500000000001e-05, "loss": 0.6485, "step": 15059 }, { "epoch": 0.8433195206630082, "grad_norm": 1.5196453332901, "learning_rate": 7.526000000000001e-05, "loss": 0.5316, "step": 15060 }, { "epoch": 0.8433755179751372, "grad_norm": 1.3572794198989868, "learning_rate": 7.5265e-05, "loss": 0.4793, "step": 15061 }, { "epoch": 0.8434315152872662, "grad_norm": 2.0908303260803223, "learning_rate": 7.527e-05, "loss": 0.432, "step": 15062 }, { "epoch": 0.8434875125993953, "grad_norm": 1.378475546836853, "learning_rate": 7.5275e-05, "loss": 0.4168, "step": 15063 }, { "epoch": 0.8435435099115243, "grad_norm": 1.2672103643417358, "learning_rate": 7.528000000000001e-05, "loss": 0.4288, "step": 15064 }, { "epoch": 0.8435995072236533, "grad_norm": 1.2985295057296753, "learning_rate": 7.528500000000001e-05, "loss": 0.4394, "step": 15065 }, { "epoch": 0.8436555045357823, "grad_norm": 1.3514337539672852, "learning_rate": 7.529e-05, "loss": 0.5229, "step": 15066 }, { "epoch": 0.8437115018479113, "grad_norm": 1.2348365783691406, "learning_rate": 7.5295e-05, "loss": 0.6591, "step": 15067 }, { "epoch": 0.8437674991600403, "grad_norm": 1.2063206434249878, "learning_rate": 7.53e-05, "loss": 0.4903, "step": 15068 }, { "epoch": 0.8438234964721694, "grad_norm": 1.1621588468551636, "learning_rate": 7.5305e-05, "loss": 0.5143, "step": 15069 }, { "epoch": 0.8438794937842984, "grad_norm": 1.2593555450439453, "learning_rate": 7.531000000000001e-05, "loss": 0.4102, "step": 15070 }, { "epoch": 0.8439354910964274, "grad_norm": 1.4822927713394165, "learning_rate": 7.5315e-05, "loss": 0.5137, "step": 15071 }, { "epoch": 0.8439914884085564, "grad_norm": 1.429641604423523, "learning_rate": 7.532e-05, "loss": 0.5827, "step": 15072 }, { "epoch": 0.8440474857206854, "grad_norm": 1.4930412769317627, 
"learning_rate": 7.5325e-05, "loss": 0.5212, "step": 15073 }, { "epoch": 0.8441034830328145, "grad_norm": 1.2361456155776978, "learning_rate": 7.533e-05, "loss": 0.4542, "step": 15074 }, { "epoch": 0.8441594803449435, "grad_norm": 1.2064471244812012, "learning_rate": 7.5335e-05, "loss": 0.3553, "step": 15075 }, { "epoch": 0.8442154776570725, "grad_norm": 1.3847750425338745, "learning_rate": 7.534e-05, "loss": 0.5201, "step": 15076 }, { "epoch": 0.8442714749692015, "grad_norm": 1.3404045104980469, "learning_rate": 7.5345e-05, "loss": 0.4421, "step": 15077 }, { "epoch": 0.8443274722813305, "grad_norm": 1.3262113332748413, "learning_rate": 7.535e-05, "loss": 0.5306, "step": 15078 }, { "epoch": 0.8443834695934596, "grad_norm": 1.6203056573867798, "learning_rate": 7.535500000000001e-05, "loss": 0.4103, "step": 15079 }, { "epoch": 0.8444394669055886, "grad_norm": 1.3159303665161133, "learning_rate": 7.536000000000001e-05, "loss": 0.4167, "step": 15080 }, { "epoch": 0.8444954642177176, "grad_norm": 1.398086428642273, "learning_rate": 7.536500000000001e-05, "loss": 0.514, "step": 15081 }, { "epoch": 0.8445514615298466, "grad_norm": 1.2203478813171387, "learning_rate": 7.537e-05, "loss": 0.3646, "step": 15082 }, { "epoch": 0.8446074588419756, "grad_norm": 1.266960859298706, "learning_rate": 7.5375e-05, "loss": 0.4621, "step": 15083 }, { "epoch": 0.8446634561541047, "grad_norm": 1.3884551525115967, "learning_rate": 7.538e-05, "loss": 0.4161, "step": 15084 }, { "epoch": 0.8447194534662337, "grad_norm": 1.5007433891296387, "learning_rate": 7.538500000000001e-05, "loss": 0.5216, "step": 15085 }, { "epoch": 0.8447754507783627, "grad_norm": 3.044949531555176, "learning_rate": 7.539000000000001e-05, "loss": 0.406, "step": 15086 }, { "epoch": 0.8448314480904917, "grad_norm": 1.3742420673370361, "learning_rate": 7.5395e-05, "loss": 0.5471, "step": 15087 }, { "epoch": 0.8448874454026207, "grad_norm": 1.5436530113220215, "learning_rate": 7.54e-05, "loss": 0.516, "step": 15088 }, { 
"epoch": 0.8449434427147496, "grad_norm": 1.3303571939468384, "learning_rate": 7.5405e-05, "loss": 0.4711, "step": 15089 }, { "epoch": 0.8449994400268787, "grad_norm": 1.3449771404266357, "learning_rate": 7.541e-05, "loss": 0.3937, "step": 15090 }, { "epoch": 0.8450554373390077, "grad_norm": 1.3330497741699219, "learning_rate": 7.541500000000001e-05, "loss": 0.3399, "step": 15091 }, { "epoch": 0.8451114346511367, "grad_norm": 1.2649072408676147, "learning_rate": 7.542e-05, "loss": 0.4364, "step": 15092 }, { "epoch": 0.8451674319632657, "grad_norm": 1.2320575714111328, "learning_rate": 7.5425e-05, "loss": 0.4069, "step": 15093 }, { "epoch": 0.8452234292753947, "grad_norm": 1.6531530618667603, "learning_rate": 7.543e-05, "loss": 0.4329, "step": 15094 }, { "epoch": 0.8452794265875238, "grad_norm": 1.2341358661651611, "learning_rate": 7.5435e-05, "loss": 0.3878, "step": 15095 }, { "epoch": 0.8453354238996528, "grad_norm": 2.1931910514831543, "learning_rate": 7.544e-05, "loss": 0.4141, "step": 15096 }, { "epoch": 0.8453914212117818, "grad_norm": 1.1421024799346924, "learning_rate": 7.5445e-05, "loss": 0.4015, "step": 15097 }, { "epoch": 0.8454474185239108, "grad_norm": 1.6613743305206299, "learning_rate": 7.545e-05, "loss": 0.4879, "step": 15098 }, { "epoch": 0.8455034158360398, "grad_norm": 1.3891104459762573, "learning_rate": 7.545500000000002e-05, "loss": 0.3483, "step": 15099 }, { "epoch": 0.8455594131481688, "grad_norm": 1.3898457288742065, "learning_rate": 7.546000000000001e-05, "loss": 0.6164, "step": 15100 }, { "epoch": 0.8456154104602979, "grad_norm": 1.2472468614578247, "learning_rate": 7.546500000000001e-05, "loss": 0.4381, "step": 15101 }, { "epoch": 0.8456714077724269, "grad_norm": 1.5568019151687622, "learning_rate": 7.547000000000001e-05, "loss": 0.5354, "step": 15102 }, { "epoch": 0.8457274050845559, "grad_norm": 1.1311805248260498, "learning_rate": 7.5475e-05, "loss": 0.4659, "step": 15103 }, { "epoch": 0.8457834023966849, "grad_norm": 
1.4916313886642456, "learning_rate": 7.548e-05, "loss": 0.5287, "step": 15104 }, { "epoch": 0.8458393997088139, "grad_norm": 1.3321473598480225, "learning_rate": 7.5485e-05, "loss": 0.4549, "step": 15105 }, { "epoch": 0.845895397020943, "grad_norm": 1.2652349472045898, "learning_rate": 7.549000000000001e-05, "loss": 0.4562, "step": 15106 }, { "epoch": 0.845951394333072, "grad_norm": 1.383254885673523, "learning_rate": 7.549500000000001e-05, "loss": 0.3811, "step": 15107 }, { "epoch": 0.846007391645201, "grad_norm": 1.3633276224136353, "learning_rate": 7.55e-05, "loss": 0.4664, "step": 15108 }, { "epoch": 0.84606338895733, "grad_norm": 1.2604163885116577, "learning_rate": 7.5505e-05, "loss": 0.4612, "step": 15109 }, { "epoch": 0.846119386269459, "grad_norm": 1.3349034786224365, "learning_rate": 7.551e-05, "loss": 0.4329, "step": 15110 }, { "epoch": 0.8461753835815881, "grad_norm": 1.3420207500457764, "learning_rate": 7.5515e-05, "loss": 0.3875, "step": 15111 }, { "epoch": 0.8462313808937171, "grad_norm": 1.4629493951797485, "learning_rate": 7.552e-05, "loss": 0.4527, "step": 15112 }, { "epoch": 0.8462873782058461, "grad_norm": 1.209176778793335, "learning_rate": 7.5525e-05, "loss": 0.4839, "step": 15113 }, { "epoch": 0.8463433755179751, "grad_norm": 1.5998519659042358, "learning_rate": 7.553e-05, "loss": 0.4067, "step": 15114 }, { "epoch": 0.8463993728301041, "grad_norm": 1.3559772968292236, "learning_rate": 7.5535e-05, "loss": 0.3687, "step": 15115 }, { "epoch": 0.8464553701422332, "grad_norm": 3.905630350112915, "learning_rate": 7.554e-05, "loss": 0.3848, "step": 15116 }, { "epoch": 0.8465113674543622, "grad_norm": 1.3527567386627197, "learning_rate": 7.5545e-05, "loss": 0.4556, "step": 15117 }, { "epoch": 0.8465673647664912, "grad_norm": 1.3242403268814087, "learning_rate": 7.555e-05, "loss": 0.4031, "step": 15118 }, { "epoch": 0.8466233620786202, "grad_norm": 1.3410563468933105, "learning_rate": 7.5555e-05, "loss": 0.5529, "step": 15119 }, { "epoch": 
0.8466793593907492, "grad_norm": 1.413779854774475, "learning_rate": 7.556000000000002e-05, "loss": 0.444, "step": 15120 }, { "epoch": 0.8467353567028783, "grad_norm": 1.4130003452301025, "learning_rate": 7.556500000000001e-05, "loss": 0.4252, "step": 15121 }, { "epoch": 0.8467913540150073, "grad_norm": 1.1051877737045288, "learning_rate": 7.557000000000001e-05, "loss": 0.3686, "step": 15122 }, { "epoch": 0.8468473513271363, "grad_norm": 1.4178258180618286, "learning_rate": 7.557500000000001e-05, "loss": 0.374, "step": 15123 }, { "epoch": 0.8469033486392653, "grad_norm": 1.5272403955459595, "learning_rate": 7.558e-05, "loss": 0.5934, "step": 15124 }, { "epoch": 0.8469593459513943, "grad_norm": 1.3880999088287354, "learning_rate": 7.5585e-05, "loss": 0.4118, "step": 15125 }, { "epoch": 0.8470153432635233, "grad_norm": 1.6361058950424194, "learning_rate": 7.559e-05, "loss": 0.5314, "step": 15126 }, { "epoch": 0.8470713405756524, "grad_norm": 1.244228720664978, "learning_rate": 7.559500000000001e-05, "loss": 0.4525, "step": 15127 }, { "epoch": 0.8471273378877814, "grad_norm": 2.738976001739502, "learning_rate": 7.560000000000001e-05, "loss": 0.4815, "step": 15128 }, { "epoch": 0.8471833351999104, "grad_norm": 1.1920626163482666, "learning_rate": 7.5605e-05, "loss": 0.4106, "step": 15129 }, { "epoch": 0.8472393325120394, "grad_norm": 1.376732587814331, "learning_rate": 7.561e-05, "loss": 0.4497, "step": 15130 }, { "epoch": 0.8472953298241684, "grad_norm": 1.3465228080749512, "learning_rate": 7.5615e-05, "loss": 0.4248, "step": 15131 }, { "epoch": 0.8473513271362975, "grad_norm": 1.2644684314727783, "learning_rate": 7.562e-05, "loss": 0.5093, "step": 15132 }, { "epoch": 0.8474073244484265, "grad_norm": 1.762965440750122, "learning_rate": 7.5625e-05, "loss": 0.446, "step": 15133 }, { "epoch": 0.8474633217605555, "grad_norm": 1.3486056327819824, "learning_rate": 7.563e-05, "loss": 0.4694, "step": 15134 }, { "epoch": 0.8475193190726845, "grad_norm": 1.1375880241394043, 
"learning_rate": 7.5635e-05, "loss": 0.3783, "step": 15135 }, { "epoch": 0.8475753163848135, "grad_norm": 1.2902443408966064, "learning_rate": 7.564e-05, "loss": 0.4339, "step": 15136 }, { "epoch": 0.8476313136969426, "grad_norm": 1.3332070112228394, "learning_rate": 7.5645e-05, "loss": 0.4528, "step": 15137 }, { "epoch": 0.8476873110090716, "grad_norm": 1.3735326528549194, "learning_rate": 7.565e-05, "loss": 0.421, "step": 15138 }, { "epoch": 0.8477433083212006, "grad_norm": 1.3245290517807007, "learning_rate": 7.565499999999999e-05, "loss": 0.4636, "step": 15139 }, { "epoch": 0.8477993056333296, "grad_norm": 1.122619867324829, "learning_rate": 7.566e-05, "loss": 0.3773, "step": 15140 }, { "epoch": 0.8478553029454586, "grad_norm": 1.3315730094909668, "learning_rate": 7.5665e-05, "loss": 0.378, "step": 15141 }, { "epoch": 0.8479113002575877, "grad_norm": 1.272456169128418, "learning_rate": 7.567000000000001e-05, "loss": 0.4726, "step": 15142 }, { "epoch": 0.8479672975697167, "grad_norm": 1.2605549097061157, "learning_rate": 7.567500000000001e-05, "loss": 0.4612, "step": 15143 }, { "epoch": 0.8480232948818457, "grad_norm": 1.3905287981033325, "learning_rate": 7.568000000000001e-05, "loss": 0.4521, "step": 15144 }, { "epoch": 0.8480792921939747, "grad_norm": 1.5169981718063354, "learning_rate": 7.5685e-05, "loss": 0.405, "step": 15145 }, { "epoch": 0.8481352895061037, "grad_norm": 1.1962016820907593, "learning_rate": 7.569e-05, "loss": 0.3827, "step": 15146 }, { "epoch": 0.8481912868182327, "grad_norm": 1.2600289583206177, "learning_rate": 7.5695e-05, "loss": 0.3677, "step": 15147 }, { "epoch": 0.8482472841303618, "grad_norm": 1.3277665376663208, "learning_rate": 7.570000000000001e-05, "loss": 0.5631, "step": 15148 }, { "epoch": 0.8483032814424908, "grad_norm": 1.6062098741531372, "learning_rate": 7.570500000000001e-05, "loss": 0.5264, "step": 15149 }, { "epoch": 0.8483592787546198, "grad_norm": 1.3468025922775269, "learning_rate": 7.571e-05, "loss": 0.4625, "step": 
15150 }, { "epoch": 0.8484152760667488, "grad_norm": 1.3188852071762085, "learning_rate": 7.5715e-05, "loss": 0.4286, "step": 15151 }, { "epoch": 0.8484712733788778, "grad_norm": 1.5755048990249634, "learning_rate": 7.572e-05, "loss": 0.5279, "step": 15152 }, { "epoch": 0.8485272706910069, "grad_norm": 1.7589013576507568, "learning_rate": 7.5725e-05, "loss": 0.3309, "step": 15153 }, { "epoch": 0.8485832680031359, "grad_norm": 1.310774564743042, "learning_rate": 7.573e-05, "loss": 0.3594, "step": 15154 }, { "epoch": 0.8486392653152649, "grad_norm": 1.4295710325241089, "learning_rate": 7.5735e-05, "loss": 0.4235, "step": 15155 }, { "epoch": 0.8486952626273939, "grad_norm": 1.4306899309158325, "learning_rate": 7.574e-05, "loss": 0.4259, "step": 15156 }, { "epoch": 0.8487512599395229, "grad_norm": 1.4165624380111694, "learning_rate": 7.5745e-05, "loss": 0.5563, "step": 15157 }, { "epoch": 0.848807257251652, "grad_norm": 1.251570701599121, "learning_rate": 7.575e-05, "loss": 0.4512, "step": 15158 }, { "epoch": 0.848863254563781, "grad_norm": 1.3429430723190308, "learning_rate": 7.5755e-05, "loss": 0.4967, "step": 15159 }, { "epoch": 0.84891925187591, "grad_norm": 1.4190011024475098, "learning_rate": 7.576e-05, "loss": 0.4926, "step": 15160 }, { "epoch": 0.848975249188039, "grad_norm": 1.4082642793655396, "learning_rate": 7.5765e-05, "loss": 0.4795, "step": 15161 }, { "epoch": 0.849031246500168, "grad_norm": 1.2854467630386353, "learning_rate": 7.577e-05, "loss": 0.4818, "step": 15162 }, { "epoch": 0.849087243812297, "grad_norm": 1.1384862661361694, "learning_rate": 7.577500000000001e-05, "loss": 0.3906, "step": 15163 }, { "epoch": 0.8491432411244261, "grad_norm": 1.175520658493042, "learning_rate": 7.578000000000001e-05, "loss": 0.3959, "step": 15164 }, { "epoch": 0.8491992384365551, "grad_norm": 1.3986434936523438, "learning_rate": 7.578500000000001e-05, "loss": 0.6178, "step": 15165 }, { "epoch": 0.8492552357486841, "grad_norm": 1.4496233463287354, "learning_rate": 
7.579e-05, "loss": 0.4642, "step": 15166 }, { "epoch": 0.8493112330608131, "grad_norm": 1.6438530683517456, "learning_rate": 7.5795e-05, "loss": 0.607, "step": 15167 }, { "epoch": 0.8493672303729422, "grad_norm": 1.8109285831451416, "learning_rate": 7.58e-05, "loss": 0.4636, "step": 15168 }, { "epoch": 0.8494232276850712, "grad_norm": 1.416939377784729, "learning_rate": 7.580500000000001e-05, "loss": 0.3799, "step": 15169 }, { "epoch": 0.8494792249972002, "grad_norm": 1.3079711198806763, "learning_rate": 7.581000000000001e-05, "loss": 0.4225, "step": 15170 }, { "epoch": 0.8495352223093292, "grad_norm": 1.6238187551498413, "learning_rate": 7.5815e-05, "loss": 0.5868, "step": 15171 }, { "epoch": 0.8495912196214581, "grad_norm": 1.2737096548080444, "learning_rate": 7.582e-05, "loss": 0.328, "step": 15172 }, { "epoch": 0.8496472169335871, "grad_norm": 1.239679217338562, "learning_rate": 7.5825e-05, "loss": 0.3602, "step": 15173 }, { "epoch": 0.8497032142457162, "grad_norm": 1.4801266193389893, "learning_rate": 7.583e-05, "loss": 0.3883, "step": 15174 }, { "epoch": 0.8497592115578452, "grad_norm": 1.7090065479278564, "learning_rate": 7.5835e-05, "loss": 0.5019, "step": 15175 }, { "epoch": 0.8498152088699742, "grad_norm": 1.245547890663147, "learning_rate": 7.584e-05, "loss": 0.3724, "step": 15176 }, { "epoch": 0.8498712061821032, "grad_norm": 1.361806869506836, "learning_rate": 7.5845e-05, "loss": 0.5009, "step": 15177 }, { "epoch": 0.8499272034942322, "grad_norm": 1.7137019634246826, "learning_rate": 7.585e-05, "loss": 0.6438, "step": 15178 }, { "epoch": 0.8499832008063612, "grad_norm": 1.3690975904464722, "learning_rate": 7.5855e-05, "loss": 0.4153, "step": 15179 }, { "epoch": 0.8500391981184903, "grad_norm": 1.3201725482940674, "learning_rate": 7.586000000000001e-05, "loss": 0.4907, "step": 15180 }, { "epoch": 0.8500951954306193, "grad_norm": 1.3027015924453735, "learning_rate": 7.5865e-05, "loss": 0.475, "step": 15181 }, { "epoch": 0.8501511927427483, "grad_norm": 
1.2826457023620605, "learning_rate": 7.587e-05, "loss": 0.456, "step": 15182 }, { "epoch": 0.8502071900548773, "grad_norm": 1.317867398262024, "learning_rate": 7.5875e-05, "loss": 0.5059, "step": 15183 }, { "epoch": 0.8502631873670063, "grad_norm": 2.2673516273498535, "learning_rate": 7.588000000000001e-05, "loss": 0.6258, "step": 15184 }, { "epoch": 0.8503191846791354, "grad_norm": 1.1475238800048828, "learning_rate": 7.588500000000001e-05, "loss": 0.3576, "step": 15185 }, { "epoch": 0.8503751819912644, "grad_norm": 1.4983441829681396, "learning_rate": 7.589000000000001e-05, "loss": 0.5368, "step": 15186 }, { "epoch": 0.8504311793033934, "grad_norm": 1.2127751111984253, "learning_rate": 7.5895e-05, "loss": 0.4064, "step": 15187 }, { "epoch": 0.8504871766155224, "grad_norm": 1.471183180809021, "learning_rate": 7.59e-05, "loss": 0.4226, "step": 15188 }, { "epoch": 0.8505431739276514, "grad_norm": 1.3438286781311035, "learning_rate": 7.5905e-05, "loss": 0.4563, "step": 15189 }, { "epoch": 0.8505991712397805, "grad_norm": 1.1882891654968262, "learning_rate": 7.591e-05, "loss": 0.521, "step": 15190 }, { "epoch": 0.8506551685519095, "grad_norm": 1.0482243299484253, "learning_rate": 7.591500000000001e-05, "loss": 0.4479, "step": 15191 }, { "epoch": 0.8507111658640385, "grad_norm": 1.3335113525390625, "learning_rate": 7.592e-05, "loss": 0.3779, "step": 15192 }, { "epoch": 0.8507671631761675, "grad_norm": 1.2946522235870361, "learning_rate": 7.5925e-05, "loss": 0.4433, "step": 15193 }, { "epoch": 0.8508231604882965, "grad_norm": 1.3130769729614258, "learning_rate": 7.593e-05, "loss": 0.5303, "step": 15194 }, { "epoch": 0.8508791578004256, "grad_norm": 1.0719996690750122, "learning_rate": 7.5935e-05, "loss": 0.4205, "step": 15195 }, { "epoch": 0.8509351551125546, "grad_norm": 1.2909539937973022, "learning_rate": 7.594e-05, "loss": 0.4152, "step": 15196 }, { "epoch": 0.8509911524246836, "grad_norm": 1.605582594871521, "learning_rate": 7.5945e-05, "loss": 0.5042, "step": 
15197 }, { "epoch": 0.8510471497368126, "grad_norm": 1.7889682054519653, "learning_rate": 7.595e-05, "loss": 0.4765, "step": 15198 }, { "epoch": 0.8511031470489416, "grad_norm": 1.495755910873413, "learning_rate": 7.5955e-05, "loss": 0.4569, "step": 15199 }, { "epoch": 0.8511591443610707, "grad_norm": 1.4306620359420776, "learning_rate": 7.596000000000001e-05, "loss": 0.5663, "step": 15200 }, { "epoch": 0.8512151416731997, "grad_norm": 1.3635423183441162, "learning_rate": 7.596500000000001e-05, "loss": 0.4369, "step": 15201 }, { "epoch": 0.8512711389853287, "grad_norm": 1.4563435316085815, "learning_rate": 7.597e-05, "loss": 0.4164, "step": 15202 }, { "epoch": 0.8513271362974577, "grad_norm": 1.192082166671753, "learning_rate": 7.5975e-05, "loss": 0.3142, "step": 15203 }, { "epoch": 0.8513831336095867, "grad_norm": 1.3374476432800293, "learning_rate": 7.598e-05, "loss": 0.3901, "step": 15204 }, { "epoch": 0.8514391309217157, "grad_norm": 1.3649131059646606, "learning_rate": 7.598500000000001e-05, "loss": 0.4348, "step": 15205 }, { "epoch": 0.8514951282338448, "grad_norm": 84.07178497314453, "learning_rate": 7.599000000000001e-05, "loss": 0.5072, "step": 15206 }, { "epoch": 0.8515511255459738, "grad_norm": 1.9043371677398682, "learning_rate": 7.5995e-05, "loss": 0.5572, "step": 15207 }, { "epoch": 0.8516071228581028, "grad_norm": 1.4176578521728516, "learning_rate": 7.6e-05, "loss": 0.5742, "step": 15208 }, { "epoch": 0.8516631201702318, "grad_norm": 1.2849376201629639, "learning_rate": 7.6005e-05, "loss": 0.437, "step": 15209 }, { "epoch": 0.8517191174823608, "grad_norm": 1.14579439163208, "learning_rate": 7.601e-05, "loss": 0.3665, "step": 15210 }, { "epoch": 0.8517751147944899, "grad_norm": 1.326216459274292, "learning_rate": 7.6015e-05, "loss": 0.4595, "step": 15211 }, { "epoch": 0.8518311121066189, "grad_norm": 1.5373824834823608, "learning_rate": 7.602000000000001e-05, "loss": 0.4353, "step": 15212 }, { "epoch": 0.8518871094187479, "grad_norm": 
1.2989906072616577, "learning_rate": 7.6025e-05, "loss": 0.3503, "step": 15213 }, { "epoch": 0.8519431067308769, "grad_norm": 1.641305685043335, "learning_rate": 7.603e-05, "loss": 0.4397, "step": 15214 }, { "epoch": 0.8519991040430059, "grad_norm": 1.5231834650039673, "learning_rate": 7.6035e-05, "loss": 0.4273, "step": 15215 }, { "epoch": 0.852055101355135, "grad_norm": 6.989379405975342, "learning_rate": 7.604e-05, "loss": 0.3557, "step": 15216 }, { "epoch": 0.852111098667264, "grad_norm": 1.4232429265975952, "learning_rate": 7.6045e-05, "loss": 0.4825, "step": 15217 }, { "epoch": 0.852167095979393, "grad_norm": 8.40113353729248, "learning_rate": 7.605e-05, "loss": 0.3653, "step": 15218 }, { "epoch": 0.852223093291522, "grad_norm": 1.3674389123916626, "learning_rate": 7.6055e-05, "loss": 0.4705, "step": 15219 }, { "epoch": 0.852279090603651, "grad_norm": 1.4717456102371216, "learning_rate": 7.606000000000001e-05, "loss": 0.483, "step": 15220 }, { "epoch": 0.85233508791578, "grad_norm": 1.2045587301254272, "learning_rate": 7.606500000000001e-05, "loss": 0.3697, "step": 15221 }, { "epoch": 0.8523910852279091, "grad_norm": 1.6508337259292603, "learning_rate": 7.607000000000001e-05, "loss": 0.4677, "step": 15222 }, { "epoch": 0.8524470825400381, "grad_norm": 1.5064278841018677, "learning_rate": 7.6075e-05, "loss": 0.4107, "step": 15223 }, { "epoch": 0.8525030798521671, "grad_norm": 1.36526620388031, "learning_rate": 7.608e-05, "loss": 0.5868, "step": 15224 }, { "epoch": 0.8525590771642961, "grad_norm": 1.386756181716919, "learning_rate": 7.6085e-05, "loss": 0.4697, "step": 15225 }, { "epoch": 0.8526150744764251, "grad_norm": 1.6597256660461426, "learning_rate": 7.609000000000001e-05, "loss": 0.5056, "step": 15226 }, { "epoch": 0.8526710717885542, "grad_norm": 1.4845725297927856, "learning_rate": 7.609500000000001e-05, "loss": 0.4151, "step": 15227 }, { "epoch": 0.8527270691006832, "grad_norm": 1.6620419025421143, "learning_rate": 7.61e-05, "loss": 0.4783, "step": 
15228 }, { "epoch": 0.8527830664128122, "grad_norm": 1.298387050628662, "learning_rate": 7.6105e-05, "loss": 0.4438, "step": 15229 }, { "epoch": 0.8528390637249412, "grad_norm": 1.2491012811660767, "learning_rate": 7.611e-05, "loss": 0.3297, "step": 15230 }, { "epoch": 0.8528950610370702, "grad_norm": 1.2535948753356934, "learning_rate": 7.6115e-05, "loss": 0.4548, "step": 15231 }, { "epoch": 0.8529510583491993, "grad_norm": 1.3734540939331055, "learning_rate": 7.612e-05, "loss": 0.3724, "step": 15232 }, { "epoch": 0.8530070556613283, "grad_norm": 1.4256091117858887, "learning_rate": 7.612500000000001e-05, "loss": 0.5826, "step": 15233 }, { "epoch": 0.8530630529734573, "grad_norm": 1.5222828388214111, "learning_rate": 7.613e-05, "loss": 0.5786, "step": 15234 }, { "epoch": 0.8531190502855863, "grad_norm": 1.5543458461761475, "learning_rate": 7.6135e-05, "loss": 0.5745, "step": 15235 }, { "epoch": 0.8531750475977153, "grad_norm": 1.5437970161437988, "learning_rate": 7.614e-05, "loss": 0.4744, "step": 15236 }, { "epoch": 0.8532310449098444, "grad_norm": 1.250725507736206, "learning_rate": 7.6145e-05, "loss": 0.4594, "step": 15237 }, { "epoch": 0.8532870422219734, "grad_norm": 1.443010926246643, "learning_rate": 7.615e-05, "loss": 0.3214, "step": 15238 }, { "epoch": 0.8533430395341024, "grad_norm": 1.4554890394210815, "learning_rate": 7.6155e-05, "loss": 0.5692, "step": 15239 }, { "epoch": 0.8533990368462314, "grad_norm": 1.387817621231079, "learning_rate": 7.616e-05, "loss": 0.3915, "step": 15240 }, { "epoch": 0.8534550341583604, "grad_norm": 1.4982044696807861, "learning_rate": 7.616500000000001e-05, "loss": 0.4794, "step": 15241 }, { "epoch": 0.8535110314704895, "grad_norm": 1.5566614866256714, "learning_rate": 7.617000000000001e-05, "loss": 0.5169, "step": 15242 }, { "epoch": 0.8535670287826185, "grad_norm": 1.342090368270874, "learning_rate": 7.617500000000001e-05, "loss": 0.5644, "step": 15243 }, { "epoch": 0.8536230260947475, "grad_norm": 1.3355064392089844, 
"learning_rate": 7.618e-05, "loss": 0.4928, "step": 15244 }, { "epoch": 0.8536790234068765, "grad_norm": 1.677080512046814, "learning_rate": 7.6185e-05, "loss": 0.4778, "step": 15245 }, { "epoch": 0.8537350207190055, "grad_norm": 1.5180878639221191, "learning_rate": 7.619e-05, "loss": 0.5091, "step": 15246 }, { "epoch": 0.8537910180311346, "grad_norm": 1.383988857269287, "learning_rate": 7.619500000000001e-05, "loss": 0.4522, "step": 15247 }, { "epoch": 0.8538470153432636, "grad_norm": 1.5268155336380005, "learning_rate": 7.620000000000001e-05, "loss": 0.4806, "step": 15248 }, { "epoch": 0.8539030126553926, "grad_norm": 1.1285520792007446, "learning_rate": 7.6205e-05, "loss": 0.4177, "step": 15249 }, { "epoch": 0.8539590099675216, "grad_norm": 1.116123914718628, "learning_rate": 7.621e-05, "loss": 0.3905, "step": 15250 }, { "epoch": 0.8540150072796506, "grad_norm": 1.492278814315796, "learning_rate": 7.6215e-05, "loss": 0.544, "step": 15251 }, { "epoch": 0.8540710045917796, "grad_norm": 1.5980716943740845, "learning_rate": 7.622e-05, "loss": 0.4494, "step": 15252 }, { "epoch": 0.8541270019039087, "grad_norm": 1.2674134969711304, "learning_rate": 7.6225e-05, "loss": 0.4129, "step": 15253 }, { "epoch": 0.8541829992160376, "grad_norm": 1.328264832496643, "learning_rate": 7.623000000000001e-05, "loss": 0.3812, "step": 15254 }, { "epoch": 0.8542389965281666, "grad_norm": 1.4931414127349854, "learning_rate": 7.6235e-05, "loss": 0.5149, "step": 15255 }, { "epoch": 0.8542949938402956, "grad_norm": 1.322326898574829, "learning_rate": 7.624e-05, "loss": 0.4716, "step": 15256 }, { "epoch": 0.8543509911524246, "grad_norm": 1.2993319034576416, "learning_rate": 7.6245e-05, "loss": 0.3725, "step": 15257 }, { "epoch": 0.8544069884645537, "grad_norm": 1.541629433631897, "learning_rate": 7.625e-05, "loss": 0.5011, "step": 15258 }, { "epoch": 0.8544629857766827, "grad_norm": 1.1747040748596191, "learning_rate": 7.6255e-05, "loss": 0.489, "step": 15259 }, { "epoch": 
0.8545189830888117, "grad_norm": 1.365580677986145, "learning_rate": 7.625999999999999e-05, "loss": 0.4291, "step": 15260 }, { "epoch": 0.8545749804009407, "grad_norm": 1.0650475025177002, "learning_rate": 7.6265e-05, "loss": 0.3304, "step": 15261 }, { "epoch": 0.8546309777130697, "grad_norm": 1.2407748699188232, "learning_rate": 7.627000000000001e-05, "loss": 0.394, "step": 15262 }, { "epoch": 0.8546869750251987, "grad_norm": 1.5234675407409668, "learning_rate": 7.627500000000001e-05, "loss": 0.5103, "step": 15263 }, { "epoch": 0.8547429723373278, "grad_norm": 1.4129886627197266, "learning_rate": 7.628000000000001e-05, "loss": 0.5637, "step": 15264 }, { "epoch": 0.8547989696494568, "grad_norm": 1.5237212181091309, "learning_rate": 7.6285e-05, "loss": 0.4244, "step": 15265 }, { "epoch": 0.8548549669615858, "grad_norm": 1.416741132736206, "learning_rate": 7.629e-05, "loss": 0.5953, "step": 15266 }, { "epoch": 0.8549109642737148, "grad_norm": 1.2391144037246704, "learning_rate": 7.6295e-05, "loss": 0.4641, "step": 15267 }, { "epoch": 0.8549669615858438, "grad_norm": 1.3451381921768188, "learning_rate": 7.630000000000001e-05, "loss": 0.4012, "step": 15268 }, { "epoch": 0.8550229588979729, "grad_norm": 1.365315318107605, "learning_rate": 7.630500000000001e-05, "loss": 0.5176, "step": 15269 }, { "epoch": 0.8550789562101019, "grad_norm": 1.6232542991638184, "learning_rate": 7.631e-05, "loss": 0.4692, "step": 15270 }, { "epoch": 0.8551349535222309, "grad_norm": 1.7078979015350342, "learning_rate": 7.6315e-05, "loss": 0.5395, "step": 15271 }, { "epoch": 0.8551909508343599, "grad_norm": 1.5448113679885864, "learning_rate": 7.632e-05, "loss": 0.5784, "step": 15272 }, { "epoch": 0.8552469481464889, "grad_norm": 1.3233789205551147, "learning_rate": 7.6325e-05, "loss": 0.4248, "step": 15273 }, { "epoch": 0.855302945458618, "grad_norm": 1.5929086208343506, "learning_rate": 7.633e-05, "loss": 0.4113, "step": 15274 }, { "epoch": 0.855358942770747, "grad_norm": 1.4089661836624146, 
"learning_rate": 7.633500000000001e-05, "loss": 0.4067, "step": 15275 }, { "epoch": 0.855414940082876, "grad_norm": 1.22316575050354, "learning_rate": 7.634e-05, "loss": 0.4686, "step": 15276 }, { "epoch": 0.855470937395005, "grad_norm": 1.2601726055145264, "learning_rate": 7.6345e-05, "loss": 0.3796, "step": 15277 }, { "epoch": 0.855526934707134, "grad_norm": 1.2563194036483765, "learning_rate": 7.635e-05, "loss": 0.5263, "step": 15278 }, { "epoch": 0.855582932019263, "grad_norm": 1.494057059288025, "learning_rate": 7.6355e-05, "loss": 0.4191, "step": 15279 }, { "epoch": 0.8556389293313921, "grad_norm": 1.328596830368042, "learning_rate": 7.636e-05, "loss": 0.4457, "step": 15280 }, { "epoch": 0.8556949266435211, "grad_norm": 1.3479852676391602, "learning_rate": 7.6365e-05, "loss": 0.4119, "step": 15281 }, { "epoch": 0.8557509239556501, "grad_norm": 1.1821396350860596, "learning_rate": 7.637e-05, "loss": 0.3704, "step": 15282 }, { "epoch": 0.8558069212677791, "grad_norm": 1.317995309829712, "learning_rate": 7.637500000000001e-05, "loss": 0.6254, "step": 15283 }, { "epoch": 0.8558629185799081, "grad_norm": 1.3521196842193604, "learning_rate": 7.638000000000001e-05, "loss": 0.4053, "step": 15284 }, { "epoch": 0.8559189158920372, "grad_norm": 1.6777294874191284, "learning_rate": 7.638500000000001e-05, "loss": 0.6206, "step": 15285 }, { "epoch": 0.8559749132041662, "grad_norm": 1.53383469581604, "learning_rate": 7.639e-05, "loss": 0.5084, "step": 15286 }, { "epoch": 0.8560309105162952, "grad_norm": 1.3479366302490234, "learning_rate": 7.6395e-05, "loss": 0.5463, "step": 15287 }, { "epoch": 0.8560869078284242, "grad_norm": 1.2493224143981934, "learning_rate": 7.64e-05, "loss": 0.3651, "step": 15288 }, { "epoch": 0.8561429051405532, "grad_norm": 1.287906527519226, "learning_rate": 7.6405e-05, "loss": 0.4258, "step": 15289 }, { "epoch": 0.8561989024526823, "grad_norm": 1.672400712966919, "learning_rate": 7.641000000000001e-05, "loss": 0.5002, "step": 15290 }, { "epoch": 
0.8562548997648113, "grad_norm": 1.20218026638031, "learning_rate": 7.6415e-05, "loss": 0.5053, "step": 15291 }, { "epoch": 0.8563108970769403, "grad_norm": 1.4538565874099731, "learning_rate": 7.642e-05, "loss": 0.4315, "step": 15292 }, { "epoch": 0.8563668943890693, "grad_norm": 1.3052338361740112, "learning_rate": 7.6425e-05, "loss": 0.4419, "step": 15293 }, { "epoch": 0.8564228917011983, "grad_norm": 1.3309745788574219, "learning_rate": 7.643e-05, "loss": 0.5276, "step": 15294 }, { "epoch": 0.8564788890133274, "grad_norm": 1.4033682346343994, "learning_rate": 7.6435e-05, "loss": 0.4153, "step": 15295 }, { "epoch": 0.8565348863254564, "grad_norm": 1.365225911140442, "learning_rate": 7.644e-05, "loss": 0.5216, "step": 15296 }, { "epoch": 0.8565908836375854, "grad_norm": 1.053643822669983, "learning_rate": 7.6445e-05, "loss": 0.3019, "step": 15297 }, { "epoch": 0.8566468809497144, "grad_norm": 1.4441500902175903, "learning_rate": 7.645e-05, "loss": 0.5822, "step": 15298 }, { "epoch": 0.8567028782618434, "grad_norm": 1.664686679840088, "learning_rate": 7.6455e-05, "loss": 0.5443, "step": 15299 }, { "epoch": 0.8567588755739725, "grad_norm": 1.169580101966858, "learning_rate": 7.646e-05, "loss": 0.5139, "step": 15300 }, { "epoch": 0.8568148728861015, "grad_norm": 1.4870423078536987, "learning_rate": 7.646500000000001e-05, "loss": 0.4219, "step": 15301 }, { "epoch": 0.8568708701982305, "grad_norm": 1.3848826885223389, "learning_rate": 7.647e-05, "loss": 0.5761, "step": 15302 }, { "epoch": 0.8569268675103595, "grad_norm": 1.1729873418807983, "learning_rate": 7.6475e-05, "loss": 0.3544, "step": 15303 }, { "epoch": 0.8569828648224885, "grad_norm": 3.545872926712036, "learning_rate": 7.648000000000001e-05, "loss": 0.5091, "step": 15304 }, { "epoch": 0.8570388621346176, "grad_norm": 1.288378119468689, "learning_rate": 7.648500000000001e-05, "loss": 0.4387, "step": 15305 }, { "epoch": 0.8570948594467466, "grad_norm": 1.4308961629867554, "learning_rate": 
7.649000000000001e-05, "loss": 0.4135, "step": 15306 }, { "epoch": 0.8571508567588756, "grad_norm": 1.287602424621582, "learning_rate": 7.6495e-05, "loss": 0.4679, "step": 15307 }, { "epoch": 0.8572068540710046, "grad_norm": 1.4903717041015625, "learning_rate": 7.65e-05, "loss": 0.5455, "step": 15308 }, { "epoch": 0.8572628513831336, "grad_norm": 1.3370749950408936, "learning_rate": 7.6505e-05, "loss": 0.5237, "step": 15309 }, { "epoch": 0.8573188486952626, "grad_norm": 1.2127388715744019, "learning_rate": 7.651e-05, "loss": 0.4822, "step": 15310 }, { "epoch": 0.8573748460073917, "grad_norm": 1.4833921194076538, "learning_rate": 7.651500000000001e-05, "loss": 0.4984, "step": 15311 }, { "epoch": 0.8574308433195207, "grad_norm": 2.3310141563415527, "learning_rate": 7.652e-05, "loss": 0.7687, "step": 15312 }, { "epoch": 0.8574868406316497, "grad_norm": 1.4999405145645142, "learning_rate": 7.6525e-05, "loss": 0.5486, "step": 15313 }, { "epoch": 0.8575428379437787, "grad_norm": 1.5353020429611206, "learning_rate": 7.653e-05, "loss": 0.515, "step": 15314 }, { "epoch": 0.8575988352559077, "grad_norm": 1.1975631713867188, "learning_rate": 7.6535e-05, "loss": 0.3858, "step": 15315 }, { "epoch": 0.8576548325680368, "grad_norm": 1.2844549417495728, "learning_rate": 7.654e-05, "loss": 0.5304, "step": 15316 }, { "epoch": 0.8577108298801658, "grad_norm": 1.4918015003204346, "learning_rate": 7.6545e-05, "loss": 0.4314, "step": 15317 }, { "epoch": 0.8577668271922948, "grad_norm": 1.3247824907302856, "learning_rate": 7.655e-05, "loss": 0.4832, "step": 15318 }, { "epoch": 0.8578228245044238, "grad_norm": 1.0839030742645264, "learning_rate": 7.6555e-05, "loss": 0.3746, "step": 15319 }, { "epoch": 0.8578788218165528, "grad_norm": 1.2216871976852417, "learning_rate": 7.656e-05, "loss": 0.378, "step": 15320 }, { "epoch": 0.8579348191286819, "grad_norm": 1.0671459436416626, "learning_rate": 7.656500000000001e-05, "loss": 0.3347, "step": 15321 }, { "epoch": 0.8579908164408109, 
"grad_norm": 1.2441599369049072, "learning_rate": 7.657000000000001e-05, "loss": 0.3581, "step": 15322 }, { "epoch": 0.8580468137529399, "grad_norm": 1.1995137929916382, "learning_rate": 7.6575e-05, "loss": 0.3563, "step": 15323 }, { "epoch": 0.8581028110650689, "grad_norm": 1.481691837310791, "learning_rate": 7.658e-05, "loss": 0.4686, "step": 15324 }, { "epoch": 0.8581588083771979, "grad_norm": 1.2402544021606445, "learning_rate": 7.658500000000001e-05, "loss": 0.4165, "step": 15325 }, { "epoch": 0.858214805689327, "grad_norm": 1.3916577100753784, "learning_rate": 7.659000000000001e-05, "loss": 0.3222, "step": 15326 }, { "epoch": 0.858270803001456, "grad_norm": 1.6940088272094727, "learning_rate": 7.659500000000001e-05, "loss": 0.468, "step": 15327 }, { "epoch": 0.858326800313585, "grad_norm": 1.287348747253418, "learning_rate": 7.66e-05, "loss": 0.3956, "step": 15328 }, { "epoch": 0.858382797625714, "grad_norm": 3.0163638591766357, "learning_rate": 7.6605e-05, "loss": 0.7145, "step": 15329 }, { "epoch": 0.858438794937843, "grad_norm": 1.291735053062439, "learning_rate": 7.661e-05, "loss": 0.4072, "step": 15330 }, { "epoch": 0.858494792249972, "grad_norm": 1.3850016593933105, "learning_rate": 7.6615e-05, "loss": 0.4995, "step": 15331 }, { "epoch": 0.8585507895621011, "grad_norm": 1.3612180948257446, "learning_rate": 7.662000000000001e-05, "loss": 0.3674, "step": 15332 }, { "epoch": 0.8586067868742301, "grad_norm": 1.2428797483444214, "learning_rate": 7.6625e-05, "loss": 0.4511, "step": 15333 }, { "epoch": 0.8586627841863591, "grad_norm": 1.3600984811782837, "learning_rate": 7.663e-05, "loss": 0.4131, "step": 15334 }, { "epoch": 0.8587187814984881, "grad_norm": 1.4094133377075195, "learning_rate": 7.6635e-05, "loss": 0.4748, "step": 15335 }, { "epoch": 0.8587747788106171, "grad_norm": 2.0889029502868652, "learning_rate": 7.664e-05, "loss": 0.3707, "step": 15336 }, { "epoch": 0.858830776122746, "grad_norm": 1.264032006263733, "learning_rate": 7.6645e-05, "loss": 
0.3294, "step": 15337 }, { "epoch": 0.8588867734348751, "grad_norm": 1.217186689376831, "learning_rate": 7.664999999999999e-05, "loss": 0.3299, "step": 15338 }, { "epoch": 0.8589427707470041, "grad_norm": 2.11641001701355, "learning_rate": 7.6655e-05, "loss": 0.4554, "step": 15339 }, { "epoch": 0.8589987680591331, "grad_norm": 1.3534648418426514, "learning_rate": 7.666e-05, "loss": 0.4348, "step": 15340 }, { "epoch": 0.8590547653712621, "grad_norm": 0.9543183445930481, "learning_rate": 7.666500000000001e-05, "loss": 0.333, "step": 15341 }, { "epoch": 0.8591107626833911, "grad_norm": 1.2656171321868896, "learning_rate": 7.667000000000001e-05, "loss": 0.4672, "step": 15342 }, { "epoch": 0.8591667599955202, "grad_norm": 2.572000741958618, "learning_rate": 7.667500000000001e-05, "loss": 0.5044, "step": 15343 }, { "epoch": 0.8592227573076492, "grad_norm": 1.4788674116134644, "learning_rate": 7.668e-05, "loss": 0.3862, "step": 15344 }, { "epoch": 0.8592787546197782, "grad_norm": 1.557162880897522, "learning_rate": 7.6685e-05, "loss": 0.3956, "step": 15345 }, { "epoch": 0.8593347519319072, "grad_norm": 1.3017799854278564, "learning_rate": 7.669000000000001e-05, "loss": 0.3904, "step": 15346 }, { "epoch": 0.8593907492440362, "grad_norm": 2.0657570362091064, "learning_rate": 7.669500000000001e-05, "loss": 0.46, "step": 15347 }, { "epoch": 0.8594467465561653, "grad_norm": 1.1990329027175903, "learning_rate": 7.670000000000001e-05, "loss": 0.3548, "step": 15348 }, { "epoch": 0.8595027438682943, "grad_norm": 2.280388116836548, "learning_rate": 7.6705e-05, "loss": 0.4669, "step": 15349 }, { "epoch": 0.8595587411804233, "grad_norm": 1.4168832302093506, "learning_rate": 7.671e-05, "loss": 0.4556, "step": 15350 }, { "epoch": 0.8596147384925523, "grad_norm": 1.4790887832641602, "learning_rate": 7.6715e-05, "loss": 0.3881, "step": 15351 }, { "epoch": 0.8596707358046813, "grad_norm": 1.7048856019973755, "learning_rate": 7.672e-05, "loss": 0.4468, "step": 15352 }, { "epoch": 
0.8597267331168104, "grad_norm": 1.3507384061813354, "learning_rate": 7.672500000000001e-05, "loss": 0.4268, "step": 15353 }, { "epoch": 0.8597827304289394, "grad_norm": 1.2550270557403564, "learning_rate": 7.673e-05, "loss": 0.4574, "step": 15354 }, { "epoch": 0.8598387277410684, "grad_norm": 1.3710527420043945, "learning_rate": 7.6735e-05, "loss": 0.422, "step": 15355 }, { "epoch": 0.8598947250531974, "grad_norm": 1.1961610317230225, "learning_rate": 7.674e-05, "loss": 0.3967, "step": 15356 }, { "epoch": 0.8599507223653264, "grad_norm": 1.5828375816345215, "learning_rate": 7.6745e-05, "loss": 0.4511, "step": 15357 }, { "epoch": 0.8600067196774555, "grad_norm": 1.3332316875457764, "learning_rate": 7.675e-05, "loss": 0.4498, "step": 15358 }, { "epoch": 0.8600627169895845, "grad_norm": 1.31644868850708, "learning_rate": 7.675499999999999e-05, "loss": 0.4281, "step": 15359 }, { "epoch": 0.8601187143017135, "grad_norm": 1.2825725078582764, "learning_rate": 7.676e-05, "loss": 0.4345, "step": 15360 }, { "epoch": 0.8601747116138425, "grad_norm": 1.291054368019104, "learning_rate": 7.676500000000001e-05, "loss": 0.3941, "step": 15361 }, { "epoch": 0.8602307089259715, "grad_norm": 1.2731778621673584, "learning_rate": 7.677000000000001e-05, "loss": 0.438, "step": 15362 }, { "epoch": 0.8602867062381006, "grad_norm": 1.3587656021118164, "learning_rate": 7.677500000000001e-05, "loss": 0.414, "step": 15363 }, { "epoch": 0.8603427035502296, "grad_norm": 1.2961088418960571, "learning_rate": 7.678000000000001e-05, "loss": 0.4495, "step": 15364 }, { "epoch": 0.8603987008623586, "grad_norm": 1.5349632501602173, "learning_rate": 7.6785e-05, "loss": 0.6048, "step": 15365 }, { "epoch": 0.8604546981744876, "grad_norm": 1.4466733932495117, "learning_rate": 7.679e-05, "loss": 0.392, "step": 15366 }, { "epoch": 0.8605106954866166, "grad_norm": 1.298704981803894, "learning_rate": 7.6795e-05, "loss": 0.4187, "step": 15367 }, { "epoch": 0.8605666927987456, "grad_norm": 1.2818284034729004, 
"learning_rate": 7.680000000000001e-05, "loss": 0.4395, "step": 15368 }, { "epoch": 0.8606226901108747, "grad_norm": 1.5905777215957642, "learning_rate": 7.680500000000001e-05, "loss": 0.4923, "step": 15369 }, { "epoch": 0.8606786874230037, "grad_norm": 1.232959508895874, "learning_rate": 7.681e-05, "loss": 0.3201, "step": 15370 }, { "epoch": 0.8607346847351327, "grad_norm": 1.4469730854034424, "learning_rate": 7.6815e-05, "loss": 0.4906, "step": 15371 }, { "epoch": 0.8607906820472617, "grad_norm": 1.1165945529937744, "learning_rate": 7.682e-05, "loss": 0.4475, "step": 15372 }, { "epoch": 0.8608466793593907, "grad_norm": 1.3485347032546997, "learning_rate": 7.6825e-05, "loss": 0.4444, "step": 15373 }, { "epoch": 0.8609026766715198, "grad_norm": 1.6563959121704102, "learning_rate": 7.683000000000001e-05, "loss": 0.4601, "step": 15374 }, { "epoch": 0.8609586739836488, "grad_norm": 1.7581827640533447, "learning_rate": 7.6835e-05, "loss": 0.4572, "step": 15375 }, { "epoch": 0.8610146712957778, "grad_norm": 1.1497478485107422, "learning_rate": 7.684e-05, "loss": 0.3935, "step": 15376 }, { "epoch": 0.8610706686079068, "grad_norm": 1.2260955572128296, "learning_rate": 7.6845e-05, "loss": 0.3778, "step": 15377 }, { "epoch": 0.8611266659200358, "grad_norm": 1.366969108581543, "learning_rate": 7.685e-05, "loss": 0.4371, "step": 15378 }, { "epoch": 0.8611826632321649, "grad_norm": 1.4714306592941284, "learning_rate": 7.6855e-05, "loss": 0.4312, "step": 15379 }, { "epoch": 0.8612386605442939, "grad_norm": 1.4076884984970093, "learning_rate": 7.685999999999999e-05, "loss": 0.466, "step": 15380 }, { "epoch": 0.8612946578564229, "grad_norm": 1.2253717184066772, "learning_rate": 7.6865e-05, "loss": 0.4248, "step": 15381 }, { "epoch": 0.8613506551685519, "grad_norm": 1.476537823677063, "learning_rate": 7.687000000000001e-05, "loss": 0.3281, "step": 15382 }, { "epoch": 0.8614066524806809, "grad_norm": 1.4724180698394775, "learning_rate": 7.687500000000001e-05, "loss": 0.4495, 
"step": 15383 }, { "epoch": 0.86146264979281, "grad_norm": 1.313353180885315, "learning_rate": 7.688000000000001e-05, "loss": 0.4246, "step": 15384 }, { "epoch": 0.861518647104939, "grad_norm": 1.5207279920578003, "learning_rate": 7.6885e-05, "loss": 0.5077, "step": 15385 }, { "epoch": 0.861574644417068, "grad_norm": 1.2960842847824097, "learning_rate": 7.689e-05, "loss": 0.4312, "step": 15386 }, { "epoch": 0.861630641729197, "grad_norm": 1.3373053073883057, "learning_rate": 7.6895e-05, "loss": 0.5328, "step": 15387 }, { "epoch": 0.861686639041326, "grad_norm": 1.7143656015396118, "learning_rate": 7.69e-05, "loss": 0.5065, "step": 15388 }, { "epoch": 0.861742636353455, "grad_norm": 1.6221736669540405, "learning_rate": 7.690500000000001e-05, "loss": 0.4077, "step": 15389 }, { "epoch": 0.8617986336655841, "grad_norm": 1.740646243095398, "learning_rate": 7.691000000000001e-05, "loss": 0.4296, "step": 15390 }, { "epoch": 0.8618546309777131, "grad_norm": 1.6884620189666748, "learning_rate": 7.6915e-05, "loss": 0.6193, "step": 15391 }, { "epoch": 0.8619106282898421, "grad_norm": 1.6343050003051758, "learning_rate": 7.692e-05, "loss": 0.6356, "step": 15392 }, { "epoch": 0.8619666256019711, "grad_norm": 1.3310794830322266, "learning_rate": 7.6925e-05, "loss": 0.4613, "step": 15393 }, { "epoch": 0.8620226229141001, "grad_norm": 1.2125768661499023, "learning_rate": 7.693e-05, "loss": 0.3971, "step": 15394 }, { "epoch": 0.8620786202262292, "grad_norm": 1.165642499923706, "learning_rate": 7.693500000000001e-05, "loss": 0.4871, "step": 15395 }, { "epoch": 0.8621346175383582, "grad_norm": 1.8752145767211914, "learning_rate": 7.694e-05, "loss": 0.4689, "step": 15396 }, { "epoch": 0.8621906148504872, "grad_norm": 1.4456344842910767, "learning_rate": 7.6945e-05, "loss": 0.4598, "step": 15397 }, { "epoch": 0.8622466121626162, "grad_norm": 1.1199686527252197, "learning_rate": 7.695e-05, "loss": 0.371, "step": 15398 }, { "epoch": 0.8623026094747452, "grad_norm": 1.0419329404830933, 
"learning_rate": 7.6955e-05, "loss": 0.3077, "step": 15399 }, { "epoch": 0.8623586067868743, "grad_norm": 1.233064889907837, "learning_rate": 7.696e-05, "loss": 0.3843, "step": 15400 }, { "epoch": 0.8624146040990033, "grad_norm": 1.3966174125671387, "learning_rate": 7.696499999999999e-05, "loss": 0.5682, "step": 15401 }, { "epoch": 0.8624706014111323, "grad_norm": 2.0899782180786133, "learning_rate": 7.697e-05, "loss": 0.5866, "step": 15402 }, { "epoch": 0.8625265987232613, "grad_norm": 1.5108164548873901, "learning_rate": 7.697500000000001e-05, "loss": 0.4806, "step": 15403 }, { "epoch": 0.8625825960353903, "grad_norm": 1.3947193622589111, "learning_rate": 7.698000000000001e-05, "loss": 0.4549, "step": 15404 }, { "epoch": 0.8626385933475194, "grad_norm": 1.3674523830413818, "learning_rate": 7.698500000000001e-05, "loss": 0.5378, "step": 15405 }, { "epoch": 0.8626945906596484, "grad_norm": 1.3286527395248413, "learning_rate": 7.699e-05, "loss": 0.604, "step": 15406 }, { "epoch": 0.8627505879717774, "grad_norm": 1.4222450256347656, "learning_rate": 7.6995e-05, "loss": 0.4108, "step": 15407 }, { "epoch": 0.8628065852839064, "grad_norm": 1.323704481124878, "learning_rate": 7.7e-05, "loss": 0.5305, "step": 15408 }, { "epoch": 0.8628625825960354, "grad_norm": 1.5488990545272827, "learning_rate": 7.7005e-05, "loss": 0.6045, "step": 15409 }, { "epoch": 0.8629185799081645, "grad_norm": 1.2930275201797485, "learning_rate": 7.701000000000001e-05, "loss": 0.5057, "step": 15410 }, { "epoch": 0.8629745772202935, "grad_norm": 1.2379000186920166, "learning_rate": 7.701500000000001e-05, "loss": 0.4044, "step": 15411 }, { "epoch": 0.8630305745324225, "grad_norm": 1.2456347942352295, "learning_rate": 7.702e-05, "loss": 0.4925, "step": 15412 }, { "epoch": 0.8630865718445515, "grad_norm": 1.2400336265563965, "learning_rate": 7.7025e-05, "loss": 0.3953, "step": 15413 }, { "epoch": 0.8631425691566805, "grad_norm": 1.3767483234405518, "learning_rate": 7.703e-05, "loss": 0.3056, "step": 
15414 }, { "epoch": 0.8631985664688095, "grad_norm": 1.6071982383728027, "learning_rate": 7.7035e-05, "loss": 0.7268, "step": 15415 }, { "epoch": 0.8632545637809386, "grad_norm": 1.1186975240707397, "learning_rate": 7.704000000000001e-05, "loss": 0.3486, "step": 15416 }, { "epoch": 0.8633105610930676, "grad_norm": 1.5963722467422485, "learning_rate": 7.7045e-05, "loss": 0.4169, "step": 15417 }, { "epoch": 0.8633665584051966, "grad_norm": 1.4779984951019287, "learning_rate": 7.705e-05, "loss": 0.5113, "step": 15418 }, { "epoch": 0.8634225557173256, "grad_norm": 1.7189549207687378, "learning_rate": 7.7055e-05, "loss": 0.6054, "step": 15419 }, { "epoch": 0.8634785530294545, "grad_norm": 1.1392791271209717, "learning_rate": 7.706e-05, "loss": 0.3658, "step": 15420 }, { "epoch": 0.8635345503415836, "grad_norm": 1.5345208644866943, "learning_rate": 7.7065e-05, "loss": 0.5942, "step": 15421 }, { "epoch": 0.8635905476537126, "grad_norm": 1.671932339668274, "learning_rate": 7.707e-05, "loss": 0.4105, "step": 15422 }, { "epoch": 0.8636465449658416, "grad_norm": 1.54603111743927, "learning_rate": 7.7075e-05, "loss": 0.4105, "step": 15423 }, { "epoch": 0.8637025422779706, "grad_norm": 1.509049892425537, "learning_rate": 7.708000000000001e-05, "loss": 0.4501, "step": 15424 }, { "epoch": 0.8637585395900996, "grad_norm": 1.3530577421188354, "learning_rate": 7.708500000000001e-05, "loss": 0.5009, "step": 15425 }, { "epoch": 0.8638145369022286, "grad_norm": 1.230531930923462, "learning_rate": 7.709000000000001e-05, "loss": 0.3958, "step": 15426 }, { "epoch": 0.8638705342143577, "grad_norm": 1.2434871196746826, "learning_rate": 7.7095e-05, "loss": 0.5073, "step": 15427 }, { "epoch": 0.8639265315264867, "grad_norm": 1.2971305847167969, "learning_rate": 7.71e-05, "loss": 0.4372, "step": 15428 }, { "epoch": 0.8639825288386157, "grad_norm": 1.4570695161819458, "learning_rate": 7.7105e-05, "loss": 0.4053, "step": 15429 }, { "epoch": 0.8640385261507447, "grad_norm": 1.3198705911636353, 
"learning_rate": 7.711e-05, "loss": 0.381, "step": 15430 }, { "epoch": 0.8640945234628737, "grad_norm": 1.161210536956787, "learning_rate": 7.711500000000001e-05, "loss": 0.4301, "step": 15431 }, { "epoch": 0.8641505207750028, "grad_norm": 1.0020915269851685, "learning_rate": 7.712000000000001e-05, "loss": 0.3555, "step": 15432 }, { "epoch": 0.8642065180871318, "grad_norm": 1.200257420539856, "learning_rate": 7.7125e-05, "loss": 0.4197, "step": 15433 }, { "epoch": 0.8642625153992608, "grad_norm": 1.7010304927825928, "learning_rate": 7.713e-05, "loss": 0.5349, "step": 15434 }, { "epoch": 0.8643185127113898, "grad_norm": 1.8268228769302368, "learning_rate": 7.7135e-05, "loss": 0.5794, "step": 15435 }, { "epoch": 0.8643745100235188, "grad_norm": 1.5086874961853027, "learning_rate": 7.714e-05, "loss": 0.4878, "step": 15436 }, { "epoch": 0.8644305073356479, "grad_norm": 1.0217642784118652, "learning_rate": 7.7145e-05, "loss": 0.3234, "step": 15437 }, { "epoch": 0.8644865046477769, "grad_norm": 5.203007698059082, "learning_rate": 7.715e-05, "loss": 0.511, "step": 15438 }, { "epoch": 0.8645425019599059, "grad_norm": 1.233720302581787, "learning_rate": 7.7155e-05, "loss": 0.4566, "step": 15439 }, { "epoch": 0.8645984992720349, "grad_norm": 1.3341400623321533, "learning_rate": 7.716e-05, "loss": 0.404, "step": 15440 }, { "epoch": 0.8646544965841639, "grad_norm": 1.4108461141586304, "learning_rate": 7.7165e-05, "loss": 0.4925, "step": 15441 }, { "epoch": 0.864710493896293, "grad_norm": 1.3659836053848267, "learning_rate": 7.717000000000001e-05, "loss": 0.4156, "step": 15442 }, { "epoch": 0.864766491208422, "grad_norm": 1.1967355012893677, "learning_rate": 7.7175e-05, "loss": 0.386, "step": 15443 }, { "epoch": 0.864822488520551, "grad_norm": 1.2768745422363281, "learning_rate": 7.718e-05, "loss": 0.4394, "step": 15444 }, { "epoch": 0.86487848583268, "grad_norm": 1.3104383945465088, "learning_rate": 7.718500000000001e-05, "loss": 0.3974, "step": 15445 }, { "epoch": 
0.864934483144809, "grad_norm": 1.3468825817108154, "learning_rate": 7.719000000000001e-05, "loss": 0.4178, "step": 15446 }, { "epoch": 0.864990480456938, "grad_norm": 1.2500470876693726, "learning_rate": 7.719500000000001e-05, "loss": 0.4704, "step": 15447 }, { "epoch": 0.8650464777690671, "grad_norm": 1.2548214197158813, "learning_rate": 7.72e-05, "loss": 0.3746, "step": 15448 }, { "epoch": 0.8651024750811961, "grad_norm": 1.4752436876296997, "learning_rate": 7.7205e-05, "loss": 0.5735, "step": 15449 }, { "epoch": 0.8651584723933251, "grad_norm": 1.3975766897201538, "learning_rate": 7.721e-05, "loss": 0.4493, "step": 15450 }, { "epoch": 0.8652144697054541, "grad_norm": 6.8397417068481445, "learning_rate": 7.7215e-05, "loss": 0.4978, "step": 15451 }, { "epoch": 0.8652704670175831, "grad_norm": 1.2908713817596436, "learning_rate": 7.722000000000001e-05, "loss": 0.4509, "step": 15452 }, { "epoch": 0.8653264643297122, "grad_norm": 1.1639848947525024, "learning_rate": 7.722500000000001e-05, "loss": 0.5475, "step": 15453 }, { "epoch": 0.8653824616418412, "grad_norm": 1.2309406995773315, "learning_rate": 7.723e-05, "loss": 0.4391, "step": 15454 }, { "epoch": 0.8654384589539702, "grad_norm": 1.3136157989501953, "learning_rate": 7.7235e-05, "loss": 0.4677, "step": 15455 }, { "epoch": 0.8654944562660992, "grad_norm": 1.334875226020813, "learning_rate": 7.724e-05, "loss": 0.3689, "step": 15456 }, { "epoch": 0.8655504535782282, "grad_norm": 1.2034419775009155, "learning_rate": 7.7245e-05, "loss": 0.4591, "step": 15457 }, { "epoch": 0.8656064508903573, "grad_norm": 1.3061434030532837, "learning_rate": 7.725e-05, "loss": 0.4413, "step": 15458 }, { "epoch": 0.8656624482024863, "grad_norm": 1.155679702758789, "learning_rate": 7.7255e-05, "loss": 0.386, "step": 15459 }, { "epoch": 0.8657184455146153, "grad_norm": 1.4922846555709839, "learning_rate": 7.726e-05, "loss": 0.5156, "step": 15460 }, { "epoch": 0.8657744428267443, "grad_norm": 1.457108497619629, "learning_rate": 
7.7265e-05, "loss": 0.5872, "step": 15461 }, { "epoch": 0.8658304401388733, "grad_norm": 1.2564916610717773, "learning_rate": 7.727000000000001e-05, "loss": 0.4891, "step": 15462 }, { "epoch": 0.8658864374510024, "grad_norm": 1.2155927419662476, "learning_rate": 7.727500000000001e-05, "loss": 0.4745, "step": 15463 }, { "epoch": 0.8659424347631314, "grad_norm": 1.302109956741333, "learning_rate": 7.728e-05, "loss": 0.4322, "step": 15464 }, { "epoch": 0.8659984320752604, "grad_norm": 1.4536240100860596, "learning_rate": 7.7285e-05, "loss": 0.4242, "step": 15465 }, { "epoch": 0.8660544293873894, "grad_norm": 1.3679760694503784, "learning_rate": 7.729e-05, "loss": 0.4916, "step": 15466 }, { "epoch": 0.8661104266995184, "grad_norm": 1.3587265014648438, "learning_rate": 7.729500000000001e-05, "loss": 0.4235, "step": 15467 }, { "epoch": 0.8661664240116475, "grad_norm": 1.4559144973754883, "learning_rate": 7.730000000000001e-05, "loss": 0.4911, "step": 15468 }, { "epoch": 0.8662224213237765, "grad_norm": 1.8773778676986694, "learning_rate": 7.7305e-05, "loss": 0.6553, "step": 15469 }, { "epoch": 0.8662784186359055, "grad_norm": 1.2014228105545044, "learning_rate": 7.731e-05, "loss": 0.3914, "step": 15470 }, { "epoch": 0.8663344159480345, "grad_norm": 1.4217487573623657, "learning_rate": 7.7315e-05, "loss": 0.3885, "step": 15471 }, { "epoch": 0.8663904132601635, "grad_norm": 1.4292007684707642, "learning_rate": 7.732e-05, "loss": 0.4123, "step": 15472 }, { "epoch": 0.8664464105722925, "grad_norm": 1.520644187927246, "learning_rate": 7.732500000000001e-05, "loss": 0.6345, "step": 15473 }, { "epoch": 0.8665024078844216, "grad_norm": 1.1639766693115234, "learning_rate": 7.733e-05, "loss": 0.4798, "step": 15474 }, { "epoch": 0.8665584051965506, "grad_norm": 1.337589144706726, "learning_rate": 7.7335e-05, "loss": 0.3851, "step": 15475 }, { "epoch": 0.8666144025086796, "grad_norm": 1.5339282751083374, "learning_rate": 7.734e-05, "loss": 0.4823, "step": 15476 }, { "epoch": 
0.8666703998208086, "grad_norm": 1.498666524887085, "learning_rate": 7.7345e-05, "loss": 0.5346, "step": 15477 }, { "epoch": 0.8667263971329376, "grad_norm": 1.5173438787460327, "learning_rate": 7.735e-05, "loss": 0.4974, "step": 15478 }, { "epoch": 0.8667823944450667, "grad_norm": 1.3957185745239258, "learning_rate": 7.7355e-05, "loss": 0.5111, "step": 15479 }, { "epoch": 0.8668383917571957, "grad_norm": 1.2964214086532593, "learning_rate": 7.736e-05, "loss": 0.4033, "step": 15480 }, { "epoch": 0.8668943890693247, "grad_norm": 1.4217289686203003, "learning_rate": 7.7365e-05, "loss": 0.5553, "step": 15481 }, { "epoch": 0.8669503863814537, "grad_norm": 1.6912500858306885, "learning_rate": 7.737000000000001e-05, "loss": 0.4515, "step": 15482 }, { "epoch": 0.8670063836935827, "grad_norm": 1.4011566638946533, "learning_rate": 7.737500000000001e-05, "loss": 0.467, "step": 15483 }, { "epoch": 0.8670623810057118, "grad_norm": 1.3058698177337646, "learning_rate": 7.738000000000001e-05, "loss": 0.4093, "step": 15484 }, { "epoch": 0.8671183783178408, "grad_norm": 1.1876654624938965, "learning_rate": 7.7385e-05, "loss": 0.5413, "step": 15485 }, { "epoch": 0.8671743756299698, "grad_norm": 1.3856619596481323, "learning_rate": 7.739e-05, "loss": 0.5422, "step": 15486 }, { "epoch": 0.8672303729420988, "grad_norm": 1.1427805423736572, "learning_rate": 7.7395e-05, "loss": 0.3884, "step": 15487 }, { "epoch": 0.8672863702542278, "grad_norm": 1.272996425628662, "learning_rate": 7.740000000000001e-05, "loss": 0.6489, "step": 15488 }, { "epoch": 0.8673423675663569, "grad_norm": 1.6561285257339478, "learning_rate": 7.740500000000001e-05, "loss": 0.5978, "step": 15489 }, { "epoch": 0.8673983648784859, "grad_norm": 1.341091513633728, "learning_rate": 7.741e-05, "loss": 0.4276, "step": 15490 }, { "epoch": 0.8674543621906149, "grad_norm": 1.3549751043319702, "learning_rate": 7.7415e-05, "loss": 0.4541, "step": 15491 }, { "epoch": 0.8675103595027439, "grad_norm": 1.370042085647583, 
"learning_rate": 7.742e-05, "loss": 0.379, "step": 15492 }, { "epoch": 0.8675663568148729, "grad_norm": 1.3413833379745483, "learning_rate": 7.7425e-05, "loss": 0.4339, "step": 15493 }, { "epoch": 0.867622354127002, "grad_norm": 1.3769487142562866, "learning_rate": 7.743000000000001e-05, "loss": 0.4813, "step": 15494 }, { "epoch": 0.867678351439131, "grad_norm": 1.2802037000656128, "learning_rate": 7.7435e-05, "loss": 0.492, "step": 15495 }, { "epoch": 0.86773434875126, "grad_norm": 1.201843500137329, "learning_rate": 7.744e-05, "loss": 0.3885, "step": 15496 }, { "epoch": 0.867790346063389, "grad_norm": 1.21647310256958, "learning_rate": 7.7445e-05, "loss": 0.3994, "step": 15497 }, { "epoch": 0.867846343375518, "grad_norm": 1.3415783643722534, "learning_rate": 7.745e-05, "loss": 0.3511, "step": 15498 }, { "epoch": 0.867902340687647, "grad_norm": 1.1152540445327759, "learning_rate": 7.7455e-05, "loss": 0.406, "step": 15499 }, { "epoch": 0.8679583379997761, "grad_norm": 1.1693726778030396, "learning_rate": 7.746e-05, "loss": 0.4369, "step": 15500 }, { "epoch": 0.8680143353119051, "grad_norm": 1.5938900709152222, "learning_rate": 7.7465e-05, "loss": 0.5564, "step": 15501 }, { "epoch": 0.868070332624034, "grad_norm": 1.3347742557525635, "learning_rate": 7.747000000000002e-05, "loss": 0.481, "step": 15502 }, { "epoch": 0.868126329936163, "grad_norm": 1.2332980632781982, "learning_rate": 7.747500000000001e-05, "loss": 0.4362, "step": 15503 }, { "epoch": 0.868182327248292, "grad_norm": 1.2018414735794067, "learning_rate": 7.748000000000001e-05, "loss": 0.4191, "step": 15504 }, { "epoch": 0.868238324560421, "grad_norm": 1.2105592489242554, "learning_rate": 7.748500000000001e-05, "loss": 0.517, "step": 15505 }, { "epoch": 0.8682943218725501, "grad_norm": 1.053380012512207, "learning_rate": 7.749e-05, "loss": 0.4104, "step": 15506 }, { "epoch": 0.8683503191846791, "grad_norm": 1.3285924196243286, "learning_rate": 7.7495e-05, "loss": 0.4991, "step": 15507 }, { "epoch": 
0.8684063164968081, "grad_norm": 1.1642602682113647, "learning_rate": 7.75e-05, "loss": 0.3911, "step": 15508 }, { "epoch": 0.8684623138089371, "grad_norm": 1.2761082649230957, "learning_rate": 7.750500000000001e-05, "loss": 0.4166, "step": 15509 }, { "epoch": 0.8685183111210661, "grad_norm": 1.2622443437576294, "learning_rate": 7.751000000000001e-05, "loss": 0.4354, "step": 15510 }, { "epoch": 0.8685743084331952, "grad_norm": 1.3382610082626343, "learning_rate": 7.7515e-05, "loss": 0.3583, "step": 15511 }, { "epoch": 0.8686303057453242, "grad_norm": 1.1973257064819336, "learning_rate": 7.752e-05, "loss": 0.3613, "step": 15512 }, { "epoch": 0.8686863030574532, "grad_norm": 1.2233986854553223, "learning_rate": 7.7525e-05, "loss": 0.4168, "step": 15513 }, { "epoch": 0.8687423003695822, "grad_norm": 1.197396159172058, "learning_rate": 7.753e-05, "loss": 0.344, "step": 15514 }, { "epoch": 0.8687982976817112, "grad_norm": 1.5905447006225586, "learning_rate": 7.7535e-05, "loss": 0.6329, "step": 15515 }, { "epoch": 0.8688542949938403, "grad_norm": 1.442287802696228, "learning_rate": 7.754e-05, "loss": 0.3922, "step": 15516 }, { "epoch": 0.8689102923059693, "grad_norm": 1.1113076210021973, "learning_rate": 7.7545e-05, "loss": 0.4077, "step": 15517 }, { "epoch": 0.8689662896180983, "grad_norm": 1.2457160949707031, "learning_rate": 7.755e-05, "loss": 0.471, "step": 15518 }, { "epoch": 0.8690222869302273, "grad_norm": 1.5347809791564941, "learning_rate": 7.7555e-05, "loss": 0.3644, "step": 15519 }, { "epoch": 0.8690782842423563, "grad_norm": 1.2472450733184814, "learning_rate": 7.756e-05, "loss": 0.4156, "step": 15520 }, { "epoch": 0.8691342815544854, "grad_norm": 1.208138108253479, "learning_rate": 7.7565e-05, "loss": 0.468, "step": 15521 }, { "epoch": 0.8691902788666144, "grad_norm": 1.2988399267196655, "learning_rate": 7.757e-05, "loss": 0.4566, "step": 15522 }, { "epoch": 0.8692462761787434, "grad_norm": 1.5431101322174072, "learning_rate": 7.757500000000002e-05, "loss": 
0.4289, "step": 15523 }, { "epoch": 0.8693022734908724, "grad_norm": 1.3862583637237549, "learning_rate": 7.758000000000001e-05, "loss": 0.5599, "step": 15524 }, { "epoch": 0.8693582708030014, "grad_norm": 1.4205782413482666, "learning_rate": 7.758500000000001e-05, "loss": 0.4798, "step": 15525 }, { "epoch": 0.8694142681151305, "grad_norm": 1.4005579948425293, "learning_rate": 7.759000000000001e-05, "loss": 0.3991, "step": 15526 }, { "epoch": 0.8694702654272595, "grad_norm": 1.517439603805542, "learning_rate": 7.7595e-05, "loss": 0.4242, "step": 15527 }, { "epoch": 0.8695262627393885, "grad_norm": 1.5895737409591675, "learning_rate": 7.76e-05, "loss": 0.5689, "step": 15528 }, { "epoch": 0.8695822600515175, "grad_norm": 1.289066195487976, "learning_rate": 7.7605e-05, "loss": 0.4296, "step": 15529 }, { "epoch": 0.8696382573636465, "grad_norm": 1.2449251413345337, "learning_rate": 7.761000000000001e-05, "loss": 0.4266, "step": 15530 }, { "epoch": 0.8696942546757755, "grad_norm": 1.6113264560699463, "learning_rate": 7.761500000000001e-05, "loss": 0.5132, "step": 15531 }, { "epoch": 0.8697502519879046, "grad_norm": 1.1368285417556763, "learning_rate": 7.762e-05, "loss": 0.439, "step": 15532 }, { "epoch": 0.8698062493000336, "grad_norm": 1.2893623113632202, "learning_rate": 7.7625e-05, "loss": 0.4703, "step": 15533 }, { "epoch": 0.8698622466121626, "grad_norm": 1.5299174785614014, "learning_rate": 7.763e-05, "loss": 0.4481, "step": 15534 }, { "epoch": 0.8699182439242916, "grad_norm": 1.1706514358520508, "learning_rate": 7.7635e-05, "loss": 0.3218, "step": 15535 }, { "epoch": 0.8699742412364206, "grad_norm": 1.2985281944274902, "learning_rate": 7.764e-05, "loss": 0.3237, "step": 15536 }, { "epoch": 0.8700302385485497, "grad_norm": 1.3335989713668823, "learning_rate": 7.7645e-05, "loss": 0.4881, "step": 15537 }, { "epoch": 0.8700862358606787, "grad_norm": 1.3737127780914307, "learning_rate": 7.765e-05, "loss": 0.3775, "step": 15538 }, { "epoch": 0.8701422331728077, 
"grad_norm": 1.4452928304672241, "learning_rate": 7.7655e-05, "loss": 0.3665, "step": 15539 }, { "epoch": 0.8701982304849367, "grad_norm": 1.1835672855377197, "learning_rate": 7.766e-05, "loss": 0.3957, "step": 15540 }, { "epoch": 0.8702542277970657, "grad_norm": 1.1987941265106201, "learning_rate": 7.7665e-05, "loss": 0.4849, "step": 15541 }, { "epoch": 0.8703102251091948, "grad_norm": 1.0974875688552856, "learning_rate": 7.767e-05, "loss": 0.4018, "step": 15542 }, { "epoch": 0.8703662224213238, "grad_norm": 1.0797059535980225, "learning_rate": 7.7675e-05, "loss": 0.3083, "step": 15543 }, { "epoch": 0.8704222197334528, "grad_norm": 1.196189284324646, "learning_rate": 7.768e-05, "loss": 0.3826, "step": 15544 }, { "epoch": 0.8704782170455818, "grad_norm": 1.3408902883529663, "learning_rate": 7.768500000000001e-05, "loss": 0.4565, "step": 15545 }, { "epoch": 0.8705342143577108, "grad_norm": 1.1600315570831299, "learning_rate": 7.769000000000001e-05, "loss": 0.3222, "step": 15546 }, { "epoch": 0.8705902116698399, "grad_norm": 1.8778573274612427, "learning_rate": 7.769500000000001e-05, "loss": 0.5961, "step": 15547 }, { "epoch": 0.8706462089819689, "grad_norm": 1.2116731405258179, "learning_rate": 7.77e-05, "loss": 0.4234, "step": 15548 }, { "epoch": 0.8707022062940979, "grad_norm": 1.3063853979110718, "learning_rate": 7.7705e-05, "loss": 0.38, "step": 15549 }, { "epoch": 0.8707582036062269, "grad_norm": 1.646710753440857, "learning_rate": 7.771e-05, "loss": 0.433, "step": 15550 }, { "epoch": 0.8708142009183559, "grad_norm": 1.3051859140396118, "learning_rate": 7.771500000000001e-05, "loss": 0.4738, "step": 15551 }, { "epoch": 0.870870198230485, "grad_norm": 1.4985663890838623, "learning_rate": 7.772000000000001e-05, "loss": 0.4587, "step": 15552 }, { "epoch": 0.870926195542614, "grad_norm": 1.1227905750274658, "learning_rate": 7.7725e-05, "loss": 0.3363, "step": 15553 }, { "epoch": 0.870982192854743, "grad_norm": 1.4132035970687866, "learning_rate": 7.773e-05, "loss": 
0.5337, "step": 15554 }, { "epoch": 0.871038190166872, "grad_norm": 1.3326305150985718, "learning_rate": 7.7735e-05, "loss": 0.4576, "step": 15555 }, { "epoch": 0.871094187479001, "grad_norm": 1.2112631797790527, "learning_rate": 7.774e-05, "loss": 0.405, "step": 15556 }, { "epoch": 0.87115018479113, "grad_norm": 1.383243203163147, "learning_rate": 7.7745e-05, "loss": 0.6684, "step": 15557 }, { "epoch": 0.8712061821032591, "grad_norm": 1.0421710014343262, "learning_rate": 7.775e-05, "loss": 0.3893, "step": 15558 }, { "epoch": 0.8712621794153881, "grad_norm": 1.2611445188522339, "learning_rate": 7.7755e-05, "loss": 0.5054, "step": 15559 }, { "epoch": 0.8713181767275171, "grad_norm": 1.14493727684021, "learning_rate": 7.776e-05, "loss": 0.352, "step": 15560 }, { "epoch": 0.8713741740396461, "grad_norm": 1.6791402101516724, "learning_rate": 7.7765e-05, "loss": 0.5136, "step": 15561 }, { "epoch": 0.8714301713517751, "grad_norm": 1.4859578609466553, "learning_rate": 7.777e-05, "loss": 0.3403, "step": 15562 }, { "epoch": 0.8714861686639042, "grad_norm": 1.1438791751861572, "learning_rate": 7.7775e-05, "loss": 0.3434, "step": 15563 }, { "epoch": 0.8715421659760332, "grad_norm": 1.1297763586044312, "learning_rate": 7.778e-05, "loss": 0.4069, "step": 15564 }, { "epoch": 0.8715981632881622, "grad_norm": 1.4422338008880615, "learning_rate": 7.7785e-05, "loss": 0.4356, "step": 15565 }, { "epoch": 0.8716541606002912, "grad_norm": 1.3220010995864868, "learning_rate": 7.779000000000001e-05, "loss": 0.4501, "step": 15566 }, { "epoch": 0.8717101579124202, "grad_norm": 1.1621001958847046, "learning_rate": 7.779500000000001e-05, "loss": 0.4435, "step": 15567 }, { "epoch": 0.8717661552245493, "grad_norm": 1.2144877910614014, "learning_rate": 7.780000000000001e-05, "loss": 0.4044, "step": 15568 }, { "epoch": 0.8718221525366783, "grad_norm": 1.7398021221160889, "learning_rate": 7.7805e-05, "loss": 0.7119, "step": 15569 }, { "epoch": 0.8718781498488073, "grad_norm": 1.3021275997161865, 
"learning_rate": 7.781e-05, "loss": 0.5303, "step": 15570 }, { "epoch": 0.8719341471609363, "grad_norm": 1.234926700592041, "learning_rate": 7.7815e-05, "loss": 0.4424, "step": 15571 }, { "epoch": 0.8719901444730653, "grad_norm": 1.5147863626480103, "learning_rate": 7.782000000000001e-05, "loss": 0.4176, "step": 15572 }, { "epoch": 0.8720461417851944, "grad_norm": 1.1744723320007324, "learning_rate": 7.782500000000001e-05, "loss": 0.3972, "step": 15573 }, { "epoch": 0.8721021390973234, "grad_norm": 1.2651808261871338, "learning_rate": 7.783e-05, "loss": 0.3941, "step": 15574 }, { "epoch": 0.8721581364094524, "grad_norm": 1.311531901359558, "learning_rate": 7.7835e-05, "loss": 0.4906, "step": 15575 }, { "epoch": 0.8722141337215814, "grad_norm": 1.4220561981201172, "learning_rate": 7.784e-05, "loss": 0.4883, "step": 15576 }, { "epoch": 0.8722701310337104, "grad_norm": 1.3571114540100098, "learning_rate": 7.7845e-05, "loss": 0.6236, "step": 15577 }, { "epoch": 0.8723261283458394, "grad_norm": 1.26026451587677, "learning_rate": 7.785e-05, "loss": 0.5265, "step": 15578 }, { "epoch": 0.8723821256579685, "grad_norm": 1.316271185874939, "learning_rate": 7.7855e-05, "loss": 0.4858, "step": 15579 }, { "epoch": 0.8724381229700975, "grad_norm": 1.3338090181350708, "learning_rate": 7.786e-05, "loss": 0.4271, "step": 15580 }, { "epoch": 0.8724941202822265, "grad_norm": 1.221513271331787, "learning_rate": 7.7865e-05, "loss": 0.3846, "step": 15581 }, { "epoch": 0.8725501175943555, "grad_norm": 1.2444828748703003, "learning_rate": 7.787e-05, "loss": 0.4285, "step": 15582 }, { "epoch": 0.8726061149064845, "grad_norm": 1.3883877992630005, "learning_rate": 7.787500000000001e-05, "loss": 0.3942, "step": 15583 }, { "epoch": 0.8726621122186136, "grad_norm": 1.7320107221603394, "learning_rate": 7.788e-05, "loss": 0.4515, "step": 15584 }, { "epoch": 0.8727181095307425, "grad_norm": 1.1985026597976685, "learning_rate": 7.7885e-05, "loss": 0.4076, "step": 15585 }, { "epoch": 
0.8727741068428715, "grad_norm": 1.548317551612854, "learning_rate": 7.789e-05, "loss": 0.3293, "step": 15586 }, { "epoch": 0.8728301041550005, "grad_norm": 1.435633897781372, "learning_rate": 7.789500000000001e-05, "loss": 0.4572, "step": 15587 }, { "epoch": 0.8728861014671295, "grad_norm": 1.9437674283981323, "learning_rate": 7.790000000000001e-05, "loss": 0.508, "step": 15588 }, { "epoch": 0.8729420987792585, "grad_norm": 5.298740863800049, "learning_rate": 7.790500000000001e-05, "loss": 0.5213, "step": 15589 }, { "epoch": 0.8729980960913876, "grad_norm": 1.305383324623108, "learning_rate": 7.791e-05, "loss": 0.4151, "step": 15590 }, { "epoch": 0.8730540934035166, "grad_norm": 1.3252203464508057, "learning_rate": 7.7915e-05, "loss": 0.4882, "step": 15591 }, { "epoch": 0.8731100907156456, "grad_norm": 1.3112610578536987, "learning_rate": 7.792e-05, "loss": 0.3805, "step": 15592 }, { "epoch": 0.8731660880277746, "grad_norm": 1.2379817962646484, "learning_rate": 7.792500000000001e-05, "loss": 0.4596, "step": 15593 }, { "epoch": 0.8732220853399036, "grad_norm": 1.6146538257598877, "learning_rate": 7.793000000000001e-05, "loss": 0.4251, "step": 15594 }, { "epoch": 0.8732780826520327, "grad_norm": 1.3385467529296875, "learning_rate": 7.7935e-05, "loss": 0.4956, "step": 15595 }, { "epoch": 0.8733340799641617, "grad_norm": 1.4091204404830933, "learning_rate": 7.794e-05, "loss": 0.4343, "step": 15596 }, { "epoch": 0.8733900772762907, "grad_norm": 1.2925491333007812, "learning_rate": 7.7945e-05, "loss": 0.4075, "step": 15597 }, { "epoch": 0.8734460745884197, "grad_norm": 1.2249665260314941, "learning_rate": 7.795e-05, "loss": 0.4046, "step": 15598 }, { "epoch": 0.8735020719005487, "grad_norm": 1.1581363677978516, "learning_rate": 7.7955e-05, "loss": 0.3682, "step": 15599 }, { "epoch": 0.8735580692126778, "grad_norm": 1.7862924337387085, "learning_rate": 7.796e-05, "loss": 0.526, "step": 15600 }, { "epoch": 0.8736140665248068, "grad_norm": 1.3446723222732544, 
"learning_rate": 7.7965e-05, "loss": 0.4079, "step": 15601 }, { "epoch": 0.8736700638369358, "grad_norm": 1.334972620010376, "learning_rate": 7.797e-05, "loss": 0.4373, "step": 15602 }, { "epoch": 0.8737260611490648, "grad_norm": 1.2533605098724365, "learning_rate": 7.797500000000001e-05, "loss": 0.3595, "step": 15603 }, { "epoch": 0.8737820584611938, "grad_norm": 1.8424146175384521, "learning_rate": 7.798000000000001e-05, "loss": 0.566, "step": 15604 }, { "epoch": 0.8738380557733229, "grad_norm": 1.4950603246688843, "learning_rate": 7.7985e-05, "loss": 0.378, "step": 15605 }, { "epoch": 0.8738940530854519, "grad_norm": 1.2845555543899536, "learning_rate": 7.799e-05, "loss": 0.42, "step": 15606 }, { "epoch": 0.8739500503975809, "grad_norm": 1.5531342029571533, "learning_rate": 7.7995e-05, "loss": 0.515, "step": 15607 }, { "epoch": 0.8740060477097099, "grad_norm": 1.1322746276855469, "learning_rate": 7.800000000000001e-05, "loss": 0.354, "step": 15608 }, { "epoch": 0.8740620450218389, "grad_norm": 1.6688734292984009, "learning_rate": 7.800500000000001e-05, "loss": 0.4178, "step": 15609 }, { "epoch": 0.874118042333968, "grad_norm": 1.2702504396438599, "learning_rate": 7.801000000000001e-05, "loss": 0.5135, "step": 15610 }, { "epoch": 0.874174039646097, "grad_norm": 1.1688611507415771, "learning_rate": 7.8015e-05, "loss": 0.3942, "step": 15611 }, { "epoch": 0.874230036958226, "grad_norm": 1.407023549079895, "learning_rate": 7.802e-05, "loss": 0.4159, "step": 15612 }, { "epoch": 0.874286034270355, "grad_norm": 1.5352704524993896, "learning_rate": 7.8025e-05, "loss": 0.4463, "step": 15613 }, { "epoch": 0.874342031582484, "grad_norm": 1.5974230766296387, "learning_rate": 7.803e-05, "loss": 0.4942, "step": 15614 }, { "epoch": 0.874398028894613, "grad_norm": 1.779258131980896, "learning_rate": 7.803500000000001e-05, "loss": 0.515, "step": 15615 }, { "epoch": 0.8744540262067421, "grad_norm": 1.6793160438537598, "learning_rate": 7.804e-05, "loss": 0.4746, "step": 15616 }, { 
"epoch": 0.8745100235188711, "grad_norm": 1.193279504776001, "learning_rate": 7.8045e-05, "loss": 0.4896, "step": 15617 }, { "epoch": 0.8745660208310001, "grad_norm": 1.3777776956558228, "learning_rate": 7.805e-05, "loss": 0.5491, "step": 15618 }, { "epoch": 0.8746220181431291, "grad_norm": 1.351875901222229, "learning_rate": 7.8055e-05, "loss": 0.4955, "step": 15619 }, { "epoch": 0.8746780154552581, "grad_norm": 1.1543796062469482, "learning_rate": 7.806e-05, "loss": 0.3552, "step": 15620 }, { "epoch": 0.8747340127673872, "grad_norm": 1.2773261070251465, "learning_rate": 7.8065e-05, "loss": 0.4395, "step": 15621 }, { "epoch": 0.8747900100795162, "grad_norm": 1.5070065259933472, "learning_rate": 7.807e-05, "loss": 0.6016, "step": 15622 }, { "epoch": 0.8748460073916452, "grad_norm": 1.2362004518508911, "learning_rate": 7.807500000000001e-05, "loss": 0.4658, "step": 15623 }, { "epoch": 0.8749020047037742, "grad_norm": 1.290040135383606, "learning_rate": 7.808000000000001e-05, "loss": 0.5284, "step": 15624 }, { "epoch": 0.8749580020159032, "grad_norm": 5.069690704345703, "learning_rate": 7.808500000000001e-05, "loss": 0.57, "step": 15625 }, { "epoch": 0.8750139993280323, "grad_norm": 1.8353949785232544, "learning_rate": 7.809e-05, "loss": 0.6024, "step": 15626 }, { "epoch": 0.8750699966401613, "grad_norm": 1.3642324209213257, "learning_rate": 7.8095e-05, "loss": 0.3545, "step": 15627 }, { "epoch": 0.8751259939522903, "grad_norm": 1.3173775672912598, "learning_rate": 7.81e-05, "loss": 0.4785, "step": 15628 }, { "epoch": 0.8751819912644193, "grad_norm": 1.51857590675354, "learning_rate": 7.810500000000001e-05, "loss": 0.39, "step": 15629 }, { "epoch": 0.8752379885765483, "grad_norm": 1.6354382038116455, "learning_rate": 7.811000000000001e-05, "loss": 0.4176, "step": 15630 }, { "epoch": 0.8752939858886774, "grad_norm": 1.3690122365951538, "learning_rate": 7.811500000000001e-05, "loss": 0.4327, "step": 15631 }, { "epoch": 0.8753499832008064, "grad_norm": 
1.4135335683822632, "learning_rate": 7.812e-05, "loss": 0.5097, "step": 15632 }, { "epoch": 0.8754059805129354, "grad_norm": 1.1350250244140625, "learning_rate": 7.8125e-05, "loss": 0.4193, "step": 15633 }, { "epoch": 0.8754619778250644, "grad_norm": 1.281648874282837, "learning_rate": 7.813e-05, "loss": 0.37, "step": 15634 }, { "epoch": 0.8755179751371934, "grad_norm": 1.2919368743896484, "learning_rate": 7.8135e-05, "loss": 0.4193, "step": 15635 }, { "epoch": 0.8755739724493224, "grad_norm": 1.283706784248352, "learning_rate": 7.814000000000001e-05, "loss": 0.4654, "step": 15636 }, { "epoch": 0.8756299697614515, "grad_norm": 1.662026047706604, "learning_rate": 7.8145e-05, "loss": 0.3832, "step": 15637 }, { "epoch": 0.8756859670735805, "grad_norm": 1.2785576581954956, "learning_rate": 7.815e-05, "loss": 0.3598, "step": 15638 }, { "epoch": 0.8757419643857095, "grad_norm": 1.264336109161377, "learning_rate": 7.8155e-05, "loss": 0.4208, "step": 15639 }, { "epoch": 0.8757979616978385, "grad_norm": 1.2264667749404907, "learning_rate": 7.816e-05, "loss": 0.4188, "step": 15640 }, { "epoch": 0.8758539590099675, "grad_norm": 1.6351864337921143, "learning_rate": 7.8165e-05, "loss": 0.5137, "step": 15641 }, { "epoch": 0.8759099563220966, "grad_norm": 1.2195281982421875, "learning_rate": 7.817e-05, "loss": 0.4903, "step": 15642 }, { "epoch": 0.8759659536342256, "grad_norm": 1.5257734060287476, "learning_rate": 7.8175e-05, "loss": 0.4517, "step": 15643 }, { "epoch": 0.8760219509463546, "grad_norm": 1.3661330938339233, "learning_rate": 7.818000000000001e-05, "loss": 0.4797, "step": 15644 }, { "epoch": 0.8760779482584836, "grad_norm": 1.2897977828979492, "learning_rate": 7.818500000000001e-05, "loss": 0.5837, "step": 15645 }, { "epoch": 0.8761339455706126, "grad_norm": 1.2548011541366577, "learning_rate": 7.819000000000001e-05, "loss": 0.5132, "step": 15646 }, { "epoch": 0.8761899428827417, "grad_norm": 1.4747647047042847, "learning_rate": 7.8195e-05, "loss": 0.5048, "step": 
15647 }, { "epoch": 0.8762459401948707, "grad_norm": 1.3684711456298828, "learning_rate": 7.82e-05, "loss": 0.4655, "step": 15648 }, { "epoch": 0.8763019375069997, "grad_norm": 1.2558674812316895, "learning_rate": 7.8205e-05, "loss": 0.3309, "step": 15649 }, { "epoch": 0.8763579348191287, "grad_norm": 1.528770089149475, "learning_rate": 7.821000000000001e-05, "loss": 0.502, "step": 15650 }, { "epoch": 0.8764139321312577, "grad_norm": 1.3130013942718506, "learning_rate": 7.821500000000001e-05, "loss": 0.4539, "step": 15651 }, { "epoch": 0.8764699294433868, "grad_norm": 3.9340603351593018, "learning_rate": 7.822e-05, "loss": 0.4584, "step": 15652 }, { "epoch": 0.8765259267555158, "grad_norm": 1.3083142042160034, "learning_rate": 7.8225e-05, "loss": 0.3608, "step": 15653 }, { "epoch": 0.8765819240676448, "grad_norm": 1.1950018405914307, "learning_rate": 7.823e-05, "loss": 0.4288, "step": 15654 }, { "epoch": 0.8766379213797738, "grad_norm": 1.6428717374801636, "learning_rate": 7.8235e-05, "loss": 0.6502, "step": 15655 }, { "epoch": 0.8766939186919028, "grad_norm": 1.3999062776565552, "learning_rate": 7.824e-05, "loss": 0.5594, "step": 15656 }, { "epoch": 0.8767499160040318, "grad_norm": 1.3282313346862793, "learning_rate": 7.824500000000001e-05, "loss": 0.4249, "step": 15657 }, { "epoch": 0.8768059133161609, "grad_norm": 1.4244219064712524, "learning_rate": 7.825e-05, "loss": 0.3722, "step": 15658 }, { "epoch": 0.8768619106282899, "grad_norm": 1.458259105682373, "learning_rate": 7.8255e-05, "loss": 0.3885, "step": 15659 }, { "epoch": 0.8769179079404189, "grad_norm": 1.1726912260055542, "learning_rate": 7.826e-05, "loss": 0.4037, "step": 15660 }, { "epoch": 0.8769739052525479, "grad_norm": 1.3612245321273804, "learning_rate": 7.8265e-05, "loss": 0.4036, "step": 15661 }, { "epoch": 0.8770299025646769, "grad_norm": 1.4680825471878052, "learning_rate": 7.827e-05, "loss": 0.3367, "step": 15662 }, { "epoch": 0.877085899876806, "grad_norm": 2.020721435546875, "learning_rate": 
7.827499999999999e-05, "loss": 0.5772, "step": 15663 }, { "epoch": 0.877141897188935, "grad_norm": 1.4212771654129028, "learning_rate": 7.828e-05, "loss": 0.6183, "step": 15664 }, { "epoch": 0.877197894501064, "grad_norm": 1.1750584840774536, "learning_rate": 7.828500000000001e-05, "loss": 0.3967, "step": 15665 }, { "epoch": 0.877253891813193, "grad_norm": 1.2902772426605225, "learning_rate": 7.829000000000001e-05, "loss": 0.4072, "step": 15666 }, { "epoch": 0.877309889125322, "grad_norm": 1.3469256162643433, "learning_rate": 7.829500000000001e-05, "loss": 0.3589, "step": 15667 }, { "epoch": 0.877365886437451, "grad_norm": 1.5020853281021118, "learning_rate": 7.83e-05, "loss": 0.6214, "step": 15668 }, { "epoch": 0.87742188374958, "grad_norm": 1.1948949098587036, "learning_rate": 7.8305e-05, "loss": 0.4081, "step": 15669 }, { "epoch": 0.877477881061709, "grad_norm": 1.28453528881073, "learning_rate": 7.831e-05, "loss": 0.3609, "step": 15670 }, { "epoch": 0.877533878373838, "grad_norm": 1.2847203016281128, "learning_rate": 7.831500000000001e-05, "loss": 0.4036, "step": 15671 }, { "epoch": 0.877589875685967, "grad_norm": 1.308562994003296, "learning_rate": 7.832000000000001e-05, "loss": 0.3373, "step": 15672 }, { "epoch": 0.877645872998096, "grad_norm": 1.245770812034607, "learning_rate": 7.8325e-05, "loss": 0.4617, "step": 15673 }, { "epoch": 0.8777018703102251, "grad_norm": 1.3665430545806885, "learning_rate": 7.833e-05, "loss": 0.622, "step": 15674 }, { "epoch": 0.8777578676223541, "grad_norm": 1.1770883798599243, "learning_rate": 7.8335e-05, "loss": 0.3847, "step": 15675 }, { "epoch": 0.8778138649344831, "grad_norm": 1.5456660985946655, "learning_rate": 7.834e-05, "loss": 0.3619, "step": 15676 }, { "epoch": 0.8778698622466121, "grad_norm": 1.3694406747817993, "learning_rate": 7.8345e-05, "loss": 0.3838, "step": 15677 }, { "epoch": 0.8779258595587411, "grad_norm": 1.4143106937408447, "learning_rate": 7.835000000000001e-05, "loss": 0.5196, "step": 15678 }, { 
"epoch": 0.8779818568708702, "grad_norm": 1.2907812595367432, "learning_rate": 7.8355e-05, "loss": 0.3555, "step": 15679 }, { "epoch": 0.8780378541829992, "grad_norm": 1.3018662929534912, "learning_rate": 7.836e-05, "loss": 0.4758, "step": 15680 }, { "epoch": 0.8780938514951282, "grad_norm": 1.2399076223373413, "learning_rate": 7.8365e-05, "loss": 0.32, "step": 15681 }, { "epoch": 0.8781498488072572, "grad_norm": 1.7134053707122803, "learning_rate": 7.837e-05, "loss": 0.3929, "step": 15682 }, { "epoch": 0.8782058461193862, "grad_norm": 1.286105990409851, "learning_rate": 7.8375e-05, "loss": 0.4124, "step": 15683 }, { "epoch": 0.8782618434315153, "grad_norm": 1.7660576105117798, "learning_rate": 7.838e-05, "loss": 0.4636, "step": 15684 }, { "epoch": 0.8783178407436443, "grad_norm": 1.1781138181686401, "learning_rate": 7.8385e-05, "loss": 0.5726, "step": 15685 }, { "epoch": 0.8783738380557733, "grad_norm": 1.2534667253494263, "learning_rate": 7.839000000000001e-05, "loss": 0.3376, "step": 15686 }, { "epoch": 0.8784298353679023, "grad_norm": 1.2173240184783936, "learning_rate": 7.839500000000001e-05, "loss": 0.4651, "step": 15687 }, { "epoch": 0.8784858326800313, "grad_norm": 1.2226678133010864, "learning_rate": 7.840000000000001e-05, "loss": 0.5286, "step": 15688 }, { "epoch": 0.8785418299921604, "grad_norm": 1.1224699020385742, "learning_rate": 7.8405e-05, "loss": 0.5105, "step": 15689 }, { "epoch": 0.8785978273042894, "grad_norm": 1.8798848390579224, "learning_rate": 7.841e-05, "loss": 0.5881, "step": 15690 }, { "epoch": 0.8786538246164184, "grad_norm": 1.2383431196212769, "learning_rate": 7.8415e-05, "loss": 0.3522, "step": 15691 }, { "epoch": 0.8787098219285474, "grad_norm": 1.2047781944274902, "learning_rate": 7.842e-05, "loss": 0.3768, "step": 15692 }, { "epoch": 0.8787658192406764, "grad_norm": 1.237811803817749, "learning_rate": 7.842500000000001e-05, "loss": 0.3701, "step": 15693 }, { "epoch": 0.8788218165528054, "grad_norm": 1.475827693939209, 
"learning_rate": 7.843e-05, "loss": 0.4622, "step": 15694 }, { "epoch": 0.8788778138649345, "grad_norm": 1.2367972135543823, "learning_rate": 7.8435e-05, "loss": 0.4071, "step": 15695 }, { "epoch": 0.8789338111770635, "grad_norm": 1.360946774482727, "learning_rate": 7.844e-05, "loss": 0.5247, "step": 15696 }, { "epoch": 0.8789898084891925, "grad_norm": 1.1919349431991577, "learning_rate": 7.8445e-05, "loss": 0.3536, "step": 15697 }, { "epoch": 0.8790458058013215, "grad_norm": 1.7997097969055176, "learning_rate": 7.845e-05, "loss": 0.6277, "step": 15698 }, { "epoch": 0.8791018031134505, "grad_norm": 1.5822625160217285, "learning_rate": 7.845500000000001e-05, "loss": 0.6634, "step": 15699 }, { "epoch": 0.8791578004255796, "grad_norm": 1.187794804573059, "learning_rate": 7.846e-05, "loss": 0.3938, "step": 15700 }, { "epoch": 0.8792137977377086, "grad_norm": 1.319350242614746, "learning_rate": 7.8465e-05, "loss": 0.3277, "step": 15701 }, { "epoch": 0.8792697950498376, "grad_norm": 1.2361481189727783, "learning_rate": 7.847e-05, "loss": 0.4237, "step": 15702 }, { "epoch": 0.8793257923619666, "grad_norm": 1.2398425340652466, "learning_rate": 7.8475e-05, "loss": 0.4074, "step": 15703 }, { "epoch": 0.8793817896740956, "grad_norm": 1.4108473062515259, "learning_rate": 7.848000000000001e-05, "loss": 0.5114, "step": 15704 }, { "epoch": 0.8794377869862247, "grad_norm": 1.2769901752471924, "learning_rate": 7.8485e-05, "loss": 0.5534, "step": 15705 }, { "epoch": 0.8794937842983537, "grad_norm": 1.167389154434204, "learning_rate": 7.849e-05, "loss": 0.3794, "step": 15706 }, { "epoch": 0.8795497816104827, "grad_norm": 1.3811312913894653, "learning_rate": 7.849500000000001e-05, "loss": 0.4559, "step": 15707 }, { "epoch": 0.8796057789226117, "grad_norm": 1.0768953561782837, "learning_rate": 7.850000000000001e-05, "loss": 0.3338, "step": 15708 }, { "epoch": 0.8796617762347407, "grad_norm": 1.198590874671936, "learning_rate": 7.850500000000001e-05, "loss": 0.3763, "step": 15709 }, { 
"epoch": 0.8797177735468698, "grad_norm": 1.430693507194519, "learning_rate": 7.851e-05, "loss": 0.4116, "step": 15710 }, { "epoch": 0.8797737708589988, "grad_norm": 1.407012939453125, "learning_rate": 7.8515e-05, "loss": 0.4667, "step": 15711 }, { "epoch": 0.8798297681711278, "grad_norm": 1.4793479442596436, "learning_rate": 7.852e-05, "loss": 0.3616, "step": 15712 }, { "epoch": 0.8798857654832568, "grad_norm": 1.2668476104736328, "learning_rate": 7.8525e-05, "loss": 0.58, "step": 15713 }, { "epoch": 0.8799417627953858, "grad_norm": 1.2861369848251343, "learning_rate": 7.853000000000001e-05, "loss": 0.5646, "step": 15714 }, { "epoch": 0.8799977601075148, "grad_norm": 1.1182711124420166, "learning_rate": 7.8535e-05, "loss": 0.385, "step": 15715 }, { "epoch": 0.8800537574196439, "grad_norm": 1.5841948986053467, "learning_rate": 7.854e-05, "loss": 0.4527, "step": 15716 }, { "epoch": 0.8801097547317729, "grad_norm": 1.2600511312484741, "learning_rate": 7.8545e-05, "loss": 0.4581, "step": 15717 }, { "epoch": 0.8801657520439019, "grad_norm": 1.081499695777893, "learning_rate": 7.855e-05, "loss": 0.363, "step": 15718 }, { "epoch": 0.8802217493560309, "grad_norm": 1.4237945079803467, "learning_rate": 7.8555e-05, "loss": 0.5863, "step": 15719 }, { "epoch": 0.8802777466681599, "grad_norm": 1.3789386749267578, "learning_rate": 7.856000000000001e-05, "loss": 0.3903, "step": 15720 }, { "epoch": 0.880333743980289, "grad_norm": 1.7766565084457397, "learning_rate": 7.8565e-05, "loss": 0.5534, "step": 15721 }, { "epoch": 0.880389741292418, "grad_norm": 1.305375576019287, "learning_rate": 7.857e-05, "loss": 0.4963, "step": 15722 }, { "epoch": 0.880445738604547, "grad_norm": 1.1422768831253052, "learning_rate": 7.8575e-05, "loss": 0.3225, "step": 15723 }, { "epoch": 0.880501735916676, "grad_norm": 1.2995232343673706, "learning_rate": 7.858000000000001e-05, "loss": 0.3413, "step": 15724 }, { "epoch": 0.880557733228805, "grad_norm": 1.1238868236541748, "learning_rate": 
7.858500000000001e-05, "loss": 0.3428, "step": 15725 }, { "epoch": 0.8806137305409341, "grad_norm": 2.277445077896118, "learning_rate": 7.859e-05, "loss": 0.4319, "step": 15726 }, { "epoch": 0.8806697278530631, "grad_norm": 1.423354983329773, "learning_rate": 7.8595e-05, "loss": 0.5173, "step": 15727 }, { "epoch": 0.8807257251651921, "grad_norm": 1.0879390239715576, "learning_rate": 7.860000000000001e-05, "loss": 0.4703, "step": 15728 }, { "epoch": 0.8807817224773211, "grad_norm": 1.377094030380249, "learning_rate": 7.860500000000001e-05, "loss": 0.459, "step": 15729 }, { "epoch": 0.8808377197894501, "grad_norm": 1.2914865016937256, "learning_rate": 7.861000000000001e-05, "loss": 0.4543, "step": 15730 }, { "epoch": 0.8808937171015792, "grad_norm": 1.1602896451950073, "learning_rate": 7.8615e-05, "loss": 0.3261, "step": 15731 }, { "epoch": 0.8809497144137082, "grad_norm": 1.5694911479949951, "learning_rate": 7.862e-05, "loss": 0.4, "step": 15732 }, { "epoch": 0.8810057117258372, "grad_norm": 1.3546191453933716, "learning_rate": 7.8625e-05, "loss": 0.3636, "step": 15733 }, { "epoch": 0.8810617090379662, "grad_norm": 1.2792226076126099, "learning_rate": 7.863e-05, "loss": 0.4535, "step": 15734 }, { "epoch": 0.8811177063500952, "grad_norm": 1.7178654670715332, "learning_rate": 7.863500000000001e-05, "loss": 0.3848, "step": 15735 }, { "epoch": 0.8811737036622243, "grad_norm": 1.3578240871429443, "learning_rate": 7.864e-05, "loss": 0.4088, "step": 15736 }, { "epoch": 0.8812297009743533, "grad_norm": 1.5478277206420898, "learning_rate": 7.8645e-05, "loss": 0.4679, "step": 15737 }, { "epoch": 0.8812856982864823, "grad_norm": 1.336410403251648, "learning_rate": 7.865e-05, "loss": 0.4307, "step": 15738 }, { "epoch": 0.8813416955986113, "grad_norm": 1.3684492111206055, "learning_rate": 7.8655e-05, "loss": 0.4762, "step": 15739 }, { "epoch": 0.8813976929107403, "grad_norm": 1.430402398109436, "learning_rate": 7.866e-05, "loss": 0.6, "step": 15740 }, { "epoch": 
0.8814536902228693, "grad_norm": 1.1227734088897705, "learning_rate": 7.866499999999999e-05, "loss": 0.3133, "step": 15741 }, { "epoch": 0.8815096875349984, "grad_norm": 1.2948923110961914, "learning_rate": 7.867e-05, "loss": 0.4961, "step": 15742 }, { "epoch": 0.8815656848471274, "grad_norm": 1.7092138528823853, "learning_rate": 7.8675e-05, "loss": 0.5236, "step": 15743 }, { "epoch": 0.8816216821592564, "grad_norm": 1.4647992849349976, "learning_rate": 7.868000000000001e-05, "loss": 0.5177, "step": 15744 }, { "epoch": 0.8816776794713854, "grad_norm": 1.3417359590530396, "learning_rate": 7.868500000000001e-05, "loss": 0.4036, "step": 15745 }, { "epoch": 0.8817336767835144, "grad_norm": 1.4374005794525146, "learning_rate": 7.869000000000001e-05, "loss": 0.4864, "step": 15746 }, { "epoch": 0.8817896740956435, "grad_norm": 1.2635575532913208, "learning_rate": 7.8695e-05, "loss": 0.4443, "step": 15747 }, { "epoch": 0.8818456714077725, "grad_norm": 1.4097075462341309, "learning_rate": 7.87e-05, "loss": 0.4626, "step": 15748 }, { "epoch": 0.8819016687199015, "grad_norm": 1.5398057699203491, "learning_rate": 7.870500000000001e-05, "loss": 0.6255, "step": 15749 }, { "epoch": 0.8819576660320304, "grad_norm": 1.2846860885620117, "learning_rate": 7.871000000000001e-05, "loss": 0.422, "step": 15750 }, { "epoch": 0.8820136633441594, "grad_norm": 1.2987191677093506, "learning_rate": 7.871500000000001e-05, "loss": 0.4639, "step": 15751 }, { "epoch": 0.8820696606562884, "grad_norm": 1.2411824464797974, "learning_rate": 7.872e-05, "loss": 0.4454, "step": 15752 }, { "epoch": 0.8821256579684175, "grad_norm": 1.18653404712677, "learning_rate": 7.8725e-05, "loss": 0.4113, "step": 15753 }, { "epoch": 0.8821816552805465, "grad_norm": 1.386753797531128, "learning_rate": 7.873e-05, "loss": 0.5086, "step": 15754 }, { "epoch": 0.8822376525926755, "grad_norm": 1.205653429031372, "learning_rate": 7.8735e-05, "loss": 0.4265, "step": 15755 }, { "epoch": 0.8822936499048045, "grad_norm": 
1.688432216644287, "learning_rate": 7.874000000000001e-05, "loss": 0.531, "step": 15756 }, { "epoch": 0.8823496472169335, "grad_norm": 1.3085002899169922, "learning_rate": 7.8745e-05, "loss": 0.4349, "step": 15757 }, { "epoch": 0.8824056445290626, "grad_norm": 1.0334367752075195, "learning_rate": 7.875e-05, "loss": 0.4277, "step": 15758 }, { "epoch": 0.8824616418411916, "grad_norm": 1.3466542959213257, "learning_rate": 7.8755e-05, "loss": 0.4581, "step": 15759 }, { "epoch": 0.8825176391533206, "grad_norm": 1.5849618911743164, "learning_rate": 7.876e-05, "loss": 0.5612, "step": 15760 }, { "epoch": 0.8825736364654496, "grad_norm": 1.3699510097503662, "learning_rate": 7.8765e-05, "loss": 0.4414, "step": 15761 }, { "epoch": 0.8826296337775786, "grad_norm": 1.2993191480636597, "learning_rate": 7.876999999999999e-05, "loss": 0.4806, "step": 15762 }, { "epoch": 0.8826856310897077, "grad_norm": 1.302834153175354, "learning_rate": 7.8775e-05, "loss": 0.3442, "step": 15763 }, { "epoch": 0.8827416284018367, "grad_norm": 1.1885366439819336, "learning_rate": 7.878e-05, "loss": 0.4186, "step": 15764 }, { "epoch": 0.8827976257139657, "grad_norm": 2.4305038452148438, "learning_rate": 7.878500000000001e-05, "loss": 0.786, "step": 15765 }, { "epoch": 0.8828536230260947, "grad_norm": 1.4897726774215698, "learning_rate": 7.879000000000001e-05, "loss": 0.5225, "step": 15766 }, { "epoch": 0.8829096203382237, "grad_norm": 1.3332767486572266, "learning_rate": 7.879500000000001e-05, "loss": 0.4555, "step": 15767 }, { "epoch": 0.8829656176503528, "grad_norm": 1.3837586641311646, "learning_rate": 7.88e-05, "loss": 0.5187, "step": 15768 }, { "epoch": 0.8830216149624818, "grad_norm": 1.3469719886779785, "learning_rate": 7.8805e-05, "loss": 0.3342, "step": 15769 }, { "epoch": 0.8830776122746108, "grad_norm": 1.5540140867233276, "learning_rate": 7.881e-05, "loss": 0.4922, "step": 15770 }, { "epoch": 0.8831336095867398, "grad_norm": 1.3087104558944702, "learning_rate": 7.881500000000001e-05, 
"loss": 0.4845, "step": 15771 }, { "epoch": 0.8831896068988688, "grad_norm": 1.4179688692092896, "learning_rate": 7.882000000000001e-05, "loss": 0.422, "step": 15772 }, { "epoch": 0.8832456042109978, "grad_norm": 1.3407858610153198, "learning_rate": 7.8825e-05, "loss": 0.5993, "step": 15773 }, { "epoch": 0.8833016015231269, "grad_norm": 1.298976182937622, "learning_rate": 7.883e-05, "loss": 0.6, "step": 15774 }, { "epoch": 0.8833575988352559, "grad_norm": 2.641345739364624, "learning_rate": 7.8835e-05, "loss": 0.4339, "step": 15775 }, { "epoch": 0.8834135961473849, "grad_norm": 1.4696484804153442, "learning_rate": 7.884e-05, "loss": 0.7233, "step": 15776 }, { "epoch": 0.8834695934595139, "grad_norm": 1.3720401525497437, "learning_rate": 7.884500000000001e-05, "loss": 0.4915, "step": 15777 }, { "epoch": 0.8835255907716429, "grad_norm": 1.4552339315414429, "learning_rate": 7.885e-05, "loss": 0.5732, "step": 15778 }, { "epoch": 0.883581588083772, "grad_norm": 1.0989445447921753, "learning_rate": 7.8855e-05, "loss": 0.4422, "step": 15779 }, { "epoch": 0.883637585395901, "grad_norm": 1.638524055480957, "learning_rate": 7.886e-05, "loss": 0.6128, "step": 15780 }, { "epoch": 0.88369358270803, "grad_norm": 1.354801893234253, "learning_rate": 7.8865e-05, "loss": 0.4369, "step": 15781 }, { "epoch": 0.883749580020159, "grad_norm": 1.3780988454818726, "learning_rate": 7.887e-05, "loss": 0.5277, "step": 15782 }, { "epoch": 0.883805577332288, "grad_norm": 2.5515425205230713, "learning_rate": 7.887499999999999e-05, "loss": 0.4847, "step": 15783 }, { "epoch": 0.8838615746444171, "grad_norm": 1.3097612857818604, "learning_rate": 7.888e-05, "loss": 0.489, "step": 15784 }, { "epoch": 0.8839175719565461, "grad_norm": 1.5259464979171753, "learning_rate": 7.888500000000001e-05, "loss": 0.6229, "step": 15785 }, { "epoch": 0.8839735692686751, "grad_norm": 1.48321533203125, "learning_rate": 7.889000000000001e-05, "loss": 0.4613, "step": 15786 }, { "epoch": 0.8840295665808041, "grad_norm": 
1.4632923603057861, "learning_rate": 7.889500000000001e-05, "loss": 0.5773, "step": 15787 }, { "epoch": 0.8840855638929331, "grad_norm": 1.2408758401870728, "learning_rate": 7.890000000000001e-05, "loss": 0.4215, "step": 15788 }, { "epoch": 0.8841415612050622, "grad_norm": 1.5239912271499634, "learning_rate": 7.8905e-05, "loss": 0.404, "step": 15789 }, { "epoch": 0.8841975585171912, "grad_norm": 1.1883434057235718, "learning_rate": 7.891e-05, "loss": 0.4913, "step": 15790 }, { "epoch": 0.8842535558293202, "grad_norm": 1.1972849369049072, "learning_rate": 7.8915e-05, "loss": 0.3678, "step": 15791 }, { "epoch": 0.8843095531414492, "grad_norm": 1.5056698322296143, "learning_rate": 7.892000000000001e-05, "loss": 0.4903, "step": 15792 }, { "epoch": 0.8843655504535782, "grad_norm": 1.1644717454910278, "learning_rate": 7.892500000000001e-05, "loss": 0.3522, "step": 15793 }, { "epoch": 0.8844215477657072, "grad_norm": 1.3911675214767456, "learning_rate": 7.893e-05, "loss": 0.4528, "step": 15794 }, { "epoch": 0.8844775450778363, "grad_norm": 1.78517484664917, "learning_rate": 7.8935e-05, "loss": 0.6006, "step": 15795 }, { "epoch": 0.8845335423899653, "grad_norm": 1.4201488494873047, "learning_rate": 7.894e-05, "loss": 0.4007, "step": 15796 }, { "epoch": 0.8845895397020943, "grad_norm": 1.1176270246505737, "learning_rate": 7.8945e-05, "loss": 0.3761, "step": 15797 }, { "epoch": 0.8846455370142233, "grad_norm": 1.5850352048873901, "learning_rate": 7.895000000000001e-05, "loss": 0.5135, "step": 15798 }, { "epoch": 0.8847015343263523, "grad_norm": 1.4305088520050049, "learning_rate": 7.8955e-05, "loss": 0.5224, "step": 15799 }, { "epoch": 0.8847575316384814, "grad_norm": 1.3241698741912842, "learning_rate": 7.896e-05, "loss": 0.4852, "step": 15800 }, { "epoch": 0.8848135289506104, "grad_norm": 1.5602823495864868, "learning_rate": 7.8965e-05, "loss": 0.5188, "step": 15801 }, { "epoch": 0.8848695262627394, "grad_norm": 1.3524249792099, "learning_rate": 7.897e-05, "loss": 0.462, 
"step": 15802 }, { "epoch": 0.8849255235748684, "grad_norm": 1.0575517416000366, "learning_rate": 7.8975e-05, "loss": 0.3493, "step": 15803 }, { "epoch": 0.8849815208869974, "grad_norm": 1.414480447769165, "learning_rate": 7.897999999999999e-05, "loss": 0.4526, "step": 15804 }, { "epoch": 0.8850375181991265, "grad_norm": 1.4470492601394653, "learning_rate": 7.8985e-05, "loss": 0.3683, "step": 15805 }, { "epoch": 0.8850935155112555, "grad_norm": 1.4290904998779297, "learning_rate": 7.899000000000001e-05, "loss": 0.5473, "step": 15806 }, { "epoch": 0.8851495128233845, "grad_norm": 1.4914425611495972, "learning_rate": 7.899500000000001e-05, "loss": 0.5157, "step": 15807 }, { "epoch": 0.8852055101355135, "grad_norm": 2.0020601749420166, "learning_rate": 7.900000000000001e-05, "loss": 0.5264, "step": 15808 }, { "epoch": 0.8852615074476425, "grad_norm": 1.4715309143066406, "learning_rate": 7.9005e-05, "loss": 0.5628, "step": 15809 }, { "epoch": 0.8853175047597716, "grad_norm": 1.3492498397827148, "learning_rate": 7.901e-05, "loss": 0.5557, "step": 15810 }, { "epoch": 0.8853735020719006, "grad_norm": 1.3634693622589111, "learning_rate": 7.9015e-05, "loss": 0.4594, "step": 15811 }, { "epoch": 0.8854294993840296, "grad_norm": 1.4548414945602417, "learning_rate": 7.902e-05, "loss": 0.3877, "step": 15812 }, { "epoch": 0.8854854966961586, "grad_norm": 1.302975058555603, "learning_rate": 7.902500000000001e-05, "loss": 0.4062, "step": 15813 }, { "epoch": 0.8855414940082876, "grad_norm": 1.6429636478424072, "learning_rate": 7.903000000000001e-05, "loss": 0.644, "step": 15814 }, { "epoch": 0.8855974913204167, "grad_norm": 1.5866872072219849, "learning_rate": 7.9035e-05, "loss": 0.5736, "step": 15815 }, { "epoch": 0.8856534886325457, "grad_norm": 1.7080777883529663, "learning_rate": 7.904e-05, "loss": 0.529, "step": 15816 }, { "epoch": 0.8857094859446747, "grad_norm": 1.3886007070541382, "learning_rate": 7.9045e-05, "loss": 0.3237, "step": 15817 }, { "epoch": 0.8857654832568037, 
"grad_norm": 1.5394459962844849, "learning_rate": 7.905e-05, "loss": 0.4803, "step": 15818 }, { "epoch": 0.8858214805689327, "grad_norm": 1.3017991781234741, "learning_rate": 7.905500000000001e-05, "loss": 0.4171, "step": 15819 }, { "epoch": 0.8858774778810617, "grad_norm": 1.491532802581787, "learning_rate": 7.906e-05, "loss": 0.5411, "step": 15820 }, { "epoch": 0.8859334751931908, "grad_norm": 1.353472113609314, "learning_rate": 7.9065e-05, "loss": 0.4454, "step": 15821 }, { "epoch": 0.8859894725053198, "grad_norm": 18.59945297241211, "learning_rate": 7.907e-05, "loss": 0.472, "step": 15822 }, { "epoch": 0.8860454698174488, "grad_norm": 1.55799400806427, "learning_rate": 7.9075e-05, "loss": 0.514, "step": 15823 }, { "epoch": 0.8861014671295778, "grad_norm": 1.2203803062438965, "learning_rate": 7.908e-05, "loss": 0.4197, "step": 15824 }, { "epoch": 0.8861574644417068, "grad_norm": 1.3588980436325073, "learning_rate": 7.9085e-05, "loss": 0.3994, "step": 15825 }, { "epoch": 0.8862134617538359, "grad_norm": 1.366632342338562, "learning_rate": 7.909e-05, "loss": 0.4418, "step": 15826 }, { "epoch": 0.8862694590659649, "grad_norm": 1.4658832550048828, "learning_rate": 7.909500000000001e-05, "loss": 0.5261, "step": 15827 }, { "epoch": 0.8863254563780939, "grad_norm": 1.6701146364212036, "learning_rate": 7.910000000000001e-05, "loss": 0.4231, "step": 15828 }, { "epoch": 0.8863814536902229, "grad_norm": 1.3779981136322021, "learning_rate": 7.910500000000001e-05, "loss": 0.5005, "step": 15829 }, { "epoch": 0.8864374510023519, "grad_norm": 1.3617491722106934, "learning_rate": 7.911e-05, "loss": 0.4605, "step": 15830 }, { "epoch": 0.886493448314481, "grad_norm": 1.4876869916915894, "learning_rate": 7.9115e-05, "loss": 0.5499, "step": 15831 }, { "epoch": 0.88654944562661, "grad_norm": 1.2605429887771606, "learning_rate": 7.912e-05, "loss": 0.3691, "step": 15832 }, { "epoch": 0.8866054429387389, "grad_norm": 1.374754548072815, "learning_rate": 7.9125e-05, "loss": 0.395, "step": 
15833 }, { "epoch": 0.8866614402508679, "grad_norm": 1.4355651140213013, "learning_rate": 7.913000000000001e-05, "loss": 0.4694, "step": 15834 }, { "epoch": 0.8867174375629969, "grad_norm": 2.087085723876953, "learning_rate": 7.913500000000001e-05, "loss": 0.4419, "step": 15835 }, { "epoch": 0.8867734348751259, "grad_norm": 1.4105737209320068, "learning_rate": 7.914e-05, "loss": 0.4611, "step": 15836 }, { "epoch": 0.886829432187255, "grad_norm": 1.5528287887573242, "learning_rate": 7.9145e-05, "loss": 0.4813, "step": 15837 }, { "epoch": 0.886885429499384, "grad_norm": 1.5225160121917725, "learning_rate": 7.915e-05, "loss": 0.3797, "step": 15838 }, { "epoch": 0.886941426811513, "grad_norm": 1.4016565084457397, "learning_rate": 7.9155e-05, "loss": 0.436, "step": 15839 }, { "epoch": 0.886997424123642, "grad_norm": 1.6190400123596191, "learning_rate": 7.916e-05, "loss": 0.6496, "step": 15840 }, { "epoch": 0.887053421435771, "grad_norm": 1.536169171333313, "learning_rate": 7.9165e-05, "loss": 0.491, "step": 15841 }, { "epoch": 0.8871094187479001, "grad_norm": 1.1836466789245605, "learning_rate": 7.917e-05, "loss": 0.3229, "step": 15842 }, { "epoch": 0.8871654160600291, "grad_norm": 1.2826460599899292, "learning_rate": 7.9175e-05, "loss": 0.3797, "step": 15843 }, { "epoch": 0.8872214133721581, "grad_norm": 1.3410890102386475, "learning_rate": 7.918e-05, "loss": 0.4594, "step": 15844 }, { "epoch": 0.8872774106842871, "grad_norm": 1.5151267051696777, "learning_rate": 7.918500000000001e-05, "loss": 0.6094, "step": 15845 }, { "epoch": 0.8873334079964161, "grad_norm": 1.3532662391662598, "learning_rate": 7.919e-05, "loss": 0.552, "step": 15846 }, { "epoch": 0.8873894053085452, "grad_norm": 1.1911065578460693, "learning_rate": 7.9195e-05, "loss": 0.3841, "step": 15847 }, { "epoch": 0.8874454026206742, "grad_norm": 1.689738392829895, "learning_rate": 7.920000000000001e-05, "loss": 0.4415, "step": 15848 }, { "epoch": 0.8875013999328032, "grad_norm": 1.3184703588485718, 
"learning_rate": 7.920500000000001e-05, "loss": 0.4592, "step": 15849 }, { "epoch": 0.8875573972449322, "grad_norm": 1.3035863637924194, "learning_rate": 7.921000000000001e-05, "loss": 0.3915, "step": 15850 }, { "epoch": 0.8876133945570612, "grad_norm": 1.5418294668197632, "learning_rate": 7.9215e-05, "loss": 0.4651, "step": 15851 }, { "epoch": 0.8876693918691902, "grad_norm": 1.2926342487335205, "learning_rate": 7.922e-05, "loss": 0.3957, "step": 15852 }, { "epoch": 0.8877253891813193, "grad_norm": 1.4467759132385254, "learning_rate": 7.9225e-05, "loss": 0.4977, "step": 15853 }, { "epoch": 0.8877813864934483, "grad_norm": 1.3778395652770996, "learning_rate": 7.923e-05, "loss": 0.4759, "step": 15854 }, { "epoch": 0.8878373838055773, "grad_norm": 1.3714171648025513, "learning_rate": 7.923500000000001e-05, "loss": 0.3592, "step": 15855 }, { "epoch": 0.8878933811177063, "grad_norm": 1.3562978506088257, "learning_rate": 7.924000000000001e-05, "loss": 0.4602, "step": 15856 }, { "epoch": 0.8879493784298353, "grad_norm": 1.3002963066101074, "learning_rate": 7.9245e-05, "loss": 0.4336, "step": 15857 }, { "epoch": 0.8880053757419644, "grad_norm": 1.0840893983840942, "learning_rate": 7.925e-05, "loss": 0.4134, "step": 15858 }, { "epoch": 0.8880613730540934, "grad_norm": 1.65232515335083, "learning_rate": 7.9255e-05, "loss": 0.6223, "step": 15859 }, { "epoch": 0.8881173703662224, "grad_norm": 1.4145983457565308, "learning_rate": 7.926e-05, "loss": 0.5579, "step": 15860 }, { "epoch": 0.8881733676783514, "grad_norm": 1.2491377592086792, "learning_rate": 7.9265e-05, "loss": 0.4983, "step": 15861 }, { "epoch": 0.8882293649904804, "grad_norm": 1.2925677299499512, "learning_rate": 7.927e-05, "loss": 0.4141, "step": 15862 }, { "epoch": 0.8882853623026095, "grad_norm": 1.2815871238708496, "learning_rate": 7.9275e-05, "loss": 0.3611, "step": 15863 }, { "epoch": 0.8883413596147385, "grad_norm": 1.209580421447754, "learning_rate": 7.928e-05, "loss": 0.3394, "step": 15864 }, { "epoch": 
0.8883973569268675, "grad_norm": 1.3125176429748535, "learning_rate": 7.928500000000001e-05, "loss": 0.4889, "step": 15865 }, { "epoch": 0.8884533542389965, "grad_norm": 1.1995253562927246, "learning_rate": 7.929000000000001e-05, "loss": 0.4111, "step": 15866 }, { "epoch": 0.8885093515511255, "grad_norm": 1.5066097974777222, "learning_rate": 7.9295e-05, "loss": 0.3786, "step": 15867 }, { "epoch": 0.8885653488632546, "grad_norm": 1.250152349472046, "learning_rate": 7.93e-05, "loss": 0.3288, "step": 15868 }, { "epoch": 0.8886213461753836, "grad_norm": 1.4152107238769531, "learning_rate": 7.9305e-05, "loss": 0.5992, "step": 15869 }, { "epoch": 0.8886773434875126, "grad_norm": 2.087266683578491, "learning_rate": 7.931000000000001e-05, "loss": 0.4672, "step": 15870 }, { "epoch": 0.8887333407996416, "grad_norm": 1.2819197177886963, "learning_rate": 7.931500000000001e-05, "loss": 0.3629, "step": 15871 }, { "epoch": 0.8887893381117706, "grad_norm": 1.2254847288131714, "learning_rate": 7.932e-05, "loss": 0.4562, "step": 15872 }, { "epoch": 0.8888453354238997, "grad_norm": 1.9349656105041504, "learning_rate": 7.9325e-05, "loss": 0.7662, "step": 15873 }, { "epoch": 0.8889013327360287, "grad_norm": 3.4781572818756104, "learning_rate": 7.933e-05, "loss": 0.4305, "step": 15874 }, { "epoch": 0.8889573300481577, "grad_norm": 1.4061557054519653, "learning_rate": 7.9335e-05, "loss": 0.5388, "step": 15875 }, { "epoch": 0.8890133273602867, "grad_norm": 1.6819357872009277, "learning_rate": 7.934000000000001e-05, "loss": 0.5555, "step": 15876 }, { "epoch": 0.8890693246724157, "grad_norm": 1.3523751497268677, "learning_rate": 7.934500000000001e-05, "loss": 0.3901, "step": 15877 }, { "epoch": 0.8891253219845447, "grad_norm": 1.488936185836792, "learning_rate": 7.935e-05, "loss": 0.6156, "step": 15878 }, { "epoch": 0.8891813192966738, "grad_norm": 1.2007721662521362, "learning_rate": 7.9355e-05, "loss": 0.3833, "step": 15879 }, { "epoch": 0.8892373166088028, "grad_norm": 1.336026668548584, 
"learning_rate": 7.936e-05, "loss": 0.5105, "step": 15880 }, { "epoch": 0.8892933139209318, "grad_norm": 1.1636326313018799, "learning_rate": 7.9365e-05, "loss": 0.4252, "step": 15881 }, { "epoch": 0.8893493112330608, "grad_norm": 1.8537273406982422, "learning_rate": 7.937e-05, "loss": 0.4205, "step": 15882 }, { "epoch": 0.8894053085451898, "grad_norm": 1.2801110744476318, "learning_rate": 7.9375e-05, "loss": 0.6122, "step": 15883 }, { "epoch": 0.8894613058573189, "grad_norm": 1.5170996189117432, "learning_rate": 7.938e-05, "loss": 0.439, "step": 15884 }, { "epoch": 0.8895173031694479, "grad_norm": 1.350251317024231, "learning_rate": 7.9385e-05, "loss": 0.4352, "step": 15885 }, { "epoch": 0.8895733004815769, "grad_norm": 1.1521936655044556, "learning_rate": 7.939000000000001e-05, "loss": 0.4359, "step": 15886 }, { "epoch": 0.8896292977937059, "grad_norm": 1.2937862873077393, "learning_rate": 7.939500000000001e-05, "loss": 0.3774, "step": 15887 }, { "epoch": 0.8896852951058349, "grad_norm": 1.3156973123550415, "learning_rate": 7.94e-05, "loss": 0.4469, "step": 15888 }, { "epoch": 0.889741292417964, "grad_norm": 1.879453420639038, "learning_rate": 7.9405e-05, "loss": 0.5854, "step": 15889 }, { "epoch": 0.889797289730093, "grad_norm": 18.626432418823242, "learning_rate": 7.941e-05, "loss": 0.4712, "step": 15890 }, { "epoch": 0.889853287042222, "grad_norm": 1.63935387134552, "learning_rate": 7.941500000000001e-05, "loss": 0.435, "step": 15891 }, { "epoch": 0.889909284354351, "grad_norm": 1.7789877653121948, "learning_rate": 7.942000000000001e-05, "loss": 0.4656, "step": 15892 }, { "epoch": 0.88996528166648, "grad_norm": 1.30344557762146, "learning_rate": 7.9425e-05, "loss": 0.4466, "step": 15893 }, { "epoch": 0.890021278978609, "grad_norm": 1.3301328420639038, "learning_rate": 7.943e-05, "loss": 0.4277, "step": 15894 }, { "epoch": 0.8900772762907381, "grad_norm": 1.4361047744750977, "learning_rate": 7.9435e-05, "loss": 0.4576, "step": 15895 }, { "epoch": 
0.8901332736028671, "grad_norm": 1.6864228248596191, "learning_rate": 7.944e-05, "loss": 0.3797, "step": 15896 }, { "epoch": 0.8901892709149961, "grad_norm": 1.6416587829589844, "learning_rate": 7.944500000000001e-05, "loss": 0.4188, "step": 15897 }, { "epoch": 0.8902452682271251, "grad_norm": 1.5706303119659424, "learning_rate": 7.945e-05, "loss": 0.4691, "step": 15898 }, { "epoch": 0.8903012655392541, "grad_norm": 1.7088900804519653, "learning_rate": 7.9455e-05, "loss": 0.6489, "step": 15899 }, { "epoch": 0.8903572628513832, "grad_norm": 1.3953651189804077, "learning_rate": 7.946e-05, "loss": 0.4973, "step": 15900 }, { "epoch": 0.8904132601635122, "grad_norm": 1.3055979013442993, "learning_rate": 7.9465e-05, "loss": 0.3984, "step": 15901 }, { "epoch": 0.8904692574756412, "grad_norm": 1.4346344470977783, "learning_rate": 7.947e-05, "loss": 0.5526, "step": 15902 }, { "epoch": 0.8905252547877702, "grad_norm": 1.500628113746643, "learning_rate": 7.9475e-05, "loss": 0.4486, "step": 15903 }, { "epoch": 0.8905812520998992, "grad_norm": 1.1303764581680298, "learning_rate": 7.948e-05, "loss": 0.3409, "step": 15904 }, { "epoch": 0.8906372494120283, "grad_norm": 1.253481149673462, "learning_rate": 7.9485e-05, "loss": 0.4313, "step": 15905 }, { "epoch": 0.8906932467241573, "grad_norm": 1.2080345153808594, "learning_rate": 7.949000000000001e-05, "loss": 0.4027, "step": 15906 }, { "epoch": 0.8907492440362863, "grad_norm": 1.382400393486023, "learning_rate": 7.949500000000001e-05, "loss": 0.4287, "step": 15907 }, { "epoch": 0.8908052413484153, "grad_norm": 1.2246397733688354, "learning_rate": 7.950000000000001e-05, "loss": 0.4408, "step": 15908 }, { "epoch": 0.8908612386605443, "grad_norm": 1.305453896522522, "learning_rate": 7.9505e-05, "loss": 0.4523, "step": 15909 }, { "epoch": 0.8909172359726734, "grad_norm": 1.3066540956497192, "learning_rate": 7.951e-05, "loss": 0.3914, "step": 15910 }, { "epoch": 0.8909732332848024, "grad_norm": 1.8098177909851074, "learning_rate": 
7.9515e-05, "loss": 0.5402, "step": 15911 }, { "epoch": 0.8910292305969314, "grad_norm": 1.3305491209030151, "learning_rate": 7.952000000000001e-05, "loss": 0.4655, "step": 15912 }, { "epoch": 0.8910852279090604, "grad_norm": 1.63957941532135, "learning_rate": 7.952500000000001e-05, "loss": 0.3788, "step": 15913 }, { "epoch": 0.8911412252211894, "grad_norm": 1.2962210178375244, "learning_rate": 7.953e-05, "loss": 0.5002, "step": 15914 }, { "epoch": 0.8911972225333185, "grad_norm": 1.2458339929580688, "learning_rate": 7.9535e-05, "loss": 0.3197, "step": 15915 }, { "epoch": 0.8912532198454474, "grad_norm": 1.050899863243103, "learning_rate": 7.954e-05, "loss": 0.3315, "step": 15916 }, { "epoch": 0.8913092171575764, "grad_norm": 1.336843490600586, "learning_rate": 7.9545e-05, "loss": 0.4845, "step": 15917 }, { "epoch": 0.8913652144697054, "grad_norm": 1.130553126335144, "learning_rate": 7.955e-05, "loss": 0.3586, "step": 15918 }, { "epoch": 0.8914212117818344, "grad_norm": 1.126340389251709, "learning_rate": 7.9555e-05, "loss": 0.3741, "step": 15919 }, { "epoch": 0.8914772090939634, "grad_norm": 1.239076018333435, "learning_rate": 7.956e-05, "loss": 0.4854, "step": 15920 }, { "epoch": 0.8915332064060925, "grad_norm": 29.588964462280273, "learning_rate": 7.9565e-05, "loss": 0.4257, "step": 15921 }, { "epoch": 0.8915892037182215, "grad_norm": 1.1070128679275513, "learning_rate": 7.957e-05, "loss": 0.3545, "step": 15922 }, { "epoch": 0.8916452010303505, "grad_norm": 1.6583384275436401, "learning_rate": 7.9575e-05, "loss": 0.4977, "step": 15923 }, { "epoch": 0.8917011983424795, "grad_norm": 1.3087083101272583, "learning_rate": 7.958e-05, "loss": 0.5173, "step": 15924 }, { "epoch": 0.8917571956546085, "grad_norm": 1.3332713842391968, "learning_rate": 7.9585e-05, "loss": 0.5326, "step": 15925 }, { "epoch": 0.8918131929667376, "grad_norm": 1.3929810523986816, "learning_rate": 7.959000000000002e-05, "loss": 0.4613, "step": 15926 }, { "epoch": 0.8918691902788666, "grad_norm": 
1.5982316732406616, "learning_rate": 7.959500000000001e-05, "loss": 0.4301, "step": 15927 }, { "epoch": 0.8919251875909956, "grad_norm": 1.7531770467758179, "learning_rate": 7.960000000000001e-05, "loss": 0.4654, "step": 15928 }, { "epoch": 0.8919811849031246, "grad_norm": 1.43354070186615, "learning_rate": 7.960500000000001e-05, "loss": 0.4953, "step": 15929 }, { "epoch": 0.8920371822152536, "grad_norm": 1.5353807210922241, "learning_rate": 7.961e-05, "loss": 0.4647, "step": 15930 }, { "epoch": 0.8920931795273827, "grad_norm": 1.599859356880188, "learning_rate": 7.9615e-05, "loss": 0.4708, "step": 15931 }, { "epoch": 0.8921491768395117, "grad_norm": 1.3043773174285889, "learning_rate": 7.962e-05, "loss": 0.4345, "step": 15932 }, { "epoch": 0.8922051741516407, "grad_norm": 1.63169527053833, "learning_rate": 7.962500000000001e-05, "loss": 0.4663, "step": 15933 }, { "epoch": 0.8922611714637697, "grad_norm": 2.183722734451294, "learning_rate": 7.963000000000001e-05, "loss": 0.4604, "step": 15934 }, { "epoch": 0.8923171687758987, "grad_norm": 1.592522144317627, "learning_rate": 7.9635e-05, "loss": 0.5142, "step": 15935 }, { "epoch": 0.8923731660880277, "grad_norm": 1.4822566509246826, "learning_rate": 7.964e-05, "loss": 0.5215, "step": 15936 }, { "epoch": 0.8924291634001568, "grad_norm": 1.3690813779830933, "learning_rate": 7.9645e-05, "loss": 0.5041, "step": 15937 }, { "epoch": 0.8924851607122858, "grad_norm": 1.424587607383728, "learning_rate": 7.965e-05, "loss": 0.4089, "step": 15938 }, { "epoch": 0.8925411580244148, "grad_norm": 1.2622556686401367, "learning_rate": 7.9655e-05, "loss": 0.3578, "step": 15939 }, { "epoch": 0.8925971553365438, "grad_norm": 1.181199550628662, "learning_rate": 7.966e-05, "loss": 0.3952, "step": 15940 }, { "epoch": 0.8926531526486728, "grad_norm": 1.1826465129852295, "learning_rate": 7.9665e-05, "loss": 0.4911, "step": 15941 }, { "epoch": 0.8927091499608019, "grad_norm": 1.2569098472595215, "learning_rate": 7.967e-05, "loss": 0.4143, 
"step": 15942 }, { "epoch": 0.8927651472729309, "grad_norm": 1.435709834098816, "learning_rate": 7.9675e-05, "loss": 0.4349, "step": 15943 }, { "epoch": 0.8928211445850599, "grad_norm": 1.2740654945373535, "learning_rate": 7.968e-05, "loss": 0.425, "step": 15944 }, { "epoch": 0.8928771418971889, "grad_norm": 1.3047778606414795, "learning_rate": 7.9685e-05, "loss": 0.3786, "step": 15945 }, { "epoch": 0.8929331392093179, "grad_norm": 1.2718031406402588, "learning_rate": 7.969e-05, "loss": 0.3694, "step": 15946 }, { "epoch": 0.892989136521447, "grad_norm": 1.4117116928100586, "learning_rate": 7.9695e-05, "loss": 0.5094, "step": 15947 }, { "epoch": 0.893045133833576, "grad_norm": 1.591858983039856, "learning_rate": 7.970000000000001e-05, "loss": 0.5699, "step": 15948 }, { "epoch": 0.893101131145705, "grad_norm": 1.3295998573303223, "learning_rate": 7.970500000000001e-05, "loss": 0.3798, "step": 15949 }, { "epoch": 0.893157128457834, "grad_norm": 1.3497673273086548, "learning_rate": 7.971000000000001e-05, "loss": 0.3794, "step": 15950 }, { "epoch": 0.893213125769963, "grad_norm": 1.523391604423523, "learning_rate": 7.9715e-05, "loss": 0.5808, "step": 15951 }, { "epoch": 0.893269123082092, "grad_norm": 1.3123940229415894, "learning_rate": 7.972e-05, "loss": 0.4704, "step": 15952 }, { "epoch": 0.8933251203942211, "grad_norm": 1.4037657976150513, "learning_rate": 7.9725e-05, "loss": 0.4478, "step": 15953 }, { "epoch": 0.8933811177063501, "grad_norm": 21.667104721069336, "learning_rate": 7.973000000000001e-05, "loss": 0.3895, "step": 15954 }, { "epoch": 0.8934371150184791, "grad_norm": 1.4874041080474854, "learning_rate": 7.973500000000001e-05, "loss": 0.4544, "step": 15955 }, { "epoch": 0.8934931123306081, "grad_norm": 1.2745970487594604, "learning_rate": 7.974e-05, "loss": 0.3299, "step": 15956 }, { "epoch": 0.8935491096427371, "grad_norm": 1.364397644996643, "learning_rate": 7.9745e-05, "loss": 0.5139, "step": 15957 }, { "epoch": 0.8936051069548662, "grad_norm": 
1.5993179082870483, "learning_rate": 7.975e-05, "loss": 0.4982, "step": 15958 }, { "epoch": 0.8936611042669952, "grad_norm": 1.1834697723388672, "learning_rate": 7.9755e-05, "loss": 0.3477, "step": 15959 }, { "epoch": 0.8937171015791242, "grad_norm": 1.264442801475525, "learning_rate": 7.976e-05, "loss": 0.3819, "step": 15960 }, { "epoch": 0.8937730988912532, "grad_norm": 1.2564032077789307, "learning_rate": 7.9765e-05, "loss": 0.3424, "step": 15961 }, { "epoch": 0.8938290962033822, "grad_norm": 1.2035491466522217, "learning_rate": 7.977e-05, "loss": 0.361, "step": 15962 }, { "epoch": 0.8938850935155113, "grad_norm": 1.5151845216751099, "learning_rate": 7.9775e-05, "loss": 0.3303, "step": 15963 }, { "epoch": 0.8939410908276403, "grad_norm": 1.3209103345870972, "learning_rate": 7.978e-05, "loss": 0.468, "step": 15964 }, { "epoch": 0.8939970881397693, "grad_norm": 1.5421687364578247, "learning_rate": 7.9785e-05, "loss": 0.5283, "step": 15965 }, { "epoch": 0.8940530854518983, "grad_norm": 1.4798215627670288, "learning_rate": 7.979000000000001e-05, "loss": 0.4598, "step": 15966 }, { "epoch": 0.8941090827640273, "grad_norm": 1.294493556022644, "learning_rate": 7.9795e-05, "loss": 0.5248, "step": 15967 }, { "epoch": 0.8941650800761564, "grad_norm": 1.600492000579834, "learning_rate": 7.98e-05, "loss": 0.5206, "step": 15968 }, { "epoch": 0.8942210773882854, "grad_norm": 1.415737509727478, "learning_rate": 7.980500000000001e-05, "loss": 0.4547, "step": 15969 }, { "epoch": 0.8942770747004144, "grad_norm": 1.2604701519012451, "learning_rate": 7.981000000000001e-05, "loss": 0.4808, "step": 15970 }, { "epoch": 0.8943330720125434, "grad_norm": 1.2768656015396118, "learning_rate": 7.981500000000001e-05, "loss": 0.3808, "step": 15971 }, { "epoch": 0.8943890693246724, "grad_norm": 1.1699621677398682, "learning_rate": 7.982e-05, "loss": 0.366, "step": 15972 }, { "epoch": 0.8944450666368015, "grad_norm": 1.4129713773727417, "learning_rate": 7.9825e-05, "loss": 0.4452, "step": 15973 
}, { "epoch": 0.8945010639489305, "grad_norm": 1.3117674589157104, "learning_rate": 7.983e-05, "loss": 0.4753, "step": 15974 }, { "epoch": 0.8945570612610595, "grad_norm": 1.3465110063552856, "learning_rate": 7.983500000000001e-05, "loss": 0.468, "step": 15975 }, { "epoch": 0.8946130585731885, "grad_norm": 1.24776291847229, "learning_rate": 7.984000000000001e-05, "loss": 0.437, "step": 15976 }, { "epoch": 0.8946690558853175, "grad_norm": 1.4752442836761475, "learning_rate": 7.9845e-05, "loss": 0.3954, "step": 15977 }, { "epoch": 0.8947250531974466, "grad_norm": 1.5850889682769775, "learning_rate": 7.985e-05, "loss": 0.4348, "step": 15978 }, { "epoch": 0.8947810505095756, "grad_norm": 1.7630277872085571, "learning_rate": 7.9855e-05, "loss": 0.3952, "step": 15979 }, { "epoch": 0.8948370478217046, "grad_norm": 1.4778448343276978, "learning_rate": 7.986e-05, "loss": 0.6347, "step": 15980 }, { "epoch": 0.8948930451338336, "grad_norm": 1.1664304733276367, "learning_rate": 7.9865e-05, "loss": 0.3586, "step": 15981 }, { "epoch": 0.8949490424459626, "grad_norm": 1.3973556756973267, "learning_rate": 7.987e-05, "loss": 0.4527, "step": 15982 }, { "epoch": 0.8950050397580916, "grad_norm": 1.5247793197631836, "learning_rate": 7.9875e-05, "loss": 0.4154, "step": 15983 }, { "epoch": 0.8950610370702207, "grad_norm": 1.149987816810608, "learning_rate": 7.988e-05, "loss": 0.3322, "step": 15984 }, { "epoch": 0.8951170343823497, "grad_norm": 1.3923696279525757, "learning_rate": 7.9885e-05, "loss": 0.4424, "step": 15985 }, { "epoch": 0.8951730316944787, "grad_norm": 1.550440788269043, "learning_rate": 7.989000000000001e-05, "loss": 0.3841, "step": 15986 }, { "epoch": 0.8952290290066077, "grad_norm": 1.2467762231826782, "learning_rate": 7.9895e-05, "loss": 0.5213, "step": 15987 }, { "epoch": 0.8952850263187367, "grad_norm": 1.3135510683059692, "learning_rate": 7.99e-05, "loss": 0.4367, "step": 15988 }, { "epoch": 0.8953410236308658, "grad_norm": 1.4266338348388672, "learning_rate": 
7.9905e-05, "loss": 0.5519, "step": 15989 }, { "epoch": 0.8953970209429948, "grad_norm": 1.6020103693008423, "learning_rate": 7.991000000000001e-05, "loss": 0.4317, "step": 15990 }, { "epoch": 0.8954530182551238, "grad_norm": 1.1448312997817993, "learning_rate": 7.991500000000001e-05, "loss": 0.3692, "step": 15991 }, { "epoch": 0.8955090155672528, "grad_norm": 1.4078099727630615, "learning_rate": 7.992000000000001e-05, "loss": 0.5585, "step": 15992 }, { "epoch": 0.8955650128793818, "grad_norm": 1.2579874992370605, "learning_rate": 7.9925e-05, "loss": 0.4945, "step": 15993 }, { "epoch": 0.8956210101915109, "grad_norm": 1.5178617238998413, "learning_rate": 7.993e-05, "loss": 0.524, "step": 15994 }, { "epoch": 0.8956770075036399, "grad_norm": 1.4215368032455444, "learning_rate": 7.9935e-05, "loss": 0.4524, "step": 15995 }, { "epoch": 0.8957330048157689, "grad_norm": 1.0902271270751953, "learning_rate": 7.994000000000001e-05, "loss": 0.3212, "step": 15996 }, { "epoch": 0.8957890021278979, "grad_norm": 1.4094494581222534, "learning_rate": 7.994500000000001e-05, "loss": 0.4249, "step": 15997 }, { "epoch": 0.8958449994400268, "grad_norm": 1.415575623512268, "learning_rate": 7.995e-05, "loss": 0.4705, "step": 15998 }, { "epoch": 0.8959009967521558, "grad_norm": 1.1373982429504395, "learning_rate": 7.9955e-05, "loss": 0.3789, "step": 15999 }, { "epoch": 0.8959569940642849, "grad_norm": 1.6810451745986938, "learning_rate": 7.996e-05, "loss": 0.7872, "step": 16000 }, { "epoch": 0.8960129913764139, "grad_norm": 1.4576246738433838, "learning_rate": 7.9965e-05, "loss": 0.361, "step": 16001 }, { "epoch": 0.8960689886885429, "grad_norm": 3.6870405673980713, "learning_rate": 7.997e-05, "loss": 0.4053, "step": 16002 }, { "epoch": 0.8961249860006719, "grad_norm": 1.5046871900558472, "learning_rate": 7.9975e-05, "loss": 0.4212, "step": 16003 }, { "epoch": 0.8961809833128009, "grad_norm": 1.788367509841919, "learning_rate": 7.998e-05, "loss": 0.5489, "step": 16004 }, { "epoch": 
0.89623698062493, "grad_norm": 1.2205194234848022, "learning_rate": 7.9985e-05, "loss": 0.3749, "step": 16005 }, { "epoch": 0.896292977937059, "grad_norm": 1.5499311685562134, "learning_rate": 7.999000000000001e-05, "loss": 0.4994, "step": 16006 }, { "epoch": 0.896348975249188, "grad_norm": 1.4454174041748047, "learning_rate": 7.999500000000001e-05, "loss": 0.588, "step": 16007 }, { "epoch": 0.896404972561317, "grad_norm": 1.3178184032440186, "learning_rate": 8e-05, "loss": 0.4279, "step": 16008 }, { "epoch": 0.896460969873446, "grad_norm": 1.215785264968872, "learning_rate": 8.0005e-05, "loss": 0.5083, "step": 16009 }, { "epoch": 0.896516967185575, "grad_norm": 1.4268912076950073, "learning_rate": 8.001e-05, "loss": 0.4572, "step": 16010 }, { "epoch": 0.8965729644977041, "grad_norm": 1.2209932804107666, "learning_rate": 8.001500000000001e-05, "loss": 0.3982, "step": 16011 }, { "epoch": 0.8966289618098331, "grad_norm": 1.4563535451889038, "learning_rate": 8.002000000000001e-05, "loss": 0.4739, "step": 16012 }, { "epoch": 0.8966849591219621, "grad_norm": 1.3663113117218018, "learning_rate": 8.002500000000001e-05, "loss": 0.4169, "step": 16013 }, { "epoch": 0.8967409564340911, "grad_norm": 1.5242372751235962, "learning_rate": 8.003e-05, "loss": 0.4739, "step": 16014 }, { "epoch": 0.8967969537462201, "grad_norm": 1.2546685934066772, "learning_rate": 8.0035e-05, "loss": 0.423, "step": 16015 }, { "epoch": 0.8968529510583492, "grad_norm": 1.2493762969970703, "learning_rate": 8.004e-05, "loss": 0.4325, "step": 16016 }, { "epoch": 0.8969089483704782, "grad_norm": 1.390856146812439, "learning_rate": 8.0045e-05, "loss": 0.5716, "step": 16017 }, { "epoch": 0.8969649456826072, "grad_norm": 1.460411787033081, "learning_rate": 8.005000000000001e-05, "loss": 0.4483, "step": 16018 }, { "epoch": 0.8970209429947362, "grad_norm": 1.5322096347808838, "learning_rate": 8.0055e-05, "loss": 0.4411, "step": 16019 }, { "epoch": 0.8970769403068652, "grad_norm": 1.2709147930145264, 
"learning_rate": 8.006e-05, "loss": 0.5024, "step": 16020 }, { "epoch": 0.8971329376189943, "grad_norm": 1.3853907585144043, "learning_rate": 8.0065e-05, "loss": 0.4838, "step": 16021 }, { "epoch": 0.8971889349311233, "grad_norm": 1.5160138607025146, "learning_rate": 8.007e-05, "loss": 0.4062, "step": 16022 }, { "epoch": 0.8972449322432523, "grad_norm": 1.3848271369934082, "learning_rate": 8.0075e-05, "loss": 0.4543, "step": 16023 }, { "epoch": 0.8973009295553813, "grad_norm": 1.159733772277832, "learning_rate": 8.008e-05, "loss": 0.4621, "step": 16024 }, { "epoch": 0.8973569268675103, "grad_norm": 1.551933765411377, "learning_rate": 8.0085e-05, "loss": 0.5854, "step": 16025 }, { "epoch": 0.8974129241796394, "grad_norm": 1.224943995475769, "learning_rate": 8.009e-05, "loss": 0.4938, "step": 16026 }, { "epoch": 0.8974689214917684, "grad_norm": 1.6137648820877075, "learning_rate": 8.009500000000001e-05, "loss": 0.5849, "step": 16027 }, { "epoch": 0.8975249188038974, "grad_norm": 1.6982605457305908, "learning_rate": 8.010000000000001e-05, "loss": 0.5129, "step": 16028 }, { "epoch": 0.8975809161160264, "grad_norm": 1.256736397743225, "learning_rate": 8.0105e-05, "loss": 0.5327, "step": 16029 }, { "epoch": 0.8976369134281554, "grad_norm": 1.533632755279541, "learning_rate": 8.011e-05, "loss": 0.5459, "step": 16030 }, { "epoch": 0.8976929107402845, "grad_norm": 1.3175910711288452, "learning_rate": 8.0115e-05, "loss": 0.5595, "step": 16031 }, { "epoch": 0.8977489080524135, "grad_norm": 1.1984654664993286, "learning_rate": 8.012000000000001e-05, "loss": 0.3818, "step": 16032 }, { "epoch": 0.8978049053645425, "grad_norm": 1.3063366413116455, "learning_rate": 8.012500000000001e-05, "loss": 0.3868, "step": 16033 }, { "epoch": 0.8978609026766715, "grad_norm": 1.4270581007003784, "learning_rate": 8.013000000000001e-05, "loss": 0.4259, "step": 16034 }, { "epoch": 0.8979168999888005, "grad_norm": 1.5793156623840332, "learning_rate": 8.0135e-05, "loss": 0.4375, "step": 16035 }, { 
"epoch": 0.8979728973009296, "grad_norm": 1.4425928592681885, "learning_rate": 8.014e-05, "loss": 0.5081, "step": 16036 }, { "epoch": 0.8980288946130586, "grad_norm": 1.380345344543457, "learning_rate": 8.0145e-05, "loss": 0.5136, "step": 16037 }, { "epoch": 0.8980848919251876, "grad_norm": 1.4106237888336182, "learning_rate": 8.015e-05, "loss": 0.4984, "step": 16038 }, { "epoch": 0.8981408892373166, "grad_norm": 1.3798160552978516, "learning_rate": 8.015500000000001e-05, "loss": 0.4104, "step": 16039 }, { "epoch": 0.8981968865494456, "grad_norm": 1.4829031229019165, "learning_rate": 8.016e-05, "loss": 0.4247, "step": 16040 }, { "epoch": 0.8982528838615746, "grad_norm": 1.0722377300262451, "learning_rate": 8.0165e-05, "loss": 0.3371, "step": 16041 }, { "epoch": 0.8983088811737037, "grad_norm": 1.2194339036941528, "learning_rate": 8.017e-05, "loss": 0.4685, "step": 16042 }, { "epoch": 0.8983648784858327, "grad_norm": 1.2947598695755005, "learning_rate": 8.0175e-05, "loss": 0.3746, "step": 16043 }, { "epoch": 0.8984208757979617, "grad_norm": 1.1339155435562134, "learning_rate": 8.018e-05, "loss": 0.4044, "step": 16044 }, { "epoch": 0.8984768731100907, "grad_norm": 1.5063127279281616, "learning_rate": 8.0185e-05, "loss": 0.4086, "step": 16045 }, { "epoch": 0.8985328704222197, "grad_norm": 1.4349595308303833, "learning_rate": 8.019e-05, "loss": 0.5806, "step": 16046 }, { "epoch": 0.8985888677343488, "grad_norm": 1.2671962976455688, "learning_rate": 8.019500000000001e-05, "loss": 0.3964, "step": 16047 }, { "epoch": 0.8986448650464778, "grad_norm": 2.124438524246216, "learning_rate": 8.020000000000001e-05, "loss": 0.4742, "step": 16048 }, { "epoch": 0.8987008623586068, "grad_norm": 1.3917243480682373, "learning_rate": 8.020500000000001e-05, "loss": 0.5335, "step": 16049 }, { "epoch": 0.8987568596707358, "grad_norm": 1.321627140045166, "learning_rate": 8.021e-05, "loss": 0.4422, "step": 16050 }, { "epoch": 0.8988128569828648, "grad_norm": 1.4380525350570679, 
"learning_rate": 8.0215e-05, "loss": 0.4308, "step": 16051 }, { "epoch": 0.8988688542949939, "grad_norm": 1.2992310523986816, "learning_rate": 8.022e-05, "loss": 0.4118, "step": 16052 }, { "epoch": 0.8989248516071229, "grad_norm": 1.675029993057251, "learning_rate": 8.022500000000001e-05, "loss": 0.6676, "step": 16053 }, { "epoch": 0.8989808489192519, "grad_norm": 1.4144927263259888, "learning_rate": 8.023000000000001e-05, "loss": 0.3992, "step": 16054 }, { "epoch": 0.8990368462313809, "grad_norm": 1.5401591062545776, "learning_rate": 8.023500000000001e-05, "loss": 0.4584, "step": 16055 }, { "epoch": 0.8990928435435099, "grad_norm": 1.4692164659500122, "learning_rate": 8.024e-05, "loss": 0.5731, "step": 16056 }, { "epoch": 0.899148840855639, "grad_norm": 1.1132906675338745, "learning_rate": 8.0245e-05, "loss": 0.3796, "step": 16057 }, { "epoch": 0.899204838167768, "grad_norm": 1.2593834400177002, "learning_rate": 8.025e-05, "loss": 0.4325, "step": 16058 }, { "epoch": 0.899260835479897, "grad_norm": 1.2930701971054077, "learning_rate": 8.0255e-05, "loss": 0.4755, "step": 16059 }, { "epoch": 0.899316832792026, "grad_norm": 1.2894867658615112, "learning_rate": 8.026000000000001e-05, "loss": 0.6381, "step": 16060 }, { "epoch": 0.899372830104155, "grad_norm": 1.3451348543167114, "learning_rate": 8.0265e-05, "loss": 0.4582, "step": 16061 }, { "epoch": 0.899428827416284, "grad_norm": 1.569953441619873, "learning_rate": 8.027e-05, "loss": 0.4933, "step": 16062 }, { "epoch": 0.8994848247284131, "grad_norm": 1.1205308437347412, "learning_rate": 8.0275e-05, "loss": 0.329, "step": 16063 }, { "epoch": 0.8995408220405421, "grad_norm": 1.2865349054336548, "learning_rate": 8.028e-05, "loss": 0.4729, "step": 16064 }, { "epoch": 0.8995968193526711, "grad_norm": 1.3408750295639038, "learning_rate": 8.0285e-05, "loss": 0.4201, "step": 16065 }, { "epoch": 0.8996528166648001, "grad_norm": 1.244759440422058, "learning_rate": 8.028999999999999e-05, "loss": 0.4149, "step": 16066 }, { 
"epoch": 0.8997088139769291, "grad_norm": 1.2448692321777344, "learning_rate": 8.0295e-05, "loss": 0.3632, "step": 16067 }, { "epoch": 0.8997648112890582, "grad_norm": 1.2463021278381348, "learning_rate": 8.030000000000001e-05, "loss": 0.4785, "step": 16068 }, { "epoch": 0.8998208086011872, "grad_norm": 1.367929458618164, "learning_rate": 8.030500000000001e-05, "loss": 0.4811, "step": 16069 }, { "epoch": 0.8998768059133162, "grad_norm": 1.4838855266571045, "learning_rate": 8.031000000000001e-05, "loss": 0.425, "step": 16070 }, { "epoch": 0.8999328032254452, "grad_norm": 1.6777338981628418, "learning_rate": 8.0315e-05, "loss": 0.5884, "step": 16071 }, { "epoch": 0.8999888005375742, "grad_norm": 1.2199866771697998, "learning_rate": 8.032e-05, "loss": 0.4392, "step": 16072 }, { "epoch": 0.9000447978497033, "grad_norm": 1.167232632637024, "learning_rate": 8.0325e-05, "loss": 0.4452, "step": 16073 }, { "epoch": 0.9001007951618323, "grad_norm": 1.5272061824798584, "learning_rate": 8.033000000000001e-05, "loss": 0.4721, "step": 16074 }, { "epoch": 0.9001567924739613, "grad_norm": 1.4772456884384155, "learning_rate": 8.033500000000001e-05, "loss": 0.5618, "step": 16075 }, { "epoch": 0.9002127897860903, "grad_norm": 1.3673405647277832, "learning_rate": 8.034e-05, "loss": 0.4879, "step": 16076 }, { "epoch": 0.9002687870982193, "grad_norm": 1.3085155487060547, "learning_rate": 8.0345e-05, "loss": 0.3939, "step": 16077 }, { "epoch": 0.9003247844103484, "grad_norm": 1.544012188911438, "learning_rate": 8.035e-05, "loss": 0.3759, "step": 16078 }, { "epoch": 0.9003807817224774, "grad_norm": 1.4712990522384644, "learning_rate": 8.0355e-05, "loss": 0.5326, "step": 16079 }, { "epoch": 0.9004367790346064, "grad_norm": 1.9467556476593018, "learning_rate": 8.036e-05, "loss": 0.3925, "step": 16080 }, { "epoch": 0.9004927763467353, "grad_norm": 1.3120015859603882, "learning_rate": 8.036500000000001e-05, "loss": 0.3204, "step": 16081 }, { "epoch": 0.9005487736588643, "grad_norm": null,
"learning_rate": 8.036500000000001e-05, "loss": 0.4318, "step": 16082 }, { "epoch": 0.9006047709709933, "grad_norm": 1.3546849489212036, "learning_rate": 8.037e-05, "loss": 0.3974, "step": 16083 }, { "epoch": 0.9006607682831224, "grad_norm": 1.37135910987854, "learning_rate": 8.0375e-05, "loss": 0.5232, "step": 16084 }, { "epoch": 0.9007167655952514, "grad_norm": 1.3977696895599365, "learning_rate": 8.038e-05, "loss": 0.4193, "step": 16085 }, { "epoch": 0.9007727629073804, "grad_norm": 1.3352080583572388, "learning_rate": 8.0385e-05, "loss": 0.472, "step": 16086 }, { "epoch": 0.9008287602195094, "grad_norm": 1.2196705341339111, "learning_rate": 8.039e-05, "loss": 0.356, "step": 16087 }, { "epoch": 0.9008847575316384, "grad_norm": 1.4114445447921753, "learning_rate": 8.0395e-05, "loss": 0.4789, "step": 16088 }, { "epoch": 0.9009407548437675, "grad_norm": 1.3660703897476196, "learning_rate": 8.04e-05, "loss": 0.4888, "step": 16089 }, { "epoch": 0.9009967521558965, "grad_norm": 1.3786678314208984, "learning_rate": 8.040500000000001e-05, "loss": 0.4506, "step": 16090 }, { "epoch": 0.9010527494680255, "grad_norm": 1.1101665496826172, "learning_rate": 8.041000000000001e-05, "loss": 0.3506, "step": 16091 }, { "epoch": 0.9011087467801545, "grad_norm": 1.319434404373169, "learning_rate": 8.041500000000001e-05, "loss": 0.3757, "step": 16092 }, { "epoch": 0.9011647440922835, "grad_norm": 1.656463861465454, "learning_rate": 8.042e-05, "loss": 0.5559, "step": 16093 }, { "epoch": 0.9012207414044126, "grad_norm": 1.1022865772247314, "learning_rate": 8.0425e-05, "loss": 0.3704, "step": 16094 }, { "epoch": 0.9012767387165416, "grad_norm": 1.38589346408844, "learning_rate": 8.043e-05, "loss": 0.4217, "step": 16095 }, { "epoch": 0.9013327360286706, "grad_norm": 1.690219521522522, "learning_rate": 8.0435e-05, "loss": 0.5664, "step": 16096 }, { "epoch": 0.9013887333407996, "grad_norm": 1.1399365663528442, "learning_rate": 8.044000000000001e-05, "loss": 0.4389, "step": 16097 }, { 
"epoch": 0.9014447306529286, "grad_norm": 1.3624540567398071, "learning_rate": 8.0445e-05, "loss": 0.4216, "step": 16098 }, { "epoch": 0.9015007279650576, "grad_norm": 1.3338139057159424, "learning_rate": 8.045e-05, "loss": 0.4295, "step": 16099 }, { "epoch": 0.9015567252771867, "grad_norm": 1.6596754789352417, "learning_rate": 8.0455e-05, "loss": 0.4851, "step": 16100 }, { "epoch": 0.9016127225893157, "grad_norm": 1.45106840133667, "learning_rate": 8.046e-05, "loss": 0.4712, "step": 16101 }, { "epoch": 0.9016687199014447, "grad_norm": 1.7162147760391235, "learning_rate": 8.0465e-05, "loss": 0.5216, "step": 16102 }, { "epoch": 0.9017247172135737, "grad_norm": 1.3345248699188232, "learning_rate": 8.047000000000001e-05, "loss": 0.4811, "step": 16103 }, { "epoch": 0.9017807145257027, "grad_norm": 1.5572048425674438, "learning_rate": 8.0475e-05, "loss": 0.5564, "step": 16104 }, { "epoch": 0.9018367118378318, "grad_norm": 1.1450647115707397, "learning_rate": 8.048e-05, "loss": 0.4413, "step": 16105 }, { "epoch": 0.9018927091499608, "grad_norm": 1.807526707649231, "learning_rate": 8.0485e-05, "loss": 0.6043, "step": 16106 }, { "epoch": 0.9019487064620898, "grad_norm": 1.389998435974121, "learning_rate": 8.049e-05, "loss": 0.497, "step": 16107 }, { "epoch": 0.9020047037742188, "grad_norm": 1.0663912296295166, "learning_rate": 8.049500000000001e-05, "loss": 0.3759, "step": 16108 }, { "epoch": 0.9020607010863478, "grad_norm": 1.4233379364013672, "learning_rate": 8.05e-05, "loss": 0.4, "step": 16109 }, { "epoch": 0.9021166983984769, "grad_norm": 1.518378496170044, "learning_rate": 8.0505e-05, "loss": 0.4115, "step": 16110 }, { "epoch": 0.9021726957106059, "grad_norm": 1.3768906593322754, "learning_rate": 8.051000000000001e-05, "loss": 0.5397, "step": 16111 }, { "epoch": 0.9022286930227349, "grad_norm": 1.376766562461853, "learning_rate": 8.051500000000001e-05, "loss": 0.4534, "step": 16112 }, { "epoch": 0.9022846903348639, "grad_norm": 1.288089394569397, "learning_rate": 
8.052000000000001e-05, "loss": 0.4622, "step": 16113 }, { "epoch": 0.9023406876469929, "grad_norm": 1.2125897407531738, "learning_rate": 8.0525e-05, "loss": 0.4396, "step": 16114 }, { "epoch": 0.902396684959122, "grad_norm": 2.0193521976470947, "learning_rate": 8.053e-05, "loss": 0.6333, "step": 16115 }, { "epoch": 0.902452682271251, "grad_norm": 1.7033817768096924, "learning_rate": 8.0535e-05, "loss": 0.5278, "step": 16116 }, { "epoch": 0.90250867958338, "grad_norm": 1.4649571180343628, "learning_rate": 8.054e-05, "loss": 0.5143, "step": 16117 }, { "epoch": 0.902564676895509, "grad_norm": 1.4818642139434814, "learning_rate": 8.054500000000001e-05, "loss": 0.4286, "step": 16118 }, { "epoch": 0.902620674207638, "grad_norm": 1.3658850193023682, "learning_rate": 8.055e-05, "loss": 0.551, "step": 16119 }, { "epoch": 0.902676671519767, "grad_norm": 1.3626432418823242, "learning_rate": 8.0555e-05, "loss": 0.4361, "step": 16120 }, { "epoch": 0.9027326688318961, "grad_norm": 1.1653423309326172, "learning_rate": 8.056e-05, "loss": 0.3681, "step": 16121 }, { "epoch": 0.9027886661440251, "grad_norm": 1.0749971866607666, "learning_rate": 8.0565e-05, "loss": 0.3763, "step": 16122 }, { "epoch": 0.9028446634561541, "grad_norm": 1.35155189037323, "learning_rate": 8.057e-05, "loss": 0.393, "step": 16123 }, { "epoch": 0.9029006607682831, "grad_norm": 1.3368000984191895, "learning_rate": 8.057500000000001e-05, "loss": 0.3983, "step": 16124 }, { "epoch": 0.9029566580804121, "grad_norm": 1.3507204055786133, "learning_rate": 8.058e-05, "loss": 0.4552, "step": 16125 }, { "epoch": 0.9030126553925412, "grad_norm": 1.1446025371551514, "learning_rate": 8.0585e-05, "loss": 0.3386, "step": 16126 }, { "epoch": 0.9030686527046702, "grad_norm": 1.4161683320999146, "learning_rate": 8.059e-05, "loss": 0.4751, "step": 16127 }, { "epoch": 0.9031246500167992, "grad_norm": 1.2866870164871216, "learning_rate": 8.059500000000001e-05, "loss": 0.45, "step": 16128 }, { "epoch": 0.9031806473289282, 
"grad_norm": 1.401611089706421, "learning_rate": 8.060000000000001e-05, "loss": 0.5611, "step": 16129 }, { "epoch": 0.9032366446410572, "grad_norm": 1.4381974935531616, "learning_rate": 8.0605e-05, "loss": 0.6571, "step": 16130 }, { "epoch": 0.9032926419531863, "grad_norm": 1.134797215461731, "learning_rate": 8.061e-05, "loss": 0.3239, "step": 16131 }, { "epoch": 0.9033486392653153, "grad_norm": 1.5025545358657837, "learning_rate": 8.061500000000001e-05, "loss": 0.4626, "step": 16132 }, { "epoch": 0.9034046365774443, "grad_norm": 1.3810558319091797, "learning_rate": 8.062000000000001e-05, "loss": 0.5958, "step": 16133 }, { "epoch": 0.9034606338895733, "grad_norm": 1.4801636934280396, "learning_rate": 8.062500000000001e-05, "loss": 0.4656, "step": 16134 }, { "epoch": 0.9035166312017023, "grad_norm": 1.2599828243255615, "learning_rate": 8.063e-05, "loss": 0.4976, "step": 16135 }, { "epoch": 0.9035726285138314, "grad_norm": 1.2133804559707642, "learning_rate": 8.0635e-05, "loss": 0.4485, "step": 16136 }, { "epoch": 0.9036286258259604, "grad_norm": 1.4889907836914062, "learning_rate": 8.064e-05, "loss": 0.5225, "step": 16137 }, { "epoch": 0.9036846231380894, "grad_norm": 1.5218982696533203, "learning_rate": 8.0645e-05, "loss": 0.4227, "step": 16138 }, { "epoch": 0.9037406204502184, "grad_norm": 1.1445194482803345, "learning_rate": 8.065000000000001e-05, "loss": 0.3879, "step": 16139 }, { "epoch": 0.9037966177623474, "grad_norm": 1.486558198928833, "learning_rate": 8.0655e-05, "loss": 0.5492, "step": 16140 }, { "epoch": 0.9038526150744765, "grad_norm": 1.294519305229187, "learning_rate": 8.066e-05, "loss": 0.5715, "step": 16141 }, { "epoch": 0.9039086123866055, "grad_norm": 1.384840726852417, "learning_rate": 8.0665e-05, "loss": 0.5428, "step": 16142 }, { "epoch": 0.9039646096987345, "grad_norm": 1.2878063917160034, "learning_rate": 8.067e-05, "loss": 0.4181, "step": 16143 }, { "epoch": 0.9040206070108635, "grad_norm": 1.132713794708252, "learning_rate": 8.0675e-05, 
"loss": 0.3261, "step": 16144 }, { "epoch": 0.9040766043229925, "grad_norm": 1.303546667098999, "learning_rate": 8.068e-05, "loss": 0.3057, "step": 16145 }, { "epoch": 0.9041326016351215, "grad_norm": 1.181791067123413, "learning_rate": 8.0685e-05, "loss": 0.3682, "step": 16146 }, { "epoch": 0.9041885989472506, "grad_norm": 1.2825514078140259, "learning_rate": 8.069e-05, "loss": 0.4431, "step": 16147 }, { "epoch": 0.9042445962593796, "grad_norm": 1.2796927690505981, "learning_rate": 8.0695e-05, "loss": 0.496, "step": 16148 }, { "epoch": 0.9043005935715086, "grad_norm": 1.2571866512298584, "learning_rate": 8.070000000000001e-05, "loss": 0.4749, "step": 16149 }, { "epoch": 0.9043565908836376, "grad_norm": 1.2255233526229858, "learning_rate": 8.070500000000001e-05, "loss": 0.4372, "step": 16150 }, { "epoch": 0.9044125881957666, "grad_norm": 1.5090402364730835, "learning_rate": 8.071e-05, "loss": 0.7129, "step": 16151 }, { "epoch": 0.9044685855078957, "grad_norm": 1.2965089082717896, "learning_rate": 8.0715e-05, "loss": 0.4358, "step": 16152 }, { "epoch": 0.9045245828200247, "grad_norm": 1.4195002317428589, "learning_rate": 8.072000000000001e-05, "loss": 0.4692, "step": 16153 }, { "epoch": 0.9045805801321537, "grad_norm": 1.3298155069351196, "learning_rate": 8.072500000000001e-05, "loss": 0.4046, "step": 16154 }, { "epoch": 0.9046365774442827, "grad_norm": 1.4429786205291748, "learning_rate": 8.073000000000001e-05, "loss": 0.5544, "step": 16155 }, { "epoch": 0.9046925747564117, "grad_norm": 1.4150007963180542, "learning_rate": 8.0735e-05, "loss": 0.4725, "step": 16156 }, { "epoch": 0.9047485720685408, "grad_norm": 1.5700832605361938, "learning_rate": 8.074e-05, "loss": 0.6968, "step": 16157 }, { "epoch": 0.9048045693806698, "grad_norm": 1.4435895681381226, "learning_rate": 8.0745e-05, "loss": 0.6842, "step": 16158 }, { "epoch": 0.9048605666927988, "grad_norm": 18.472076416015625, "learning_rate": 8.075e-05, "loss": 0.7216, "step": 16159 }, { "epoch": 
0.9049165640049278, "grad_norm": 1.087300181388855, "learning_rate": 8.075500000000001e-05, "loss": 0.4338, "step": 16160 }, { "epoch": 0.9049725613170568, "grad_norm": 1.6640185117721558, "learning_rate": 8.076e-05, "loss": 0.8026, "step": 16161 }, { "epoch": 0.9050285586291859, "grad_norm": 1.339402675628662, "learning_rate": 8.0765e-05, "loss": 0.424, "step": 16162 }, { "epoch": 0.9050845559413149, "grad_norm": 1.7281773090362549, "learning_rate": 8.077e-05, "loss": 0.7183, "step": 16163 }, { "epoch": 0.9051405532534438, "grad_norm": 1.195365309715271, "learning_rate": 8.0775e-05, "loss": 0.4868, "step": 16164 }, { "epoch": 0.9051965505655728, "grad_norm": 1.2876406908035278, "learning_rate": 8.078e-05, "loss": 0.5234, "step": 16165 }, { "epoch": 0.9052525478777018, "grad_norm": 1.2299528121948242, "learning_rate": 8.078499999999999e-05, "loss": 0.4793, "step": 16166 }, { "epoch": 0.9053085451898308, "grad_norm": 1.245281457901001, "learning_rate": 8.079e-05, "loss": 0.4979, "step": 16167 }, { "epoch": 0.9053645425019599, "grad_norm": 1.3213170766830444, "learning_rate": 8.0795e-05, "loss": 0.5371, "step": 16168 }, { "epoch": 0.9054205398140889, "grad_norm": 1.53616201877594, "learning_rate": 8.080000000000001e-05, "loss": 0.6766, "step": 16169 }, { "epoch": 0.9054765371262179, "grad_norm": 1.4785538911819458, "learning_rate": 8.080500000000001e-05, "loss": 0.4185, "step": 16170 }, { "epoch": 0.9055325344383469, "grad_norm": 1.1670349836349487, "learning_rate": 8.081000000000001e-05, "loss": 0.3507, "step": 16171 }, { "epoch": 0.9055885317504759, "grad_norm": 1.273755431175232, "learning_rate": 8.0815e-05, "loss": 0.4314, "step": 16172 }, { "epoch": 0.905644529062605, "grad_norm": 1.750573992729187, "learning_rate": 8.082e-05, "loss": 0.4869, "step": 16173 }, { "epoch": 0.905700526374734, "grad_norm": 1.0891785621643066, "learning_rate": 8.082500000000001e-05, "loss": 0.3164, "step": 16174 }, { "epoch": 0.905756523686863, "grad_norm": 1.1787875890731812, 
"learning_rate": 8.083000000000001e-05, "loss": 0.3236, "step": 16175 }, { "epoch": 0.905812520998992, "grad_norm": 1.3453223705291748, "learning_rate": 8.083500000000001e-05, "loss": 0.3288, "step": 16176 }, { "epoch": 0.905868518311121, "grad_norm": 1.140633463859558, "learning_rate": 8.084e-05, "loss": 0.3574, "step": 16177 }, { "epoch": 0.90592451562325, "grad_norm": 1.3836084604263306, "learning_rate": 8.0845e-05, "loss": 0.4461, "step": 16178 }, { "epoch": 0.9059805129353791, "grad_norm": 3.173196792602539, "learning_rate": 8.085e-05, "loss": 0.3756, "step": 16179 }, { "epoch": 0.9060365102475081, "grad_norm": 1.382127046585083, "learning_rate": 8.0855e-05, "loss": 0.5083, "step": 16180 }, { "epoch": 0.9060925075596371, "grad_norm": 1.0873388051986694, "learning_rate": 8.086000000000001e-05, "loss": 0.4796, "step": 16181 }, { "epoch": 0.9061485048717661, "grad_norm": 1.4298620223999023, "learning_rate": 8.0865e-05, "loss": 0.4708, "step": 16182 }, { "epoch": 0.9062045021838951, "grad_norm": 1.3999799489974976, "learning_rate": 8.087e-05, "loss": 0.6052, "step": 16183 }, { "epoch": 0.9062604994960242, "grad_norm": 1.205410361289978, "learning_rate": 8.0875e-05, "loss": 0.4133, "step": 16184 }, { "epoch": 0.9063164968081532, "grad_norm": 1.6735310554504395, "learning_rate": 8.088e-05, "loss": 0.4577, "step": 16185 }, { "epoch": 0.9063724941202822, "grad_norm": 1.5331871509552002, "learning_rate": 8.0885e-05, "loss": 0.4554, "step": 16186 }, { "epoch": 0.9064284914324112, "grad_norm": 1.5027611255645752, "learning_rate": 8.088999999999999e-05, "loss": 0.57, "step": 16187 }, { "epoch": 0.9064844887445402, "grad_norm": 1.3087024688720703, "learning_rate": 8.0895e-05, "loss": 0.3807, "step": 16188 }, { "epoch": 0.9065404860566693, "grad_norm": 1.2069069147109985, "learning_rate": 8.090000000000001e-05, "loss": 0.344, "step": 16189 }, { "epoch": 0.9065964833687983, "grad_norm": 1.1890544891357422, "learning_rate": 8.090500000000001e-05, "loss": 0.4263, "step": 16190 
}, { "epoch": 0.9066524806809273, "grad_norm": 1.5398627519607544, "learning_rate": 8.091000000000001e-05, "loss": 0.4006, "step": 16191 }, { "epoch": 0.9067084779930563, "grad_norm": 1.345105528831482, "learning_rate": 8.091500000000001e-05, "loss": 0.5528, "step": 16192 }, { "epoch": 0.9067644753051853, "grad_norm": 1.2979389429092407, "learning_rate": 8.092e-05, "loss": 0.3823, "step": 16193 }, { "epoch": 0.9068204726173144, "grad_norm": 1.7370052337646484, "learning_rate": 8.0925e-05, "loss": 0.7235, "step": 16194 }, { "epoch": 0.9068764699294434, "grad_norm": 1.5376818180084229, "learning_rate": 8.093e-05, "loss": 0.4005, "step": 16195 }, { "epoch": 0.9069324672415724, "grad_norm": 1.472191572189331, "learning_rate": 8.093500000000001e-05, "loss": 0.399, "step": 16196 }, { "epoch": 0.9069884645537014, "grad_norm": 1.2682433128356934, "learning_rate": 8.094000000000001e-05, "loss": 0.4769, "step": 16197 }, { "epoch": 0.9070444618658304, "grad_norm": 1.5884227752685547, "learning_rate": 8.0945e-05, "loss": 0.5694, "step": 16198 }, { "epoch": 0.9071004591779595, "grad_norm": 1.2543954849243164, "learning_rate": 8.095e-05, "loss": 0.5715, "step": 16199 }, { "epoch": 0.9071564564900885, "grad_norm": 1.4897866249084473, "learning_rate": 8.0955e-05, "loss": 0.4489, "step": 16200 }, { "epoch": 0.9072124538022175, "grad_norm": 1.388708233833313, "learning_rate": 8.096e-05, "loss": 0.5487, "step": 16201 }, { "epoch": 0.9072684511143465, "grad_norm": 1.382103443145752, "learning_rate": 8.096500000000001e-05, "loss": 0.458, "step": 16202 }, { "epoch": 0.9073244484264755, "grad_norm": 1.297139286994934, "learning_rate": 8.097e-05, "loss": 0.4633, "step": 16203 }, { "epoch": 0.9073804457386045, "grad_norm": 1.2164654731750488, "learning_rate": 8.0975e-05, "loss": 0.3735, "step": 16204 }, { "epoch": 0.9074364430507336, "grad_norm": 1.4001030921936035, "learning_rate": 8.098e-05, "loss": 0.4218, "step": 16205 }, { "epoch": 0.9074924403628626, "grad_norm": 1.3910261392593384, 
"learning_rate": 8.0985e-05, "loss": 0.5399, "step": 16206 }, { "epoch": 0.9075484376749916, "grad_norm": 1.693255066871643, "learning_rate": 8.099e-05, "loss": 0.5294, "step": 16207 }, { "epoch": 0.9076044349871206, "grad_norm": 1.3666377067565918, "learning_rate": 8.099499999999999e-05, "loss": 0.4751, "step": 16208 }, { "epoch": 0.9076604322992496, "grad_norm": 9.84433650970459, "learning_rate": 8.1e-05, "loss": 0.512, "step": 16209 }, { "epoch": 0.9077164296113787, "grad_norm": 1.3591711521148682, "learning_rate": 8.100500000000001e-05, "loss": 0.4619, "step": 16210 }, { "epoch": 0.9077724269235077, "grad_norm": 1.3341845273971558, "learning_rate": 8.101000000000001e-05, "loss": 0.4369, "step": 16211 }, { "epoch": 0.9078284242356367, "grad_norm": 1.8678618669509888, "learning_rate": 8.101500000000001e-05, "loss": 0.4691, "step": 16212 }, { "epoch": 0.9078844215477657, "grad_norm": 1.4997316598892212, "learning_rate": 8.102000000000001e-05, "loss": 0.5044, "step": 16213 }, { "epoch": 0.9079404188598947, "grad_norm": 1.2690948247909546, "learning_rate": 8.1025e-05, "loss": 0.3845, "step": 16214 }, { "epoch": 0.9079964161720238, "grad_norm": 1.580435037612915, "learning_rate": 8.103e-05, "loss": 0.5397, "step": 16215 }, { "epoch": 0.9080524134841528, "grad_norm": 1.1663674116134644, "learning_rate": 8.1035e-05, "loss": 0.4878, "step": 16216 }, { "epoch": 0.9081084107962818, "grad_norm": 1.1223293542861938, "learning_rate": 8.104000000000001e-05, "loss": 0.3977, "step": 16217 }, { "epoch": 0.9081644081084108, "grad_norm": 1.6381984949111938, "learning_rate": 8.104500000000001e-05, "loss": 0.5847, "step": 16218 }, { "epoch": 0.9082204054205398, "grad_norm": 1.454047679901123, "learning_rate": 8.105e-05, "loss": 0.4213, "step": 16219 }, { "epoch": 0.9082764027326689, "grad_norm": 2.117156982421875, "learning_rate": 8.1055e-05, "loss": 0.4909, "step": 16220 }, { "epoch": 0.9083324000447979, "grad_norm": 1.3888218402862549, "learning_rate": 8.106e-05, "loss": 0.3746, 
"step": 16221 }, { "epoch": 0.9083883973569269, "grad_norm": 1.1721341609954834, "learning_rate": 8.1065e-05, "loss": 0.3608, "step": 16222 }, { "epoch": 0.9084443946690559, "grad_norm": 1.574838399887085, "learning_rate": 8.107000000000001e-05, "loss": 0.5261, "step": 16223 }, { "epoch": 0.9085003919811849, "grad_norm": 1.385015845298767, "learning_rate": 8.1075e-05, "loss": 0.4958, "step": 16224 }, { "epoch": 0.908556389293314, "grad_norm": 1.306646704673767, "learning_rate": 8.108e-05, "loss": 0.4316, "step": 16225 }, { "epoch": 0.908612386605443, "grad_norm": 1.5103237628936768, "learning_rate": 8.1085e-05, "loss": 0.6305, "step": 16226 }, { "epoch": 0.908668383917572, "grad_norm": 1.5798276662826538, "learning_rate": 8.109e-05, "loss": 0.5126, "step": 16227 }, { "epoch": 0.908724381229701, "grad_norm": 1.7766233682632446, "learning_rate": 8.1095e-05, "loss": 0.6594, "step": 16228 }, { "epoch": 0.90878037854183, "grad_norm": 1.152177333831787, "learning_rate": 8.11e-05, "loss": 0.3107, "step": 16229 }, { "epoch": 0.908836375853959, "grad_norm": 1.5890731811523438, "learning_rate": 8.1105e-05, "loss": 0.439, "step": 16230 }, { "epoch": 0.9088923731660881, "grad_norm": 1.157092809677124, "learning_rate": 8.111000000000001e-05, "loss": 0.4707, "step": 16231 }, { "epoch": 0.9089483704782171, "grad_norm": 1.3347845077514648, "learning_rate": 8.111500000000001e-05, "loss": 0.4352, "step": 16232 }, { "epoch": 0.9090043677903461, "grad_norm": 1.407378911972046, "learning_rate": 8.112000000000001e-05, "loss": 0.3694, "step": 16233 }, { "epoch": 0.9090603651024751, "grad_norm": 1.2295944690704346, "learning_rate": 8.112500000000001e-05, "loss": 0.3802, "step": 16234 }, { "epoch": 0.9091163624146041, "grad_norm": 1.3493835926055908, "learning_rate": 8.113e-05, "loss": 0.4413, "step": 16235 }, { "epoch": 0.9091723597267332, "grad_norm": 1.043889045715332, "learning_rate": 8.1135e-05, "loss": 0.4428, "step": 16236 }, { "epoch": 0.9092283570388622, "grad_norm": 
1.533015489578247, "learning_rate": 8.114e-05, "loss": 0.5359, "step": 16237 }, { "epoch": 0.9092843543509912, "grad_norm": 1.045485258102417, "learning_rate": 8.114500000000001e-05, "loss": 0.3455, "step": 16238 }, { "epoch": 0.9093403516631202, "grad_norm": 1.2184590101242065, "learning_rate": 8.115000000000001e-05, "loss": 0.3954, "step": 16239 }, { "epoch": 0.9093963489752492, "grad_norm": 1.220358967781067, "learning_rate": 8.1155e-05, "loss": 0.3347, "step": 16240 }, { "epoch": 0.9094523462873783, "grad_norm": 1.1589940786361694, "learning_rate": 8.116e-05, "loss": 0.5209, "step": 16241 }, { "epoch": 0.9095083435995073, "grad_norm": 1.555674433708191, "learning_rate": 8.1165e-05, "loss": 0.5367, "step": 16242 }, { "epoch": 0.9095643409116363, "grad_norm": 1.1752678155899048, "learning_rate": 8.117e-05, "loss": 0.4052, "step": 16243 }, { "epoch": 0.9096203382237653, "grad_norm": 1.2515757083892822, "learning_rate": 8.1175e-05, "loss": 0.4064, "step": 16244 }, { "epoch": 0.9096763355358943, "grad_norm": 1.4909850358963013, "learning_rate": 8.118e-05, "loss": 0.4566, "step": 16245 }, { "epoch": 0.9097323328480232, "grad_norm": 1.183108925819397, "learning_rate": 8.1185e-05, "loss": 0.43, "step": 16246 }, { "epoch": 0.9097883301601523, "grad_norm": 1.329343318939209, "learning_rate": 8.119e-05, "loss": 0.4417, "step": 16247 }, { "epoch": 0.9098443274722813, "grad_norm": 1.2194442749023438, "learning_rate": 8.1195e-05, "loss": 0.403, "step": 16248 }, { "epoch": 0.9099003247844103, "grad_norm": 1.368298888206482, "learning_rate": 8.120000000000001e-05, "loss": 0.4122, "step": 16249 }, { "epoch": 0.9099563220965393, "grad_norm": 1.3849939107894897, "learning_rate": 8.1205e-05, "loss": 0.3577, "step": 16250 }, { "epoch": 0.9100123194086683, "grad_norm": 1.4291472434997559, "learning_rate": 8.121e-05, "loss": 0.5803, "step": 16251 }, { "epoch": 0.9100683167207974, "grad_norm": 1.1422356367111206, "learning_rate": 8.121500000000001e-05, "loss": 0.3736, "step": 16252 }, 
{ "epoch": 0.9101243140329264, "grad_norm": 1.247540831565857, "learning_rate": 8.122000000000001e-05, "loss": 0.4379, "step": 16253 }, { "epoch": 0.9101803113450554, "grad_norm": 1.3040695190429688, "learning_rate": 8.122500000000001e-05, "loss": 0.506, "step": 16254 }, { "epoch": 0.9102363086571844, "grad_norm": 1.257175087928772, "learning_rate": 8.123e-05, "loss": 0.3589, "step": 16255 }, { "epoch": 0.9102923059693134, "grad_norm": 1.5271435976028442, "learning_rate": 8.1235e-05, "loss": 0.4279, "step": 16256 }, { "epoch": 0.9103483032814425, "grad_norm": 1.2693156003952026, "learning_rate": 8.124e-05, "loss": 0.4413, "step": 16257 }, { "epoch": 0.9104043005935715, "grad_norm": 1.3279603719711304, "learning_rate": 8.1245e-05, "loss": 0.4359, "step": 16258 }, { "epoch": 0.9104602979057005, "grad_norm": 1.4256255626678467, "learning_rate": 8.125000000000001e-05, "loss": 0.4211, "step": 16259 }, { "epoch": 0.9105162952178295, "grad_norm": 1.497703194618225, "learning_rate": 8.125500000000001e-05, "loss": 0.4449, "step": 16260 }, { "epoch": 0.9105722925299585, "grad_norm": 1.348031997680664, "learning_rate": 8.126e-05, "loss": 0.3811, "step": 16261 }, { "epoch": 0.9106282898420875, "grad_norm": 1.5301815271377563, "learning_rate": 8.1265e-05, "loss": 0.6615, "step": 16262 }, { "epoch": 0.9106842871542166, "grad_norm": 1.6751320362091064, "learning_rate": 8.127e-05, "loss": 0.5039, "step": 16263 }, { "epoch": 0.9107402844663456, "grad_norm": 1.3533034324645996, "learning_rate": 8.1275e-05, "loss": 0.379, "step": 16264 }, { "epoch": 0.9107962817784746, "grad_norm": 1.2872544527053833, "learning_rate": 8.128e-05, "loss": 0.4944, "step": 16265 }, { "epoch": 0.9108522790906036, "grad_norm": 1.281751036643982, "learning_rate": 8.1285e-05, "loss": 0.4542, "step": 16266 }, { "epoch": 0.9109082764027326, "grad_norm": 1.4266624450683594, "learning_rate": 8.129e-05, "loss": 0.4478, "step": 16267 }, { "epoch": 0.9109642737148617, "grad_norm": 1.7636620998382568, 
"learning_rate": 8.1295e-05, "loss": 0.6262, "step": 16268 }, { "epoch": 0.9110202710269907, "grad_norm": 1.3292865753173828, "learning_rate": 8.13e-05, "loss": 0.456, "step": 16269 }, { "epoch": 0.9110762683391197, "grad_norm": 1.4947370290756226, "learning_rate": 8.130500000000001e-05, "loss": 0.5257, "step": 16270 }, { "epoch": 0.9111322656512487, "grad_norm": 1.2445459365844727, "learning_rate": 8.131e-05, "loss": 0.4555, "step": 16271 }, { "epoch": 0.9111882629633777, "grad_norm": 1.5176284313201904, "learning_rate": 8.1315e-05, "loss": 0.6189, "step": 16272 }, { "epoch": 0.9112442602755068, "grad_norm": 1.2148562669754028, "learning_rate": 8.132e-05, "loss": 0.4785, "step": 16273 }, { "epoch": 0.9113002575876358, "grad_norm": 1.1544235944747925, "learning_rate": 8.132500000000001e-05, "loss": 0.4687, "step": 16274 }, { "epoch": 0.9113562548997648, "grad_norm": 1.3045356273651123, "learning_rate": 8.133000000000001e-05, "loss": 0.5035, "step": 16275 }, { "epoch": 0.9114122522118938, "grad_norm": 1.4231603145599365, "learning_rate": 8.1335e-05, "loss": 0.4949, "step": 16276 }, { "epoch": 0.9114682495240228, "grad_norm": 1.5219913721084595, "learning_rate": 8.134e-05, "loss": 0.5055, "step": 16277 }, { "epoch": 0.9115242468361519, "grad_norm": 1.6135090589523315, "learning_rate": 8.1345e-05, "loss": 0.5868, "step": 16278 }, { "epoch": 0.9115802441482809, "grad_norm": 1.9982261657714844, "learning_rate": 8.135e-05, "loss": 0.7034, "step": 16279 }, { "epoch": 0.9116362414604099, "grad_norm": 1.4176361560821533, "learning_rate": 8.135500000000001e-05, "loss": 0.3944, "step": 16280 }, { "epoch": 0.9116922387725389, "grad_norm": 1.4432766437530518, "learning_rate": 8.136000000000001e-05, "loss": 0.4427, "step": 16281 }, { "epoch": 0.9117482360846679, "grad_norm": 1.5466388463974, "learning_rate": 8.1365e-05, "loss": 0.4778, "step": 16282 }, { "epoch": 0.911804233396797, "grad_norm": 1.649107575416565, "learning_rate": 8.137e-05, "loss": 0.546, "step": 16283 }, { 
"epoch": 0.911860230708926, "grad_norm": 1.3462942838668823, "learning_rate": 8.1375e-05, "loss": 0.38, "step": 16284 }, { "epoch": 0.911916228021055, "grad_norm": 1.2513823509216309, "learning_rate": 8.138e-05, "loss": 0.571, "step": 16285 }, { "epoch": 0.911972225333184, "grad_norm": 1.1332285404205322, "learning_rate": 8.1385e-05, "loss": 0.3993, "step": 16286 }, { "epoch": 0.912028222645313, "grad_norm": 1.279436707496643, "learning_rate": 8.139e-05, "loss": 0.5434, "step": 16287 }, { "epoch": 0.912084219957442, "grad_norm": 1.4637783765792847, "learning_rate": 8.1395e-05, "loss": 0.4162, "step": 16288 }, { "epoch": 0.9121402172695711, "grad_norm": 1.2769397497177124, "learning_rate": 8.14e-05, "loss": 0.4708, "step": 16289 }, { "epoch": 0.9121962145817001, "grad_norm": 1.3884077072143555, "learning_rate": 8.140500000000001e-05, "loss": 0.4197, "step": 16290 }, { "epoch": 0.9122522118938291, "grad_norm": 1.5181313753128052, "learning_rate": 8.141000000000001e-05, "loss": 0.4158, "step": 16291 }, { "epoch": 0.9123082092059581, "grad_norm": 1.2915698289871216, "learning_rate": 8.1415e-05, "loss": 0.4737, "step": 16292 }, { "epoch": 0.9123642065180871, "grad_norm": 1.173693299293518, "learning_rate": 8.142e-05, "loss": 0.3346, "step": 16293 }, { "epoch": 0.9124202038302162, "grad_norm": 1.1621513366699219, "learning_rate": 8.1425e-05, "loss": 0.4959, "step": 16294 }, { "epoch": 0.9124762011423452, "grad_norm": 1.2622287273406982, "learning_rate": 8.143000000000001e-05, "loss": 0.3461, "step": 16295 }, { "epoch": 0.9125321984544742, "grad_norm": 1.4559603929519653, "learning_rate": 8.143500000000001e-05, "loss": 0.5704, "step": 16296 }, { "epoch": 0.9125881957666032, "grad_norm": 1.3613815307617188, "learning_rate": 8.144e-05, "loss": 0.4548, "step": 16297 }, { "epoch": 0.9126441930787322, "grad_norm": 1.4421823024749756, "learning_rate": 8.1445e-05, "loss": 0.4864, "step": 16298 }, { "epoch": 0.9127001903908613, "grad_norm": 1.381778359413147, "learning_rate": 
8.145e-05, "loss": 0.5673, "step": 16299 }, { "epoch": 0.9127561877029903, "grad_norm": 1.7580300569534302, "learning_rate": 8.1455e-05, "loss": 0.4055, "step": 16300 }, { "epoch": 0.9128121850151193, "grad_norm": 1.568524956703186, "learning_rate": 8.146000000000001e-05, "loss": 0.597, "step": 16301 }, { "epoch": 0.9128681823272483, "grad_norm": 1.4807724952697754, "learning_rate": 8.146500000000001e-05, "loss": 0.6351, "step": 16302 }, { "epoch": 0.9129241796393773, "grad_norm": 1.2681360244750977, "learning_rate": 8.147e-05, "loss": 0.3429, "step": 16303 }, { "epoch": 0.9129801769515064, "grad_norm": 1.101849913597107, "learning_rate": 8.1475e-05, "loss": 0.3718, "step": 16304 }, { "epoch": 0.9130361742636354, "grad_norm": 1.3666799068450928, "learning_rate": 8.148e-05, "loss": 0.3794, "step": 16305 }, { "epoch": 0.9130921715757644, "grad_norm": 1.3681020736694336, "learning_rate": 8.1485e-05, "loss": 0.5026, "step": 16306 }, { "epoch": 0.9131481688878934, "grad_norm": 1.272249698638916, "learning_rate": 8.149e-05, "loss": 0.3594, "step": 16307 }, { "epoch": 0.9132041662000224, "grad_norm": 1.4186328649520874, "learning_rate": 8.1495e-05, "loss": 0.5804, "step": 16308 }, { "epoch": 0.9132601635121514, "grad_norm": 1.6269071102142334, "learning_rate": 8.15e-05, "loss": 0.4691, "step": 16309 }, { "epoch": 0.9133161608242805, "grad_norm": 1.2336229085922241, "learning_rate": 8.150500000000001e-05, "loss": 0.5393, "step": 16310 }, { "epoch": 0.9133721581364095, "grad_norm": 1.4158024787902832, "learning_rate": 8.151000000000001e-05, "loss": 0.4173, "step": 16311 }, { "epoch": 0.9134281554485385, "grad_norm": 1.4624764919281006, "learning_rate": 8.151500000000001e-05, "loss": 0.4952, "step": 16312 }, { "epoch": 0.9134841527606675, "grad_norm": 1.443229079246521, "learning_rate": 8.152e-05, "loss": 0.6761, "step": 16313 }, { "epoch": 0.9135401500727965, "grad_norm": 1.2824628353118896, "learning_rate": 8.1525e-05, "loss": 0.6185, "step": 16314 }, { "epoch": 
0.9135961473849256, "grad_norm": 1.2834609746932983, "learning_rate": 8.153e-05, "loss": 0.4322, "step": 16315 }, { "epoch": 0.9136521446970546, "grad_norm": 1.5608112812042236, "learning_rate": 8.153500000000001e-05, "loss": 0.5059, "step": 16316 }, { "epoch": 0.9137081420091836, "grad_norm": 1.5314184427261353, "learning_rate": 8.154000000000001e-05, "loss": 0.415, "step": 16317 }, { "epoch": 0.9137641393213126, "grad_norm": 1.286604881286621, "learning_rate": 8.1545e-05, "loss": 0.5446, "step": 16318 }, { "epoch": 0.9138201366334416, "grad_norm": 1.4087798595428467, "learning_rate": 8.155e-05, "loss": 0.4123, "step": 16319 }, { "epoch": 0.9138761339455707, "grad_norm": 1.2750279903411865, "learning_rate": 8.1555e-05, "loss": 0.5283, "step": 16320 }, { "epoch": 0.9139321312576997, "grad_norm": 1.311269760131836, "learning_rate": 8.156e-05, "loss": 0.6315, "step": 16321 }, { "epoch": 0.9139881285698287, "grad_norm": 1.63417387008667, "learning_rate": 8.1565e-05, "loss": 0.4848, "step": 16322 }, { "epoch": 0.9140441258819577, "grad_norm": 1.385966181755066, "learning_rate": 8.157e-05, "loss": 0.4425, "step": 16323 }, { "epoch": 0.9141001231940867, "grad_norm": 1.4401918649673462, "learning_rate": 8.1575e-05, "loss": 0.4885, "step": 16324 }, { "epoch": 0.9141561205062158, "grad_norm": 1.2513331174850464, "learning_rate": 8.158e-05, "loss": 0.4367, "step": 16325 }, { "epoch": 0.9142121178183448, "grad_norm": 1.186728596687317, "learning_rate": 8.1585e-05, "loss": 0.5121, "step": 16326 }, { "epoch": 0.9142681151304738, "grad_norm": 1.219286322593689, "learning_rate": 8.159e-05, "loss": 0.3755, "step": 16327 }, { "epoch": 0.9143241124426028, "grad_norm": 1.8178082704544067, "learning_rate": 8.1595e-05, "loss": 0.3789, "step": 16328 }, { "epoch": 0.9143801097547317, "grad_norm": 1.340780258178711, "learning_rate": 8.16e-05, "loss": 0.4837, "step": 16329 }, { "epoch": 0.9144361070668607, "grad_norm": 1.390148401260376, "learning_rate": 8.160500000000002e-05, "loss": 
0.4514, "step": 16330 }, { "epoch": 0.9144921043789898, "grad_norm": 1.273046612739563, "learning_rate": 8.161000000000001e-05, "loss": 0.4115, "step": 16331 }, { "epoch": 0.9145481016911188, "grad_norm": 1.2908871173858643, "learning_rate": 8.161500000000001e-05, "loss": 0.4347, "step": 16332 }, { "epoch": 0.9146040990032478, "grad_norm": 1.2404327392578125, "learning_rate": 8.162000000000001e-05, "loss": 0.3864, "step": 16333 }, { "epoch": 0.9146600963153768, "grad_norm": 1.2438278198242188, "learning_rate": 8.1625e-05, "loss": 0.4607, "step": 16334 }, { "epoch": 0.9147160936275058, "grad_norm": 1.335219383239746, "learning_rate": 8.163e-05, "loss": 0.4362, "step": 16335 }, { "epoch": 0.9147720909396349, "grad_norm": 1.4740371704101562, "learning_rate": 8.1635e-05, "loss": 0.4317, "step": 16336 }, { "epoch": 0.9148280882517639, "grad_norm": 1.3565070629119873, "learning_rate": 8.164000000000001e-05, "loss": 0.4093, "step": 16337 }, { "epoch": 0.9148840855638929, "grad_norm": 1.1543887853622437, "learning_rate": 8.164500000000001e-05, "loss": 0.4376, "step": 16338 }, { "epoch": 0.9149400828760219, "grad_norm": 1.3757209777832031, "learning_rate": 8.165e-05, "loss": 0.356, "step": 16339 }, { "epoch": 0.9149960801881509, "grad_norm": 1.2366504669189453, "learning_rate": 8.1655e-05, "loss": 0.3948, "step": 16340 }, { "epoch": 0.91505207750028, "grad_norm": 1.3505820035934448, "learning_rate": 8.166e-05, "loss": 0.4726, "step": 16341 }, { "epoch": 0.915108074812409, "grad_norm": 1.359368920326233, "learning_rate": 8.1665e-05, "loss": 0.4518, "step": 16342 }, { "epoch": 0.915164072124538, "grad_norm": 1.362165093421936, "learning_rate": 8.167e-05, "loss": 0.5659, "step": 16343 }, { "epoch": 0.915220069436667, "grad_norm": 1.3214900493621826, "learning_rate": 8.1675e-05, "loss": 0.4294, "step": 16344 }, { "epoch": 0.915276066748796, "grad_norm": 1.562992811203003, "learning_rate": 8.168e-05, "loss": 0.4319, "step": 16345 }, { "epoch": 0.915332064060925, "grad_norm": 
1.7184698581695557, "learning_rate": 8.1685e-05, "loss": 0.5534, "step": 16346 }, { "epoch": 0.9153880613730541, "grad_norm": 1.6614658832550049, "learning_rate": 8.169e-05, "loss": 0.5746, "step": 16347 }, { "epoch": 0.9154440586851831, "grad_norm": 1.4695688486099243, "learning_rate": 8.1695e-05, "loss": 0.5482, "step": 16348 }, { "epoch": 0.9155000559973121, "grad_norm": 1.4225592613220215, "learning_rate": 8.17e-05, "loss": 0.5707, "step": 16349 }, { "epoch": 0.9155560533094411, "grad_norm": 1.2848695516586304, "learning_rate": 8.1705e-05, "loss": 0.344, "step": 16350 }, { "epoch": 0.9156120506215701, "grad_norm": 1.327746868133545, "learning_rate": 8.171e-05, "loss": 0.5246, "step": 16351 }, { "epoch": 0.9156680479336992, "grad_norm": 1.410145878791809, "learning_rate": 8.171500000000001e-05, "loss": 0.5809, "step": 16352 }, { "epoch": 0.9157240452458282, "grad_norm": 1.1476210355758667, "learning_rate": 8.172000000000001e-05, "loss": 0.3379, "step": 16353 }, { "epoch": 0.9157800425579572, "grad_norm": 1.2684223651885986, "learning_rate": 8.172500000000001e-05, "loss": 0.4586, "step": 16354 }, { "epoch": 0.9158360398700862, "grad_norm": 1.221681833267212, "learning_rate": 8.173e-05, "loss": 0.4008, "step": 16355 }, { "epoch": 0.9158920371822152, "grad_norm": 1.29313325881958, "learning_rate": 8.1735e-05, "loss": 0.3409, "step": 16356 }, { "epoch": 0.9159480344943443, "grad_norm": 1.157299518585205, "learning_rate": 8.174e-05, "loss": 0.4208, "step": 16357 }, { "epoch": 0.9160040318064733, "grad_norm": 1.4694918394088745, "learning_rate": 8.174500000000001e-05, "loss": 0.6095, "step": 16358 }, { "epoch": 0.9160600291186023, "grad_norm": 1.538213849067688, "learning_rate": 8.175000000000001e-05, "loss": 0.4755, "step": 16359 }, { "epoch": 0.9161160264307313, "grad_norm": 1.3335171937942505, "learning_rate": 8.1755e-05, "loss": 0.4496, "step": 16360 }, { "epoch": 0.9161720237428603, "grad_norm": 1.4281189441680908, "learning_rate": 8.176e-05, "loss": 0.4759, 
"step": 16361 }, { "epoch": 0.9162280210549893, "grad_norm": 1.1768161058425903, "learning_rate": 8.1765e-05, "loss": 0.3872, "step": 16362 }, { "epoch": 0.9162840183671184, "grad_norm": 1.4018878936767578, "learning_rate": 8.177e-05, "loss": 0.4976, "step": 16363 }, { "epoch": 0.9163400156792474, "grad_norm": 1.1655353307724, "learning_rate": 8.1775e-05, "loss": 0.4554, "step": 16364 }, { "epoch": 0.9163960129913764, "grad_norm": 1.2682573795318604, "learning_rate": 8.178e-05, "loss": 0.3436, "step": 16365 }, { "epoch": 0.9164520103035054, "grad_norm": 1.6166516542434692, "learning_rate": 8.1785e-05, "loss": 0.4677, "step": 16366 }, { "epoch": 0.9165080076156344, "grad_norm": 1.3583062887191772, "learning_rate": 8.179e-05, "loss": 0.4421, "step": 16367 }, { "epoch": 0.9165640049277635, "grad_norm": 1.5479578971862793, "learning_rate": 8.1795e-05, "loss": 0.6007, "step": 16368 }, { "epoch": 0.9166200022398925, "grad_norm": 1.1865326166152954, "learning_rate": 8.18e-05, "loss": 0.4101, "step": 16369 }, { "epoch": 0.9166759995520215, "grad_norm": 1.5260560512542725, "learning_rate": 8.180500000000001e-05, "loss": 0.5892, "step": 16370 }, { "epoch": 0.9167319968641505, "grad_norm": 1.3571499586105347, "learning_rate": 8.181e-05, "loss": 0.3783, "step": 16371 }, { "epoch": 0.9167879941762795, "grad_norm": 1.2588247060775757, "learning_rate": 8.1815e-05, "loss": 0.4172, "step": 16372 }, { "epoch": 0.9168439914884086, "grad_norm": 1.2398922443389893, "learning_rate": 8.182000000000001e-05, "loss": 0.4312, "step": 16373 }, { "epoch": 0.9168999888005376, "grad_norm": 1.4239126443862915, "learning_rate": 8.182500000000001e-05, "loss": 0.428, "step": 16374 }, { "epoch": 0.9169559861126666, "grad_norm": 1.1611047983169556, "learning_rate": 8.183000000000001e-05, "loss": 0.3953, "step": 16375 }, { "epoch": 0.9170119834247956, "grad_norm": 1.212927222251892, "learning_rate": 8.1835e-05, "loss": 0.4476, "step": 16376 }, { "epoch": 0.9170679807369246, "grad_norm": 
1.2236628532409668, "learning_rate": 8.184e-05, "loss": 0.4802, "step": 16377 }, { "epoch": 0.9171239780490537, "grad_norm": 1.3253746032714844, "learning_rate": 8.1845e-05, "loss": 0.5255, "step": 16378 }, { "epoch": 0.9171799753611827, "grad_norm": 1.1778866052627563, "learning_rate": 8.185000000000001e-05, "loss": 0.464, "step": 16379 }, { "epoch": 0.9172359726733117, "grad_norm": 1.138843059539795, "learning_rate": 8.185500000000001e-05, "loss": 0.331, "step": 16380 }, { "epoch": 0.9172919699854407, "grad_norm": 1.3189870119094849, "learning_rate": 8.186e-05, "loss": 0.397, "step": 16381 }, { "epoch": 0.9173479672975697, "grad_norm": 1.7547982931137085, "learning_rate": 8.1865e-05, "loss": 0.5269, "step": 16382 }, { "epoch": 0.9174039646096988, "grad_norm": 1.3204976320266724, "learning_rate": 8.187e-05, "loss": 0.3563, "step": 16383 }, { "epoch": 0.9174599619218278, "grad_norm": 1.531217098236084, "learning_rate": 8.1875e-05, "loss": 0.6081, "step": 16384 }, { "epoch": 0.9175159592339568, "grad_norm": 1.2082085609436035, "learning_rate": 8.188e-05, "loss": 0.3193, "step": 16385 }, { "epoch": 0.9175719565460858, "grad_norm": 1.1349927186965942, "learning_rate": 8.1885e-05, "loss": 0.3463, "step": 16386 }, { "epoch": 0.9176279538582148, "grad_norm": 1.5495916604995728, "learning_rate": 8.189e-05, "loss": 0.4939, "step": 16387 }, { "epoch": 0.9176839511703438, "grad_norm": 1.2847042083740234, "learning_rate": 8.1895e-05, "loss": 0.5711, "step": 16388 }, { "epoch": 0.9177399484824729, "grad_norm": 1.3056803941726685, "learning_rate": 8.19e-05, "loss": 0.3994, "step": 16389 }, { "epoch": 0.9177959457946019, "grad_norm": 1.2236343622207642, "learning_rate": 8.1905e-05, "loss": 0.3643, "step": 16390 }, { "epoch": 0.9178519431067309, "grad_norm": 1.272200584411621, "learning_rate": 8.191000000000001e-05, "loss": 0.4945, "step": 16391 }, { "epoch": 0.9179079404188599, "grad_norm": 1.504387378692627, "learning_rate": 8.1915e-05, "loss": 0.4055, "step": 16392 }, { 
"epoch": 0.9179639377309889, "grad_norm": 1.4934040307998657, "learning_rate": 8.192e-05, "loss": 0.4389, "step": 16393 }, { "epoch": 0.918019935043118, "grad_norm": 1.3719617128372192, "learning_rate": 8.192500000000001e-05, "loss": 0.4529, "step": 16394 }, { "epoch": 0.918075932355247, "grad_norm": 1.6728545427322388, "learning_rate": 8.193000000000001e-05, "loss": 0.4228, "step": 16395 }, { "epoch": 0.918131929667376, "grad_norm": 1.1509687900543213, "learning_rate": 8.193500000000001e-05, "loss": 0.341, "step": 16396 }, { "epoch": 0.918187926979505, "grad_norm": 1.6074631214141846, "learning_rate": 8.194e-05, "loss": 0.5055, "step": 16397 }, { "epoch": 0.918243924291634, "grad_norm": 1.5949623584747314, "learning_rate": 8.1945e-05, "loss": 0.4419, "step": 16398 }, { "epoch": 0.9182999216037631, "grad_norm": 1.4974658489227295, "learning_rate": 8.195e-05, "loss": 0.4307, "step": 16399 }, { "epoch": 0.9183559189158921, "grad_norm": 1.205846905708313, "learning_rate": 8.195500000000001e-05, "loss": 0.4287, "step": 16400 }, { "epoch": 0.9184119162280211, "grad_norm": 1.346282958984375, "learning_rate": 8.196000000000001e-05, "loss": 0.4806, "step": 16401 }, { "epoch": 0.9184679135401501, "grad_norm": 1.577929139137268, "learning_rate": 8.1965e-05, "loss": 0.5559, "step": 16402 }, { "epoch": 0.9185239108522791, "grad_norm": 1.194756269454956, "learning_rate": 8.197e-05, "loss": 0.42, "step": 16403 }, { "epoch": 0.9185799081644082, "grad_norm": 1.3071774244308472, "learning_rate": 8.1975e-05, "loss": 0.4382, "step": 16404 }, { "epoch": 0.9186359054765372, "grad_norm": 1.2871538400650024, "learning_rate": 8.198e-05, "loss": 0.5422, "step": 16405 }, { "epoch": 0.9186919027886662, "grad_norm": 1.2749103307724, "learning_rate": 8.1985e-05, "loss": 0.5609, "step": 16406 }, { "epoch": 0.9187479001007952, "grad_norm": 1.362830638885498, "learning_rate": 8.199e-05, "loss": 0.5608, "step": 16407 }, { "epoch": 0.9188038974129242, "grad_norm": 1.5428667068481445, 
"learning_rate": 8.1995e-05, "loss": 0.4483, "step": 16408 }, { "epoch": 0.9188598947250533, "grad_norm": 1.3721121549606323, "learning_rate": 8.2e-05, "loss": 0.4578, "step": 16409 }, { "epoch": 0.9189158920371823, "grad_norm": 1.4253727197647095, "learning_rate": 8.2005e-05, "loss": 0.4589, "step": 16410 }, { "epoch": 0.9189718893493113, "grad_norm": 1.0003160238265991, "learning_rate": 8.201000000000001e-05, "loss": 0.3628, "step": 16411 }, { "epoch": 0.9190278866614402, "grad_norm": 1.298473834991455, "learning_rate": 8.2015e-05, "loss": 0.3898, "step": 16412 }, { "epoch": 0.9190838839735692, "grad_norm": 1.5291424989700317, "learning_rate": 8.202e-05, "loss": 0.5072, "step": 16413 }, { "epoch": 0.9191398812856982, "grad_norm": 1.2101683616638184, "learning_rate": 8.2025e-05, "loss": 0.4064, "step": 16414 }, { "epoch": 0.9191958785978273, "grad_norm": 1.035366415977478, "learning_rate": 8.203000000000001e-05, "loss": 0.3954, "step": 16415 }, { "epoch": 0.9192518759099563, "grad_norm": 1.2232056856155396, "learning_rate": 8.203500000000001e-05, "loss": 0.3527, "step": 16416 }, { "epoch": 0.9193078732220853, "grad_norm": 1.4133915901184082, "learning_rate": 8.204000000000001e-05, "loss": 0.4413, "step": 16417 }, { "epoch": 0.9193638705342143, "grad_norm": 1.235101580619812, "learning_rate": 8.2045e-05, "loss": 0.3635, "step": 16418 }, { "epoch": 0.9194198678463433, "grad_norm": 1.292309045791626, "learning_rate": 8.205e-05, "loss": 0.5364, "step": 16419 }, { "epoch": 0.9194758651584723, "grad_norm": 1.4182237386703491, "learning_rate": 8.2055e-05, "loss": 0.3983, "step": 16420 }, { "epoch": 0.9195318624706014, "grad_norm": 1.3632667064666748, "learning_rate": 8.206e-05, "loss": 0.4713, "step": 16421 }, { "epoch": 0.9195878597827304, "grad_norm": 1.3614498376846313, "learning_rate": 8.206500000000001e-05, "loss": 0.4299, "step": 16422 }, { "epoch": 0.9196438570948594, "grad_norm": 0.9320577383041382, "learning_rate": 8.207e-05, "loss": 0.2702, "step": 16423 }, { 
"epoch": 0.9196998544069884, "grad_norm": 1.6794301271438599, "learning_rate": 8.2075e-05, "loss": 0.4165, "step": 16424 }, { "epoch": 0.9197558517191174, "grad_norm": 1.3629662990570068, "learning_rate": 8.208e-05, "loss": 0.39, "step": 16425 }, { "epoch": 0.9198118490312465, "grad_norm": 1.3306039571762085, "learning_rate": 8.2085e-05, "loss": 0.3896, "step": 16426 }, { "epoch": 0.9198678463433755, "grad_norm": 1.297487497329712, "learning_rate": 8.209e-05, "loss": 0.4326, "step": 16427 }, { "epoch": 0.9199238436555045, "grad_norm": 1.2556846141815186, "learning_rate": 8.2095e-05, "loss": 0.4173, "step": 16428 }, { "epoch": 0.9199798409676335, "grad_norm": 1.4238240718841553, "learning_rate": 8.21e-05, "loss": 0.4721, "step": 16429 }, { "epoch": 0.9200358382797625, "grad_norm": 1.0150232315063477, "learning_rate": 8.2105e-05, "loss": 0.3388, "step": 16430 }, { "epoch": 0.9200918355918916, "grad_norm": 1.9842538833618164, "learning_rate": 8.211000000000001e-05, "loss": 0.5868, "step": 16431 }, { "epoch": 0.9201478329040206, "grad_norm": 1.5507006645202637, "learning_rate": 8.211500000000001e-05, "loss": 0.5215, "step": 16432 }, { "epoch": 0.9202038302161496, "grad_norm": 1.2344601154327393, "learning_rate": 8.212e-05, "loss": 0.5651, "step": 16433 }, { "epoch": 0.9202598275282786, "grad_norm": 2.3398501873016357, "learning_rate": 8.2125e-05, "loss": 0.5618, "step": 16434 }, { "epoch": 0.9203158248404076, "grad_norm": 1.16732919216156, "learning_rate": 8.213e-05, "loss": 0.5229, "step": 16435 }, { "epoch": 0.9203718221525367, "grad_norm": 1.2803624868392944, "learning_rate": 8.213500000000001e-05, "loss": 0.4516, "step": 16436 }, { "epoch": 0.9204278194646657, "grad_norm": 1.6683622598648071, "learning_rate": 8.214000000000001e-05, "loss": 0.5961, "step": 16437 }, { "epoch": 0.9204838167767947, "grad_norm": 1.0868639945983887, "learning_rate": 8.214500000000001e-05, "loss": 0.4601, "step": 16438 }, { "epoch": 0.9205398140889237, "grad_norm": 1.460677981376648, 
"learning_rate": 8.215e-05, "loss": 0.5205, "step": 16439 }, { "epoch": 0.9205958114010527, "grad_norm": 1.5984394550323486, "learning_rate": 8.2155e-05, "loss": 0.4798, "step": 16440 }, { "epoch": 0.9206518087131818, "grad_norm": 1.2280601263046265, "learning_rate": 8.216e-05, "loss": 0.5228, "step": 16441 }, { "epoch": 0.9207078060253108, "grad_norm": 1.8393840789794922, "learning_rate": 8.2165e-05, "loss": 0.4127, "step": 16442 }, { "epoch": 0.9207638033374398, "grad_norm": 1.3817163705825806, "learning_rate": 8.217000000000001e-05, "loss": 0.4368, "step": 16443 }, { "epoch": 0.9208198006495688, "grad_norm": 2.4562225341796875, "learning_rate": 8.2175e-05, "loss": 0.6111, "step": 16444 }, { "epoch": 0.9208757979616978, "grad_norm": 1.3783890008926392, "learning_rate": 8.218e-05, "loss": 0.3892, "step": 16445 }, { "epoch": 0.9209317952738268, "grad_norm": 1.3932751417160034, "learning_rate": 8.2185e-05, "loss": 0.4043, "step": 16446 }, { "epoch": 0.9209877925859559, "grad_norm": 1.3778951168060303, "learning_rate": 8.219e-05, "loss": 0.4582, "step": 16447 }, { "epoch": 0.9210437898980849, "grad_norm": 1.257495641708374, "learning_rate": 8.2195e-05, "loss": 0.4668, "step": 16448 }, { "epoch": 0.9210997872102139, "grad_norm": 1.1436564922332764, "learning_rate": 8.22e-05, "loss": 0.3684, "step": 16449 }, { "epoch": 0.9211557845223429, "grad_norm": 1.3091379404067993, "learning_rate": 8.2205e-05, "loss": 0.398, "step": 16450 }, { "epoch": 0.9212117818344719, "grad_norm": 1.2673567533493042, "learning_rate": 8.221000000000001e-05, "loss": 0.4482, "step": 16451 }, { "epoch": 0.921267779146601, "grad_norm": 1.4292402267456055, "learning_rate": 8.221500000000001e-05, "loss": 0.5005, "step": 16452 }, { "epoch": 0.92132377645873, "grad_norm": 1.2949180603027344, "learning_rate": 8.222000000000001e-05, "loss": 0.3964, "step": 16453 }, { "epoch": 0.921379773770859, "grad_norm": 1.2452545166015625, "learning_rate": 8.2225e-05, "loss": 0.4522, "step": 16454 }, { "epoch": 
0.921435771082988, "grad_norm": 1.1206986904144287, "learning_rate": 8.223e-05, "loss": 0.3496, "step": 16455 }, { "epoch": 0.921491768395117, "grad_norm": 1.346388578414917, "learning_rate": 8.2235e-05, "loss": 0.405, "step": 16456 }, { "epoch": 0.9215477657072461, "grad_norm": 1.2031829357147217, "learning_rate": 8.224000000000001e-05, "loss": 0.3709, "step": 16457 }, { "epoch": 0.9216037630193751, "grad_norm": 1.5863611698150635, "learning_rate": 8.224500000000001e-05, "loss": 0.556, "step": 16458 }, { "epoch": 0.9216597603315041, "grad_norm": 1.2165085077285767, "learning_rate": 8.225000000000001e-05, "loss": 0.3987, "step": 16459 }, { "epoch": 0.9217157576436331, "grad_norm": 1.18646240234375, "learning_rate": 8.2255e-05, "loss": 0.4075, "step": 16460 }, { "epoch": 0.9217717549557621, "grad_norm": 1.255533218383789, "learning_rate": 8.226e-05, "loss": 0.3696, "step": 16461 }, { "epoch": 0.9218277522678912, "grad_norm": 1.4369428157806396, "learning_rate": 8.2265e-05, "loss": 0.5192, "step": 16462 }, { "epoch": 0.9218837495800202, "grad_norm": 1.2948023080825806, "learning_rate": 8.227e-05, "loss": 0.4968, "step": 16463 }, { "epoch": 0.9219397468921492, "grad_norm": 1.2731658220291138, "learning_rate": 8.227500000000001e-05, "loss": 0.455, "step": 16464 }, { "epoch": 0.9219957442042782, "grad_norm": 1.2603859901428223, "learning_rate": 8.228e-05, "loss": 0.3907, "step": 16465 }, { "epoch": 0.9220517415164072, "grad_norm": 1.4726279973983765, "learning_rate": 8.2285e-05, "loss": 0.4225, "step": 16466 }, { "epoch": 0.9221077388285362, "grad_norm": 1.221868634223938, "learning_rate": 8.229e-05, "loss": 0.5006, "step": 16467 }, { "epoch": 0.9221637361406653, "grad_norm": 1.2886098623275757, "learning_rate": 8.2295e-05, "loss": 0.507, "step": 16468 }, { "epoch": 0.9222197334527943, "grad_norm": 1.2557547092437744, "learning_rate": 8.23e-05, "loss": 0.4825, "step": 16469 }, { "epoch": 0.9222757307649233, "grad_norm": 1.2773897647857666, "learning_rate": 
8.230499999999999e-05, "loss": 0.3528, "step": 16470 }, { "epoch": 0.9223317280770523, "grad_norm": 1.585194706916809, "learning_rate": 8.231e-05, "loss": 0.4966, "step": 16471 }, { "epoch": 0.9223877253891813, "grad_norm": 1.293116807937622, "learning_rate": 8.231500000000001e-05, "loss": 0.653, "step": 16472 }, { "epoch": 0.9224437227013104, "grad_norm": 1.1291966438293457, "learning_rate": 8.232000000000001e-05, "loss": 0.4101, "step": 16473 }, { "epoch": 0.9224997200134394, "grad_norm": 1.5031859874725342, "learning_rate": 8.232500000000001e-05, "loss": 0.6122, "step": 16474 }, { "epoch": 0.9225557173255684, "grad_norm": 1.3225125074386597, "learning_rate": 8.233e-05, "loss": 0.346, "step": 16475 }, { "epoch": 0.9226117146376974, "grad_norm": 1.2840086221694946, "learning_rate": 8.2335e-05, "loss": 0.395, "step": 16476 }, { "epoch": 0.9226677119498264, "grad_norm": 1.2742666006088257, "learning_rate": 8.234e-05, "loss": 0.4647, "step": 16477 }, { "epoch": 0.9227237092619555, "grad_norm": 1.4567025899887085, "learning_rate": 8.234500000000001e-05, "loss": 0.4579, "step": 16478 }, { "epoch": 0.9227797065740845, "grad_norm": 1.147629976272583, "learning_rate": 8.235000000000001e-05, "loss": 0.4129, "step": 16479 }, { "epoch": 0.9228357038862135, "grad_norm": 1.72379469871521, "learning_rate": 8.235500000000001e-05, "loss": 0.5756, "step": 16480 }, { "epoch": 0.9228917011983425, "grad_norm": 1.4656895399093628, "learning_rate": 8.236e-05, "loss": 0.4804, "step": 16481 }, { "epoch": 0.9229476985104715, "grad_norm": 1.4475468397140503, "learning_rate": 8.2365e-05, "loss": 0.4392, "step": 16482 }, { "epoch": 0.9230036958226006, "grad_norm": 1.415730595588684, "learning_rate": 8.237e-05, "loss": 0.451, "step": 16483 }, { "epoch": 0.9230596931347296, "grad_norm": 1.2868967056274414, "learning_rate": 8.2375e-05, "loss": 0.5013, "step": 16484 }, { "epoch": 0.9231156904468586, "grad_norm": 1.1820437908172607, "learning_rate": 8.238000000000001e-05, "loss": 0.4294, "step": 
16485 }, { "epoch": 0.9231716877589876, "grad_norm": 1.4844865798950195, "learning_rate": 8.2385e-05, "loss": 0.5896, "step": 16486 }, { "epoch": 0.9232276850711166, "grad_norm": 1.2885370254516602, "learning_rate": 8.239e-05, "loss": 0.443, "step": 16487 }, { "epoch": 0.9232836823832457, "grad_norm": 1.252520203590393, "learning_rate": 8.2395e-05, "loss": 0.3869, "step": 16488 }, { "epoch": 0.9233396796953747, "grad_norm": 1.2477056980133057, "learning_rate": 8.24e-05, "loss": 0.5054, "step": 16489 }, { "epoch": 0.9233956770075037, "grad_norm": 1.156029462814331, "learning_rate": 8.2405e-05, "loss": 0.4791, "step": 16490 }, { "epoch": 0.9234516743196327, "grad_norm": 1.2756543159484863, "learning_rate": 8.241e-05, "loss": 0.4116, "step": 16491 }, { "epoch": 0.9235076716317617, "grad_norm": 1.3437877893447876, "learning_rate": 8.2415e-05, "loss": 0.4112, "step": 16492 }, { "epoch": 0.9235636689438907, "grad_norm": 4.821994304656982, "learning_rate": 8.242000000000001e-05, "loss": 0.4393, "step": 16493 }, { "epoch": 0.9236196662560197, "grad_norm": 4.62672758102417, "learning_rate": 8.242500000000001e-05, "loss": 0.4004, "step": 16494 }, { "epoch": 0.9236756635681487, "grad_norm": 1.2945533990859985, "learning_rate": 8.243000000000001e-05, "loss": 0.3963, "step": 16495 }, { "epoch": 0.9237316608802777, "grad_norm": 1.3269444704055786, "learning_rate": 8.2435e-05, "loss": 0.343, "step": 16496 }, { "epoch": 0.9237876581924067, "grad_norm": 1.295454740524292, "learning_rate": 8.244e-05, "loss": 0.4811, "step": 16497 }, { "epoch": 0.9238436555045357, "grad_norm": 1.302964448928833, "learning_rate": 8.2445e-05, "loss": 0.5828, "step": 16498 }, { "epoch": 0.9238996528166648, "grad_norm": 1.400241494178772, "learning_rate": 8.245e-05, "loss": 0.5116, "step": 16499 }, { "epoch": 0.9239556501287938, "grad_norm": 1.2892831563949585, "learning_rate": 8.245500000000001e-05, "loss": 0.4536, "step": 16500 }, { "epoch": 0.9240116474409228, "grad_norm": 1.1384472846984863, 
"learning_rate": 8.246e-05, "loss": 0.3839, "step": 16501 }, { "epoch": 0.9240676447530518, "grad_norm": 1.221259355545044, "learning_rate": 8.2465e-05, "loss": 0.3242, "step": 16502 }, { "epoch": 0.9241236420651808, "grad_norm": 1.291494607925415, "learning_rate": 8.247e-05, "loss": 0.4998, "step": 16503 }, { "epoch": 0.9241796393773098, "grad_norm": 1.2921441793441772, "learning_rate": 8.2475e-05, "loss": 0.4613, "step": 16504 }, { "epoch": 0.9242356366894389, "grad_norm": 1.4710813760757446, "learning_rate": 8.248e-05, "loss": 0.369, "step": 16505 }, { "epoch": 0.9242916340015679, "grad_norm": 1.471215009689331, "learning_rate": 8.248500000000001e-05, "loss": 0.4822, "step": 16506 }, { "epoch": 0.9243476313136969, "grad_norm": 1.4474198818206787, "learning_rate": 8.249e-05, "loss": 0.5187, "step": 16507 }, { "epoch": 0.9244036286258259, "grad_norm": 1.4380496740341187, "learning_rate": 8.2495e-05, "loss": 0.5645, "step": 16508 }, { "epoch": 0.9244596259379549, "grad_norm": 1.4628536701202393, "learning_rate": 8.25e-05, "loss": 0.4177, "step": 16509 }, { "epoch": 0.924515623250084, "grad_norm": 1.2975531816482544, "learning_rate": 8.2505e-05, "loss": 0.513, "step": 16510 }, { "epoch": 0.924571620562213, "grad_norm": 1.196489930152893, "learning_rate": 8.251e-05, "loss": 0.4424, "step": 16511 }, { "epoch": 0.924627617874342, "grad_norm": 1.557321548461914, "learning_rate": 8.2515e-05, "loss": 0.5826, "step": 16512 }, { "epoch": 0.924683615186471, "grad_norm": 1.4021459817886353, "learning_rate": 8.252e-05, "loss": 0.681, "step": 16513 }, { "epoch": 0.9247396124986, "grad_norm": 1.1830254793167114, "learning_rate": 8.252500000000001e-05, "loss": 0.3778, "step": 16514 }, { "epoch": 0.9247956098107291, "grad_norm": 1.2348469495773315, "learning_rate": 8.253000000000001e-05, "loss": 0.393, "step": 16515 }, { "epoch": 0.9248516071228581, "grad_norm": 1.3843134641647339, "learning_rate": 8.253500000000001e-05, "loss": 0.4101, "step": 16516 }, { "epoch": 
0.9249076044349871, "grad_norm": 1.307335376739502, "learning_rate": 8.254e-05, "loss": 0.3418, "step": 16517 }, { "epoch": 0.9249636017471161, "grad_norm": 1.4313379526138306, "learning_rate": 8.2545e-05, "loss": 0.4476, "step": 16518 }, { "epoch": 0.9250195990592451, "grad_norm": 1.4691137075424194, "learning_rate": 8.255e-05, "loss": 0.457, "step": 16519 }, { "epoch": 0.9250755963713742, "grad_norm": 3.8030176162719727, "learning_rate": 8.2555e-05, "loss": 0.4801, "step": 16520 }, { "epoch": 0.9251315936835032, "grad_norm": 1.2685890197753906, "learning_rate": 8.256000000000001e-05, "loss": 0.4545, "step": 16521 }, { "epoch": 0.9251875909956322, "grad_norm": 1.1432857513427734, "learning_rate": 8.2565e-05, "loss": 0.3794, "step": 16522 }, { "epoch": 0.9252435883077612, "grad_norm": 1.3015574216842651, "learning_rate": 8.257e-05, "loss": 0.3819, "step": 16523 }, { "epoch": 0.9252995856198902, "grad_norm": 1.5656224489212036, "learning_rate": 8.2575e-05, "loss": 0.5617, "step": 16524 }, { "epoch": 0.9253555829320192, "grad_norm": 1.4016870260238647, "learning_rate": 8.258e-05, "loss": 0.609, "step": 16525 }, { "epoch": 0.9254115802441483, "grad_norm": 1.5549887418746948, "learning_rate": 8.2585e-05, "loss": 0.4389, "step": 16526 }, { "epoch": 0.9254675775562773, "grad_norm": 1.205660343170166, "learning_rate": 8.259000000000001e-05, "loss": 0.4418, "step": 16527 }, { "epoch": 0.9255235748684063, "grad_norm": 1.5776665210723877, "learning_rate": 8.2595e-05, "loss": 0.5697, "step": 16528 }, { "epoch": 0.9255795721805353, "grad_norm": 1.3645782470703125, "learning_rate": 8.26e-05, "loss": 0.4751, "step": 16529 }, { "epoch": 0.9256355694926643, "grad_norm": 1.3706555366516113, "learning_rate": 8.2605e-05, "loss": 0.4326, "step": 16530 }, { "epoch": 0.9256915668047934, "grad_norm": 1.230024814605713, "learning_rate": 8.261e-05, "loss": 0.3903, "step": 16531 }, { "epoch": 0.9257475641169224, "grad_norm": 1.4342620372772217, "learning_rate": 8.261500000000001e-05, 
"loss": 0.7193, "step": 16532 }, { "epoch": 0.9258035614290514, "grad_norm": 1.3708159923553467, "learning_rate": 8.262e-05, "loss": 0.4468, "step": 16533 }, { "epoch": 0.9258595587411804, "grad_norm": 1.3850219249725342, "learning_rate": 8.2625e-05, "loss": 0.5445, "step": 16534 }, { "epoch": 0.9259155560533094, "grad_norm": 1.1610344648361206, "learning_rate": 8.263000000000001e-05, "loss": 0.3923, "step": 16535 }, { "epoch": 0.9259715533654385, "grad_norm": 1.4086459875106812, "learning_rate": 8.263500000000001e-05, "loss": 0.5273, "step": 16536 }, { "epoch": 0.9260275506775675, "grad_norm": 1.4805762767791748, "learning_rate": 8.264000000000001e-05, "loss": 0.4365, "step": 16537 }, { "epoch": 0.9260835479896965, "grad_norm": 1.6059681177139282, "learning_rate": 8.2645e-05, "loss": 0.5667, "step": 16538 }, { "epoch": 0.9261395453018255, "grad_norm": 1.3339136838912964, "learning_rate": 8.265e-05, "loss": 0.3483, "step": 16539 }, { "epoch": 0.9261955426139545, "grad_norm": 1.4039710760116577, "learning_rate": 8.2655e-05, "loss": 0.3865, "step": 16540 }, { "epoch": 0.9262515399260836, "grad_norm": 1.1990165710449219, "learning_rate": 8.266e-05, "loss": 0.3419, "step": 16541 }, { "epoch": 0.9263075372382126, "grad_norm": 1.5688766241073608, "learning_rate": 8.266500000000001e-05, "loss": 0.5469, "step": 16542 }, { "epoch": 0.9263635345503416, "grad_norm": 1.3112396001815796, "learning_rate": 8.267e-05, "loss": 0.4534, "step": 16543 }, { "epoch": 0.9264195318624706, "grad_norm": 1.5226678848266602, "learning_rate": 8.2675e-05, "loss": 0.5196, "step": 16544 }, { "epoch": 0.9264755291745996, "grad_norm": 1.2306163311004639, "learning_rate": 8.268e-05, "loss": 0.4405, "step": 16545 }, { "epoch": 0.9265315264867287, "grad_norm": 1.2804595232009888, "learning_rate": 8.2685e-05, "loss": 0.4288, "step": 16546 }, { "epoch": 0.9265875237988577, "grad_norm": 1.1443030834197998, "learning_rate": 8.269e-05, "loss": 0.4405, "step": 16547 }, { "epoch": 0.9266435211109867, 
"grad_norm": 1.2803187370300293, "learning_rate": 8.269500000000001e-05, "loss": 0.4594, "step": 16548 }, { "epoch": 0.9266995184231157, "grad_norm": 1.303547978401184, "learning_rate": 8.27e-05, "loss": 0.3836, "step": 16549 }, { "epoch": 0.9267555157352447, "grad_norm": 1.2507973909378052, "learning_rate": 8.2705e-05, "loss": 0.5085, "step": 16550 }, { "epoch": 0.9268115130473737, "grad_norm": 1.3687591552734375, "learning_rate": 8.271e-05, "loss": 0.539, "step": 16551 }, { "epoch": 0.9268675103595028, "grad_norm": 1.1975895166397095, "learning_rate": 8.271500000000001e-05, "loss": 0.4621, "step": 16552 }, { "epoch": 0.9269235076716318, "grad_norm": 1.4076459407806396, "learning_rate": 8.272000000000001e-05, "loss": 0.4201, "step": 16553 }, { "epoch": 0.9269795049837608, "grad_norm": 1.4311432838439941, "learning_rate": 8.2725e-05, "loss": 0.5233, "step": 16554 }, { "epoch": 0.9270355022958898, "grad_norm": 1.3195104598999023, "learning_rate": 8.273e-05, "loss": 0.4051, "step": 16555 }, { "epoch": 0.9270914996080188, "grad_norm": 1.2419683933258057, "learning_rate": 8.273500000000001e-05, "loss": 0.4731, "step": 16556 }, { "epoch": 0.9271474969201479, "grad_norm": 1.3806172609329224, "learning_rate": 8.274000000000001e-05, "loss": 0.5336, "step": 16557 }, { "epoch": 0.9272034942322769, "grad_norm": 1.577425241470337, "learning_rate": 8.274500000000001e-05, "loss": 0.43, "step": 16558 }, { "epoch": 0.9272594915444059, "grad_norm": 1.5444074869155884, "learning_rate": 8.275e-05, "loss": 0.4589, "step": 16559 }, { "epoch": 0.9273154888565349, "grad_norm": 1.5758018493652344, "learning_rate": 8.2755e-05, "loss": 0.5047, "step": 16560 }, { "epoch": 0.9273714861686639, "grad_norm": 1.2698769569396973, "learning_rate": 8.276e-05, "loss": 0.4111, "step": 16561 }, { "epoch": 0.927427483480793, "grad_norm": 1.5544837713241577, "learning_rate": 8.2765e-05, "loss": 0.4808, "step": 16562 }, { "epoch": 0.927483480792922, "grad_norm": 1.2893900871276855, "learning_rate": 
8.277000000000001e-05, "loss": 0.4719, "step": 16563 }, { "epoch": 0.927539478105051, "grad_norm": 1.3675788640975952, "learning_rate": 8.2775e-05, "loss": 0.403, "step": 16564 }, { "epoch": 0.92759547541718, "grad_norm": 1.1784521341323853, "learning_rate": 8.278e-05, "loss": 0.4722, "step": 16565 }, { "epoch": 0.927651472729309, "grad_norm": 1.1031605005264282, "learning_rate": 8.2785e-05, "loss": 0.4225, "step": 16566 }, { "epoch": 0.927707470041438, "grad_norm": 1.4057633876800537, "learning_rate": 8.279e-05, "loss": 0.4327, "step": 16567 }, { "epoch": 0.9277634673535671, "grad_norm": 1.7156649827957153, "learning_rate": 8.2795e-05, "loss": 0.7462, "step": 16568 }, { "epoch": 0.9278194646656961, "grad_norm": 1.4485633373260498, "learning_rate": 8.28e-05, "loss": 0.4922, "step": 16569 }, { "epoch": 0.9278754619778251, "grad_norm": 1.3397701978683472, "learning_rate": 8.2805e-05, "loss": 0.499, "step": 16570 }, { "epoch": 0.9279314592899541, "grad_norm": 1.3101890087127686, "learning_rate": 8.281e-05, "loss": 0.4668, "step": 16571 }, { "epoch": 0.9279874566020831, "grad_norm": 1.1900054216384888, "learning_rate": 8.281500000000001e-05, "loss": 0.4898, "step": 16572 }, { "epoch": 0.9280434539142122, "grad_norm": 1.3926546573638916, "learning_rate": 8.282000000000001e-05, "loss": 0.5736, "step": 16573 }, { "epoch": 0.9280994512263412, "grad_norm": 1.7720388174057007, "learning_rate": 8.282500000000001e-05, "loss": 0.4774, "step": 16574 }, { "epoch": 0.9281554485384702, "grad_norm": 1.2900577783584595, "learning_rate": 8.283e-05, "loss": 0.3573, "step": 16575 }, { "epoch": 0.9282114458505992, "grad_norm": 1.13142991065979, "learning_rate": 8.2835e-05, "loss": 0.3229, "step": 16576 }, { "epoch": 0.9282674431627281, "grad_norm": 1.5441309213638306, "learning_rate": 8.284000000000001e-05, "loss": 0.5427, "step": 16577 }, { "epoch": 0.9283234404748572, "grad_norm": 1.2781037092208862, "learning_rate": 8.284500000000001e-05, "loss": 0.408, "step": 16578 }, { "epoch": 
0.9283794377869862, "grad_norm": 1.2270132303237915, "learning_rate": 8.285000000000001e-05, "loss": 0.3863, "step": 16579 }, { "epoch": 0.9284354350991152, "grad_norm": 1.265037178993225, "learning_rate": 8.2855e-05, "loss": 0.4642, "step": 16580 }, { "epoch": 0.9284914324112442, "grad_norm": 1.491784930229187, "learning_rate": 8.286e-05, "loss": 0.5653, "step": 16581 }, { "epoch": 0.9285474297233732, "grad_norm": 1.302323341369629, "learning_rate": 8.2865e-05, "loss": 0.3473, "step": 16582 }, { "epoch": 0.9286034270355022, "grad_norm": 1.445154070854187, "learning_rate": 8.287e-05, "loss": 0.4246, "step": 16583 }, { "epoch": 0.9286594243476313, "grad_norm": 1.2335448265075684, "learning_rate": 8.287500000000001e-05, "loss": 0.4162, "step": 16584 }, { "epoch": 0.9287154216597603, "grad_norm": 1.3359366655349731, "learning_rate": 8.288e-05, "loss": 0.448, "step": 16585 }, { "epoch": 0.9287714189718893, "grad_norm": 1.3777626752853394, "learning_rate": 8.2885e-05, "loss": 0.3824, "step": 16586 }, { "epoch": 0.9288274162840183, "grad_norm": 1.393613338470459, "learning_rate": 8.289e-05, "loss": 0.5083, "step": 16587 }, { "epoch": 0.9288834135961473, "grad_norm": 1.359660029411316, "learning_rate": 8.2895e-05, "loss": 0.4547, "step": 16588 }, { "epoch": 0.9289394109082764, "grad_norm": 1.5443047285079956, "learning_rate": 8.29e-05, "loss": 0.3964, "step": 16589 }, { "epoch": 0.9289954082204054, "grad_norm": 1.789296269416809, "learning_rate": 8.290499999999999e-05, "loss": 0.5635, "step": 16590 }, { "epoch": 0.9290514055325344, "grad_norm": 1.4631083011627197, "learning_rate": 8.291e-05, "loss": 0.359, "step": 16591 }, { "epoch": 0.9291074028446634, "grad_norm": 1.3464879989624023, "learning_rate": 8.291500000000002e-05, "loss": 0.5024, "step": 16592 }, { "epoch": 0.9291634001567924, "grad_norm": 1.7417012453079224, "learning_rate": 8.292000000000001e-05, "loss": 0.5396, "step": 16593 }, { "epoch": 0.9292193974689215, "grad_norm": 1.5067992210388184, "learning_rate": 
8.292500000000001e-05, "loss": 0.4364, "step": 16594 }, { "epoch": 0.9292753947810505, "grad_norm": 1.6404314041137695, "learning_rate": 8.293000000000001e-05, "loss": 0.5575, "step": 16595 }, { "epoch": 0.9293313920931795, "grad_norm": 1.6139005422592163, "learning_rate": 8.2935e-05, "loss": 0.6856, "step": 16596 }, { "epoch": 0.9293873894053085, "grad_norm": 3.3078603744506836, "learning_rate": 8.294e-05, "loss": 0.4122, "step": 16597 }, { "epoch": 0.9294433867174375, "grad_norm": 2.09658145904541, "learning_rate": 8.2945e-05, "loss": 0.5927, "step": 16598 }, { "epoch": 0.9294993840295666, "grad_norm": 1.4565671682357788, "learning_rate": 8.295000000000001e-05, "loss": 0.5263, "step": 16599 }, { "epoch": 0.9295553813416956, "grad_norm": 1.625476360321045, "learning_rate": 8.295500000000001e-05, "loss": 0.6797, "step": 16600 }, { "epoch": 0.9296113786538246, "grad_norm": 1.2399108409881592, "learning_rate": 8.296e-05, "loss": 0.4101, "step": 16601 }, { "epoch": 0.9296673759659536, "grad_norm": 1.4441423416137695, "learning_rate": 8.2965e-05, "loss": 0.4181, "step": 16602 }, { "epoch": 0.9297233732780826, "grad_norm": 1.3000669479370117, "learning_rate": 8.297e-05, "loss": 0.3524, "step": 16603 }, { "epoch": 0.9297793705902117, "grad_norm": 1.5105751752853394, "learning_rate": 8.2975e-05, "loss": 0.5565, "step": 16604 }, { "epoch": 0.9298353679023407, "grad_norm": 1.344003677368164, "learning_rate": 8.298000000000001e-05, "loss": 0.4283, "step": 16605 }, { "epoch": 0.9298913652144697, "grad_norm": 1.0473260879516602, "learning_rate": 8.2985e-05, "loss": 0.3195, "step": 16606 }, { "epoch": 0.9299473625265987, "grad_norm": 8.235751152038574, "learning_rate": 8.299e-05, "loss": 0.3389, "step": 16607 }, { "epoch": 0.9300033598387277, "grad_norm": 1.305876612663269, "learning_rate": 8.2995e-05, "loss": 0.4715, "step": 16608 }, { "epoch": 0.9300593571508567, "grad_norm": 1.1514440774917603, "learning_rate": 8.3e-05, "loss": 0.4138, "step": 16609 }, { "epoch": 
0.9301153544629858, "grad_norm": 1.5276670455932617, "learning_rate": 8.3005e-05, "loss": 0.4012, "step": 16610 }, { "epoch": 0.9301713517751148, "grad_norm": 1.2899521589279175, "learning_rate": 8.300999999999999e-05, "loss": 0.4227, "step": 16611 }, { "epoch": 0.9302273490872438, "grad_norm": 1.303055763244629, "learning_rate": 8.3015e-05, "loss": 0.5667, "step": 16612 }, { "epoch": 0.9302833463993728, "grad_norm": 1.1715697050094604, "learning_rate": 8.302000000000001e-05, "loss": 0.4125, "step": 16613 }, { "epoch": 0.9303393437115018, "grad_norm": 1.5779167413711548, "learning_rate": 8.302500000000001e-05, "loss": 0.5931, "step": 16614 }, { "epoch": 0.9303953410236309, "grad_norm": 1.1428412199020386, "learning_rate": 8.303000000000001e-05, "loss": 0.3914, "step": 16615 }, { "epoch": 0.9304513383357599, "grad_norm": 1.3851007223129272, "learning_rate": 8.303500000000001e-05, "loss": 0.567, "step": 16616 }, { "epoch": 0.9305073356478889, "grad_norm": 1.3200896978378296, "learning_rate": 8.304e-05, "loss": 0.3061, "step": 16617 }, { "epoch": 0.9305633329600179, "grad_norm": 1.2225103378295898, "learning_rate": 8.3045e-05, "loss": 0.2826, "step": 16618 }, { "epoch": 0.9306193302721469, "grad_norm": 1.1365717649459839, "learning_rate": 8.305e-05, "loss": 0.3784, "step": 16619 }, { "epoch": 0.930675327584276, "grad_norm": 1.8045048713684082, "learning_rate": 8.305500000000001e-05, "loss": 0.6675, "step": 16620 }, { "epoch": 0.930731324896405, "grad_norm": 1.4891531467437744, "learning_rate": 8.306000000000001e-05, "loss": 0.4465, "step": 16621 }, { "epoch": 0.930787322208534, "grad_norm": 1.1786224842071533, "learning_rate": 8.3065e-05, "loss": 0.4102, "step": 16622 }, { "epoch": 0.930843319520663, "grad_norm": 1.4224238395690918, "learning_rate": 8.307e-05, "loss": 0.4362, "step": 16623 }, { "epoch": 0.930899316832792, "grad_norm": 1.1878151893615723, "learning_rate": 8.3075e-05, "loss": 0.4253, "step": 16624 }, { "epoch": 0.930955314144921, "grad_norm": 
1.362742304801941, "learning_rate": 8.308e-05, "loss": 0.4014, "step": 16625 }, { "epoch": 0.9310113114570501, "grad_norm": 1.4134865999221802, "learning_rate": 8.308500000000001e-05, "loss": 0.5373, "step": 16626 }, { "epoch": 0.9310673087691791, "grad_norm": 1.3262542486190796, "learning_rate": 8.309e-05, "loss": 0.3624, "step": 16627 }, { "epoch": 0.9311233060813081, "grad_norm": 1.2941330671310425, "learning_rate": 8.3095e-05, "loss": 0.4494, "step": 16628 }, { "epoch": 0.9311793033934371, "grad_norm": 1.194333553314209, "learning_rate": 8.31e-05, "loss": 0.4017, "step": 16629 }, { "epoch": 0.9312353007055661, "grad_norm": 1.6164854764938354, "learning_rate": 8.3105e-05, "loss": 0.5246, "step": 16630 }, { "epoch": 0.9312912980176952, "grad_norm": 1.2223023176193237, "learning_rate": 8.311e-05, "loss": 0.4499, "step": 16631 }, { "epoch": 0.9313472953298242, "grad_norm": 1.225380539894104, "learning_rate": 8.3115e-05, "loss": 0.446, "step": 16632 }, { "epoch": 0.9314032926419532, "grad_norm": 1.4562822580337524, "learning_rate": 8.312e-05, "loss": 0.537, "step": 16633 }, { "epoch": 0.9314592899540822, "grad_norm": 1.2595573663711548, "learning_rate": 8.312500000000001e-05, "loss": 0.4092, "step": 16634 }, { "epoch": 0.9315152872662112, "grad_norm": 1.570261001586914, "learning_rate": 8.313000000000001e-05, "loss": 0.4921, "step": 16635 }, { "epoch": 0.9315712845783403, "grad_norm": 1.8107719421386719, "learning_rate": 8.313500000000001e-05, "loss": 0.4667, "step": 16636 }, { "epoch": 0.9316272818904693, "grad_norm": 1.3004285097122192, "learning_rate": 8.314000000000001e-05, "loss": 0.61, "step": 16637 }, { "epoch": 0.9316832792025983, "grad_norm": 1.2579715251922607, "learning_rate": 8.3145e-05, "loss": 0.3779, "step": 16638 }, { "epoch": 0.9317392765147273, "grad_norm": 1.3704969882965088, "learning_rate": 8.315e-05, "loss": 0.4827, "step": 16639 }, { "epoch": 0.9317952738268563, "grad_norm": 1.4027427434921265, "learning_rate": 8.3155e-05, "loss": 0.3904, 
"step": 16640 }, { "epoch": 0.9318512711389854, "grad_norm": 1.5530561208724976, "learning_rate": 8.316000000000001e-05, "loss": 0.6129, "step": 16641 }, { "epoch": 0.9319072684511144, "grad_norm": 1.5409584045410156, "learning_rate": 8.316500000000001e-05, "loss": 0.575, "step": 16642 }, { "epoch": 0.9319632657632434, "grad_norm": 1.448671817779541, "learning_rate": 8.317e-05, "loss": 0.4654, "step": 16643 }, { "epoch": 0.9320192630753724, "grad_norm": 1.2616862058639526, "learning_rate": 8.3175e-05, "loss": 0.5618, "step": 16644 }, { "epoch": 0.9320752603875014, "grad_norm": 1.637648344039917, "learning_rate": 8.318e-05, "loss": 0.473, "step": 16645 }, { "epoch": 0.9321312576996305, "grad_norm": 1.5618937015533447, "learning_rate": 8.3185e-05, "loss": 0.4767, "step": 16646 }, { "epoch": 0.9321872550117595, "grad_norm": 1.2212520837783813, "learning_rate": 8.319e-05, "loss": 0.4776, "step": 16647 }, { "epoch": 0.9322432523238885, "grad_norm": 1.1408857107162476, "learning_rate": 8.3195e-05, "loss": 0.3799, "step": 16648 }, { "epoch": 0.9322992496360175, "grad_norm": 1.4778286218643188, "learning_rate": 8.32e-05, "loss": 0.38, "step": 16649 }, { "epoch": 0.9323552469481465, "grad_norm": 1.2437379360198975, "learning_rate": 8.3205e-05, "loss": 0.4526, "step": 16650 }, { "epoch": 0.9324112442602756, "grad_norm": 5.390810012817383, "learning_rate": 8.321e-05, "loss": 0.3792, "step": 16651 }, { "epoch": 0.9324672415724046, "grad_norm": 1.3441483974456787, "learning_rate": 8.3215e-05, "loss": 0.4469, "step": 16652 }, { "epoch": 0.9325232388845336, "grad_norm": 1.81027090549469, "learning_rate": 8.322e-05, "loss": 0.4594, "step": 16653 }, { "epoch": 0.9325792361966626, "grad_norm": 1.5646641254425049, "learning_rate": 8.3225e-05, "loss": 0.5921, "step": 16654 }, { "epoch": 0.9326352335087916, "grad_norm": 1.2193459272384644, "learning_rate": 8.323000000000001e-05, "loss": 0.3902, "step": 16655 }, { "epoch": 0.9326912308209206, "grad_norm": 1.1926816701889038, 
"learning_rate": 8.323500000000001e-05, "loss": 0.4753, "step": 16656 }, { "epoch": 0.9327472281330497, "grad_norm": 1.4411406517028809, "learning_rate": 8.324000000000001e-05, "loss": 0.4647, "step": 16657 }, { "epoch": 0.9328032254451787, "grad_norm": 1.874061107635498, "learning_rate": 8.324500000000001e-05, "loss": 0.5395, "step": 16658 }, { "epoch": 0.9328592227573077, "grad_norm": 1.3584909439086914, "learning_rate": 8.325e-05, "loss": 0.4404, "step": 16659 }, { "epoch": 0.9329152200694366, "grad_norm": 1.2228422164916992, "learning_rate": 8.3255e-05, "loss": 0.2974, "step": 16660 }, { "epoch": 0.9329712173815656, "grad_norm": 1.4384647607803345, "learning_rate": 8.326e-05, "loss": 0.4368, "step": 16661 }, { "epoch": 0.9330272146936947, "grad_norm": 1.2427054643630981, "learning_rate": 8.326500000000001e-05, "loss": 0.4453, "step": 16662 }, { "epoch": 0.9330832120058237, "grad_norm": 1.569198489189148, "learning_rate": 8.327000000000001e-05, "loss": 0.5041, "step": 16663 }, { "epoch": 0.9331392093179527, "grad_norm": 1.2273691892623901, "learning_rate": 8.3275e-05, "loss": 0.4874, "step": 16664 }, { "epoch": 0.9331952066300817, "grad_norm": 1.4037173986434937, "learning_rate": 8.328e-05, "loss": 0.5313, "step": 16665 }, { "epoch": 0.9332512039422107, "grad_norm": 1.7003096342086792, "learning_rate": 8.3285e-05, "loss": 0.4736, "step": 16666 }, { "epoch": 0.9333072012543397, "grad_norm": 1.7694745063781738, "learning_rate": 8.329e-05, "loss": 0.5357, "step": 16667 }, { "epoch": 0.9333631985664688, "grad_norm": 1.3882302045822144, "learning_rate": 8.3295e-05, "loss": 0.3981, "step": 16668 }, { "epoch": 0.9334191958785978, "grad_norm": 1.2965500354766846, "learning_rate": 8.33e-05, "loss": 0.4082, "step": 16669 }, { "epoch": 0.9334751931907268, "grad_norm": 1.3104054927825928, "learning_rate": 8.3305e-05, "loss": 0.4348, "step": 16670 }, { "epoch": 0.9335311905028558, "grad_norm": 1.2672455310821533, "learning_rate": 8.331e-05, "loss": 0.3781, "step": 16671 }, { 
"epoch": 0.9335871878149848, "grad_norm": 1.2638646364212036, "learning_rate": 8.3315e-05, "loss": 0.5076, "step": 16672 }, { "epoch": 0.9336431851271139, "grad_norm": 1.0242950916290283, "learning_rate": 8.332000000000001e-05, "loss": 0.5022, "step": 16673 }, { "epoch": 0.9336991824392429, "grad_norm": 1.3578529357910156, "learning_rate": 8.3325e-05, "loss": 0.5606, "step": 16674 }, { "epoch": 0.9337551797513719, "grad_norm": 1.5048909187316895, "learning_rate": 8.333e-05, "loss": 0.368, "step": 16675 }, { "epoch": 0.9338111770635009, "grad_norm": 1.1758440732955933, "learning_rate": 8.3335e-05, "loss": 0.372, "step": 16676 }, { "epoch": 0.9338671743756299, "grad_norm": 1.7584550380706787, "learning_rate": 8.334000000000001e-05, "loss": 0.6781, "step": 16677 }, { "epoch": 0.933923171687759, "grad_norm": 1.4949352741241455, "learning_rate": 8.334500000000001e-05, "loss": 0.5093, "step": 16678 }, { "epoch": 0.933979168999888, "grad_norm": 1.2756378650665283, "learning_rate": 8.335e-05, "loss": 0.4561, "step": 16679 }, { "epoch": 0.934035166312017, "grad_norm": 1.7087390422821045, "learning_rate": 8.3355e-05, "loss": 0.3433, "step": 16680 }, { "epoch": 0.934091163624146, "grad_norm": 1.3154563903808594, "learning_rate": 8.336e-05, "loss": 0.5667, "step": 16681 }, { "epoch": 0.934147160936275, "grad_norm": 1.68277108669281, "learning_rate": 8.3365e-05, "loss": 0.4285, "step": 16682 }, { "epoch": 0.934203158248404, "grad_norm": 1.7122166156768799, "learning_rate": 8.337000000000001e-05, "loss": 0.5155, "step": 16683 }, { "epoch": 0.9342591555605331, "grad_norm": 1.2633670568466187, "learning_rate": 8.337500000000001e-05, "loss": 0.434, "step": 16684 }, { "epoch": 0.9343151528726621, "grad_norm": 1.2328050136566162, "learning_rate": 8.338e-05, "loss": 0.3519, "step": 16685 }, { "epoch": 0.9343711501847911, "grad_norm": 1.4168370962142944, "learning_rate": 8.3385e-05, "loss": 0.4409, "step": 16686 }, { "epoch": 0.9344271474969201, "grad_norm": 1.3801827430725098, 
"learning_rate": 8.339e-05, "loss": 0.4707, "step": 16687 }, { "epoch": 0.9344831448090491, "grad_norm": 1.3615226745605469, "learning_rate": 8.3395e-05, "loss": 0.5223, "step": 16688 }, { "epoch": 0.9345391421211782, "grad_norm": 1.6777215003967285, "learning_rate": 8.34e-05, "loss": 0.6625, "step": 16689 }, { "epoch": 0.9345951394333072, "grad_norm": 1.1215518712997437, "learning_rate": 8.3405e-05, "loss": 0.4153, "step": 16690 }, { "epoch": 0.9346511367454362, "grad_norm": 1.509817361831665, "learning_rate": 8.341e-05, "loss": 0.5476, "step": 16691 }, { "epoch": 0.9347071340575652, "grad_norm": 1.4509403705596924, "learning_rate": 8.3415e-05, "loss": 0.44, "step": 16692 }, { "epoch": 0.9347631313696942, "grad_norm": 1.4264498949050903, "learning_rate": 8.342000000000001e-05, "loss": 0.5042, "step": 16693 }, { "epoch": 0.9348191286818233, "grad_norm": 1.282489538192749, "learning_rate": 8.342500000000001e-05, "loss": 0.4117, "step": 16694 }, { "epoch": 0.9348751259939523, "grad_norm": 1.2684929370880127, "learning_rate": 8.343e-05, "loss": 0.528, "step": 16695 }, { "epoch": 0.9349311233060813, "grad_norm": 1.458742380142212, "learning_rate": 8.3435e-05, "loss": 0.409, "step": 16696 }, { "epoch": 0.9349871206182103, "grad_norm": 1.156498908996582, "learning_rate": 8.344e-05, "loss": 0.563, "step": 16697 }, { "epoch": 0.9350431179303393, "grad_norm": 1.2255337238311768, "learning_rate": 8.344500000000001e-05, "loss": 0.4638, "step": 16698 }, { "epoch": 0.9350991152424684, "grad_norm": 1.8082275390625, "learning_rate": 8.345000000000001e-05, "loss": 0.5849, "step": 16699 }, { "epoch": 0.9351551125545974, "grad_norm": 1.4461427927017212, "learning_rate": 8.3455e-05, "loss": 0.4325, "step": 16700 }, { "epoch": 0.9352111098667264, "grad_norm": 1.1311047077178955, "learning_rate": 8.346e-05, "loss": 0.3848, "step": 16701 }, { "epoch": 0.9352671071788554, "grad_norm": 1.0894145965576172, "learning_rate": 8.3465e-05, "loss": 0.3834, "step": 16702 }, { "epoch": 
0.9353231044909844, "grad_norm": 3.1649255752563477, "learning_rate": 8.347e-05, "loss": 0.3808, "step": 16703 }, { "epoch": 0.9353791018031135, "grad_norm": 1.191104769706726, "learning_rate": 8.347500000000001e-05, "loss": 0.3527, "step": 16704 }, { "epoch": 0.9354350991152425, "grad_norm": 1.2181001901626587, "learning_rate": 8.348000000000001e-05, "loss": 0.3814, "step": 16705 }, { "epoch": 0.9354910964273715, "grad_norm": 1.340221881866455, "learning_rate": 8.3485e-05, "loss": 0.3855, "step": 16706 }, { "epoch": 0.9355470937395005, "grad_norm": 1.5500530004501343, "learning_rate": 8.349e-05, "loss": 0.4846, "step": 16707 }, { "epoch": 0.9356030910516295, "grad_norm": 1.3324302434921265, "learning_rate": 8.3495e-05, "loss": 0.4714, "step": 16708 }, { "epoch": 0.9356590883637586, "grad_norm": 1.3207728862762451, "learning_rate": 8.35e-05, "loss": 0.5499, "step": 16709 }, { "epoch": 0.9357150856758876, "grad_norm": 1.2513694763183594, "learning_rate": 8.3505e-05, "loss": 0.3918, "step": 16710 }, { "epoch": 0.9357710829880166, "grad_norm": 1.3769958019256592, "learning_rate": 8.351e-05, "loss": 0.4827, "step": 16711 }, { "epoch": 0.9358270803001456, "grad_norm": 1.632336139678955, "learning_rate": 8.3515e-05, "loss": 0.5865, "step": 16712 }, { "epoch": 0.9358830776122746, "grad_norm": 1.361846923828125, "learning_rate": 8.352000000000001e-05, "loss": 0.5407, "step": 16713 }, { "epoch": 0.9359390749244036, "grad_norm": 1.3150957822799683, "learning_rate": 8.352500000000001e-05, "loss": 0.5172, "step": 16714 }, { "epoch": 0.9359950722365327, "grad_norm": 1.3028266429901123, "learning_rate": 8.353000000000001e-05, "loss": 0.4127, "step": 16715 }, { "epoch": 0.9360510695486617, "grad_norm": 1.2794989347457886, "learning_rate": 8.3535e-05, "loss": 0.4052, "step": 16716 }, { "epoch": 0.9361070668607907, "grad_norm": 1.2054892778396606, "learning_rate": 8.354e-05, "loss": 0.3915, "step": 16717 }, { "epoch": 0.9361630641729197, "grad_norm": 1.4225001335144043, 
"learning_rate": 8.3545e-05, "loss": 0.4819, "step": 16718 }, { "epoch": 0.9362190614850487, "grad_norm": 1.4767318964004517, "learning_rate": 8.355000000000001e-05, "loss": 0.3541, "step": 16719 }, { "epoch": 0.9362750587971778, "grad_norm": 1.5410804748535156, "learning_rate": 8.355500000000001e-05, "loss": 0.4682, "step": 16720 }, { "epoch": 0.9363310561093068, "grad_norm": 1.2901997566223145, "learning_rate": 8.356e-05, "loss": 0.4525, "step": 16721 }, { "epoch": 0.9363870534214358, "grad_norm": 1.4899600744247437, "learning_rate": 8.3565e-05, "loss": 0.5563, "step": 16722 }, { "epoch": 0.9364430507335648, "grad_norm": 1.5106761455535889, "learning_rate": 8.357e-05, "loss": 0.4132, "step": 16723 }, { "epoch": 0.9364990480456938, "grad_norm": 1.3609869480133057, "learning_rate": 8.3575e-05, "loss": 0.4436, "step": 16724 }, { "epoch": 0.9365550453578229, "grad_norm": 1.3122451305389404, "learning_rate": 8.358e-05, "loss": 0.4533, "step": 16725 }, { "epoch": 0.9366110426699519, "grad_norm": 1.235106110572815, "learning_rate": 8.358500000000001e-05, "loss": 0.4028, "step": 16726 }, { "epoch": 0.9366670399820809, "grad_norm": 1.664569616317749, "learning_rate": 8.359e-05, "loss": 0.4744, "step": 16727 }, { "epoch": 0.9367230372942099, "grad_norm": 1.2943017482757568, "learning_rate": 8.3595e-05, "loss": 0.5061, "step": 16728 }, { "epoch": 0.9367790346063389, "grad_norm": 2.0248494148254395, "learning_rate": 8.36e-05, "loss": 0.5164, "step": 16729 }, { "epoch": 0.936835031918468, "grad_norm": 1.248183250427246, "learning_rate": 8.3605e-05, "loss": 0.4729, "step": 16730 }, { "epoch": 0.936891029230597, "grad_norm": 1.6285631656646729, "learning_rate": 8.361e-05, "loss": 0.4802, "step": 16731 }, { "epoch": 0.936947026542726, "grad_norm": 1.0719648599624634, "learning_rate": 8.3615e-05, "loss": 0.4056, "step": 16732 }, { "epoch": 0.937003023854855, "grad_norm": 1.513091802597046, "learning_rate": 8.362000000000002e-05, "loss": 0.6647, "step": 16733 }, { "epoch": 
0.937059021166984, "grad_norm": 1.4053661823272705, "learning_rate": 8.362500000000001e-05, "loss": 0.4836, "step": 16734 }, { "epoch": 0.937115018479113, "grad_norm": 1.584080696105957, "learning_rate": 8.363000000000001e-05, "loss": 0.527, "step": 16735 }, { "epoch": 0.9371710157912421, "grad_norm": 1.1603384017944336, "learning_rate": 8.363500000000001e-05, "loss": 0.3775, "step": 16736 }, { "epoch": 0.9372270131033711, "grad_norm": 1.2846858501434326, "learning_rate": 8.364e-05, "loss": 0.5528, "step": 16737 }, { "epoch": 0.9372830104155001, "grad_norm": 1.8479763269424438, "learning_rate": 8.3645e-05, "loss": 0.5682, "step": 16738 }, { "epoch": 0.9373390077276291, "grad_norm": 1.514147400856018, "learning_rate": 8.365e-05, "loss": 0.3887, "step": 16739 }, { "epoch": 0.9373950050397581, "grad_norm": 1.4268624782562256, "learning_rate": 8.365500000000001e-05, "loss": 0.57, "step": 16740 }, { "epoch": 0.9374510023518872, "grad_norm": 1.175947904586792, "learning_rate": 8.366000000000001e-05, "loss": 0.4704, "step": 16741 }, { "epoch": 0.9375069996640161, "grad_norm": 1.1671470403671265, "learning_rate": 8.3665e-05, "loss": 0.3743, "step": 16742 }, { "epoch": 0.9375629969761451, "grad_norm": 1.4053637981414795, "learning_rate": 8.367e-05, "loss": 0.4791, "step": 16743 }, { "epoch": 0.9376189942882741, "grad_norm": 1.3294618129730225, "learning_rate": 8.3675e-05, "loss": 0.4177, "step": 16744 }, { "epoch": 0.9376749916004031, "grad_norm": 1.4835890531539917, "learning_rate": 8.368e-05, "loss": 0.6239, "step": 16745 }, { "epoch": 0.9377309889125321, "grad_norm": 1.3424429893493652, "learning_rate": 8.3685e-05, "loss": 0.4418, "step": 16746 }, { "epoch": 0.9377869862246612, "grad_norm": 1.3203662633895874, "learning_rate": 8.369000000000001e-05, "loss": 0.4023, "step": 16747 }, { "epoch": 0.9378429835367902, "grad_norm": 1.531296968460083, "learning_rate": 8.3695e-05, "loss": 0.4345, "step": 16748 }, { "epoch": 0.9378989808489192, "grad_norm": 1.4427509307861328, 
"learning_rate": 8.37e-05, "loss": 0.4551, "step": 16749 }, { "epoch": 0.9379549781610482, "grad_norm": 1.5274933576583862, "learning_rate": 8.3705e-05, "loss": 0.6143, "step": 16750 }, { "epoch": 0.9380109754731772, "grad_norm": 1.2396273612976074, "learning_rate": 8.371e-05, "loss": 0.3572, "step": 16751 }, { "epoch": 0.9380669727853063, "grad_norm": 1.183813214302063, "learning_rate": 8.3715e-05, "loss": 0.403, "step": 16752 }, { "epoch": 0.9381229700974353, "grad_norm": 1.4945588111877441, "learning_rate": 8.372e-05, "loss": 0.4316, "step": 16753 }, { "epoch": 0.9381789674095643, "grad_norm": 1.4189181327819824, "learning_rate": 8.3725e-05, "loss": 0.4808, "step": 16754 }, { "epoch": 0.9382349647216933, "grad_norm": 1.2214527130126953, "learning_rate": 8.373000000000001e-05, "loss": 0.467, "step": 16755 }, { "epoch": 0.9382909620338223, "grad_norm": 1.5569618940353394, "learning_rate": 8.373500000000001e-05, "loss": 0.458, "step": 16756 }, { "epoch": 0.9383469593459514, "grad_norm": 1.3493481874465942, "learning_rate": 8.374000000000001e-05, "loss": 0.4004, "step": 16757 }, { "epoch": 0.9384029566580804, "grad_norm": 1.4711847305297852, "learning_rate": 8.3745e-05, "loss": 0.4783, "step": 16758 }, { "epoch": 0.9384589539702094, "grad_norm": 1.4054988622665405, "learning_rate": 8.375e-05, "loss": 0.5384, "step": 16759 }, { "epoch": 0.9385149512823384, "grad_norm": 1.324813723564148, "learning_rate": 8.3755e-05, "loss": 0.3708, "step": 16760 }, { "epoch": 0.9385709485944674, "grad_norm": 1.344856858253479, "learning_rate": 8.376000000000001e-05, "loss": 0.5535, "step": 16761 }, { "epoch": 0.9386269459065965, "grad_norm": 1.283390760421753, "learning_rate": 8.376500000000001e-05, "loss": 0.6027, "step": 16762 }, { "epoch": 0.9386829432187255, "grad_norm": 1.4102137088775635, "learning_rate": 8.377e-05, "loss": 0.7469, "step": 16763 }, { "epoch": 0.9387389405308545, "grad_norm": 1.2193641662597656, "learning_rate": 8.3775e-05, "loss": 0.4421, "step": 16764 }, { 
"epoch": 0.9387949378429835, "grad_norm": 1.1807458400726318, "learning_rate": 8.378e-05, "loss": 0.5354, "step": 16765 }, { "epoch": 0.9388509351551125, "grad_norm": 1.3945485353469849, "learning_rate": 8.3785e-05, "loss": 0.3762, "step": 16766 }, { "epoch": 0.9389069324672416, "grad_norm": 1.2609732151031494, "learning_rate": 8.379e-05, "loss": 0.3866, "step": 16767 }, { "epoch": 0.9389629297793706, "grad_norm": 1.4377027750015259, "learning_rate": 8.3795e-05, "loss": 0.5106, "step": 16768 }, { "epoch": 0.9390189270914996, "grad_norm": 1.3723695278167725, "learning_rate": 8.38e-05, "loss": 0.4197, "step": 16769 }, { "epoch": 0.9390749244036286, "grad_norm": 1.7024558782577515, "learning_rate": 8.3805e-05, "loss": 0.533, "step": 16770 }, { "epoch": 0.9391309217157576, "grad_norm": 1.4607455730438232, "learning_rate": 8.381e-05, "loss": 0.3979, "step": 16771 }, { "epoch": 0.9391869190278866, "grad_norm": 1.6503533124923706, "learning_rate": 8.3815e-05, "loss": 0.5965, "step": 16772 }, { "epoch": 0.9392429163400157, "grad_norm": 1.5065014362335205, "learning_rate": 8.382e-05, "loss": 0.712, "step": 16773 }, { "epoch": 0.9392989136521447, "grad_norm": 1.4836997985839844, "learning_rate": 8.3825e-05, "loss": 0.5417, "step": 16774 }, { "epoch": 0.9393549109642737, "grad_norm": 1.4093387126922607, "learning_rate": 8.383e-05, "loss": 0.4704, "step": 16775 }, { "epoch": 0.9394109082764027, "grad_norm": 1.4547462463378906, "learning_rate": 8.383500000000001e-05, "loss": 0.4811, "step": 16776 }, { "epoch": 0.9394669055885317, "grad_norm": 1.3972203731536865, "learning_rate": 8.384000000000001e-05, "loss": 0.4777, "step": 16777 }, { "epoch": 0.9395229029006608, "grad_norm": 1.5986653566360474, "learning_rate": 8.384500000000001e-05, "loss": 0.4574, "step": 16778 }, { "epoch": 0.9395789002127898, "grad_norm": 1.2717307806015015, "learning_rate": 8.385e-05, "loss": 0.3281, "step": 16779 }, { "epoch": 0.9396348975249188, "grad_norm": 1.1803277730941772, "learning_rate": 
8.3855e-05, "loss": 0.3698, "step": 16780 }, { "epoch": 0.9396908948370478, "grad_norm": 1.5105961561203003, "learning_rate": 8.386e-05, "loss": 0.4268, "step": 16781 }, { "epoch": 0.9397468921491768, "grad_norm": 1.2344133853912354, "learning_rate": 8.386500000000001e-05, "loss": 0.4531, "step": 16782 }, { "epoch": 0.9398028894613059, "grad_norm": 1.1643778085708618, "learning_rate": 8.387000000000001e-05, "loss": 0.3649, "step": 16783 }, { "epoch": 0.9398588867734349, "grad_norm": 1.440381646156311, "learning_rate": 8.3875e-05, "loss": 0.5847, "step": 16784 }, { "epoch": 0.9399148840855639, "grad_norm": 1.1849454641342163, "learning_rate": 8.388e-05, "loss": 0.3915, "step": 16785 }, { "epoch": 0.9399708813976929, "grad_norm": 1.1867284774780273, "learning_rate": 8.3885e-05, "loss": 0.4209, "step": 16786 }, { "epoch": 0.9400268787098219, "grad_norm": 1.245200276374817, "learning_rate": 8.389e-05, "loss": 0.4678, "step": 16787 }, { "epoch": 0.940082876021951, "grad_norm": 1.2511872053146362, "learning_rate": 8.3895e-05, "loss": 0.4868, "step": 16788 }, { "epoch": 0.94013887333408, "grad_norm": 2.0155186653137207, "learning_rate": 8.39e-05, "loss": 0.4851, "step": 16789 }, { "epoch": 0.940194870646209, "grad_norm": 1.2310574054718018, "learning_rate": 8.3905e-05, "loss": 0.4153, "step": 16790 }, { "epoch": 0.940250867958338, "grad_norm": 1.2299989461898804, "learning_rate": 8.391e-05, "loss": 0.4983, "step": 16791 }, { "epoch": 0.940306865270467, "grad_norm": 1.5917248725891113, "learning_rate": 8.3915e-05, "loss": 0.4836, "step": 16792 }, { "epoch": 0.940362862582596, "grad_norm": 1.1603355407714844, "learning_rate": 8.392e-05, "loss": 0.4125, "step": 16793 }, { "epoch": 0.9404188598947251, "grad_norm": 1.0655100345611572, "learning_rate": 8.392500000000001e-05, "loss": 0.3189, "step": 16794 }, { "epoch": 0.9404748572068541, "grad_norm": 1.6109964847564697, "learning_rate": 8.393e-05, "loss": 0.52, "step": 16795 }, { "epoch": 0.9405308545189831, "grad_norm": 
1.5058531761169434, "learning_rate": 8.3935e-05, "loss": 0.4478, "step": 16796 }, { "epoch": 0.9405868518311121, "grad_norm": 1.4384710788726807, "learning_rate": 8.394000000000001e-05, "loss": 0.4512, "step": 16797 }, { "epoch": 0.9406428491432411, "grad_norm": 1.2084736824035645, "learning_rate": 8.394500000000001e-05, "loss": 0.3652, "step": 16798 }, { "epoch": 0.9406988464553702, "grad_norm": 1.5859885215759277, "learning_rate": 8.395000000000001e-05, "loss": 0.4277, "step": 16799 }, { "epoch": 0.9407548437674992, "grad_norm": 1.2028608322143555, "learning_rate": 8.3955e-05, "loss": 0.4893, "step": 16800 }, { "epoch": 0.9408108410796282, "grad_norm": 1.3841071128845215, "learning_rate": 8.396e-05, "loss": 0.439, "step": 16801 }, { "epoch": 0.9408668383917572, "grad_norm": 1.3934539556503296, "learning_rate": 8.3965e-05, "loss": 0.408, "step": 16802 }, { "epoch": 0.9409228357038862, "grad_norm": 1.1731603145599365, "learning_rate": 8.397000000000001e-05, "loss": 0.3979, "step": 16803 }, { "epoch": 0.9409788330160153, "grad_norm": 1.3989534378051758, "learning_rate": 8.397500000000001e-05, "loss": 0.432, "step": 16804 }, { "epoch": 0.9410348303281443, "grad_norm": 1.3040473461151123, "learning_rate": 8.398e-05, "loss": 0.4161, "step": 16805 }, { "epoch": 0.9410908276402733, "grad_norm": 1.228775978088379, "learning_rate": 8.3985e-05, "loss": 0.3721, "step": 16806 }, { "epoch": 0.9411468249524023, "grad_norm": 1.2703782320022583, "learning_rate": 8.399e-05, "loss": 0.5118, "step": 16807 }, { "epoch": 0.9412028222645313, "grad_norm": 1.5137567520141602, "learning_rate": 8.3995e-05, "loss": 0.4539, "step": 16808 }, { "epoch": 0.9412588195766604, "grad_norm": 1.775665283203125, "learning_rate": 8.4e-05, "loss": 0.583, "step": 16809 }, { "epoch": 0.9413148168887894, "grad_norm": 1.314042091369629, "learning_rate": 8.4005e-05, "loss": 0.426, "step": 16810 }, { "epoch": 0.9413708142009184, "grad_norm": 1.167601466178894, "learning_rate": 8.401e-05, "loss": 0.5671, 
"step": 16811 }, { "epoch": 0.9414268115130474, "grad_norm": 1.4067963361740112, "learning_rate": 8.4015e-05, "loss": 0.4454, "step": 16812 }, { "epoch": 0.9414828088251764, "grad_norm": 2.502490282058716, "learning_rate": 8.402e-05, "loss": 0.4565, "step": 16813 }, { "epoch": 0.9415388061373055, "grad_norm": 2.032898426055908, "learning_rate": 8.402500000000001e-05, "loss": 0.7062, "step": 16814 }, { "epoch": 0.9415948034494345, "grad_norm": 1.5732282400131226, "learning_rate": 8.403000000000001e-05, "loss": 0.4778, "step": 16815 }, { "epoch": 0.9416508007615635, "grad_norm": 1.5170363187789917, "learning_rate": 8.4035e-05, "loss": 0.5445, "step": 16816 }, { "epoch": 0.9417067980736925, "grad_norm": 1.4666023254394531, "learning_rate": 8.404e-05, "loss": 0.433, "step": 16817 }, { "epoch": 0.9417627953858215, "grad_norm": 1.441066026687622, "learning_rate": 8.404500000000001e-05, "loss": 0.6679, "step": 16818 }, { "epoch": 0.9418187926979505, "grad_norm": 1.2354779243469238, "learning_rate": 8.405000000000001e-05, "loss": 0.4044, "step": 16819 }, { "epoch": 0.9418747900100796, "grad_norm": 1.2528882026672363, "learning_rate": 8.405500000000001e-05, "loss": 0.4331, "step": 16820 }, { "epoch": 0.9419307873222086, "grad_norm": 1.175680160522461, "learning_rate": 8.406e-05, "loss": 0.4404, "step": 16821 }, { "epoch": 0.9419867846343376, "grad_norm": 2.039055347442627, "learning_rate": 8.4065e-05, "loss": 0.4098, "step": 16822 }, { "epoch": 0.9420427819464666, "grad_norm": 1.3916031122207642, "learning_rate": 8.407e-05, "loss": 0.4205, "step": 16823 }, { "epoch": 0.9420987792585956, "grad_norm": 1.3693307638168335, "learning_rate": 8.4075e-05, "loss": 0.5207, "step": 16824 }, { "epoch": 0.9421547765707246, "grad_norm": 1.4005661010742188, "learning_rate": 8.408000000000001e-05, "loss": 0.524, "step": 16825 }, { "epoch": 0.9422107738828536, "grad_norm": 1.458531141281128, "learning_rate": 8.4085e-05, "loss": 0.5229, "step": 16826 }, { "epoch": 0.9422667711949826, 
"grad_norm": 1.1541227102279663, "learning_rate": 8.409e-05, "loss": 0.3867, "step": 16827 }, { "epoch": 0.9423227685071116, "grad_norm": 1.5439306497573853, "learning_rate": 8.4095e-05, "loss": 0.531, "step": 16828 }, { "epoch": 0.9423787658192406, "grad_norm": 1.3004478216171265, "learning_rate": 8.41e-05, "loss": 0.4577, "step": 16829 }, { "epoch": 0.9424347631313696, "grad_norm": 1.5048997402191162, "learning_rate": 8.4105e-05, "loss": 0.5917, "step": 16830 }, { "epoch": 0.9424907604434987, "grad_norm": 1.692022681236267, "learning_rate": 8.411e-05, "loss": 0.4814, "step": 16831 }, { "epoch": 0.9425467577556277, "grad_norm": 1.2445706129074097, "learning_rate": 8.4115e-05, "loss": 0.5822, "step": 16832 }, { "epoch": 0.9426027550677567, "grad_norm": 1.32802152633667, "learning_rate": 8.412e-05, "loss": 0.499, "step": 16833 }, { "epoch": 0.9426587523798857, "grad_norm": 1.4759972095489502, "learning_rate": 8.412500000000001e-05, "loss": 0.5567, "step": 16834 }, { "epoch": 0.9427147496920147, "grad_norm": 1.3070430755615234, "learning_rate": 8.413000000000001e-05, "loss": 0.361, "step": 16835 }, { "epoch": 0.9427707470041438, "grad_norm": 1.3818913698196411, "learning_rate": 8.4135e-05, "loss": 0.4857, "step": 16836 }, { "epoch": 0.9428267443162728, "grad_norm": 1.1885535717010498, "learning_rate": 8.414e-05, "loss": 0.4102, "step": 16837 }, { "epoch": 0.9428827416284018, "grad_norm": 1.4455609321594238, "learning_rate": 8.4145e-05, "loss": 0.3973, "step": 16838 }, { "epoch": 0.9429387389405308, "grad_norm": 1.3386549949645996, "learning_rate": 8.415000000000001e-05, "loss": 0.5643, "step": 16839 }, { "epoch": 0.9429947362526598, "grad_norm": 1.6348649263381958, "learning_rate": 8.415500000000001e-05, "loss": 0.4125, "step": 16840 }, { "epoch": 0.9430507335647889, "grad_norm": 1.3806694746017456, "learning_rate": 8.416000000000001e-05, "loss": 0.4462, "step": 16841 }, { "epoch": 0.9431067308769179, "grad_norm": 1.826649785041809, "learning_rate": 8.4165e-05, 
"loss": 0.4605, "step": 16842 }, { "epoch": 0.9431627281890469, "grad_norm": 1.4051538705825806, "learning_rate": 8.417e-05, "loss": 0.5421, "step": 16843 }, { "epoch": 0.9432187255011759, "grad_norm": 1.68427312374115, "learning_rate": 8.4175e-05, "loss": 0.5481, "step": 16844 }, { "epoch": 0.9432747228133049, "grad_norm": 1.3187127113342285, "learning_rate": 8.418e-05, "loss": 0.405, "step": 16845 }, { "epoch": 0.943330720125434, "grad_norm": 1.2708790302276611, "learning_rate": 8.418500000000001e-05, "loss": 0.6116, "step": 16846 }, { "epoch": 0.943386717437563, "grad_norm": 1.282198190689087, "learning_rate": 8.419e-05, "loss": 0.3726, "step": 16847 }, { "epoch": 0.943442714749692, "grad_norm": 1.1929875612258911, "learning_rate": 8.4195e-05, "loss": 0.4536, "step": 16848 }, { "epoch": 0.943498712061821, "grad_norm": 1.3808897733688354, "learning_rate": 8.42e-05, "loss": 0.4514, "step": 16849 }, { "epoch": 0.94355470937395, "grad_norm": 1.4703110456466675, "learning_rate": 8.4205e-05, "loss": 0.4597, "step": 16850 }, { "epoch": 0.943610706686079, "grad_norm": 1.349044680595398, "learning_rate": 8.421e-05, "loss": 0.6292, "step": 16851 }, { "epoch": 0.9436667039982081, "grad_norm": 1.147835373878479, "learning_rate": 8.4215e-05, "loss": 0.4185, "step": 16852 }, { "epoch": 0.9437227013103371, "grad_norm": 1.3355592489242554, "learning_rate": 8.422e-05, "loss": 0.5128, "step": 16853 }, { "epoch": 0.9437786986224661, "grad_norm": 1.4008398056030273, "learning_rate": 8.422500000000001e-05, "loss": 0.5362, "step": 16854 }, { "epoch": 0.9438346959345951, "grad_norm": 1.6245315074920654, "learning_rate": 8.423000000000001e-05, "loss": 0.6372, "step": 16855 }, { "epoch": 0.9438906932467241, "grad_norm": 2.5869712829589844, "learning_rate": 8.423500000000001e-05, "loss": 0.5831, "step": 16856 }, { "epoch": 0.9439466905588532, "grad_norm": 1.4545773267745972, "learning_rate": 8.424e-05, "loss": 0.5986, "step": 16857 }, { "epoch": 0.9440026878709822, "grad_norm": 
1.1646499633789062, "learning_rate": 8.4245e-05, "loss": 0.475, "step": 16858 }, { "epoch": 0.9440586851831112, "grad_norm": 1.5359723567962646, "learning_rate": 8.425e-05, "loss": 0.3858, "step": 16859 }, { "epoch": 0.9441146824952402, "grad_norm": 1.070334553718567, "learning_rate": 8.425500000000001e-05, "loss": 0.3192, "step": 16860 }, { "epoch": 0.9441706798073692, "grad_norm": 1.770363211631775, "learning_rate": 8.426000000000001e-05, "loss": 0.4904, "step": 16861 }, { "epoch": 0.9442266771194983, "grad_norm": 1.579593300819397, "learning_rate": 8.426500000000001e-05, "loss": 0.586, "step": 16862 }, { "epoch": 0.9442826744316273, "grad_norm": 1.3765833377838135, "learning_rate": 8.427e-05, "loss": 0.5836, "step": 16863 }, { "epoch": 0.9443386717437563, "grad_norm": 1.5055797100067139, "learning_rate": 8.4275e-05, "loss": 0.4513, "step": 16864 }, { "epoch": 0.9443946690558853, "grad_norm": 1.366385579109192, "learning_rate": 8.428e-05, "loss": 0.4448, "step": 16865 }, { "epoch": 0.9444506663680143, "grad_norm": 1.3146216869354248, "learning_rate": 8.4285e-05, "loss": 0.4196, "step": 16866 }, { "epoch": 0.9445066636801434, "grad_norm": 1.6299651861190796, "learning_rate": 8.429000000000001e-05, "loss": 0.517, "step": 16867 }, { "epoch": 0.9445626609922724, "grad_norm": 1.3505542278289795, "learning_rate": 8.4295e-05, "loss": 0.4939, "step": 16868 }, { "epoch": 0.9446186583044014, "grad_norm": 1.7189971208572388, "learning_rate": 8.43e-05, "loss": 0.491, "step": 16869 }, { "epoch": 0.9446746556165304, "grad_norm": 1.3334531784057617, "learning_rate": 8.4305e-05, "loss": 0.4253, "step": 16870 }, { "epoch": 0.9447306529286594, "grad_norm": 1.3857353925704956, "learning_rate": 8.431e-05, "loss": 0.4126, "step": 16871 }, { "epoch": 0.9447866502407885, "grad_norm": 1.5548064708709717, "learning_rate": 8.4315e-05, "loss": 0.5157, "step": 16872 }, { "epoch": 0.9448426475529175, "grad_norm": 1.399116039276123, "learning_rate": 8.431999999999999e-05, "loss": 0.413, 
"step": 16873 }, { "epoch": 0.9448986448650465, "grad_norm": 1.8226274251937866, "learning_rate": 8.4325e-05, "loss": 0.4868, "step": 16874 }, { "epoch": 0.9449546421771755, "grad_norm": 1.6294915676116943, "learning_rate": 8.433000000000001e-05, "loss": 0.511, "step": 16875 }, { "epoch": 0.9450106394893045, "grad_norm": 1.645871877670288, "learning_rate": 8.433500000000001e-05, "loss": 0.3838, "step": 16876 }, { "epoch": 0.9450666368014335, "grad_norm": 1.2966974973678589, "learning_rate": 8.434000000000001e-05, "loss": 0.4449, "step": 16877 }, { "epoch": 0.9451226341135626, "grad_norm": 1.1157574653625488, "learning_rate": 8.4345e-05, "loss": 0.3294, "step": 16878 }, { "epoch": 0.9451786314256916, "grad_norm": 1.3588204383850098, "learning_rate": 8.435e-05, "loss": 0.5036, "step": 16879 }, { "epoch": 0.9452346287378206, "grad_norm": 1.3987594842910767, "learning_rate": 8.4355e-05, "loss": 0.5028, "step": 16880 }, { "epoch": 0.9452906260499496, "grad_norm": 1.1793015003204346, "learning_rate": 8.436000000000001e-05, "loss": 0.3366, "step": 16881 }, { "epoch": 0.9453466233620786, "grad_norm": 1.2872474193572998, "learning_rate": 8.436500000000001e-05, "loss": 0.4089, "step": 16882 }, { "epoch": 0.9454026206742077, "grad_norm": 1.6301637887954712, "learning_rate": 8.437000000000001e-05, "loss": 0.4551, "step": 16883 }, { "epoch": 0.9454586179863367, "grad_norm": 1.6496950387954712, "learning_rate": 8.4375e-05, "loss": 0.5353, "step": 16884 }, { "epoch": 0.9455146152984657, "grad_norm": 1.4343160390853882, "learning_rate": 8.438e-05, "loss": 0.4322, "step": 16885 }, { "epoch": 0.9455706126105947, "grad_norm": 1.707632303237915, "learning_rate": 8.4385e-05, "loss": 0.392, "step": 16886 }, { "epoch": 0.9456266099227237, "grad_norm": 1.0887588262557983, "learning_rate": 8.439e-05, "loss": 0.3682, "step": 16887 }, { "epoch": 0.9456826072348528, "grad_norm": 1.4382503032684326, "learning_rate": 8.439500000000001e-05, "loss": 0.4761, "step": 16888 }, { "epoch": 
0.9457386045469818, "grad_norm": 1.2576593160629272, "learning_rate": 8.44e-05, "loss": 0.4996, "step": 16889 }, { "epoch": 0.9457946018591108, "grad_norm": 1.357924461364746, "learning_rate": 8.4405e-05, "loss": 0.4564, "step": 16890 }, { "epoch": 0.9458505991712398, "grad_norm": 1.4293816089630127, "learning_rate": 8.441e-05, "loss": 0.4203, "step": 16891 }, { "epoch": 0.9459065964833688, "grad_norm": 1.5781736373901367, "learning_rate": 8.4415e-05, "loss": 0.4303, "step": 16892 }, { "epoch": 0.9459625937954979, "grad_norm": 1.687091588973999, "learning_rate": 8.442e-05, "loss": 0.5184, "step": 16893 }, { "epoch": 0.9460185911076269, "grad_norm": 1.4222277402877808, "learning_rate": 8.442499999999999e-05, "loss": 0.508, "step": 16894 }, { "epoch": 0.9460745884197559, "grad_norm": 1.134165644645691, "learning_rate": 8.443e-05, "loss": 0.4689, "step": 16895 }, { "epoch": 0.9461305857318849, "grad_norm": 1.3414344787597656, "learning_rate": 8.443500000000001e-05, "loss": 0.4429, "step": 16896 }, { "epoch": 0.9461865830440139, "grad_norm": 1.2850489616394043, "learning_rate": 8.444000000000001e-05, "loss": 0.4379, "step": 16897 }, { "epoch": 0.946242580356143, "grad_norm": 1.5533628463745117, "learning_rate": 8.444500000000001e-05, "loss": 0.3544, "step": 16898 }, { "epoch": 0.946298577668272, "grad_norm": 1.3350168466567993, "learning_rate": 8.445e-05, "loss": 0.4305, "step": 16899 }, { "epoch": 0.946354574980401, "grad_norm": 1.4601842164993286, "learning_rate": 8.4455e-05, "loss": 0.4564, "step": 16900 }, { "epoch": 0.94641057229253, "grad_norm": 1.246171474456787, "learning_rate": 8.446e-05, "loss": 0.3276, "step": 16901 }, { "epoch": 0.946466569604659, "grad_norm": 1.3086272478103638, "learning_rate": 8.4465e-05, "loss": 0.4221, "step": 16902 }, { "epoch": 0.946522566916788, "grad_norm": 1.4926385879516602, "learning_rate": 8.447000000000001e-05, "loss": 0.5245, "step": 16903 }, { "epoch": 0.9465785642289171, "grad_norm": 1.481048822402954, "learning_rate": 
8.447500000000001e-05, "loss": 0.395, "step": 16904 }, { "epoch": 0.9466345615410461, "grad_norm": 1.3172568082809448, "learning_rate": 8.448e-05, "loss": 0.4195, "step": 16905 }, { "epoch": 0.9466905588531751, "grad_norm": 1.406826376914978, "learning_rate": 8.4485e-05, "loss": 0.4089, "step": 16906 }, { "epoch": 0.9467465561653041, "grad_norm": 1.5490084886550903, "learning_rate": 8.449e-05, "loss": 0.4135, "step": 16907 }, { "epoch": 0.946802553477433, "grad_norm": 1.8110581636428833, "learning_rate": 8.4495e-05, "loss": 0.6009, "step": 16908 }, { "epoch": 0.946858550789562, "grad_norm": 1.189125895500183, "learning_rate": 8.450000000000001e-05, "loss": 0.5155, "step": 16909 }, { "epoch": 0.9469145481016911, "grad_norm": 1.2542072534561157, "learning_rate": 8.4505e-05, "loss": 0.4491, "step": 16910 }, { "epoch": 0.9469705454138201, "grad_norm": 1.4154157638549805, "learning_rate": 8.451e-05, "loss": 0.5446, "step": 16911 }, { "epoch": 0.9470265427259491, "grad_norm": 1.1997610330581665, "learning_rate": 8.4515e-05, "loss": 0.5155, "step": 16912 }, { "epoch": 0.9470825400380781, "grad_norm": 1.0751558542251587, "learning_rate": 8.452e-05, "loss": 0.3765, "step": 16913 }, { "epoch": 0.9471385373502071, "grad_norm": 1.3078124523162842, "learning_rate": 8.4525e-05, "loss": 0.5072, "step": 16914 }, { "epoch": 0.9471945346623362, "grad_norm": 1.3146898746490479, "learning_rate": 8.453e-05, "loss": 0.4415, "step": 16915 }, { "epoch": 0.9472505319744652, "grad_norm": 1.3087681531906128, "learning_rate": 8.4535e-05, "loss": 0.3962, "step": 16916 }, { "epoch": 0.9473065292865942, "grad_norm": 1.1553888320922852, "learning_rate": 8.454000000000001e-05, "loss": 0.4969, "step": 16917 }, { "epoch": 0.9473625265987232, "grad_norm": 1.4546152353286743, "learning_rate": 8.454500000000001e-05, "loss": 0.5268, "step": 16918 }, { "epoch": 0.9474185239108522, "grad_norm": 1.5179592370986938, "learning_rate": 8.455000000000001e-05, "loss": 0.4177, "step": 16919 }, { "epoch": 
0.9474745212229813, "grad_norm": 1.39425790309906, "learning_rate": 8.4555e-05, "loss": 0.568, "step": 16920 }, { "epoch": 0.9475305185351103, "grad_norm": 1.3313343524932861, "learning_rate": 8.456e-05, "loss": 0.419, "step": 16921 }, { "epoch": 0.9475865158472393, "grad_norm": 1.3817088603973389, "learning_rate": 8.4565e-05, "loss": 0.5545, "step": 16922 }, { "epoch": 0.9476425131593683, "grad_norm": 1.2225130796432495, "learning_rate": 8.457e-05, "loss": 0.4863, "step": 16923 }, { "epoch": 0.9476985104714973, "grad_norm": 1.3912243843078613, "learning_rate": 8.457500000000001e-05, "loss": 0.5267, "step": 16924 }, { "epoch": 0.9477545077836264, "grad_norm": 1.3930424451828003, "learning_rate": 8.458e-05, "loss": 0.3551, "step": 16925 }, { "epoch": 0.9478105050957554, "grad_norm": 1.426986575126648, "learning_rate": 8.4585e-05, "loss": 0.4643, "step": 16926 }, { "epoch": 0.9478665024078844, "grad_norm": 1.3757551908493042, "learning_rate": 8.459e-05, "loss": 0.4426, "step": 16927 }, { "epoch": 0.9479224997200134, "grad_norm": 1.4466967582702637, "learning_rate": 8.4595e-05, "loss": 0.4384, "step": 16928 }, { "epoch": 0.9479784970321424, "grad_norm": 1.2996398210525513, "learning_rate": 8.46e-05, "loss": 0.3729, "step": 16929 }, { "epoch": 0.9480344943442715, "grad_norm": 1.6008819341659546, "learning_rate": 8.460500000000001e-05, "loss": 0.4521, "step": 16930 }, { "epoch": 0.9480904916564005, "grad_norm": 1.7591370344161987, "learning_rate": 8.461e-05, "loss": 0.561, "step": 16931 }, { "epoch": 0.9481464889685295, "grad_norm": 1.2559250593185425, "learning_rate": 8.4615e-05, "loss": 0.3675, "step": 16932 }, { "epoch": 0.9482024862806585, "grad_norm": 1.5842159986495972, "learning_rate": 8.462e-05, "loss": 0.6105, "step": 16933 }, { "epoch": 0.9482584835927875, "grad_norm": 1.522939920425415, "learning_rate": 8.4625e-05, "loss": 0.4102, "step": 16934 }, { "epoch": 0.9483144809049165, "grad_norm": 1.306370496749878, "learning_rate": 8.463000000000001e-05, "loss": 
0.515, "step": 16935 }, { "epoch": 0.9483704782170456, "grad_norm": 1.504814624786377, "learning_rate": 8.4635e-05, "loss": 0.5157, "step": 16936 }, { "epoch": 0.9484264755291746, "grad_norm": 1.1809239387512207, "learning_rate": 8.464e-05, "loss": 0.4107, "step": 16937 }, { "epoch": 0.9484824728413036, "grad_norm": 1.5091184377670288, "learning_rate": 8.464500000000001e-05, "loss": 0.4104, "step": 16938 }, { "epoch": 0.9485384701534326, "grad_norm": 1.0965172052383423, "learning_rate": 8.465000000000001e-05, "loss": 0.2987, "step": 16939 }, { "epoch": 0.9485944674655616, "grad_norm": 1.2307863235473633, "learning_rate": 8.465500000000001e-05, "loss": 0.535, "step": 16940 }, { "epoch": 0.9486504647776907, "grad_norm": 1.4812849760055542, "learning_rate": 8.466e-05, "loss": 0.4636, "step": 16941 }, { "epoch": 0.9487064620898197, "grad_norm": 1.4107130765914917, "learning_rate": 8.4665e-05, "loss": 0.622, "step": 16942 }, { "epoch": 0.9487624594019487, "grad_norm": 1.3292529582977295, "learning_rate": 8.467e-05, "loss": 0.4732, "step": 16943 }, { "epoch": 0.9488184567140777, "grad_norm": 1.2414759397506714, "learning_rate": 8.4675e-05, "loss": 0.3468, "step": 16944 }, { "epoch": 0.9488744540262067, "grad_norm": 1.1517809629440308, "learning_rate": 8.468000000000001e-05, "loss": 0.3438, "step": 16945 }, { "epoch": 0.9489304513383358, "grad_norm": 1.5465229749679565, "learning_rate": 8.4685e-05, "loss": 0.4394, "step": 16946 }, { "epoch": 0.9489864486504648, "grad_norm": 1.4621566534042358, "learning_rate": 8.469e-05, "loss": 0.3958, "step": 16947 }, { "epoch": 0.9490424459625938, "grad_norm": 1.2637304067611694, "learning_rate": 8.4695e-05, "loss": 0.3675, "step": 16948 }, { "epoch": 0.9490984432747228, "grad_norm": 1.4915884733200073, "learning_rate": 8.47e-05, "loss": 0.4249, "step": 16949 }, { "epoch": 0.9491544405868518, "grad_norm": 1.419655203819275, "learning_rate": 8.4705e-05, "loss": 0.6283, "step": 16950 }, { "epoch": 0.9492104378989809, "grad_norm": 
1.1955921649932861, "learning_rate": 8.471000000000001e-05, "loss": 0.48, "step": 16951 }, { "epoch": 0.9492664352111099, "grad_norm": 1.340035319328308, "learning_rate": 8.4715e-05, "loss": 0.6691, "step": 16952 }, { "epoch": 0.9493224325232389, "grad_norm": 1.3427761793136597, "learning_rate": 8.472e-05, "loss": 0.503, "step": 16953 }, { "epoch": 0.9493784298353679, "grad_norm": 1.4067109823226929, "learning_rate": 8.4725e-05, "loss": 0.6549, "step": 16954 }, { "epoch": 0.9494344271474969, "grad_norm": 1.3630707263946533, "learning_rate": 8.473000000000001e-05, "loss": 0.4791, "step": 16955 }, { "epoch": 0.949490424459626, "grad_norm": 1.6496031284332275, "learning_rate": 8.473500000000001e-05, "loss": 0.6371, "step": 16956 }, { "epoch": 0.949546421771755, "grad_norm": 1.4741486310958862, "learning_rate": 8.474e-05, "loss": 0.3872, "step": 16957 }, { "epoch": 0.949602419083884, "grad_norm": 1.1507982015609741, "learning_rate": 8.4745e-05, "loss": 0.4622, "step": 16958 }, { "epoch": 0.949658416396013, "grad_norm": 1.512468695640564, "learning_rate": 8.475000000000001e-05, "loss": 0.6612, "step": 16959 }, { "epoch": 0.949714413708142, "grad_norm": 1.4250510931015015, "learning_rate": 8.475500000000001e-05, "loss": 0.4561, "step": 16960 }, { "epoch": 0.949770411020271, "grad_norm": 1.3143805265426636, "learning_rate": 8.476000000000001e-05, "loss": 0.4001, "step": 16961 }, { "epoch": 0.9498264083324001, "grad_norm": 1.4907798767089844, "learning_rate": 8.4765e-05, "loss": 0.5428, "step": 16962 }, { "epoch": 0.9498824056445291, "grad_norm": 1.3315340280532837, "learning_rate": 8.477e-05, "loss": 0.4319, "step": 16963 }, { "epoch": 0.9499384029566581, "grad_norm": 1.20774507522583, "learning_rate": 8.4775e-05, "loss": 0.4153, "step": 16964 }, { "epoch": 0.9499944002687871, "grad_norm": 1.1649962663650513, "learning_rate": 8.478e-05, "loss": 0.4366, "step": 16965 }, { "epoch": 0.9500503975809161, "grad_norm": 1.3137903213500977, "learning_rate": 8.478500000000001e-05, 
"loss": 0.4325, "step": 16966 }, { "epoch": 0.9501063948930452, "grad_norm": 1.3243414163589478, "learning_rate": 8.479e-05, "loss": 0.3953, "step": 16967 }, { "epoch": 0.9501623922051742, "grad_norm": 1.4575765132904053, "learning_rate": 8.4795e-05, "loss": 0.4123, "step": 16968 }, { "epoch": 0.9502183895173032, "grad_norm": 1.3398209810256958, "learning_rate": 8.48e-05, "loss": 0.4228, "step": 16969 }, { "epoch": 0.9502743868294322, "grad_norm": 1.8145989179611206, "learning_rate": 8.4805e-05, "loss": 0.5517, "step": 16970 }, { "epoch": 0.9503303841415612, "grad_norm": 1.2563401460647583, "learning_rate": 8.481e-05, "loss": 0.4388, "step": 16971 }, { "epoch": 0.9503863814536903, "grad_norm": 1.535429835319519, "learning_rate": 8.4815e-05, "loss": 0.4084, "step": 16972 }, { "epoch": 0.9504423787658193, "grad_norm": 1.1452304124832153, "learning_rate": 8.482e-05, "loss": 0.3845, "step": 16973 }, { "epoch": 0.9504983760779483, "grad_norm": 1.621180772781372, "learning_rate": 8.4825e-05, "loss": 0.4521, "step": 16974 }, { "epoch": 0.9505543733900773, "grad_norm": 1.3514586687088013, "learning_rate": 8.483000000000001e-05, "loss": 0.6353, "step": 16975 }, { "epoch": 0.9506103707022063, "grad_norm": 1.2678074836730957, "learning_rate": 8.483500000000001e-05, "loss": 0.4231, "step": 16976 }, { "epoch": 0.9506663680143354, "grad_norm": 2.2612082958221436, "learning_rate": 8.484000000000001e-05, "loss": 0.4763, "step": 16977 }, { "epoch": 0.9507223653264644, "grad_norm": 1.457580327987671, "learning_rate": 8.4845e-05, "loss": 0.5832, "step": 16978 }, { "epoch": 0.9507783626385934, "grad_norm": 3.180603265762329, "learning_rate": 8.485e-05, "loss": 0.4155, "step": 16979 }, { "epoch": 0.9508343599507224, "grad_norm": 1.9469780921936035, "learning_rate": 8.485500000000001e-05, "loss": 0.3662, "step": 16980 }, { "epoch": 0.9508903572628514, "grad_norm": 1.345381498336792, "learning_rate": 8.486000000000001e-05, "loss": 0.4756, "step": 16981 }, { "epoch": 0.9509463545749804, 
"grad_norm": 1.5418356657028198, "learning_rate": 8.486500000000001e-05, "loss": 0.4512, "step": 16982 }, { "epoch": 0.9510023518871095, "grad_norm": 1.2618006467819214, "learning_rate": 8.487e-05, "loss": 0.5224, "step": 16983 }, { "epoch": 0.9510583491992385, "grad_norm": 1.2593483924865723, "learning_rate": 8.4875e-05, "loss": 0.4596, "step": 16984 }, { "epoch": 0.9511143465113675, "grad_norm": 1.267127513885498, "learning_rate": 8.488e-05, "loss": 0.5895, "step": 16985 }, { "epoch": 0.9511703438234965, "grad_norm": 1.5504367351531982, "learning_rate": 8.4885e-05, "loss": 0.5344, "step": 16986 }, { "epoch": 0.9512263411356255, "grad_norm": 1.2736339569091797, "learning_rate": 8.489000000000001e-05, "loss": 0.4524, "step": 16987 }, { "epoch": 0.9512823384477546, "grad_norm": 1.1661581993103027, "learning_rate": 8.4895e-05, "loss": 0.343, "step": 16988 }, { "epoch": 0.9513383357598836, "grad_norm": 1.334139347076416, "learning_rate": 8.49e-05, "loss": 0.5219, "step": 16989 }, { "epoch": 0.9513943330720125, "grad_norm": 1.4772909879684448, "learning_rate": 8.4905e-05, "loss": 0.555, "step": 16990 }, { "epoch": 0.9514503303841415, "grad_norm": 1.1652812957763672, "learning_rate": 8.491e-05, "loss": 0.3417, "step": 16991 }, { "epoch": 0.9515063276962705, "grad_norm": 1.2831329107284546, "learning_rate": 8.4915e-05, "loss": 0.4816, "step": 16992 }, { "epoch": 0.9515623250083995, "grad_norm": 1.5837223529815674, "learning_rate": 8.492e-05, "loss": 0.4307, "step": 16993 }, { "epoch": 0.9516183223205286, "grad_norm": 1.555816888809204, "learning_rate": 8.4925e-05, "loss": 0.4459, "step": 16994 }, { "epoch": 0.9516743196326576, "grad_norm": 1.3275021314620972, "learning_rate": 8.493000000000002e-05, "loss": 0.4356, "step": 16995 }, { "epoch": 0.9517303169447866, "grad_norm": 1.6596986055374146, "learning_rate": 8.493500000000001e-05, "loss": 0.4259, "step": 16996 }, { "epoch": 0.9517863142569156, "grad_norm": 1.3667863607406616, "learning_rate": 8.494000000000001e-05, 
"loss": 0.5015, "step": 16997 }, { "epoch": 0.9518423115690446, "grad_norm": 1.5858535766601562, "learning_rate": 8.494500000000001e-05, "loss": 0.3896, "step": 16998 }, { "epoch": 0.9518983088811737, "grad_norm": 1.3232752084732056, "learning_rate": 8.495e-05, "loss": 0.3544, "step": 16999 }, { "epoch": 0.9519543061933027, "grad_norm": 1.571776270866394, "learning_rate": 8.4955e-05, "loss": 0.4382, "step": 17000 }, { "epoch": 0.9520103035054317, "grad_norm": 1.4432038068771362, "learning_rate": 8.496e-05, "loss": 0.4218, "step": 17001 }, { "epoch": 0.9520663008175607, "grad_norm": 1.3933186531066895, "learning_rate": 8.496500000000001e-05, "loss": 0.4635, "step": 17002 }, { "epoch": 0.9521222981296897, "grad_norm": 1.338154911994934, "learning_rate": 8.497000000000001e-05, "loss": 0.4775, "step": 17003 }, { "epoch": 0.9521782954418188, "grad_norm": 1.3936713933944702, "learning_rate": 8.4975e-05, "loss": 0.5565, "step": 17004 }, { "epoch": 0.9522342927539478, "grad_norm": 1.5405932664871216, "learning_rate": 8.498e-05, "loss": 0.5839, "step": 17005 }, { "epoch": 0.9522902900660768, "grad_norm": 1.1904425621032715, "learning_rate": 8.4985e-05, "loss": 0.3904, "step": 17006 }, { "epoch": 0.9523462873782058, "grad_norm": 1.8471018075942993, "learning_rate": 8.499e-05, "loss": 0.4705, "step": 17007 }, { "epoch": 0.9524022846903348, "grad_norm": 1.429958462715149, "learning_rate": 8.499500000000001e-05, "loss": 0.4797, "step": 17008 }, { "epoch": 0.9524582820024639, "grad_norm": 1.300558090209961, "learning_rate": 8.5e-05, "loss": 0.3766, "step": 17009 }, { "epoch": 0.9525142793145929, "grad_norm": 1.1773066520690918, "learning_rate": 8.5005e-05, "loss": 0.4107, "step": 17010 }, { "epoch": 0.9525702766267219, "grad_norm": 1.355825662612915, "learning_rate": 8.501e-05, "loss": 0.6292, "step": 17011 }, { "epoch": 0.9526262739388509, "grad_norm": 1.293701171875, "learning_rate": 8.5015e-05, "loss": 0.4225, "step": 17012 }, { "epoch": 0.9526822712509799, "grad_norm": 
1.2822965383529663, "learning_rate": 8.502e-05, "loss": 0.5179, "step": 17013 }, { "epoch": 0.952738268563109, "grad_norm": 1.3549543619155884, "learning_rate": 8.502499999999999e-05, "loss": 0.4233, "step": 17014 }, { "epoch": 0.952794265875238, "grad_norm": 1.1742892265319824, "learning_rate": 8.503e-05, "loss": 0.3484, "step": 17015 }, { "epoch": 0.952850263187367, "grad_norm": 1.4947805404663086, "learning_rate": 8.503500000000002e-05, "loss": 0.4144, "step": 17016 }, { "epoch": 0.952906260499496, "grad_norm": 2.0162227153778076, "learning_rate": 8.504000000000001e-05, "loss": 0.5194, "step": 17017 }, { "epoch": 0.952962257811625, "grad_norm": 1.4323337078094482, "learning_rate": 8.504500000000001e-05, "loss": 0.5476, "step": 17018 }, { "epoch": 0.953018255123754, "grad_norm": 1.0337882041931152, "learning_rate": 8.505000000000001e-05, "loss": 0.3104, "step": 17019 }, { "epoch": 0.9530742524358831, "grad_norm": 1.3247967958450317, "learning_rate": 8.5055e-05, "loss": 0.4969, "step": 17020 }, { "epoch": 0.9531302497480121, "grad_norm": 1.4347736835479736, "learning_rate": 8.506e-05, "loss": 0.4299, "step": 17021 }, { "epoch": 0.9531862470601411, "grad_norm": 1.526903510093689, "learning_rate": 8.5065e-05, "loss": 0.3932, "step": 17022 }, { "epoch": 0.9532422443722701, "grad_norm": 1.2358578443527222, "learning_rate": 8.507000000000001e-05, "loss": 0.3121, "step": 17023 }, { "epoch": 0.9532982416843991, "grad_norm": 2.889700174331665, "learning_rate": 8.507500000000001e-05, "loss": 0.7682, "step": 17024 }, { "epoch": 0.9533542389965282, "grad_norm": 1.4642601013183594, "learning_rate": 8.508e-05, "loss": 0.3698, "step": 17025 }, { "epoch": 0.9534102363086572, "grad_norm": 1.27633798122406, "learning_rate": 8.5085e-05, "loss": 0.4462, "step": 17026 }, { "epoch": 0.9534662336207862, "grad_norm": 1.7518889904022217, "learning_rate": 8.509e-05, "loss": 0.5751, "step": 17027 }, { "epoch": 0.9535222309329152, "grad_norm": 1.443544864654541, "learning_rate": 8.5095e-05, 
"loss": 0.378, "step": 17028 }, { "epoch": 0.9535782282450442, "grad_norm": 1.3720136880874634, "learning_rate": 8.510000000000001e-05, "loss": 0.4845, "step": 17029 }, { "epoch": 0.9536342255571733, "grad_norm": 1.889662265777588, "learning_rate": 8.5105e-05, "loss": 0.4681, "step": 17030 }, { "epoch": 0.9536902228693023, "grad_norm": 1.6645822525024414, "learning_rate": 8.511e-05, "loss": 0.3319, "step": 17031 }, { "epoch": 0.9537462201814313, "grad_norm": 1.5410239696502686, "learning_rate": 8.5115e-05, "loss": 0.527, "step": 17032 }, { "epoch": 0.9538022174935603, "grad_norm": 1.3207294940948486, "learning_rate": 8.512e-05, "loss": 0.4837, "step": 17033 }, { "epoch": 0.9538582148056893, "grad_norm": 1.1162564754486084, "learning_rate": 8.5125e-05, "loss": 0.3487, "step": 17034 }, { "epoch": 0.9539142121178183, "grad_norm": 1.2722265720367432, "learning_rate": 8.512999999999999e-05, "loss": 0.4862, "step": 17035 }, { "epoch": 0.9539702094299474, "grad_norm": 1.4618171453475952, "learning_rate": 8.5135e-05, "loss": 0.4082, "step": 17036 }, { "epoch": 0.9540262067420764, "grad_norm": 1.6251577138900757, "learning_rate": 8.514000000000001e-05, "loss": 0.4704, "step": 17037 }, { "epoch": 0.9540822040542054, "grad_norm": 1.2648488283157349, "learning_rate": 8.514500000000001e-05, "loss": 0.3907, "step": 17038 }, { "epoch": 0.9541382013663344, "grad_norm": 1.171286702156067, "learning_rate": 8.515000000000001e-05, "loss": 0.36, "step": 17039 }, { "epoch": 0.9541941986784634, "grad_norm": 1.8883570432662964, "learning_rate": 8.515500000000001e-05, "loss": 0.4311, "step": 17040 }, { "epoch": 0.9542501959905925, "grad_norm": 1.5154889822006226, "learning_rate": 8.516e-05, "loss": 0.6413, "step": 17041 }, { "epoch": 0.9543061933027215, "grad_norm": 1.3099116086959839, "learning_rate": 8.5165e-05, "loss": 0.4196, "step": 17042 }, { "epoch": 0.9543621906148505, "grad_norm": 1.5377181768417358, "learning_rate": 8.517e-05, "loss": 0.5744, "step": 17043 }, { "epoch": 
0.9544181879269795, "grad_norm": 1.5144118070602417, "learning_rate": 8.517500000000001e-05, "loss": 0.3586, "step": 17044 }, { "epoch": 0.9544741852391085, "grad_norm": 1.6528390645980835, "learning_rate": 8.518000000000001e-05, "loss": 0.4618, "step": 17045 }, { "epoch": 0.9545301825512376, "grad_norm": 2.705566883087158, "learning_rate": 8.5185e-05, "loss": 0.3898, "step": 17046 }, { "epoch": 0.9545861798633666, "grad_norm": 1.237256646156311, "learning_rate": 8.519e-05, "loss": 0.4158, "step": 17047 }, { "epoch": 0.9546421771754956, "grad_norm": 1.4359909296035767, "learning_rate": 8.5195e-05, "loss": 0.5538, "step": 17048 }, { "epoch": 0.9546981744876246, "grad_norm": 1.3809127807617188, "learning_rate": 8.52e-05, "loss": 0.4524, "step": 17049 }, { "epoch": 0.9547541717997536, "grad_norm": 1.2590758800506592, "learning_rate": 8.5205e-05, "loss": 0.4239, "step": 17050 }, { "epoch": 0.9548101691118827, "grad_norm": 1.2650855779647827, "learning_rate": 8.521e-05, "loss": 0.5073, "step": 17051 }, { "epoch": 0.9548661664240117, "grad_norm": 1.4202791452407837, "learning_rate": 8.5215e-05, "loss": 0.4295, "step": 17052 }, { "epoch": 0.9549221637361407, "grad_norm": 1.4298592805862427, "learning_rate": 8.522e-05, "loss": 0.645, "step": 17053 }, { "epoch": 0.9549781610482697, "grad_norm": 1.6812329292297363, "learning_rate": 8.5225e-05, "loss": 0.4883, "step": 17054 }, { "epoch": 0.9550341583603987, "grad_norm": 1.1203745603561401, "learning_rate": 8.523e-05, "loss": 0.4202, "step": 17055 }, { "epoch": 0.9550901556725278, "grad_norm": 1.4017579555511475, "learning_rate": 8.5235e-05, "loss": 0.3603, "step": 17056 }, { "epoch": 0.9551461529846568, "grad_norm": 1.3865840435028076, "learning_rate": 8.524e-05, "loss": 0.436, "step": 17057 }, { "epoch": 0.9552021502967858, "grad_norm": 1.513572096824646, "learning_rate": 8.524500000000001e-05, "loss": 0.4439, "step": 17058 }, { "epoch": 0.9552581476089148, "grad_norm": 1.321823000907898, "learning_rate": 
8.525000000000001e-05, "loss": 0.4888, "step": 17059 }, { "epoch": 0.9553141449210438, "grad_norm": 1.6148601770401, "learning_rate": 8.525500000000001e-05, "loss": 0.4308, "step": 17060 }, { "epoch": 0.9553701422331728, "grad_norm": 1.5410012006759644, "learning_rate": 8.526000000000001e-05, "loss": 0.5282, "step": 17061 }, { "epoch": 0.9554261395453019, "grad_norm": 1.0875070095062256, "learning_rate": 8.5265e-05, "loss": 0.3218, "step": 17062 }, { "epoch": 0.9554821368574309, "grad_norm": 1.3766624927520752, "learning_rate": 8.527e-05, "loss": 0.4481, "step": 17063 }, { "epoch": 0.9555381341695599, "grad_norm": 1.3956143856048584, "learning_rate": 8.5275e-05, "loss": 0.4638, "step": 17064 }, { "epoch": 0.9555941314816889, "grad_norm": 1.3810175657272339, "learning_rate": 8.528000000000001e-05, "loss": 0.579, "step": 17065 }, { "epoch": 0.9556501287938179, "grad_norm": 1.3077311515808105, "learning_rate": 8.528500000000001e-05, "loss": 0.3388, "step": 17066 }, { "epoch": 0.955706126105947, "grad_norm": 2.1643025875091553, "learning_rate": 8.529e-05, "loss": 0.4967, "step": 17067 }, { "epoch": 0.955762123418076, "grad_norm": 1.6373200416564941, "learning_rate": 8.5295e-05, "loss": 0.5301, "step": 17068 }, { "epoch": 0.955818120730205, "grad_norm": 1.2569293975830078, "learning_rate": 8.53e-05, "loss": 0.3281, "step": 17069 }, { "epoch": 0.955874118042334, "grad_norm": 1.2961771488189697, "learning_rate": 8.5305e-05, "loss": 0.5222, "step": 17070 }, { "epoch": 0.955930115354463, "grad_norm": 1.2537667751312256, "learning_rate": 8.531e-05, "loss": 0.5211, "step": 17071 }, { "epoch": 0.9559861126665921, "grad_norm": 1.3391064405441284, "learning_rate": 8.5315e-05, "loss": 0.4964, "step": 17072 }, { "epoch": 0.956042109978721, "grad_norm": 1.6892999410629272, "learning_rate": 8.532e-05, "loss": 0.7413, "step": 17073 }, { "epoch": 0.95609810729085, "grad_norm": 1.190972924232483, "learning_rate": 8.5325e-05, "loss": 0.4132, "step": 17074 }, { "epoch": 
0.956154104602979, "grad_norm": 1.376509666442871, "learning_rate": 8.533e-05, "loss": 0.4137, "step": 17075 }, { "epoch": 0.956210101915108, "grad_norm": 1.2693827152252197, "learning_rate": 8.533500000000001e-05, "loss": 0.3201, "step": 17076 }, { "epoch": 0.956266099227237, "grad_norm": 1.4007991552352905, "learning_rate": 8.534e-05, "loss": 0.4881, "step": 17077 }, { "epoch": 0.9563220965393661, "grad_norm": 1.4203362464904785, "learning_rate": 8.5345e-05, "loss": 0.5946, "step": 17078 }, { "epoch": 0.9563780938514951, "grad_norm": 1.3453959226608276, "learning_rate": 8.535e-05, "loss": 0.4659, "step": 17079 }, { "epoch": 0.9564340911636241, "grad_norm": 1.370445966720581, "learning_rate": 8.535500000000001e-05, "loss": 0.3657, "step": 17080 }, { "epoch": 0.9564900884757531, "grad_norm": 1.295682430267334, "learning_rate": 8.536000000000001e-05, "loss": 0.2921, "step": 17081 }, { "epoch": 0.9565460857878821, "grad_norm": 1.332443356513977, "learning_rate": 8.536500000000001e-05, "loss": 0.3777, "step": 17082 }, { "epoch": 0.9566020831000112, "grad_norm": 1.307560682296753, "learning_rate": 8.537e-05, "loss": 0.5362, "step": 17083 }, { "epoch": 0.9566580804121402, "grad_norm": 1.2536908388137817, "learning_rate": 8.5375e-05, "loss": 0.4425, "step": 17084 }, { "epoch": 0.9567140777242692, "grad_norm": 1.1129144430160522, "learning_rate": 8.538e-05, "loss": 0.3942, "step": 17085 }, { "epoch": 0.9567700750363982, "grad_norm": 1.0530842542648315, "learning_rate": 8.538500000000001e-05, "loss": 0.4956, "step": 17086 }, { "epoch": 0.9568260723485272, "grad_norm": 1.2498310804367065, "learning_rate": 8.539000000000001e-05, "loss": 0.4406, "step": 17087 }, { "epoch": 0.9568820696606563, "grad_norm": 1.247690200805664, "learning_rate": 8.5395e-05, "loss": 0.3611, "step": 17088 }, { "epoch": 0.9569380669727853, "grad_norm": 1.384846568107605, "learning_rate": 8.54e-05, "loss": 0.5635, "step": 17089 }, { "epoch": 0.9569940642849143, "grad_norm": 1.3289271593093872, 
"learning_rate": 8.5405e-05, "loss": 0.5475, "step": 17090 }, { "epoch": 0.9570500615970433, "grad_norm": 1.2485624551773071, "learning_rate": 8.541e-05, "loss": 0.4813, "step": 17091 }, { "epoch": 0.9571060589091723, "grad_norm": 1.3284590244293213, "learning_rate": 8.5415e-05, "loss": 0.4166, "step": 17092 }, { "epoch": 0.9571620562213013, "grad_norm": 1.539612054824829, "learning_rate": 8.542e-05, "loss": 0.3568, "step": 17093 }, { "epoch": 0.9572180535334304, "grad_norm": 1.638933777809143, "learning_rate": 8.5425e-05, "loss": 0.4398, "step": 17094 }, { "epoch": 0.9572740508455594, "grad_norm": 1.4553638696670532, "learning_rate": 8.543e-05, "loss": 0.5311, "step": 17095 }, { "epoch": 0.9573300481576884, "grad_norm": 1.381110668182373, "learning_rate": 8.543500000000001e-05, "loss": 0.5229, "step": 17096 }, { "epoch": 0.9573860454698174, "grad_norm": 1.2284245491027832, "learning_rate": 8.544000000000001e-05, "loss": 0.4791, "step": 17097 }, { "epoch": 0.9574420427819464, "grad_norm": 1.2967196702957153, "learning_rate": 8.5445e-05, "loss": 0.3985, "step": 17098 }, { "epoch": 0.9574980400940755, "grad_norm": 1.2861863374710083, "learning_rate": 8.545e-05, "loss": 0.4384, "step": 17099 }, { "epoch": 0.9575540374062045, "grad_norm": 1.610902190208435, "learning_rate": 8.5455e-05, "loss": 0.5805, "step": 17100 }, { "epoch": 0.9576100347183335, "grad_norm": 1.3004497289657593, "learning_rate": 8.546000000000001e-05, "loss": 0.4394, "step": 17101 }, { "epoch": 0.9576660320304625, "grad_norm": 1.4884735345840454, "learning_rate": 8.546500000000001e-05, "loss": 0.6698, "step": 17102 }, { "epoch": 0.9577220293425915, "grad_norm": 1.7032983303070068, "learning_rate": 8.547e-05, "loss": 0.6283, "step": 17103 }, { "epoch": 0.9577780266547206, "grad_norm": 1.5157755613327026, "learning_rate": 8.5475e-05, "loss": 0.5819, "step": 17104 }, { "epoch": 0.9578340239668496, "grad_norm": 1.2883331775665283, "learning_rate": 8.548e-05, "loss": 0.4398, "step": 17105 }, { "epoch": 
0.9578900212789786, "grad_norm": 1.4675449132919312, "learning_rate": 8.5485e-05, "loss": 0.385, "step": 17106 }, { "epoch": 0.9579460185911076, "grad_norm": 1.3032020330429077, "learning_rate": 8.549000000000001e-05, "loss": 0.4696, "step": 17107 }, { "epoch": 0.9580020159032366, "grad_norm": 1.1724374294281006, "learning_rate": 8.549500000000001e-05, "loss": 0.369, "step": 17108 }, { "epoch": 0.9580580132153657, "grad_norm": 1.5858454704284668, "learning_rate": 8.55e-05, "loss": 0.5147, "step": 17109 }, { "epoch": 0.9581140105274947, "grad_norm": 1.1673816442489624, "learning_rate": 8.5505e-05, "loss": 0.3849, "step": 17110 }, { "epoch": 0.9581700078396237, "grad_norm": 1.6120600700378418, "learning_rate": 8.551e-05, "loss": 0.3709, "step": 17111 }, { "epoch": 0.9582260051517527, "grad_norm": 1.4594321250915527, "learning_rate": 8.5515e-05, "loss": 0.5787, "step": 17112 }, { "epoch": 0.9582820024638817, "grad_norm": 1.3463350534439087, "learning_rate": 8.552e-05, "loss": 0.4482, "step": 17113 }, { "epoch": 0.9583379997760108, "grad_norm": 1.336738109588623, "learning_rate": 8.5525e-05, "loss": 0.4269, "step": 17114 }, { "epoch": 0.9583939970881398, "grad_norm": 1.1716670989990234, "learning_rate": 8.553e-05, "loss": 0.4297, "step": 17115 }, { "epoch": 0.9584499944002688, "grad_norm": 1.1067936420440674, "learning_rate": 8.553500000000001e-05, "loss": 0.3488, "step": 17116 }, { "epoch": 0.9585059917123978, "grad_norm": 1.4455829858779907, "learning_rate": 8.554000000000001e-05, "loss": 0.4391, "step": 17117 }, { "epoch": 0.9585619890245268, "grad_norm": 1.3242000341415405, "learning_rate": 8.554500000000001e-05, "loss": 0.4274, "step": 17118 }, { "epoch": 0.9586179863366558, "grad_norm": 1.7039105892181396, "learning_rate": 8.555e-05, "loss": 0.4254, "step": 17119 }, { "epoch": 0.9586739836487849, "grad_norm": 1.2343722581863403, "learning_rate": 8.5555e-05, "loss": 0.4459, "step": 17120 }, { "epoch": 0.9587299809609139, "grad_norm": 1.8182319402694702, 
"learning_rate": 8.556e-05, "loss": 0.4805, "step": 17121 }, { "epoch": 0.9587859782730429, "grad_norm": 1.6502269506454468, "learning_rate": 8.556500000000001e-05, "loss": 0.5003, "step": 17122 }, { "epoch": 0.9588419755851719, "grad_norm": 1.1833775043487549, "learning_rate": 8.557000000000001e-05, "loss": 0.4476, "step": 17123 }, { "epoch": 0.9588979728973009, "grad_norm": 1.6448407173156738, "learning_rate": 8.5575e-05, "loss": 0.3923, "step": 17124 }, { "epoch": 0.95895397020943, "grad_norm": 1.1200000047683716, "learning_rate": 8.558e-05, "loss": 0.3327, "step": 17125 }, { "epoch": 0.959009967521559, "grad_norm": 1.3580704927444458, "learning_rate": 8.5585e-05, "loss": 0.5893, "step": 17126 }, { "epoch": 0.959065964833688, "grad_norm": 1.5426150560379028, "learning_rate": 8.559e-05, "loss": 0.5055, "step": 17127 }, { "epoch": 0.959121962145817, "grad_norm": 1.2711679935455322, "learning_rate": 8.559500000000001e-05, "loss": 0.4388, "step": 17128 }, { "epoch": 0.959177959457946, "grad_norm": 1.3287243843078613, "learning_rate": 8.560000000000001e-05, "loss": 0.4177, "step": 17129 }, { "epoch": 0.9592339567700751, "grad_norm": 1.3373806476593018, "learning_rate": 8.5605e-05, "loss": 0.4425, "step": 17130 }, { "epoch": 0.9592899540822041, "grad_norm": 1.3446122407913208, "learning_rate": 8.561e-05, "loss": 0.4756, "step": 17131 }, { "epoch": 0.9593459513943331, "grad_norm": 1.3530346155166626, "learning_rate": 8.5615e-05, "loss": 0.365, "step": 17132 }, { "epoch": 0.9594019487064621, "grad_norm": 1.2455790042877197, "learning_rate": 8.562e-05, "loss": 0.5752, "step": 17133 }, { "epoch": 0.9594579460185911, "grad_norm": 1.099302053451538, "learning_rate": 8.5625e-05, "loss": 0.3418, "step": 17134 }, { "epoch": 0.9595139433307202, "grad_norm": 1.6025974750518799, "learning_rate": 8.563e-05, "loss": 0.4204, "step": 17135 }, { "epoch": 0.9595699406428492, "grad_norm": 1.3592160940170288, "learning_rate": 8.5635e-05, "loss": 0.524, "step": 17136 }, { "epoch": 
0.9596259379549782, "grad_norm": 1.4095449447631836, "learning_rate": 8.564000000000001e-05, "loss": 0.4715, "step": 17137 }, { "epoch": 0.9596819352671072, "grad_norm": 1.1044148206710815, "learning_rate": 8.564500000000001e-05, "loss": 0.5169, "step": 17138 }, { "epoch": 0.9597379325792362, "grad_norm": 1.1242592334747314, "learning_rate": 8.565000000000001e-05, "loss": 0.4757, "step": 17139 }, { "epoch": 0.9597939298913652, "grad_norm": 1.244508147239685, "learning_rate": 8.5655e-05, "loss": 0.3963, "step": 17140 }, { "epoch": 0.9598499272034943, "grad_norm": 1.54185152053833, "learning_rate": 8.566e-05, "loss": 0.4904, "step": 17141 }, { "epoch": 0.9599059245156233, "grad_norm": 1.4076370000839233, "learning_rate": 8.5665e-05, "loss": 0.4381, "step": 17142 }, { "epoch": 0.9599619218277523, "grad_norm": 1.2301522493362427, "learning_rate": 8.567000000000001e-05, "loss": 0.4052, "step": 17143 }, { "epoch": 0.9600179191398813, "grad_norm": 1.486867070198059, "learning_rate": 8.567500000000001e-05, "loss": 0.4805, "step": 17144 }, { "epoch": 0.9600739164520103, "grad_norm": 1.4362130165100098, "learning_rate": 8.568e-05, "loss": 0.3875, "step": 17145 }, { "epoch": 0.9601299137641394, "grad_norm": 1.2532141208648682, "learning_rate": 8.5685e-05, "loss": 0.4779, "step": 17146 }, { "epoch": 0.9601859110762684, "grad_norm": 1.3376730680465698, "learning_rate": 8.569e-05, "loss": 0.4556, "step": 17147 }, { "epoch": 0.9602419083883974, "grad_norm": 1.3041802644729614, "learning_rate": 8.5695e-05, "loss": 0.6248, "step": 17148 }, { "epoch": 0.9602979057005264, "grad_norm": 3.7895519733428955, "learning_rate": 8.57e-05, "loss": 0.3966, "step": 17149 }, { "epoch": 0.9603539030126554, "grad_norm": 1.2806825637817383, "learning_rate": 8.570500000000001e-05, "loss": 0.5187, "step": 17150 }, { "epoch": 0.9604099003247845, "grad_norm": 1.4069244861602783, "learning_rate": 8.571e-05, "loss": 0.5099, "step": 17151 }, { "epoch": 0.9604658976369135, "grad_norm": 1.3413307666778564, 
"learning_rate": 8.5715e-05, "loss": 0.3637, "step": 17152 }, { "epoch": 0.9605218949490425, "grad_norm": 1.2224640846252441, "learning_rate": 8.572e-05, "loss": 0.4022, "step": 17153 }, { "epoch": 0.9605778922611715, "grad_norm": 1.2687058448791504, "learning_rate": 8.5725e-05, "loss": 0.532, "step": 17154 }, { "epoch": 0.9606338895733005, "grad_norm": 1.3773020505905151, "learning_rate": 8.573e-05, "loss": 0.4421, "step": 17155 }, { "epoch": 0.9606898868854294, "grad_norm": 1.491414189338684, "learning_rate": 8.5735e-05, "loss": 0.5085, "step": 17156 }, { "epoch": 0.9607458841975585, "grad_norm": 1.4372575283050537, "learning_rate": 8.574000000000002e-05, "loss": 0.4531, "step": 17157 }, { "epoch": 0.9608018815096875, "grad_norm": 1.0840134620666504, "learning_rate": 8.574500000000001e-05, "loss": 0.3929, "step": 17158 }, { "epoch": 0.9608578788218165, "grad_norm": 1.4550862312316895, "learning_rate": 8.575000000000001e-05, "loss": 0.4343, "step": 17159 }, { "epoch": 0.9609138761339455, "grad_norm": 1.675118327140808, "learning_rate": 8.575500000000001e-05, "loss": 0.4982, "step": 17160 }, { "epoch": 0.9609698734460745, "grad_norm": 1.2108522653579712, "learning_rate": 8.576e-05, "loss": 0.3745, "step": 17161 }, { "epoch": 0.9610258707582036, "grad_norm": 1.415676474571228, "learning_rate": 8.5765e-05, "loss": 0.5187, "step": 17162 }, { "epoch": 0.9610818680703326, "grad_norm": 1.544421911239624, "learning_rate": 8.577e-05, "loss": 0.4471, "step": 17163 }, { "epoch": 0.9611378653824616, "grad_norm": 1.3246055841445923, "learning_rate": 8.577500000000001e-05, "loss": 0.4553, "step": 17164 }, { "epoch": 0.9611938626945906, "grad_norm": 1.423762321472168, "learning_rate": 8.578000000000001e-05, "loss": 0.5542, "step": 17165 }, { "epoch": 0.9612498600067196, "grad_norm": 1.6242868900299072, "learning_rate": 8.5785e-05, "loss": 0.4095, "step": 17166 }, { "epoch": 0.9613058573188487, "grad_norm": 1.2840968370437622, "learning_rate": 8.579e-05, "loss": 0.5696, "step": 
17167 }, { "epoch": 0.9613618546309777, "grad_norm": 1.1831138134002686, "learning_rate": 8.5795e-05, "loss": 0.3876, "step": 17168 }, { "epoch": 0.9614178519431067, "grad_norm": 1.7165281772613525, "learning_rate": 8.58e-05, "loss": 0.4842, "step": 17169 }, { "epoch": 0.9614738492552357, "grad_norm": 1.2815358638763428, "learning_rate": 8.5805e-05, "loss": 0.46, "step": 17170 }, { "epoch": 0.9615298465673647, "grad_norm": 1.4860469102859497, "learning_rate": 8.581000000000001e-05, "loss": 0.4817, "step": 17171 }, { "epoch": 0.9615858438794938, "grad_norm": 1.146111011505127, "learning_rate": 8.5815e-05, "loss": 0.4267, "step": 17172 }, { "epoch": 0.9616418411916228, "grad_norm": 1.5226686000823975, "learning_rate": 8.582e-05, "loss": 0.3614, "step": 17173 }, { "epoch": 0.9616978385037518, "grad_norm": 1.1706711053848267, "learning_rate": 8.5825e-05, "loss": 0.3884, "step": 17174 }, { "epoch": 0.9617538358158808, "grad_norm": 1.2649924755096436, "learning_rate": 8.583e-05, "loss": 0.317, "step": 17175 }, { "epoch": 0.9618098331280098, "grad_norm": 1.4722168445587158, "learning_rate": 8.5835e-05, "loss": 0.4549, "step": 17176 }, { "epoch": 0.9618658304401388, "grad_norm": 1.4678112268447876, "learning_rate": 8.584e-05, "loss": 0.452, "step": 17177 }, { "epoch": 0.9619218277522679, "grad_norm": 1.369017243385315, "learning_rate": 8.5845e-05, "loss": 0.4349, "step": 17178 }, { "epoch": 0.9619778250643969, "grad_norm": 1.6606252193450928, "learning_rate": 8.585000000000001e-05, "loss": 0.4326, "step": 17179 }, { "epoch": 0.9620338223765259, "grad_norm": 1.2879390716552734, "learning_rate": 8.585500000000001e-05, "loss": 0.4629, "step": 17180 }, { "epoch": 0.9620898196886549, "grad_norm": 1.4565268754959106, "learning_rate": 8.586000000000001e-05, "loss": 0.4547, "step": 17181 }, { "epoch": 0.9621458170007839, "grad_norm": 1.0399436950683594, "learning_rate": 8.5865e-05, "loss": 0.3099, "step": 17182 }, { "epoch": 0.962201814312913, "grad_norm": 1.2132089138031006, 
"learning_rate": 8.587e-05, "loss": 0.4141, "step": 17183 }, { "epoch": 0.962257811625042, "grad_norm": 1.2838457822799683, "learning_rate": 8.5875e-05, "loss": 0.3578, "step": 17184 }, { "epoch": 0.962313808937171, "grad_norm": 1.459041953086853, "learning_rate": 8.588000000000001e-05, "loss": 0.444, "step": 17185 }, { "epoch": 0.9623698062493, "grad_norm": 1.8635562658309937, "learning_rate": 8.588500000000001e-05, "loss": 0.4549, "step": 17186 }, { "epoch": 0.962425803561429, "grad_norm": 1.378082036972046, "learning_rate": 8.589e-05, "loss": 0.4109, "step": 17187 }, { "epoch": 0.9624818008735581, "grad_norm": 1.1600459814071655, "learning_rate": 8.5895e-05, "loss": 0.3097, "step": 17188 }, { "epoch": 0.9625377981856871, "grad_norm": 1.5207675695419312, "learning_rate": 8.59e-05, "loss": 0.4211, "step": 17189 }, { "epoch": 0.9625937954978161, "grad_norm": 1.275919795036316, "learning_rate": 8.5905e-05, "loss": 0.5067, "step": 17190 }, { "epoch": 0.9626497928099451, "grad_norm": 1.1985430717468262, "learning_rate": 8.591e-05, "loss": 0.4454, "step": 17191 }, { "epoch": 0.9627057901220741, "grad_norm": 1.5325583219528198, "learning_rate": 8.5915e-05, "loss": 0.5317, "step": 17192 }, { "epoch": 0.9627617874342032, "grad_norm": 1.1471023559570312, "learning_rate": 8.592e-05, "loss": 0.388, "step": 17193 }, { "epoch": 0.9628177847463322, "grad_norm": 1.355360507965088, "learning_rate": 8.5925e-05, "loss": 0.5246, "step": 17194 }, { "epoch": 0.9628737820584612, "grad_norm": 1.3444854021072388, "learning_rate": 8.593e-05, "loss": 0.5281, "step": 17195 }, { "epoch": 0.9629297793705902, "grad_norm": 1.592160701751709, "learning_rate": 8.5935e-05, "loss": 0.4489, "step": 17196 }, { "epoch": 0.9629857766827192, "grad_norm": 1.5497907400131226, "learning_rate": 8.594000000000001e-05, "loss": 0.5357, "step": 17197 }, { "epoch": 0.9630417739948482, "grad_norm": 1.4236985445022583, "learning_rate": 8.5945e-05, "loss": 0.4859, "step": 17198 }, { "epoch": 0.9630977713069773, 
"grad_norm": 1.5251559019088745, "learning_rate": 8.595e-05, "loss": 0.5002, "step": 17199 }, { "epoch": 0.9631537686191063, "grad_norm": 1.4532705545425415, "learning_rate": 8.595500000000001e-05, "loss": 0.5314, "step": 17200 }, { "epoch": 0.9632097659312353, "grad_norm": 1.423785924911499, "learning_rate": 8.596000000000001e-05, "loss": 0.582, "step": 17201 }, { "epoch": 0.9632657632433643, "grad_norm": 1.3997337818145752, "learning_rate": 8.596500000000001e-05, "loss": 0.4636, "step": 17202 }, { "epoch": 0.9633217605554933, "grad_norm": 1.3916946649551392, "learning_rate": 8.597e-05, "loss": 0.4764, "step": 17203 }, { "epoch": 0.9633777578676224, "grad_norm": 1.5298632383346558, "learning_rate": 8.5975e-05, "loss": 0.4478, "step": 17204 }, { "epoch": 0.9634337551797514, "grad_norm": 1.5225152969360352, "learning_rate": 8.598e-05, "loss": 0.5898, "step": 17205 }, { "epoch": 0.9634897524918804, "grad_norm": 1.1563955545425415, "learning_rate": 8.598500000000001e-05, "loss": 0.539, "step": 17206 }, { "epoch": 0.9635457498040094, "grad_norm": 1.3186798095703125, "learning_rate": 8.599000000000001e-05, "loss": 0.472, "step": 17207 }, { "epoch": 0.9636017471161384, "grad_norm": 1.4567782878875732, "learning_rate": 8.5995e-05, "loss": 0.5397, "step": 17208 }, { "epoch": 0.9636577444282675, "grad_norm": 1.0257200002670288, "learning_rate": 8.6e-05, "loss": 0.3696, "step": 17209 }, { "epoch": 0.9637137417403965, "grad_norm": 1.7328156232833862, "learning_rate": 8.6005e-05, "loss": 0.3124, "step": 17210 }, { "epoch": 0.9637697390525255, "grad_norm": 1.5354329347610474, "learning_rate": 8.601e-05, "loss": 0.483, "step": 17211 }, { "epoch": 0.9638257363646545, "grad_norm": 1.1250163316726685, "learning_rate": 8.6015e-05, "loss": 0.4669, "step": 17212 }, { "epoch": 0.9638817336767835, "grad_norm": 1.317323088645935, "learning_rate": 8.602e-05, "loss": 0.3965, "step": 17213 }, { "epoch": 0.9639377309889126, "grad_norm": 1.4232598543167114, "learning_rate": 8.6025e-05, 
"loss": 0.4235, "step": 17214 }, { "epoch": 0.9639937283010416, "grad_norm": 1.5435523986816406, "learning_rate": 8.603e-05, "loss": 0.4698, "step": 17215 }, { "epoch": 0.9640497256131706, "grad_norm": 1.3362163305282593, "learning_rate": 8.6035e-05, "loss": 0.3945, "step": 17216 }, { "epoch": 0.9641057229252996, "grad_norm": 1.4581693410873413, "learning_rate": 8.604000000000001e-05, "loss": 0.5974, "step": 17217 }, { "epoch": 0.9641617202374286, "grad_norm": 1.4092007875442505, "learning_rate": 8.604500000000001e-05, "loss": 0.5306, "step": 17218 }, { "epoch": 0.9642177175495577, "grad_norm": 1.7647417783737183, "learning_rate": 8.605e-05, "loss": 0.5479, "step": 17219 }, { "epoch": 0.9642737148616867, "grad_norm": 1.4271140098571777, "learning_rate": 8.6055e-05, "loss": 0.5704, "step": 17220 }, { "epoch": 0.9643297121738157, "grad_norm": 1.4965245723724365, "learning_rate": 8.606000000000001e-05, "loss": 0.5712, "step": 17221 }, { "epoch": 0.9643857094859447, "grad_norm": 1.320960283279419, "learning_rate": 8.606500000000001e-05, "loss": 0.5041, "step": 17222 }, { "epoch": 0.9644417067980737, "grad_norm": 1.5413872003555298, "learning_rate": 8.607000000000001e-05, "loss": 0.5382, "step": 17223 }, { "epoch": 0.9644977041102027, "grad_norm": 1.509645938873291, "learning_rate": 8.6075e-05, "loss": 0.3631, "step": 17224 }, { "epoch": 0.9645537014223318, "grad_norm": 1.2352864742279053, "learning_rate": 8.608e-05, "loss": 0.3771, "step": 17225 }, { "epoch": 0.9646096987344608, "grad_norm": 1.4004476070404053, "learning_rate": 8.6085e-05, "loss": 0.4592, "step": 17226 }, { "epoch": 0.9646656960465898, "grad_norm": 1.2401401996612549, "learning_rate": 8.609e-05, "loss": 0.4409, "step": 17227 }, { "epoch": 0.9647216933587188, "grad_norm": 1.4171693325042725, "learning_rate": 8.609500000000001e-05, "loss": 0.4446, "step": 17228 }, { "epoch": 0.9647776906708478, "grad_norm": 1.1623423099517822, "learning_rate": 8.61e-05, "loss": 0.5162, "step": 17229 }, { "epoch": 
0.9648336879829769, "grad_norm": 1.4732013940811157, "learning_rate": 8.6105e-05, "loss": 0.411, "step": 17230 }, { "epoch": 0.9648896852951059, "grad_norm": 1.3330683708190918, "learning_rate": 8.611e-05, "loss": 0.463, "step": 17231 }, { "epoch": 0.9649456826072349, "grad_norm": 1.6553144454956055, "learning_rate": 8.6115e-05, "loss": 0.4626, "step": 17232 }, { "epoch": 0.9650016799193639, "grad_norm": 1.2183603048324585, "learning_rate": 8.612e-05, "loss": 0.4028, "step": 17233 }, { "epoch": 0.9650576772314929, "grad_norm": 1.3921277523040771, "learning_rate": 8.6125e-05, "loss": 0.5236, "step": 17234 }, { "epoch": 0.965113674543622, "grad_norm": 1.318587303161621, "learning_rate": 8.613e-05, "loss": 0.3898, "step": 17235 }, { "epoch": 0.965169671855751, "grad_norm": 1.4311283826828003, "learning_rate": 8.6135e-05, "loss": 0.3776, "step": 17236 }, { "epoch": 0.96522566916788, "grad_norm": 2.413759231567383, "learning_rate": 8.614000000000001e-05, "loss": 0.602, "step": 17237 }, { "epoch": 0.9652816664800089, "grad_norm": 2.402284860610962, "learning_rate": 8.614500000000001e-05, "loss": 0.4754, "step": 17238 }, { "epoch": 0.9653376637921379, "grad_norm": 1.4711947441101074, "learning_rate": 8.615000000000001e-05, "loss": 0.5307, "step": 17239 }, { "epoch": 0.9653936611042669, "grad_norm": 1.5300489664077759, "learning_rate": 8.6155e-05, "loss": 0.5064, "step": 17240 }, { "epoch": 0.965449658416396, "grad_norm": 1.4783146381378174, "learning_rate": 8.616e-05, "loss": 0.3733, "step": 17241 }, { "epoch": 0.965505655728525, "grad_norm": 1.3357300758361816, "learning_rate": 8.616500000000001e-05, "loss": 0.4655, "step": 17242 }, { "epoch": 0.965561653040654, "grad_norm": 1.3752564191818237, "learning_rate": 8.617000000000001e-05, "loss": 0.5256, "step": 17243 }, { "epoch": 0.965617650352783, "grad_norm": 1.4365041255950928, "learning_rate": 8.617500000000001e-05, "loss": 0.4837, "step": 17244 }, { "epoch": 0.965673647664912, "grad_norm": 1.1746888160705566, 
"learning_rate": 8.618e-05, "loss": 0.464, "step": 17245 }, { "epoch": 0.9657296449770411, "grad_norm": 1.3307267427444458, "learning_rate": 8.6185e-05, "loss": 0.4821, "step": 17246 }, { "epoch": 0.9657856422891701, "grad_norm": 1.5634570121765137, "learning_rate": 8.619e-05, "loss": 0.5751, "step": 17247 }, { "epoch": 0.9658416396012991, "grad_norm": 1.3047250509262085, "learning_rate": 8.6195e-05, "loss": 0.4271, "step": 17248 }, { "epoch": 0.9658976369134281, "grad_norm": 1.172465443611145, "learning_rate": 8.620000000000001e-05, "loss": 0.4076, "step": 17249 }, { "epoch": 0.9659536342255571, "grad_norm": 1.4473350048065186, "learning_rate": 8.6205e-05, "loss": 0.5583, "step": 17250 }, { "epoch": 0.9660096315376862, "grad_norm": 1.2676153182983398, "learning_rate": 8.621e-05, "loss": 0.3651, "step": 17251 }, { "epoch": 0.9660656288498152, "grad_norm": 1.218522071838379, "learning_rate": 8.6215e-05, "loss": 0.4182, "step": 17252 }, { "epoch": 0.9661216261619442, "grad_norm": 1.5967514514923096, "learning_rate": 8.622e-05, "loss": 0.5798, "step": 17253 }, { "epoch": 0.9661776234740732, "grad_norm": 1.5131630897521973, "learning_rate": 8.6225e-05, "loss": 0.6109, "step": 17254 }, { "epoch": 0.9662336207862022, "grad_norm": 1.3282312154769897, "learning_rate": 8.623e-05, "loss": 0.3214, "step": 17255 }, { "epoch": 0.9662896180983312, "grad_norm": 1.2723884582519531, "learning_rate": 8.6235e-05, "loss": 0.3748, "step": 17256 }, { "epoch": 0.9663456154104603, "grad_norm": 4.182846546173096, "learning_rate": 8.624000000000001e-05, "loss": 0.4868, "step": 17257 }, { "epoch": 0.9664016127225893, "grad_norm": 1.388085961341858, "learning_rate": 8.624500000000001e-05, "loss": 0.3495, "step": 17258 }, { "epoch": 0.9664576100347183, "grad_norm": 1.6867340803146362, "learning_rate": 8.625000000000001e-05, "loss": 0.3838, "step": 17259 }, { "epoch": 0.9665136073468473, "grad_norm": 1.412258505821228, "learning_rate": 8.625500000000001e-05, "loss": 0.322, "step": 17260 }, { 
"epoch": 0.9665696046589763, "grad_norm": 1.4775829315185547, "learning_rate": 8.626e-05, "loss": 0.6763, "step": 17261 }, { "epoch": 0.9666256019711054, "grad_norm": 1.2024606466293335, "learning_rate": 8.6265e-05, "loss": 0.3556, "step": 17262 }, { "epoch": 0.9666815992832344, "grad_norm": 1.649871587753296, "learning_rate": 8.627000000000001e-05, "loss": 0.5428, "step": 17263 }, { "epoch": 0.9667375965953634, "grad_norm": 1.4487943649291992, "learning_rate": 8.627500000000001e-05, "loss": 0.514, "step": 17264 }, { "epoch": 0.9667935939074924, "grad_norm": 1.4263319969177246, "learning_rate": 8.628000000000001e-05, "loss": 0.422, "step": 17265 }, { "epoch": 0.9668495912196214, "grad_norm": 1.298326015472412, "learning_rate": 8.6285e-05, "loss": 0.4818, "step": 17266 }, { "epoch": 0.9669055885317505, "grad_norm": 1.5739593505859375, "learning_rate": 8.629e-05, "loss": 0.547, "step": 17267 }, { "epoch": 0.9669615858438795, "grad_norm": 1.1422499418258667, "learning_rate": 8.6295e-05, "loss": 0.327, "step": 17268 }, { "epoch": 0.9670175831560085, "grad_norm": 1.441144585609436, "learning_rate": 8.63e-05, "loss": 0.5159, "step": 17269 }, { "epoch": 0.9670735804681375, "grad_norm": 1.6179550886154175, "learning_rate": 8.630500000000001e-05, "loss": 0.5163, "step": 17270 }, { "epoch": 0.9671295777802665, "grad_norm": 1.4002225399017334, "learning_rate": 8.631e-05, "loss": 0.4987, "step": 17271 }, { "epoch": 0.9671855750923956, "grad_norm": 1.424810528755188, "learning_rate": 8.6315e-05, "loss": 0.4542, "step": 17272 }, { "epoch": 0.9672415724045246, "grad_norm": 1.5833181142807007, "learning_rate": 8.632e-05, "loss": 0.6777, "step": 17273 }, { "epoch": 0.9672975697166536, "grad_norm": 1.6691884994506836, "learning_rate": 8.6325e-05, "loss": 0.4705, "step": 17274 }, { "epoch": 0.9673535670287826, "grad_norm": 1.240027666091919, "learning_rate": 8.633e-05, "loss": 0.5124, "step": 17275 }, { "epoch": 0.9674095643409116, "grad_norm": 1.2922923564910889, "learning_rate": 
8.633499999999999e-05, "loss": 0.4151, "step": 17276 }, { "epoch": 0.9674655616530407, "grad_norm": 1.161521077156067, "learning_rate": 8.634e-05, "loss": 0.3309, "step": 17277 }, { "epoch": 0.9675215589651697, "grad_norm": 1.3965418338775635, "learning_rate": 8.634500000000001e-05, "loss": 0.4749, "step": 17278 }, { "epoch": 0.9675775562772987, "grad_norm": 1.2737889289855957, "learning_rate": 8.635000000000001e-05, "loss": 0.4331, "step": 17279 }, { "epoch": 0.9676335535894277, "grad_norm": 1.2584105730056763, "learning_rate": 8.635500000000001e-05, "loss": 0.4195, "step": 17280 }, { "epoch": 0.9676895509015567, "grad_norm": 1.2243163585662842, "learning_rate": 8.636e-05, "loss": 0.429, "step": 17281 }, { "epoch": 0.9677455482136857, "grad_norm": 1.387087345123291, "learning_rate": 8.6365e-05, "loss": 0.5707, "step": 17282 }, { "epoch": 0.9678015455258148, "grad_norm": 1.210732340812683, "learning_rate": 8.637e-05, "loss": 0.5474, "step": 17283 }, { "epoch": 0.9678575428379438, "grad_norm": 1.1186721324920654, "learning_rate": 8.637500000000001e-05, "loss": 0.3673, "step": 17284 }, { "epoch": 0.9679135401500728, "grad_norm": 1.5173031091690063, "learning_rate": 8.638000000000001e-05, "loss": 0.4944, "step": 17285 }, { "epoch": 0.9679695374622018, "grad_norm": 1.1591531038284302, "learning_rate": 8.638500000000001e-05, "loss": 0.4421, "step": 17286 }, { "epoch": 0.9680255347743308, "grad_norm": 1.3933157920837402, "learning_rate": 8.639e-05, "loss": 0.443, "step": 17287 }, { "epoch": 0.9680815320864599, "grad_norm": 1.1293524503707886, "learning_rate": 8.6395e-05, "loss": 0.3323, "step": 17288 }, { "epoch": 0.9681375293985889, "grad_norm": 1.3659924268722534, "learning_rate": 8.64e-05, "loss": 0.3717, "step": 17289 }, { "epoch": 0.9681935267107179, "grad_norm": 1.6097309589385986, "learning_rate": 8.6405e-05, "loss": 0.4152, "step": 17290 }, { "epoch": 0.9682495240228469, "grad_norm": 1.181439995765686, "learning_rate": 8.641000000000001e-05, "loss": 0.404, 
"step": 17291 }, { "epoch": 0.9683055213349759, "grad_norm": 1.6815600395202637, "learning_rate": 8.6415e-05, "loss": 0.6659, "step": 17292 }, { "epoch": 0.968361518647105, "grad_norm": 1.570206642150879, "learning_rate": 8.642e-05, "loss": 0.4618, "step": 17293 }, { "epoch": 0.968417515959234, "grad_norm": 2.163844347000122, "learning_rate": 8.6425e-05, "loss": 0.643, "step": 17294 }, { "epoch": 0.968473513271363, "grad_norm": 1.467824101448059, "learning_rate": 8.643e-05, "loss": 0.5083, "step": 17295 }, { "epoch": 0.968529510583492, "grad_norm": 1.1489348411560059, "learning_rate": 8.6435e-05, "loss": 0.3732, "step": 17296 }, { "epoch": 0.968585507895621, "grad_norm": 1.3897266387939453, "learning_rate": 8.643999999999999e-05, "loss": 0.4392, "step": 17297 }, { "epoch": 0.96864150520775, "grad_norm": 1.2860050201416016, "learning_rate": 8.6445e-05, "loss": 0.4827, "step": 17298 }, { "epoch": 0.9686975025198791, "grad_norm": 1.547238826751709, "learning_rate": 8.645000000000001e-05, "loss": 0.4105, "step": 17299 }, { "epoch": 0.9687534998320081, "grad_norm": 1.42068350315094, "learning_rate": 8.645500000000001e-05, "loss": 0.4412, "step": 17300 }, { "epoch": 0.9688094971441371, "grad_norm": 1.328653335571289, "learning_rate": 8.646000000000001e-05, "loss": 0.4863, "step": 17301 }, { "epoch": 0.9688654944562661, "grad_norm": 1.281003713607788, "learning_rate": 8.6465e-05, "loss": 0.4571, "step": 17302 }, { "epoch": 0.9689214917683951, "grad_norm": 1.40518057346344, "learning_rate": 8.647e-05, "loss": 0.3568, "step": 17303 }, { "epoch": 0.9689774890805242, "grad_norm": 1.4284865856170654, "learning_rate": 8.6475e-05, "loss": 0.5044, "step": 17304 }, { "epoch": 0.9690334863926532, "grad_norm": 1.402702808380127, "learning_rate": 8.648e-05, "loss": 0.5023, "step": 17305 }, { "epoch": 0.9690894837047822, "grad_norm": 1.465108871459961, "learning_rate": 8.648500000000001e-05, "loss": 0.5385, "step": 17306 }, { "epoch": 0.9691454810169112, "grad_norm": 
1.6162699460983276, "learning_rate": 8.649000000000001e-05, "loss": 0.5153, "step": 17307 }, { "epoch": 0.9692014783290402, "grad_norm": 1.3178231716156006, "learning_rate": 8.6495e-05, "loss": 0.4102, "step": 17308 }, { "epoch": 0.9692574756411693, "grad_norm": 1.271968960762024, "learning_rate": 8.65e-05, "loss": 0.3879, "step": 17309 }, { "epoch": 0.9693134729532983, "grad_norm": 1.488120436668396, "learning_rate": 8.6505e-05, "loss": 0.4599, "step": 17310 }, { "epoch": 0.9693694702654273, "grad_norm": 1.620308518409729, "learning_rate": 8.651e-05, "loss": 0.665, "step": 17311 }, { "epoch": 0.9694254675775563, "grad_norm": 1.3775193691253662, "learning_rate": 8.651500000000001e-05, "loss": 0.4418, "step": 17312 }, { "epoch": 0.9694814648896853, "grad_norm": 1.3837276697158813, "learning_rate": 8.652e-05, "loss": 0.5351, "step": 17313 }, { "epoch": 0.9695374622018144, "grad_norm": 1.4161573648452759, "learning_rate": 8.6525e-05, "loss": 0.4661, "step": 17314 }, { "epoch": 0.9695934595139434, "grad_norm": 1.5309301614761353, "learning_rate": 8.653e-05, "loss": 0.5751, "step": 17315 }, { "epoch": 0.9696494568260724, "grad_norm": 1.2019197940826416, "learning_rate": 8.6535e-05, "loss": 0.4031, "step": 17316 }, { "epoch": 0.9697054541382014, "grad_norm": 1.1792582273483276, "learning_rate": 8.654e-05, "loss": 0.4181, "step": 17317 }, { "epoch": 0.9697614514503304, "grad_norm": 1.3851470947265625, "learning_rate": 8.6545e-05, "loss": 0.5365, "step": 17318 }, { "epoch": 0.9698174487624595, "grad_norm": 1.6818479299545288, "learning_rate": 8.655e-05, "loss": 0.4797, "step": 17319 }, { "epoch": 0.9698734460745885, "grad_norm": 1.4252855777740479, "learning_rate": 8.655500000000001e-05, "loss": 0.4359, "step": 17320 }, { "epoch": 0.9699294433867174, "grad_norm": 1.3790574073791504, "learning_rate": 8.656000000000001e-05, "loss": 0.7042, "step": 17321 }, { "epoch": 0.9699854406988464, "grad_norm": 1.2539957761764526, "learning_rate": 8.656500000000001e-05, "loss": 0.4094, 
"step": 17322 }, { "epoch": 0.9700414380109754, "grad_norm": 1.3838392496109009, "learning_rate": 8.657e-05, "loss": 0.4915, "step": 17323 }, { "epoch": 0.9700974353231044, "grad_norm": 1.2799766063690186, "learning_rate": 8.6575e-05, "loss": 0.4512, "step": 17324 }, { "epoch": 0.9701534326352335, "grad_norm": 1.4863401651382446, "learning_rate": 8.658e-05, "loss": 0.4828, "step": 17325 }, { "epoch": 0.9702094299473625, "grad_norm": 1.1538506746292114, "learning_rate": 8.6585e-05, "loss": 0.3684, "step": 17326 }, { "epoch": 0.9702654272594915, "grad_norm": 1.3310805559158325, "learning_rate": 8.659000000000001e-05, "loss": 0.3406, "step": 17327 }, { "epoch": 0.9703214245716205, "grad_norm": 1.559973120689392, "learning_rate": 8.659500000000001e-05, "loss": 0.3151, "step": 17328 }, { "epoch": 0.9703774218837495, "grad_norm": 1.297188401222229, "learning_rate": 8.66e-05, "loss": 0.3679, "step": 17329 }, { "epoch": 0.9704334191958786, "grad_norm": 1.1994104385375977, "learning_rate": 8.6605e-05, "loss": 0.3181, "step": 17330 }, { "epoch": 0.9704894165080076, "grad_norm": 1.5522327423095703, "learning_rate": 8.661e-05, "loss": 0.4533, "step": 17331 }, { "epoch": 0.9705454138201366, "grad_norm": 1.4846142530441284, "learning_rate": 8.6615e-05, "loss": 0.497, "step": 17332 }, { "epoch": 0.9706014111322656, "grad_norm": 4.936802864074707, "learning_rate": 8.662000000000001e-05, "loss": 0.4634, "step": 17333 }, { "epoch": 0.9706574084443946, "grad_norm": 1.461236834526062, "learning_rate": 8.6625e-05, "loss": 0.4195, "step": 17334 }, { "epoch": 0.9707134057565237, "grad_norm": 1.3690143823623657, "learning_rate": 8.663e-05, "loss": 0.3715, "step": 17335 }, { "epoch": 0.9707694030686527, "grad_norm": 1.4367308616638184, "learning_rate": 8.6635e-05, "loss": 0.4884, "step": 17336 }, { "epoch": 0.9708254003807817, "grad_norm": 1.3397955894470215, "learning_rate": 8.664e-05, "loss": 0.3518, "step": 17337 }, { "epoch": 0.9708813976929107, "grad_norm": 1.7689563035964966, 
"learning_rate": 8.664500000000001e-05, "loss": 0.5787, "step": 17338 }, { "epoch": 0.9709373950050397, "grad_norm": 1.5664008855819702, "learning_rate": 8.665e-05, "loss": 0.3727, "step": 17339 }, { "epoch": 0.9709933923171687, "grad_norm": 1.1659214496612549, "learning_rate": 8.6655e-05, "loss": 0.4004, "step": 17340 }, { "epoch": 0.9710493896292978, "grad_norm": 1.307281255722046, "learning_rate": 8.666000000000001e-05, "loss": 0.3862, "step": 17341 }, { "epoch": 0.9711053869414268, "grad_norm": 1.6505941152572632, "learning_rate": 8.666500000000001e-05, "loss": 0.4579, "step": 17342 }, { "epoch": 0.9711613842535558, "grad_norm": 1.163074254989624, "learning_rate": 8.667000000000001e-05, "loss": 0.4259, "step": 17343 }, { "epoch": 0.9712173815656848, "grad_norm": 1.2999498844146729, "learning_rate": 8.6675e-05, "loss": 0.4441, "step": 17344 }, { "epoch": 0.9712733788778138, "grad_norm": 2.1191458702087402, "learning_rate": 8.668e-05, "loss": 0.426, "step": 17345 }, { "epoch": 0.9713293761899429, "grad_norm": 1.8392921686172485, "learning_rate": 8.6685e-05, "loss": 0.4483, "step": 17346 }, { "epoch": 0.9713853735020719, "grad_norm": 1.2343018054962158, "learning_rate": 8.669e-05, "loss": 0.5183, "step": 17347 }, { "epoch": 0.9714413708142009, "grad_norm": 1.1489077806472778, "learning_rate": 8.669500000000001e-05, "loss": 0.3422, "step": 17348 }, { "epoch": 0.9714973681263299, "grad_norm": 1.1706421375274658, "learning_rate": 8.67e-05, "loss": 0.3887, "step": 17349 }, { "epoch": 0.9715533654384589, "grad_norm": 1.3635989427566528, "learning_rate": 8.6705e-05, "loss": 0.5152, "step": 17350 }, { "epoch": 0.971609362750588, "grad_norm": 2.037436008453369, "learning_rate": 8.671e-05, "loss": 0.3749, "step": 17351 }, { "epoch": 0.971665360062717, "grad_norm": 1.750329852104187, "learning_rate": 8.6715e-05, "loss": 0.6067, "step": 17352 }, { "epoch": 0.971721357374846, "grad_norm": 1.270072340965271, "learning_rate": 8.672e-05, "loss": 0.4039, "step": 17353 }, { 
"epoch": 0.971777354686975, "grad_norm": 1.7762908935546875, "learning_rate": 8.672500000000001e-05, "loss": 0.5357, "step": 17354 }, { "epoch": 0.971833351999104, "grad_norm": 1.4279133081436157, "learning_rate": 8.673e-05, "loss": 0.4171, "step": 17355 }, { "epoch": 0.971889349311233, "grad_norm": 1.1249388456344604, "learning_rate": 8.6735e-05, "loss": 0.3568, "step": 17356 }, { "epoch": 0.9719453466233621, "grad_norm": 1.4502819776535034, "learning_rate": 8.674e-05, "loss": 0.4598, "step": 17357 }, { "epoch": 0.9720013439354911, "grad_norm": 1.2537726163864136, "learning_rate": 8.674500000000001e-05, "loss": 0.3844, "step": 17358 }, { "epoch": 0.9720573412476201, "grad_norm": 1.2995517253875732, "learning_rate": 8.675000000000001e-05, "loss": 0.5004, "step": 17359 }, { "epoch": 0.9721133385597491, "grad_norm": 1.4475985765457153, "learning_rate": 8.6755e-05, "loss": 0.4624, "step": 17360 }, { "epoch": 0.9721693358718781, "grad_norm": 1.2398110628128052, "learning_rate": 8.676e-05, "loss": 0.3665, "step": 17361 }, { "epoch": 0.9722253331840072, "grad_norm": 1.439234972000122, "learning_rate": 8.676500000000001e-05, "loss": 0.4756, "step": 17362 }, { "epoch": 0.9722813304961362, "grad_norm": 1.49754798412323, "learning_rate": 8.677000000000001e-05, "loss": 0.455, "step": 17363 }, { "epoch": 0.9723373278082652, "grad_norm": 1.302474021911621, "learning_rate": 8.677500000000001e-05, "loss": 0.4373, "step": 17364 }, { "epoch": 0.9723933251203942, "grad_norm": 1.0640931129455566, "learning_rate": 8.678e-05, "loss": 0.3671, "step": 17365 }, { "epoch": 0.9724493224325232, "grad_norm": 1.2272655963897705, "learning_rate": 8.6785e-05, "loss": 0.4005, "step": 17366 }, { "epoch": 0.9725053197446523, "grad_norm": 1.508810043334961, "learning_rate": 8.679e-05, "loss": 0.4766, "step": 17367 }, { "epoch": 0.9725613170567813, "grad_norm": 1.224143147468567, "learning_rate": 8.6795e-05, "loss": 0.4397, "step": 17368 }, { "epoch": 0.9726173143689103, "grad_norm": 
1.2601372003555298, "learning_rate": 8.680000000000001e-05, "loss": 0.4508, "step": 17369 }, { "epoch": 0.9726733116810393, "grad_norm": 1.5283092260360718, "learning_rate": 8.6805e-05, "loss": 0.3897, "step": 17370 }, { "epoch": 0.9727293089931683, "grad_norm": 1.430024266242981, "learning_rate": 8.681e-05, "loss": 0.5453, "step": 17371 }, { "epoch": 0.9727853063052974, "grad_norm": 1.3398207426071167, "learning_rate": 8.6815e-05, "loss": 0.4539, "step": 17372 }, { "epoch": 0.9728413036174264, "grad_norm": 1.4060691595077515, "learning_rate": 8.682e-05, "loss": 0.4397, "step": 17373 }, { "epoch": 0.9728973009295554, "grad_norm": 1.418944001197815, "learning_rate": 8.6825e-05, "loss": 0.4962, "step": 17374 }, { "epoch": 0.9729532982416844, "grad_norm": 1.4422852993011475, "learning_rate": 8.683e-05, "loss": 0.5276, "step": 17375 }, { "epoch": 0.9730092955538134, "grad_norm": 1.3235855102539062, "learning_rate": 8.6835e-05, "loss": 0.4473, "step": 17376 }, { "epoch": 0.9730652928659425, "grad_norm": 1.2004975080490112, "learning_rate": 8.684e-05, "loss": 0.418, "step": 17377 }, { "epoch": 0.9731212901780715, "grad_norm": 1.4729422330856323, "learning_rate": 8.684500000000001e-05, "loss": 0.455, "step": 17378 }, { "epoch": 0.9731772874902005, "grad_norm": 1.4228463172912598, "learning_rate": 8.685000000000001e-05, "loss": 0.5516, "step": 17379 }, { "epoch": 0.9732332848023295, "grad_norm": 1.2448140382766724, "learning_rate": 8.685500000000001e-05, "loss": 0.4987, "step": 17380 }, { "epoch": 0.9732892821144585, "grad_norm": 1.2316290140151978, "learning_rate": 8.686e-05, "loss": 0.4023, "step": 17381 }, { "epoch": 0.9733452794265876, "grad_norm": 1.3800770044326782, "learning_rate": 8.6865e-05, "loss": 0.5761, "step": 17382 }, { "epoch": 0.9734012767387166, "grad_norm": 1.0687819719314575, "learning_rate": 8.687000000000001e-05, "loss": 0.4053, "step": 17383 }, { "epoch": 0.9734572740508456, "grad_norm": 1.395995855331421, "learning_rate": 8.687500000000001e-05, 
"loss": 0.4674, "step": 17384 }, { "epoch": 0.9735132713629746, "grad_norm": 1.3109594583511353, "learning_rate": 8.688000000000001e-05, "loss": 0.4991, "step": 17385 }, { "epoch": 0.9735692686751036, "grad_norm": 1.2651557922363281, "learning_rate": 8.6885e-05, "loss": 0.4577, "step": 17386 }, { "epoch": 0.9736252659872326, "grad_norm": 1.2861329317092896, "learning_rate": 8.689e-05, "loss": 0.4672, "step": 17387 }, { "epoch": 0.9736812632993617, "grad_norm": 1.2998154163360596, "learning_rate": 8.6895e-05, "loss": 0.3578, "step": 17388 }, { "epoch": 0.9737372606114907, "grad_norm": 1.3852159976959229, "learning_rate": 8.69e-05, "loss": 0.4885, "step": 17389 }, { "epoch": 0.9737932579236197, "grad_norm": 1.4287030696868896, "learning_rate": 8.690500000000001e-05, "loss": 0.4675, "step": 17390 }, { "epoch": 0.9738492552357487, "grad_norm": 1.1838635206222534, "learning_rate": 8.691e-05, "loss": 0.4898, "step": 17391 }, { "epoch": 0.9739052525478777, "grad_norm": 1.3776044845581055, "learning_rate": 8.6915e-05, "loss": 0.4377, "step": 17392 }, { "epoch": 0.9739612498600068, "grad_norm": 1.543620228767395, "learning_rate": 8.692e-05, "loss": 0.5551, "step": 17393 }, { "epoch": 0.9740172471721358, "grad_norm": 1.5677738189697266, "learning_rate": 8.6925e-05, "loss": 0.5707, "step": 17394 }, { "epoch": 0.9740732444842648, "grad_norm": 1.1838555335998535, "learning_rate": 8.693e-05, "loss": 0.4575, "step": 17395 }, { "epoch": 0.9741292417963938, "grad_norm": 1.5340828895568848, "learning_rate": 8.6935e-05, "loss": 0.5805, "step": 17396 }, { "epoch": 0.9741852391085228, "grad_norm": 1.3245877027511597, "learning_rate": 8.694e-05, "loss": 0.5607, "step": 17397 }, { "epoch": 0.9742412364206519, "grad_norm": 1.3191213607788086, "learning_rate": 8.6945e-05, "loss": 0.5458, "step": 17398 }, { "epoch": 0.9742972337327809, "grad_norm": 1.0440481901168823, "learning_rate": 8.695000000000001e-05, "loss": 0.3783, "step": 17399 }, { "epoch": 0.9743532310449099, "grad_norm": 
1.5507785081863403, "learning_rate": 8.695500000000001e-05, "loss": 0.443, "step": 17400 }, { "epoch": 0.9744092283570389, "grad_norm": 1.5314637422561646, "learning_rate": 8.696000000000001e-05, "loss": 0.5015, "step": 17401 }, { "epoch": 0.9744652256691679, "grad_norm": 1.307962417602539, "learning_rate": 8.6965e-05, "loss": 0.4961, "step": 17402 }, { "epoch": 0.974521222981297, "grad_norm": 1.4043619632720947, "learning_rate": 8.697e-05, "loss": 0.5223, "step": 17403 }, { "epoch": 0.9745772202934259, "grad_norm": 1.1751432418823242, "learning_rate": 8.6975e-05, "loss": 0.3579, "step": 17404 }, { "epoch": 0.9746332176055549, "grad_norm": 1.4457268714904785, "learning_rate": 8.698000000000001e-05, "loss": 0.431, "step": 17405 }, { "epoch": 0.9746892149176839, "grad_norm": 1.4647364616394043, "learning_rate": 8.698500000000001e-05, "loss": 0.5258, "step": 17406 }, { "epoch": 0.9747452122298129, "grad_norm": 1.5925337076187134, "learning_rate": 8.699e-05, "loss": 0.3815, "step": 17407 }, { "epoch": 0.9748012095419419, "grad_norm": 1.443375587463379, "learning_rate": 8.6995e-05, "loss": 0.4828, "step": 17408 }, { "epoch": 0.974857206854071, "grad_norm": 1.2334703207015991, "learning_rate": 8.7e-05, "loss": 0.4202, "step": 17409 }, { "epoch": 0.9749132041662, "grad_norm": 1.5328794717788696, "learning_rate": 8.7005e-05, "loss": 0.3994, "step": 17410 }, { "epoch": 0.974969201478329, "grad_norm": 1.2115094661712646, "learning_rate": 8.701000000000001e-05, "loss": 0.3725, "step": 17411 }, { "epoch": 0.975025198790458, "grad_norm": 1.4653737545013428, "learning_rate": 8.7015e-05, "loss": 0.5088, "step": 17412 }, { "epoch": 0.975081196102587, "grad_norm": 1.5247368812561035, "learning_rate": 8.702e-05, "loss": 0.5465, "step": 17413 }, { "epoch": 0.975137193414716, "grad_norm": 1.2807185649871826, "learning_rate": 8.7025e-05, "loss": 0.3851, "step": 17414 }, { "epoch": 0.9751931907268451, "grad_norm": 1.2373749017715454, "learning_rate": 8.703e-05, "loss": 0.343, "step": 
17415 }, { "epoch": 0.9752491880389741, "grad_norm": 1.3598071336746216, "learning_rate": 8.7035e-05, "loss": 0.6644, "step": 17416 }, { "epoch": 0.9753051853511031, "grad_norm": 1.1725260019302368, "learning_rate": 8.704e-05, "loss": 0.4689, "step": 17417 }, { "epoch": 0.9753611826632321, "grad_norm": 1.242869257926941, "learning_rate": 8.7045e-05, "loss": 0.4059, "step": 17418 }, { "epoch": 0.9754171799753611, "grad_norm": 1.1475459337234497, "learning_rate": 8.705000000000002e-05, "loss": 0.4537, "step": 17419 }, { "epoch": 0.9754731772874902, "grad_norm": 1.1499022245407104, "learning_rate": 8.705500000000001e-05, "loss": 0.3748, "step": 17420 }, { "epoch": 0.9755291745996192, "grad_norm": 1.149317979812622, "learning_rate": 8.706000000000001e-05, "loss": 0.4822, "step": 17421 }, { "epoch": 0.9755851719117482, "grad_norm": 1.4652608633041382, "learning_rate": 8.706500000000001e-05, "loss": 0.4387, "step": 17422 }, { "epoch": 0.9756411692238772, "grad_norm": 1.5358394384384155, "learning_rate": 8.707e-05, "loss": 0.5631, "step": 17423 }, { "epoch": 0.9756971665360062, "grad_norm": 1.3846830129623413, "learning_rate": 8.7075e-05, "loss": 0.4613, "step": 17424 }, { "epoch": 0.9757531638481353, "grad_norm": 1.6016181707382202, "learning_rate": 8.708e-05, "loss": 0.5172, "step": 17425 }, { "epoch": 0.9758091611602643, "grad_norm": 1.251336693763733, "learning_rate": 8.708500000000001e-05, "loss": 0.4126, "step": 17426 }, { "epoch": 0.9758651584723933, "grad_norm": 1.4441380500793457, "learning_rate": 8.709000000000001e-05, "loss": 0.3877, "step": 17427 }, { "epoch": 0.9759211557845223, "grad_norm": 1.1034600734710693, "learning_rate": 8.7095e-05, "loss": 0.4298, "step": 17428 }, { "epoch": 0.9759771530966513, "grad_norm": 1.1990065574645996, "learning_rate": 8.71e-05, "loss": 0.4104, "step": 17429 }, { "epoch": 0.9760331504087804, "grad_norm": 1.752121090888977, "learning_rate": 8.7105e-05, "loss": 0.664, "step": 17430 }, { "epoch": 0.9760891477209094, "grad_norm": 
1.2689826488494873, "learning_rate": 8.711e-05, "loss": 0.517, "step": 17431 }, { "epoch": 0.9761451450330384, "grad_norm": 1.443172812461853, "learning_rate": 8.711500000000001e-05, "loss": 0.4914, "step": 17432 }, { "epoch": 0.9762011423451674, "grad_norm": 1.3688075542449951, "learning_rate": 8.712e-05, "loss": 0.4629, "step": 17433 }, { "epoch": 0.9762571396572964, "grad_norm": 1.4039663076400757, "learning_rate": 8.7125e-05, "loss": 0.4362, "step": 17434 }, { "epoch": 0.9763131369694255, "grad_norm": 1.5527126789093018, "learning_rate": 8.713e-05, "loss": 0.5913, "step": 17435 }, { "epoch": 0.9763691342815545, "grad_norm": 1.6251251697540283, "learning_rate": 8.7135e-05, "loss": 0.4114, "step": 17436 }, { "epoch": 0.9764251315936835, "grad_norm": 1.7148405313491821, "learning_rate": 8.714e-05, "loss": 0.531, "step": 17437 }, { "epoch": 0.9764811289058125, "grad_norm": 1.2098383903503418, "learning_rate": 8.714499999999999e-05, "loss": 0.3567, "step": 17438 }, { "epoch": 0.9765371262179415, "grad_norm": 1.2557868957519531, "learning_rate": 8.715e-05, "loss": 0.3424, "step": 17439 }, { "epoch": 0.9765931235300706, "grad_norm": 1.4591310024261475, "learning_rate": 8.715500000000002e-05, "loss": 0.5535, "step": 17440 }, { "epoch": 0.9766491208421996, "grad_norm": 1.4864718914031982, "learning_rate": 8.716000000000001e-05, "loss": 0.4163, "step": 17441 }, { "epoch": 0.9767051181543286, "grad_norm": 1.585402488708496, "learning_rate": 8.716500000000001e-05, "loss": 0.4101, "step": 17442 }, { "epoch": 0.9767611154664576, "grad_norm": 1.4526938199996948, "learning_rate": 8.717000000000001e-05, "loss": 0.5043, "step": 17443 }, { "epoch": 0.9768171127785866, "grad_norm": 1.1431570053100586, "learning_rate": 8.7175e-05, "loss": 0.3464, "step": 17444 }, { "epoch": 0.9768731100907156, "grad_norm": 1.8126105070114136, "learning_rate": 8.718e-05, "loss": 0.656, "step": 17445 }, { "epoch": 0.9769291074028447, "grad_norm": 1.3256500959396362, "learning_rate": 8.7185e-05, 
"loss": 0.4084, "step": 17446 }, { "epoch": 0.9769851047149737, "grad_norm": 1.631635069847107, "learning_rate": 8.719000000000001e-05, "loss": 0.5274, "step": 17447 }, { "epoch": 0.9770411020271027, "grad_norm": 1.4243981838226318, "learning_rate": 8.719500000000001e-05, "loss": 0.4474, "step": 17448 }, { "epoch": 0.9770970993392317, "grad_norm": 1.605637788772583, "learning_rate": 8.72e-05, "loss": 0.4262, "step": 17449 }, { "epoch": 0.9771530966513607, "grad_norm": 1.4703837633132935, "learning_rate": 8.7205e-05, "loss": 0.5151, "step": 17450 }, { "epoch": 0.9772090939634898, "grad_norm": 1.3215372562408447, "learning_rate": 8.721e-05, "loss": 0.3395, "step": 17451 }, { "epoch": 0.9772650912756188, "grad_norm": 1.419661283493042, "learning_rate": 8.7215e-05, "loss": 0.5087, "step": 17452 }, { "epoch": 0.9773210885877478, "grad_norm": 1.253587245941162, "learning_rate": 8.722e-05, "loss": 0.4516, "step": 17453 }, { "epoch": 0.9773770858998768, "grad_norm": 1.3379052877426147, "learning_rate": 8.7225e-05, "loss": 0.3923, "step": 17454 }, { "epoch": 0.9774330832120058, "grad_norm": 1.2216262817382812, "learning_rate": 8.723e-05, "loss": 0.4176, "step": 17455 }, { "epoch": 0.9774890805241349, "grad_norm": 1.610276460647583, "learning_rate": 8.7235e-05, "loss": 0.5338, "step": 17456 }, { "epoch": 0.9775450778362639, "grad_norm": 1.5220662355422974, "learning_rate": 8.724e-05, "loss": 0.7544, "step": 17457 }, { "epoch": 0.9776010751483929, "grad_norm": 1.516425371170044, "learning_rate": 8.7245e-05, "loss": 0.4472, "step": 17458 }, { "epoch": 0.9776570724605219, "grad_norm": 1.4756171703338623, "learning_rate": 8.725e-05, "loss": 0.4746, "step": 17459 }, { "epoch": 0.9777130697726509, "grad_norm": 1.1557035446166992, "learning_rate": 8.7255e-05, "loss": 0.4096, "step": 17460 }, { "epoch": 0.97776906708478, "grad_norm": 4.008048057556152, "learning_rate": 8.726000000000001e-05, "loss": 0.5853, "step": 17461 }, { "epoch": 0.977825064396909, "grad_norm": 
1.3212580680847168, "learning_rate": 8.726500000000001e-05, "loss": 0.4024, "step": 17462 }, { "epoch": 0.977881061709038, "grad_norm": 1.3305388689041138, "learning_rate": 8.727000000000001e-05, "loss": 0.3946, "step": 17463 }, { "epoch": 0.977937059021167, "grad_norm": 1.2800099849700928, "learning_rate": 8.727500000000001e-05, "loss": 0.4303, "step": 17464 }, { "epoch": 0.977993056333296, "grad_norm": 1.755967140197754, "learning_rate": 8.728e-05, "loss": 0.6422, "step": 17465 }, { "epoch": 0.978049053645425, "grad_norm": 1.3023176193237305, "learning_rate": 8.7285e-05, "loss": 0.4761, "step": 17466 }, { "epoch": 0.9781050509575541, "grad_norm": 1.6691690683364868, "learning_rate": 8.729e-05, "loss": 0.4489, "step": 17467 }, { "epoch": 0.9781610482696831, "grad_norm": 1.2901387214660645, "learning_rate": 8.729500000000001e-05, "loss": 0.3703, "step": 17468 }, { "epoch": 0.9782170455818121, "grad_norm": 1.5257415771484375, "learning_rate": 8.730000000000001e-05, "loss": 0.5251, "step": 17469 }, { "epoch": 0.9782730428939411, "grad_norm": 1.2863874435424805, "learning_rate": 8.7305e-05, "loss": 0.5034, "step": 17470 }, { "epoch": 0.9783290402060701, "grad_norm": 1.2762781381607056, "learning_rate": 8.731e-05, "loss": 0.6128, "step": 17471 }, { "epoch": 0.9783850375181992, "grad_norm": 1.3336706161499023, "learning_rate": 8.7315e-05, "loss": 0.5472, "step": 17472 }, { "epoch": 0.9784410348303282, "grad_norm": 1.3449336290359497, "learning_rate": 8.732e-05, "loss": 0.4165, "step": 17473 }, { "epoch": 0.9784970321424572, "grad_norm": 1.3002980947494507, "learning_rate": 8.7325e-05, "loss": 0.4092, "step": 17474 }, { "epoch": 0.9785530294545862, "grad_norm": 1.4328842163085938, "learning_rate": 8.733e-05, "loss": 0.3841, "step": 17475 }, { "epoch": 0.9786090267667152, "grad_norm": 1.2100359201431274, "learning_rate": 8.7335e-05, "loss": 0.3087, "step": 17476 }, { "epoch": 0.9786650240788443, "grad_norm": 1.296263337135315, "learning_rate": 8.734e-05, "loss": 0.4679, 
"step": 17477 }, { "epoch": 0.9787210213909733, "grad_norm": 1.3720515966415405, "learning_rate": 8.7345e-05, "loss": 0.5072, "step": 17478 }, { "epoch": 0.9787770187031023, "grad_norm": 1.2821012735366821, "learning_rate": 8.735000000000001e-05, "loss": 0.4174, "step": 17479 }, { "epoch": 0.9788330160152313, "grad_norm": 1.033850073814392, "learning_rate": 8.7355e-05, "loss": 0.429, "step": 17480 }, { "epoch": 0.9788890133273603, "grad_norm": 1.2118123769760132, "learning_rate": 8.736e-05, "loss": 0.3842, "step": 17481 }, { "epoch": 0.9789450106394894, "grad_norm": 1.198876142501831, "learning_rate": 8.7365e-05, "loss": 0.4736, "step": 17482 }, { "epoch": 0.9790010079516184, "grad_norm": 2.4156970977783203, "learning_rate": 8.737000000000001e-05, "loss": 0.5503, "step": 17483 }, { "epoch": 0.9790570052637474, "grad_norm": 1.1911977529525757, "learning_rate": 8.737500000000001e-05, "loss": 0.4204, "step": 17484 }, { "epoch": 0.9791130025758764, "grad_norm": 1.6712185144424438, "learning_rate": 8.738000000000001e-05, "loss": 0.4103, "step": 17485 }, { "epoch": 0.9791689998880053, "grad_norm": 1.511715292930603, "learning_rate": 8.7385e-05, "loss": 0.4508, "step": 17486 }, { "epoch": 0.9792249972001343, "grad_norm": 1.4734218120574951, "learning_rate": 8.739e-05, "loss": 0.4137, "step": 17487 }, { "epoch": 0.9792809945122634, "grad_norm": 1.347332239151001, "learning_rate": 8.7395e-05, "loss": 0.4178, "step": 17488 }, { "epoch": 0.9793369918243924, "grad_norm": 1.2072690725326538, "learning_rate": 8.740000000000001e-05, "loss": 0.4265, "step": 17489 }, { "epoch": 0.9793929891365214, "grad_norm": 1.4905452728271484, "learning_rate": 8.740500000000001e-05, "loss": 0.5137, "step": 17490 }, { "epoch": 0.9794489864486504, "grad_norm": 1.2617844343185425, "learning_rate": 8.741e-05, "loss": 0.303, "step": 17491 }, { "epoch": 0.9795049837607794, "grad_norm": 1.215404748916626, "learning_rate": 8.7415e-05, "loss": 0.4278, "step": 17492 }, { "epoch": 0.9795609810729085, 
"grad_norm": 1.5266979932785034, "learning_rate": 8.742e-05, "loss": 0.435, "step": 17493 }, { "epoch": 0.9796169783850375, "grad_norm": 1.3276944160461426, "learning_rate": 8.7425e-05, "loss": 0.5681, "step": 17494 }, { "epoch": 0.9796729756971665, "grad_norm": 1.4566097259521484, "learning_rate": 8.743e-05, "loss": 0.5974, "step": 17495 }, { "epoch": 0.9797289730092955, "grad_norm": 1.4847010374069214, "learning_rate": 8.7435e-05, "loss": 0.4646, "step": 17496 }, { "epoch": 0.9797849703214245, "grad_norm": 1.2967168092727661, "learning_rate": 8.744e-05, "loss": 0.4152, "step": 17497 }, { "epoch": 0.9798409676335536, "grad_norm": 1.37638521194458, "learning_rate": 8.7445e-05, "loss": 0.3974, "step": 17498 }, { "epoch": 0.9798969649456826, "grad_norm": 1.160549283027649, "learning_rate": 8.745000000000001e-05, "loss": 0.4809, "step": 17499 }, { "epoch": 0.9799529622578116, "grad_norm": 1.4034357070922852, "learning_rate": 8.745500000000001e-05, "loss": 0.4828, "step": 17500 }, { "epoch": 0.9800089595699406, "grad_norm": 1.6993513107299805, "learning_rate": 8.746e-05, "loss": 0.403, "step": 17501 }, { "epoch": 0.9800649568820696, "grad_norm": 1.4433549642562866, "learning_rate": 8.7465e-05, "loss": 0.5207, "step": 17502 }, { "epoch": 0.9801209541941986, "grad_norm": 1.9773705005645752, "learning_rate": 8.747e-05, "loss": 0.6333, "step": 17503 }, { "epoch": 0.9801769515063277, "grad_norm": 1.4764324426651, "learning_rate": 8.747500000000001e-05, "loss": 0.5438, "step": 17504 }, { "epoch": 0.9802329488184567, "grad_norm": 1.410233736038208, "learning_rate": 8.748000000000001e-05, "loss": 0.5832, "step": 17505 }, { "epoch": 0.9802889461305857, "grad_norm": 1.5553776025772095, "learning_rate": 8.748500000000001e-05, "loss": 0.5932, "step": 17506 }, { "epoch": 0.9803449434427147, "grad_norm": 1.5294225215911865, "learning_rate": 8.749e-05, "loss": 0.6163, "step": 17507 }, { "epoch": 0.9804009407548437, "grad_norm": 1.3601484298706055, "learning_rate": 8.7495e-05, "loss": 
0.5146, "step": 17508 }, { "epoch": 0.9804569380669728, "grad_norm": 1.3084996938705444, "learning_rate": 8.75e-05, "loss": 0.4682, "step": 17509 }, { "epoch": 0.9805129353791018, "grad_norm": 1.2972066402435303, "learning_rate": 8.750500000000001e-05, "loss": 0.4591, "step": 17510 }, { "epoch": 0.9805689326912308, "grad_norm": 1.3795164823532104, "learning_rate": 8.751000000000001e-05, "loss": 0.406, "step": 17511 }, { "epoch": 0.9806249300033598, "grad_norm": 1.280336618423462, "learning_rate": 8.7515e-05, "loss": 0.4563, "step": 17512 }, { "epoch": 0.9806809273154888, "grad_norm": 1.3981839418411255, "learning_rate": 8.752e-05, "loss": 0.4962, "step": 17513 }, { "epoch": 0.9807369246276179, "grad_norm": 1.3525123596191406, "learning_rate": 8.7525e-05, "loss": 0.3947, "step": 17514 }, { "epoch": 0.9807929219397469, "grad_norm": 1.4933942556381226, "learning_rate": 8.753e-05, "loss": 0.4733, "step": 17515 }, { "epoch": 0.9808489192518759, "grad_norm": 1.405686378479004, "learning_rate": 8.7535e-05, "loss": 0.467, "step": 17516 }, { "epoch": 0.9809049165640049, "grad_norm": 1.1117249727249146, "learning_rate": 8.754e-05, "loss": 0.3743, "step": 17517 }, { "epoch": 0.9809609138761339, "grad_norm": 1.4071615934371948, "learning_rate": 8.7545e-05, "loss": 0.3418, "step": 17518 }, { "epoch": 0.981016911188263, "grad_norm": 1.627893090248108, "learning_rate": 8.755e-05, "loss": 0.6502, "step": 17519 }, { "epoch": 0.981072908500392, "grad_norm": 1.286058783531189, "learning_rate": 8.755500000000001e-05, "loss": 0.4648, "step": 17520 }, { "epoch": 0.981128905812521, "grad_norm": 1.3681645393371582, "learning_rate": 8.756000000000001e-05, "loss": 0.4158, "step": 17521 }, { "epoch": 0.98118490312465, "grad_norm": 1.4059162139892578, "learning_rate": 8.7565e-05, "loss": 0.4763, "step": 17522 }, { "epoch": 0.981240900436779, "grad_norm": 126.30540466308594, "learning_rate": 8.757e-05, "loss": 0.4891, "step": 17523 }, { "epoch": 0.981296897748908, "grad_norm": 
1.467044472694397, "learning_rate": 8.7575e-05, "loss": 0.576, "step": 17524 }, { "epoch": 0.9813528950610371, "grad_norm": 1.4908976554870605, "learning_rate": 8.758000000000001e-05, "loss": 0.506, "step": 17525 }, { "epoch": 0.9814088923731661, "grad_norm": 1.2298582792282104, "learning_rate": 8.758500000000001e-05, "loss": 0.4315, "step": 17526 }, { "epoch": 0.9814648896852951, "grad_norm": 1.304078221321106, "learning_rate": 8.759e-05, "loss": 0.42, "step": 17527 }, { "epoch": 0.9815208869974241, "grad_norm": 1.5696616172790527, "learning_rate": 8.7595e-05, "loss": 0.5689, "step": 17528 }, { "epoch": 0.9815768843095531, "grad_norm": 1.8061628341674805, "learning_rate": 8.76e-05, "loss": 0.5197, "step": 17529 }, { "epoch": 0.9816328816216822, "grad_norm": 2.1518683433532715, "learning_rate": 8.7605e-05, "loss": 0.597, "step": 17530 }, { "epoch": 0.9816888789338112, "grad_norm": 1.4769752025604248, "learning_rate": 8.761000000000001e-05, "loss": 0.5969, "step": 17531 }, { "epoch": 0.9817448762459402, "grad_norm": 1.3505018949508667, "learning_rate": 8.761500000000001e-05, "loss": 0.4329, "step": 17532 }, { "epoch": 0.9818008735580692, "grad_norm": 1.226112723350525, "learning_rate": 8.762e-05, "loss": 0.4346, "step": 17533 }, { "epoch": 0.9818568708701982, "grad_norm": 1.3486183881759644, "learning_rate": 8.7625e-05, "loss": 0.409, "step": 17534 }, { "epoch": 0.9819128681823273, "grad_norm": 1.6185193061828613, "learning_rate": 8.763e-05, "loss": 0.4879, "step": 17535 }, { "epoch": 0.9819688654944563, "grad_norm": 1.1983236074447632, "learning_rate": 8.7635e-05, "loss": 0.2968, "step": 17536 }, { "epoch": 0.9820248628065853, "grad_norm": 1.2322864532470703, "learning_rate": 8.764e-05, "loss": 0.4065, "step": 17537 }, { "epoch": 0.9820808601187143, "grad_norm": 1.4110888242721558, "learning_rate": 8.7645e-05, "loss": 0.4467, "step": 17538 }, { "epoch": 0.9821368574308433, "grad_norm": 1.2793995141983032, "learning_rate": 8.765e-05, "loss": 0.4855, "step": 17539 }, 
{ "epoch": 0.9821928547429724, "grad_norm": 1.2857661247253418, "learning_rate": 8.765500000000001e-05, "loss": 0.4428, "step": 17540 }, { "epoch": 0.9822488520551014, "grad_norm": 1.4148931503295898, "learning_rate": 8.766000000000001e-05, "loss": 0.3957, "step": 17541 }, { "epoch": 0.9823048493672304, "grad_norm": 1.4639850854873657, "learning_rate": 8.766500000000001e-05, "loss": 0.5383, "step": 17542 }, { "epoch": 0.9823608466793594, "grad_norm": 1.7329463958740234, "learning_rate": 8.767e-05, "loss": 0.3988, "step": 17543 }, { "epoch": 0.9824168439914884, "grad_norm": 1.2866063117980957, "learning_rate": 8.7675e-05, "loss": 0.4273, "step": 17544 }, { "epoch": 0.9824728413036175, "grad_norm": 1.3373000621795654, "learning_rate": 8.768e-05, "loss": 0.5587, "step": 17545 }, { "epoch": 0.9825288386157465, "grad_norm": 1.371846318244934, "learning_rate": 8.768500000000001e-05, "loss": 0.451, "step": 17546 }, { "epoch": 0.9825848359278755, "grad_norm": 2.8197245597839355, "learning_rate": 8.769000000000001e-05, "loss": 0.4816, "step": 17547 }, { "epoch": 0.9826408332400045, "grad_norm": 3.393686532974243, "learning_rate": 8.7695e-05, "loss": 0.4298, "step": 17548 }, { "epoch": 0.9826968305521335, "grad_norm": 1.4075634479522705, "learning_rate": 8.77e-05, "loss": 0.4598, "step": 17549 }, { "epoch": 0.9827528278642625, "grad_norm": 1.9391776323318481, "learning_rate": 8.7705e-05, "loss": 0.5691, "step": 17550 }, { "epoch": 0.9828088251763916, "grad_norm": 1.6095472574234009, "learning_rate": 8.771e-05, "loss": 0.7115, "step": 17551 }, { "epoch": 0.9828648224885206, "grad_norm": 1.1371383666992188, "learning_rate": 8.7715e-05, "loss": 0.3423, "step": 17552 }, { "epoch": 0.9829208198006496, "grad_norm": 1.347645878791809, "learning_rate": 8.772000000000001e-05, "loss": 0.463, "step": 17553 }, { "epoch": 0.9829768171127786, "grad_norm": 1.7110610008239746, "learning_rate": 8.7725e-05, "loss": 0.5672, "step": 17554 }, { "epoch": 0.9830328144249076, "grad_norm": 
1.2346199750900269, "learning_rate": 8.773e-05, "loss": 0.4478, "step": 17555 }, { "epoch": 0.9830888117370367, "grad_norm": 1.3220741748809814, "learning_rate": 8.7735e-05, "loss": 0.4089, "step": 17556 }, { "epoch": 0.9831448090491657, "grad_norm": 1.3453768491744995, "learning_rate": 8.774e-05, "loss": 0.4418, "step": 17557 }, { "epoch": 0.9832008063612947, "grad_norm": 1.3212966918945312, "learning_rate": 8.7745e-05, "loss": 0.383, "step": 17558 }, { "epoch": 0.9832568036734237, "grad_norm": 1.2123596668243408, "learning_rate": 8.775e-05, "loss": 0.4264, "step": 17559 }, { "epoch": 0.9833128009855527, "grad_norm": 1.3323320150375366, "learning_rate": 8.775500000000002e-05, "loss": 0.4671, "step": 17560 }, { "epoch": 0.9833687982976818, "grad_norm": 1.3458378314971924, "learning_rate": 8.776000000000001e-05, "loss": 0.407, "step": 17561 }, { "epoch": 0.9834247956098108, "grad_norm": 1.4096357822418213, "learning_rate": 8.776500000000001e-05, "loss": 0.4173, "step": 17562 }, { "epoch": 0.9834807929219398, "grad_norm": 1.3322290182113647, "learning_rate": 8.777000000000001e-05, "loss": 0.3692, "step": 17563 }, { "epoch": 0.9835367902340688, "grad_norm": 1.205859661102295, "learning_rate": 8.7775e-05, "loss": 0.4507, "step": 17564 }, { "epoch": 0.9835927875461978, "grad_norm": 1.7954727411270142, "learning_rate": 8.778e-05, "loss": 0.4743, "step": 17565 }, { "epoch": 0.9836487848583269, "grad_norm": 1.2893177270889282, "learning_rate": 8.7785e-05, "loss": 0.379, "step": 17566 }, { "epoch": 0.9837047821704559, "grad_norm": 1.3160830736160278, "learning_rate": 8.779000000000001e-05, "loss": 0.4332, "step": 17567 }, { "epoch": 0.9837607794825849, "grad_norm": 1.1817042827606201, "learning_rate": 8.779500000000001e-05, "loss": 0.5421, "step": 17568 }, { "epoch": 0.9838167767947138, "grad_norm": 1.2465851306915283, "learning_rate": 8.78e-05, "loss": 0.4129, "step": 17569 }, { "epoch": 0.9838727741068428, "grad_norm": 1.3811452388763428, "learning_rate": 8.7805e-05, 
"loss": 0.4752, "step": 17570 }, { "epoch": 0.9839287714189718, "grad_norm": 1.6648184061050415, "learning_rate": 8.781e-05, "loss": 0.4578, "step": 17571 }, { "epoch": 0.9839847687311009, "grad_norm": 1.3105189800262451, "learning_rate": 8.7815e-05, "loss": 0.5465, "step": 17572 }, { "epoch": 0.9840407660432299, "grad_norm": 1.455780029296875, "learning_rate": 8.782e-05, "loss": 0.4989, "step": 17573 }, { "epoch": 0.9840967633553589, "grad_norm": 1.006490707397461, "learning_rate": 8.782500000000001e-05, "loss": 0.4122, "step": 17574 }, { "epoch": 0.9841527606674879, "grad_norm": 1.4180700778961182, "learning_rate": 8.783e-05, "loss": 0.5415, "step": 17575 }, { "epoch": 0.9842087579796169, "grad_norm": 1.6900314092636108, "learning_rate": 8.7835e-05, "loss": 0.5968, "step": 17576 }, { "epoch": 0.984264755291746, "grad_norm": 1.7504936456680298, "learning_rate": 8.784e-05, "loss": 0.5195, "step": 17577 }, { "epoch": 0.984320752603875, "grad_norm": 1.4187743663787842, "learning_rate": 8.7845e-05, "loss": 0.45, "step": 17578 }, { "epoch": 0.984376749916004, "grad_norm": 1.480238914489746, "learning_rate": 8.785e-05, "loss": 0.5978, "step": 17579 }, { "epoch": 0.984432747228133, "grad_norm": 1.2080800533294678, "learning_rate": 8.7855e-05, "loss": 0.4829, "step": 17580 }, { "epoch": 0.984488744540262, "grad_norm": 1.6299505233764648, "learning_rate": 8.786e-05, "loss": 0.501, "step": 17581 }, { "epoch": 0.984544741852391, "grad_norm": 1.3159761428833008, "learning_rate": 8.786500000000001e-05, "loss": 0.4247, "step": 17582 }, { "epoch": 0.9846007391645201, "grad_norm": 1.294948935508728, "learning_rate": 8.787000000000001e-05, "loss": 0.3801, "step": 17583 }, { "epoch": 0.9846567364766491, "grad_norm": 1.212965488433838, "learning_rate": 8.787500000000001e-05, "loss": 0.3939, "step": 17584 }, { "epoch": 0.9847127337887781, "grad_norm": 1.37157142162323, "learning_rate": 8.788e-05, "loss": 0.634, "step": 17585 }, { "epoch": 0.9847687311009071, "grad_norm": 
2.70932936668396, "learning_rate": 8.7885e-05, "loss": 0.5367, "step": 17586 }, { "epoch": 0.9848247284130361, "grad_norm": 1.6919304132461548, "learning_rate": 8.789e-05, "loss": 0.5444, "step": 17587 }, { "epoch": 0.9848807257251652, "grad_norm": 1.4852466583251953, "learning_rate": 8.789500000000001e-05, "loss": 0.4759, "step": 17588 }, { "epoch": 0.9849367230372942, "grad_norm": 1.3218944072723389, "learning_rate": 8.790000000000001e-05, "loss": 0.4551, "step": 17589 }, { "epoch": 0.9849927203494232, "grad_norm": 1.262381672859192, "learning_rate": 8.7905e-05, "loss": 0.3755, "step": 17590 }, { "epoch": 0.9850487176615522, "grad_norm": 1.518549919128418, "learning_rate": 8.791e-05, "loss": 0.4507, "step": 17591 }, { "epoch": 0.9851047149736812, "grad_norm": 1.26132333278656, "learning_rate": 8.7915e-05, "loss": 0.5181, "step": 17592 }, { "epoch": 0.9851607122858103, "grad_norm": 1.578444480895996, "learning_rate": 8.792e-05, "loss": 0.4636, "step": 17593 }, { "epoch": 0.9852167095979393, "grad_norm": 1.3818082809448242, "learning_rate": 8.7925e-05, "loss": 0.5473, "step": 17594 }, { "epoch": 0.9852727069100683, "grad_norm": 1.6644206047058105, "learning_rate": 8.793000000000001e-05, "loss": 0.4045, "step": 17595 }, { "epoch": 0.9853287042221973, "grad_norm": 1.2437721490859985, "learning_rate": 8.7935e-05, "loss": 0.5042, "step": 17596 }, { "epoch": 0.9853847015343263, "grad_norm": 1.3465697765350342, "learning_rate": 8.794e-05, "loss": 0.423, "step": 17597 }, { "epoch": 0.9854406988464554, "grad_norm": 1.3346095085144043, "learning_rate": 8.7945e-05, "loss": 0.4192, "step": 17598 }, { "epoch": 0.9854966961585844, "grad_norm": 1.2999358177185059, "learning_rate": 8.795e-05, "loss": 0.461, "step": 17599 }, { "epoch": 0.9855526934707134, "grad_norm": 1.4455510377883911, "learning_rate": 8.795500000000001e-05, "loss": 0.5385, "step": 17600 }, { "epoch": 0.9856086907828424, "grad_norm": 1.5050334930419922, "learning_rate": 8.796e-05, "loss": 0.5009, "step": 17601 
}, { "epoch": 0.9856646880949714, "grad_norm": 1.2636960744857788, "learning_rate": 8.7965e-05, "loss": 0.4595, "step": 17602 }, { "epoch": 0.9857206854071004, "grad_norm": 1.377002477645874, "learning_rate": 8.797000000000001e-05, "loss": 0.4101, "step": 17603 }, { "epoch": 0.9857766827192295, "grad_norm": 1.3277605772018433, "learning_rate": 8.797500000000001e-05, "loss": 0.4982, "step": 17604 }, { "epoch": 0.9858326800313585, "grad_norm": 1.494991660118103, "learning_rate": 8.798000000000001e-05, "loss": 0.4443, "step": 17605 }, { "epoch": 0.9858886773434875, "grad_norm": 1.3216522932052612, "learning_rate": 8.7985e-05, "loss": 0.4982, "step": 17606 }, { "epoch": 0.9859446746556165, "grad_norm": 1.3640851974487305, "learning_rate": 8.799e-05, "loss": 0.417, "step": 17607 }, { "epoch": 0.9860006719677455, "grad_norm": 1.2586225271224976, "learning_rate": 8.7995e-05, "loss": 0.499, "step": 17608 }, { "epoch": 0.9860566692798746, "grad_norm": 1.2728242874145508, "learning_rate": 8.800000000000001e-05, "loss": 0.3669, "step": 17609 }, { "epoch": 0.9861126665920036, "grad_norm": 1.3577446937561035, "learning_rate": 8.800500000000001e-05, "loss": 0.6406, "step": 17610 }, { "epoch": 0.9861686639041326, "grad_norm": 1.105636715888977, "learning_rate": 8.801e-05, "loss": 0.3837, "step": 17611 }, { "epoch": 0.9862246612162616, "grad_norm": 1.332719087600708, "learning_rate": 8.8015e-05, "loss": 0.5369, "step": 17612 }, { "epoch": 0.9862806585283906, "grad_norm": 1.1900907754898071, "learning_rate": 8.802e-05, "loss": 0.4035, "step": 17613 }, { "epoch": 0.9863366558405197, "grad_norm": 1.3678020238876343, "learning_rate": 8.8025e-05, "loss": 0.4893, "step": 17614 }, { "epoch": 0.9863926531526487, "grad_norm": 1.2033085823059082, "learning_rate": 8.803e-05, "loss": 0.4647, "step": 17615 }, { "epoch": 0.9864486504647777, "grad_norm": 1.5403302907943726, "learning_rate": 8.8035e-05, "loss": 0.4371, "step": 17616 }, { "epoch": 0.9865046477769067, "grad_norm": 
1.3274661302566528, "learning_rate": 8.804e-05, "loss": 0.446, "step": 17617 }, { "epoch": 0.9865606450890357, "grad_norm": 1.631810188293457, "learning_rate": 8.8045e-05, "loss": 0.4998, "step": 17618 }, { "epoch": 0.9866166424011648, "grad_norm": 11.381399154663086, "learning_rate": 8.805e-05, "loss": 0.4679, "step": 17619 }, { "epoch": 0.9866726397132938, "grad_norm": 1.4126825332641602, "learning_rate": 8.805500000000001e-05, "loss": 0.4284, "step": 17620 }, { "epoch": 0.9867286370254228, "grad_norm": 1.444777488708496, "learning_rate": 8.806000000000001e-05, "loss": 0.4622, "step": 17621 }, { "epoch": 0.9867846343375518, "grad_norm": 1.8333213329315186, "learning_rate": 8.8065e-05, "loss": 0.6327, "step": 17622 }, { "epoch": 0.9868406316496808, "grad_norm": 1.1897823810577393, "learning_rate": 8.807e-05, "loss": 0.4171, "step": 17623 }, { "epoch": 0.9868966289618099, "grad_norm": 1.3115020990371704, "learning_rate": 8.807500000000001e-05, "loss": 0.3187, "step": 17624 }, { "epoch": 0.9869526262739389, "grad_norm": 1.45095956325531, "learning_rate": 8.808000000000001e-05, "loss": 0.4641, "step": 17625 }, { "epoch": 0.9870086235860679, "grad_norm": 1.350498914718628, "learning_rate": 8.808500000000001e-05, "loss": 0.4694, "step": 17626 }, { "epoch": 0.9870646208981969, "grad_norm": 1.168498158454895, "learning_rate": 8.809e-05, "loss": 0.4279, "step": 17627 }, { "epoch": 0.9871206182103259, "grad_norm": 1.2061278820037842, "learning_rate": 8.8095e-05, "loss": 0.5163, "step": 17628 }, { "epoch": 0.987176615522455, "grad_norm": 1.3452043533325195, "learning_rate": 8.81e-05, "loss": 0.5216, "step": 17629 }, { "epoch": 0.987232612834584, "grad_norm": 1.3370094299316406, "learning_rate": 8.8105e-05, "loss": 0.4456, "step": 17630 }, { "epoch": 0.987288610146713, "grad_norm": 1.491627812385559, "learning_rate": 8.811000000000001e-05, "loss": 0.5166, "step": 17631 }, { "epoch": 0.987344607458842, "grad_norm": 1.3679686784744263, "learning_rate": 8.8115e-05, "loss": 
0.3736, "step": 17632 }, { "epoch": 0.987400604770971, "grad_norm": 1.4824379682540894, "learning_rate": 8.812e-05, "loss": 0.4467, "step": 17633 }, { "epoch": 0.9874566020831, "grad_norm": 0.9939557909965515, "learning_rate": 8.8125e-05, "loss": 0.4024, "step": 17634 }, { "epoch": 0.9875125993952291, "grad_norm": 1.2251510620117188, "learning_rate": 8.813e-05, "loss": 0.4063, "step": 17635 }, { "epoch": 0.9875685967073581, "grad_norm": 1.4843429327011108, "learning_rate": 8.8135e-05, "loss": 0.5691, "step": 17636 }, { "epoch": 0.9876245940194871, "grad_norm": 1.5110747814178467, "learning_rate": 8.814e-05, "loss": 0.7143, "step": 17637 }, { "epoch": 0.9876805913316161, "grad_norm": 1.2455945014953613, "learning_rate": 8.8145e-05, "loss": 0.3352, "step": 17638 }, { "epoch": 0.9877365886437451, "grad_norm": 1.3711780309677124, "learning_rate": 8.815e-05, "loss": 0.5051, "step": 17639 }, { "epoch": 0.9877925859558742, "grad_norm": 1.326949954032898, "learning_rate": 8.8155e-05, "loss": 0.5136, "step": 17640 }, { "epoch": 0.9878485832680032, "grad_norm": 1.5293852090835571, "learning_rate": 8.816000000000001e-05, "loss": 0.5172, "step": 17641 }, { "epoch": 0.9879045805801322, "grad_norm": 1.548806071281433, "learning_rate": 8.816500000000001e-05, "loss": 0.4185, "step": 17642 }, { "epoch": 0.9879605778922612, "grad_norm": 1.25412917137146, "learning_rate": 8.817e-05, "loss": 0.495, "step": 17643 }, { "epoch": 0.9880165752043902, "grad_norm": 1.476805329322815, "learning_rate": 8.8175e-05, "loss": 0.4058, "step": 17644 }, { "epoch": 0.9880725725165193, "grad_norm": 1.4195153713226318, "learning_rate": 8.818000000000001e-05, "loss": 0.4833, "step": 17645 }, { "epoch": 0.9881285698286483, "grad_norm": 1.246290683746338, "learning_rate": 8.818500000000001e-05, "loss": 0.3048, "step": 17646 }, { "epoch": 0.9881845671407773, "grad_norm": 1.3022139072418213, "learning_rate": 8.819000000000001e-05, "loss": 0.437, "step": 17647 }, { "epoch": 0.9882405644529063, "grad_norm": 
1.6030832529067993, "learning_rate": 8.8195e-05, "loss": 0.3815, "step": 17648 }, { "epoch": 0.9882965617650353, "grad_norm": 1.3837579488754272, "learning_rate": 8.82e-05, "loss": 0.5043, "step": 17649 }, { "epoch": 0.9883525590771643, "grad_norm": 1.3769114017486572, "learning_rate": 8.8205e-05, "loss": 0.4242, "step": 17650 }, { "epoch": 0.9884085563892934, "grad_norm": 1.2110369205474854, "learning_rate": 8.821e-05, "loss": 0.5027, "step": 17651 }, { "epoch": 0.9884645537014223, "grad_norm": 1.4913884401321411, "learning_rate": 8.821500000000001e-05, "loss": 0.4245, "step": 17652 }, { "epoch": 0.9885205510135513, "grad_norm": 1.4101661443710327, "learning_rate": 8.822e-05, "loss": 0.4611, "step": 17653 }, { "epoch": 0.9885765483256803, "grad_norm": 1.5386029481887817, "learning_rate": 8.8225e-05, "loss": 0.7085, "step": 17654 }, { "epoch": 0.9886325456378093, "grad_norm": 1.4647287130355835, "learning_rate": 8.823e-05, "loss": 0.5565, "step": 17655 }, { "epoch": 0.9886885429499384, "grad_norm": 1.3486679792404175, "learning_rate": 8.8235e-05, "loss": 0.4155, "step": 17656 }, { "epoch": 0.9887445402620674, "grad_norm": 1.1760742664337158, "learning_rate": 8.824e-05, "loss": 0.42, "step": 17657 }, { "epoch": 0.9888005375741964, "grad_norm": 10.632444381713867, "learning_rate": 8.8245e-05, "loss": 0.4341, "step": 17658 }, { "epoch": 0.9888565348863254, "grad_norm": 1.3989155292510986, "learning_rate": 8.825e-05, "loss": 0.491, "step": 17659 }, { "epoch": 0.9889125321984544, "grad_norm": 1.2893259525299072, "learning_rate": 8.8255e-05, "loss": 0.4318, "step": 17660 }, { "epoch": 0.9889685295105834, "grad_norm": 1.8009469509124756, "learning_rate": 8.826000000000001e-05, "loss": 0.5432, "step": 17661 }, { "epoch": 0.9890245268227125, "grad_norm": 1.3617624044418335, "learning_rate": 8.826500000000001e-05, "loss": 0.4691, "step": 17662 }, { "epoch": 0.9890805241348415, "grad_norm": 1.30439293384552, "learning_rate": 8.827000000000001e-05, "loss": 0.4716, "step": 
17663 }, { "epoch": 0.9891365214469705, "grad_norm": 1.1191641092300415, "learning_rate": 8.8275e-05, "loss": 0.3351, "step": 17664 }, { "epoch": 0.9891925187590995, "grad_norm": 1.3394598960876465, "learning_rate": 8.828e-05, "loss": 0.4725, "step": 17665 }, { "epoch": 0.9892485160712285, "grad_norm": 1.580841302871704, "learning_rate": 8.828500000000001e-05, "loss": 0.6894, "step": 17666 }, { "epoch": 0.9893045133833576, "grad_norm": 1.1006057262420654, "learning_rate": 8.829000000000001e-05, "loss": 0.3338, "step": 17667 }, { "epoch": 0.9893605106954866, "grad_norm": 1.2439556121826172, "learning_rate": 8.829500000000001e-05, "loss": 0.5283, "step": 17668 }, { "epoch": 0.9894165080076156, "grad_norm": 1.3054133653640747, "learning_rate": 8.83e-05, "loss": 0.4204, "step": 17669 }, { "epoch": 0.9894725053197446, "grad_norm": 1.3397842645645142, "learning_rate": 8.8305e-05, "loss": 0.39, "step": 17670 }, { "epoch": 0.9895285026318736, "grad_norm": 1.3987393379211426, "learning_rate": 8.831e-05, "loss": 0.5048, "step": 17671 }, { "epoch": 0.9895844999440027, "grad_norm": 1.7089719772338867, "learning_rate": 8.8315e-05, "loss": 0.4487, "step": 17672 }, { "epoch": 0.9896404972561317, "grad_norm": 1.145412802696228, "learning_rate": 8.832000000000001e-05, "loss": 0.452, "step": 17673 }, { "epoch": 0.9896964945682607, "grad_norm": 1.261096715927124, "learning_rate": 8.8325e-05, "loss": 0.3973, "step": 17674 }, { "epoch": 0.9897524918803897, "grad_norm": 1.509187936782837, "learning_rate": 8.833e-05, "loss": 0.5809, "step": 17675 }, { "epoch": 0.9898084891925187, "grad_norm": 1.092221975326538, "learning_rate": 8.8335e-05, "loss": 0.3629, "step": 17676 }, { "epoch": 0.9898644865046478, "grad_norm": 1.2134952545166016, "learning_rate": 8.834e-05, "loss": 0.4333, "step": 17677 }, { "epoch": 0.9899204838167768, "grad_norm": 1.1407698392868042, "learning_rate": 8.8345e-05, "loss": 0.3594, "step": 17678 }, { "epoch": 0.9899764811289058, "grad_norm": 1.4732252359390259, 
"learning_rate": 8.834999999999999e-05, "loss": 0.3671, "step": 17679 }, { "epoch": 0.9900324784410348, "grad_norm": 1.455226182937622, "learning_rate": 8.8355e-05, "loss": 0.448, "step": 17680 }, { "epoch": 0.9900884757531638, "grad_norm": 1.2765408754348755, "learning_rate": 8.836000000000001e-05, "loss": 0.464, "step": 17681 }, { "epoch": 0.9901444730652929, "grad_norm": 1.222954511642456, "learning_rate": 8.836500000000001e-05, "loss": 0.4208, "step": 17682 }, { "epoch": 0.9902004703774219, "grad_norm": 2.3051717281341553, "learning_rate": 8.837000000000001e-05, "loss": 0.647, "step": 17683 }, { "epoch": 0.9902564676895509, "grad_norm": 1.7088819742202759, "learning_rate": 8.837500000000001e-05, "loss": 0.4154, "step": 17684 }, { "epoch": 0.9903124650016799, "grad_norm": 1.267587661743164, "learning_rate": 8.838e-05, "loss": 0.404, "step": 17685 }, { "epoch": 0.9903684623138089, "grad_norm": 1.8990023136138916, "learning_rate": 8.8385e-05, "loss": 0.4948, "step": 17686 }, { "epoch": 0.990424459625938, "grad_norm": 1.133603572845459, "learning_rate": 8.839000000000001e-05, "loss": 0.3743, "step": 17687 }, { "epoch": 0.990480456938067, "grad_norm": 1.6615586280822754, "learning_rate": 8.839500000000001e-05, "loss": 0.5648, "step": 17688 }, { "epoch": 0.990536454250196, "grad_norm": 1.5147998332977295, "learning_rate": 8.840000000000001e-05, "loss": 0.4682, "step": 17689 }, { "epoch": 0.990592451562325, "grad_norm": 1.6206152439117432, "learning_rate": 8.8405e-05, "loss": 0.47, "step": 17690 }, { "epoch": 0.990648448874454, "grad_norm": 1.3913646936416626, "learning_rate": 8.841e-05, "loss": 0.4006, "step": 17691 }, { "epoch": 0.990704446186583, "grad_norm": 1.4173760414123535, "learning_rate": 8.8415e-05, "loss": 0.4517, "step": 17692 }, { "epoch": 0.9907604434987121, "grad_norm": 1.551391839981079, "learning_rate": 8.842e-05, "loss": 0.5697, "step": 17693 }, { "epoch": 0.9908164408108411, "grad_norm": 1.2823461294174194, "learning_rate": 8.842500000000001e-05, 
"loss": 0.3911, "step": 17694 }, { "epoch": 0.9908724381229701, "grad_norm": 1.2825812101364136, "learning_rate": 8.843e-05, "loss": 0.5166, "step": 17695 }, { "epoch": 0.9909284354350991, "grad_norm": 1.237261176109314, "learning_rate": 8.8435e-05, "loss": 0.4583, "step": 17696 }, { "epoch": 0.9909844327472281, "grad_norm": 1.2546817064285278, "learning_rate": 8.844e-05, "loss": 0.4322, "step": 17697 }, { "epoch": 0.9910404300593572, "grad_norm": 1.210726261138916, "learning_rate": 8.8445e-05, "loss": 0.4521, "step": 17698 }, { "epoch": 0.9910964273714862, "grad_norm": 1.7791892290115356, "learning_rate": 8.845e-05, "loss": 0.4756, "step": 17699 }, { "epoch": 0.9911524246836152, "grad_norm": 1.4492498636245728, "learning_rate": 8.845499999999999e-05, "loss": 0.4875, "step": 17700 }, { "epoch": 0.9912084219957442, "grad_norm": 1.4883558750152588, "learning_rate": 8.846e-05, "loss": 0.4393, "step": 17701 }, { "epoch": 0.9912644193078732, "grad_norm": 1.568536639213562, "learning_rate": 8.846500000000001e-05, "loss": 0.5384, "step": 17702 }, { "epoch": 0.9913204166200023, "grad_norm": 1.1386370658874512, "learning_rate": 8.847000000000001e-05, "loss": 0.4197, "step": 17703 }, { "epoch": 0.9913764139321313, "grad_norm": 1.130841851234436, "learning_rate": 8.847500000000001e-05, "loss": 0.553, "step": 17704 }, { "epoch": 0.9914324112442603, "grad_norm": 1.9779821634292603, "learning_rate": 8.848e-05, "loss": 0.4041, "step": 17705 }, { "epoch": 0.9914884085563893, "grad_norm": 1.2233911752700806, "learning_rate": 8.8485e-05, "loss": 0.4139, "step": 17706 }, { "epoch": 0.9915444058685183, "grad_norm": 1.1328318119049072, "learning_rate": 8.849e-05, "loss": 0.3593, "step": 17707 }, { "epoch": 0.9916004031806473, "grad_norm": 1.307611107826233, "learning_rate": 8.849500000000001e-05, "loss": 0.4044, "step": 17708 }, { "epoch": 0.9916564004927764, "grad_norm": 1.3479758501052856, "learning_rate": 8.850000000000001e-05, "loss": 0.4071, "step": 17709 }, { "epoch": 
0.9917123978049054, "grad_norm": 1.8989241123199463, "learning_rate": 8.850500000000001e-05, "loss": 0.5629, "step": 17710 }, { "epoch": 0.9917683951170344, "grad_norm": 1.6310759782791138, "learning_rate": 8.851e-05, "loss": 0.3843, "step": 17711 }, { "epoch": 0.9918243924291634, "grad_norm": 1.456925392150879, "learning_rate": 8.8515e-05, "loss": 0.416, "step": 17712 }, { "epoch": 0.9918803897412924, "grad_norm": 1.4533954858779907, "learning_rate": 8.852e-05, "loss": 0.5127, "step": 17713 }, { "epoch": 0.9919363870534215, "grad_norm": 1.5278843641281128, "learning_rate": 8.8525e-05, "loss": 0.5731, "step": 17714 }, { "epoch": 0.9919923843655505, "grad_norm": 1.6043784618377686, "learning_rate": 8.853000000000001e-05, "loss": 0.4541, "step": 17715 }, { "epoch": 0.9920483816776795, "grad_norm": 1.2234134674072266, "learning_rate": 8.8535e-05, "loss": 0.4573, "step": 17716 }, { "epoch": 0.9921043789898085, "grad_norm": 1.176305890083313, "learning_rate": 8.854e-05, "loss": 0.3466, "step": 17717 }, { "epoch": 0.9921603763019375, "grad_norm": 1.4414507150650024, "learning_rate": 8.8545e-05, "loss": 0.4582, "step": 17718 }, { "epoch": 0.9922163736140666, "grad_norm": 1.346954345703125, "learning_rate": 8.855e-05, "loss": 0.4941, "step": 17719 }, { "epoch": 0.9922723709261956, "grad_norm": 1.308688998222351, "learning_rate": 8.8555e-05, "loss": 0.5205, "step": 17720 }, { "epoch": 0.9923283682383246, "grad_norm": 1.1533242464065552, "learning_rate": 8.856e-05, "loss": 0.5297, "step": 17721 }, { "epoch": 0.9923843655504536, "grad_norm": 1.1984797716140747, "learning_rate": 8.8565e-05, "loss": 0.43, "step": 17722 }, { "epoch": 0.9924403628625826, "grad_norm": 1.5509682893753052, "learning_rate": 8.857000000000001e-05, "loss": 0.4958, "step": 17723 }, { "epoch": 0.9924963601747117, "grad_norm": 1.3433796167373657, "learning_rate": 8.857500000000001e-05, "loss": 0.3858, "step": 17724 }, { "epoch": 0.9925523574868407, "grad_norm": 1.4598404169082642, "learning_rate": 
8.858000000000001e-05, "loss": 0.4344, "step": 17725 }, { "epoch": 0.9926083547989697, "grad_norm": 1.2984787225723267, "learning_rate": 8.8585e-05, "loss": 0.4054, "step": 17726 }, { "epoch": 0.9926643521110987, "grad_norm": 1.8481416702270508, "learning_rate": 8.859e-05, "loss": 0.3646, "step": 17727 }, { "epoch": 0.9927203494232277, "grad_norm": 1.2608082294464111, "learning_rate": 8.8595e-05, "loss": 0.3594, "step": 17728 }, { "epoch": 0.9927763467353568, "grad_norm": 1.4416066408157349, "learning_rate": 8.86e-05, "loss": 0.5569, "step": 17729 }, { "epoch": 0.9928323440474858, "grad_norm": 1.2480077743530273, "learning_rate": 8.860500000000001e-05, "loss": 0.3737, "step": 17730 }, { "epoch": 0.9928883413596148, "grad_norm": 1.1432256698608398, "learning_rate": 8.861000000000001e-05, "loss": 0.4381, "step": 17731 }, { "epoch": 0.9929443386717438, "grad_norm": 1.6371777057647705, "learning_rate": 8.8615e-05, "loss": 0.5299, "step": 17732 }, { "epoch": 0.9930003359838728, "grad_norm": 1.286447525024414, "learning_rate": 8.862e-05, "loss": 0.4627, "step": 17733 }, { "epoch": 0.9930563332960017, "grad_norm": 1.3441230058670044, "learning_rate": 8.8625e-05, "loss": 0.38, "step": 17734 }, { "epoch": 0.9931123306081308, "grad_norm": 1.1864452362060547, "learning_rate": 8.863e-05, "loss": 0.4335, "step": 17735 }, { "epoch": 0.9931683279202598, "grad_norm": 1.2555714845657349, "learning_rate": 8.863500000000001e-05, "loss": 0.4425, "step": 17736 }, { "epoch": 0.9932243252323888, "grad_norm": 1.1101124286651611, "learning_rate": 8.864e-05, "loss": 0.3574, "step": 17737 }, { "epoch": 0.9932803225445178, "grad_norm": 1.2899377346038818, "learning_rate": 8.8645e-05, "loss": 0.3888, "step": 17738 }, { "epoch": 0.9933363198566468, "grad_norm": 1.324385643005371, "learning_rate": 8.865e-05, "loss": 0.4137, "step": 17739 }, { "epoch": 0.9933923171687759, "grad_norm": 1.7079662084579468, "learning_rate": 8.8655e-05, "loss": 0.6243, "step": 17740 }, { "epoch": 0.9934483144809049, 
"grad_norm": 1.450404405593872, "learning_rate": 8.866000000000001e-05, "loss": 0.5835, "step": 17741 }, { "epoch": 0.9935043117930339, "grad_norm": 1.3371881246566772, "learning_rate": 8.8665e-05, "loss": 0.4234, "step": 17742 }, { "epoch": 0.9935603091051629, "grad_norm": 1.562181830406189, "learning_rate": 8.867e-05, "loss": 0.4009, "step": 17743 }, { "epoch": 0.9936163064172919, "grad_norm": 1.3748550415039062, "learning_rate": 8.867500000000001e-05, "loss": 0.4405, "step": 17744 }, { "epoch": 0.993672303729421, "grad_norm": 1.4513928890228271, "learning_rate": 8.868000000000001e-05, "loss": 0.425, "step": 17745 }, { "epoch": 0.99372830104155, "grad_norm": 1.3562967777252197, "learning_rate": 8.868500000000001e-05, "loss": 0.3928, "step": 17746 }, { "epoch": 0.993784298353679, "grad_norm": 1.7254388332366943, "learning_rate": 8.869e-05, "loss": 0.502, "step": 17747 }, { "epoch": 0.993840295665808, "grad_norm": 1.351060152053833, "learning_rate": 8.8695e-05, "loss": 0.3679, "step": 17748 }, { "epoch": 0.993896292977937, "grad_norm": 1.322018027305603, "learning_rate": 8.87e-05, "loss": 0.461, "step": 17749 }, { "epoch": 0.993952290290066, "grad_norm": 1.4136666059494019, "learning_rate": 8.8705e-05, "loss": 0.4954, "step": 17750 }, { "epoch": 0.9940082876021951, "grad_norm": 1.2768642902374268, "learning_rate": 8.871000000000001e-05, "loss": 0.4126, "step": 17751 }, { "epoch": 0.9940642849143241, "grad_norm": 1.3570771217346191, "learning_rate": 8.871500000000001e-05, "loss": 0.4779, "step": 17752 }, { "epoch": 0.9941202822264531, "grad_norm": 1.1728743314743042, "learning_rate": 8.872e-05, "loss": 0.5258, "step": 17753 }, { "epoch": 0.9941762795385821, "grad_norm": 1.4993699789047241, "learning_rate": 8.8725e-05, "loss": 0.4644, "step": 17754 }, { "epoch": 0.9942322768507111, "grad_norm": 1.169426441192627, "learning_rate": 8.873e-05, "loss": 0.4376, "step": 17755 }, { "epoch": 0.9942882741628402, "grad_norm": 1.6827619075775146, "learning_rate": 8.8735e-05, 
"loss": 0.5448, "step": 17756 }, { "epoch": 0.9943442714749692, "grad_norm": 1.4702149629592896, "learning_rate": 8.874000000000001e-05, "loss": 0.4164, "step": 17757 }, { "epoch": 0.9944002687870982, "grad_norm": 1.4925825595855713, "learning_rate": 8.8745e-05, "loss": 0.4294, "step": 17758 }, { "epoch": 0.9944562660992272, "grad_norm": 1.1866360902786255, "learning_rate": 8.875e-05, "loss": 0.3493, "step": 17759 }, { "epoch": 0.9945122634113562, "grad_norm": 1.4568305015563965, "learning_rate": 8.8755e-05, "loss": 0.4478, "step": 17760 }, { "epoch": 0.9945682607234853, "grad_norm": 1.296949863433838, "learning_rate": 8.876e-05, "loss": 0.3953, "step": 17761 }, { "epoch": 0.9946242580356143, "grad_norm": 1.423387050628662, "learning_rate": 8.876500000000001e-05, "loss": 0.5378, "step": 17762 }, { "epoch": 0.9946802553477433, "grad_norm": 1.4243385791778564, "learning_rate": 8.877e-05, "loss": 0.4648, "step": 17763 }, { "epoch": 0.9947362526598723, "grad_norm": 1.4275507926940918, "learning_rate": 8.8775e-05, "loss": 0.5134, "step": 17764 }, { "epoch": 0.9947922499720013, "grad_norm": 1.2002226114273071, "learning_rate": 8.878000000000001e-05, "loss": 0.3779, "step": 17765 }, { "epoch": 0.9948482472841303, "grad_norm": 1.3369922637939453, "learning_rate": 8.878500000000001e-05, "loss": 0.4891, "step": 17766 }, { "epoch": 0.9949042445962594, "grad_norm": 2.1659765243530273, "learning_rate": 8.879000000000001e-05, "loss": 0.4689, "step": 17767 }, { "epoch": 0.9949602419083884, "grad_norm": 1.449819803237915, "learning_rate": 8.8795e-05, "loss": 0.4796, "step": 17768 }, { "epoch": 0.9950162392205174, "grad_norm": 1.4384304285049438, "learning_rate": 8.88e-05, "loss": 0.5106, "step": 17769 }, { "epoch": 0.9950722365326464, "grad_norm": 1.7336833477020264, "learning_rate": 8.8805e-05, "loss": 0.5333, "step": 17770 }, { "epoch": 0.9951282338447754, "grad_norm": 1.467752456665039, "learning_rate": 8.881e-05, "loss": 0.5923, "step": 17771 }, { "epoch": 0.9951842311569045, 
"grad_norm": 1.5609163045883179, "learning_rate": 8.881500000000001e-05, "loss": 0.5225, "step": 17772 }, { "epoch": 0.9952402284690335, "grad_norm": 1.3803917169570923, "learning_rate": 8.882000000000001e-05, "loss": 0.4381, "step": 17773 }, { "epoch": 0.9952962257811625, "grad_norm": 1.325348138809204, "learning_rate": 8.8825e-05, "loss": 0.5434, "step": 17774 }, { "epoch": 0.9953522230932915, "grad_norm": 1.1791011095046997, "learning_rate": 8.883e-05, "loss": 0.3529, "step": 17775 }, { "epoch": 0.9954082204054205, "grad_norm": 1.1796578168869019, "learning_rate": 8.8835e-05, "loss": 0.4318, "step": 17776 }, { "epoch": 0.9954642177175496, "grad_norm": 1.7437174320220947, "learning_rate": 8.884e-05, "loss": 0.462, "step": 17777 }, { "epoch": 0.9955202150296786, "grad_norm": 1.3527717590332031, "learning_rate": 8.8845e-05, "loss": 0.5139, "step": 17778 }, { "epoch": 0.9955762123418076, "grad_norm": 1.0909597873687744, "learning_rate": 8.885e-05, "loss": 0.3406, "step": 17779 }, { "epoch": 0.9956322096539366, "grad_norm": 15.612887382507324, "learning_rate": 8.8855e-05, "loss": 0.5652, "step": 17780 }, { "epoch": 0.9956882069660656, "grad_norm": 1.3589320182800293, "learning_rate": 8.886e-05, "loss": 0.4209, "step": 17781 }, { "epoch": 0.9957442042781947, "grad_norm": 1.6181668043136597, "learning_rate": 8.886500000000001e-05, "loss": 0.4392, "step": 17782 }, { "epoch": 0.9958002015903237, "grad_norm": 1.5727357864379883, "learning_rate": 8.887000000000001e-05, "loss": 0.5498, "step": 17783 }, { "epoch": 0.9958561989024527, "grad_norm": 1.449902892112732, "learning_rate": 8.8875e-05, "loss": 0.3859, "step": 17784 }, { "epoch": 0.9959121962145817, "grad_norm": 1.5982789993286133, "learning_rate": 8.888e-05, "loss": 0.4078, "step": 17785 }, { "epoch": 0.9959681935267107, "grad_norm": 1.2767308950424194, "learning_rate": 8.888500000000001e-05, "loss": 0.4659, "step": 17786 }, { "epoch": 0.9960241908388398, "grad_norm": 1.3726425170898438, "learning_rate": 
8.889000000000001e-05, "loss": 0.4481, "step": 17787 }, { "epoch": 0.9960801881509688, "grad_norm": 1.338931918144226, "learning_rate": 8.889500000000001e-05, "loss": 0.4569, "step": 17788 }, { "epoch": 0.9961361854630978, "grad_norm": 1.3055534362792969, "learning_rate": 8.89e-05, "loss": 0.3763, "step": 17789 }, { "epoch": 0.9961921827752268, "grad_norm": 1.2787420749664307, "learning_rate": 8.8905e-05, "loss": 0.4308, "step": 17790 }, { "epoch": 0.9962481800873558, "grad_norm": 1.5374587774276733, "learning_rate": 8.891e-05, "loss": 0.5375, "step": 17791 }, { "epoch": 0.9963041773994848, "grad_norm": 1.3534173965454102, "learning_rate": 8.8915e-05, "loss": 0.4976, "step": 17792 }, { "epoch": 0.9963601747116139, "grad_norm": 1.5223244428634644, "learning_rate": 8.892000000000001e-05, "loss": 0.4936, "step": 17793 }, { "epoch": 0.9964161720237429, "grad_norm": 1.176383137702942, "learning_rate": 8.8925e-05, "loss": 0.373, "step": 17794 }, { "epoch": 0.9964721693358719, "grad_norm": 1.2590463161468506, "learning_rate": 8.893e-05, "loss": 0.4684, "step": 17795 }, { "epoch": 0.9965281666480009, "grad_norm": 1.581672191619873, "learning_rate": 8.8935e-05, "loss": 0.5669, "step": 17796 }, { "epoch": 0.9965841639601299, "grad_norm": 2.492479085922241, "learning_rate": 8.894e-05, "loss": 0.5244, "step": 17797 }, { "epoch": 0.996640161272259, "grad_norm": 1.3283013105392456, "learning_rate": 8.8945e-05, "loss": 0.55, "step": 17798 }, { "epoch": 0.996696158584388, "grad_norm": 1.2206213474273682, "learning_rate": 8.895e-05, "loss": 0.3917, "step": 17799 }, { "epoch": 0.996752155896517, "grad_norm": 1.5171903371810913, "learning_rate": 8.8955e-05, "loss": 0.5488, "step": 17800 }, { "epoch": 0.996808153208646, "grad_norm": 1.507001280784607, "learning_rate": 8.896e-05, "loss": 0.5199, "step": 17801 }, { "epoch": 0.996864150520775, "grad_norm": 1.26813542842865, "learning_rate": 8.896500000000001e-05, "loss": 0.3859, "step": 17802 }, { "epoch": 0.9969201478329041, 
"grad_norm": 2.1569724082946777, "learning_rate": 8.897000000000001e-05, "loss": 0.5384, "step": 17803 }, { "epoch": 0.9969761451450331, "grad_norm": 2.1141278743743896, "learning_rate": 8.897500000000001e-05, "loss": 0.4599, "step": 17804 }, { "epoch": 0.9970321424571621, "grad_norm": 1.3248432874679565, "learning_rate": 8.898e-05, "loss": 0.4108, "step": 17805 }, { "epoch": 0.9970881397692911, "grad_norm": 1.2197864055633545, "learning_rate": 8.8985e-05, "loss": 0.4157, "step": 17806 }, { "epoch": 0.9971441370814201, "grad_norm": 1.5786558389663696, "learning_rate": 8.899e-05, "loss": 0.5085, "step": 17807 }, { "epoch": 0.9972001343935492, "grad_norm": 1.2038183212280273, "learning_rate": 8.899500000000001e-05, "loss": 0.3443, "step": 17808 }, { "epoch": 0.9972561317056782, "grad_norm": 1.4003777503967285, "learning_rate": 8.900000000000001e-05, "loss": 0.4955, "step": 17809 }, { "epoch": 0.9973121290178072, "grad_norm": 1.3404408693313599, "learning_rate": 8.9005e-05, "loss": 0.424, "step": 17810 }, { "epoch": 0.9973681263299362, "grad_norm": 1.5213918685913086, "learning_rate": 8.901e-05, "loss": 0.4937, "step": 17811 }, { "epoch": 0.9974241236420652, "grad_norm": 1.4018992185592651, "learning_rate": 8.9015e-05, "loss": 0.4621, "step": 17812 }, { "epoch": 0.9974801209541942, "grad_norm": 1.1799334287643433, "learning_rate": 8.902e-05, "loss": 0.3614, "step": 17813 }, { "epoch": 0.9975361182663233, "grad_norm": 1.24405038356781, "learning_rate": 8.902500000000001e-05, "loss": 0.4992, "step": 17814 }, { "epoch": 0.9975921155784523, "grad_norm": 1.2421824932098389, "learning_rate": 8.903e-05, "loss": 0.3443, "step": 17815 }, { "epoch": 0.9976481128905813, "grad_norm": 1.294106125831604, "learning_rate": 8.9035e-05, "loss": 0.5638, "step": 17816 }, { "epoch": 0.9977041102027102, "grad_norm": 1.2034837007522583, "learning_rate": 8.904e-05, "loss": 0.4029, "step": 17817 }, { "epoch": 0.9977601075148392, "grad_norm": 1.393774151802063, "learning_rate": 8.9045e-05, 
"loss": 0.5215, "step": 17818 }, { "epoch": 0.9978161048269683, "grad_norm": 5.649298667907715, "learning_rate": 8.905e-05, "loss": 0.4517, "step": 17819 }, { "epoch": 0.9978721021390973, "grad_norm": 1.5890209674835205, "learning_rate": 8.9055e-05, "loss": 0.5306, "step": 17820 }, { "epoch": 0.9979280994512263, "grad_norm": 1.5135754346847534, "learning_rate": 8.906e-05, "loss": 0.4627, "step": 17821 }, { "epoch": 0.9979840967633553, "grad_norm": 1.3312042951583862, "learning_rate": 8.906500000000002e-05, "loss": 0.4954, "step": 17822 }, { "epoch": 0.9980400940754843, "grad_norm": 1.1777615547180176, "learning_rate": 8.907000000000001e-05, "loss": 0.3609, "step": 17823 }, { "epoch": 0.9980960913876133, "grad_norm": 1.5533791780471802, "learning_rate": 8.907500000000001e-05, "loss": 0.4959, "step": 17824 }, { "epoch": 0.9981520886997424, "grad_norm": 1.2568227052688599, "learning_rate": 8.908000000000001e-05, "loss": 0.4182, "step": 17825 }, { "epoch": 0.9982080860118714, "grad_norm": 1.339856743812561, "learning_rate": 8.9085e-05, "loss": 0.4881, "step": 17826 }, { "epoch": 0.9982640833240004, "grad_norm": 1.3573658466339111, "learning_rate": 8.909e-05, "loss": 0.5631, "step": 17827 }, { "epoch": 0.9983200806361294, "grad_norm": 1.392850637435913, "learning_rate": 8.9095e-05, "loss": 0.3904, "step": 17828 }, { "epoch": 0.9983760779482584, "grad_norm": 1.140561580657959, "learning_rate": 8.910000000000001e-05, "loss": 0.3946, "step": 17829 }, { "epoch": 0.9984320752603875, "grad_norm": 1.7466707229614258, "learning_rate": 8.910500000000001e-05, "loss": 0.6854, "step": 17830 }, { "epoch": 0.9984880725725165, "grad_norm": 1.1528984308242798, "learning_rate": 8.911e-05, "loss": 0.4211, "step": 17831 }, { "epoch": 0.9985440698846455, "grad_norm": 1.7609009742736816, "learning_rate": 8.9115e-05, "loss": 0.4003, "step": 17832 }, { "epoch": 0.9986000671967745, "grad_norm": 1.2808367013931274, "learning_rate": 8.912e-05, "loss": 0.4723, "step": 17833 }, { "epoch": 
0.9986560645089035, "grad_norm": 1.5797677040100098, "learning_rate": 8.9125e-05, "loss": 0.4325, "step": 17834 }, { "epoch": 0.9987120618210326, "grad_norm": 1.264971375465393, "learning_rate": 8.913000000000001e-05, "loss": 0.4001, "step": 17835 }, { "epoch": 0.9987680591331616, "grad_norm": 1.4042302370071411, "learning_rate": 8.9135e-05, "loss": 0.3977, "step": 17836 }, { "epoch": 0.9988240564452906, "grad_norm": 1.3492649793624878, "learning_rate": 8.914e-05, "loss": 0.4877, "step": 17837 }, { "epoch": 0.9988800537574196, "grad_norm": 1.1757004261016846, "learning_rate": 8.9145e-05, "loss": 0.4391, "step": 17838 }, { "epoch": 0.9989360510695486, "grad_norm": 1.409857153892517, "learning_rate": 8.915e-05, "loss": 0.4832, "step": 17839 }, { "epoch": 0.9989920483816777, "grad_norm": 1.3181517124176025, "learning_rate": 8.9155e-05, "loss": 0.3506, "step": 17840 }, { "epoch": 0.9990480456938067, "grad_norm": 1.33633553981781, "learning_rate": 8.916e-05, "loss": 0.5155, "step": 17841 }, { "epoch": 0.9991040430059357, "grad_norm": 1.723892092704773, "learning_rate": 8.9165e-05, "loss": 0.5702, "step": 17842 }, { "epoch": 0.9991600403180647, "grad_norm": 1.2972443103790283, "learning_rate": 8.917000000000002e-05, "loss": 0.4963, "step": 17843 }, { "epoch": 0.9992160376301937, "grad_norm": 1.8608943223953247, "learning_rate": 8.917500000000001e-05, "loss": 0.5375, "step": 17844 }, { "epoch": 0.9992720349423228, "grad_norm": 1.3446626663208008, "learning_rate": 8.918000000000001e-05, "loss": 0.5504, "step": 17845 }, { "epoch": 0.9993280322544518, "grad_norm": 1.269340991973877, "learning_rate": 8.918500000000001e-05, "loss": 0.4592, "step": 17846 }, { "epoch": 0.9993840295665808, "grad_norm": 1.349295735359192, "learning_rate": 8.919e-05, "loss": 0.4749, "step": 17847 }, { "epoch": 0.9994400268787098, "grad_norm": 1.5455998182296753, "learning_rate": 8.9195e-05, "loss": 0.5692, "step": 17848 }, { "epoch": 0.9994960241908388, "grad_norm": 1.5913318395614624, 
"learning_rate": 8.92e-05, "loss": 0.3864, "step": 17849 }, { "epoch": 0.9995520215029678, "grad_norm": 1.3120813369750977, "learning_rate": 8.920500000000001e-05, "loss": 0.4912, "step": 17850 }, { "epoch": 0.9996080188150969, "grad_norm": 1.6308808326721191, "learning_rate": 8.921000000000001e-05, "loss": 0.5462, "step": 17851 }, { "epoch": 0.9996640161272259, "grad_norm": 1.414174199104309, "learning_rate": 8.9215e-05, "loss": 0.5364, "step": 17852 }, { "epoch": 0.9997200134393549, "grad_norm": 1.3153877258300781, "learning_rate": 8.922e-05, "loss": 0.4214, "step": 17853 }, { "epoch": 0.9997760107514839, "grad_norm": 1.3772268295288086, "learning_rate": 8.9225e-05, "loss": 0.5077, "step": 17854 }, { "epoch": 0.9998320080636129, "grad_norm": 1.1395400762557983, "learning_rate": 8.923e-05, "loss": 0.4192, "step": 17855 }, { "epoch": 0.999888005375742, "grad_norm": 1.4553617238998413, "learning_rate": 8.9235e-05, "loss": 0.5172, "step": 17856 }, { "epoch": 0.999944002687871, "grad_norm": 1.2411185503005981, "learning_rate": 8.924e-05, "loss": 0.3903, "step": 17857 }, { "epoch": 1.0, "grad_norm": 3.379507541656494, "learning_rate": 8.9245e-05, "loss": 0.2554, "step": 17858 }, { "epoch": 1.000055997312129, "grad_norm": 1.1951563358306885, "learning_rate": 8.925e-05, "loss": 0.3561, "step": 17859 }, { "epoch": 1.000111994624258, "grad_norm": 1.2772942781448364, "learning_rate": 8.9255e-05, "loss": 0.4737, "step": 17860 }, { "epoch": 1.000167991936387, "grad_norm": 1.2272050380706787, "learning_rate": 8.926e-05, "loss": 0.3712, "step": 17861 }, { "epoch": 1.000223989248516, "grad_norm": 1.2424622774124146, "learning_rate": 8.9265e-05, "loss": 0.44, "step": 17862 }, { "epoch": 1.000279986560645, "grad_norm": 1.1765683889389038, "learning_rate": 8.927e-05, "loss": 0.4108, "step": 17863 }, { "epoch": 1.0003359838727741, "grad_norm": 1.3974347114562988, "learning_rate": 8.927500000000002e-05, "loss": 0.4063, "step": 17864 }, { "epoch": 1.0003919811849031, "grad_norm": 
1.3876872062683105, "learning_rate": 8.928000000000001e-05, "loss": 0.3698, "step": 17865 }, { "epoch": 1.0004479784970322, "grad_norm": 1.5575116872787476, "learning_rate": 8.928500000000001e-05, "loss": 0.3858, "step": 17866 }, { "epoch": 1.0005039758091612, "grad_norm": 1.4825502634048462, "learning_rate": 8.929000000000001e-05, "loss": 0.4251, "step": 17867 }, { "epoch": 1.0005599731212902, "grad_norm": 2.1418943405151367, "learning_rate": 8.9295e-05, "loss": 0.4481, "step": 17868 }, { "epoch": 1.0006159704334192, "grad_norm": 1.5139989852905273, "learning_rate": 8.93e-05, "loss": 0.4508, "step": 17869 }, { "epoch": 1.0006719677455482, "grad_norm": 1.6377043724060059, "learning_rate": 8.9305e-05, "loss": 0.42, "step": 17870 }, { "epoch": 1.0007279650576772, "grad_norm": 1.472362995147705, "learning_rate": 8.931000000000001e-05, "loss": 0.5715, "step": 17871 }, { "epoch": 1.0007839623698063, "grad_norm": 1.52193284034729, "learning_rate": 8.931500000000001e-05, "loss": 0.517, "step": 17872 }, { "epoch": 1.0008399596819353, "grad_norm": 2.265660047531128, "learning_rate": 8.932e-05, "loss": 0.4281, "step": 17873 }, { "epoch": 1.0008959569940643, "grad_norm": 1.5788400173187256, "learning_rate": 8.9325e-05, "loss": 0.4571, "step": 17874 }, { "epoch": 1.0009519543061933, "grad_norm": 1.347461223602295, "learning_rate": 8.933e-05, "loss": 0.3671, "step": 17875 }, { "epoch": 1.0010079516183223, "grad_norm": 1.3150992393493652, "learning_rate": 8.9335e-05, "loss": 0.4848, "step": 17876 }, { "epoch": 1.0010639489304514, "grad_norm": 1.1444026231765747, "learning_rate": 8.934e-05, "loss": 0.3704, "step": 17877 }, { "epoch": 1.0011199462425804, "grad_norm": 1.5898854732513428, "learning_rate": 8.9345e-05, "loss": 0.4398, "step": 17878 }, { "epoch": 1.0011759435547094, "grad_norm": 1.4028594493865967, "learning_rate": 8.935e-05, "loss": 0.3731, "step": 17879 }, { "epoch": 1.0012319408668384, "grad_norm": 1.3735319375991821, "learning_rate": 8.9355e-05, "loss": 0.4651, 
"step": 17880 }, { "epoch": 1.0012879381789674, "grad_norm": 1.1216611862182617, "learning_rate": 8.936e-05, "loss": 0.4876, "step": 17881 }, { "epoch": 1.0013439354910965, "grad_norm": 1.6765284538269043, "learning_rate": 8.936500000000001e-05, "loss": 0.6061, "step": 17882 }, { "epoch": 1.0013999328032255, "grad_norm": 1.298638939857483, "learning_rate": 8.937e-05, "loss": 0.3629, "step": 17883 }, { "epoch": 1.0014559301153545, "grad_norm": 1.259941577911377, "learning_rate": 8.9375e-05, "loss": 0.2662, "step": 17884 }, { "epoch": 1.0015119274274835, "grad_norm": 1.2943047285079956, "learning_rate": 8.938e-05, "loss": 0.4017, "step": 17885 }, { "epoch": 1.0015679247396125, "grad_norm": 1.2645395994186401, "learning_rate": 8.938500000000001e-05, "loss": 0.4045, "step": 17886 }, { "epoch": 1.0016239220517416, "grad_norm": 1.259494662284851, "learning_rate": 8.939000000000001e-05, "loss": 0.3124, "step": 17887 }, { "epoch": 1.0016799193638706, "grad_norm": 1.5386265516281128, "learning_rate": 8.939500000000001e-05, "loss": 0.5339, "step": 17888 }, { "epoch": 1.0017359166759996, "grad_norm": 2.078693389892578, "learning_rate": 8.94e-05, "loss": 0.414, "step": 17889 }, { "epoch": 1.0017919139881286, "grad_norm": 1.1553360223770142, "learning_rate": 8.9405e-05, "loss": 0.478, "step": 17890 }, { "epoch": 1.0018479113002576, "grad_norm": 1.3309661149978638, "learning_rate": 8.941e-05, "loss": 0.4211, "step": 17891 }, { "epoch": 1.0019039086123867, "grad_norm": 1.216221809387207, "learning_rate": 8.941500000000001e-05, "loss": 0.5105, "step": 17892 }, { "epoch": 1.0019599059245157, "grad_norm": 1.3124853372573853, "learning_rate": 8.942000000000001e-05, "loss": 0.5729, "step": 17893 }, { "epoch": 1.0020159032366447, "grad_norm": 1.3122268915176392, "learning_rate": 8.9425e-05, "loss": 0.4651, "step": 17894 }, { "epoch": 1.0020719005487737, "grad_norm": 1.1882063150405884, "learning_rate": 8.943e-05, "loss": 0.3396, "step": 17895 }, { "epoch": 1.0021278978609027, 
"grad_norm": 1.3756940364837646, "learning_rate": 8.9435e-05, "loss": 0.3477, "step": 17896 }, { "epoch": 1.0021838951730317, "grad_norm": 1.2090256214141846, "learning_rate": 8.944e-05, "loss": 0.4787, "step": 17897 }, { "epoch": 1.0022398924851608, "grad_norm": 1.4074785709381104, "learning_rate": 8.9445e-05, "loss": 0.5455, "step": 17898 }, { "epoch": 1.0022958897972898, "grad_norm": 1.383531928062439, "learning_rate": 8.945e-05, "loss": 0.4804, "step": 17899 }, { "epoch": 1.0023518871094188, "grad_norm": 1.4040472507476807, "learning_rate": 8.9455e-05, "loss": 0.5287, "step": 17900 }, { "epoch": 1.0024078844215478, "grad_norm": 1.351027488708496, "learning_rate": 8.946e-05, "loss": 0.3817, "step": 17901 }, { "epoch": 1.0024638817336768, "grad_norm": 1.2312889099121094, "learning_rate": 8.9465e-05, "loss": 0.4973, "step": 17902 }, { "epoch": 1.0025198790458059, "grad_norm": 1.5979794263839722, "learning_rate": 8.947000000000001e-05, "loss": 0.5239, "step": 17903 }, { "epoch": 1.0025758763579349, "grad_norm": 1.2218527793884277, "learning_rate": 8.9475e-05, "loss": 0.5034, "step": 17904 }, { "epoch": 1.002631873670064, "grad_norm": 1.7417958974838257, "learning_rate": 8.948e-05, "loss": 0.5016, "step": 17905 }, { "epoch": 1.002687870982193, "grad_norm": 1.3673913478851318, "learning_rate": 8.9485e-05, "loss": 0.3737, "step": 17906 }, { "epoch": 1.002743868294322, "grad_norm": 1.341664433479309, "learning_rate": 8.949000000000001e-05, "loss": 0.4445, "step": 17907 }, { "epoch": 1.002799865606451, "grad_norm": 1.3660603761672974, "learning_rate": 8.949500000000001e-05, "loss": 0.4294, "step": 17908 }, { "epoch": 1.00285586291858, "grad_norm": 1.244755744934082, "learning_rate": 8.950000000000001e-05, "loss": 0.5295, "step": 17909 }, { "epoch": 1.002911860230709, "grad_norm": 1.594760775566101, "learning_rate": 8.9505e-05, "loss": 0.3796, "step": 17910 }, { "epoch": 1.002967857542838, "grad_norm": 1.0852961540222168, "learning_rate": 8.951e-05, "loss": 0.3523, 
"step": 17911 }, { "epoch": 1.003023854854967, "grad_norm": 1.42945396900177, "learning_rate": 8.9515e-05, "loss": 0.4033, "step": 17912 }, { "epoch": 1.003079852167096, "grad_norm": 1.4561173915863037, "learning_rate": 8.952000000000001e-05, "loss": 0.3425, "step": 17913 }, { "epoch": 1.003135849479225, "grad_norm": 1.454376459121704, "learning_rate": 8.952500000000001e-05, "loss": 0.368, "step": 17914 }, { "epoch": 1.003191846791354, "grad_norm": 1.29525887966156, "learning_rate": 8.953e-05, "loss": 0.3543, "step": 17915 }, { "epoch": 1.003247844103483, "grad_norm": 1.374119520187378, "learning_rate": 8.9535e-05, "loss": 0.486, "step": 17916 }, { "epoch": 1.0033038414156121, "grad_norm": 1.313292384147644, "learning_rate": 8.954e-05, "loss": 0.3136, "step": 17917 }, { "epoch": 1.0033598387277411, "grad_norm": 1.3198938369750977, "learning_rate": 8.9545e-05, "loss": 0.3961, "step": 17918 }, { "epoch": 1.0034158360398702, "grad_norm": 1.3486634492874146, "learning_rate": 8.955e-05, "loss": 0.3769, "step": 17919 }, { "epoch": 1.0034718333519992, "grad_norm": 1.2486048936843872, "learning_rate": 8.9555e-05, "loss": 0.3641, "step": 17920 }, { "epoch": 1.0035278306641282, "grad_norm": 1.2347756624221802, "learning_rate": 8.956e-05, "loss": 0.4007, "step": 17921 }, { "epoch": 1.0035838279762572, "grad_norm": 1.5285927057266235, "learning_rate": 8.9565e-05, "loss": 0.5858, "step": 17922 }, { "epoch": 1.0036398252883862, "grad_norm": 1.3821576833724976, "learning_rate": 8.957000000000001e-05, "loss": 0.3995, "step": 17923 }, { "epoch": 1.0036958226005153, "grad_norm": 1.405597448348999, "learning_rate": 8.957500000000001e-05, "loss": 0.423, "step": 17924 }, { "epoch": 1.0037518199126443, "grad_norm": 1.061468243598938, "learning_rate": 8.958e-05, "loss": 0.3636, "step": 17925 }, { "epoch": 1.0038078172247733, "grad_norm": 1.2281023263931274, "learning_rate": 8.9585e-05, "loss": 0.3992, "step": 17926 }, { "epoch": 1.0038638145369023, "grad_norm": 1.4296306371688843, 
"learning_rate": 8.959e-05, "loss": 0.4056, "step": 17927 }, { "epoch": 1.0039198118490313, "grad_norm": 1.3915297985076904, "learning_rate": 8.959500000000001e-05, "loss": 0.4944, "step": 17928 }, { "epoch": 1.0039758091611604, "grad_norm": 1.2303251028060913, "learning_rate": 8.960000000000001e-05, "loss": 0.4113, "step": 17929 }, { "epoch": 1.0040318064732894, "grad_norm": 1.4397661685943604, "learning_rate": 8.960500000000001e-05, "loss": 0.4788, "step": 17930 }, { "epoch": 1.0040878037854184, "grad_norm": 1.4920543432235718, "learning_rate": 8.961e-05, "loss": 0.4156, "step": 17931 }, { "epoch": 1.0041438010975474, "grad_norm": 1.8774982690811157, "learning_rate": 8.9615e-05, "loss": 0.4992, "step": 17932 }, { "epoch": 1.0041997984096764, "grad_norm": 1.4324315786361694, "learning_rate": 8.962e-05, "loss": 0.3682, "step": 17933 }, { "epoch": 1.0042557957218055, "grad_norm": 1.3314342498779297, "learning_rate": 8.962500000000001e-05, "loss": 0.4155, "step": 17934 }, { "epoch": 1.0043117930339345, "grad_norm": 1.4088034629821777, "learning_rate": 8.963000000000001e-05, "loss": 0.497, "step": 17935 }, { "epoch": 1.0043677903460635, "grad_norm": 1.3769229650497437, "learning_rate": 8.9635e-05, "loss": 0.4401, "step": 17936 }, { "epoch": 1.0044237876581925, "grad_norm": 1.2523490190505981, "learning_rate": 8.964e-05, "loss": 0.3812, "step": 17937 }, { "epoch": 1.0044797849703215, "grad_norm": 1.2642509937286377, "learning_rate": 8.9645e-05, "loss": 0.4266, "step": 17938 }, { "epoch": 1.0045357822824506, "grad_norm": 1.5512166023254395, "learning_rate": 8.965e-05, "loss": 0.4254, "step": 17939 }, { "epoch": 1.0045917795945796, "grad_norm": 1.2381703853607178, "learning_rate": 8.9655e-05, "loss": 0.3338, "step": 17940 }, { "epoch": 1.0046477769067084, "grad_norm": 1.2523268461227417, "learning_rate": 8.966e-05, "loss": 0.4377, "step": 17941 }, { "epoch": 1.0047037742188374, "grad_norm": 1.6331632137298584, "learning_rate": 8.9665e-05, "loss": 0.5569, "step": 17942 }, 
{ "epoch": 1.0047597715309664, "grad_norm": 1.2084189653396606, "learning_rate": 8.967000000000001e-05, "loss": 0.5117, "step": 17943 }, { "epoch": 1.0048157688430954, "grad_norm": 1.5852941274642944, "learning_rate": 8.967500000000001e-05, "loss": 0.4931, "step": 17944 }, { "epoch": 1.0048717661552244, "grad_norm": 1.2750952243804932, "learning_rate": 8.968000000000001e-05, "loss": 0.4169, "step": 17945 }, { "epoch": 1.0049277634673535, "grad_norm": 1.368571162223816, "learning_rate": 8.9685e-05, "loss": 0.4283, "step": 17946 }, { "epoch": 1.0049837607794825, "grad_norm": 1.2909531593322754, "learning_rate": 8.969e-05, "loss": 0.4161, "step": 17947 }, { "epoch": 1.0050397580916115, "grad_norm": 1.4677278995513916, "learning_rate": 8.9695e-05, "loss": 0.3967, "step": 17948 }, { "epoch": 1.0050957554037405, "grad_norm": 1.840155839920044, "learning_rate": 8.970000000000001e-05, "loss": 0.4164, "step": 17949 }, { "epoch": 1.0051517527158695, "grad_norm": 1.129811406135559, "learning_rate": 8.970500000000001e-05, "loss": 0.3269, "step": 17950 }, { "epoch": 1.0052077500279986, "grad_norm": 1.4732576608657837, "learning_rate": 8.971e-05, "loss": 0.4069, "step": 17951 }, { "epoch": 1.0052637473401276, "grad_norm": 1.497945785522461, "learning_rate": 8.9715e-05, "loss": 0.4268, "step": 17952 }, { "epoch": 1.0053197446522566, "grad_norm": 1.3697718381881714, "learning_rate": 8.972e-05, "loss": 0.467, "step": 17953 }, { "epoch": 1.0053757419643856, "grad_norm": 1.3891284465789795, "learning_rate": 8.9725e-05, "loss": 0.3637, "step": 17954 }, { "epoch": 1.0054317392765146, "grad_norm": 1.3554348945617676, "learning_rate": 8.973e-05, "loss": 0.5177, "step": 17955 }, { "epoch": 1.0054877365886437, "grad_norm": 1.5390160083770752, "learning_rate": 8.973500000000001e-05, "loss": 0.4884, "step": 17956 }, { "epoch": 1.0055437339007727, "grad_norm": 1.1982821226119995, "learning_rate": 8.974e-05, "loss": 0.4356, "step": 17957 }, { "epoch": 1.0055997312129017, "grad_norm": 
1.4162977933883667, "learning_rate": 8.9745e-05, "loss": 0.4273, "step": 17958 }, { "epoch": 1.0056557285250307, "grad_norm": 1.566547155380249, "learning_rate": 8.975e-05, "loss": 0.5884, "step": 17959 }, { "epoch": 1.0057117258371597, "grad_norm": 1.2957085371017456, "learning_rate": 8.9755e-05, "loss": 0.3804, "step": 17960 }, { "epoch": 1.0057677231492888, "grad_norm": 1.5151442289352417, "learning_rate": 8.976e-05, "loss": 0.5292, "step": 17961 }, { "epoch": 1.0058237204614178, "grad_norm": 1.4708261489868164, "learning_rate": 8.9765e-05, "loss": 0.4407, "step": 17962 }, { "epoch": 1.0058797177735468, "grad_norm": 1.3044770956039429, "learning_rate": 8.977000000000002e-05, "loss": 0.6537, "step": 17963 }, { "epoch": 1.0059357150856758, "grad_norm": 1.1265450716018677, "learning_rate": 8.977500000000001e-05, "loss": 0.4736, "step": 17964 }, { "epoch": 1.0059917123978048, "grad_norm": 1.3219960927963257, "learning_rate": 8.978000000000001e-05, "loss": 0.3908, "step": 17965 }, { "epoch": 1.0060477097099338, "grad_norm": 1.1987253427505493, "learning_rate": 8.978500000000001e-05, "loss": 0.4455, "step": 17966 }, { "epoch": 1.0061037070220629, "grad_norm": 1.7745444774627686, "learning_rate": 8.979e-05, "loss": 0.5543, "step": 17967 }, { "epoch": 1.0061597043341919, "grad_norm": 1.3833746910095215, "learning_rate": 8.9795e-05, "loss": 0.5038, "step": 17968 }, { "epoch": 1.006215701646321, "grad_norm": 1.6064419746398926, "learning_rate": 8.98e-05, "loss": 0.4707, "step": 17969 }, { "epoch": 1.00627169895845, "grad_norm": 1.8970528841018677, "learning_rate": 8.980500000000001e-05, "loss": 0.532, "step": 17970 }, { "epoch": 1.006327696270579, "grad_norm": 1.2309457063674927, "learning_rate": 8.981000000000001e-05, "loss": 0.375, "step": 17971 }, { "epoch": 1.006383693582708, "grad_norm": 1.2881358861923218, "learning_rate": 8.9815e-05, "loss": 0.4442, "step": 17972 }, { "epoch": 1.006439690894837, "grad_norm": 2.184856653213501, "learning_rate": 8.982e-05, "loss": 
0.3165, "step": 17973 }, { "epoch": 1.006495688206966, "grad_norm": 1.2457536458969116, "learning_rate": 8.9825e-05, "loss": 0.3519, "step": 17974 }, { "epoch": 1.006551685519095, "grad_norm": 1.2922468185424805, "learning_rate": 8.983e-05, "loss": 0.361, "step": 17975 }, { "epoch": 1.006607682831224, "grad_norm": 1.3228814601898193, "learning_rate": 8.9835e-05, "loss": 0.377, "step": 17976 }, { "epoch": 1.006663680143353, "grad_norm": 1.4710068702697754, "learning_rate": 8.984000000000001e-05, "loss": 0.4726, "step": 17977 }, { "epoch": 1.006719677455482, "grad_norm": 1.1677091121673584, "learning_rate": 8.9845e-05, "loss": 0.5768, "step": 17978 }, { "epoch": 1.006775674767611, "grad_norm": 1.220658302307129, "learning_rate": 8.985e-05, "loss": 0.4234, "step": 17979 }, { "epoch": 1.0068316720797401, "grad_norm": 1.3482164144515991, "learning_rate": 8.9855e-05, "loss": 0.3233, "step": 17980 }, { "epoch": 1.0068876693918691, "grad_norm": 1.226611614227295, "learning_rate": 8.986e-05, "loss": 0.4663, "step": 17981 }, { "epoch": 1.0069436667039982, "grad_norm": 2.298543691635132, "learning_rate": 8.9865e-05, "loss": 0.4426, "step": 17982 }, { "epoch": 1.0069996640161272, "grad_norm": 1.257027268409729, "learning_rate": 8.987e-05, "loss": 0.454, "step": 17983 }, { "epoch": 1.0070556613282562, "grad_norm": 1.2130361795425415, "learning_rate": 8.9875e-05, "loss": 0.5599, "step": 17984 }, { "epoch": 1.0071116586403852, "grad_norm": 1.4071629047393799, "learning_rate": 8.988000000000001e-05, "loss": 0.4986, "step": 17985 }, { "epoch": 1.0071676559525142, "grad_norm": 1.417580485343933, "learning_rate": 8.988500000000001e-05, "loss": 0.426, "step": 17986 }, { "epoch": 1.0072236532646432, "grad_norm": 1.2887449264526367, "learning_rate": 8.989000000000001e-05, "loss": 0.3997, "step": 17987 }, { "epoch": 1.0072796505767723, "grad_norm": 0.9595540165901184, "learning_rate": 8.9895e-05, "loss": 0.2766, "step": 17988 }, { "epoch": 1.0073356478889013, "grad_norm": 
1.271773099899292, "learning_rate": 8.99e-05, "loss": 0.4387, "step": 17989 }, { "epoch": 1.0073916452010303, "grad_norm": 1.2614787817001343, "learning_rate": 8.9905e-05, "loss": 0.3617, "step": 17990 }, { "epoch": 1.0074476425131593, "grad_norm": 1.2842191457748413, "learning_rate": 8.991000000000001e-05, "loss": 0.3945, "step": 17991 }, { "epoch": 1.0075036398252883, "grad_norm": 1.3162118196487427, "learning_rate": 8.991500000000001e-05, "loss": 0.402, "step": 17992 }, { "epoch": 1.0075596371374174, "grad_norm": 1.1518367528915405, "learning_rate": 8.992e-05, "loss": 0.453, "step": 17993 }, { "epoch": 1.0076156344495464, "grad_norm": 1.467724323272705, "learning_rate": 8.9925e-05, "loss": 0.3989, "step": 17994 }, { "epoch": 1.0076716317616754, "grad_norm": 1.5824946165084839, "learning_rate": 8.993e-05, "loss": 0.4059, "step": 17995 }, { "epoch": 1.0077276290738044, "grad_norm": 1.2596561908721924, "learning_rate": 8.9935e-05, "loss": 0.3671, "step": 17996 }, { "epoch": 1.0077836263859334, "grad_norm": 1.2339483499526978, "learning_rate": 8.994e-05, "loss": 0.359, "step": 17997 }, { "epoch": 1.0078396236980625, "grad_norm": 1.5595121383666992, "learning_rate": 8.994500000000001e-05, "loss": 0.4069, "step": 17998 }, { "epoch": 1.0078956210101915, "grad_norm": 1.2758089303970337, "learning_rate": 8.995e-05, "loss": 0.5216, "step": 17999 }, { "epoch": 1.0079516183223205, "grad_norm": 1.5260242223739624, "learning_rate": 8.9955e-05, "loss": 0.49, "step": 18000 }, { "epoch": 1.0080076156344495, "grad_norm": 1.3660736083984375, "learning_rate": 8.996e-05, "loss": 0.3987, "step": 18001 }, { "epoch": 1.0080636129465785, "grad_norm": 1.0238418579101562, "learning_rate": 8.9965e-05, "loss": 0.3057, "step": 18002 }, { "epoch": 1.0081196102587076, "grad_norm": 1.3377869129180908, "learning_rate": 8.997000000000001e-05, "loss": 0.5516, "step": 18003 }, { "epoch": 1.0081756075708366, "grad_norm": 1.399433970451355, "learning_rate": 8.9975e-05, "loss": 0.433, "step": 18004 }, 
{ "epoch": 1.0082316048829656, "grad_norm": 1.4594414234161377, "learning_rate": 8.998e-05, "loss": 0.3464, "step": 18005 }, { "epoch": 1.0082876021950946, "grad_norm": 1.1530085802078247, "learning_rate": 8.998500000000001e-05, "loss": 0.3205, "step": 18006 }, { "epoch": 1.0083435995072236, "grad_norm": 1.2954506874084473, "learning_rate": 8.999000000000001e-05, "loss": 0.3969, "step": 18007 }, { "epoch": 1.0083995968193527, "grad_norm": 1.3576916456222534, "learning_rate": 8.999500000000001e-05, "loss": 0.4051, "step": 18008 }, { "epoch": 1.0084555941314817, "grad_norm": 1.2132610082626343, "learning_rate": 9e-05, "loss": 0.4627, "step": 18009 }, { "epoch": 1.0085115914436107, "grad_norm": 1.4459677934646606, "learning_rate": 9.0005e-05, "loss": 0.4382, "step": 18010 }, { "epoch": 1.0085675887557397, "grad_norm": 1.4301097393035889, "learning_rate": 9.001e-05, "loss": 0.477, "step": 18011 }, { "epoch": 1.0086235860678687, "grad_norm": 1.1272304058074951, "learning_rate": 9.001500000000001e-05, "loss": 0.3235, "step": 18012 }, { "epoch": 1.0086795833799977, "grad_norm": 1.4270473718643188, "learning_rate": 9.002000000000001e-05, "loss": 0.4742, "step": 18013 }, { "epoch": 1.0087355806921268, "grad_norm": 1.3131418228149414, "learning_rate": 9.0025e-05, "loss": 0.3297, "step": 18014 }, { "epoch": 1.0087915780042558, "grad_norm": 1.2703737020492554, "learning_rate": 9.003e-05, "loss": 0.3442, "step": 18015 }, { "epoch": 1.0088475753163848, "grad_norm": 1.1593832969665527, "learning_rate": 9.0035e-05, "loss": 0.3833, "step": 18016 }, { "epoch": 1.0089035726285138, "grad_norm": 1.2208229303359985, "learning_rate": 9.004e-05, "loss": 0.2724, "step": 18017 }, { "epoch": 1.0089595699406428, "grad_norm": 1.154589056968689, "learning_rate": 9.0045e-05, "loss": 0.3801, "step": 18018 }, { "epoch": 1.0090155672527719, "grad_norm": 1.2062815427780151, "learning_rate": 9.005000000000001e-05, "loss": 0.3698, "step": 18019 }, { "epoch": 1.0090715645649009, "grad_norm": 
1.1477242708206177, "learning_rate": 9.0055e-05, "loss": 0.3661, "step": 18020 }, { "epoch": 1.00912756187703, "grad_norm": 1.3812172412872314, "learning_rate": 9.006e-05, "loss": 0.4077, "step": 18021 }, { "epoch": 1.009183559189159, "grad_norm": 1.115505337715149, "learning_rate": 9.0065e-05, "loss": 0.4108, "step": 18022 }, { "epoch": 1.009239556501288, "grad_norm": 1.1311782598495483, "learning_rate": 9.007e-05, "loss": 0.3722, "step": 18023 }, { "epoch": 1.009295553813417, "grad_norm": 1.1441736221313477, "learning_rate": 9.007500000000001e-05, "loss": 0.3694, "step": 18024 }, { "epoch": 1.009351551125546, "grad_norm": 1.6131408214569092, "learning_rate": 9.008e-05, "loss": 0.4297, "step": 18025 }, { "epoch": 1.009407548437675, "grad_norm": 1.1815921068191528, "learning_rate": 9.0085e-05, "loss": 0.4238, "step": 18026 }, { "epoch": 1.009463545749804, "grad_norm": 1.164927363395691, "learning_rate": 9.009000000000001e-05, "loss": 0.4517, "step": 18027 }, { "epoch": 1.009519543061933, "grad_norm": 1.2917762994766235, "learning_rate": 9.009500000000001e-05, "loss": 0.3382, "step": 18028 }, { "epoch": 1.009575540374062, "grad_norm": 1.2980742454528809, "learning_rate": 9.010000000000001e-05, "loss": 0.4674, "step": 18029 }, { "epoch": 1.009631537686191, "grad_norm": 1.3152427673339844, "learning_rate": 9.0105e-05, "loss": 0.5338, "step": 18030 }, { "epoch": 1.00968753499832, "grad_norm": 1.1749911308288574, "learning_rate": 9.011e-05, "loss": 0.5392, "step": 18031 }, { "epoch": 1.009743532310449, "grad_norm": 1.306301236152649, "learning_rate": 9.0115e-05, "loss": 0.4575, "step": 18032 }, { "epoch": 1.0097995296225781, "grad_norm": 1.0710113048553467, "learning_rate": 9.012e-05, "loss": 0.3521, "step": 18033 }, { "epoch": 1.0098555269347071, "grad_norm": 1.607362985610962, "learning_rate": 9.012500000000001e-05, "loss": 0.4986, "step": 18034 }, { "epoch": 1.0099115242468362, "grad_norm": 1.424028992652893, "learning_rate": 9.013e-05, "loss": 0.4623, "step": 18035 
}, { "epoch": 1.0099675215589652, "grad_norm": 1.2317618131637573, "learning_rate": 9.0135e-05, "loss": 0.3437, "step": 18036 }, { "epoch": 1.0100235188710942, "grad_norm": 1.0915732383728027, "learning_rate": 9.014e-05, "loss": 0.3834, "step": 18037 }, { "epoch": 1.0100795161832232, "grad_norm": 1.811660647392273, "learning_rate": 9.0145e-05, "loss": 0.3943, "step": 18038 }, { "epoch": 1.0101355134953522, "grad_norm": 1.3503577709197998, "learning_rate": 9.015e-05, "loss": 0.4379, "step": 18039 }, { "epoch": 1.0101915108074813, "grad_norm": 1.38282310962677, "learning_rate": 9.0155e-05, "loss": 0.5986, "step": 18040 }, { "epoch": 1.0102475081196103, "grad_norm": 1.1291764974594116, "learning_rate": 9.016e-05, "loss": 0.3217, "step": 18041 }, { "epoch": 1.0103035054317393, "grad_norm": 1.450632929801941, "learning_rate": 9.0165e-05, "loss": 0.3424, "step": 18042 }, { "epoch": 1.0103595027438683, "grad_norm": 1.3379839658737183, "learning_rate": 9.017e-05, "loss": 0.3377, "step": 18043 }, { "epoch": 1.0104155000559973, "grad_norm": 1.598143458366394, "learning_rate": 9.017500000000001e-05, "loss": 0.4294, "step": 18044 }, { "epoch": 1.0104714973681264, "grad_norm": 1.1232203245162964, "learning_rate": 9.018000000000001e-05, "loss": 0.3088, "step": 18045 }, { "epoch": 1.0105274946802554, "grad_norm": 1.3722296953201294, "learning_rate": 9.0185e-05, "loss": 0.466, "step": 18046 }, { "epoch": 1.0105834919923844, "grad_norm": 2.807011127471924, "learning_rate": 9.019e-05, "loss": 0.5928, "step": 18047 }, { "epoch": 1.0106394893045134, "grad_norm": 1.2643423080444336, "learning_rate": 9.019500000000001e-05, "loss": 0.4106, "step": 18048 }, { "epoch": 1.0106954866166424, "grad_norm": 1.3486216068267822, "learning_rate": 9.020000000000001e-05, "loss": 0.4426, "step": 18049 }, { "epoch": 1.0107514839287715, "grad_norm": 1.446391224861145, "learning_rate": 9.020500000000001e-05, "loss": 0.3916, "step": 18050 }, { "epoch": 1.0108074812409005, "grad_norm": 1.3402318954467773, 
"learning_rate": 9.021e-05, "loss": 0.3619, "step": 18051 }, { "epoch": 1.0108634785530295, "grad_norm": 1.302295446395874, "learning_rate": 9.0215e-05, "loss": 0.2946, "step": 18052 }, { "epoch": 1.0109194758651585, "grad_norm": 1.367607831954956, "learning_rate": 9.022e-05, "loss": 0.4433, "step": 18053 }, { "epoch": 1.0109754731772875, "grad_norm": 1.4880040884017944, "learning_rate": 9.0225e-05, "loss": 0.4124, "step": 18054 }, { "epoch": 1.0110314704894166, "grad_norm": 1.5772989988327026, "learning_rate": 9.023000000000001e-05, "loss": 0.5283, "step": 18055 }, { "epoch": 1.0110874678015456, "grad_norm": 1.2875374555587769, "learning_rate": 9.0235e-05, "loss": 0.5414, "step": 18056 }, { "epoch": 1.0111434651136746, "grad_norm": 1.363318920135498, "learning_rate": 9.024e-05, "loss": 0.484, "step": 18057 }, { "epoch": 1.0111994624258036, "grad_norm": 1.4215304851531982, "learning_rate": 9.0245e-05, "loss": 0.3954, "step": 18058 }, { "epoch": 1.0112554597379326, "grad_norm": 1.273647665977478, "learning_rate": 9.025e-05, "loss": 0.3881, "step": 18059 }, { "epoch": 1.0113114570500616, "grad_norm": 1.1219748258590698, "learning_rate": 9.0255e-05, "loss": 0.432, "step": 18060 }, { "epoch": 1.0113674543621907, "grad_norm": 1.283856749534607, "learning_rate": 9.026e-05, "loss": 0.3797, "step": 18061 }, { "epoch": 1.0114234516743197, "grad_norm": 1.2342506647109985, "learning_rate": 9.0265e-05, "loss": 0.3694, "step": 18062 }, { "epoch": 1.0114794489864487, "grad_norm": 1.2998380661010742, "learning_rate": 9.027e-05, "loss": 0.3719, "step": 18063 }, { "epoch": 1.0115354462985777, "grad_norm": 1.250680685043335, "learning_rate": 9.027500000000001e-05, "loss": 0.4127, "step": 18064 }, { "epoch": 1.0115914436107067, "grad_norm": 1.2504264116287231, "learning_rate": 9.028000000000001e-05, "loss": 0.4795, "step": 18065 }, { "epoch": 1.0116474409228358, "grad_norm": 1.4863789081573486, "learning_rate": 9.028500000000001e-05, "loss": 0.4474, "step": 18066 }, { "epoch": 
1.0117034382349648, "grad_norm": 1.2361491918563843, "learning_rate": 9.029e-05, "loss": 0.4698, "step": 18067 }, { "epoch": 1.0117594355470938, "grad_norm": 1.6292223930358887, "learning_rate": 9.0295e-05, "loss": 0.4569, "step": 18068 }, { "epoch": 1.0118154328592228, "grad_norm": 1.5858267545700073, "learning_rate": 9.030000000000001e-05, "loss": 0.3559, "step": 18069 }, { "epoch": 1.0118714301713518, "grad_norm": 1.1331160068511963, "learning_rate": 9.030500000000001e-05, "loss": 0.295, "step": 18070 }, { "epoch": 1.0119274274834809, "grad_norm": 1.5241893529891968, "learning_rate": 9.031000000000001e-05, "loss": 0.4411, "step": 18071 }, { "epoch": 1.0119834247956099, "grad_norm": 1.3934721946716309, "learning_rate": 9.0315e-05, "loss": 0.5948, "step": 18072 }, { "epoch": 1.012039422107739, "grad_norm": 1.6788913011550903, "learning_rate": 9.032e-05, "loss": 0.3659, "step": 18073 }, { "epoch": 1.012095419419868, "grad_norm": 1.335060715675354, "learning_rate": 9.0325e-05, "loss": 0.31, "step": 18074 }, { "epoch": 1.012151416731997, "grad_norm": 1.4532748460769653, "learning_rate": 9.033e-05, "loss": 0.524, "step": 18075 }, { "epoch": 1.012207414044126, "grad_norm": 1.3463493585586548, "learning_rate": 9.033500000000001e-05, "loss": 0.3406, "step": 18076 }, { "epoch": 1.012263411356255, "grad_norm": 1.3908874988555908, "learning_rate": 9.034e-05, "loss": 0.3856, "step": 18077 }, { "epoch": 1.012319408668384, "grad_norm": 1.3213330507278442, "learning_rate": 9.0345e-05, "loss": 0.3887, "step": 18078 }, { "epoch": 1.012375405980513, "grad_norm": 1.2592127323150635, "learning_rate": 9.035e-05, "loss": 0.3366, "step": 18079 }, { "epoch": 1.012431403292642, "grad_norm": 1.0710480213165283, "learning_rate": 9.0355e-05, "loss": 0.3704, "step": 18080 }, { "epoch": 1.012487400604771, "grad_norm": 1.4461910724639893, "learning_rate": 9.036e-05, "loss": 0.5877, "step": 18081 }, { "epoch": 1.0125433979169, "grad_norm": 1.5060564279556274, "learning_rate": 9.0365e-05, 
"loss": 0.5551, "step": 18082 }, { "epoch": 1.012599395229029, "grad_norm": 1.3501626253128052, "learning_rate": 9.037e-05, "loss": 0.4159, "step": 18083 }, { "epoch": 1.012655392541158, "grad_norm": 1.2449951171875, "learning_rate": 9.037500000000001e-05, "loss": 0.4655, "step": 18084 }, { "epoch": 1.0127113898532871, "grad_norm": 1.2275044918060303, "learning_rate": 9.038000000000001e-05, "loss": 0.3742, "step": 18085 }, { "epoch": 1.0127673871654161, "grad_norm": 1.4492491483688354, "learning_rate": 9.038500000000001e-05, "loss": 0.4606, "step": 18086 }, { "epoch": 1.0128233844775452, "grad_norm": 1.2193833589553833, "learning_rate": 9.039000000000001e-05, "loss": 0.447, "step": 18087 }, { "epoch": 1.0128793817896742, "grad_norm": 1.247789978981018, "learning_rate": 9.0395e-05, "loss": 0.446, "step": 18088 }, { "epoch": 1.0129353791018032, "grad_norm": 1.3559398651123047, "learning_rate": 9.04e-05, "loss": 0.5124, "step": 18089 }, { "epoch": 1.0129913764139322, "grad_norm": 1.4070031642913818, "learning_rate": 9.040500000000001e-05, "loss": 0.39, "step": 18090 }, { "epoch": 1.0130473737260612, "grad_norm": 1.2927995920181274, "learning_rate": 9.041000000000001e-05, "loss": 0.3433, "step": 18091 }, { "epoch": 1.0131033710381903, "grad_norm": 1.4199851751327515, "learning_rate": 9.041500000000001e-05, "loss": 0.3848, "step": 18092 }, { "epoch": 1.0131593683503193, "grad_norm": 1.090008020401001, "learning_rate": 9.042e-05, "loss": 0.3827, "step": 18093 }, { "epoch": 1.0132153656624483, "grad_norm": 1.307547926902771, "learning_rate": 9.0425e-05, "loss": 0.4308, "step": 18094 }, { "epoch": 1.0132713629745773, "grad_norm": 1.404300332069397, "learning_rate": 9.043e-05, "loss": 0.5085, "step": 18095 }, { "epoch": 1.0133273602867063, "grad_norm": 1.1837518215179443, "learning_rate": 9.0435e-05, "loss": 0.404, "step": 18096 }, { "epoch": 1.0133833575988354, "grad_norm": 1.3322854042053223, "learning_rate": 9.044000000000001e-05, "loss": 0.4325, "step": 18097 }, { 
"epoch": 1.0134393549109644, "grad_norm": 1.3566287755966187, "learning_rate": 9.0445e-05, "loss": 0.5156, "step": 18098 }, { "epoch": 1.0134953522230934, "grad_norm": 1.5615484714508057, "learning_rate": 9.045e-05, "loss": 0.3841, "step": 18099 }, { "epoch": 1.0135513495352224, "grad_norm": 3.3497726917266846, "learning_rate": 9.0455e-05, "loss": 0.4027, "step": 18100 }, { "epoch": 1.0136073468473514, "grad_norm": 1.3943313360214233, "learning_rate": 9.046e-05, "loss": 0.3069, "step": 18101 }, { "epoch": 1.0136633441594805, "grad_norm": 1.2476478815078735, "learning_rate": 9.0465e-05, "loss": 0.5064, "step": 18102 }, { "epoch": 1.0137193414716095, "grad_norm": 1.4256521463394165, "learning_rate": 9.046999999999999e-05, "loss": 0.4229, "step": 18103 }, { "epoch": 1.0137753387837385, "grad_norm": 1.2707245349884033, "learning_rate": 9.0475e-05, "loss": 0.5447, "step": 18104 }, { "epoch": 1.0138313360958675, "grad_norm": 1.1792269945144653, "learning_rate": 9.048000000000001e-05, "loss": 0.4014, "step": 18105 }, { "epoch": 1.0138873334079963, "grad_norm": 1.4103285074234009, "learning_rate": 9.048500000000001e-05, "loss": 0.5126, "step": 18106 }, { "epoch": 1.0139433307201253, "grad_norm": 1.3352609872817993, "learning_rate": 9.049000000000001e-05, "loss": 0.3986, "step": 18107 }, { "epoch": 1.0139993280322543, "grad_norm": 1.3411178588867188, "learning_rate": 9.049500000000001e-05, "loss": 0.32, "step": 18108 }, { "epoch": 1.0140553253443834, "grad_norm": 1.361093282699585, "learning_rate": 9.05e-05, "loss": 0.4446, "step": 18109 }, { "epoch": 1.0141113226565124, "grad_norm": 1.238129734992981, "learning_rate": 9.0505e-05, "loss": 0.3947, "step": 18110 }, { "epoch": 1.0141673199686414, "grad_norm": 1.7928906679153442, "learning_rate": 9.051000000000001e-05, "loss": 0.3852, "step": 18111 }, { "epoch": 1.0142233172807704, "grad_norm": 1.4398564100265503, "learning_rate": 9.051500000000001e-05, "loss": 0.4593, "step": 18112 }, { "epoch": 1.0142793145928994, 
"grad_norm": 1.3197253942489624, "learning_rate": 9.052000000000001e-05, "loss": 0.3959, "step": 18113 }, { "epoch": 1.0143353119050285, "grad_norm": 1.3324925899505615, "learning_rate": 9.0525e-05, "loss": 0.3748, "step": 18114 }, { "epoch": 1.0143913092171575, "grad_norm": 1.260272741317749, "learning_rate": 9.053e-05, "loss": 0.3949, "step": 18115 }, { "epoch": 1.0144473065292865, "grad_norm": 1.5691372156143188, "learning_rate": 9.0535e-05, "loss": 0.3, "step": 18116 }, { "epoch": 1.0145033038414155, "grad_norm": 1.142928123474121, "learning_rate": 9.054e-05, "loss": 0.3668, "step": 18117 }, { "epoch": 1.0145593011535445, "grad_norm": 1.4724918603897095, "learning_rate": 9.054500000000001e-05, "loss": 0.4484, "step": 18118 }, { "epoch": 1.0146152984656736, "grad_norm": 1.3410335779190063, "learning_rate": 9.055e-05, "loss": 0.4898, "step": 18119 }, { "epoch": 1.0146712957778026, "grad_norm": 1.319427728652954, "learning_rate": 9.0555e-05, "loss": 0.4391, "step": 18120 }, { "epoch": 1.0147272930899316, "grad_norm": 1.2103686332702637, "learning_rate": 9.056e-05, "loss": 0.3887, "step": 18121 }, { "epoch": 1.0147832904020606, "grad_norm": 1.0283188819885254, "learning_rate": 9.0565e-05, "loss": 0.3699, "step": 18122 }, { "epoch": 1.0148392877141896, "grad_norm": 1.727451205253601, "learning_rate": 9.057e-05, "loss": 0.6659, "step": 18123 }, { "epoch": 1.0148952850263186, "grad_norm": 1.2413078546524048, "learning_rate": 9.0575e-05, "loss": 0.4249, "step": 18124 }, { "epoch": 1.0149512823384477, "grad_norm": 2.3689627647399902, "learning_rate": 9.058e-05, "loss": 0.4267, "step": 18125 }, { "epoch": 1.0150072796505767, "grad_norm": 1.4286799430847168, "learning_rate": 9.058500000000001e-05, "loss": 0.3831, "step": 18126 }, { "epoch": 1.0150632769627057, "grad_norm": 1.223140835762024, "learning_rate": 9.059000000000001e-05, "loss": 0.4263, "step": 18127 }, { "epoch": 1.0151192742748347, "grad_norm": 1.2765775918960571, "learning_rate": 9.059500000000001e-05, 
"loss": 0.4055, "step": 18128 }, { "epoch": 1.0151752715869637, "grad_norm": 1.4111768007278442, "learning_rate": 9.06e-05, "loss": 0.4041, "step": 18129 }, { "epoch": 1.0152312688990928, "grad_norm": 1.454330325126648, "learning_rate": 9.0605e-05, "loss": 0.3906, "step": 18130 }, { "epoch": 1.0152872662112218, "grad_norm": 1.5828416347503662, "learning_rate": 9.061e-05, "loss": 0.4863, "step": 18131 }, { "epoch": 1.0153432635233508, "grad_norm": 1.7145265340805054, "learning_rate": 9.0615e-05, "loss": 0.5591, "step": 18132 }, { "epoch": 1.0153992608354798, "grad_norm": 1.3797694444656372, "learning_rate": 9.062000000000001e-05, "loss": 0.4024, "step": 18133 }, { "epoch": 1.0154552581476088, "grad_norm": 1.3734862804412842, "learning_rate": 9.062500000000001e-05, "loss": 0.3615, "step": 18134 }, { "epoch": 1.0155112554597379, "grad_norm": 1.5914740562438965, "learning_rate": 9.063e-05, "loss": 0.3757, "step": 18135 }, { "epoch": 1.0155672527718669, "grad_norm": 1.196204423904419, "learning_rate": 9.0635e-05, "loss": 0.3574, "step": 18136 }, { "epoch": 1.015623250083996, "grad_norm": 1.4615166187286377, "learning_rate": 9.064e-05, "loss": 0.4394, "step": 18137 }, { "epoch": 1.015679247396125, "grad_norm": 1.5073596239089966, "learning_rate": 9.0645e-05, "loss": 0.3384, "step": 18138 }, { "epoch": 1.015735244708254, "grad_norm": 1.6935042142868042, "learning_rate": 9.065000000000001e-05, "loss": 0.6493, "step": 18139 }, { "epoch": 1.015791242020383, "grad_norm": 1.2316350936889648, "learning_rate": 9.0655e-05, "loss": 0.4317, "step": 18140 }, { "epoch": 1.015847239332512, "grad_norm": 1.423403024673462, "learning_rate": 9.066e-05, "loss": 0.4692, "step": 18141 }, { "epoch": 1.015903236644641, "grad_norm": 1.4836894273757935, "learning_rate": 9.0665e-05, "loss": 0.4839, "step": 18142 }, { "epoch": 1.01595923395677, "grad_norm": 1.208901047706604, "learning_rate": 9.067e-05, "loss": 0.2979, "step": 18143 }, { "epoch": 1.016015231268899, "grad_norm": 1.1822084188461304, 
"learning_rate": 9.0675e-05, "loss": 0.3664, "step": 18144 }, { "epoch": 1.016071228581028, "grad_norm": 1.3010916709899902, "learning_rate": 9.068e-05, "loss": 0.4969, "step": 18145 }, { "epoch": 1.016127225893157, "grad_norm": 1.1672778129577637, "learning_rate": 9.0685e-05, "loss": 0.4616, "step": 18146 }, { "epoch": 1.016183223205286, "grad_norm": 1.3962794542312622, "learning_rate": 9.069000000000001e-05, "loss": 0.3049, "step": 18147 }, { "epoch": 1.016239220517415, "grad_norm": 1.4669468402862549, "learning_rate": 9.069500000000001e-05, "loss": 0.4411, "step": 18148 }, { "epoch": 1.0162952178295441, "grad_norm": 1.3549784421920776, "learning_rate": 9.070000000000001e-05, "loss": 0.3852, "step": 18149 }, { "epoch": 1.0163512151416731, "grad_norm": 1.1801265478134155, "learning_rate": 9.0705e-05, "loss": 0.3111, "step": 18150 }, { "epoch": 1.0164072124538022, "grad_norm": 1.7732163667678833, "learning_rate": 9.071e-05, "loss": 0.4371, "step": 18151 }, { "epoch": 1.0164632097659312, "grad_norm": 1.2452828884124756, "learning_rate": 9.0715e-05, "loss": 0.4395, "step": 18152 }, { "epoch": 1.0165192070780602, "grad_norm": 1.396883487701416, "learning_rate": 9.072e-05, "loss": 0.4847, "step": 18153 }, { "epoch": 1.0165752043901892, "grad_norm": 1.3976457118988037, "learning_rate": 9.072500000000001e-05, "loss": 0.4056, "step": 18154 }, { "epoch": 1.0166312017023182, "grad_norm": 1.2956854104995728, "learning_rate": 9.073000000000001e-05, "loss": 0.402, "step": 18155 }, { "epoch": 1.0166871990144473, "grad_norm": 1.400383472442627, "learning_rate": 9.0735e-05, "loss": 0.3289, "step": 18156 }, { "epoch": 1.0167431963265763, "grad_norm": 1.3283774852752686, "learning_rate": 9.074e-05, "loss": 0.3727, "step": 18157 }, { "epoch": 1.0167991936387053, "grad_norm": 1.4578040838241577, "learning_rate": 9.0745e-05, "loss": 0.4059, "step": 18158 }, { "epoch": 1.0168551909508343, "grad_norm": 1.25513756275177, "learning_rate": 9.075e-05, "loss": 0.4249, "step": 18159 }, { 
"epoch": 1.0169111882629633, "grad_norm": 2.642794370651245, "learning_rate": 9.075500000000001e-05, "loss": 0.5193, "step": 18160 }, { "epoch": 1.0169671855750924, "grad_norm": 1.7181202173233032, "learning_rate": 9.076e-05, "loss": 0.4596, "step": 18161 }, { "epoch": 1.0170231828872214, "grad_norm": 1.3589255809783936, "learning_rate": 9.0765e-05, "loss": 0.615, "step": 18162 }, { "epoch": 1.0170791801993504, "grad_norm": 1.5083166360855103, "learning_rate": 9.077e-05, "loss": 0.4295, "step": 18163 }, { "epoch": 1.0171351775114794, "grad_norm": 2.4867968559265137, "learning_rate": 9.0775e-05, "loss": 0.4186, "step": 18164 }, { "epoch": 1.0171911748236084, "grad_norm": 1.244420051574707, "learning_rate": 9.078000000000001e-05, "loss": 0.399, "step": 18165 }, { "epoch": 1.0172471721357375, "grad_norm": 1.086611270904541, "learning_rate": 9.0785e-05, "loss": 0.4294, "step": 18166 }, { "epoch": 1.0173031694478665, "grad_norm": 1.6594682931900024, "learning_rate": 9.079e-05, "loss": 0.6585, "step": 18167 }, { "epoch": 1.0173591667599955, "grad_norm": null, "learning_rate": 9.079e-05, "loss": 0.4973, "step": 18168 }, { "epoch": 1.0174151640721245, "grad_norm": 1.4147491455078125, "learning_rate": 9.079500000000001e-05, "loss": 0.3646, "step": 18169 }, { "epoch": 1.0174711613842535, "grad_norm": 1.4743132591247559, "learning_rate": 9.080000000000001e-05, "loss": 0.4226, "step": 18170 }, { "epoch": 1.0175271586963825, "grad_norm": 1.4382675886154175, "learning_rate": 9.080500000000001e-05, "loss": 0.4246, "step": 18171 }, { "epoch": 1.0175831560085116, "grad_norm": 1.3491721153259277, "learning_rate": 9.081e-05, "loss": 0.5039, "step": 18172 }, { "epoch": 1.0176391533206406, "grad_norm": 1.3085360527038574, "learning_rate": 9.0815e-05, "loss": 0.4194, "step": 18173 }, { "epoch": 1.0176951506327696, "grad_norm": 1.3242313861846924, "learning_rate": 9.082e-05, "loss": 0.4244, "step": 18174 }, { "epoch": 1.0177511479448986, "grad_norm": 1.317463755607605, 
"learning_rate": 9.0825e-05, "loss": 0.4964, "step": 18175 }, { "epoch": 1.0178071452570276, "grad_norm": 1.1602150201797485, "learning_rate": 9.083000000000001e-05, "loss": 0.341, "step": 18176 }, { "epoch": 1.0178631425691567, "grad_norm": 1.1702351570129395, "learning_rate": 9.083500000000001e-05, "loss": 0.3677, "step": 18177 }, { "epoch": 1.0179191398812857, "grad_norm": 1.5528980493545532, "learning_rate": 9.084e-05, "loss": 0.4968, "step": 18178 }, { "epoch": 1.0179751371934147, "grad_norm": 1.2101691961288452, "learning_rate": 9.0845e-05, "loss": 0.3585, "step": 18179 }, { "epoch": 1.0180311345055437, "grad_norm": 1.367945671081543, "learning_rate": 9.085e-05, "loss": 0.3716, "step": 18180 }, { "epoch": 1.0180871318176727, "grad_norm": 1.5212262868881226, "learning_rate": 9.0855e-05, "loss": 0.4099, "step": 18181 }, { "epoch": 1.0181431291298018, "grad_norm": 1.1195323467254639, "learning_rate": 9.086e-05, "loss": 0.3555, "step": 18182 }, { "epoch": 1.0181991264419308, "grad_norm": 1.7062853574752808, "learning_rate": 9.0865e-05, "loss": 0.4578, "step": 18183 }, { "epoch": 1.0182551237540598, "grad_norm": 1.607496976852417, "learning_rate": 9.087e-05, "loss": 0.5234, "step": 18184 }, { "epoch": 1.0183111210661888, "grad_norm": 1.255609393119812, "learning_rate": 9.0875e-05, "loss": 0.5072, "step": 18185 }, { "epoch": 1.0183671183783178, "grad_norm": 1.1043859720230103, "learning_rate": 9.088000000000001e-05, "loss": 0.4505, "step": 18186 }, { "epoch": 1.0184231156904469, "grad_norm": 1.301988959312439, "learning_rate": 9.088500000000001e-05, "loss": 0.4329, "step": 18187 }, { "epoch": 1.0184791130025759, "grad_norm": 1.3064353466033936, "learning_rate": 9.089e-05, "loss": 0.3995, "step": 18188 }, { "epoch": 1.018535110314705, "grad_norm": 1.32597815990448, "learning_rate": 9.0895e-05, "loss": 0.4248, "step": 18189 }, { "epoch": 1.018591107626834, "grad_norm": 1.489014983177185, "learning_rate": 9.090000000000001e-05, "loss": 0.4669, "step": 18190 }, { 
"epoch": 1.018647104938963, "grad_norm": 1.5068203210830688, "learning_rate": 9.090500000000001e-05, "loss": 0.4461, "step": 18191 }, { "epoch": 1.018703102251092, "grad_norm": 1.1500208377838135, "learning_rate": 9.091000000000001e-05, "loss": 0.3835, "step": 18192 }, { "epoch": 1.018759099563221, "grad_norm": 1.5500932931900024, "learning_rate": 9.0915e-05, "loss": 0.4162, "step": 18193 }, { "epoch": 1.01881509687535, "grad_norm": 1.2251145839691162, "learning_rate": 9.092e-05, "loss": 0.3043, "step": 18194 }, { "epoch": 1.018871094187479, "grad_norm": 1.2725141048431396, "learning_rate": 9.0925e-05, "loss": 0.4343, "step": 18195 }, { "epoch": 1.018927091499608, "grad_norm": 1.1705259084701538, "learning_rate": 9.093e-05, "loss": 0.3781, "step": 18196 }, { "epoch": 1.018983088811737, "grad_norm": 1.4429186582565308, "learning_rate": 9.093500000000001e-05, "loss": 0.5007, "step": 18197 }, { "epoch": 1.019039086123866, "grad_norm": 1.3703272342681885, "learning_rate": 9.094000000000001e-05, "loss": 0.54, "step": 18198 }, { "epoch": 1.019095083435995, "grad_norm": 1.161527395248413, "learning_rate": 9.0945e-05, "loss": 0.3372, "step": 18199 }, { "epoch": 1.019151080748124, "grad_norm": 1.3180170059204102, "learning_rate": 9.095e-05, "loss": 0.4064, "step": 18200 }, { "epoch": 1.0192070780602531, "grad_norm": 1.463131070137024, "learning_rate": 9.0955e-05, "loss": 0.5625, "step": 18201 }, { "epoch": 1.0192630753723821, "grad_norm": 1.2182964086532593, "learning_rate": 9.096e-05, "loss": 0.3779, "step": 18202 }, { "epoch": 1.0193190726845112, "grad_norm": 1.4279230833053589, "learning_rate": 9.0965e-05, "loss": 0.4652, "step": 18203 }, { "epoch": 1.0193750699966402, "grad_norm": 1.2451354265213013, "learning_rate": 9.097e-05, "loss": 0.4531, "step": 18204 }, { "epoch": 1.0194310673087692, "grad_norm": 1.2272124290466309, "learning_rate": 9.0975e-05, "loss": 0.3787, "step": 18205 }, { "epoch": 1.0194870646208982, "grad_norm": 1.1763030290603638, "learning_rate": 
9.098000000000001e-05, "loss": 0.4008, "step": 18206 }, { "epoch": 1.0195430619330272, "grad_norm": 1.3578581809997559, "learning_rate": 9.098500000000001e-05, "loss": 0.4206, "step": 18207 }, { "epoch": 1.0195990592451563, "grad_norm": 1.4249365329742432, "learning_rate": 9.099000000000001e-05, "loss": 0.4374, "step": 18208 }, { "epoch": 1.0196550565572853, "grad_norm": 1.328666090965271, "learning_rate": 9.0995e-05, "loss": 0.4919, "step": 18209 }, { "epoch": 1.0197110538694143, "grad_norm": 1.2589588165283203, "learning_rate": 9.1e-05, "loss": 0.5186, "step": 18210 }, { "epoch": 1.0197670511815433, "grad_norm": 1.3986668586730957, "learning_rate": 9.1005e-05, "loss": 0.3669, "step": 18211 }, { "epoch": 1.0198230484936723, "grad_norm": 1.1582605838775635, "learning_rate": 9.101000000000001e-05, "loss": 0.4514, "step": 18212 }, { "epoch": 1.0198790458058014, "grad_norm": 1.40287184715271, "learning_rate": 9.101500000000001e-05, "loss": 0.5558, "step": 18213 }, { "epoch": 1.0199350431179304, "grad_norm": 1.3524171113967896, "learning_rate": 9.102e-05, "loss": 0.4443, "step": 18214 }, { "epoch": 1.0199910404300594, "grad_norm": 1.5427931547164917, "learning_rate": 9.1025e-05, "loss": 0.4846, "step": 18215 }, { "epoch": 1.0200470377421884, "grad_norm": 1.319544792175293, "learning_rate": 9.103e-05, "loss": 0.4859, "step": 18216 }, { "epoch": 1.0201030350543174, "grad_norm": 1.5592738389968872, "learning_rate": 9.1035e-05, "loss": 0.4762, "step": 18217 }, { "epoch": 1.0201590323664464, "grad_norm": 1.3388237953186035, "learning_rate": 9.104000000000001e-05, "loss": 0.578, "step": 18218 }, { "epoch": 1.0202150296785755, "grad_norm": 1.6388988494873047, "learning_rate": 9.1045e-05, "loss": 0.4988, "step": 18219 }, { "epoch": 1.0202710269907045, "grad_norm": 1.4908218383789062, "learning_rate": 9.105e-05, "loss": 0.3613, "step": 18220 }, { "epoch": 1.0203270243028335, "grad_norm": 1.2812426090240479, "learning_rate": 9.1055e-05, "loss": 0.2655, "step": 18221 }, { 
"epoch": 1.0203830216149625, "grad_norm": 1.3546313047409058, "learning_rate": 9.106e-05, "loss": 0.3827, "step": 18222 }, { "epoch": 1.0204390189270915, "grad_norm": 1.247186541557312, "learning_rate": 9.1065e-05, "loss": 0.4516, "step": 18223 }, { "epoch": 1.0204950162392206, "grad_norm": 1.2498414516448975, "learning_rate": 9.107e-05, "loss": 0.4024, "step": 18224 }, { "epoch": 1.0205510135513496, "grad_norm": 1.2576067447662354, "learning_rate": 9.1075e-05, "loss": 0.4612, "step": 18225 }, { "epoch": 1.0206070108634786, "grad_norm": 1.5343748331069946, "learning_rate": 9.108000000000002e-05, "loss": 0.4571, "step": 18226 }, { "epoch": 1.0206630081756076, "grad_norm": 1.9127240180969238, "learning_rate": 9.108500000000001e-05, "loss": 0.369, "step": 18227 }, { "epoch": 1.0207190054877366, "grad_norm": 1.4005156755447388, "learning_rate": 9.109000000000001e-05, "loss": 0.508, "step": 18228 }, { "epoch": 1.0207750027998657, "grad_norm": 1.408104658126831, "learning_rate": 9.109500000000001e-05, "loss": 0.468, "step": 18229 }, { "epoch": 1.0208310001119947, "grad_norm": 1.2827550172805786, "learning_rate": 9.11e-05, "loss": 0.5104, "step": 18230 }, { "epoch": 1.0208869974241237, "grad_norm": 1.292176604270935, "learning_rate": 9.1105e-05, "loss": 0.4796, "step": 18231 }, { "epoch": 1.0209429947362527, "grad_norm": 1.4443269968032837, "learning_rate": 9.111e-05, "loss": 0.3355, "step": 18232 }, { "epoch": 1.0209989920483817, "grad_norm": 1.3418875932693481, "learning_rate": 9.111500000000001e-05, "loss": 0.5003, "step": 18233 }, { "epoch": 1.0210549893605108, "grad_norm": 1.4856966733932495, "learning_rate": 9.112000000000001e-05, "loss": 0.3811, "step": 18234 }, { "epoch": 1.0211109866726398, "grad_norm": 1.056575059890747, "learning_rate": 9.1125e-05, "loss": 0.2906, "step": 18235 }, { "epoch": 1.0211669839847688, "grad_norm": 1.321616291999817, "learning_rate": 9.113e-05, "loss": 0.3858, "step": 18236 }, { "epoch": 1.0212229812968978, "grad_norm": 
1.1759700775146484, "learning_rate": 9.1135e-05, "loss": 0.3259, "step": 18237 }, { "epoch": 1.0212789786090268, "grad_norm": 1.299067735671997, "learning_rate": 9.114e-05, "loss": 0.4186, "step": 18238 }, { "epoch": 1.0213349759211559, "grad_norm": 1.4101417064666748, "learning_rate": 9.114500000000001e-05, "loss": 0.4097, "step": 18239 }, { "epoch": 1.0213909732332849, "grad_norm": 1.4865729808807373, "learning_rate": 9.115e-05, "loss": 0.4895, "step": 18240 }, { "epoch": 1.021446970545414, "grad_norm": 1.4383394718170166, "learning_rate": 9.1155e-05, "loss": 0.4237, "step": 18241 }, { "epoch": 1.021502967857543, "grad_norm": 1.3762346506118774, "learning_rate": 9.116e-05, "loss": 0.4098, "step": 18242 }, { "epoch": 1.021558965169672, "grad_norm": 1.2840474843978882, "learning_rate": 9.1165e-05, "loss": 0.396, "step": 18243 }, { "epoch": 1.021614962481801, "grad_norm": 1.7581106424331665, "learning_rate": 9.117e-05, "loss": 0.3805, "step": 18244 }, { "epoch": 1.02167095979393, "grad_norm": 1.226104497909546, "learning_rate": 9.1175e-05, "loss": 0.3699, "step": 18245 }, { "epoch": 1.021726957106059, "grad_norm": 1.236308217048645, "learning_rate": 9.118e-05, "loss": 0.4676, "step": 18246 }, { "epoch": 1.021782954418188, "grad_norm": 1.1668379306793213, "learning_rate": 9.118500000000002e-05, "loss": 0.3804, "step": 18247 }, { "epoch": 1.021838951730317, "grad_norm": 1.8728046417236328, "learning_rate": 9.119000000000001e-05, "loss": 0.5154, "step": 18248 }, { "epoch": 1.021894949042446, "grad_norm": 1.1699916124343872, "learning_rate": 9.119500000000001e-05, "loss": 0.4142, "step": 18249 }, { "epoch": 1.021950946354575, "grad_norm": 1.4368023872375488, "learning_rate": 9.120000000000001e-05, "loss": 0.5335, "step": 18250 }, { "epoch": 1.022006943666704, "grad_norm": 1.3821005821228027, "learning_rate": 9.1205e-05, "loss": 0.417, "step": 18251 }, { "epoch": 1.022062940978833, "grad_norm": 1.9109565019607544, "learning_rate": 9.121e-05, "loss": 0.5542, "step": 18252 
}, { "epoch": 1.0221189382909621, "grad_norm": 1.257783055305481, "learning_rate": 9.1215e-05, "loss": 0.3066, "step": 18253 }, { "epoch": 1.0221749356030911, "grad_norm": 1.4408705234527588, "learning_rate": 9.122000000000001e-05, "loss": 0.4391, "step": 18254 }, { "epoch": 1.0222309329152202, "grad_norm": 1.3678799867630005, "learning_rate": 9.122500000000001e-05, "loss": 0.3597, "step": 18255 }, { "epoch": 1.0222869302273492, "grad_norm": 1.3051496744155884, "learning_rate": 9.123e-05, "loss": 0.4144, "step": 18256 }, { "epoch": 1.0223429275394782, "grad_norm": 1.2545008659362793, "learning_rate": 9.1235e-05, "loss": 0.4483, "step": 18257 }, { "epoch": 1.0223989248516072, "grad_norm": 1.4460846185684204, "learning_rate": 9.124e-05, "loss": 0.3909, "step": 18258 }, { "epoch": 1.0224549221637362, "grad_norm": 1.2341158390045166, "learning_rate": 9.1245e-05, "loss": 0.387, "step": 18259 }, { "epoch": 1.0225109194758653, "grad_norm": 1.3547271490097046, "learning_rate": 9.125e-05, "loss": 0.4874, "step": 18260 }, { "epoch": 1.0225669167879943, "grad_norm": 1.2583049535751343, "learning_rate": 9.1255e-05, "loss": 0.3336, "step": 18261 }, { "epoch": 1.0226229141001233, "grad_norm": 1.4579052925109863, "learning_rate": 9.126e-05, "loss": 0.4597, "step": 18262 }, { "epoch": 1.0226789114122523, "grad_norm": 1.7459715604782104, "learning_rate": 9.1265e-05, "loss": 0.4076, "step": 18263 }, { "epoch": 1.0227349087243813, "grad_norm": 1.2544258832931519, "learning_rate": 9.127e-05, "loss": 0.338, "step": 18264 }, { "epoch": 1.0227909060365103, "grad_norm": 2.3433997631073, "learning_rate": 9.1275e-05, "loss": 0.5613, "step": 18265 }, { "epoch": 1.0228469033486394, "grad_norm": 1.5074681043624878, "learning_rate": 9.128e-05, "loss": 0.4918, "step": 18266 }, { "epoch": 1.0229029006607684, "grad_norm": 1.2791783809661865, "learning_rate": 9.1285e-05, "loss": 0.4341, "step": 18267 }, { "epoch": 1.0229588979728974, "grad_norm": 1.2252360582351685, "learning_rate": 
9.129000000000002e-05, "loss": 0.3463, "step": 18268 }, { "epoch": 1.0230148952850264, "grad_norm": 1.2673494815826416, "learning_rate": 9.129500000000001e-05, "loss": 0.3853, "step": 18269 }, { "epoch": 1.0230708925971554, "grad_norm": 1.4124587774276733, "learning_rate": 9.130000000000001e-05, "loss": 0.3272, "step": 18270 }, { "epoch": 1.0231268899092845, "grad_norm": 1.2844294309616089, "learning_rate": 9.130500000000001e-05, "loss": 0.3963, "step": 18271 }, { "epoch": 1.0231828872214135, "grad_norm": 1.359400749206543, "learning_rate": 9.131e-05, "loss": 0.5973, "step": 18272 }, { "epoch": 1.0232388845335423, "grad_norm": 1.4733316898345947, "learning_rate": 9.1315e-05, "loss": 0.5078, "step": 18273 }, { "epoch": 1.0232948818456713, "grad_norm": 1.5604074001312256, "learning_rate": 9.132e-05, "loss": 0.6136, "step": 18274 }, { "epoch": 1.0233508791578003, "grad_norm": 1.272163987159729, "learning_rate": 9.132500000000001e-05, "loss": 0.3756, "step": 18275 }, { "epoch": 1.0234068764699293, "grad_norm": 1.4646979570388794, "learning_rate": 9.133000000000001e-05, "loss": 0.5123, "step": 18276 }, { "epoch": 1.0234628737820584, "grad_norm": 1.6235682964324951, "learning_rate": 9.1335e-05, "loss": 0.4594, "step": 18277 }, { "epoch": 1.0235188710941874, "grad_norm": 1.4288482666015625, "learning_rate": 9.134e-05, "loss": 0.4221, "step": 18278 }, { "epoch": 1.0235748684063164, "grad_norm": 1.1828737258911133, "learning_rate": 9.1345e-05, "loss": 0.3298, "step": 18279 }, { "epoch": 1.0236308657184454, "grad_norm": 1.3354418277740479, "learning_rate": 9.135e-05, "loss": 0.4301, "step": 18280 }, { "epoch": 1.0236868630305744, "grad_norm": 1.3682681322097778, "learning_rate": 9.1355e-05, "loss": 0.5212, "step": 18281 }, { "epoch": 1.0237428603427035, "grad_norm": 1.308532953262329, "learning_rate": 9.136e-05, "loss": 0.4403, "step": 18282 }, { "epoch": 1.0237988576548325, "grad_norm": 1.3649508953094482, "learning_rate": 9.1365e-05, "loss": 0.3935, "step": 18283 }, { 
"epoch": 1.0238548549669615, "grad_norm": 1.3734993934631348, "learning_rate": 9.137e-05, "loss": 0.4426, "step": 18284 }, { "epoch": 1.0239108522790905, "grad_norm": 1.473650336265564, "learning_rate": 9.1375e-05, "loss": 0.5127, "step": 18285 }, { "epoch": 1.0239668495912195, "grad_norm": 1.4458955526351929, "learning_rate": 9.138e-05, "loss": 0.4166, "step": 18286 }, { "epoch": 1.0240228469033485, "grad_norm": 1.4608911275863647, "learning_rate": 9.138500000000001e-05, "loss": 0.6066, "step": 18287 }, { "epoch": 1.0240788442154776, "grad_norm": 1.498123049736023, "learning_rate": 9.139e-05, "loss": 0.5515, "step": 18288 }, { "epoch": 1.0241348415276066, "grad_norm": 1.2164201736450195, "learning_rate": 9.1395e-05, "loss": 0.4468, "step": 18289 }, { "epoch": 1.0241908388397356, "grad_norm": 1.4614959955215454, "learning_rate": 9.140000000000001e-05, "loss": 0.4695, "step": 18290 }, { "epoch": 1.0242468361518646, "grad_norm": 1.2819448709487915, "learning_rate": 9.140500000000001e-05, "loss": 0.302, "step": 18291 }, { "epoch": 1.0243028334639936, "grad_norm": 1.484433650970459, "learning_rate": 9.141000000000001e-05, "loss": 0.418, "step": 18292 }, { "epoch": 1.0243588307761227, "grad_norm": 1.1295392513275146, "learning_rate": 9.1415e-05, "loss": 0.377, "step": 18293 }, { "epoch": 1.0244148280882517, "grad_norm": 1.4290921688079834, "learning_rate": 9.142e-05, "loss": 0.4408, "step": 18294 }, { "epoch": 1.0244708254003807, "grad_norm": 1.4190647602081299, "learning_rate": 9.1425e-05, "loss": 0.4337, "step": 18295 }, { "epoch": 1.0245268227125097, "grad_norm": 1.1803679466247559, "learning_rate": 9.143000000000001e-05, "loss": 0.4562, "step": 18296 }, { "epoch": 1.0245828200246387, "grad_norm": 1.1149933338165283, "learning_rate": 9.143500000000001e-05, "loss": 0.3951, "step": 18297 }, { "epoch": 1.0246388173367678, "grad_norm": 1.1982587575912476, "learning_rate": 9.144e-05, "loss": 0.3147, "step": 18298 }, { "epoch": 1.0246948146488968, "grad_norm": 
1.5093990564346313, "learning_rate": 9.1445e-05, "loss": 0.65, "step": 18299 }, { "epoch": 1.0247508119610258, "grad_norm": 1.3103488683700562, "learning_rate": 9.145e-05, "loss": 0.3724, "step": 18300 }, { "epoch": 1.0248068092731548, "grad_norm": 1.3719797134399414, "learning_rate": 9.1455e-05, "loss": 0.3564, "step": 18301 }, { "epoch": 1.0248628065852838, "grad_norm": 1.3001863956451416, "learning_rate": 9.146e-05, "loss": 0.5071, "step": 18302 }, { "epoch": 1.0249188038974129, "grad_norm": 1.3157297372817993, "learning_rate": 9.1465e-05, "loss": 0.4178, "step": 18303 }, { "epoch": 1.0249748012095419, "grad_norm": 1.3867977857589722, "learning_rate": 9.147e-05, "loss": 0.4625, "step": 18304 }, { "epoch": 1.025030798521671, "grad_norm": 1.1817615032196045, "learning_rate": 9.1475e-05, "loss": 0.397, "step": 18305 }, { "epoch": 1.0250867958338, "grad_norm": 1.3469206094741821, "learning_rate": 9.148e-05, "loss": 0.4254, "step": 18306 }, { "epoch": 1.025142793145929, "grad_norm": 1.2162150144577026, "learning_rate": 9.148500000000001e-05, "loss": 0.4049, "step": 18307 }, { "epoch": 1.025198790458058, "grad_norm": 1.859230399131775, "learning_rate": 9.149e-05, "loss": 0.6715, "step": 18308 }, { "epoch": 1.025254787770187, "grad_norm": 1.5233372449874878, "learning_rate": 9.1495e-05, "loss": 0.5154, "step": 18309 }, { "epoch": 1.025310785082316, "grad_norm": 1.410709023475647, "learning_rate": 9.15e-05, "loss": 0.4479, "step": 18310 }, { "epoch": 1.025366782394445, "grad_norm": 1.3257741928100586, "learning_rate": 9.150500000000001e-05, "loss": 0.5305, "step": 18311 }, { "epoch": 1.025422779706574, "grad_norm": 1.2185670137405396, "learning_rate": 9.151000000000001e-05, "loss": 0.4023, "step": 18312 }, { "epoch": 1.025478777018703, "grad_norm": 1.3654381036758423, "learning_rate": 9.151500000000001e-05, "loss": 0.4937, "step": 18313 }, { "epoch": 1.025534774330832, "grad_norm": 1.2604769468307495, "learning_rate": 9.152e-05, "loss": 0.3389, "step": 18314 }, { 
"epoch": 1.025590771642961, "grad_norm": 1.3107566833496094, "learning_rate": 9.1525e-05, "loss": 0.399, "step": 18315 }, { "epoch": 1.02564676895509, "grad_norm": 1.2344932556152344, "learning_rate": 9.153e-05, "loss": 0.388, "step": 18316 }, { "epoch": 1.0257027662672191, "grad_norm": 1.4607726335525513, "learning_rate": 9.153500000000001e-05, "loss": 0.4296, "step": 18317 }, { "epoch": 1.0257587635793481, "grad_norm": 1.532738208770752, "learning_rate": 9.154000000000001e-05, "loss": 0.3561, "step": 18318 }, { "epoch": 1.0258147608914772, "grad_norm": 1.3724843263626099, "learning_rate": 9.1545e-05, "loss": 0.3672, "step": 18319 }, { "epoch": 1.0258707582036062, "grad_norm": 1.2287768125534058, "learning_rate": 9.155e-05, "loss": 0.4009, "step": 18320 }, { "epoch": 1.0259267555157352, "grad_norm": 1.1697520017623901, "learning_rate": 9.1555e-05, "loss": 0.3992, "step": 18321 }, { "epoch": 1.0259827528278642, "grad_norm": 1.450138807296753, "learning_rate": 9.156e-05, "loss": 0.5989, "step": 18322 }, { "epoch": 1.0260387501399932, "grad_norm": 1.4806028604507446, "learning_rate": 9.1565e-05, "loss": 0.4256, "step": 18323 }, { "epoch": 1.0260947474521223, "grad_norm": 1.3607051372528076, "learning_rate": 9.157e-05, "loss": 0.4582, "step": 18324 }, { "epoch": 1.0261507447642513, "grad_norm": 1.2938125133514404, "learning_rate": 9.1575e-05, "loss": 0.4895, "step": 18325 }, { "epoch": 1.0262067420763803, "grad_norm": 1.1564923524856567, "learning_rate": 9.158e-05, "loss": 0.347, "step": 18326 }, { "epoch": 1.0262627393885093, "grad_norm": 1.0445786714553833, "learning_rate": 9.158500000000001e-05, "loss": 0.3474, "step": 18327 }, { "epoch": 1.0263187367006383, "grad_norm": 1.220381259918213, "learning_rate": 9.159000000000001e-05, "loss": 0.3853, "step": 18328 }, { "epoch": 1.0263747340127674, "grad_norm": 1.3459187746047974, "learning_rate": 9.1595e-05, "loss": 0.3873, "step": 18329 }, { "epoch": 1.0264307313248964, "grad_norm": 1.1996020078659058, "learning_rate": 
9.16e-05, "loss": 0.3603, "step": 18330 }, { "epoch": 1.0264867286370254, "grad_norm": 1.67203688621521, "learning_rate": 9.1605e-05, "loss": 0.5885, "step": 18331 }, { "epoch": 1.0265427259491544, "grad_norm": 1.098457932472229, "learning_rate": 9.161000000000001e-05, "loss": 0.3171, "step": 18332 }, { "epoch": 1.0265987232612834, "grad_norm": 1.4290902614593506, "learning_rate": 9.161500000000001e-05, "loss": 0.4719, "step": 18333 }, { "epoch": 1.0266547205734124, "grad_norm": 1.4965934753417969, "learning_rate": 9.162000000000001e-05, "loss": 0.43, "step": 18334 }, { "epoch": 1.0267107178855415, "grad_norm": 1.3025615215301514, "learning_rate": 9.1625e-05, "loss": 0.353, "step": 18335 }, { "epoch": 1.0267667151976705, "grad_norm": 1.5858601331710815, "learning_rate": 9.163e-05, "loss": 0.5798, "step": 18336 }, { "epoch": 1.0268227125097995, "grad_norm": 1.6514794826507568, "learning_rate": 9.1635e-05, "loss": 0.5493, "step": 18337 }, { "epoch": 1.0268787098219285, "grad_norm": 1.5772913694381714, "learning_rate": 9.164000000000001e-05, "loss": 0.4204, "step": 18338 }, { "epoch": 1.0269347071340575, "grad_norm": 1.5542999505996704, "learning_rate": 9.164500000000001e-05, "loss": 0.4284, "step": 18339 }, { "epoch": 1.0269907044461866, "grad_norm": 1.5654528141021729, "learning_rate": 9.165e-05, "loss": 0.3435, "step": 18340 }, { "epoch": 1.0270467017583156, "grad_norm": 1.1904045343399048, "learning_rate": 9.1655e-05, "loss": 0.4665, "step": 18341 }, { "epoch": 1.0271026990704446, "grad_norm": 1.2764575481414795, "learning_rate": 9.166e-05, "loss": 0.3937, "step": 18342 }, { "epoch": 1.0271586963825736, "grad_norm": 1.2397255897521973, "learning_rate": 9.1665e-05, "loss": 0.4035, "step": 18343 }, { "epoch": 1.0272146936947026, "grad_norm": 1.5256158113479614, "learning_rate": 9.167e-05, "loss": 0.4795, "step": 18344 }, { "epoch": 1.0272706910068317, "grad_norm": 1.3618723154067993, "learning_rate": 9.1675e-05, "loss": 0.5174, "step": 18345 }, { "epoch": 
1.0273266883189607, "grad_norm": 1.2115530967712402, "learning_rate": 9.168e-05, "loss": 0.338, "step": 18346 }, { "epoch": 1.0273826856310897, "grad_norm": 1.500746250152588, "learning_rate": 9.168500000000001e-05, "loss": 0.4163, "step": 18347 }, { "epoch": 1.0274386829432187, "grad_norm": 1.482271432876587, "learning_rate": 9.169000000000001e-05, "loss": 0.4528, "step": 18348 }, { "epoch": 1.0274946802553477, "grad_norm": 1.9786421060562134, "learning_rate": 9.169500000000001e-05, "loss": 0.451, "step": 18349 }, { "epoch": 1.0275506775674768, "grad_norm": 1.3467090129852295, "learning_rate": 9.17e-05, "loss": 0.3977, "step": 18350 }, { "epoch": 1.0276066748796058, "grad_norm": 1.366949439048767, "learning_rate": 9.1705e-05, "loss": 0.5047, "step": 18351 }, { "epoch": 1.0276626721917348, "grad_norm": 1.2228416204452515, "learning_rate": 9.171e-05, "loss": 0.3472, "step": 18352 }, { "epoch": 1.0277186695038638, "grad_norm": 1.6578097343444824, "learning_rate": 9.171500000000001e-05, "loss": 0.4835, "step": 18353 }, { "epoch": 1.0277746668159928, "grad_norm": 1.2654086351394653, "learning_rate": 9.172000000000001e-05, "loss": 0.3836, "step": 18354 }, { "epoch": 1.0278306641281219, "grad_norm": 1.4109214544296265, "learning_rate": 9.172500000000001e-05, "loss": 0.487, "step": 18355 }, { "epoch": 1.0278866614402509, "grad_norm": 1.3260384798049927, "learning_rate": 9.173e-05, "loss": 0.4185, "step": 18356 }, { "epoch": 1.02794265875238, "grad_norm": 1.336680293083191, "learning_rate": 9.1735e-05, "loss": 0.3993, "step": 18357 }, { "epoch": 1.027998656064509, "grad_norm": 1.0807775259017944, "learning_rate": 9.174e-05, "loss": 0.3301, "step": 18358 }, { "epoch": 1.028054653376638, "grad_norm": 1.4308130741119385, "learning_rate": 9.1745e-05, "loss": 0.5478, "step": 18359 }, { "epoch": 1.028110650688767, "grad_norm": 1.6996850967407227, "learning_rate": 9.175000000000001e-05, "loss": 0.5778, "step": 18360 }, { "epoch": 1.028166648000896, "grad_norm": 
1.3647295236587524, "learning_rate": 9.1755e-05, "loss": 0.4445, "step": 18361 }, { "epoch": 1.028222645313025, "grad_norm": 1.6652171611785889, "learning_rate": 9.176e-05, "loss": 0.556, "step": 18362 }, { "epoch": 1.028278642625154, "grad_norm": 1.6018441915512085, "learning_rate": 9.1765e-05, "loss": 0.5107, "step": 18363 }, { "epoch": 1.028334639937283, "grad_norm": 1.3393938541412354, "learning_rate": 9.177e-05, "loss": 0.4032, "step": 18364 }, { "epoch": 1.028390637249412, "grad_norm": 1.1796034574508667, "learning_rate": 9.1775e-05, "loss": 0.4536, "step": 18365 }, { "epoch": 1.028446634561541, "grad_norm": 1.275696039199829, "learning_rate": 9.178e-05, "loss": 0.4154, "step": 18366 }, { "epoch": 1.02850263187367, "grad_norm": 3.7940006256103516, "learning_rate": 9.178500000000002e-05, "loss": 0.5143, "step": 18367 }, { "epoch": 1.028558629185799, "grad_norm": 1.4529520273208618, "learning_rate": 9.179000000000001e-05, "loss": 0.5135, "step": 18368 }, { "epoch": 1.0286146264979281, "grad_norm": 1.36226487159729, "learning_rate": 9.179500000000001e-05, "loss": 0.4201, "step": 18369 }, { "epoch": 1.0286706238100571, "grad_norm": 1.2332403659820557, "learning_rate": 9.180000000000001e-05, "loss": 0.3876, "step": 18370 }, { "epoch": 1.0287266211221862, "grad_norm": 1.2992181777954102, "learning_rate": 9.1805e-05, "loss": 0.6808, "step": 18371 }, { "epoch": 1.0287826184343152, "grad_norm": 1.4992225170135498, "learning_rate": 9.181e-05, "loss": 0.3968, "step": 18372 }, { "epoch": 1.0288386157464442, "grad_norm": 1.1045231819152832, "learning_rate": 9.1815e-05, "loss": 0.3763, "step": 18373 }, { "epoch": 1.0288946130585732, "grad_norm": 1.3332529067993164, "learning_rate": 9.182000000000001e-05, "loss": 0.3534, "step": 18374 }, { "epoch": 1.0289506103707022, "grad_norm": 1.446587085723877, "learning_rate": 9.182500000000001e-05, "loss": 0.3957, "step": 18375 }, { "epoch": 1.0290066076828313, "grad_norm": 1.5129408836364746, "learning_rate": 9.183000000000001e-05, 
"loss": 0.5899, "step": 18376 }, { "epoch": 1.0290626049949603, "grad_norm": 1.2569142580032349, "learning_rate": 9.1835e-05, "loss": 0.3884, "step": 18377 }, { "epoch": 1.0291186023070893, "grad_norm": 1.3488404750823975, "learning_rate": 9.184e-05, "loss": 0.4009, "step": 18378 }, { "epoch": 1.0291745996192183, "grad_norm": 1.46926748752594, "learning_rate": 9.1845e-05, "loss": 0.4344, "step": 18379 }, { "epoch": 1.0292305969313473, "grad_norm": 1.3317769765853882, "learning_rate": 9.185e-05, "loss": 0.4538, "step": 18380 }, { "epoch": 1.0292865942434763, "grad_norm": 1.6697031259536743, "learning_rate": 9.185500000000001e-05, "loss": 0.3738, "step": 18381 }, { "epoch": 1.0293425915556054, "grad_norm": 1.295625925064087, "learning_rate": 9.186e-05, "loss": 0.5232, "step": 18382 }, { "epoch": 1.0293985888677344, "grad_norm": 1.269540786743164, "learning_rate": 9.1865e-05, "loss": 0.4541, "step": 18383 }, { "epoch": 1.0294545861798634, "grad_norm": 1.8306798934936523, "learning_rate": 9.187e-05, "loss": 0.3955, "step": 18384 }, { "epoch": 1.0295105834919924, "grad_norm": 1.1614495515823364, "learning_rate": 9.1875e-05, "loss": 0.3892, "step": 18385 }, { "epoch": 1.0295665808041214, "grad_norm": 1.5643037557601929, "learning_rate": 9.188e-05, "loss": 0.398, "step": 18386 }, { "epoch": 1.0296225781162505, "grad_norm": 1.1431219577789307, "learning_rate": 9.1885e-05, "loss": 0.4726, "step": 18387 }, { "epoch": 1.0296785754283795, "grad_norm": 1.1050772666931152, "learning_rate": 9.189e-05, "loss": 0.3813, "step": 18388 }, { "epoch": 1.0297345727405085, "grad_norm": 3.193002223968506, "learning_rate": 9.189500000000001e-05, "loss": 0.524, "step": 18389 }, { "epoch": 1.0297905700526375, "grad_norm": 1.2781938314437866, "learning_rate": 9.190000000000001e-05, "loss": 0.3412, "step": 18390 }, { "epoch": 1.0298465673647665, "grad_norm": 1.3508274555206299, "learning_rate": 9.190500000000001e-05, "loss": 0.5309, "step": 18391 }, { "epoch": 1.0299025646768956, "grad_norm": 
1.6212399005889893, "learning_rate": 9.191e-05, "loss": 0.5822, "step": 18392 }, { "epoch": 1.0299585619890246, "grad_norm": 1.4462803602218628, "learning_rate": 9.1915e-05, "loss": 0.2874, "step": 18393 }, { "epoch": 1.0300145593011536, "grad_norm": 1.1816555261611938, "learning_rate": 9.192e-05, "loss": 0.4075, "step": 18394 }, { "epoch": 1.0300705566132826, "grad_norm": 1.5166535377502441, "learning_rate": 9.192500000000001e-05, "loss": 0.389, "step": 18395 }, { "epoch": 1.0301265539254116, "grad_norm": 1.4264452457427979, "learning_rate": 9.193000000000001e-05, "loss": 0.4658, "step": 18396 }, { "epoch": 1.0301825512375407, "grad_norm": 1.33519446849823, "learning_rate": 9.1935e-05, "loss": 0.5087, "step": 18397 }, { "epoch": 1.0302385485496697, "grad_norm": 1.2397849559783936, "learning_rate": 9.194e-05, "loss": 0.4526, "step": 18398 }, { "epoch": 1.0302945458617987, "grad_norm": 1.933770775794983, "learning_rate": 9.1945e-05, "loss": 0.3647, "step": 18399 }, { "epoch": 1.0303505431739277, "grad_norm": 1.5117770433425903, "learning_rate": 9.195e-05, "loss": 0.4367, "step": 18400 }, { "epoch": 1.0304065404860567, "grad_norm": 1.671377182006836, "learning_rate": 9.1955e-05, "loss": 0.3343, "step": 18401 }, { "epoch": 1.0304625377981858, "grad_norm": 1.203366756439209, "learning_rate": 9.196000000000001e-05, "loss": 0.3819, "step": 18402 }, { "epoch": 1.0305185351103148, "grad_norm": 1.5717040300369263, "learning_rate": 9.1965e-05, "loss": 0.4543, "step": 18403 }, { "epoch": 1.0305745324224438, "grad_norm": 1.1201183795928955, "learning_rate": 9.197e-05, "loss": 0.2794, "step": 18404 }, { "epoch": 1.0306305297345728, "grad_norm": 1.88822603225708, "learning_rate": 9.1975e-05, "loss": 0.4396, "step": 18405 }, { "epoch": 1.0306865270467018, "grad_norm": 1.2081702947616577, "learning_rate": 9.198e-05, "loss": 0.3072, "step": 18406 }, { "epoch": 1.0307425243588308, "grad_norm": 1.5484460592269897, "learning_rate": 9.1985e-05, "loss": 0.5021, "step": 18407 }, { 
"epoch": 1.0307985216709599, "grad_norm": 1.197572946548462, "learning_rate": 9.199e-05, "loss": 0.3615, "step": 18408 }, { "epoch": 1.0308545189830889, "grad_norm": 1.2413281202316284, "learning_rate": 9.1995e-05, "loss": 0.4234, "step": 18409 }, { "epoch": 1.030910516295218, "grad_norm": 1.3042575120925903, "learning_rate": 9.200000000000001e-05, "loss": 0.4667, "step": 18410 }, { "epoch": 1.030966513607347, "grad_norm": 1.9163047075271606, "learning_rate": 9.200500000000001e-05, "loss": 0.2956, "step": 18411 }, { "epoch": 1.031022510919476, "grad_norm": 1.655187726020813, "learning_rate": 9.201000000000001e-05, "loss": 0.3838, "step": 18412 }, { "epoch": 1.031078508231605, "grad_norm": 1.4335438013076782, "learning_rate": 9.2015e-05, "loss": 0.4645, "step": 18413 }, { "epoch": 1.031134505543734, "grad_norm": 1.4914640188217163, "learning_rate": 9.202e-05, "loss": 0.403, "step": 18414 }, { "epoch": 1.031190502855863, "grad_norm": 1.4218559265136719, "learning_rate": 9.2025e-05, "loss": 0.3799, "step": 18415 }, { "epoch": 1.031246500167992, "grad_norm": 1.4433684349060059, "learning_rate": 9.203000000000001e-05, "loss": 0.4391, "step": 18416 }, { "epoch": 1.031302497480121, "grad_norm": 1.1900036334991455, "learning_rate": 9.203500000000001e-05, "loss": 0.5269, "step": 18417 }, { "epoch": 1.03135849479225, "grad_norm": 1.30314040184021, "learning_rate": 9.204e-05, "loss": 0.3646, "step": 18418 }, { "epoch": 1.031414492104379, "grad_norm": 1.6013752222061157, "learning_rate": 9.2045e-05, "loss": 0.3689, "step": 18419 }, { "epoch": 1.031470489416508, "grad_norm": 1.0355218648910522, "learning_rate": 9.205e-05, "loss": 0.3397, "step": 18420 }, { "epoch": 1.0315264867286371, "grad_norm": 1.4996814727783203, "learning_rate": 9.2055e-05, "loss": 0.3549, "step": 18421 }, { "epoch": 1.0315824840407661, "grad_norm": 1.2774845361709595, "learning_rate": 9.206e-05, "loss": 0.4775, "step": 18422 }, { "epoch": 1.0316384813528952, "grad_norm": 1.398349642753601, 
"learning_rate": 9.206500000000001e-05, "loss": 0.3908, "step": 18423 }, { "epoch": 1.0316944786650242, "grad_norm": 1.645747423171997, "learning_rate": 9.207e-05, "loss": 0.5009, "step": 18424 }, { "epoch": 1.0317504759771532, "grad_norm": 1.55572509765625, "learning_rate": 9.2075e-05, "loss": 0.5306, "step": 18425 }, { "epoch": 1.0318064732892822, "grad_norm": 1.4952770471572876, "learning_rate": 9.208e-05, "loss": 0.4225, "step": 18426 }, { "epoch": 1.0318624706014112, "grad_norm": 1.4356719255447388, "learning_rate": 9.2085e-05, "loss": 0.5911, "step": 18427 }, { "epoch": 1.0319184679135402, "grad_norm": 1.3004816770553589, "learning_rate": 9.209000000000001e-05, "loss": 0.4836, "step": 18428 }, { "epoch": 1.0319744652256693, "grad_norm": 1.3479303121566772, "learning_rate": 9.2095e-05, "loss": 0.4258, "step": 18429 }, { "epoch": 1.0320304625377983, "grad_norm": 1.1808431148529053, "learning_rate": 9.21e-05, "loss": 0.4005, "step": 18430 }, { "epoch": 1.0320864598499273, "grad_norm": 1.135550618171692, "learning_rate": 9.210500000000001e-05, "loss": 0.3771, "step": 18431 }, { "epoch": 1.0321424571620563, "grad_norm": 1.316703200340271, "learning_rate": 9.211000000000001e-05, "loss": 0.4245, "step": 18432 }, { "epoch": 1.0321984544741853, "grad_norm": 1.4228503704071045, "learning_rate": 9.211500000000001e-05, "loss": 0.4636, "step": 18433 }, { "epoch": 1.0322544517863144, "grad_norm": 1.084437370300293, "learning_rate": 9.212e-05, "loss": 0.3343, "step": 18434 }, { "epoch": 1.0323104490984432, "grad_norm": 1.2737746238708496, "learning_rate": 9.2125e-05, "loss": 0.4865, "step": 18435 }, { "epoch": 1.0323664464105722, "grad_norm": 1.5287773609161377, "learning_rate": 9.213e-05, "loss": 0.5212, "step": 18436 }, { "epoch": 1.0324224437227012, "grad_norm": 1.218571662902832, "learning_rate": 9.2135e-05, "loss": 0.3727, "step": 18437 }, { "epoch": 1.0324784410348302, "grad_norm": 1.2497469186782837, "learning_rate": 9.214000000000001e-05, "loss": 0.4716, "step": 
18438 }, { "epoch": 1.0325344383469592, "grad_norm": 1.2556761503219604, "learning_rate": 9.2145e-05, "loss": 0.5003, "step": 18439 }, { "epoch": 1.0325904356590883, "grad_norm": 1.5302544832229614, "learning_rate": 9.215e-05, "loss": 0.353, "step": 18440 }, { "epoch": 1.0326464329712173, "grad_norm": 1.4618993997573853, "learning_rate": 9.2155e-05, "loss": 0.5285, "step": 18441 }, { "epoch": 1.0327024302833463, "grad_norm": 1.3802485466003418, "learning_rate": 9.216e-05, "loss": 0.4933, "step": 18442 }, { "epoch": 1.0327584275954753, "grad_norm": 1.2149531841278076, "learning_rate": 9.2165e-05, "loss": 0.4026, "step": 18443 }, { "epoch": 1.0328144249076043, "grad_norm": 1.3150397539138794, "learning_rate": 9.217000000000001e-05, "loss": 0.5103, "step": 18444 }, { "epoch": 1.0328704222197334, "grad_norm": 1.1010187864303589, "learning_rate": 9.2175e-05, "loss": 0.4004, "step": 18445 }, { "epoch": 1.0329264195318624, "grad_norm": 1.3528146743774414, "learning_rate": 9.218e-05, "loss": 0.4566, "step": 18446 }, { "epoch": 1.0329824168439914, "grad_norm": 1.2506998777389526, "learning_rate": 9.2185e-05, "loss": 0.378, "step": 18447 }, { "epoch": 1.0330384141561204, "grad_norm": 1.246854543685913, "learning_rate": 9.219000000000001e-05, "loss": 0.351, "step": 18448 }, { "epoch": 1.0330944114682494, "grad_norm": 1.083481788635254, "learning_rate": 9.219500000000001e-05, "loss": 0.3952, "step": 18449 }, { "epoch": 1.0331504087803784, "grad_norm": 1.1672245264053345, "learning_rate": 9.22e-05, "loss": 0.3209, "step": 18450 }, { "epoch": 1.0332064060925075, "grad_norm": 1.3521337509155273, "learning_rate": 9.2205e-05, "loss": 0.4215, "step": 18451 }, { "epoch": 1.0332624034046365, "grad_norm": 2.070510149002075, "learning_rate": 9.221000000000001e-05, "loss": 0.4247, "step": 18452 }, { "epoch": 1.0333184007167655, "grad_norm": 1.2192219495773315, "learning_rate": 9.221500000000001e-05, "loss": 0.4238, "step": 18453 }, { "epoch": 1.0333743980288945, "grad_norm": 
1.336152195930481, "learning_rate": 9.222000000000001e-05, "loss": 0.3947, "step": 18454 }, { "epoch": 1.0334303953410235, "grad_norm": 1.402063250541687, "learning_rate": 9.2225e-05, "loss": 0.4521, "step": 18455 }, { "epoch": 1.0334863926531526, "grad_norm": 1.3092293739318848, "learning_rate": 9.223e-05, "loss": 0.4352, "step": 18456 }, { "epoch": 1.0335423899652816, "grad_norm": 1.3286820650100708, "learning_rate": 9.2235e-05, "loss": 0.4113, "step": 18457 }, { "epoch": 1.0335983872774106, "grad_norm": 2.4885976314544678, "learning_rate": 9.224e-05, "loss": 0.3486, "step": 18458 }, { "epoch": 1.0336543845895396, "grad_norm": 1.1975092887878418, "learning_rate": 9.224500000000001e-05, "loss": 0.3241, "step": 18459 }, { "epoch": 1.0337103819016686, "grad_norm": 1.226528525352478, "learning_rate": 9.225e-05, "loss": 0.4842, "step": 18460 }, { "epoch": 1.0337663792137977, "grad_norm": 1.5238783359527588, "learning_rate": 9.2255e-05, "loss": 0.4655, "step": 18461 }, { "epoch": 1.0338223765259267, "grad_norm": 1.4806811809539795, "learning_rate": 9.226e-05, "loss": 0.4675, "step": 18462 }, { "epoch": 1.0338783738380557, "grad_norm": 1.292662501335144, "learning_rate": 9.2265e-05, "loss": 0.369, "step": 18463 }, { "epoch": 1.0339343711501847, "grad_norm": 1.2743792533874512, "learning_rate": 9.227e-05, "loss": 0.4125, "step": 18464 }, { "epoch": 1.0339903684623137, "grad_norm": 1.2489051818847656, "learning_rate": 9.2275e-05, "loss": 0.3937, "step": 18465 }, { "epoch": 1.0340463657744428, "grad_norm": 1.2261844873428345, "learning_rate": 9.228e-05, "loss": 0.3851, "step": 18466 }, { "epoch": 1.0341023630865718, "grad_norm": 1.483903169631958, "learning_rate": 9.2285e-05, "loss": 0.5393, "step": 18467 }, { "epoch": 1.0341583603987008, "grad_norm": 1.15414297580719, "learning_rate": 9.229000000000001e-05, "loss": 0.4449, "step": 18468 }, { "epoch": 1.0342143577108298, "grad_norm": 1.0663937330245972, "learning_rate": 9.229500000000001e-05, "loss": 0.3002, "step": 18469 
}, { "epoch": 1.0342703550229588, "grad_norm": 1.4299031496047974, "learning_rate": 9.230000000000001e-05, "loss": 0.4362, "step": 18470 }, { "epoch": 1.0343263523350879, "grad_norm": 1.37778902053833, "learning_rate": 9.2305e-05, "loss": 0.4495, "step": 18471 }, { "epoch": 1.0343823496472169, "grad_norm": 1.7153373956680298, "learning_rate": 9.231e-05, "loss": 0.4534, "step": 18472 }, { "epoch": 1.034438346959346, "grad_norm": 1.1465803384780884, "learning_rate": 9.231500000000001e-05, "loss": 0.3172, "step": 18473 }, { "epoch": 1.034494344271475, "grad_norm": 1.223066806793213, "learning_rate": 9.232000000000001e-05, "loss": 0.5094, "step": 18474 }, { "epoch": 1.034550341583604, "grad_norm": 1.3934199810028076, "learning_rate": 9.232500000000001e-05, "loss": 0.3479, "step": 18475 }, { "epoch": 1.034606338895733, "grad_norm": 1.3378269672393799, "learning_rate": 9.233e-05, "loss": 0.496, "step": 18476 }, { "epoch": 1.034662336207862, "grad_norm": 1.2652432918548584, "learning_rate": 9.2335e-05, "loss": 0.461, "step": 18477 }, { "epoch": 1.034718333519991, "grad_norm": 1.155977725982666, "learning_rate": 9.234e-05, "loss": 0.2798, "step": 18478 }, { "epoch": 1.03477433083212, "grad_norm": 1.3027223348617554, "learning_rate": 9.2345e-05, "loss": 0.4, "step": 18479 }, { "epoch": 1.034830328144249, "grad_norm": 1.2647991180419922, "learning_rate": 9.235000000000001e-05, "loss": 0.4596, "step": 18480 }, { "epoch": 1.034886325456378, "grad_norm": 1.5008424520492554, "learning_rate": 9.2355e-05, "loss": 0.4096, "step": 18481 }, { "epoch": 1.034942322768507, "grad_norm": 1.7912589311599731, "learning_rate": 9.236e-05, "loss": 0.4737, "step": 18482 }, { "epoch": 1.034998320080636, "grad_norm": 1.201886534690857, "learning_rate": 9.2365e-05, "loss": 0.5048, "step": 18483 }, { "epoch": 1.035054317392765, "grad_norm": 1.1117593050003052, "learning_rate": 9.237e-05, "loss": 0.3959, "step": 18484 }, { "epoch": 1.0351103147048941, "grad_norm": 1.2172096967697144, 
"learning_rate": 9.2375e-05, "loss": 0.4206, "step": 18485 }, { "epoch": 1.0351663120170231, "grad_norm": 1.2553060054779053, "learning_rate": 9.238e-05, "loss": 0.5313, "step": 18486 }, { "epoch": 1.0352223093291522, "grad_norm": 1.4407403469085693, "learning_rate": 9.2385e-05, "loss": 0.4232, "step": 18487 }, { "epoch": 1.0352783066412812, "grad_norm": 1.471834659576416, "learning_rate": 9.239000000000001e-05, "loss": 0.383, "step": 18488 }, { "epoch": 1.0353343039534102, "grad_norm": 1.1300737857818604, "learning_rate": 9.239500000000001e-05, "loss": 0.3147, "step": 18489 }, { "epoch": 1.0353903012655392, "grad_norm": 1.2885539531707764, "learning_rate": 9.240000000000001e-05, "loss": 0.3815, "step": 18490 }, { "epoch": 1.0354462985776682, "grad_norm": 1.3831018209457397, "learning_rate": 9.240500000000001e-05, "loss": 0.4184, "step": 18491 }, { "epoch": 1.0355022958897973, "grad_norm": 1.3933441638946533, "learning_rate": 9.241e-05, "loss": 0.501, "step": 18492 }, { "epoch": 1.0355582932019263, "grad_norm": 1.4546265602111816, "learning_rate": 9.2415e-05, "loss": 0.3986, "step": 18493 }, { "epoch": 1.0356142905140553, "grad_norm": 1.6683248281478882, "learning_rate": 9.242000000000001e-05, "loss": 0.42, "step": 18494 }, { "epoch": 1.0356702878261843, "grad_norm": 1.5890135765075684, "learning_rate": 9.242500000000001e-05, "loss": 0.3606, "step": 18495 }, { "epoch": 1.0357262851383133, "grad_norm": 1.2694878578186035, "learning_rate": 9.243000000000001e-05, "loss": 0.3669, "step": 18496 }, { "epoch": 1.0357822824504423, "grad_norm": 1.2919161319732666, "learning_rate": 9.2435e-05, "loss": 0.3652, "step": 18497 }, { "epoch": 1.0358382797625714, "grad_norm": 1.3796987533569336, "learning_rate": 9.244e-05, "loss": 0.6024, "step": 18498 }, { "epoch": 1.0358942770747004, "grad_norm": 1.7107839584350586, "learning_rate": 9.2445e-05, "loss": 0.6868, "step": 18499 }, { "epoch": 1.0359502743868294, "grad_norm": 1.4201178550720215, "learning_rate": 9.245e-05, "loss": 
0.3733, "step": 18500 }, { "epoch": 1.0360062716989584, "grad_norm": 1.321673035621643, "learning_rate": 9.245500000000001e-05, "loss": 0.3997, "step": 18501 }, { "epoch": 1.0360622690110874, "grad_norm": 1.5087130069732666, "learning_rate": 9.246e-05, "loss": 0.422, "step": 18502 }, { "epoch": 1.0361182663232165, "grad_norm": 1.3422858715057373, "learning_rate": 9.2465e-05, "loss": 0.4653, "step": 18503 }, { "epoch": 1.0361742636353455, "grad_norm": 1.1116987466812134, "learning_rate": 9.247e-05, "loss": 0.2964, "step": 18504 }, { "epoch": 1.0362302609474745, "grad_norm": 1.3748716115951538, "learning_rate": 9.2475e-05, "loss": 0.4254, "step": 18505 }, { "epoch": 1.0362862582596035, "grad_norm": 1.7915778160095215, "learning_rate": 9.248e-05, "loss": 0.4567, "step": 18506 }, { "epoch": 1.0363422555717325, "grad_norm": 1.5386706590652466, "learning_rate": 9.248499999999999e-05, "loss": 0.474, "step": 18507 }, { "epoch": 1.0363982528838616, "grad_norm": 1.4944480657577515, "learning_rate": 9.249e-05, "loss": 0.351, "step": 18508 }, { "epoch": 1.0364542501959906, "grad_norm": 1.542141318321228, "learning_rate": 9.249500000000001e-05, "loss": 0.4844, "step": 18509 }, { "epoch": 1.0365102475081196, "grad_norm": 1.530866265296936, "learning_rate": 9.250000000000001e-05, "loss": 0.4474, "step": 18510 }, { "epoch": 1.0365662448202486, "grad_norm": 1.4048144817352295, "learning_rate": 9.250500000000001e-05, "loss": 0.4107, "step": 18511 }, { "epoch": 1.0366222421323776, "grad_norm": 1.313151478767395, "learning_rate": 9.251000000000001e-05, "loss": 0.5219, "step": 18512 }, { "epoch": 1.0366782394445067, "grad_norm": 1.5241057872772217, "learning_rate": 9.2515e-05, "loss": 0.4295, "step": 18513 }, { "epoch": 1.0367342367566357, "grad_norm": 1.1829602718353271, "learning_rate": 9.252e-05, "loss": 0.3259, "step": 18514 }, { "epoch": 1.0367902340687647, "grad_norm": 1.3312814235687256, "learning_rate": 9.252500000000001e-05, "loss": 0.3562, "step": 18515 }, { "epoch": 
1.0368462313808937, "grad_norm": 1.2442947626113892, "learning_rate": 9.253000000000001e-05, "loss": 0.4495, "step": 18516 }, { "epoch": 1.0369022286930227, "grad_norm": 1.3141032457351685, "learning_rate": 9.253500000000001e-05, "loss": 0.4944, "step": 18517 }, { "epoch": 1.0369582260051518, "grad_norm": 2.926708459854126, "learning_rate": 9.254e-05, "loss": 0.42, "step": 18518 }, { "epoch": 1.0370142233172808, "grad_norm": 1.6537495851516724, "learning_rate": 9.2545e-05, "loss": 0.3126, "step": 18519 }, { "epoch": 1.0370702206294098, "grad_norm": 1.189548134803772, "learning_rate": 9.255e-05, "loss": 0.4293, "step": 18520 }, { "epoch": 1.0371262179415388, "grad_norm": 1.8494853973388672, "learning_rate": 9.2555e-05, "loss": 0.4212, "step": 18521 }, { "epoch": 1.0371822152536678, "grad_norm": 1.5236074924468994, "learning_rate": 9.256000000000001e-05, "loss": 0.484, "step": 18522 }, { "epoch": 1.0372382125657968, "grad_norm": 1.5900028944015503, "learning_rate": 9.2565e-05, "loss": 0.4683, "step": 18523 }, { "epoch": 1.0372942098779259, "grad_norm": 1.4063224792480469, "learning_rate": 9.257e-05, "loss": 0.4586, "step": 18524 }, { "epoch": 1.0373502071900549, "grad_norm": 1.189079999923706, "learning_rate": 9.2575e-05, "loss": 0.389, "step": 18525 }, { "epoch": 1.037406204502184, "grad_norm": 1.4907859563827515, "learning_rate": 9.258e-05, "loss": 0.4019, "step": 18526 }, { "epoch": 1.037462201814313, "grad_norm": 1.4381401538848877, "learning_rate": 9.2585e-05, "loss": 0.5002, "step": 18527 }, { "epoch": 1.037518199126442, "grad_norm": 1.2284777164459229, "learning_rate": 9.258999999999999e-05, "loss": 0.4127, "step": 18528 }, { "epoch": 1.037574196438571, "grad_norm": 1.3698266744613647, "learning_rate": 9.2595e-05, "loss": 0.4886, "step": 18529 }, { "epoch": 1.0376301937507, "grad_norm": 1.2363755702972412, "learning_rate": 9.260000000000001e-05, "loss": 0.4111, "step": 18530 }, { "epoch": 1.037686191062829, "grad_norm": 1.3494611978530884, "learning_rate": 
9.260500000000001e-05, "loss": 0.4135, "step": 18531 }, { "epoch": 1.037742188374958, "grad_norm": 1.5822937488555908, "learning_rate": 9.261000000000001e-05, "loss": 0.5227, "step": 18532 }, { "epoch": 1.037798185687087, "grad_norm": 1.212691307067871, "learning_rate": 9.261500000000001e-05, "loss": 0.3829, "step": 18533 }, { "epoch": 1.037854182999216, "grad_norm": 1.24557626247406, "learning_rate": 9.262e-05, "loss": 0.3104, "step": 18534 }, { "epoch": 1.037910180311345, "grad_norm": 1.5108773708343506, "learning_rate": 9.2625e-05, "loss": 0.5821, "step": 18535 }, { "epoch": 1.037966177623474, "grad_norm": 1.411641240119934, "learning_rate": 9.263e-05, "loss": 0.4712, "step": 18536 }, { "epoch": 1.0380221749356031, "grad_norm": 1.4698002338409424, "learning_rate": 9.263500000000001e-05, "loss": 0.4311, "step": 18537 }, { "epoch": 1.0380781722477321, "grad_norm": 1.2794344425201416, "learning_rate": 9.264000000000001e-05, "loss": 0.5081, "step": 18538 }, { "epoch": 1.0381341695598612, "grad_norm": 1.553958773612976, "learning_rate": 9.2645e-05, "loss": 0.3672, "step": 18539 }, { "epoch": 1.0381901668719902, "grad_norm": 1.2659039497375488, "learning_rate": 9.265e-05, "loss": 0.4383, "step": 18540 }, { "epoch": 1.0382461641841192, "grad_norm": 1.397771954536438, "learning_rate": 9.2655e-05, "loss": 0.4917, "step": 18541 }, { "epoch": 1.0383021614962482, "grad_norm": 1.0613263845443726, "learning_rate": 9.266e-05, "loss": 0.303, "step": 18542 }, { "epoch": 1.0383581588083772, "grad_norm": 1.3095909357070923, "learning_rate": 9.266500000000001e-05, "loss": 0.3641, "step": 18543 }, { "epoch": 1.0384141561205062, "grad_norm": 1.1477950811386108, "learning_rate": 9.267e-05, "loss": 0.4294, "step": 18544 }, { "epoch": 1.0384701534326353, "grad_norm": 1.4217934608459473, "learning_rate": 9.2675e-05, "loss": 0.3987, "step": 18545 }, { "epoch": 1.0385261507447643, "grad_norm": 1.280185580253601, "learning_rate": 9.268e-05, "loss": 0.3985, "step": 18546 }, { "epoch": 
1.0385821480568933, "grad_norm": 1.384057641029358, "learning_rate": 9.2685e-05, "loss": 0.6648, "step": 18547 }, { "epoch": 1.0386381453690223, "grad_norm": 1.3243428468704224, "learning_rate": 9.269e-05, "loss": 0.4763, "step": 18548 }, { "epoch": 1.0386941426811513, "grad_norm": 1.3901629447937012, "learning_rate": 9.2695e-05, "loss": 0.5321, "step": 18549 }, { "epoch": 1.0387501399932804, "grad_norm": 1.280961513519287, "learning_rate": 9.27e-05, "loss": 0.4644, "step": 18550 }, { "epoch": 1.0388061373054094, "grad_norm": 1.2251585721969604, "learning_rate": 9.270500000000001e-05, "loss": 0.4093, "step": 18551 }, { "epoch": 1.0388621346175384, "grad_norm": 1.186393141746521, "learning_rate": 9.271000000000001e-05, "loss": 0.3122, "step": 18552 }, { "epoch": 1.0389181319296674, "grad_norm": 1.3896955251693726, "learning_rate": 9.271500000000001e-05, "loss": 0.4324, "step": 18553 }, { "epoch": 1.0389741292417964, "grad_norm": 1.251495361328125, "learning_rate": 9.272e-05, "loss": 0.3037, "step": 18554 }, { "epoch": 1.0390301265539255, "grad_norm": 1.3746366500854492, "learning_rate": 9.2725e-05, "loss": 0.4114, "step": 18555 }, { "epoch": 1.0390861238660545, "grad_norm": 1.6332991123199463, "learning_rate": 9.273e-05, "loss": 0.4307, "step": 18556 }, { "epoch": 1.0391421211781835, "grad_norm": 1.337018609046936, "learning_rate": 9.2735e-05, "loss": 0.4085, "step": 18557 }, { "epoch": 1.0391981184903125, "grad_norm": 1.35471510887146, "learning_rate": 9.274000000000001e-05, "loss": 0.5234, "step": 18558 }, { "epoch": 1.0392541158024415, "grad_norm": 1.4117554426193237, "learning_rate": 9.274500000000001e-05, "loss": 0.4672, "step": 18559 }, { "epoch": 1.0393101131145706, "grad_norm": 1.4028817415237427, "learning_rate": 9.275e-05, "loss": 0.4823, "step": 18560 }, { "epoch": 1.0393661104266996, "grad_norm": 1.233064889907837, "learning_rate": 9.2755e-05, "loss": 0.5555, "step": 18561 }, { "epoch": 1.0394221077388286, "grad_norm": 1.1610888242721558, 
"learning_rate": 9.276e-05, "loss": 0.3154, "step": 18562 }, { "epoch": 1.0394781050509576, "grad_norm": 1.180823564529419, "learning_rate": 9.2765e-05, "loss": 0.3393, "step": 18563 }, { "epoch": 1.0395341023630866, "grad_norm": 1.627644419670105, "learning_rate": 9.277000000000001e-05, "loss": 0.5276, "step": 18564 }, { "epoch": 1.0395900996752157, "grad_norm": 1.388357162475586, "learning_rate": 9.2775e-05, "loss": 0.4377, "step": 18565 }, { "epoch": 1.0396460969873447, "grad_norm": 1.2008401155471802, "learning_rate": 9.278e-05, "loss": 0.4566, "step": 18566 }, { "epoch": 1.0397020942994737, "grad_norm": 1.2332335710525513, "learning_rate": 9.2785e-05, "loss": 0.4352, "step": 18567 }, { "epoch": 1.0397580916116027, "grad_norm": 1.3955116271972656, "learning_rate": 9.279e-05, "loss": 0.3749, "step": 18568 }, { "epoch": 1.0398140889237317, "grad_norm": 1.4083069562911987, "learning_rate": 9.279500000000001e-05, "loss": 0.4986, "step": 18569 }, { "epoch": 1.0398700862358607, "grad_norm": 1.2220817804336548, "learning_rate": 9.28e-05, "loss": 0.3388, "step": 18570 }, { "epoch": 1.0399260835479898, "grad_norm": 3.772998571395874, "learning_rate": 9.2805e-05, "loss": 0.4862, "step": 18571 }, { "epoch": 1.0399820808601188, "grad_norm": 1.30063796043396, "learning_rate": 9.281000000000001e-05, "loss": 0.5021, "step": 18572 }, { "epoch": 1.0400380781722478, "grad_norm": 1.389096736907959, "learning_rate": 9.281500000000001e-05, "loss": 0.4203, "step": 18573 }, { "epoch": 1.0400940754843768, "grad_norm": 1.1973382234573364, "learning_rate": 9.282000000000001e-05, "loss": 0.46, "step": 18574 }, { "epoch": 1.0401500727965058, "grad_norm": 1.3328988552093506, "learning_rate": 9.2825e-05, "loss": 0.4274, "step": 18575 }, { "epoch": 1.0402060701086349, "grad_norm": 1.541388988494873, "learning_rate": 9.283e-05, "loss": 0.5745, "step": 18576 }, { "epoch": 1.0402620674207639, "grad_norm": 1.273723840713501, "learning_rate": 9.2835e-05, "loss": 0.3905, "step": 18577 }, { 
"epoch": 1.040318064732893, "grad_norm": 1.2285714149475098, "learning_rate": 9.284e-05, "loss": 0.3658, "step": 18578 }, { "epoch": 1.040374062045022, "grad_norm": 1.3290058374404907, "learning_rate": 9.284500000000001e-05, "loss": 0.3227, "step": 18579 }, { "epoch": 1.040430059357151, "grad_norm": 1.489951491355896, "learning_rate": 9.285000000000001e-05, "loss": 0.4602, "step": 18580 }, { "epoch": 1.04048605666928, "grad_norm": 1.428558111190796, "learning_rate": 9.2855e-05, "loss": 0.4313, "step": 18581 }, { "epoch": 1.040542053981409, "grad_norm": 1.4195235967636108, "learning_rate": 9.286e-05, "loss": 0.4642, "step": 18582 }, { "epoch": 1.040598051293538, "grad_norm": 1.3619436025619507, "learning_rate": 9.2865e-05, "loss": 0.4122, "step": 18583 }, { "epoch": 1.040654048605667, "grad_norm": 1.297425627708435, "learning_rate": 9.287e-05, "loss": 0.4537, "step": 18584 }, { "epoch": 1.040710045917796, "grad_norm": 1.6776831150054932, "learning_rate": 9.2875e-05, "loss": 0.5702, "step": 18585 }, { "epoch": 1.040766043229925, "grad_norm": 1.1916131973266602, "learning_rate": 9.288e-05, "loss": 0.3784, "step": 18586 }, { "epoch": 1.040822040542054, "grad_norm": 1.7893197536468506, "learning_rate": 9.2885e-05, "loss": 0.4912, "step": 18587 }, { "epoch": 1.040878037854183, "grad_norm": 1.0918089151382446, "learning_rate": 9.289e-05, "loss": 0.3608, "step": 18588 }, { "epoch": 1.040934035166312, "grad_norm": 1.0850043296813965, "learning_rate": 9.289500000000001e-05, "loss": 0.321, "step": 18589 }, { "epoch": 1.0409900324784411, "grad_norm": 1.3966138362884521, "learning_rate": 9.290000000000001e-05, "loss": 0.4968, "step": 18590 }, { "epoch": 1.0410460297905701, "grad_norm": 1.399404525756836, "learning_rate": 9.2905e-05, "loss": 0.4167, "step": 18591 }, { "epoch": 1.0411020271026992, "grad_norm": 1.5733047723770142, "learning_rate": 9.291e-05, "loss": 0.399, "step": 18592 }, { "epoch": 1.0411580244148282, "grad_norm": 1.2910435199737549, "learning_rate": 
9.291500000000001e-05, "loss": 0.5577, "step": 18593 }, { "epoch": 1.0412140217269572, "grad_norm": 1.4125996828079224, "learning_rate": 9.292000000000001e-05, "loss": 0.4281, "step": 18594 }, { "epoch": 1.0412700190390862, "grad_norm": 1.448743224143982, "learning_rate": 9.292500000000001e-05, "loss": 0.373, "step": 18595 }, { "epoch": 1.0413260163512152, "grad_norm": 1.3208740949630737, "learning_rate": 9.293e-05, "loss": 0.4838, "step": 18596 }, { "epoch": 1.0413820136633443, "grad_norm": 2.6943812370300293, "learning_rate": 9.2935e-05, "loss": 0.3669, "step": 18597 }, { "epoch": 1.0414380109754733, "grad_norm": 1.666571021080017, "learning_rate": 9.294e-05, "loss": 0.4782, "step": 18598 }, { "epoch": 1.0414940082876023, "grad_norm": 1.346853256225586, "learning_rate": 9.2945e-05, "loss": 0.4829, "step": 18599 }, { "epoch": 1.0415500055997313, "grad_norm": 1.322637915611267, "learning_rate": 9.295000000000001e-05, "loss": 0.3847, "step": 18600 }, { "epoch": 1.0416060029118603, "grad_norm": 1.4859142303466797, "learning_rate": 9.295500000000001e-05, "loss": 0.4769, "step": 18601 }, { "epoch": 1.0416620002239894, "grad_norm": 1.3755728006362915, "learning_rate": 9.296e-05, "loss": 0.4521, "step": 18602 }, { "epoch": 1.0417179975361184, "grad_norm": 1.2686610221862793, "learning_rate": 9.2965e-05, "loss": 0.3776, "step": 18603 }, { "epoch": 1.0417739948482474, "grad_norm": 1.4579969644546509, "learning_rate": 9.297e-05, "loss": 0.5432, "step": 18604 }, { "epoch": 1.0418299921603762, "grad_norm": 1.4644408226013184, "learning_rate": 9.2975e-05, "loss": 0.4334, "step": 18605 }, { "epoch": 1.0418859894725052, "grad_norm": 1.4493407011032104, "learning_rate": 9.298e-05, "loss": 0.4742, "step": 18606 }, { "epoch": 1.0419419867846342, "grad_norm": 1.270177960395813, "learning_rate": 9.2985e-05, "loss": 0.3793, "step": 18607 }, { "epoch": 1.0419979840967633, "grad_norm": 1.3753674030303955, "learning_rate": 9.299e-05, "loss": 0.4165, "step": 18608 }, { "epoch": 
1.0420539814088923, "grad_norm": 1.2743052244186401, "learning_rate": 9.299500000000001e-05, "loss": 0.443, "step": 18609 }, { "epoch": 1.0421099787210213, "grad_norm": 1.2452137470245361, "learning_rate": 9.300000000000001e-05, "loss": 0.3657, "step": 18610 }, { "epoch": 1.0421659760331503, "grad_norm": 1.2093377113342285, "learning_rate": 9.300500000000001e-05, "loss": 0.4707, "step": 18611 }, { "epoch": 1.0422219733452793, "grad_norm": 1.4324147701263428, "learning_rate": 9.301e-05, "loss": 0.392, "step": 18612 }, { "epoch": 1.0422779706574083, "grad_norm": 1.2902770042419434, "learning_rate": 9.3015e-05, "loss": 0.4421, "step": 18613 }, { "epoch": 1.0423339679695374, "grad_norm": 1.3648157119750977, "learning_rate": 9.302e-05, "loss": 0.4302, "step": 18614 }, { "epoch": 1.0423899652816664, "grad_norm": 1.7705236673355103, "learning_rate": 9.302500000000001e-05, "loss": 0.4566, "step": 18615 }, { "epoch": 1.0424459625937954, "grad_norm": 1.3672006130218506, "learning_rate": 9.303000000000001e-05, "loss": 0.4822, "step": 18616 }, { "epoch": 1.0425019599059244, "grad_norm": 1.1181634664535522, "learning_rate": 9.3035e-05, "loss": 0.3502, "step": 18617 }, { "epoch": 1.0425579572180534, "grad_norm": 1.2181497812271118, "learning_rate": 9.304e-05, "loss": 0.3622, "step": 18618 }, { "epoch": 1.0426139545301825, "grad_norm": 1.4582486152648926, "learning_rate": 9.3045e-05, "loss": 0.3453, "step": 18619 }, { "epoch": 1.0426699518423115, "grad_norm": 1.049888253211975, "learning_rate": 9.305e-05, "loss": 0.385, "step": 18620 }, { "epoch": 1.0427259491544405, "grad_norm": 1.425157904624939, "learning_rate": 9.305500000000001e-05, "loss": 0.4258, "step": 18621 }, { "epoch": 1.0427819464665695, "grad_norm": 1.3080180883407593, "learning_rate": 9.306000000000001e-05, "loss": 0.4685, "step": 18622 }, { "epoch": 1.0428379437786985, "grad_norm": 1.5657936334609985, "learning_rate": 9.3065e-05, "loss": 0.6739, "step": 18623 }, { "epoch": 1.0428939410908276, "grad_norm": 
1.6061358451843262, "learning_rate": 9.307e-05, "loss": 0.4863, "step": 18624 }, { "epoch": 1.0429499384029566, "grad_norm": 1.7165899276733398, "learning_rate": 9.3075e-05, "loss": 0.6032, "step": 18625 }, { "epoch": 1.0430059357150856, "grad_norm": 1.1328483819961548, "learning_rate": 9.308e-05, "loss": 0.3495, "step": 18626 }, { "epoch": 1.0430619330272146, "grad_norm": 1.3610625267028809, "learning_rate": 9.3085e-05, "loss": 0.4514, "step": 18627 }, { "epoch": 1.0431179303393436, "grad_norm": 1.328114628791809, "learning_rate": 9.309e-05, "loss": 0.4111, "step": 18628 }, { "epoch": 1.0431739276514727, "grad_norm": 1.3617134094238281, "learning_rate": 9.309500000000002e-05, "loss": 0.4748, "step": 18629 }, { "epoch": 1.0432299249636017, "grad_norm": 4.176542282104492, "learning_rate": 9.310000000000001e-05, "loss": 0.4475, "step": 18630 }, { "epoch": 1.0432859222757307, "grad_norm": 1.5613341331481934, "learning_rate": 9.310500000000001e-05, "loss": 0.472, "step": 18631 }, { "epoch": 1.0433419195878597, "grad_norm": 1.3893775939941406, "learning_rate": 9.311000000000001e-05, "loss": 0.4798, "step": 18632 }, { "epoch": 1.0433979168999887, "grad_norm": 1.4824298620224, "learning_rate": 9.3115e-05, "loss": 0.4829, "step": 18633 }, { "epoch": 1.0434539142121178, "grad_norm": 1.209849238395691, "learning_rate": 9.312e-05, "loss": 0.36, "step": 18634 }, { "epoch": 1.0435099115242468, "grad_norm": 1.2994004487991333, "learning_rate": 9.3125e-05, "loss": 0.5197, "step": 18635 }, { "epoch": 1.0435659088363758, "grad_norm": 1.2604016065597534, "learning_rate": 9.313000000000001e-05, "loss": 0.4214, "step": 18636 }, { "epoch": 1.0436219061485048, "grad_norm": 1.1662832498550415, "learning_rate": 9.313500000000001e-05, "loss": 0.3889, "step": 18637 }, { "epoch": 1.0436779034606338, "grad_norm": 1.609951376914978, "learning_rate": 9.314e-05, "loss": 0.3799, "step": 18638 }, { "epoch": 1.0437339007727628, "grad_norm": 1.2422561645507812, "learning_rate": 9.3145e-05, "loss": 
0.4896, "step": 18639 }, { "epoch": 1.0437898980848919, "grad_norm": 1.5622755289077759, "learning_rate": 9.315e-05, "loss": 0.4953, "step": 18640 }, { "epoch": 1.0438458953970209, "grad_norm": 1.173612356185913, "learning_rate": 9.3155e-05, "loss": 0.4147, "step": 18641 }, { "epoch": 1.04390189270915, "grad_norm": 1.1745073795318604, "learning_rate": 9.316000000000001e-05, "loss": 0.366, "step": 18642 }, { "epoch": 1.043957890021279, "grad_norm": 1.1597683429718018, "learning_rate": 9.3165e-05, "loss": 0.3775, "step": 18643 }, { "epoch": 1.044013887333408, "grad_norm": 1.605271339416504, "learning_rate": 9.317e-05, "loss": 0.3384, "step": 18644 }, { "epoch": 1.044069884645537, "grad_norm": 1.1824640035629272, "learning_rate": 9.3175e-05, "loss": 0.5947, "step": 18645 }, { "epoch": 1.044125881957666, "grad_norm": 1.3627582788467407, "learning_rate": 9.318e-05, "loss": 0.4234, "step": 18646 }, { "epoch": 1.044181879269795, "grad_norm": 1.2604939937591553, "learning_rate": 9.3185e-05, "loss": 0.4672, "step": 18647 }, { "epoch": 1.044237876581924, "grad_norm": 1.429374098777771, "learning_rate": 9.319e-05, "loss": 0.47, "step": 18648 }, { "epoch": 1.044293873894053, "grad_norm": 1.1948559284210205, "learning_rate": 9.3195e-05, "loss": 0.3278, "step": 18649 }, { "epoch": 1.044349871206182, "grad_norm": 1.5485488176345825, "learning_rate": 9.320000000000002e-05, "loss": 0.6168, "step": 18650 }, { "epoch": 1.044405868518311, "grad_norm": 1.5370079278945923, "learning_rate": 9.320500000000001e-05, "loss": 0.3781, "step": 18651 }, { "epoch": 1.04446186583044, "grad_norm": 1.2994308471679688, "learning_rate": 9.321000000000001e-05, "loss": 0.437, "step": 18652 }, { "epoch": 1.0445178631425691, "grad_norm": 1.4331862926483154, "learning_rate": 9.321500000000001e-05, "loss": 0.5217, "step": 18653 }, { "epoch": 1.0445738604546981, "grad_norm": 1.451013445854187, "learning_rate": 9.322e-05, "loss": 0.5015, "step": 18654 }, { "epoch": 1.0446298577668272, "grad_norm": 
1.409241795539856, "learning_rate": 9.3225e-05, "loss": 0.5115, "step": 18655 }, { "epoch": 1.0446858550789562, "grad_norm": 1.174866795539856, "learning_rate": 9.323e-05, "loss": 0.4376, "step": 18656 }, { "epoch": 1.0447418523910852, "grad_norm": 1.2477177381515503, "learning_rate": 9.323500000000001e-05, "loss": 0.3533, "step": 18657 }, { "epoch": 1.0447978497032142, "grad_norm": 1.4175711870193481, "learning_rate": 9.324000000000001e-05, "loss": 0.3326, "step": 18658 }, { "epoch": 1.0448538470153432, "grad_norm": 1.3794015645980835, "learning_rate": 9.3245e-05, "loss": 0.3185, "step": 18659 }, { "epoch": 1.0449098443274722, "grad_norm": 1.3675583600997925, "learning_rate": 9.325e-05, "loss": 0.3837, "step": 18660 }, { "epoch": 1.0449658416396013, "grad_norm": 1.3625034093856812, "learning_rate": 9.3255e-05, "loss": 0.5808, "step": 18661 }, { "epoch": 1.0450218389517303, "grad_norm": 2.7807505130767822, "learning_rate": 9.326e-05, "loss": 0.4191, "step": 18662 }, { "epoch": 1.0450778362638593, "grad_norm": 1.6819387674331665, "learning_rate": 9.326500000000001e-05, "loss": 0.5341, "step": 18663 }, { "epoch": 1.0451338335759883, "grad_norm": 1.4379454851150513, "learning_rate": 9.327e-05, "loss": 0.4973, "step": 18664 }, { "epoch": 1.0451898308881173, "grad_norm": 1.2826933860778809, "learning_rate": 9.3275e-05, "loss": 0.4619, "step": 18665 }, { "epoch": 1.0452458282002464, "grad_norm": 1.3095221519470215, "learning_rate": 9.328e-05, "loss": 0.451, "step": 18666 }, { "epoch": 1.0453018255123754, "grad_norm": 1.3317927122116089, "learning_rate": 9.3285e-05, "loss": 0.4667, "step": 18667 }, { "epoch": 1.0453578228245044, "grad_norm": 1.3291807174682617, "learning_rate": 9.329e-05, "loss": 0.353, "step": 18668 }, { "epoch": 1.0454138201366334, "grad_norm": 1.4625712633132935, "learning_rate": 9.3295e-05, "loss": 0.4523, "step": 18669 }, { "epoch": 1.0454698174487624, "grad_norm": 1.2807096242904663, "learning_rate": 9.33e-05, "loss": 0.451, "step": 18670 }, { 
"epoch": 1.0455258147608915, "grad_norm": 1.2990058660507202, "learning_rate": 9.330500000000002e-05, "loss": 0.4871, "step": 18671 }, { "epoch": 1.0455818120730205, "grad_norm": 1.5651144981384277, "learning_rate": 9.331000000000001e-05, "loss": 0.3684, "step": 18672 }, { "epoch": 1.0456378093851495, "grad_norm": 3.8242337703704834, "learning_rate": 9.331500000000001e-05, "loss": 0.3723, "step": 18673 }, { "epoch": 1.0456938066972785, "grad_norm": 1.4493603706359863, "learning_rate": 9.332000000000001e-05, "loss": 0.4018, "step": 18674 }, { "epoch": 1.0457498040094075, "grad_norm": 1.471799612045288, "learning_rate": 9.3325e-05, "loss": 0.5327, "step": 18675 }, { "epoch": 1.0458058013215366, "grad_norm": 1.222642183303833, "learning_rate": 9.333e-05, "loss": 0.3335, "step": 18676 }, { "epoch": 1.0458617986336656, "grad_norm": 1.3658721446990967, "learning_rate": 9.3335e-05, "loss": 0.4231, "step": 18677 }, { "epoch": 1.0459177959457946, "grad_norm": 1.2885686159133911, "learning_rate": 9.334000000000001e-05, "loss": 0.3942, "step": 18678 }, { "epoch": 1.0459737932579236, "grad_norm": 1.3054735660552979, "learning_rate": 9.334500000000001e-05, "loss": 0.3332, "step": 18679 }, { "epoch": 1.0460297905700526, "grad_norm": 1.490426778793335, "learning_rate": 9.335e-05, "loss": 0.4277, "step": 18680 }, { "epoch": 1.0460857878821817, "grad_norm": 1.3818128108978271, "learning_rate": 9.3355e-05, "loss": 0.3945, "step": 18681 }, { "epoch": 1.0461417851943107, "grad_norm": 1.3044003248214722, "learning_rate": 9.336e-05, "loss": 0.4266, "step": 18682 }, { "epoch": 1.0461977825064397, "grad_norm": 1.2530335187911987, "learning_rate": 9.3365e-05, "loss": 0.3952, "step": 18683 }, { "epoch": 1.0462537798185687, "grad_norm": 1.3521382808685303, "learning_rate": 9.337e-05, "loss": 0.4088, "step": 18684 }, { "epoch": 1.0463097771306977, "grad_norm": 1.6739455461502075, "learning_rate": 9.3375e-05, "loss": 0.4374, "step": 18685 }, { "epoch": 1.0463657744428267, "grad_norm": 
1.3913389444351196, "learning_rate": 9.338e-05, "loss": 0.4193, "step": 18686 }, { "epoch": 1.0464217717549558, "grad_norm": 1.3583130836486816, "learning_rate": 9.3385e-05, "loss": 0.5621, "step": 18687 }, { "epoch": 1.0464777690670848, "grad_norm": 2.3243002891540527, "learning_rate": 9.339e-05, "loss": 0.4014, "step": 18688 }, { "epoch": 1.0465337663792138, "grad_norm": 1.4661026000976562, "learning_rate": 9.3395e-05, "loss": 0.4266, "step": 18689 }, { "epoch": 1.0465897636913428, "grad_norm": 1.2348803281784058, "learning_rate": 9.340000000000001e-05, "loss": 0.3395, "step": 18690 }, { "epoch": 1.0466457610034718, "grad_norm": 1.4044138193130493, "learning_rate": 9.3405e-05, "loss": 0.4421, "step": 18691 }, { "epoch": 1.0467017583156009, "grad_norm": 1.2582522630691528, "learning_rate": 9.341000000000002e-05, "loss": 0.3806, "step": 18692 }, { "epoch": 1.0467577556277299, "grad_norm": 1.372855544090271, "learning_rate": 9.341500000000001e-05, "loss": 0.4049, "step": 18693 }, { "epoch": 1.046813752939859, "grad_norm": 1.436766505241394, "learning_rate": 9.342000000000001e-05, "loss": 0.4663, "step": 18694 }, { "epoch": 1.046869750251988, "grad_norm": 1.523422360420227, "learning_rate": 9.342500000000001e-05, "loss": 0.4943, "step": 18695 }, { "epoch": 1.046925747564117, "grad_norm": 1.4499166011810303, "learning_rate": 9.343e-05, "loss": 0.5359, "step": 18696 }, { "epoch": 1.046981744876246, "grad_norm": 1.202656626701355, "learning_rate": 9.3435e-05, "loss": 0.4509, "step": 18697 }, { "epoch": 1.047037742188375, "grad_norm": 1.2682124376296997, "learning_rate": 9.344e-05, "loss": 0.4227, "step": 18698 }, { "epoch": 1.047093739500504, "grad_norm": 1.3726247549057007, "learning_rate": 9.344500000000001e-05, "loss": 0.4103, "step": 18699 }, { "epoch": 1.047149736812633, "grad_norm": 1.339306116104126, "learning_rate": 9.345000000000001e-05, "loss": 0.585, "step": 18700 }, { "epoch": 1.047205734124762, "grad_norm": 2.1495375633239746, "learning_rate": 9.3455e-05, 
"loss": 0.4186, "step": 18701 }, { "epoch": 1.047261731436891, "grad_norm": 1.4257663488388062, "learning_rate": 9.346e-05, "loss": 0.4874, "step": 18702 }, { "epoch": 1.04731772874902, "grad_norm": 1.301926851272583, "learning_rate": 9.3465e-05, "loss": 0.581, "step": 18703 }, { "epoch": 1.047373726061149, "grad_norm": 1.357356071472168, "learning_rate": 9.347e-05, "loss": 0.4752, "step": 18704 }, { "epoch": 1.047429723373278, "grad_norm": 1.335082769393921, "learning_rate": 9.3475e-05, "loss": 0.5403, "step": 18705 }, { "epoch": 1.0474857206854071, "grad_norm": 1.386541724205017, "learning_rate": 9.348e-05, "loss": 0.3512, "step": 18706 }, { "epoch": 1.0475417179975361, "grad_norm": 1.3624595403671265, "learning_rate": 9.3485e-05, "loss": 0.5465, "step": 18707 }, { "epoch": 1.0475977153096652, "grad_norm": 1.5704821348190308, "learning_rate": 9.349e-05, "loss": 0.4433, "step": 18708 }, { "epoch": 1.0476537126217942, "grad_norm": 1.186461091041565, "learning_rate": 9.3495e-05, "loss": 0.3858, "step": 18709 }, { "epoch": 1.0477097099339232, "grad_norm": 1.1894114017486572, "learning_rate": 9.350000000000001e-05, "loss": 0.3432, "step": 18710 }, { "epoch": 1.0477657072460522, "grad_norm": 1.700202226638794, "learning_rate": 9.350500000000001e-05, "loss": 0.482, "step": 18711 }, { "epoch": 1.0478217045581812, "grad_norm": 1.487416386604309, "learning_rate": 9.351e-05, "loss": 0.4917, "step": 18712 }, { "epoch": 1.0478777018703103, "grad_norm": 1.6582293510437012, "learning_rate": 9.3515e-05, "loss": 0.4482, "step": 18713 }, { "epoch": 1.0479336991824393, "grad_norm": 1.4676433801651, "learning_rate": 9.352000000000001e-05, "loss": 0.4053, "step": 18714 }, { "epoch": 1.0479896964945683, "grad_norm": 1.5263688564300537, "learning_rate": 9.352500000000001e-05, "loss": 0.5713, "step": 18715 }, { "epoch": 1.0480456938066973, "grad_norm": 1.2526209354400635, "learning_rate": 9.353000000000001e-05, "loss": 0.6906, "step": 18716 }, { "epoch": 1.0481016911188263, "grad_norm": 
1.3255534172058105, "learning_rate": 9.3535e-05, "loss": 0.4322, "step": 18717 }, { "epoch": 1.0481576884309554, "grad_norm": 1.4175266027450562, "learning_rate": 9.354e-05, "loss": 0.3491, "step": 18718 }, { "epoch": 1.0482136857430844, "grad_norm": 1.4038658142089844, "learning_rate": 9.3545e-05, "loss": 0.3943, "step": 18719 }, { "epoch": 1.0482696830552134, "grad_norm": 1.6249943971633911, "learning_rate": 9.355000000000001e-05, "loss": 0.4448, "step": 18720 }, { "epoch": 1.0483256803673424, "grad_norm": 1.6626101732254028, "learning_rate": 9.355500000000001e-05, "loss": 0.4816, "step": 18721 }, { "epoch": 1.0483816776794714, "grad_norm": 1.1830973625183105, "learning_rate": 9.356e-05, "loss": 0.4251, "step": 18722 }, { "epoch": 1.0484376749916005, "grad_norm": 1.3155674934387207, "learning_rate": 9.3565e-05, "loss": 0.4556, "step": 18723 }, { "epoch": 1.0484936723037295, "grad_norm": 1.5291848182678223, "learning_rate": 9.357e-05, "loss": 0.5577, "step": 18724 }, { "epoch": 1.0485496696158585, "grad_norm": 10.447802543640137, "learning_rate": 9.3575e-05, "loss": 0.4634, "step": 18725 }, { "epoch": 1.0486056669279875, "grad_norm": 1.464624047279358, "learning_rate": 9.358e-05, "loss": 0.5386, "step": 18726 }, { "epoch": 1.0486616642401165, "grad_norm": 1.2470518350601196, "learning_rate": 9.3585e-05, "loss": 0.4673, "step": 18727 }, { "epoch": 1.0487176615522456, "grad_norm": 1.7545257806777954, "learning_rate": 9.359e-05, "loss": 0.4946, "step": 18728 }, { "epoch": 1.0487736588643746, "grad_norm": 1.2152029275894165, "learning_rate": 9.3595e-05, "loss": 0.3201, "step": 18729 }, { "epoch": 1.0488296561765036, "grad_norm": 1.2757127285003662, "learning_rate": 9.360000000000001e-05, "loss": 0.4564, "step": 18730 }, { "epoch": 1.0488856534886326, "grad_norm": 1.3454784154891968, "learning_rate": 9.360500000000001e-05, "loss": 0.4314, "step": 18731 }, { "epoch": 1.0489416508007616, "grad_norm": 1.4702589511871338, "learning_rate": 9.361e-05, "loss": 0.4204, "step": 
18732 }, { "epoch": 1.0489976481128906, "grad_norm": 1.1373652219772339, "learning_rate": 9.3615e-05, "loss": 0.3671, "step": 18733 }, { "epoch": 1.0490536454250197, "grad_norm": 1.4210302829742432, "learning_rate": 9.362e-05, "loss": 0.419, "step": 18734 }, { "epoch": 1.0491096427371487, "grad_norm": 1.38583505153656, "learning_rate": 9.362500000000001e-05, "loss": 0.4431, "step": 18735 }, { "epoch": 1.0491656400492777, "grad_norm": 1.4215130805969238, "learning_rate": 9.363000000000001e-05, "loss": 0.5827, "step": 18736 }, { "epoch": 1.0492216373614067, "grad_norm": 1.4129101037979126, "learning_rate": 9.363500000000001e-05, "loss": 0.4041, "step": 18737 }, { "epoch": 1.0492776346735357, "grad_norm": 1.5683997869491577, "learning_rate": 9.364e-05, "loss": 0.5229, "step": 18738 }, { "epoch": 1.0493336319856648, "grad_norm": 1.083092451095581, "learning_rate": 9.3645e-05, "loss": 0.3352, "step": 18739 }, { "epoch": 1.0493896292977938, "grad_norm": 1.3357940912246704, "learning_rate": 9.365e-05, "loss": 0.5065, "step": 18740 }, { "epoch": 1.0494456266099228, "grad_norm": 1.3490136861801147, "learning_rate": 9.365500000000001e-05, "loss": 0.4825, "step": 18741 }, { "epoch": 1.0495016239220518, "grad_norm": 1.1608885526657104, "learning_rate": 9.366000000000001e-05, "loss": 0.3584, "step": 18742 }, { "epoch": 1.0495576212341808, "grad_norm": 3.1415653228759766, "learning_rate": 9.3665e-05, "loss": 0.4131, "step": 18743 }, { "epoch": 1.0496136185463099, "grad_norm": 1.2254018783569336, "learning_rate": 9.367e-05, "loss": 0.3774, "step": 18744 }, { "epoch": 1.0496696158584389, "grad_norm": 1.1179746389389038, "learning_rate": 9.3675e-05, "loss": 0.3453, "step": 18745 }, { "epoch": 1.049725613170568, "grad_norm": 1.2203619480133057, "learning_rate": 9.368e-05, "loss": 0.5304, "step": 18746 }, { "epoch": 1.049781610482697, "grad_norm": 1.4112677574157715, "learning_rate": 9.3685e-05, "loss": 0.7577, "step": 18747 }, { "epoch": 1.049837607794826, "grad_norm": 
1.3277983665466309, "learning_rate": 9.369e-05, "loss": 0.3917, "step": 18748 }, { "epoch": 1.049893605106955, "grad_norm": 1.2955080270767212, "learning_rate": 9.3695e-05, "loss": 0.3897, "step": 18749 }, { "epoch": 1.049949602419084, "grad_norm": 1.3035756349563599, "learning_rate": 9.370000000000001e-05, "loss": 0.3826, "step": 18750 }, { "epoch": 1.050005599731213, "grad_norm": 1.3221250772476196, "learning_rate": 9.370500000000001e-05, "loss": 0.3784, "step": 18751 }, { "epoch": 1.050061597043342, "grad_norm": 1.2354234457015991, "learning_rate": 9.371000000000001e-05, "loss": 0.3577, "step": 18752 }, { "epoch": 1.050117594355471, "grad_norm": 1.1802183389663696, "learning_rate": 9.3715e-05, "loss": 0.4117, "step": 18753 }, { "epoch": 1.0501735916676, "grad_norm": 1.2856926918029785, "learning_rate": 9.372e-05, "loss": 0.4059, "step": 18754 }, { "epoch": 1.050229588979729, "grad_norm": 1.5393387079238892, "learning_rate": 9.3725e-05, "loss": 0.3178, "step": 18755 }, { "epoch": 1.050285586291858, "grad_norm": 1.3233131170272827, "learning_rate": 9.373000000000001e-05, "loss": 0.3248, "step": 18756 }, { "epoch": 1.050341583603987, "grad_norm": 1.6761584281921387, "learning_rate": 9.373500000000001e-05, "loss": 0.4015, "step": 18757 }, { "epoch": 1.0503975809161161, "grad_norm": 1.2219070196151733, "learning_rate": 9.374000000000001e-05, "loss": 0.6149, "step": 18758 }, { "epoch": 1.0504535782282451, "grad_norm": 1.2063491344451904, "learning_rate": 9.3745e-05, "loss": 0.3681, "step": 18759 }, { "epoch": 1.0505095755403742, "grad_norm": 1.8176213502883911, "learning_rate": 9.375e-05, "loss": 0.5237, "step": 18760 }, { "epoch": 1.0505655728525032, "grad_norm": 1.5886811017990112, "learning_rate": 9.3755e-05, "loss": 0.461, "step": 18761 }, { "epoch": 1.0506215701646322, "grad_norm": 1.32243812084198, "learning_rate": 9.376e-05, "loss": 0.4032, "step": 18762 }, { "epoch": 1.0506775674767612, "grad_norm": 1.169313669204712, "learning_rate": 9.376500000000001e-05, 
"loss": 0.4166, "step": 18763 }, { "epoch": 1.0507335647888902, "grad_norm": 1.393068790435791, "learning_rate": 9.377e-05, "loss": 0.4246, "step": 18764 }, { "epoch": 1.050789562101019, "grad_norm": 1.259882926940918, "learning_rate": 9.3775e-05, "loss": 0.4142, "step": 18765 }, { "epoch": 1.050845559413148, "grad_norm": 1.1903698444366455, "learning_rate": 9.378e-05, "loss": 0.3495, "step": 18766 }, { "epoch": 1.050901556725277, "grad_norm": 1.4354101419448853, "learning_rate": 9.3785e-05, "loss": 0.3947, "step": 18767 }, { "epoch": 1.050957554037406, "grad_norm": 1.1266363859176636, "learning_rate": 9.379e-05, "loss": 0.3524, "step": 18768 }, { "epoch": 1.0510135513495351, "grad_norm": 1.3983913660049438, "learning_rate": 9.3795e-05, "loss": 0.4935, "step": 18769 }, { "epoch": 1.0510695486616641, "grad_norm": 1.611491084098816, "learning_rate": 9.38e-05, "loss": 0.5854, "step": 18770 }, { "epoch": 1.0511255459737932, "grad_norm": 1.1961493492126465, "learning_rate": 9.380500000000001e-05, "loss": 0.403, "step": 18771 }, { "epoch": 1.0511815432859222, "grad_norm": 1.6286276578903198, "learning_rate": 9.381000000000001e-05, "loss": 0.3887, "step": 18772 }, { "epoch": 1.0512375405980512, "grad_norm": 1.3481632471084595, "learning_rate": 9.381500000000001e-05, "loss": 0.3905, "step": 18773 }, { "epoch": 1.0512935379101802, "grad_norm": 1.800219178199768, "learning_rate": 9.382e-05, "loss": 0.6068, "step": 18774 }, { "epoch": 1.0513495352223092, "grad_norm": 1.3384664058685303, "learning_rate": 9.3825e-05, "loss": 0.3587, "step": 18775 }, { "epoch": 1.0514055325344382, "grad_norm": 1.3007813692092896, "learning_rate": 9.383e-05, "loss": 0.3219, "step": 18776 }, { "epoch": 1.0514615298465673, "grad_norm": 1.9404252767562866, "learning_rate": 9.383500000000001e-05, "loss": 0.535, "step": 18777 }, { "epoch": 1.0515175271586963, "grad_norm": 1.2272975444793701, "learning_rate": 9.384000000000001e-05, "loss": 0.432, "step": 18778 }, { "epoch": 1.0515735244708253, 
"grad_norm": 1.2873376607894897, "learning_rate": 9.384500000000001e-05, "loss": 0.3672, "step": 18779 }, { "epoch": 1.0516295217829543, "grad_norm": 1.1425484418869019, "learning_rate": 9.385e-05, "loss": 0.3483, "step": 18780 }, { "epoch": 1.0516855190950833, "grad_norm": 1.2005585432052612, "learning_rate": 9.3855e-05, "loss": 0.4503, "step": 18781 }, { "epoch": 1.0517415164072124, "grad_norm": 1.5329253673553467, "learning_rate": 9.386e-05, "loss": 0.565, "step": 18782 }, { "epoch": 1.0517975137193414, "grad_norm": 1.3012638092041016, "learning_rate": 9.3865e-05, "loss": 0.4552, "step": 18783 }, { "epoch": 1.0518535110314704, "grad_norm": 1.4215017557144165, "learning_rate": 9.387000000000001e-05, "loss": 0.385, "step": 18784 }, { "epoch": 1.0519095083435994, "grad_norm": 1.4076894521713257, "learning_rate": 9.3875e-05, "loss": 0.4185, "step": 18785 }, { "epoch": 1.0519655056557284, "grad_norm": 1.6196259260177612, "learning_rate": 9.388e-05, "loss": 0.4357, "step": 18786 }, { "epoch": 1.0520215029678575, "grad_norm": 1.3829759359359741, "learning_rate": 9.3885e-05, "loss": 0.5274, "step": 18787 }, { "epoch": 1.0520775002799865, "grad_norm": 1.3827085494995117, "learning_rate": 9.389e-05, "loss": 0.3359, "step": 18788 }, { "epoch": 1.0521334975921155, "grad_norm": 1.3541009426116943, "learning_rate": 9.3895e-05, "loss": 0.3799, "step": 18789 }, { "epoch": 1.0521894949042445, "grad_norm": 7.985986232757568, "learning_rate": 9.39e-05, "loss": 0.478, "step": 18790 }, { "epoch": 1.0522454922163735, "grad_norm": 1.4385887384414673, "learning_rate": 9.3905e-05, "loss": 0.495, "step": 18791 }, { "epoch": 1.0523014895285026, "grad_norm": 1.43264639377594, "learning_rate": 9.391000000000001e-05, "loss": 0.492, "step": 18792 }, { "epoch": 1.0523574868406316, "grad_norm": 1.3586641550064087, "learning_rate": 9.391500000000001e-05, "loss": 0.4638, "step": 18793 }, { "epoch": 1.0524134841527606, "grad_norm": 1.3236429691314697, "learning_rate": 9.392000000000001e-05, 
"loss": 0.4102, "step": 18794 }, { "epoch": 1.0524694814648896, "grad_norm": 1.5813895463943481, "learning_rate": 9.3925e-05, "loss": 0.7058, "step": 18795 }, { "epoch": 1.0525254787770186, "grad_norm": 1.8021186590194702, "learning_rate": 9.393e-05, "loss": 0.3773, "step": 18796 }, { "epoch": 1.0525814760891476, "grad_norm": 1.561477780342102, "learning_rate": 9.3935e-05, "loss": 0.4236, "step": 18797 }, { "epoch": 1.0526374734012767, "grad_norm": 1.3825465440750122, "learning_rate": 9.394000000000001e-05, "loss": 0.4629, "step": 18798 }, { "epoch": 1.0526934707134057, "grad_norm": 2.319859504699707, "learning_rate": 9.394500000000001e-05, "loss": 0.3467, "step": 18799 }, { "epoch": 1.0527494680255347, "grad_norm": 1.4497575759887695, "learning_rate": 9.395000000000001e-05, "loss": 0.3631, "step": 18800 }, { "epoch": 1.0528054653376637, "grad_norm": 1.0492933988571167, "learning_rate": 9.3955e-05, "loss": 0.2956, "step": 18801 }, { "epoch": 1.0528614626497927, "grad_norm": 1.265881061553955, "learning_rate": 9.396e-05, "loss": 0.463, "step": 18802 }, { "epoch": 1.0529174599619218, "grad_norm": 1.474715232849121, "learning_rate": 9.3965e-05, "loss": 0.5241, "step": 18803 }, { "epoch": 1.0529734572740508, "grad_norm": 1.3244526386260986, "learning_rate": 9.397e-05, "loss": 0.3758, "step": 18804 }, { "epoch": 1.0530294545861798, "grad_norm": 1.293935775756836, "learning_rate": 9.397500000000001e-05, "loss": 0.3872, "step": 18805 }, { "epoch": 1.0530854518983088, "grad_norm": 1.3484982252120972, "learning_rate": 9.398e-05, "loss": 0.4226, "step": 18806 }, { "epoch": 1.0531414492104378, "grad_norm": 1.3209012746810913, "learning_rate": 9.3985e-05, "loss": 0.4594, "step": 18807 }, { "epoch": 1.0531974465225669, "grad_norm": 1.4305565357208252, "learning_rate": 9.399e-05, "loss": 0.4233, "step": 18808 }, { "epoch": 1.0532534438346959, "grad_norm": 1.4356799125671387, "learning_rate": 9.3995e-05, "loss": 0.4133, "step": 18809 }, { "epoch": 1.053309441146825, "grad_norm": 
1.3230232000350952, "learning_rate": 9.4e-05, "loss": 0.4562, "step": 18810 }, { "epoch": 1.053365438458954, "grad_norm": 1.3221404552459717, "learning_rate": 9.4005e-05, "loss": 0.3312, "step": 18811 }, { "epoch": 1.053421435771083, "grad_norm": 1.6068238019943237, "learning_rate": 9.401e-05, "loss": 0.5273, "step": 18812 }, { "epoch": 1.053477433083212, "grad_norm": 1.308494210243225, "learning_rate": 9.401500000000001e-05, "loss": 0.5055, "step": 18813 }, { "epoch": 1.053533430395341, "grad_norm": 1.3897660970687866, "learning_rate": 9.402000000000001e-05, "loss": 0.4352, "step": 18814 }, { "epoch": 1.05358942770747, "grad_norm": 1.3756804466247559, "learning_rate": 9.402500000000001e-05, "loss": 0.5146, "step": 18815 }, { "epoch": 1.053645425019599, "grad_norm": 1.3395336866378784, "learning_rate": 9.403e-05, "loss": 0.5114, "step": 18816 }, { "epoch": 1.053701422331728, "grad_norm": 1.4863812923431396, "learning_rate": 9.4035e-05, "loss": 0.4387, "step": 18817 }, { "epoch": 1.053757419643857, "grad_norm": 1.3136694431304932, "learning_rate": 9.404e-05, "loss": 0.5058, "step": 18818 }, { "epoch": 1.053813416955986, "grad_norm": 1.463541865348816, "learning_rate": 9.404500000000001e-05, "loss": 0.372, "step": 18819 }, { "epoch": 1.053869414268115, "grad_norm": 1.429172158241272, "learning_rate": 9.405000000000001e-05, "loss": 0.5003, "step": 18820 }, { "epoch": 1.053925411580244, "grad_norm": 1.1957285404205322, "learning_rate": 9.4055e-05, "loss": 0.4128, "step": 18821 }, { "epoch": 1.0539814088923731, "grad_norm": 1.3483213186264038, "learning_rate": 9.406e-05, "loss": 0.4783, "step": 18822 }, { "epoch": 1.0540374062045021, "grad_norm": 1.742538332939148, "learning_rate": 9.4065e-05, "loss": 0.4073, "step": 18823 }, { "epoch": 1.0540934035166312, "grad_norm": 1.7641878128051758, "learning_rate": 9.407e-05, "loss": 0.4633, "step": 18824 }, { "epoch": 1.0541494008287602, "grad_norm": 1.4936950206756592, "learning_rate": 9.4075e-05, "loss": 0.5027, "step": 18825 
}, { "epoch": 1.0542053981408892, "grad_norm": 1.5363181829452515, "learning_rate": 9.408000000000001e-05, "loss": 0.3967, "step": 18826 }, { "epoch": 1.0542613954530182, "grad_norm": 1.4092928171157837, "learning_rate": 9.4085e-05, "loss": 0.4312, "step": 18827 }, { "epoch": 1.0543173927651472, "grad_norm": 1.1858136653900146, "learning_rate": 9.409e-05, "loss": 0.5055, "step": 18828 }, { "epoch": 1.0543733900772763, "grad_norm": 1.7902591228485107, "learning_rate": 9.4095e-05, "loss": 0.479, "step": 18829 }, { "epoch": 1.0544293873894053, "grad_norm": 1.6157180070877075, "learning_rate": 9.41e-05, "loss": 0.369, "step": 18830 }, { "epoch": 1.0544853847015343, "grad_norm": 1.1800177097320557, "learning_rate": 9.410500000000001e-05, "loss": 0.332, "step": 18831 }, { "epoch": 1.0545413820136633, "grad_norm": 1.1747642755508423, "learning_rate": 9.411e-05, "loss": 0.4682, "step": 18832 }, { "epoch": 1.0545973793257923, "grad_norm": 1.514975905418396, "learning_rate": 9.4115e-05, "loss": 0.4393, "step": 18833 }, { "epoch": 1.0546533766379214, "grad_norm": 1.2441729307174683, "learning_rate": 9.412000000000001e-05, "loss": 0.4483, "step": 18834 }, { "epoch": 1.0547093739500504, "grad_norm": 1.2949442863464355, "learning_rate": 9.412500000000001e-05, "loss": 0.4996, "step": 18835 }, { "epoch": 1.0547653712621794, "grad_norm": 1.818418264389038, "learning_rate": 9.413000000000001e-05, "loss": 0.5126, "step": 18836 }, { "epoch": 1.0548213685743084, "grad_norm": 1.7463511228561401, "learning_rate": 9.4135e-05, "loss": 0.5482, "step": 18837 }, { "epoch": 1.0548773658864374, "grad_norm": 1.280035376548767, "learning_rate": 9.414e-05, "loss": 0.4009, "step": 18838 }, { "epoch": 1.0549333631985665, "grad_norm": 1.5921680927276611, "learning_rate": 9.4145e-05, "loss": 0.6184, "step": 18839 }, { "epoch": 1.0549893605106955, "grad_norm": 1.3982776403427124, "learning_rate": 9.415e-05, "loss": 0.3379, "step": 18840 }, { "epoch": 1.0550453578228245, "grad_norm": 1.1702890396118164, 
"learning_rate": 9.415500000000001e-05, "loss": 0.3119, "step": 18841 }, { "epoch": 1.0551013551349535, "grad_norm": 1.1738208532333374, "learning_rate": 9.416e-05, "loss": 0.3463, "step": 18842 }, { "epoch": 1.0551573524470825, "grad_norm": 1.2477097511291504, "learning_rate": 9.4165e-05, "loss": 0.4123, "step": 18843 }, { "epoch": 1.0552133497592115, "grad_norm": 1.3173540830612183, "learning_rate": 9.417e-05, "loss": 0.4082, "step": 18844 }, { "epoch": 1.0552693470713406, "grad_norm": 1.4024620056152344, "learning_rate": 9.4175e-05, "loss": 0.498, "step": 18845 }, { "epoch": 1.0553253443834696, "grad_norm": 1.6407623291015625, "learning_rate": 9.418e-05, "loss": 0.5054, "step": 18846 }, { "epoch": 1.0553813416955986, "grad_norm": 1.3435959815979004, "learning_rate": 9.418500000000001e-05, "loss": 0.5827, "step": 18847 }, { "epoch": 1.0554373390077276, "grad_norm": 1.2907919883728027, "learning_rate": 9.419e-05, "loss": 0.4695, "step": 18848 }, { "epoch": 1.0554933363198566, "grad_norm": 1.2824134826660156, "learning_rate": 9.4195e-05, "loss": 0.4491, "step": 18849 }, { "epoch": 1.0555493336319857, "grad_norm": 1.4175795316696167, "learning_rate": 9.42e-05, "loss": 0.4447, "step": 18850 }, { "epoch": 1.0556053309441147, "grad_norm": 2.4582059383392334, "learning_rate": 9.420500000000001e-05, "loss": 0.4851, "step": 18851 }, { "epoch": 1.0556613282562437, "grad_norm": 1.510203242301941, "learning_rate": 9.421000000000001e-05, "loss": 0.4234, "step": 18852 }, { "epoch": 1.0557173255683727, "grad_norm": 1.3534705638885498, "learning_rate": 9.4215e-05, "loss": 0.4249, "step": 18853 }, { "epoch": 1.0557733228805017, "grad_norm": 1.524954915046692, "learning_rate": 9.422e-05, "loss": 0.5413, "step": 18854 }, { "epoch": 1.0558293201926308, "grad_norm": 1.2934017181396484, "learning_rate": 9.422500000000001e-05, "loss": 0.4422, "step": 18855 }, { "epoch": 1.0558853175047598, "grad_norm": 1.3412871360778809, "learning_rate": 9.423000000000001e-05, "loss": 0.4235, "step": 
18856 }, { "epoch": 1.0559413148168888, "grad_norm": 1.5189380645751953, "learning_rate": 9.423500000000001e-05, "loss": 0.4656, "step": 18857 }, { "epoch": 1.0559973121290178, "grad_norm": 1.3191028833389282, "learning_rate": 9.424e-05, "loss": 0.4504, "step": 18858 }, { "epoch": 1.0560533094411468, "grad_norm": 1.3765827417373657, "learning_rate": 9.4245e-05, "loss": 0.4747, "step": 18859 }, { "epoch": 1.0561093067532759, "grad_norm": 1.6227412223815918, "learning_rate": 9.425e-05, "loss": 0.488, "step": 18860 }, { "epoch": 1.0561653040654049, "grad_norm": 2.1638755798339844, "learning_rate": 9.4255e-05, "loss": 0.4988, "step": 18861 }, { "epoch": 1.056221301377534, "grad_norm": 1.5395900011062622, "learning_rate": 9.426000000000001e-05, "loss": 0.594, "step": 18862 }, { "epoch": 1.056277298689663, "grad_norm": 1.609810709953308, "learning_rate": 9.4265e-05, "loss": 0.4513, "step": 18863 }, { "epoch": 1.056333296001792, "grad_norm": 1.3589756488800049, "learning_rate": 9.427e-05, "loss": 0.4122, "step": 18864 }, { "epoch": 1.056389293313921, "grad_norm": 1.2109688520431519, "learning_rate": 9.4275e-05, "loss": 0.3998, "step": 18865 }, { "epoch": 1.05644529062605, "grad_norm": 1.3328644037246704, "learning_rate": 9.428e-05, "loss": 0.447, "step": 18866 }, { "epoch": 1.056501287938179, "grad_norm": 1.28719961643219, "learning_rate": 9.4285e-05, "loss": 0.4122, "step": 18867 }, { "epoch": 1.056557285250308, "grad_norm": 1.3460605144500732, "learning_rate": 9.429000000000001e-05, "loss": 0.4158, "step": 18868 }, { "epoch": 1.056613282562437, "grad_norm": 1.3511147499084473, "learning_rate": 9.4295e-05, "loss": 0.4722, "step": 18869 }, { "epoch": 1.056669279874566, "grad_norm": 1.66860830783844, "learning_rate": 9.43e-05, "loss": 0.4749, "step": 18870 }, { "epoch": 1.056725277186695, "grad_norm": 1.2137311697006226, "learning_rate": 9.430500000000001e-05, "loss": 0.3863, "step": 18871 }, { "epoch": 1.056781274498824, "grad_norm": 1.441867709159851, "learning_rate": 
9.431000000000001e-05, "loss": 0.5332, "step": 18872 }, { "epoch": 1.056837271810953, "grad_norm": 1.3843780755996704, "learning_rate": 9.431500000000001e-05, "loss": 0.3801, "step": 18873 }, { "epoch": 1.0568932691230821, "grad_norm": 1.5861767530441284, "learning_rate": 9.432e-05, "loss": 0.478, "step": 18874 }, { "epoch": 1.0569492664352111, "grad_norm": 1.8049677610397339, "learning_rate": 9.4325e-05, "loss": 0.5031, "step": 18875 }, { "epoch": 1.0570052637473402, "grad_norm": 1.302350640296936, "learning_rate": 9.433000000000001e-05, "loss": 0.5332, "step": 18876 }, { "epoch": 1.0570612610594692, "grad_norm": 1.555769920349121, "learning_rate": 9.433500000000001e-05, "loss": 0.4586, "step": 18877 }, { "epoch": 1.0571172583715982, "grad_norm": 1.334062933921814, "learning_rate": 9.434000000000001e-05, "loss": 0.4274, "step": 18878 }, { "epoch": 1.0571732556837272, "grad_norm": 1.4169632196426392, "learning_rate": 9.4345e-05, "loss": 0.4329, "step": 18879 }, { "epoch": 1.0572292529958562, "grad_norm": 1.3843140602111816, "learning_rate": 9.435e-05, "loss": 0.4196, "step": 18880 }, { "epoch": 1.0572852503079853, "grad_norm": 1.5343679189682007, "learning_rate": 9.4355e-05, "loss": 0.434, "step": 18881 }, { "epoch": 1.0573412476201143, "grad_norm": 1.3447123765945435, "learning_rate": 9.436e-05, "loss": 0.4374, "step": 18882 }, { "epoch": 1.0573972449322433, "grad_norm": 1.2755141258239746, "learning_rate": 9.436500000000001e-05, "loss": 0.497, "step": 18883 }, { "epoch": 1.0574532422443723, "grad_norm": 1.4663509130477905, "learning_rate": 9.437e-05, "loss": 0.449, "step": 18884 }, { "epoch": 1.0575092395565013, "grad_norm": 1.2889498472213745, "learning_rate": 9.4375e-05, "loss": 0.4059, "step": 18885 }, { "epoch": 1.0575652368686304, "grad_norm": 1.7589125633239746, "learning_rate": 9.438e-05, "loss": 0.3877, "step": 18886 }, { "epoch": 1.0576212341807594, "grad_norm": 1.254963994026184, "learning_rate": 9.4385e-05, "loss": 0.3096, "step": 18887 }, { "epoch": 
1.0576772314928884, "grad_norm": 1.3054019212722778, "learning_rate": 9.439e-05, "loss": 0.3835, "step": 18888 }, { "epoch": 1.0577332288050174, "grad_norm": 1.5005621910095215, "learning_rate": 9.439500000000001e-05, "loss": 0.4646, "step": 18889 }, { "epoch": 1.0577892261171464, "grad_norm": 1.1724367141723633, "learning_rate": 9.44e-05, "loss": 0.4213, "step": 18890 }, { "epoch": 1.0578452234292754, "grad_norm": 1.463555097579956, "learning_rate": 9.4405e-05, "loss": 0.4219, "step": 18891 }, { "epoch": 1.0579012207414045, "grad_norm": 1.779472827911377, "learning_rate": 9.441000000000001e-05, "loss": 0.3379, "step": 18892 }, { "epoch": 1.0579572180535335, "grad_norm": 1.2690653800964355, "learning_rate": 9.441500000000001e-05, "loss": 0.3729, "step": 18893 }, { "epoch": 1.0580132153656625, "grad_norm": 1.3225834369659424, "learning_rate": 9.442000000000001e-05, "loss": 0.4082, "step": 18894 }, { "epoch": 1.0580692126777915, "grad_norm": 1.572891354560852, "learning_rate": 9.4425e-05, "loss": 0.3969, "step": 18895 }, { "epoch": 1.0581252099899205, "grad_norm": 1.2776237726211548, "learning_rate": 9.443e-05, "loss": 0.4541, "step": 18896 }, { "epoch": 1.0581812073020496, "grad_norm": 1.635740041732788, "learning_rate": 9.443500000000001e-05, "loss": 0.4054, "step": 18897 }, { "epoch": 1.0582372046141786, "grad_norm": 1.3353666067123413, "learning_rate": 9.444000000000001e-05, "loss": 0.4354, "step": 18898 }, { "epoch": 1.0582932019263076, "grad_norm": 1.283207654953003, "learning_rate": 9.444500000000001e-05, "loss": 0.4254, "step": 18899 }, { "epoch": 1.0583491992384366, "grad_norm": 1.3528674840927124, "learning_rate": 9.445e-05, "loss": 0.3926, "step": 18900 }, { "epoch": 1.0584051965505656, "grad_norm": 1.2042877674102783, "learning_rate": 9.4455e-05, "loss": 0.4369, "step": 18901 }, { "epoch": 1.0584611938626947, "grad_norm": 2.829955816268921, "learning_rate": 9.446e-05, "loss": 0.4321, "step": 18902 }, { "epoch": 1.0585171911748237, "grad_norm": 
1.424607515335083, "learning_rate": 9.4465e-05, "loss": 0.3693, "step": 18903 }, { "epoch": 1.0585731884869527, "grad_norm": 1.0847461223602295, "learning_rate": 9.447000000000001e-05, "loss": 0.3607, "step": 18904 }, { "epoch": 1.0586291857990817, "grad_norm": 1.390052080154419, "learning_rate": 9.4475e-05, "loss": 0.4118, "step": 18905 }, { "epoch": 1.0586851831112107, "grad_norm": 1.8173706531524658, "learning_rate": 9.448e-05, "loss": 0.5275, "step": 18906 }, { "epoch": 1.0587411804233398, "grad_norm": 1.606350302696228, "learning_rate": 9.4485e-05, "loss": 0.3748, "step": 18907 }, { "epoch": 1.0587971777354688, "grad_norm": 1.3614684343338013, "learning_rate": 9.449e-05, "loss": 0.384, "step": 18908 }, { "epoch": 1.0588531750475978, "grad_norm": 1.325899600982666, "learning_rate": 9.4495e-05, "loss": 0.3822, "step": 18909 }, { "epoch": 1.0589091723597268, "grad_norm": 1.5942211151123047, "learning_rate": 9.449999999999999e-05, "loss": 0.3482, "step": 18910 }, { "epoch": 1.0589651696718558, "grad_norm": 1.237679362297058, "learning_rate": 9.4505e-05, "loss": 0.3954, "step": 18911 }, { "epoch": 1.0590211669839849, "grad_norm": 1.4813698530197144, "learning_rate": 9.451000000000002e-05, "loss": 0.4544, "step": 18912 }, { "epoch": 1.0590771642961139, "grad_norm": 1.36385977268219, "learning_rate": 9.451500000000001e-05, "loss": 0.4298, "step": 18913 }, { "epoch": 1.059133161608243, "grad_norm": 1.14071786403656, "learning_rate": 9.452000000000001e-05, "loss": 0.4289, "step": 18914 }, { "epoch": 1.059189158920372, "grad_norm": 1.6904393434524536, "learning_rate": 9.452500000000001e-05, "loss": 0.4813, "step": 18915 }, { "epoch": 1.059245156232501, "grad_norm": 1.3985432386398315, "learning_rate": 9.453e-05, "loss": 0.3883, "step": 18916 }, { "epoch": 1.05930115354463, "grad_norm": 1.5503175258636475, "learning_rate": 9.4535e-05, "loss": 0.3758, "step": 18917 }, { "epoch": 1.059357150856759, "grad_norm": 1.3641659021377563, "learning_rate": 9.454000000000001e-05, 
"loss": 0.4202, "step": 18918 }, { "epoch": 1.059413148168888, "grad_norm": 1.790743350982666, "learning_rate": 9.454500000000001e-05, "loss": 0.3753, "step": 18919 }, { "epoch": 1.059469145481017, "grad_norm": 1.3057456016540527, "learning_rate": 9.455000000000001e-05, "loss": 0.4931, "step": 18920 }, { "epoch": 1.059525142793146, "grad_norm": 1.4196068048477173, "learning_rate": 9.4555e-05, "loss": 0.399, "step": 18921 }, { "epoch": 1.059581140105275, "grad_norm": 1.8194833993911743, "learning_rate": 9.456e-05, "loss": 0.4895, "step": 18922 }, { "epoch": 1.059637137417404, "grad_norm": 1.3155800104141235, "learning_rate": 9.4565e-05, "loss": 0.4106, "step": 18923 }, { "epoch": 1.059693134729533, "grad_norm": 1.8820222616195679, "learning_rate": 9.457e-05, "loss": 0.4736, "step": 18924 }, { "epoch": 1.059749132041662, "grad_norm": 1.1942620277404785, "learning_rate": 9.457500000000001e-05, "loss": 0.3874, "step": 18925 }, { "epoch": 1.0598051293537911, "grad_norm": 1.4331386089324951, "learning_rate": 9.458e-05, "loss": 0.5039, "step": 18926 }, { "epoch": 1.0598611266659201, "grad_norm": 1.2756425142288208, "learning_rate": 9.4585e-05, "loss": 0.5379, "step": 18927 }, { "epoch": 1.0599171239780492, "grad_norm": 1.3890256881713867, "learning_rate": 9.459e-05, "loss": 0.3719, "step": 18928 }, { "epoch": 1.0599731212901782, "grad_norm": 1.3086320161819458, "learning_rate": 9.4595e-05, "loss": 0.5043, "step": 18929 }, { "epoch": 1.0600291186023072, "grad_norm": 1.5591317415237427, "learning_rate": 9.46e-05, "loss": 0.3195, "step": 18930 }, { "epoch": 1.0600851159144362, "grad_norm": 1.288242220878601, "learning_rate": 9.460499999999999e-05, "loss": 0.3377, "step": 18931 }, { "epoch": 1.0601411132265652, "grad_norm": 1.243438720703125, "learning_rate": 9.461e-05, "loss": 0.3815, "step": 18932 }, { "epoch": 1.0601971105386943, "grad_norm": 1.3783605098724365, "learning_rate": 9.461500000000001e-05, "loss": 0.3725, "step": 18933 }, { "epoch": 1.0602531078508233, 
"grad_norm": 1.2204675674438477, "learning_rate": 9.462000000000001e-05, "loss": 0.4538, "step": 18934 }, { "epoch": 1.0603091051629523, "grad_norm": 1.437117099761963, "learning_rate": 9.462500000000001e-05, "loss": 0.4731, "step": 18935 }, { "epoch": 1.060365102475081, "grad_norm": 1.4627983570098877, "learning_rate": 9.463000000000001e-05, "loss": 0.4642, "step": 18936 }, { "epoch": 1.06042109978721, "grad_norm": 1.3718695640563965, "learning_rate": 9.4635e-05, "loss": 0.5397, "step": 18937 }, { "epoch": 1.0604770970993391, "grad_norm": 1.5296236276626587, "learning_rate": 9.464e-05, "loss": 0.4356, "step": 18938 }, { "epoch": 1.0605330944114681, "grad_norm": 1.440040111541748, "learning_rate": 9.4645e-05, "loss": 0.5243, "step": 18939 }, { "epoch": 1.0605890917235972, "grad_norm": 21.09760284423828, "learning_rate": 9.465000000000001e-05, "loss": 0.3917, "step": 18940 }, { "epoch": 1.0606450890357262, "grad_norm": 1.457410216331482, "learning_rate": 9.465500000000001e-05, "loss": 0.3693, "step": 18941 }, { "epoch": 1.0607010863478552, "grad_norm": 1.5546989440917969, "learning_rate": 9.466e-05, "loss": 0.4964, "step": 18942 }, { "epoch": 1.0607570836599842, "grad_norm": 1.2661175727844238, "learning_rate": 9.4665e-05, "loss": 0.4675, "step": 18943 }, { "epoch": 1.0608130809721132, "grad_norm": 1.2626347541809082, "learning_rate": 9.467e-05, "loss": 0.3433, "step": 18944 }, { "epoch": 1.0608690782842423, "grad_norm": 1.317460298538208, "learning_rate": 9.4675e-05, "loss": 0.35, "step": 18945 }, { "epoch": 1.0609250755963713, "grad_norm": 1.5489500761032104, "learning_rate": 9.468000000000001e-05, "loss": 0.4688, "step": 18946 }, { "epoch": 1.0609810729085003, "grad_norm": 1.44960618019104, "learning_rate": 9.4685e-05, "loss": 0.4104, "step": 18947 }, { "epoch": 1.0610370702206293, "grad_norm": 1.649051308631897, "learning_rate": 9.469e-05, "loss": 0.3846, "step": 18948 }, { "epoch": 1.0610930675327583, "grad_norm": 1.4297840595245361, "learning_rate": 
9.4695e-05, "loss": 0.4522, "step": 18949 }, { "epoch": 1.0611490648448874, "grad_norm": 1.7101448774337769, "learning_rate": 9.47e-05, "loss": 0.4883, "step": 18950 }, { "epoch": 1.0612050621570164, "grad_norm": 1.4615607261657715, "learning_rate": 9.4705e-05, "loss": 0.3946, "step": 18951 }, { "epoch": 1.0612610594691454, "grad_norm": 1.315509557723999, "learning_rate": 9.471e-05, "loss": 0.491, "step": 18952 }, { "epoch": 1.0613170567812744, "grad_norm": 1.490499496459961, "learning_rate": 9.4715e-05, "loss": 0.4898, "step": 18953 }, { "epoch": 1.0613730540934034, "grad_norm": 1.748432993888855, "learning_rate": 9.472000000000001e-05, "loss": 0.5113, "step": 18954 }, { "epoch": 1.0614290514055325, "grad_norm": 1.2763144969940186, "learning_rate": 9.472500000000001e-05, "loss": 0.4567, "step": 18955 }, { "epoch": 1.0614850487176615, "grad_norm": 1.5092469453811646, "learning_rate": 9.473000000000001e-05, "loss": 0.4869, "step": 18956 }, { "epoch": 1.0615410460297905, "grad_norm": 1.5785478353500366, "learning_rate": 9.473500000000001e-05, "loss": 0.5633, "step": 18957 }, { "epoch": 1.0615970433419195, "grad_norm": 1.1675353050231934, "learning_rate": 9.474e-05, "loss": 0.42, "step": 18958 }, { "epoch": 1.0616530406540485, "grad_norm": 1.4426264762878418, "learning_rate": 9.4745e-05, "loss": 0.5753, "step": 18959 }, { "epoch": 1.0617090379661775, "grad_norm": 2.0994317531585693, "learning_rate": 9.475e-05, "loss": 0.4554, "step": 18960 }, { "epoch": 1.0617650352783066, "grad_norm": 1.3798431158065796, "learning_rate": 9.475500000000001e-05, "loss": 0.3845, "step": 18961 }, { "epoch": 1.0618210325904356, "grad_norm": 1.6509004831314087, "learning_rate": 9.476000000000001e-05, "loss": 0.5474, "step": 18962 }, { "epoch": 1.0618770299025646, "grad_norm": 1.4672154188156128, "learning_rate": 9.4765e-05, "loss": 0.5159, "step": 18963 }, { "epoch": 1.0619330272146936, "grad_norm": 1.2632747888565063, "learning_rate": 9.477e-05, "loss": 0.4353, "step": 18964 }, { "epoch": 
1.0619890245268226, "grad_norm": 1.3404821157455444, "learning_rate": 9.4775e-05, "loss": 0.3383, "step": 18965 }, { "epoch": 1.0620450218389517, "grad_norm": 1.3777574300765991, "learning_rate": 9.478e-05, "loss": 0.5561, "step": 18966 }, { "epoch": 1.0621010191510807, "grad_norm": 1.2554987668991089, "learning_rate": 9.478500000000001e-05, "loss": 0.3572, "step": 18967 }, { "epoch": 1.0621570164632097, "grad_norm": 1.453681468963623, "learning_rate": 9.479e-05, "loss": 0.3301, "step": 18968 }, { "epoch": 1.0622130137753387, "grad_norm": 1.1490294933319092, "learning_rate": 9.4795e-05, "loss": 0.3352, "step": 18969 }, { "epoch": 1.0622690110874677, "grad_norm": 2.009647846221924, "learning_rate": 9.48e-05, "loss": 0.6631, "step": 18970 }, { "epoch": 1.0623250083995968, "grad_norm": 1.4922118186950684, "learning_rate": 9.4805e-05, "loss": 0.5982, "step": 18971 }, { "epoch": 1.0623810057117258, "grad_norm": 1.34416663646698, "learning_rate": 9.481000000000001e-05, "loss": 0.4867, "step": 18972 }, { "epoch": 1.0624370030238548, "grad_norm": 1.2582811117172241, "learning_rate": 9.4815e-05, "loss": 0.3392, "step": 18973 }, { "epoch": 1.0624930003359838, "grad_norm": 1.1551052331924438, "learning_rate": 9.482e-05, "loss": 0.3629, "step": 18974 }, { "epoch": 1.0625489976481128, "grad_norm": 1.3015782833099365, "learning_rate": 9.482500000000001e-05, "loss": 0.3792, "step": 18975 }, { "epoch": 1.0626049949602419, "grad_norm": 1.3310070037841797, "learning_rate": 9.483000000000001e-05, "loss": 0.3958, "step": 18976 }, { "epoch": 1.0626609922723709, "grad_norm": 1.3582322597503662, "learning_rate": 9.483500000000001e-05, "loss": 0.3922, "step": 18977 }, { "epoch": 1.0627169895845, "grad_norm": 1.480780005455017, "learning_rate": 9.484e-05, "loss": 0.3405, "step": 18978 }, { "epoch": 1.062772986896629, "grad_norm": 1.2930588722229004, "learning_rate": 9.4845e-05, "loss": 0.4571, "step": 18979 }, { "epoch": 1.062828984208758, "grad_norm": 1.5896929502487183, "learning_rate": 
9.485e-05, "loss": 0.5357, "step": 18980 }, { "epoch": 1.062884981520887, "grad_norm": 1.4487299919128418, "learning_rate": 9.4855e-05, "loss": 0.5018, "step": 18981 }, { "epoch": 1.062940978833016, "grad_norm": 1.6484873294830322, "learning_rate": 9.486000000000001e-05, "loss": 0.6373, "step": 18982 }, { "epoch": 1.062996976145145, "grad_norm": 1.294859766960144, "learning_rate": 9.486500000000001e-05, "loss": 0.3981, "step": 18983 }, { "epoch": 1.063052973457274, "grad_norm": 1.2828603982925415, "learning_rate": 9.487e-05, "loss": 0.5343, "step": 18984 }, { "epoch": 1.063108970769403, "grad_norm": 1.579521656036377, "learning_rate": 9.4875e-05, "loss": 0.4794, "step": 18985 }, { "epoch": 1.063164968081532, "grad_norm": 5.587820529937744, "learning_rate": 9.488e-05, "loss": 0.3993, "step": 18986 }, { "epoch": 1.063220965393661, "grad_norm": 1.2712122201919556, "learning_rate": 9.4885e-05, "loss": 0.3959, "step": 18987 }, { "epoch": 1.06327696270579, "grad_norm": 1.1356114149093628, "learning_rate": 9.489e-05, "loss": 0.4589, "step": 18988 }, { "epoch": 1.063332960017919, "grad_norm": 2.2960779666900635, "learning_rate": 9.4895e-05, "loss": 0.2882, "step": 18989 }, { "epoch": 1.0633889573300481, "grad_norm": 1.2106236219406128, "learning_rate": 9.49e-05, "loss": 0.3994, "step": 18990 }, { "epoch": 1.0634449546421771, "grad_norm": 1.1267436742782593, "learning_rate": 9.4905e-05, "loss": 0.3675, "step": 18991 }, { "epoch": 1.0635009519543062, "grad_norm": 1.2601475715637207, "learning_rate": 9.491000000000001e-05, "loss": 0.3452, "step": 18992 }, { "epoch": 1.0635569492664352, "grad_norm": 1.4580140113830566, "learning_rate": 9.491500000000001e-05, "loss": 0.4447, "step": 18993 }, { "epoch": 1.0636129465785642, "grad_norm": 1.2755945920944214, "learning_rate": 9.492e-05, "loss": 0.4647, "step": 18994 }, { "epoch": 1.0636689438906932, "grad_norm": 1.2138370275497437, "learning_rate": 9.4925e-05, "loss": 0.387, "step": 18995 }, { "epoch": 1.0637249412028222, 
"grad_norm": 1.2418153285980225, "learning_rate": 9.493000000000001e-05, "loss": 0.3565, "step": 18996 }, { "epoch": 1.0637809385149513, "grad_norm": 1.2355155944824219, "learning_rate": 9.493500000000001e-05, "loss": 0.4235, "step": 18997 }, { "epoch": 1.0638369358270803, "grad_norm": 1.5285604000091553, "learning_rate": 9.494000000000001e-05, "loss": 0.3818, "step": 18998 }, { "epoch": 1.0638929331392093, "grad_norm": 1.3427233695983887, "learning_rate": 9.4945e-05, "loss": 0.4629, "step": 18999 }, { "epoch": 1.0639489304513383, "grad_norm": 1.4263298511505127, "learning_rate": 9.495e-05, "loss": 0.5107, "step": 19000 }, { "epoch": 1.0640049277634673, "grad_norm": 1.3277918100357056, "learning_rate": 9.4955e-05, "loss": 0.4914, "step": 19001 }, { "epoch": 1.0640609250755964, "grad_norm": 1.5316247940063477, "learning_rate": 9.496e-05, "loss": 0.5536, "step": 19002 }, { "epoch": 1.0641169223877254, "grad_norm": 1.4550143480300903, "learning_rate": 9.496500000000001e-05, "loss": 0.3668, "step": 19003 }, { "epoch": 1.0641729196998544, "grad_norm": 1.1481560468673706, "learning_rate": 9.497000000000001e-05, "loss": 0.3922, "step": 19004 }, { "epoch": 1.0642289170119834, "grad_norm": 1.3690632581710815, "learning_rate": 9.4975e-05, "loss": 0.5181, "step": 19005 }, { "epoch": 1.0642849143241124, "grad_norm": 1.583183765411377, "learning_rate": 9.498e-05, "loss": 0.4482, "step": 19006 }, { "epoch": 1.0643409116362414, "grad_norm": 1.3478230237960815, "learning_rate": 9.4985e-05, "loss": 0.3658, "step": 19007 }, { "epoch": 1.0643969089483705, "grad_norm": 3.4860141277313232, "learning_rate": 9.499e-05, "loss": 0.4589, "step": 19008 }, { "epoch": 1.0644529062604995, "grad_norm": 1.381873369216919, "learning_rate": 9.4995e-05, "loss": 0.4322, "step": 19009 }, { "epoch": 1.0645089035726285, "grad_norm": 1.5568053722381592, "learning_rate": 9.5e-05, "loss": 0.3266, "step": 19010 }, { "epoch": 1.0645649008847575, "grad_norm": 1.5077698230743408, "learning_rate": 9.5005e-05, 
"loss": 0.3767, "step": 19011 }, { "epoch": 1.0646208981968865, "grad_norm": 1.1655912399291992, "learning_rate": 9.501e-05, "loss": 0.3115, "step": 19012 }, { "epoch": 1.0646768955090156, "grad_norm": 1.681504487991333, "learning_rate": 9.501500000000001e-05, "loss": 0.4994, "step": 19013 }, { "epoch": 1.0647328928211446, "grad_norm": 1.292641520500183, "learning_rate": 9.502000000000001e-05, "loss": 0.4135, "step": 19014 }, { "epoch": 1.0647888901332736, "grad_norm": 1.6553596258163452, "learning_rate": 9.5025e-05, "loss": 0.4628, "step": 19015 }, { "epoch": 1.0648448874454026, "grad_norm": 1.3552294969558716, "learning_rate": 9.503e-05, "loss": 0.4012, "step": 19016 }, { "epoch": 1.0649008847575316, "grad_norm": 1.2220262289047241, "learning_rate": 9.5035e-05, "loss": 0.4394, "step": 19017 }, { "epoch": 1.0649568820696607, "grad_norm": 1.2594716548919678, "learning_rate": 9.504000000000001e-05, "loss": 0.3832, "step": 19018 }, { "epoch": 1.0650128793817897, "grad_norm": 1.3809083700180054, "learning_rate": 9.504500000000001e-05, "loss": 0.5147, "step": 19019 }, { "epoch": 1.0650688766939187, "grad_norm": 1.7652674913406372, "learning_rate": 9.505e-05, "loss": 0.3956, "step": 19020 }, { "epoch": 1.0651248740060477, "grad_norm": 1.3767143487930298, "learning_rate": 9.5055e-05, "loss": 0.4599, "step": 19021 }, { "epoch": 1.0651808713181767, "grad_norm": 1.350810170173645, "learning_rate": 9.506e-05, "loss": 0.4033, "step": 19022 }, { "epoch": 1.0652368686303058, "grad_norm": 2.0616953372955322, "learning_rate": 9.5065e-05, "loss": 0.4707, "step": 19023 }, { "epoch": 1.0652928659424348, "grad_norm": 1.2971410751342773, "learning_rate": 9.507000000000001e-05, "loss": 0.4084, "step": 19024 }, { "epoch": 1.0653488632545638, "grad_norm": 1.3095693588256836, "learning_rate": 9.507500000000001e-05, "loss": 0.3893, "step": 19025 }, { "epoch": 1.0654048605666928, "grad_norm": 1.9630202054977417, "learning_rate": 9.508e-05, "loss": 0.4432, "step": 19026 }, { "epoch": 
1.0654608578788218, "grad_norm": 1.2579559087753296, "learning_rate": 9.5085e-05, "loss": 0.5054, "step": 19027 }, { "epoch": 1.0655168551909509, "grad_norm": 1.3713284730911255, "learning_rate": 9.509e-05, "loss": 0.4316, "step": 19028 }, { "epoch": 1.0655728525030799, "grad_norm": 1.4795838594436646, "learning_rate": 9.5095e-05, "loss": 0.4322, "step": 19029 }, { "epoch": 1.065628849815209, "grad_norm": 1.1935054063796997, "learning_rate": 9.51e-05, "loss": 0.3195, "step": 19030 }, { "epoch": 1.065684847127338, "grad_norm": 1.5417760610580444, "learning_rate": 9.5105e-05, "loss": 0.4727, "step": 19031 }, { "epoch": 1.065740844439467, "grad_norm": 1.6064913272857666, "learning_rate": 9.511e-05, "loss": 0.4643, "step": 19032 }, { "epoch": 1.065796841751596, "grad_norm": 1.329726219177246, "learning_rate": 9.511500000000001e-05, "loss": 0.4171, "step": 19033 }, { "epoch": 1.065852839063725, "grad_norm": 1.7679561376571655, "learning_rate": 9.512000000000001e-05, "loss": 0.5562, "step": 19034 }, { "epoch": 1.065908836375854, "grad_norm": 1.3980681896209717, "learning_rate": 9.512500000000001e-05, "loss": 0.5964, "step": 19035 }, { "epoch": 1.065964833687983, "grad_norm": 1.4354372024536133, "learning_rate": 9.513e-05, "loss": 0.5044, "step": 19036 }, { "epoch": 1.066020831000112, "grad_norm": 1.3081278800964355, "learning_rate": 9.5135e-05, "loss": 0.3527, "step": 19037 }, { "epoch": 1.066076828312241, "grad_norm": 1.4948159456253052, "learning_rate": 9.514e-05, "loss": 0.4191, "step": 19038 }, { "epoch": 1.06613282562437, "grad_norm": 1.452414631843567, "learning_rate": 9.514500000000001e-05, "loss": 0.5055, "step": 19039 }, { "epoch": 1.066188822936499, "grad_norm": 1.4004466533660889, "learning_rate": 9.515000000000001e-05, "loss": 0.3828, "step": 19040 }, { "epoch": 1.066244820248628, "grad_norm": 1.7150640487670898, "learning_rate": 9.5155e-05, "loss": 0.3618, "step": 19041 }, { "epoch": 1.0663008175607571, "grad_norm": 1.6523611545562744, "learning_rate": 
9.516e-05, "loss": 0.4781, "step": 19042 }, { "epoch": 1.0663568148728861, "grad_norm": 1.2870184183120728, "learning_rate": 9.5165e-05, "loss": 0.4917, "step": 19043 }, { "epoch": 1.0664128121850152, "grad_norm": 1.2794761657714844, "learning_rate": 9.517e-05, "loss": 0.5292, "step": 19044 }, { "epoch": 1.0664688094971442, "grad_norm": 1.593009352684021, "learning_rate": 9.517500000000001e-05, "loss": 0.4965, "step": 19045 }, { "epoch": 1.0665248068092732, "grad_norm": 1.3673381805419922, "learning_rate": 9.518000000000001e-05, "loss": 0.4164, "step": 19046 }, { "epoch": 1.0665808041214022, "grad_norm": 2.2797043323516846, "learning_rate": 9.5185e-05, "loss": 0.49, "step": 19047 }, { "epoch": 1.0666368014335312, "grad_norm": 1.3565945625305176, "learning_rate": 9.519e-05, "loss": 0.464, "step": 19048 }, { "epoch": 1.0666927987456603, "grad_norm": 1.221509575843811, "learning_rate": 9.5195e-05, "loss": 0.3296, "step": 19049 }, { "epoch": 1.0667487960577893, "grad_norm": 1.562795639038086, "learning_rate": 9.52e-05, "loss": 0.3887, "step": 19050 }, { "epoch": 1.0668047933699183, "grad_norm": 1.1983307600021362, "learning_rate": 9.5205e-05, "loss": 0.4013, "step": 19051 }, { "epoch": 1.0668607906820473, "grad_norm": 1.505826473236084, "learning_rate": 9.521e-05, "loss": 0.4318, "step": 19052 }, { "epoch": 1.0669167879941763, "grad_norm": 1.3908984661102295, "learning_rate": 9.521500000000002e-05, "loss": 0.4813, "step": 19053 }, { "epoch": 1.0669727853063053, "grad_norm": 1.2964426279067993, "learning_rate": 9.522000000000001e-05, "loss": 0.3802, "step": 19054 }, { "epoch": 1.0670287826184344, "grad_norm": 1.3526910543441772, "learning_rate": 9.522500000000001e-05, "loss": 0.4145, "step": 19055 }, { "epoch": 1.0670847799305634, "grad_norm": 1.2798247337341309, "learning_rate": 9.523000000000001e-05, "loss": 0.4685, "step": 19056 }, { "epoch": 1.0671407772426924, "grad_norm": 1.429029941558838, "learning_rate": 9.5235e-05, "loss": 0.382, "step": 19057 }, { "epoch": 
1.0671967745548214, "grad_norm": 1.4787951707839966, "learning_rate": 9.524e-05, "loss": 0.4634, "step": 19058 }, { "epoch": 1.0672527718669504, "grad_norm": 1.306098461151123, "learning_rate": 9.5245e-05, "loss": 0.5, "step": 19059 }, { "epoch": 1.0673087691790795, "grad_norm": 1.5572845935821533, "learning_rate": 9.525000000000001e-05, "loss": 0.4532, "step": 19060 }, { "epoch": 1.0673647664912085, "grad_norm": 1.5002474784851074, "learning_rate": 9.525500000000001e-05, "loss": 0.4291, "step": 19061 }, { "epoch": 1.0674207638033375, "grad_norm": 1.4859400987625122, "learning_rate": 9.526e-05, "loss": 0.4613, "step": 19062 }, { "epoch": 1.0674767611154665, "grad_norm": 1.4809750318527222, "learning_rate": 9.5265e-05, "loss": 0.439, "step": 19063 }, { "epoch": 1.0675327584275955, "grad_norm": 1.3810079097747803, "learning_rate": 9.527e-05, "loss": 0.3838, "step": 19064 }, { "epoch": 1.0675887557397246, "grad_norm": 1.2954858541488647, "learning_rate": 9.5275e-05, "loss": 0.3757, "step": 19065 }, { "epoch": 1.0676447530518536, "grad_norm": 1.359278917312622, "learning_rate": 9.528000000000001e-05, "loss": 0.4049, "step": 19066 }, { "epoch": 1.0677007503639826, "grad_norm": 1.168003797531128, "learning_rate": 9.5285e-05, "loss": 0.392, "step": 19067 }, { "epoch": 1.0677567476761116, "grad_norm": 1.284511923789978, "learning_rate": 9.529e-05, "loss": 0.3769, "step": 19068 }, { "epoch": 1.0678127449882406, "grad_norm": 1.1729944944381714, "learning_rate": 9.5295e-05, "loss": 0.3494, "step": 19069 }, { "epoch": 1.0678687423003697, "grad_norm": 1.1762340068817139, "learning_rate": 9.53e-05, "loss": 0.3331, "step": 19070 }, { "epoch": 1.0679247396124987, "grad_norm": 1.6314793825149536, "learning_rate": 9.5305e-05, "loss": 0.5729, "step": 19071 }, { "epoch": 1.0679807369246277, "grad_norm": 1.3847637176513672, "learning_rate": 9.531e-05, "loss": 0.4225, "step": 19072 }, { "epoch": 1.0680367342367567, "grad_norm": 1.3760457038879395, "learning_rate": 9.5315e-05, "loss": 
0.4248, "step": 19073 }, { "epoch": 1.0680927315488857, "grad_norm": 1.4888118505477905, "learning_rate": 9.532000000000002e-05, "loss": 0.5156, "step": 19074 }, { "epoch": 1.0681487288610148, "grad_norm": 1.3431413173675537, "learning_rate": 9.532500000000001e-05, "loss": 0.3934, "step": 19075 }, { "epoch": 1.0682047261731438, "grad_norm": 1.3805855512619019, "learning_rate": 9.533000000000001e-05, "loss": 0.5146, "step": 19076 }, { "epoch": 1.0682607234852728, "grad_norm": 1.291238784790039, "learning_rate": 9.533500000000001e-05, "loss": 0.3844, "step": 19077 }, { "epoch": 1.0683167207974018, "grad_norm": 1.8542561531066895, "learning_rate": 9.534e-05, "loss": 0.4608, "step": 19078 }, { "epoch": 1.0683727181095308, "grad_norm": 1.3636932373046875, "learning_rate": 9.5345e-05, "loss": 0.4417, "step": 19079 }, { "epoch": 1.0684287154216598, "grad_norm": 1.1776975393295288, "learning_rate": 9.535e-05, "loss": 0.3979, "step": 19080 }, { "epoch": 1.0684847127337889, "grad_norm": 1.3916935920715332, "learning_rate": 9.535500000000001e-05, "loss": 0.4552, "step": 19081 }, { "epoch": 1.0685407100459179, "grad_norm": 1.281245470046997, "learning_rate": 9.536000000000001e-05, "loss": 0.4894, "step": 19082 }, { "epoch": 1.068596707358047, "grad_norm": 1.235292911529541, "learning_rate": 9.5365e-05, "loss": 0.5557, "step": 19083 }, { "epoch": 1.068652704670176, "grad_norm": 1.126031756401062, "learning_rate": 9.537e-05, "loss": 0.3541, "step": 19084 }, { "epoch": 1.068708701982305, "grad_norm": 1.4543650150299072, "learning_rate": 9.5375e-05, "loss": 0.3107, "step": 19085 }, { "epoch": 1.068764699294434, "grad_norm": 1.264474868774414, "learning_rate": 9.538e-05, "loss": 0.5231, "step": 19086 }, { "epoch": 1.068820696606563, "grad_norm": 1.4919121265411377, "learning_rate": 9.5385e-05, "loss": 0.4949, "step": 19087 }, { "epoch": 1.068876693918692, "grad_norm": 1.2485785484313965, "learning_rate": 9.539e-05, "loss": 0.3303, "step": 19088 }, { "epoch": 1.068932691230821, 
"grad_norm": 1.2648602724075317, "learning_rate": 9.5395e-05, "loss": 0.3607, "step": 19089 }, { "epoch": 1.06898868854295, "grad_norm": 1.0592108964920044, "learning_rate": 9.54e-05, "loss": 0.2496, "step": 19090 }, { "epoch": 1.069044685855079, "grad_norm": 1.2441421747207642, "learning_rate": 9.5405e-05, "loss": 0.4313, "step": 19091 }, { "epoch": 1.069100683167208, "grad_norm": 1.3437831401824951, "learning_rate": 9.541e-05, "loss": 0.4893, "step": 19092 }, { "epoch": 1.0691566804793369, "grad_norm": 1.3966460227966309, "learning_rate": 9.541500000000001e-05, "loss": 0.4479, "step": 19093 }, { "epoch": 1.069212677791466, "grad_norm": 1.3548359870910645, "learning_rate": 9.542e-05, "loss": 0.4455, "step": 19094 }, { "epoch": 1.069268675103595, "grad_norm": 2.854562520980835, "learning_rate": 9.542500000000002e-05, "loss": 0.3788, "step": 19095 }, { "epoch": 1.069324672415724, "grad_norm": 1.4204295873641968, "learning_rate": 9.543000000000001e-05, "loss": 0.5879, "step": 19096 }, { "epoch": 1.069380669727853, "grad_norm": 1.1766201257705688, "learning_rate": 9.543500000000001e-05, "loss": 0.3723, "step": 19097 }, { "epoch": 1.069436667039982, "grad_norm": 1.2578608989715576, "learning_rate": 9.544000000000001e-05, "loss": 0.4654, "step": 19098 }, { "epoch": 1.069492664352111, "grad_norm": 1.2997828722000122, "learning_rate": 9.5445e-05, "loss": 0.478, "step": 19099 }, { "epoch": 1.06954866166424, "grad_norm": 1.2996234893798828, "learning_rate": 9.545e-05, "loss": 0.5364, "step": 19100 }, { "epoch": 1.069604658976369, "grad_norm": 1.490668535232544, "learning_rate": 9.5455e-05, "loss": 0.5021, "step": 19101 }, { "epoch": 1.069660656288498, "grad_norm": 1.256386160850525, "learning_rate": 9.546000000000001e-05, "loss": 0.5062, "step": 19102 }, { "epoch": 1.069716653600627, "grad_norm": 1.200835943222046, "learning_rate": 9.546500000000001e-05, "loss": 0.3749, "step": 19103 }, { "epoch": 1.069772650912756, "grad_norm": 1.45115065574646, "learning_rate": 9.547e-05, 
"loss": 0.5045, "step": 19104 }, { "epoch": 1.069828648224885, "grad_norm": 1.0986417531967163, "learning_rate": 9.5475e-05, "loss": 0.4245, "step": 19105 }, { "epoch": 1.0698846455370141, "grad_norm": 1.5335986614227295, "learning_rate": 9.548e-05, "loss": 0.3657, "step": 19106 }, { "epoch": 1.0699406428491431, "grad_norm": 1.6153874397277832, "learning_rate": 9.5485e-05, "loss": 0.386, "step": 19107 }, { "epoch": 1.0699966401612722, "grad_norm": 1.272781252861023, "learning_rate": 9.549e-05, "loss": 0.4127, "step": 19108 }, { "epoch": 1.0700526374734012, "grad_norm": 1.1290336847305298, "learning_rate": 9.5495e-05, "loss": 0.33, "step": 19109 }, { "epoch": 1.0701086347855302, "grad_norm": 1.4628112316131592, "learning_rate": 9.55e-05, "loss": 0.3112, "step": 19110 }, { "epoch": 1.0701646320976592, "grad_norm": 2.051401138305664, "learning_rate": 9.5505e-05, "loss": 0.4451, "step": 19111 }, { "epoch": 1.0702206294097882, "grad_norm": 1.1548988819122314, "learning_rate": 9.551e-05, "loss": 0.3769, "step": 19112 }, { "epoch": 1.0702766267219173, "grad_norm": 1.3833144903182983, "learning_rate": 9.551500000000001e-05, "loss": 0.3899, "step": 19113 }, { "epoch": 1.0703326240340463, "grad_norm": 1.3498610258102417, "learning_rate": 9.552000000000001e-05, "loss": 0.4589, "step": 19114 }, { "epoch": 1.0703886213461753, "grad_norm": 1.2717036008834839, "learning_rate": 9.5525e-05, "loss": 0.3871, "step": 19115 }, { "epoch": 1.0704446186583043, "grad_norm": 1.3312551975250244, "learning_rate": 9.553e-05, "loss": 0.4529, "step": 19116 }, { "epoch": 1.0705006159704333, "grad_norm": 1.3813267946243286, "learning_rate": 9.553500000000001e-05, "loss": 0.4138, "step": 19117 }, { "epoch": 1.0705566132825624, "grad_norm": 1.2773473262786865, "learning_rate": 9.554000000000001e-05, "loss": 0.4857, "step": 19118 }, { "epoch": 1.0706126105946914, "grad_norm": 1.2579201459884644, "learning_rate": 9.554500000000001e-05, "loss": 0.4754, "step": 19119 }, { "epoch": 1.0706686079068204, 
"grad_norm": 1.289850115776062, "learning_rate": 9.555e-05, "loss": 0.4207, "step": 19120 }, { "epoch": 1.0707246052189494, "grad_norm": 1.1401543617248535, "learning_rate": 9.5555e-05, "loss": 0.4304, "step": 19121 }, { "epoch": 1.0707806025310784, "grad_norm": 1.5424062013626099, "learning_rate": 9.556e-05, "loss": 0.4428, "step": 19122 }, { "epoch": 1.0708365998432074, "grad_norm": 1.9160706996917725, "learning_rate": 9.556500000000001e-05, "loss": 0.6462, "step": 19123 }, { "epoch": 1.0708925971553365, "grad_norm": 1.756384253501892, "learning_rate": 9.557000000000001e-05, "loss": 0.4831, "step": 19124 }, { "epoch": 1.0709485944674655, "grad_norm": 1.5352463722229004, "learning_rate": 9.5575e-05, "loss": 0.4154, "step": 19125 }, { "epoch": 1.0710045917795945, "grad_norm": 1.402227520942688, "learning_rate": 9.558e-05, "loss": 0.4776, "step": 19126 }, { "epoch": 1.0710605890917235, "grad_norm": 1.381800889968872, "learning_rate": 9.5585e-05, "loss": 0.3569, "step": 19127 }, { "epoch": 1.0711165864038525, "grad_norm": 1.2688425779342651, "learning_rate": 9.559e-05, "loss": 0.3758, "step": 19128 }, { "epoch": 1.0711725837159816, "grad_norm": 1.3217486143112183, "learning_rate": 9.5595e-05, "loss": 0.5328, "step": 19129 }, { "epoch": 1.0712285810281106, "grad_norm": 1.3088239431381226, "learning_rate": 9.56e-05, "loss": 0.4262, "step": 19130 }, { "epoch": 1.0712845783402396, "grad_norm": 1.3540040254592896, "learning_rate": 9.5605e-05, "loss": 0.4453, "step": 19131 }, { "epoch": 1.0713405756523686, "grad_norm": 1.3389081954956055, "learning_rate": 9.561e-05, "loss": 0.3616, "step": 19132 }, { "epoch": 1.0713965729644976, "grad_norm": 1.123694658279419, "learning_rate": 9.561500000000001e-05, "loss": 0.4807, "step": 19133 }, { "epoch": 1.0714525702766267, "grad_norm": 1.4012280702590942, "learning_rate": 9.562000000000001e-05, "loss": 0.5071, "step": 19134 }, { "epoch": 1.0715085675887557, "grad_norm": 1.4097554683685303, "learning_rate": 9.562500000000001e-05, 
"loss": 0.4201, "step": 19135 }, { "epoch": 1.0715645649008847, "grad_norm": 1.218183994293213, "learning_rate": 9.563e-05, "loss": 0.3332, "step": 19136 }, { "epoch": 1.0716205622130137, "grad_norm": 1.2299362421035767, "learning_rate": 9.5635e-05, "loss": 0.3997, "step": 19137 }, { "epoch": 1.0716765595251427, "grad_norm": 1.709261178970337, "learning_rate": 9.564000000000001e-05, "loss": 0.459, "step": 19138 }, { "epoch": 1.0717325568372718, "grad_norm": 1.3686734437942505, "learning_rate": 9.564500000000001e-05, "loss": 0.4295, "step": 19139 }, { "epoch": 1.0717885541494008, "grad_norm": 1.5993621349334717, "learning_rate": 9.565000000000001e-05, "loss": 0.4597, "step": 19140 }, { "epoch": 1.0718445514615298, "grad_norm": 1.6603903770446777, "learning_rate": 9.5655e-05, "loss": 0.5369, "step": 19141 }, { "epoch": 1.0719005487736588, "grad_norm": 1.1640290021896362, "learning_rate": 9.566e-05, "loss": 0.4805, "step": 19142 }, { "epoch": 1.0719565460857878, "grad_norm": 1.2247138023376465, "learning_rate": 9.5665e-05, "loss": 0.3195, "step": 19143 }, { "epoch": 1.0720125433979169, "grad_norm": 1.2630603313446045, "learning_rate": 9.567000000000001e-05, "loss": 0.3868, "step": 19144 }, { "epoch": 1.0720685407100459, "grad_norm": 1.3521305322647095, "learning_rate": 9.567500000000001e-05, "loss": 0.4709, "step": 19145 }, { "epoch": 1.0721245380221749, "grad_norm": 1.1388635635375977, "learning_rate": 9.568e-05, "loss": 0.2951, "step": 19146 }, { "epoch": 1.072180535334304, "grad_norm": 1.536655306816101, "learning_rate": 9.5685e-05, "loss": 0.3331, "step": 19147 }, { "epoch": 1.072236532646433, "grad_norm": 1.6921892166137695, "learning_rate": 9.569e-05, "loss": 0.4127, "step": 19148 }, { "epoch": 1.072292529958562, "grad_norm": 1.5893220901489258, "learning_rate": 9.5695e-05, "loss": 0.396, "step": 19149 }, { "epoch": 1.072348527270691, "grad_norm": 2.503692865371704, "learning_rate": 9.57e-05, "loss": 0.4211, "step": 19150 }, { "epoch": 1.07240452458282, 
"grad_norm": 1.5990415811538696, "learning_rate": 9.5705e-05, "loss": 0.491, "step": 19151 }, { "epoch": 1.072460521894949, "grad_norm": 1.5531582832336426, "learning_rate": 9.571e-05, "loss": 0.5286, "step": 19152 }, { "epoch": 1.072516519207078, "grad_norm": 1.3615837097167969, "learning_rate": 9.5715e-05, "loss": 0.436, "step": 19153 }, { "epoch": 1.072572516519207, "grad_norm": 1.5618793964385986, "learning_rate": 9.572000000000001e-05, "loss": 0.4309, "step": 19154 }, { "epoch": 1.072628513831336, "grad_norm": 1.3727819919586182, "learning_rate": 9.572500000000001e-05, "loss": 0.5035, "step": 19155 }, { "epoch": 1.072684511143465, "grad_norm": 1.286844253540039, "learning_rate": 9.573e-05, "loss": 0.3753, "step": 19156 }, { "epoch": 1.072740508455594, "grad_norm": 1.4722813367843628, "learning_rate": 9.5735e-05, "loss": 0.3495, "step": 19157 }, { "epoch": 1.0727965057677231, "grad_norm": 1.631972074508667, "learning_rate": 9.574e-05, "loss": 0.4748, "step": 19158 }, { "epoch": 1.0728525030798521, "grad_norm": 1.638663411140442, "learning_rate": 9.574500000000001e-05, "loss": 0.4136, "step": 19159 }, { "epoch": 1.0729085003919812, "grad_norm": 1.4947776794433594, "learning_rate": 9.575000000000001e-05, "loss": 0.5904, "step": 19160 }, { "epoch": 1.0729644977041102, "grad_norm": 2.0101702213287354, "learning_rate": 9.575500000000001e-05, "loss": 0.5917, "step": 19161 }, { "epoch": 1.0730204950162392, "grad_norm": 1.1375631093978882, "learning_rate": 9.576e-05, "loss": 0.3687, "step": 19162 }, { "epoch": 1.0730764923283682, "grad_norm": 1.2634828090667725, "learning_rate": 9.5765e-05, "loss": 0.3762, "step": 19163 }, { "epoch": 1.0731324896404972, "grad_norm": 1.2566579580307007, "learning_rate": 9.577e-05, "loss": 0.4033, "step": 19164 }, { "epoch": 1.0731884869526263, "grad_norm": 1.6312873363494873, "learning_rate": 9.5775e-05, "loss": 0.5984, "step": 19165 }, { "epoch": 1.0732444842647553, "grad_norm": 1.2147444486618042, "learning_rate": 
9.578000000000001e-05, "loss": 0.4717, "step": 19166 }, { "epoch": 1.0733004815768843, "grad_norm": 1.5377756357192993, "learning_rate": 9.5785e-05, "loss": 0.4054, "step": 19167 }, { "epoch": 1.0733564788890133, "grad_norm": 1.4068466424942017, "learning_rate": 9.579e-05, "loss": 0.3693, "step": 19168 }, { "epoch": 1.0734124762011423, "grad_norm": 1.3765770196914673, "learning_rate": 9.5795e-05, "loss": 0.5127, "step": 19169 }, { "epoch": 1.0734684735132713, "grad_norm": 1.680324673652649, "learning_rate": 9.58e-05, "loss": 0.5666, "step": 19170 }, { "epoch": 1.0735244708254004, "grad_norm": 1.2332838773727417, "learning_rate": 9.5805e-05, "loss": 0.4255, "step": 19171 }, { "epoch": 1.0735804681375294, "grad_norm": 1.4358876943588257, "learning_rate": 9.581e-05, "loss": 0.3233, "step": 19172 }, { "epoch": 1.0736364654496584, "grad_norm": 1.2500696182250977, "learning_rate": 9.5815e-05, "loss": 0.4533, "step": 19173 }, { "epoch": 1.0736924627617874, "grad_norm": 1.4486896991729736, "learning_rate": 9.582000000000001e-05, "loss": 0.4665, "step": 19174 }, { "epoch": 1.0737484600739164, "grad_norm": 1.3838056325912476, "learning_rate": 9.582500000000001e-05, "loss": 0.4326, "step": 19175 }, { "epoch": 1.0738044573860455, "grad_norm": 1.5006517171859741, "learning_rate": 9.583000000000001e-05, "loss": 0.5085, "step": 19176 }, { "epoch": 1.0738604546981745, "grad_norm": 1.25216543674469, "learning_rate": 9.5835e-05, "loss": 0.4017, "step": 19177 }, { "epoch": 1.0739164520103035, "grad_norm": 1.4105465412139893, "learning_rate": 9.584e-05, "loss": 0.482, "step": 19178 }, { "epoch": 1.0739724493224325, "grad_norm": 1.3745070695877075, "learning_rate": 9.5845e-05, "loss": 0.4596, "step": 19179 }, { "epoch": 1.0740284466345615, "grad_norm": 1.3403490781784058, "learning_rate": 9.585000000000001e-05, "loss": 0.4411, "step": 19180 }, { "epoch": 1.0740844439466906, "grad_norm": 1.0515056848526, "learning_rate": 9.585500000000001e-05, "loss": 0.3387, "step": 19181 }, { "epoch": 
1.0741404412588196, "grad_norm": 1.3254003524780273, "learning_rate": 9.586000000000001e-05, "loss": 0.3439, "step": 19182 }, { "epoch": 1.0741964385709486, "grad_norm": 1.3216105699539185, "learning_rate": 9.5865e-05, "loss": 0.3528, "step": 19183 }, { "epoch": 1.0742524358830776, "grad_norm": 1.2448352575302124, "learning_rate": 9.587e-05, "loss": 0.3025, "step": 19184 }, { "epoch": 1.0743084331952066, "grad_norm": 1.499056339263916, "learning_rate": 9.5875e-05, "loss": 0.4477, "step": 19185 }, { "epoch": 1.0743644305073357, "grad_norm": 1.3132444620132446, "learning_rate": 9.588e-05, "loss": 0.5061, "step": 19186 }, { "epoch": 1.0744204278194647, "grad_norm": 1.2185949087142944, "learning_rate": 9.588500000000001e-05, "loss": 0.2891, "step": 19187 }, { "epoch": 1.0744764251315937, "grad_norm": 1.2195074558258057, "learning_rate": 9.589e-05, "loss": 0.326, "step": 19188 }, { "epoch": 1.0745324224437227, "grad_norm": 1.6875252723693848, "learning_rate": 9.5895e-05, "loss": 0.5185, "step": 19189 }, { "epoch": 1.0745884197558517, "grad_norm": 1.3300776481628418, "learning_rate": 9.59e-05, "loss": 0.3762, "step": 19190 }, { "epoch": 1.0746444170679808, "grad_norm": 1.4575303792953491, "learning_rate": 9.5905e-05, "loss": 0.4495, "step": 19191 }, { "epoch": 1.0747004143801098, "grad_norm": 1.3437873125076294, "learning_rate": 9.591e-05, "loss": 0.5067, "step": 19192 }, { "epoch": 1.0747564116922388, "grad_norm": 1.2452030181884766, "learning_rate": 9.5915e-05, "loss": 0.3846, "step": 19193 }, { "epoch": 1.0748124090043678, "grad_norm": 1.2425785064697266, "learning_rate": 9.592e-05, "loss": 0.3747, "step": 19194 }, { "epoch": 1.0748684063164968, "grad_norm": 1.5638777017593384, "learning_rate": 9.592500000000001e-05, "loss": 0.4093, "step": 19195 }, { "epoch": 1.0749244036286258, "grad_norm": 1.5321054458618164, "learning_rate": 9.593000000000001e-05, "loss": 0.4748, "step": 19196 }, { "epoch": 1.0749804009407549, "grad_norm": 1.5617907047271729, "learning_rate": 
9.593500000000001e-05, "loss": 0.4677, "step": 19197 }, { "epoch": 1.0750363982528839, "grad_norm": 2.0262441635131836, "learning_rate": 9.594e-05, "loss": 0.6669, "step": 19198 }, { "epoch": 1.075092395565013, "grad_norm": 1.2591593265533447, "learning_rate": 9.5945e-05, "loss": 0.5268, "step": 19199 }, { "epoch": 1.075148392877142, "grad_norm": 1.3188189268112183, "learning_rate": 9.595e-05, "loss": 0.4543, "step": 19200 }, { "epoch": 1.075204390189271, "grad_norm": 1.450954794883728, "learning_rate": 9.595500000000001e-05, "loss": 0.4581, "step": 19201 }, { "epoch": 1.0752603875014, "grad_norm": 1.6723936796188354, "learning_rate": 9.596000000000001e-05, "loss": 0.5799, "step": 19202 }, { "epoch": 1.075316384813529, "grad_norm": 1.4126452207565308, "learning_rate": 9.596500000000001e-05, "loss": 0.4893, "step": 19203 }, { "epoch": 1.075372382125658, "grad_norm": 1.315180778503418, "learning_rate": 9.597e-05, "loss": 0.4201, "step": 19204 }, { "epoch": 1.075428379437787, "grad_norm": 1.350109577178955, "learning_rate": 9.5975e-05, "loss": 0.4438, "step": 19205 }, { "epoch": 1.075484376749916, "grad_norm": 1.246303677558899, "learning_rate": 9.598e-05, "loss": 0.3991, "step": 19206 }, { "epoch": 1.075540374062045, "grad_norm": 1.2813001871109009, "learning_rate": 9.5985e-05, "loss": 0.42, "step": 19207 }, { "epoch": 1.075596371374174, "grad_norm": 1.320867657661438, "learning_rate": 9.599000000000001e-05, "loss": 0.4516, "step": 19208 }, { "epoch": 1.075652368686303, "grad_norm": 1.4397691488265991, "learning_rate": 9.5995e-05, "loss": 0.4549, "step": 19209 }, { "epoch": 1.0757083659984321, "grad_norm": 1.1421473026275635, "learning_rate": 9.6e-05, "loss": 0.3526, "step": 19210 }, { "epoch": 1.0757643633105611, "grad_norm": 1.3536735773086548, "learning_rate": 9.6005e-05, "loss": 0.3772, "step": 19211 }, { "epoch": 1.0758203606226902, "grad_norm": 1.417104959487915, "learning_rate": 9.601e-05, "loss": 0.4627, "step": 19212 }, { "epoch": 1.0758763579348192, 
"grad_norm": 1.2568718194961548, "learning_rate": 9.6015e-05, "loss": 0.3361, "step": 19213 }, { "epoch": 1.0759323552469482, "grad_norm": 1.1949340105056763, "learning_rate": 9.602e-05, "loss": 0.4164, "step": 19214 }, { "epoch": 1.0759883525590772, "grad_norm": 1.399598479270935, "learning_rate": 9.6025e-05, "loss": 0.4094, "step": 19215 }, { "epoch": 1.0760443498712062, "grad_norm": 1.5406945943832397, "learning_rate": 9.603000000000001e-05, "loss": 0.4701, "step": 19216 }, { "epoch": 1.0761003471833352, "grad_norm": 1.3081169128417969, "learning_rate": 9.603500000000001e-05, "loss": 0.4717, "step": 19217 }, { "epoch": 1.0761563444954643, "grad_norm": 1.2680391073226929, "learning_rate": 9.604000000000001e-05, "loss": 0.4676, "step": 19218 }, { "epoch": 1.0762123418075933, "grad_norm": 1.303810954093933, "learning_rate": 9.6045e-05, "loss": 0.41, "step": 19219 }, { "epoch": 1.0762683391197223, "grad_norm": 1.2404943704605103, "learning_rate": 9.605e-05, "loss": 0.445, "step": 19220 }, { "epoch": 1.0763243364318513, "grad_norm": 1.246006727218628, "learning_rate": 9.6055e-05, "loss": 0.458, "step": 19221 }, { "epoch": 1.0763803337439803, "grad_norm": 1.2949378490447998, "learning_rate": 9.606000000000001e-05, "loss": 0.4365, "step": 19222 }, { "epoch": 1.0764363310561094, "grad_norm": 1.7878241539001465, "learning_rate": 9.606500000000001e-05, "loss": 0.4829, "step": 19223 }, { "epoch": 1.0764923283682384, "grad_norm": 1.2644611597061157, "learning_rate": 9.607000000000001e-05, "loss": 0.455, "step": 19224 }, { "epoch": 1.0765483256803674, "grad_norm": 1.3939383029937744, "learning_rate": 9.6075e-05, "loss": 0.4113, "step": 19225 }, { "epoch": 1.0766043229924964, "grad_norm": 1.207818627357483, "learning_rate": 9.608e-05, "loss": 0.3622, "step": 19226 }, { "epoch": 1.0766603203046254, "grad_norm": 1.1098518371582031, "learning_rate": 9.6085e-05, "loss": 0.2977, "step": 19227 }, { "epoch": 1.0767163176167545, "grad_norm": 1.2676883935928345, "learning_rate": 
9.609e-05, "loss": 0.3818, "step": 19228 }, { "epoch": 1.0767723149288835, "grad_norm": 1.369973063468933, "learning_rate": 9.609500000000001e-05, "loss": 0.4579, "step": 19229 }, { "epoch": 1.0768283122410125, "grad_norm": 1.4226983785629272, "learning_rate": 9.61e-05, "loss": 0.3708, "step": 19230 }, { "epoch": 1.0768843095531415, "grad_norm": 1.5574685335159302, "learning_rate": 9.6105e-05, "loss": 0.5837, "step": 19231 }, { "epoch": 1.0769403068652705, "grad_norm": 1.4205504655838013, "learning_rate": 9.611e-05, "loss": 0.5139, "step": 19232 }, { "epoch": 1.0769963041773996, "grad_norm": 1.4705474376678467, "learning_rate": 9.6115e-05, "loss": 0.5394, "step": 19233 }, { "epoch": 1.0770523014895286, "grad_norm": 1.4634571075439453, "learning_rate": 9.612000000000001e-05, "loss": 0.448, "step": 19234 }, { "epoch": 1.0771082988016576, "grad_norm": 1.6852033138275146, "learning_rate": 9.6125e-05, "loss": 0.5067, "step": 19235 }, { "epoch": 1.0771642961137866, "grad_norm": 1.2660261392593384, "learning_rate": 9.613e-05, "loss": 0.3121, "step": 19236 }, { "epoch": 1.0772202934259156, "grad_norm": 1.2085484266281128, "learning_rate": 9.613500000000001e-05, "loss": 0.4393, "step": 19237 }, { "epoch": 1.0772762907380447, "grad_norm": 1.3156460523605347, "learning_rate": 9.614000000000001e-05, "loss": 0.5263, "step": 19238 }, { "epoch": 1.0773322880501737, "grad_norm": 1.5212336778640747, "learning_rate": 9.614500000000001e-05, "loss": 0.4745, "step": 19239 }, { "epoch": 1.0773882853623027, "grad_norm": 1.2039270401000977, "learning_rate": 9.615e-05, "loss": 0.3301, "step": 19240 }, { "epoch": 1.0774442826744317, "grad_norm": 1.3416954278945923, "learning_rate": 9.6155e-05, "loss": 0.2978, "step": 19241 }, { "epoch": 1.0775002799865607, "grad_norm": 1.633178472518921, "learning_rate": 9.616e-05, "loss": 0.3377, "step": 19242 }, { "epoch": 1.0775562772986897, "grad_norm": 1.3483723402023315, "learning_rate": 9.616500000000001e-05, "loss": 0.4082, "step": 19243 }, { 
"epoch": 1.0776122746108188, "grad_norm": 1.492039680480957, "learning_rate": 9.617000000000001e-05, "loss": 0.5695, "step": 19244 }, { "epoch": 1.0776682719229478, "grad_norm": 1.0585488080978394, "learning_rate": 9.6175e-05, "loss": 0.3845, "step": 19245 }, { "epoch": 1.0777242692350768, "grad_norm": 1.646497368812561, "learning_rate": 9.618e-05, "loss": 0.4469, "step": 19246 }, { "epoch": 1.0777802665472058, "grad_norm": 1.4915268421173096, "learning_rate": 9.6185e-05, "loss": 0.4839, "step": 19247 }, { "epoch": 1.0778362638593348, "grad_norm": 1.1967825889587402, "learning_rate": 9.619e-05, "loss": 0.3524, "step": 19248 }, { "epoch": 1.0778922611714639, "grad_norm": 1.4445247650146484, "learning_rate": 9.6195e-05, "loss": 0.3228, "step": 19249 }, { "epoch": 1.0779482584835929, "grad_norm": 1.1791483163833618, "learning_rate": 9.620000000000001e-05, "loss": 0.3623, "step": 19250 }, { "epoch": 1.078004255795722, "grad_norm": 1.2073333263397217, "learning_rate": 9.6205e-05, "loss": 0.3775, "step": 19251 }, { "epoch": 1.078060253107851, "grad_norm": 1.238425612449646, "learning_rate": 9.621e-05, "loss": 0.3242, "step": 19252 }, { "epoch": 1.07811625041998, "grad_norm": 1.1087088584899902, "learning_rate": 9.6215e-05, "loss": 0.3238, "step": 19253 }, { "epoch": 1.078172247732109, "grad_norm": 1.262492060661316, "learning_rate": 9.622000000000001e-05, "loss": 0.3786, "step": 19254 }, { "epoch": 1.078228245044238, "grad_norm": 1.1630018949508667, "learning_rate": 9.622500000000001e-05, "loss": 0.342, "step": 19255 }, { "epoch": 1.078284242356367, "grad_norm": 1.9120094776153564, "learning_rate": 9.623e-05, "loss": 0.6781, "step": 19256 }, { "epoch": 1.078340239668496, "grad_norm": 1.4458720684051514, "learning_rate": 9.6235e-05, "loss": 0.4242, "step": 19257 }, { "epoch": 1.078396236980625, "grad_norm": 1.3422973155975342, "learning_rate": 9.624000000000001e-05, "loss": 0.3918, "step": 19258 }, { "epoch": 1.078452234292754, "grad_norm": 1.4883275032043457, 
"learning_rate": 9.624500000000001e-05, "loss": 0.6011, "step": 19259 }, { "epoch": 1.078508231604883, "grad_norm": 1.652848720550537, "learning_rate": 9.625000000000001e-05, "loss": 0.4489, "step": 19260 }, { "epoch": 1.078564228917012, "grad_norm": 1.555734395980835, "learning_rate": 9.6255e-05, "loss": 0.3759, "step": 19261 }, { "epoch": 1.078620226229141, "grad_norm": 1.3970298767089844, "learning_rate": 9.626e-05, "loss": 0.4004, "step": 19262 }, { "epoch": 1.0786762235412701, "grad_norm": 1.3191635608673096, "learning_rate": 9.6265e-05, "loss": 0.3588, "step": 19263 }, { "epoch": 1.0787322208533991, "grad_norm": 1.6098352670669556, "learning_rate": 9.627e-05, "loss": 0.4608, "step": 19264 }, { "epoch": 1.0787882181655282, "grad_norm": 1.4320333003997803, "learning_rate": 9.627500000000001e-05, "loss": 0.4395, "step": 19265 }, { "epoch": 1.0788442154776572, "grad_norm": 1.2062251567840576, "learning_rate": 9.628e-05, "loss": 0.3891, "step": 19266 }, { "epoch": 1.0789002127897862, "grad_norm": 1.2579712867736816, "learning_rate": 9.6285e-05, "loss": 0.4037, "step": 19267 }, { "epoch": 1.0789562101019152, "grad_norm": 1.2883987426757812, "learning_rate": 9.629e-05, "loss": 0.4493, "step": 19268 }, { "epoch": 1.079012207414044, "grad_norm": 1.0617892742156982, "learning_rate": 9.6295e-05, "loss": 0.3791, "step": 19269 }, { "epoch": 1.079068204726173, "grad_norm": 1.2531203031539917, "learning_rate": 9.63e-05, "loss": 0.3705, "step": 19270 }, { "epoch": 1.079124202038302, "grad_norm": 1.6580188274383545, "learning_rate": 9.630500000000001e-05, "loss": 0.5208, "step": 19271 }, { "epoch": 1.079180199350431, "grad_norm": 1.5703518390655518, "learning_rate": 9.631e-05, "loss": 0.5463, "step": 19272 }, { "epoch": 1.07923619666256, "grad_norm": 1.3446650505065918, "learning_rate": 9.6315e-05, "loss": 0.4433, "step": 19273 }, { "epoch": 1.0792921939746891, "grad_norm": 1.2784892320632935, "learning_rate": 9.632e-05, "loss": 0.3716, "step": 19274 }, { "epoch": 
1.0793481912868181, "grad_norm": 1.4385497570037842, "learning_rate": 9.632500000000001e-05, "loss": 0.4583, "step": 19275 }, { "epoch": 1.0794041885989472, "grad_norm": 1.7008953094482422, "learning_rate": 9.633000000000001e-05, "loss": 0.6018, "step": 19276 }, { "epoch": 1.0794601859110762, "grad_norm": 1.5577605962753296, "learning_rate": 9.6335e-05, "loss": 0.4435, "step": 19277 }, { "epoch": 1.0795161832232052, "grad_norm": 1.368937373161316, "learning_rate": 9.634e-05, "loss": 0.3812, "step": 19278 }, { "epoch": 1.0795721805353342, "grad_norm": 1.4653809070587158, "learning_rate": 9.634500000000001e-05, "loss": 0.4979, "step": 19279 }, { "epoch": 1.0796281778474632, "grad_norm": 1.2361409664154053, "learning_rate": 9.635000000000001e-05, "loss": 0.4288, "step": 19280 }, { "epoch": 1.0796841751595923, "grad_norm": 1.3378677368164062, "learning_rate": 9.635500000000001e-05, "loss": 0.4036, "step": 19281 }, { "epoch": 1.0797401724717213, "grad_norm": 1.4238026142120361, "learning_rate": 9.636e-05, "loss": 0.4819, "step": 19282 }, { "epoch": 1.0797961697838503, "grad_norm": 1.2204433679580688, "learning_rate": 9.6365e-05, "loss": 0.4173, "step": 19283 }, { "epoch": 1.0798521670959793, "grad_norm": 1.1371172666549683, "learning_rate": 9.637e-05, "loss": 0.3358, "step": 19284 }, { "epoch": 1.0799081644081083, "grad_norm": 1.2987414598464966, "learning_rate": 9.6375e-05, "loss": 0.4411, "step": 19285 }, { "epoch": 1.0799641617202373, "grad_norm": 1.4364862442016602, "learning_rate": 9.638000000000001e-05, "loss": 0.3644, "step": 19286 }, { "epoch": 1.0800201590323664, "grad_norm": 1.258409857749939, "learning_rate": 9.6385e-05, "loss": 0.4312, "step": 19287 }, { "epoch": 1.0800761563444954, "grad_norm": 1.504977822303772, "learning_rate": 9.639e-05, "loss": 0.426, "step": 19288 }, { "epoch": 1.0801321536566244, "grad_norm": 1.5396018028259277, "learning_rate": 9.6395e-05, "loss": 0.5399, "step": 19289 }, { "epoch": 1.0801881509687534, "grad_norm": 
1.4223546981811523, "learning_rate": 9.64e-05, "loss": 0.5389, "step": 19290 }, { "epoch": 1.0802441482808824, "grad_norm": 1.4549106359481812, "learning_rate": 9.6405e-05, "loss": 0.5428, "step": 19291 }, { "epoch": 1.0803001455930115, "grad_norm": 1.9123473167419434, "learning_rate": 9.641000000000001e-05, "loss": 0.4298, "step": 19292 }, { "epoch": 1.0803561429051405, "grad_norm": 1.4224637746810913, "learning_rate": 9.6415e-05, "loss": 0.3685, "step": 19293 }, { "epoch": 1.0804121402172695, "grad_norm": 1.3219951391220093, "learning_rate": 9.642e-05, "loss": 0.4869, "step": 19294 }, { "epoch": 1.0804681375293985, "grad_norm": 1.3343758583068848, "learning_rate": 9.642500000000001e-05, "loss": 0.492, "step": 19295 }, { "epoch": 1.0805241348415275, "grad_norm": 1.5717768669128418, "learning_rate": 9.643000000000001e-05, "loss": 0.6136, "step": 19296 }, { "epoch": 1.0805801321536566, "grad_norm": 1.5233100652694702, "learning_rate": 9.643500000000001e-05, "loss": 0.4301, "step": 19297 }, { "epoch": 1.0806361294657856, "grad_norm": 1.520534634590149, "learning_rate": 9.644e-05, "loss": 0.482, "step": 19298 }, { "epoch": 1.0806921267779146, "grad_norm": 1.3004443645477295, "learning_rate": 9.6445e-05, "loss": 0.3732, "step": 19299 }, { "epoch": 1.0807481240900436, "grad_norm": 1.151798963546753, "learning_rate": 9.645000000000001e-05, "loss": 0.298, "step": 19300 }, { "epoch": 1.0808041214021726, "grad_norm": 1.3476203680038452, "learning_rate": 9.645500000000001e-05, "loss": 0.4183, "step": 19301 }, { "epoch": 1.0808601187143017, "grad_norm": 1.2625181674957275, "learning_rate": 9.646000000000001e-05, "loss": 0.4778, "step": 19302 }, { "epoch": 1.0809161160264307, "grad_norm": 1.2647287845611572, "learning_rate": 9.6465e-05, "loss": 0.4746, "step": 19303 }, { "epoch": 1.0809721133385597, "grad_norm": 1.4775749444961548, "learning_rate": 9.647e-05, "loss": 0.4654, "step": 19304 }, { "epoch": 1.0810281106506887, "grad_norm": 1.6221728324890137, "learning_rate": 
9.6475e-05, "loss": 0.387, "step": 19305 }, { "epoch": 1.0810841079628177, "grad_norm": 1.1883046627044678, "learning_rate": 9.648e-05, "loss": 0.3965, "step": 19306 }, { "epoch": 1.0811401052749467, "grad_norm": 1.416805624961853, "learning_rate": 9.648500000000001e-05, "loss": 0.4267, "step": 19307 }, { "epoch": 1.0811961025870758, "grad_norm": 1.3891547918319702, "learning_rate": 9.649e-05, "loss": 0.4943, "step": 19308 }, { "epoch": 1.0812520998992048, "grad_norm": 1.1932092905044556, "learning_rate": 9.6495e-05, "loss": 0.4577, "step": 19309 }, { "epoch": 1.0813080972113338, "grad_norm": 1.4168490171432495, "learning_rate": 9.65e-05, "loss": 0.499, "step": 19310 }, { "epoch": 1.0813640945234628, "grad_norm": 1.3673006296157837, "learning_rate": 9.6505e-05, "loss": 0.4831, "step": 19311 }, { "epoch": 1.0814200918355918, "grad_norm": 1.3717122077941895, "learning_rate": 9.651e-05, "loss": 0.4299, "step": 19312 }, { "epoch": 1.0814760891477209, "grad_norm": 1.4127898216247559, "learning_rate": 9.6515e-05, "loss": 0.462, "step": 19313 }, { "epoch": 1.0815320864598499, "grad_norm": 1.3625534772872925, "learning_rate": 9.652e-05, "loss": 0.3846, "step": 19314 }, { "epoch": 1.081588083771979, "grad_norm": 1.3918216228485107, "learning_rate": 9.652500000000002e-05, "loss": 0.3508, "step": 19315 }, { "epoch": 1.081644081084108, "grad_norm": 1.4035346508026123, "learning_rate": 9.653000000000001e-05, "loss": 0.6266, "step": 19316 }, { "epoch": 1.081700078396237, "grad_norm": 1.3695547580718994, "learning_rate": 9.653500000000001e-05, "loss": 0.3242, "step": 19317 }, { "epoch": 1.081756075708366, "grad_norm": 1.5471813678741455, "learning_rate": 9.654000000000001e-05, "loss": 0.4938, "step": 19318 }, { "epoch": 1.081812073020495, "grad_norm": 1.4240856170654297, "learning_rate": 9.6545e-05, "loss": 0.3783, "step": 19319 }, { "epoch": 1.081868070332624, "grad_norm": 1.166264533996582, "learning_rate": 9.655e-05, "loss": 0.3181, "step": 19320 }, { "epoch": 
1.081924067644753, "grad_norm": 1.5024768114089966, "learning_rate": 9.655500000000001e-05, "loss": 0.606, "step": 19321 }, { "epoch": 1.081980064956882, "grad_norm": 1.1460301876068115, "learning_rate": 9.656000000000001e-05, "loss": 0.436, "step": 19322 }, { "epoch": 1.082036062269011, "grad_norm": 1.4128327369689941, "learning_rate": 9.656500000000001e-05, "loss": 0.4741, "step": 19323 }, { "epoch": 1.08209205958114, "grad_norm": 1.320252776145935, "learning_rate": 9.657e-05, "loss": 0.4336, "step": 19324 }, { "epoch": 1.082148056893269, "grad_norm": 1.1598433256149292, "learning_rate": 9.6575e-05, "loss": 0.3989, "step": 19325 }, { "epoch": 1.0822040542053981, "grad_norm": 1.2715833187103271, "learning_rate": 9.658e-05, "loss": 0.3745, "step": 19326 }, { "epoch": 1.0822600515175271, "grad_norm": 1.4066200256347656, "learning_rate": 9.6585e-05, "loss": 0.4034, "step": 19327 }, { "epoch": 1.0823160488296562, "grad_norm": 1.2118477821350098, "learning_rate": 9.659000000000001e-05, "loss": 0.4449, "step": 19328 }, { "epoch": 1.0823720461417852, "grad_norm": 1.5100220441818237, "learning_rate": 9.6595e-05, "loss": 0.5064, "step": 19329 }, { "epoch": 1.0824280434539142, "grad_norm": 1.2827850580215454, "learning_rate": 9.66e-05, "loss": 0.4138, "step": 19330 }, { "epoch": 1.0824840407660432, "grad_norm": 1.5457252264022827, "learning_rate": 9.6605e-05, "loss": 0.5519, "step": 19331 }, { "epoch": 1.0825400380781722, "grad_norm": 1.2562763690948486, "learning_rate": 9.661e-05, "loss": 0.4989, "step": 19332 }, { "epoch": 1.0825960353903012, "grad_norm": 1.4032789468765259, "learning_rate": 9.6615e-05, "loss": 0.4255, "step": 19333 }, { "epoch": 1.0826520327024303, "grad_norm": 1.6235026121139526, "learning_rate": 9.661999999999999e-05, "loss": 0.4823, "step": 19334 }, { "epoch": 1.0827080300145593, "grad_norm": 1.156258225440979, "learning_rate": 9.6625e-05, "loss": 0.3425, "step": 19335 }, { "epoch": 1.0827640273266883, "grad_norm": 1.303497552871704, "learning_rate": 
9.663000000000002e-05, "loss": 0.4756, "step": 19336 }, { "epoch": 1.0828200246388173, "grad_norm": 1.3825955390930176, "learning_rate": 9.663500000000001e-05, "loss": 0.3229, "step": 19337 }, { "epoch": 1.0828760219509463, "grad_norm": 1.3050435781478882, "learning_rate": 9.664000000000001e-05, "loss": 0.4469, "step": 19338 }, { "epoch": 1.0829320192630754, "grad_norm": 1.271113395690918, "learning_rate": 9.664500000000001e-05, "loss": 0.385, "step": 19339 }, { "epoch": 1.0829880165752044, "grad_norm": 1.216892957687378, "learning_rate": 9.665e-05, "loss": 0.3499, "step": 19340 }, { "epoch": 1.0830440138873334, "grad_norm": 1.3005635738372803, "learning_rate": 9.6655e-05, "loss": 0.4114, "step": 19341 }, { "epoch": 1.0831000111994624, "grad_norm": 1.6403224468231201, "learning_rate": 9.666e-05, "loss": 0.4046, "step": 19342 }, { "epoch": 1.0831560085115914, "grad_norm": 1.3765748739242554, "learning_rate": 9.666500000000001e-05, "loss": 0.5173, "step": 19343 }, { "epoch": 1.0832120058237205, "grad_norm": 1.6299631595611572, "learning_rate": 9.667000000000001e-05, "loss": 0.3675, "step": 19344 }, { "epoch": 1.0832680031358495, "grad_norm": 1.4607957601547241, "learning_rate": 9.6675e-05, "loss": 0.4178, "step": 19345 }, { "epoch": 1.0833240004479785, "grad_norm": 1.4849029779434204, "learning_rate": 9.668e-05, "loss": 0.5477, "step": 19346 }, { "epoch": 1.0833799977601075, "grad_norm": 1.2126410007476807, "learning_rate": 9.6685e-05, "loss": 0.3774, "step": 19347 }, { "epoch": 1.0834359950722365, "grad_norm": 1.4253648519515991, "learning_rate": 9.669e-05, "loss": 0.3828, "step": 19348 }, { "epoch": 1.0834919923843656, "grad_norm": 1.1440986394882202, "learning_rate": 9.669500000000001e-05, "loss": 0.2512, "step": 19349 }, { "epoch": 1.0835479896964946, "grad_norm": 1.4302659034729004, "learning_rate": 9.67e-05, "loss": 0.3265, "step": 19350 }, { "epoch": 1.0836039870086236, "grad_norm": 1.5102487802505493, "learning_rate": 9.6705e-05, "loss": 0.3999, "step": 19351 
}, { "epoch": 1.0836599843207526, "grad_norm": 1.5125442743301392, "learning_rate": 9.671e-05, "loss": 0.5036, "step": 19352 }, { "epoch": 1.0837159816328816, "grad_norm": 1.411964774131775, "learning_rate": 9.6715e-05, "loss": 0.6515, "step": 19353 }, { "epoch": 1.0837719789450107, "grad_norm": 1.6002410650253296, "learning_rate": 9.672e-05, "loss": 0.6385, "step": 19354 }, { "epoch": 1.0838279762571397, "grad_norm": 1.8580633401870728, "learning_rate": 9.6725e-05, "loss": 0.605, "step": 19355 }, { "epoch": 1.0838839735692687, "grad_norm": 1.1607344150543213, "learning_rate": 9.673e-05, "loss": 0.4321, "step": 19356 }, { "epoch": 1.0839399708813977, "grad_norm": 1.320904016494751, "learning_rate": 9.673500000000001e-05, "loss": 0.4001, "step": 19357 }, { "epoch": 1.0839959681935267, "grad_norm": 1.255415439605713, "learning_rate": 9.674000000000001e-05, "loss": 0.4552, "step": 19358 }, { "epoch": 1.0840519655056557, "grad_norm": 1.2322289943695068, "learning_rate": 9.674500000000001e-05, "loss": 0.4472, "step": 19359 }, { "epoch": 1.0841079628177848, "grad_norm": 1.4008407592773438, "learning_rate": 9.675000000000001e-05, "loss": 0.5539, "step": 19360 }, { "epoch": 1.0841639601299138, "grad_norm": 1.384464144706726, "learning_rate": 9.6755e-05, "loss": 0.3652, "step": 19361 }, { "epoch": 1.0842199574420428, "grad_norm": 1.4799270629882812, "learning_rate": 9.676e-05, "loss": 0.4102, "step": 19362 }, { "epoch": 1.0842759547541718, "grad_norm": 1.5826606750488281, "learning_rate": 9.6765e-05, "loss": 0.5595, "step": 19363 }, { "epoch": 1.0843319520663008, "grad_norm": 1.3750200271606445, "learning_rate": 9.677000000000001e-05, "loss": 0.4728, "step": 19364 }, { "epoch": 1.0843879493784299, "grad_norm": 1.9382109642028809, "learning_rate": 9.677500000000001e-05, "loss": 0.5068, "step": 19365 }, { "epoch": 1.0844439466905589, "grad_norm": 1.368626356124878, "learning_rate": 9.678e-05, "loss": 0.3895, "step": 19366 }, { "epoch": 1.084499944002688, "grad_norm": 
1.2477309703826904, "learning_rate": 9.6785e-05, "loss": 0.3411, "step": 19367 }, { "epoch": 1.084555941314817, "grad_norm": 1.4445922374725342, "learning_rate": 9.679e-05, "loss": 0.3236, "step": 19368 }, { "epoch": 1.084611938626946, "grad_norm": 1.6062517166137695, "learning_rate": 9.6795e-05, "loss": 0.4774, "step": 19369 }, { "epoch": 1.084667935939075, "grad_norm": 1.1364208459854126, "learning_rate": 9.680000000000001e-05, "loss": 0.4174, "step": 19370 }, { "epoch": 1.084723933251204, "grad_norm": 1.2720904350280762, "learning_rate": 9.6805e-05, "loss": 0.3245, "step": 19371 }, { "epoch": 1.084779930563333, "grad_norm": 1.477702260017395, "learning_rate": 9.681e-05, "loss": 0.5477, "step": 19372 }, { "epoch": 1.084835927875462, "grad_norm": 1.1432007551193237, "learning_rate": 9.6815e-05, "loss": 0.4232, "step": 19373 }, { "epoch": 1.084891925187591, "grad_norm": 1.412406086921692, "learning_rate": 9.682e-05, "loss": 0.4568, "step": 19374 }, { "epoch": 1.08494792249972, "grad_norm": 1.2835264205932617, "learning_rate": 9.682500000000001e-05, "loss": 0.377, "step": 19375 }, { "epoch": 1.085003919811849, "grad_norm": 1.3556221723556519, "learning_rate": 9.683e-05, "loss": 0.3866, "step": 19376 }, { "epoch": 1.085059917123978, "grad_norm": 1.238083004951477, "learning_rate": 9.6835e-05, "loss": 0.54, "step": 19377 }, { "epoch": 1.085115914436107, "grad_norm": 1.3918308019638062, "learning_rate": 9.684000000000001e-05, "loss": 0.4972, "step": 19378 }, { "epoch": 1.0851719117482361, "grad_norm": 1.1738877296447754, "learning_rate": 9.684500000000001e-05, "loss": 0.3268, "step": 19379 }, { "epoch": 1.0852279090603651, "grad_norm": 1.3730249404907227, "learning_rate": 9.685000000000001e-05, "loss": 0.4502, "step": 19380 }, { "epoch": 1.0852839063724942, "grad_norm": 1.495382308959961, "learning_rate": 9.685500000000001e-05, "loss": 0.428, "step": 19381 }, { "epoch": 1.0853399036846232, "grad_norm": 1.2371774911880493, "learning_rate": 9.686e-05, "loss": 0.3295, 
"step": 19382 }, { "epoch": 1.0853959009967522, "grad_norm": 1.256272792816162, "learning_rate": 9.6865e-05, "loss": 0.5049, "step": 19383 }, { "epoch": 1.0854518983088812, "grad_norm": 1.5617402791976929, "learning_rate": 9.687e-05, "loss": 0.3444, "step": 19384 }, { "epoch": 1.0855078956210102, "grad_norm": 1.2238343954086304, "learning_rate": 9.687500000000001e-05, "loss": 0.4621, "step": 19385 }, { "epoch": 1.0855638929331393, "grad_norm": 1.7954620122909546, "learning_rate": 9.688000000000001e-05, "loss": 0.536, "step": 19386 }, { "epoch": 1.0856198902452683, "grad_norm": 1.3573501110076904, "learning_rate": 9.6885e-05, "loss": 0.4388, "step": 19387 }, { "epoch": 1.0856758875573973, "grad_norm": 1.399285078048706, "learning_rate": 9.689e-05, "loss": 0.5818, "step": 19388 }, { "epoch": 1.0857318848695263, "grad_norm": 1.4428678750991821, "learning_rate": 9.6895e-05, "loss": 0.4234, "step": 19389 }, { "epoch": 1.0857878821816553, "grad_norm": 1.4424530267715454, "learning_rate": 9.69e-05, "loss": 0.4421, "step": 19390 }, { "epoch": 1.0858438794937844, "grad_norm": 1.754023790359497, "learning_rate": 9.6905e-05, "loss": 0.4428, "step": 19391 }, { "epoch": 1.0858998768059134, "grad_norm": 1.343117594718933, "learning_rate": 9.691e-05, "loss": 0.312, "step": 19392 }, { "epoch": 1.0859558741180424, "grad_norm": 1.1642510890960693, "learning_rate": 9.6915e-05, "loss": 0.3357, "step": 19393 }, { "epoch": 1.0860118714301714, "grad_norm": 1.524693250656128, "learning_rate": 9.692e-05, "loss": 0.375, "step": 19394 }, { "epoch": 1.0860678687423004, "grad_norm": 1.7570533752441406, "learning_rate": 9.6925e-05, "loss": 0.5246, "step": 19395 }, { "epoch": 1.0861238660544295, "grad_norm": 1.2837694883346558, "learning_rate": 9.693000000000001e-05, "loss": 0.3471, "step": 19396 }, { "epoch": 1.0861798633665585, "grad_norm": 1.1543211936950684, "learning_rate": 9.6935e-05, "loss": 0.3616, "step": 19397 }, { "epoch": 1.0862358606786875, "grad_norm": 1.3171840906143188, 
"learning_rate": 9.694e-05, "loss": 0.4241, "step": 19398 }, { "epoch": 1.0862918579908165, "grad_norm": 1.5649409294128418, "learning_rate": 9.694500000000001e-05, "loss": 0.4944, "step": 19399 }, { "epoch": 1.0863478553029455, "grad_norm": 1.2931212186813354, "learning_rate": 9.695000000000001e-05, "loss": 0.5089, "step": 19400 }, { "epoch": 1.0864038526150746, "grad_norm": 1.3104225397109985, "learning_rate": 9.695500000000001e-05, "loss": 0.4478, "step": 19401 }, { "epoch": 1.0864598499272036, "grad_norm": 1.1763811111450195, "learning_rate": 9.696000000000001e-05, "loss": 0.4075, "step": 19402 }, { "epoch": 1.0865158472393326, "grad_norm": 1.2341376543045044, "learning_rate": 9.6965e-05, "loss": 0.5711, "step": 19403 }, { "epoch": 1.0865718445514616, "grad_norm": 1.2019206285476685, "learning_rate": 9.697e-05, "loss": 0.3148, "step": 19404 }, { "epoch": 1.0866278418635906, "grad_norm": 1.6193609237670898, "learning_rate": 9.6975e-05, "loss": 0.4713, "step": 19405 }, { "epoch": 1.0866838391757196, "grad_norm": 1.4534461498260498, "learning_rate": 9.698000000000001e-05, "loss": 0.4004, "step": 19406 }, { "epoch": 1.0867398364878487, "grad_norm": 1.1196978092193604, "learning_rate": 9.698500000000001e-05, "loss": 0.3419, "step": 19407 }, { "epoch": 1.0867958337999777, "grad_norm": 1.272823452949524, "learning_rate": 9.699e-05, "loss": 0.4797, "step": 19408 }, { "epoch": 1.0868518311121067, "grad_norm": 6.032217979431152, "learning_rate": 9.6995e-05, "loss": 0.5645, "step": 19409 }, { "epoch": 1.0869078284242357, "grad_norm": 1.2282921075820923, "learning_rate": 9.7e-05, "loss": 0.494, "step": 19410 }, { "epoch": 1.0869638257363647, "grad_norm": 1.432591199874878, "learning_rate": 9.7005e-05, "loss": 0.4727, "step": 19411 }, { "epoch": 1.0870198230484938, "grad_norm": 1.428158164024353, "learning_rate": 9.701e-05, "loss": 0.3715, "step": 19412 }, { "epoch": 1.0870758203606228, "grad_norm": 1.33553147315979, "learning_rate": 9.7015e-05, "loss": 0.4723, "step": 
19413 }, { "epoch": 1.0871318176727518, "grad_norm": 1.2606146335601807, "learning_rate": 9.702e-05, "loss": 0.4094, "step": 19414 }, { "epoch": 1.0871878149848808, "grad_norm": 1.3955553770065308, "learning_rate": 9.7025e-05, "loss": 0.3692, "step": 19415 }, { "epoch": 1.0872438122970098, "grad_norm": 1.202028751373291, "learning_rate": 9.703000000000001e-05, "loss": 0.4275, "step": 19416 }, { "epoch": 1.0872998096091389, "grad_norm": 1.497829556465149, "learning_rate": 9.703500000000001e-05, "loss": 0.6001, "step": 19417 }, { "epoch": 1.0873558069212679, "grad_norm": 1.1924201250076294, "learning_rate": 9.704e-05, "loss": 0.3614, "step": 19418 }, { "epoch": 1.087411804233397, "grad_norm": 1.5991381406784058, "learning_rate": 9.7045e-05, "loss": 0.5625, "step": 19419 }, { "epoch": 1.087467801545526, "grad_norm": 1.3351833820343018, "learning_rate": 9.705e-05, "loss": 0.5205, "step": 19420 }, { "epoch": 1.087523798857655, "grad_norm": 1.2472409009933472, "learning_rate": 9.705500000000001e-05, "loss": 0.3638, "step": 19421 }, { "epoch": 1.087579796169784, "grad_norm": 1.1877214908599854, "learning_rate": 9.706000000000001e-05, "loss": 0.4038, "step": 19422 }, { "epoch": 1.087635793481913, "grad_norm": 1.28447425365448, "learning_rate": 9.7065e-05, "loss": 0.4491, "step": 19423 }, { "epoch": 1.0876917907940418, "grad_norm": 1.2487019300460815, "learning_rate": 9.707e-05, "loss": 0.5416, "step": 19424 }, { "epoch": 1.0877477881061708, "grad_norm": 1.412729024887085, "learning_rate": 9.7075e-05, "loss": 0.4928, "step": 19425 }, { "epoch": 1.0878037854182998, "grad_norm": 1.8077267408370972, "learning_rate": 9.708e-05, "loss": 0.4177, "step": 19426 }, { "epoch": 1.0878597827304288, "grad_norm": 1.2820024490356445, "learning_rate": 9.708500000000001e-05, "loss": 0.4856, "step": 19427 }, { "epoch": 1.0879157800425578, "grad_norm": 1.74324369430542, "learning_rate": 9.709000000000001e-05, "loss": 0.3792, "step": 19428 }, { "epoch": 1.0879717773546869, "grad_norm": 
1.2279449701309204, "learning_rate": 9.7095e-05, "loss": 0.341, "step": 19429 }, { "epoch": 1.0880277746668159, "grad_norm": 1.4273266792297363, "learning_rate": 9.71e-05, "loss": 0.4763, "step": 19430 }, { "epoch": 1.088083771978945, "grad_norm": 1.2334628105163574, "learning_rate": 9.7105e-05, "loss": 0.3511, "step": 19431 }, { "epoch": 1.088139769291074, "grad_norm": 1.3678807020187378, "learning_rate": 9.711e-05, "loss": 0.3458, "step": 19432 }, { "epoch": 1.088195766603203, "grad_norm": 1.2043136358261108, "learning_rate": 9.7115e-05, "loss": 0.348, "step": 19433 }, { "epoch": 1.088251763915332, "grad_norm": 1.4696546792984009, "learning_rate": 9.712e-05, "loss": 0.5889, "step": 19434 }, { "epoch": 1.088307761227461, "grad_norm": 1.3678463697433472, "learning_rate": 9.7125e-05, "loss": 0.5672, "step": 19435 }, { "epoch": 1.08836375853959, "grad_norm": 1.7388309240341187, "learning_rate": 9.713000000000001e-05, "loss": 0.3944, "step": 19436 }, { "epoch": 1.088419755851719, "grad_norm": 1.696213722229004, "learning_rate": 9.713500000000001e-05, "loss": 0.3871, "step": 19437 }, { "epoch": 1.088475753163848, "grad_norm": 1.5011556148529053, "learning_rate": 9.714000000000001e-05, "loss": 0.4874, "step": 19438 }, { "epoch": 1.088531750475977, "grad_norm": 1.4647469520568848, "learning_rate": 9.7145e-05, "loss": 0.3355, "step": 19439 }, { "epoch": 1.088587747788106, "grad_norm": 1.3213003873825073, "learning_rate": 9.715e-05, "loss": 0.33, "step": 19440 }, { "epoch": 1.088643745100235, "grad_norm": 1.250311255455017, "learning_rate": 9.7155e-05, "loss": 0.4597, "step": 19441 }, { "epoch": 1.0886997424123641, "grad_norm": 1.4594535827636719, "learning_rate": 9.716000000000001e-05, "loss": 0.4793, "step": 19442 }, { "epoch": 1.0887557397244931, "grad_norm": 1.3642311096191406, "learning_rate": 9.716500000000001e-05, "loss": 0.4608, "step": 19443 }, { "epoch": 1.0888117370366222, "grad_norm": 2.3334622383117676, "learning_rate": 9.717e-05, "loss": 0.5047, "step": 19444 
}, { "epoch": 1.0888677343487512, "grad_norm": 1.3684736490249634, "learning_rate": 9.7175e-05, "loss": 0.3814, "step": 19445 }, { "epoch": 1.0889237316608802, "grad_norm": 1.1973755359649658, "learning_rate": 9.718e-05, "loss": 0.4501, "step": 19446 }, { "epoch": 1.0889797289730092, "grad_norm": 1.4940571784973145, "learning_rate": 9.7185e-05, "loss": 0.6219, "step": 19447 }, { "epoch": 1.0890357262851382, "grad_norm": 1.252989649772644, "learning_rate": 9.719000000000001e-05, "loss": 0.41, "step": 19448 }, { "epoch": 1.0890917235972672, "grad_norm": 1.185983419418335, "learning_rate": 9.719500000000001e-05, "loss": 0.4355, "step": 19449 }, { "epoch": 1.0891477209093963, "grad_norm": 1.1518192291259766, "learning_rate": 9.72e-05, "loss": 0.3613, "step": 19450 }, { "epoch": 1.0892037182215253, "grad_norm": 1.5924357175827026, "learning_rate": 9.7205e-05, "loss": 0.4771, "step": 19451 }, { "epoch": 1.0892597155336543, "grad_norm": 1.1367547512054443, "learning_rate": 9.721e-05, "loss": 0.4243, "step": 19452 }, { "epoch": 1.0893157128457833, "grad_norm": 1.4977604150772095, "learning_rate": 9.7215e-05, "loss": 0.3691, "step": 19453 }, { "epoch": 1.0893717101579123, "grad_norm": 1.2454397678375244, "learning_rate": 9.722e-05, "loss": 0.5785, "step": 19454 }, { "epoch": 1.0894277074700414, "grad_norm": 1.5086647272109985, "learning_rate": 9.7225e-05, "loss": 0.3714, "step": 19455 }, { "epoch": 1.0894837047821704, "grad_norm": 1.3603461980819702, "learning_rate": 9.723000000000002e-05, "loss": 0.5453, "step": 19456 }, { "epoch": 1.0895397020942994, "grad_norm": 1.7683531045913696, "learning_rate": 9.723500000000001e-05, "loss": 0.7335, "step": 19457 }, { "epoch": 1.0895956994064284, "grad_norm": 2.0208988189697266, "learning_rate": 9.724000000000001e-05, "loss": 0.4242, "step": 19458 }, { "epoch": 1.0896516967185574, "grad_norm": 1.403065800666809, "learning_rate": 9.724500000000001e-05, "loss": 0.4319, "step": 19459 }, { "epoch": 1.0897076940306865, "grad_norm": 
1.7681505680084229, "learning_rate": 9.725e-05, "loss": 0.5649, "step": 19460 }, { "epoch": 1.0897636913428155, "grad_norm": 5.0404133796691895, "learning_rate": 9.7255e-05, "loss": 0.3528, "step": 19461 }, { "epoch": 1.0898196886549445, "grad_norm": 1.498591661453247, "learning_rate": 9.726e-05, "loss": 0.5161, "step": 19462 }, { "epoch": 1.0898756859670735, "grad_norm": 1.8758131265640259, "learning_rate": 9.726500000000001e-05, "loss": 0.5007, "step": 19463 }, { "epoch": 1.0899316832792025, "grad_norm": 1.260574221611023, "learning_rate": 9.727000000000001e-05, "loss": 0.3745, "step": 19464 }, { "epoch": 1.0899876805913316, "grad_norm": 1.312031865119934, "learning_rate": 9.7275e-05, "loss": 0.4584, "step": 19465 }, { "epoch": 1.0900436779034606, "grad_norm": 1.5896401405334473, "learning_rate": 9.728e-05, "loss": 0.47, "step": 19466 }, { "epoch": 1.0900996752155896, "grad_norm": 1.0249112844467163, "learning_rate": 9.7285e-05, "loss": 0.3238, "step": 19467 }, { "epoch": 1.0901556725277186, "grad_norm": 1.251260757446289, "learning_rate": 9.729e-05, "loss": 0.3825, "step": 19468 }, { "epoch": 1.0902116698398476, "grad_norm": 1.4393593072891235, "learning_rate": 9.729500000000001e-05, "loss": 0.4745, "step": 19469 }, { "epoch": 1.0902676671519766, "grad_norm": 1.361510157585144, "learning_rate": 9.730000000000001e-05, "loss": 0.419, "step": 19470 }, { "epoch": 1.0903236644641057, "grad_norm": 1.2182210683822632, "learning_rate": 9.7305e-05, "loss": 0.4122, "step": 19471 }, { "epoch": 1.0903796617762347, "grad_norm": 1.2895756959915161, "learning_rate": 9.731e-05, "loss": 0.406, "step": 19472 }, { "epoch": 1.0904356590883637, "grad_norm": 1.3974794149398804, "learning_rate": 9.7315e-05, "loss": 0.5104, "step": 19473 }, { "epoch": 1.0904916564004927, "grad_norm": 1.6768306493759155, "learning_rate": 9.732e-05, "loss": 0.4613, "step": 19474 }, { "epoch": 1.0905476537126217, "grad_norm": 1.3968340158462524, "learning_rate": 9.7325e-05, "loss": 0.4519, "step": 19475 
}, { "epoch": 1.0906036510247508, "grad_norm": 1.7102560997009277, "learning_rate": 9.733e-05, "loss": 0.5399, "step": 19476 }, { "epoch": 1.0906596483368798, "grad_norm": 1.3042027950286865, "learning_rate": 9.733500000000002e-05, "loss": 0.4404, "step": 19477 }, { "epoch": 1.0907156456490088, "grad_norm": 1.2797521352767944, "learning_rate": 9.734000000000001e-05, "loss": 0.4302, "step": 19478 }, { "epoch": 1.0907716429611378, "grad_norm": 1.3735034465789795, "learning_rate": 9.734500000000001e-05, "loss": 0.4131, "step": 19479 }, { "epoch": 1.0908276402732668, "grad_norm": 1.4389700889587402, "learning_rate": 9.735000000000001e-05, "loss": 0.5966, "step": 19480 }, { "epoch": 1.0908836375853959, "grad_norm": 1.6658045053482056, "learning_rate": 9.7355e-05, "loss": 0.5419, "step": 19481 }, { "epoch": 1.0909396348975249, "grad_norm": 1.3026015758514404, "learning_rate": 9.736e-05, "loss": 0.5054, "step": 19482 }, { "epoch": 1.090995632209654, "grad_norm": 1.3731954097747803, "learning_rate": 9.7365e-05, "loss": 0.4267, "step": 19483 }, { "epoch": 1.091051629521783, "grad_norm": 1.4135109186172485, "learning_rate": 9.737000000000001e-05, "loss": 0.451, "step": 19484 }, { "epoch": 1.091107626833912, "grad_norm": 1.2790257930755615, "learning_rate": 9.737500000000001e-05, "loss": 0.3954, "step": 19485 }, { "epoch": 1.091163624146041, "grad_norm": 1.4345476627349854, "learning_rate": 9.738e-05, "loss": 0.4735, "step": 19486 }, { "epoch": 1.09121962145817, "grad_norm": 1.2349601984024048, "learning_rate": 9.7385e-05, "loss": 0.3137, "step": 19487 }, { "epoch": 1.091275618770299, "grad_norm": 1.20396089553833, "learning_rate": 9.739e-05, "loss": 0.4095, "step": 19488 }, { "epoch": 1.091331616082428, "grad_norm": 1.3240493535995483, "learning_rate": 9.7395e-05, "loss": 0.4282, "step": 19489 }, { "epoch": 1.091387613394557, "grad_norm": 1.3587307929992676, "learning_rate": 9.74e-05, "loss": 0.5745, "step": 19490 }, { "epoch": 1.091443610706686, "grad_norm": 
1.4722732305526733, "learning_rate": 9.7405e-05, "loss": 0.4488, "step": 19491 }, { "epoch": 1.091499608018815, "grad_norm": 1.6319332122802734, "learning_rate": 9.741e-05, "loss": 0.4726, "step": 19492 }, { "epoch": 1.091555605330944, "grad_norm": 1.2616441249847412, "learning_rate": 9.7415e-05, "loss": 0.4085, "step": 19493 }, { "epoch": 1.091611602643073, "grad_norm": 1.4479644298553467, "learning_rate": 9.742e-05, "loss": 0.4895, "step": 19494 }, { "epoch": 1.0916675999552021, "grad_norm": 1.1906683444976807, "learning_rate": 9.7425e-05, "loss": 0.3664, "step": 19495 }, { "epoch": 1.0917235972673311, "grad_norm": 1.1659542322158813, "learning_rate": 9.743000000000001e-05, "loss": 0.455, "step": 19496 }, { "epoch": 1.0917795945794602, "grad_norm": 1.4838449954986572, "learning_rate": 9.7435e-05, "loss": 0.4508, "step": 19497 }, { "epoch": 1.0918355918915892, "grad_norm": 1.3363466262817383, "learning_rate": 9.744000000000002e-05, "loss": 0.3449, "step": 19498 }, { "epoch": 1.0918915892037182, "grad_norm": 1.0771715641021729, "learning_rate": 9.744500000000001e-05, "loss": 0.3355, "step": 19499 }, { "epoch": 1.0919475865158472, "grad_norm": 1.1941698789596558, "learning_rate": 9.745000000000001e-05, "loss": 0.4237, "step": 19500 }, { "epoch": 1.0920035838279762, "grad_norm": 1.3635395765304565, "learning_rate": 9.745500000000001e-05, "loss": 0.5534, "step": 19501 }, { "epoch": 1.0920595811401053, "grad_norm": 1.4608535766601562, "learning_rate": 9.746e-05, "loss": 0.3937, "step": 19502 }, { "epoch": 1.0921155784522343, "grad_norm": 1.342836856842041, "learning_rate": 9.7465e-05, "loss": 0.4632, "step": 19503 }, { "epoch": 1.0921715757643633, "grad_norm": 1.4179961681365967, "learning_rate": 9.747e-05, "loss": 0.5522, "step": 19504 }, { "epoch": 1.0922275730764923, "grad_norm": 1.4744871854782104, "learning_rate": 9.747500000000001e-05, "loss": 0.4017, "step": 19505 }, { "epoch": 1.0922835703886213, "grad_norm": 1.4139132499694824, "learning_rate": 
9.748000000000001e-05, "loss": 0.4151, "step": 19506 }, { "epoch": 1.0923395677007504, "grad_norm": 1.4782044887542725, "learning_rate": 9.7485e-05, "loss": 0.603, "step": 19507 }, { "epoch": 1.0923955650128794, "grad_norm": 1.4270756244659424, "learning_rate": 9.749e-05, "loss": 0.4025, "step": 19508 }, { "epoch": 1.0924515623250084, "grad_norm": 1.2587467432022095, "learning_rate": 9.7495e-05, "loss": 0.3908, "step": 19509 }, { "epoch": 1.0925075596371374, "grad_norm": 1.3345550298690796, "learning_rate": 9.75e-05, "loss": 0.3895, "step": 19510 }, { "epoch": 1.0925635569492664, "grad_norm": 1.5031352043151855, "learning_rate": 9.7505e-05, "loss": 0.4788, "step": 19511 }, { "epoch": 1.0926195542613955, "grad_norm": 1.3319941759109497, "learning_rate": 9.751e-05, "loss": 0.5293, "step": 19512 }, { "epoch": 1.0926755515735245, "grad_norm": 1.1762086153030396, "learning_rate": 9.7515e-05, "loss": 0.4477, "step": 19513 }, { "epoch": 1.0927315488856535, "grad_norm": 1.3730016946792603, "learning_rate": 9.752e-05, "loss": 0.4041, "step": 19514 }, { "epoch": 1.0927875461977825, "grad_norm": 1.591992735862732, "learning_rate": 9.7525e-05, "loss": 0.481, "step": 19515 }, { "epoch": 1.0928435435099115, "grad_norm": 1.3490495681762695, "learning_rate": 9.753e-05, "loss": 0.3612, "step": 19516 }, { "epoch": 1.0928995408220405, "grad_norm": 1.2400485277175903, "learning_rate": 9.753500000000001e-05, "loss": 0.4447, "step": 19517 }, { "epoch": 1.0929555381341696, "grad_norm": 1.4158413410186768, "learning_rate": 9.754e-05, "loss": 0.5537, "step": 19518 }, { "epoch": 1.0930115354462986, "grad_norm": 1.2359211444854736, "learning_rate": 9.7545e-05, "loss": 0.5455, "step": 19519 }, { "epoch": 1.0930675327584276, "grad_norm": 1.2567752599716187, "learning_rate": 9.755000000000001e-05, "loss": 0.364, "step": 19520 }, { "epoch": 1.0931235300705566, "grad_norm": 1.3764561414718628, "learning_rate": 9.755500000000001e-05, "loss": 0.4302, "step": 19521 }, { "epoch": 1.0931795273826856, 
"grad_norm": 1.321619987487793, "learning_rate": 9.756000000000001e-05, "loss": 0.4692, "step": 19522 }, { "epoch": 1.0932355246948147, "grad_norm": 1.33164381980896, "learning_rate": 9.7565e-05, "loss": 0.3829, "step": 19523 }, { "epoch": 1.0932915220069437, "grad_norm": 1.5573786497116089, "learning_rate": 9.757e-05, "loss": 0.4894, "step": 19524 }, { "epoch": 1.0933475193190727, "grad_norm": 1.4197397232055664, "learning_rate": 9.7575e-05, "loss": 0.3993, "step": 19525 }, { "epoch": 1.0934035166312017, "grad_norm": 1.4187829494476318, "learning_rate": 9.758000000000001e-05, "loss": 0.4583, "step": 19526 }, { "epoch": 1.0934595139433307, "grad_norm": 1.202857255935669, "learning_rate": 9.758500000000001e-05, "loss": 0.3423, "step": 19527 }, { "epoch": 1.0935155112554598, "grad_norm": 1.4020442962646484, "learning_rate": 9.759e-05, "loss": 0.4366, "step": 19528 }, { "epoch": 1.0935715085675888, "grad_norm": 1.3176480531692505, "learning_rate": 9.7595e-05, "loss": 0.4086, "step": 19529 }, { "epoch": 1.0936275058797178, "grad_norm": 1.1130186319351196, "learning_rate": 9.76e-05, "loss": 0.3393, "step": 19530 }, { "epoch": 1.0936835031918468, "grad_norm": 1.2685071229934692, "learning_rate": 9.7605e-05, "loss": 0.4529, "step": 19531 }, { "epoch": 1.0937395005039758, "grad_norm": 1.6565825939178467, "learning_rate": 9.761e-05, "loss": 0.4969, "step": 19532 }, { "epoch": 1.0937954978161049, "grad_norm": 1.3428248167037964, "learning_rate": 9.7615e-05, "loss": 0.384, "step": 19533 }, { "epoch": 1.0938514951282339, "grad_norm": 1.4593104124069214, "learning_rate": 9.762e-05, "loss": 0.4969, "step": 19534 }, { "epoch": 1.093907492440363, "grad_norm": 1.5722178220748901, "learning_rate": 9.7625e-05, "loss": 0.5102, "step": 19535 }, { "epoch": 1.093963489752492, "grad_norm": 1.4600510597229004, "learning_rate": 9.763e-05, "loss": 0.3806, "step": 19536 }, { "epoch": 1.094019487064621, "grad_norm": 1.3687782287597656, "learning_rate": 9.763500000000001e-05, "loss": 0.5298, 
"step": 19537 }, { "epoch": 1.09407548437675, "grad_norm": 1.763283610343933, "learning_rate": 9.764000000000001e-05, "loss": 0.4613, "step": 19538 }, { "epoch": 1.094131481688879, "grad_norm": 1.4355372190475464, "learning_rate": 9.7645e-05, "loss": 0.4203, "step": 19539 }, { "epoch": 1.094187479001008, "grad_norm": 1.6046817302703857, "learning_rate": 9.765e-05, "loss": 0.3555, "step": 19540 }, { "epoch": 1.094243476313137, "grad_norm": 1.6429340839385986, "learning_rate": 9.765500000000001e-05, "loss": 0.4662, "step": 19541 }, { "epoch": 1.094299473625266, "grad_norm": 1.4519562721252441, "learning_rate": 9.766000000000001e-05, "loss": 0.4883, "step": 19542 }, { "epoch": 1.094355470937395, "grad_norm": 1.3624401092529297, "learning_rate": 9.766500000000001e-05, "loss": 0.4762, "step": 19543 }, { "epoch": 1.094411468249524, "grad_norm": 1.355677843093872, "learning_rate": 9.767e-05, "loss": 0.3493, "step": 19544 }, { "epoch": 1.094467465561653, "grad_norm": 6.697079658508301, "learning_rate": 9.7675e-05, "loss": 0.4235, "step": 19545 }, { "epoch": 1.094523462873782, "grad_norm": 1.138710379600525, "learning_rate": 9.768e-05, "loss": 0.3862, "step": 19546 }, { "epoch": 1.0945794601859111, "grad_norm": 1.5354340076446533, "learning_rate": 9.768500000000001e-05, "loss": 0.4287, "step": 19547 }, { "epoch": 1.0946354574980401, "grad_norm": 1.3873952627182007, "learning_rate": 9.769000000000001e-05, "loss": 0.3725, "step": 19548 }, { "epoch": 1.0946914548101692, "grad_norm": 1.2548458576202393, "learning_rate": 9.7695e-05, "loss": 0.3202, "step": 19549 }, { "epoch": 1.0947474521222982, "grad_norm": 1.577109932899475, "learning_rate": 9.77e-05, "loss": 0.587, "step": 19550 }, { "epoch": 1.0948034494344272, "grad_norm": 1.3174759149551392, "learning_rate": 9.7705e-05, "loss": 0.4666, "step": 19551 }, { "epoch": 1.0948594467465562, "grad_norm": 1.461652159690857, "learning_rate": 9.771e-05, "loss": 0.4711, "step": 19552 }, { "epoch": 1.0949154440586852, "grad_norm": 
1.3167462348937988, "learning_rate": 9.7715e-05, "loss": 0.4441, "step": 19553 }, { "epoch": 1.0949714413708143, "grad_norm": 1.1816465854644775, "learning_rate": 9.772e-05, "loss": 0.3607, "step": 19554 }, { "epoch": 1.0950274386829433, "grad_norm": 1.3829306364059448, "learning_rate": 9.7725e-05, "loss": 0.4564, "step": 19555 }, { "epoch": 1.0950834359950723, "grad_norm": 1.3023033142089844, "learning_rate": 9.773e-05, "loss": 0.3954, "step": 19556 }, { "epoch": 1.0951394333072013, "grad_norm": 1.0449341535568237, "learning_rate": 9.773500000000001e-05, "loss": 0.3799, "step": 19557 }, { "epoch": 1.0951954306193303, "grad_norm": 1.3946932554244995, "learning_rate": 9.774000000000001e-05, "loss": 0.428, "step": 19558 }, { "epoch": 1.0952514279314594, "grad_norm": 1.5530643463134766, "learning_rate": 9.774500000000001e-05, "loss": 0.565, "step": 19559 }, { "epoch": 1.0953074252435884, "grad_norm": 1.2083131074905396, "learning_rate": 9.775e-05, "loss": 0.2811, "step": 19560 }, { "epoch": 1.0953634225557174, "grad_norm": 1.2603507041931152, "learning_rate": 9.7755e-05, "loss": 0.596, "step": 19561 }, { "epoch": 1.0954194198678464, "grad_norm": 1.5251781940460205, "learning_rate": 9.776000000000001e-05, "loss": 0.4129, "step": 19562 }, { "epoch": 1.0954754171799754, "grad_norm": 1.2989113330841064, "learning_rate": 9.776500000000001e-05, "loss": 0.4234, "step": 19563 }, { "epoch": 1.0955314144921044, "grad_norm": 1.4201045036315918, "learning_rate": 9.777000000000001e-05, "loss": 0.4128, "step": 19564 }, { "epoch": 1.0955874118042335, "grad_norm": 1.3607844114303589, "learning_rate": 9.7775e-05, "loss": 0.6465, "step": 19565 }, { "epoch": 1.0956434091163625, "grad_norm": 1.6310099363327026, "learning_rate": 9.778e-05, "loss": 0.4283, "step": 19566 }, { "epoch": 1.0956994064284915, "grad_norm": 1.1884732246398926, "learning_rate": 9.7785e-05, "loss": 0.3746, "step": 19567 }, { "epoch": 1.0957554037406205, "grad_norm": 1.3570232391357422, "learning_rate": 9.779e-05, 
"loss": 0.4903, "step": 19568 }, { "epoch": 1.0958114010527495, "grad_norm": 1.44740629196167, "learning_rate": 9.779500000000001e-05, "loss": 0.4056, "step": 19569 }, { "epoch": 1.0958673983648786, "grad_norm": 1.1972736120224, "learning_rate": 9.78e-05, "loss": 0.3, "step": 19570 }, { "epoch": 1.0959233956770076, "grad_norm": 1.7060189247131348, "learning_rate": 9.7805e-05, "loss": 0.4381, "step": 19571 }, { "epoch": 1.0959793929891366, "grad_norm": 1.6647682189941406, "learning_rate": 9.781e-05, "loss": 0.4836, "step": 19572 }, { "epoch": 1.0960353903012656, "grad_norm": 1.5249102115631104, "learning_rate": 9.7815e-05, "loss": 0.4272, "step": 19573 }, { "epoch": 1.0960913876133946, "grad_norm": 1.2867717742919922, "learning_rate": 9.782e-05, "loss": 0.5391, "step": 19574 }, { "epoch": 1.0961473849255237, "grad_norm": 1.3553507328033447, "learning_rate": 9.7825e-05, "loss": 0.4903, "step": 19575 }, { "epoch": 1.0962033822376527, "grad_norm": 1.1969085931777954, "learning_rate": 9.783e-05, "loss": 0.4377, "step": 19576 }, { "epoch": 1.0962593795497817, "grad_norm": 1.2163243293762207, "learning_rate": 9.783500000000001e-05, "loss": 0.3771, "step": 19577 }, { "epoch": 1.0963153768619107, "grad_norm": 2.052074432373047, "learning_rate": 9.784000000000001e-05, "loss": 0.3332, "step": 19578 }, { "epoch": 1.0963713741740397, "grad_norm": 1.3686301708221436, "learning_rate": 9.784500000000001e-05, "loss": 0.5045, "step": 19579 }, { "epoch": 1.0964273714861688, "grad_norm": 1.6917316913604736, "learning_rate": 9.785e-05, "loss": 0.4673, "step": 19580 }, { "epoch": 1.0964833687982978, "grad_norm": 1.682552456855774, "learning_rate": 9.7855e-05, "loss": 0.4664, "step": 19581 }, { "epoch": 1.0965393661104268, "grad_norm": 1.4329495429992676, "learning_rate": 9.786e-05, "loss": 0.4861, "step": 19582 }, { "epoch": 1.0965953634225558, "grad_norm": 1.289271354675293, "learning_rate": 9.786500000000001e-05, "loss": 0.3885, "step": 19583 }, { "epoch": 1.0966513607346848, 
"grad_norm": 1.4165812730789185, "learning_rate": 9.787000000000001e-05, "loss": 0.4461, "step": 19584 }, { "epoch": 1.0967073580468139, "grad_norm": 1.2960909605026245, "learning_rate": 9.787500000000001e-05, "loss": 0.5017, "step": 19585 }, { "epoch": 1.0967633553589429, "grad_norm": 1.6284939050674438, "learning_rate": 9.788e-05, "loss": 0.453, "step": 19586 }, { "epoch": 1.096819352671072, "grad_norm": 1.2743760347366333, "learning_rate": 9.7885e-05, "loss": 0.2907, "step": 19587 }, { "epoch": 1.096875349983201, "grad_norm": 1.3700569868087769, "learning_rate": 9.789e-05, "loss": 0.4638, "step": 19588 }, { "epoch": 1.09693134729533, "grad_norm": 1.3631771802902222, "learning_rate": 9.7895e-05, "loss": 0.3999, "step": 19589 }, { "epoch": 1.096987344607459, "grad_norm": 1.553858757019043, "learning_rate": 9.790000000000001e-05, "loss": 0.514, "step": 19590 }, { "epoch": 1.097043341919588, "grad_norm": 1.1983616352081299, "learning_rate": 9.7905e-05, "loss": 0.3723, "step": 19591 }, { "epoch": 1.097099339231717, "grad_norm": 1.3355696201324463, "learning_rate": 9.791e-05, "loss": 0.3802, "step": 19592 }, { "epoch": 1.097155336543846, "grad_norm": 1.3248353004455566, "learning_rate": 9.7915e-05, "loss": 0.5072, "step": 19593 }, { "epoch": 1.097211333855975, "grad_norm": 1.1041831970214844, "learning_rate": 9.792e-05, "loss": 0.3715, "step": 19594 }, { "epoch": 1.097267331168104, "grad_norm": 1.2678688764572144, "learning_rate": 9.7925e-05, "loss": 0.4019, "step": 19595 }, { "epoch": 1.097323328480233, "grad_norm": 1.381998896598816, "learning_rate": 9.793e-05, "loss": 0.5244, "step": 19596 }, { "epoch": 1.097379325792362, "grad_norm": 1.5407527685165405, "learning_rate": 9.7935e-05, "loss": 0.4575, "step": 19597 }, { "epoch": 1.097435323104491, "grad_norm": 1.1095999479293823, "learning_rate": 9.794000000000001e-05, "loss": 0.3277, "step": 19598 }, { "epoch": 1.0974913204166201, "grad_norm": 1.348344326019287, "learning_rate": 9.794500000000001e-05, "loss": 0.4471, 
"step": 19599 }, { "epoch": 1.097547317728749, "grad_norm": 1.2373028993606567, "learning_rate": 9.795000000000001e-05, "loss": 0.5059, "step": 19600 }, { "epoch": 1.097603315040878, "grad_norm": 1.3573837280273438, "learning_rate": 9.7955e-05, "loss": 0.4558, "step": 19601 }, { "epoch": 1.097659312353007, "grad_norm": 1.5080405473709106, "learning_rate": 9.796e-05, "loss": 0.5021, "step": 19602 }, { "epoch": 1.097715309665136, "grad_norm": 1.1538140773773193, "learning_rate": 9.7965e-05, "loss": 0.365, "step": 19603 }, { "epoch": 1.097771306977265, "grad_norm": 1.2251487970352173, "learning_rate": 9.797000000000001e-05, "loss": 0.3905, "step": 19604 }, { "epoch": 1.097827304289394, "grad_norm": 1.2731026411056519, "learning_rate": 9.797500000000001e-05, "loss": 0.3414, "step": 19605 }, { "epoch": 1.097883301601523, "grad_norm": 1.5197020769119263, "learning_rate": 9.798000000000001e-05, "loss": 0.5708, "step": 19606 }, { "epoch": 1.097939298913652, "grad_norm": 1.5866721868515015, "learning_rate": 9.7985e-05, "loss": 0.5346, "step": 19607 }, { "epoch": 1.097995296225781, "grad_norm": 1.284306526184082, "learning_rate": 9.799e-05, "loss": 0.4819, "step": 19608 }, { "epoch": 1.09805129353791, "grad_norm": 1.3480887413024902, "learning_rate": 9.7995e-05, "loss": 0.4383, "step": 19609 }, { "epoch": 1.098107290850039, "grad_norm": 1.904942274093628, "learning_rate": 9.8e-05, "loss": 0.433, "step": 19610 }, { "epoch": 1.0981632881621681, "grad_norm": 1.3913054466247559, "learning_rate": 9.800500000000001e-05, "loss": 0.4088, "step": 19611 }, { "epoch": 1.0982192854742971, "grad_norm": 1.3774702548980713, "learning_rate": 9.801e-05, "loss": 0.3585, "step": 19612 }, { "epoch": 1.0982752827864262, "grad_norm": 1.439720630645752, "learning_rate": 9.8015e-05, "loss": 0.4768, "step": 19613 }, { "epoch": 1.0983312800985552, "grad_norm": 1.4266635179519653, "learning_rate": 9.802e-05, "loss": 0.4101, "step": 19614 }, { "epoch": 1.0983872774106842, "grad_norm": 
1.3650580644607544, "learning_rate": 9.8025e-05, "loss": 0.326, "step": 19615 }, { "epoch": 1.0984432747228132, "grad_norm": 1.794957160949707, "learning_rate": 9.803e-05, "loss": 0.5428, "step": 19616 }, { "epoch": 1.0984992720349422, "grad_norm": 1.2034342288970947, "learning_rate": 9.8035e-05, "loss": 0.361, "step": 19617 }, { "epoch": 1.0985552693470713, "grad_norm": 1.705862045288086, "learning_rate": 9.804e-05, "loss": 0.5285, "step": 19618 }, { "epoch": 1.0986112666592003, "grad_norm": 1.4839178323745728, "learning_rate": 9.804500000000001e-05, "loss": 0.5371, "step": 19619 }, { "epoch": 1.0986672639713293, "grad_norm": 1.4856160879135132, "learning_rate": 9.805000000000001e-05, "loss": 0.4007, "step": 19620 }, { "epoch": 1.0987232612834583, "grad_norm": 1.6990814208984375, "learning_rate": 9.805500000000001e-05, "loss": 0.4334, "step": 19621 }, { "epoch": 1.0987792585955873, "grad_norm": 1.3838716745376587, "learning_rate": 9.806e-05, "loss": 0.5617, "step": 19622 }, { "epoch": 1.0988352559077164, "grad_norm": 1.3111406564712524, "learning_rate": 9.8065e-05, "loss": 0.5367, "step": 19623 }, { "epoch": 1.0988912532198454, "grad_norm": 1.1596437692642212, "learning_rate": 9.807e-05, "loss": 0.3757, "step": 19624 }, { "epoch": 1.0989472505319744, "grad_norm": 1.465855360031128, "learning_rate": 9.807500000000001e-05, "loss": 0.4408, "step": 19625 }, { "epoch": 1.0990032478441034, "grad_norm": 1.2543396949768066, "learning_rate": 9.808000000000001e-05, "loss": 0.4489, "step": 19626 }, { "epoch": 1.0990592451562324, "grad_norm": 1.525762677192688, "learning_rate": 9.808500000000001e-05, "loss": 0.5929, "step": 19627 }, { "epoch": 1.0991152424683615, "grad_norm": 1.179938554763794, "learning_rate": 9.809e-05, "loss": 0.4061, "step": 19628 }, { "epoch": 1.0991712397804905, "grad_norm": 1.2849570512771606, "learning_rate": 9.8095e-05, "loss": 0.4987, "step": 19629 }, { "epoch": 1.0992272370926195, "grad_norm": 1.1673990488052368, "learning_rate": 9.81e-05, "loss": 
0.3156, "step": 19630 }, { "epoch": 1.0992832344047485, "grad_norm": 1.4120322465896606, "learning_rate": 9.8105e-05, "loss": 0.4933, "step": 19631 }, { "epoch": 1.0993392317168775, "grad_norm": 1.819898009300232, "learning_rate": 9.811000000000001e-05, "loss": 0.4087, "step": 19632 }, { "epoch": 1.0993952290290065, "grad_norm": 1.1839394569396973, "learning_rate": 9.8115e-05, "loss": 0.4765, "step": 19633 }, { "epoch": 1.0994512263411356, "grad_norm": 1.347901463508606, "learning_rate": 9.812e-05, "loss": 0.417, "step": 19634 }, { "epoch": 1.0995072236532646, "grad_norm": 1.4624589681625366, "learning_rate": 9.8125e-05, "loss": 0.4186, "step": 19635 }, { "epoch": 1.0995632209653936, "grad_norm": 1.3179152011871338, "learning_rate": 9.813e-05, "loss": 0.3827, "step": 19636 }, { "epoch": 1.0996192182775226, "grad_norm": 1.1995933055877686, "learning_rate": 9.8135e-05, "loss": 0.3866, "step": 19637 }, { "epoch": 1.0996752155896516, "grad_norm": 1.2979129552841187, "learning_rate": 9.814e-05, "loss": 0.4895, "step": 19638 }, { "epoch": 1.0997312129017807, "grad_norm": 1.2624669075012207, "learning_rate": 9.8145e-05, "loss": 0.4745, "step": 19639 }, { "epoch": 1.0997872102139097, "grad_norm": 1.2604899406433105, "learning_rate": 9.815000000000001e-05, "loss": 0.4489, "step": 19640 }, { "epoch": 1.0998432075260387, "grad_norm": 1.3490601778030396, "learning_rate": 9.815500000000001e-05, "loss": 0.414, "step": 19641 }, { "epoch": 1.0998992048381677, "grad_norm": 1.4333354234695435, "learning_rate": 9.816000000000001e-05, "loss": 0.43, "step": 19642 }, { "epoch": 1.0999552021502967, "grad_norm": 1.474410057067871, "learning_rate": 9.8165e-05, "loss": 0.5682, "step": 19643 }, { "epoch": 1.1000111994624258, "grad_norm": 1.4290974140167236, "learning_rate": 9.817e-05, "loss": 0.4719, "step": 19644 }, { "epoch": 1.1000671967745548, "grad_norm": 1.4126131534576416, "learning_rate": 9.8175e-05, "loss": 0.4099, "step": 19645 }, { "epoch": 1.1001231940866838, "grad_norm": 
1.2462862730026245, "learning_rate": 9.818000000000001e-05, "loss": 0.3927, "step": 19646 }, { "epoch": 1.1001791913988128, "grad_norm": 1.3805636167526245, "learning_rate": 9.818500000000001e-05, "loss": 0.3539, "step": 19647 }, { "epoch": 1.1002351887109418, "grad_norm": 1.6017307043075562, "learning_rate": 9.819000000000001e-05, "loss": 0.434, "step": 19648 }, { "epoch": 1.1002911860230709, "grad_norm": 1.4915167093276978, "learning_rate": 9.8195e-05, "loss": 0.2893, "step": 19649 }, { "epoch": 1.1003471833351999, "grad_norm": 1.3202970027923584, "learning_rate": 9.82e-05, "loss": 0.4855, "step": 19650 }, { "epoch": 1.100403180647329, "grad_norm": 1.255157232284546, "learning_rate": 9.8205e-05, "loss": 0.383, "step": 19651 }, { "epoch": 1.100459177959458, "grad_norm": 1.5192474126815796, "learning_rate": 9.821e-05, "loss": 0.5071, "step": 19652 }, { "epoch": 1.100515175271587, "grad_norm": 1.2128627300262451, "learning_rate": 9.821500000000001e-05, "loss": 0.352, "step": 19653 }, { "epoch": 1.100571172583716, "grad_norm": 1.457595944404602, "learning_rate": 9.822e-05, "loss": 0.3849, "step": 19654 }, { "epoch": 1.100627169895845, "grad_norm": 1.1318013668060303, "learning_rate": 9.8225e-05, "loss": 0.3474, "step": 19655 }, { "epoch": 1.100683167207974, "grad_norm": 1.130577564239502, "learning_rate": 9.823e-05, "loss": 0.4295, "step": 19656 }, { "epoch": 1.100739164520103, "grad_norm": 1.4926098585128784, "learning_rate": 9.8235e-05, "loss": 0.3817, "step": 19657 }, { "epoch": 1.100795161832232, "grad_norm": 1.287928819656372, "learning_rate": 9.824000000000001e-05, "loss": 0.3621, "step": 19658 }, { "epoch": 1.100851159144361, "grad_norm": 1.5619772672653198, "learning_rate": 9.8245e-05, "loss": 0.4055, "step": 19659 }, { "epoch": 1.10090715645649, "grad_norm": 1.1738126277923584, "learning_rate": 9.825e-05, "loss": 0.301, "step": 19660 }, { "epoch": 1.100963153768619, "grad_norm": 1.506620168685913, "learning_rate": 9.825500000000001e-05, "loss": 0.4636, 
"step": 19661 }, { "epoch": 1.101019151080748, "grad_norm": 1.37851881980896, "learning_rate": 9.826000000000001e-05, "loss": 0.4036, "step": 19662 }, { "epoch": 1.1010751483928771, "grad_norm": 1.145391821861267, "learning_rate": 9.826500000000001e-05, "loss": 0.4037, "step": 19663 }, { "epoch": 1.1011311457050061, "grad_norm": 1.5921082496643066, "learning_rate": 9.827e-05, "loss": 0.4837, "step": 19664 }, { "epoch": 1.1011871430171352, "grad_norm": 1.2360310554504395, "learning_rate": 9.8275e-05, "loss": 0.3909, "step": 19665 }, { "epoch": 1.1012431403292642, "grad_norm": 1.3479084968566895, "learning_rate": 9.828e-05, "loss": 0.3891, "step": 19666 }, { "epoch": 1.1012991376413932, "grad_norm": 1.4180879592895508, "learning_rate": 9.8285e-05, "loss": 0.4824, "step": 19667 }, { "epoch": 1.1013551349535222, "grad_norm": 1.416598916053772, "learning_rate": 9.829000000000001e-05, "loss": 0.4931, "step": 19668 }, { "epoch": 1.1014111322656512, "grad_norm": 1.3986800909042358, "learning_rate": 9.8295e-05, "loss": 0.4377, "step": 19669 }, { "epoch": 1.1014671295777803, "grad_norm": 1.4680436849594116, "learning_rate": 9.83e-05, "loss": 0.4207, "step": 19670 }, { "epoch": 1.1015231268899093, "grad_norm": 1.2534722089767456, "learning_rate": 9.8305e-05, "loss": 0.4465, "step": 19671 }, { "epoch": 1.1015791242020383, "grad_norm": 1.4206799268722534, "learning_rate": 9.831e-05, "loss": 0.3915, "step": 19672 }, { "epoch": 1.1016351215141673, "grad_norm": 1.613774299621582, "learning_rate": 9.8315e-05, "loss": 0.5944, "step": 19673 }, { "epoch": 1.1016911188262963, "grad_norm": 1.422844409942627, "learning_rate": 9.832000000000001e-05, "loss": 0.4587, "step": 19674 }, { "epoch": 1.1017471161384254, "grad_norm": 1.4094058275222778, "learning_rate": 9.8325e-05, "loss": 0.4965, "step": 19675 }, { "epoch": 1.1018031134505544, "grad_norm": 1.4469799995422363, "learning_rate": 9.833e-05, "loss": 0.4653, "step": 19676 }, { "epoch": 1.1018591107626834, "grad_norm": 
1.2660422325134277, "learning_rate": 9.8335e-05, "loss": 0.4329, "step": 19677 }, { "epoch": 1.1019151080748124, "grad_norm": 1.3411662578582764, "learning_rate": 9.834000000000001e-05, "loss": 0.4491, "step": 19678 }, { "epoch": 1.1019711053869414, "grad_norm": 1.233513593673706, "learning_rate": 9.834500000000001e-05, "loss": 0.5012, "step": 19679 }, { "epoch": 1.1020271026990704, "grad_norm": 1.741809368133545, "learning_rate": 9.835e-05, "loss": 0.4552, "step": 19680 }, { "epoch": 1.1020831000111995, "grad_norm": 1.3911317586898804, "learning_rate": 9.8355e-05, "loss": 0.4226, "step": 19681 }, { "epoch": 1.1021390973233285, "grad_norm": 1.219521403312683, "learning_rate": 9.836000000000001e-05, "loss": 0.3876, "step": 19682 }, { "epoch": 1.1021950946354575, "grad_norm": 1.204106092453003, "learning_rate": 9.836500000000001e-05, "loss": 0.3628, "step": 19683 }, { "epoch": 1.1022510919475865, "grad_norm": 1.6147651672363281, "learning_rate": 9.837000000000001e-05, "loss": 0.4808, "step": 19684 }, { "epoch": 1.1023070892597155, "grad_norm": 1.4001786708831787, "learning_rate": 9.8375e-05, "loss": 0.4303, "step": 19685 }, { "epoch": 1.1023630865718446, "grad_norm": 1.4113482236862183, "learning_rate": 9.838e-05, "loss": 0.5507, "step": 19686 }, { "epoch": 1.1024190838839736, "grad_norm": 1.3065470457077026, "learning_rate": 9.8385e-05, "loss": 0.4914, "step": 19687 }, { "epoch": 1.1024750811961026, "grad_norm": 1.4770466089248657, "learning_rate": 9.839e-05, "loss": 0.4447, "step": 19688 }, { "epoch": 1.1025310785082316, "grad_norm": 1.696991205215454, "learning_rate": 9.839500000000001e-05, "loss": 0.6721, "step": 19689 }, { "epoch": 1.1025870758203606, "grad_norm": 1.3368000984191895, "learning_rate": 9.84e-05, "loss": 0.3748, "step": 19690 }, { "epoch": 1.1026430731324897, "grad_norm": 1.4557217359542847, "learning_rate": 9.8405e-05, "loss": 0.3309, "step": 19691 }, { "epoch": 1.1026990704446187, "grad_norm": 1.1468091011047363, "learning_rate": 9.841e-05, 
"loss": 0.323, "step": 19692 }, { "epoch": 1.1027550677567477, "grad_norm": 1.4091640710830688, "learning_rate": 9.8415e-05, "loss": 0.4994, "step": 19693 }, { "epoch": 1.1028110650688767, "grad_norm": 1.1214548349380493, "learning_rate": 9.842e-05, "loss": 0.4106, "step": 19694 }, { "epoch": 1.1028670623810057, "grad_norm": 1.9984763860702515, "learning_rate": 9.842500000000001e-05, "loss": 0.5373, "step": 19695 }, { "epoch": 1.1029230596931348, "grad_norm": 1.2726832628250122, "learning_rate": 9.843e-05, "loss": 0.4828, "step": 19696 }, { "epoch": 1.1029790570052638, "grad_norm": 1.4429244995117188, "learning_rate": 9.8435e-05, "loss": 0.4125, "step": 19697 }, { "epoch": 1.1030350543173928, "grad_norm": 1.4349812269210815, "learning_rate": 9.844000000000001e-05, "loss": 0.3782, "step": 19698 }, { "epoch": 1.1030910516295218, "grad_norm": 1.1398873329162598, "learning_rate": 9.844500000000001e-05, "loss": 0.3644, "step": 19699 }, { "epoch": 1.1031470489416508, "grad_norm": 1.466614842414856, "learning_rate": 9.845000000000001e-05, "loss": 0.4185, "step": 19700 }, { "epoch": 1.1032030462537799, "grad_norm": 1.4406763315200806, "learning_rate": 9.8455e-05, "loss": 0.5374, "step": 19701 }, { "epoch": 1.1032590435659089, "grad_norm": 1.4726649522781372, "learning_rate": 9.846e-05, "loss": 0.4873, "step": 19702 }, { "epoch": 1.103315040878038, "grad_norm": 1.8432420492172241, "learning_rate": 9.846500000000001e-05, "loss": 0.5391, "step": 19703 }, { "epoch": 1.103371038190167, "grad_norm": 1.6330406665802002, "learning_rate": 9.847000000000001e-05, "loss": 0.4411, "step": 19704 }, { "epoch": 1.103427035502296, "grad_norm": 1.1099885702133179, "learning_rate": 9.847500000000001e-05, "loss": 0.358, "step": 19705 }, { "epoch": 1.103483032814425, "grad_norm": 1.2998239994049072, "learning_rate": 9.848e-05, "loss": 0.6747, "step": 19706 }, { "epoch": 1.103539030126554, "grad_norm": 1.2724019289016724, "learning_rate": 9.8485e-05, "loss": 0.3967, "step": 19707 }, { "epoch": 
1.103595027438683, "grad_norm": 1.0637582540512085, "learning_rate": 9.849e-05, "loss": 0.5166, "step": 19708 }, { "epoch": 1.103651024750812, "grad_norm": 1.5849372148513794, "learning_rate": 9.8495e-05, "loss": 0.5009, "step": 19709 }, { "epoch": 1.103707022062941, "grad_norm": 1.1329911947250366, "learning_rate": 9.850000000000001e-05, "loss": 0.4318, "step": 19710 }, { "epoch": 1.10376301937507, "grad_norm": 1.33184814453125, "learning_rate": 9.8505e-05, "loss": 0.3949, "step": 19711 }, { "epoch": 1.103819016687199, "grad_norm": 1.7777830362319946, "learning_rate": 9.851e-05, "loss": 0.2906, "step": 19712 }, { "epoch": 1.103875013999328, "grad_norm": 1.3954609632492065, "learning_rate": 9.8515e-05, "loss": 0.4586, "step": 19713 }, { "epoch": 1.103931011311457, "grad_norm": 1.442158818244934, "learning_rate": 9.852e-05, "loss": 0.4672, "step": 19714 }, { "epoch": 1.1039870086235861, "grad_norm": 1.2966798543930054, "learning_rate": 9.8525e-05, "loss": 0.3713, "step": 19715 }, { "epoch": 1.1040430059357151, "grad_norm": 1.3441522121429443, "learning_rate": 9.853e-05, "loss": 0.5063, "step": 19716 }, { "epoch": 1.1040990032478442, "grad_norm": 1.5825473070144653, "learning_rate": 9.8535e-05, "loss": 0.4611, "step": 19717 }, { "epoch": 1.1041550005599732, "grad_norm": 1.2520579099655151, "learning_rate": 9.854000000000002e-05, "loss": 0.4068, "step": 19718 }, { "epoch": 1.1042109978721022, "grad_norm": 1.5830234289169312, "learning_rate": 9.854500000000001e-05, "loss": 0.544, "step": 19719 }, { "epoch": 1.1042669951842312, "grad_norm": 1.355677604675293, "learning_rate": 9.855000000000001e-05, "loss": 0.4227, "step": 19720 }, { "epoch": 1.1043229924963602, "grad_norm": 1.1585386991500854, "learning_rate": 9.855500000000001e-05, "loss": 0.412, "step": 19721 }, { "epoch": 1.1043789898084893, "grad_norm": 1.249297857284546, "learning_rate": 9.856e-05, "loss": 0.4418, "step": 19722 }, { "epoch": 1.1044349871206183, "grad_norm": 1.499921202659607, "learning_rate": 
9.8565e-05, "loss": 0.4302, "step": 19723 }, { "epoch": 1.1044909844327473, "grad_norm": 1.3647310733795166, "learning_rate": 9.857000000000001e-05, "loss": 0.3631, "step": 19724 }, { "epoch": 1.1045469817448763, "grad_norm": 1.7964094877243042, "learning_rate": 9.857500000000001e-05, "loss": 0.5309, "step": 19725 }, { "epoch": 1.1046029790570053, "grad_norm": 1.1209633350372314, "learning_rate": 9.858000000000001e-05, "loss": 0.3896, "step": 19726 }, { "epoch": 1.1046589763691343, "grad_norm": 1.3948016166687012, "learning_rate": 9.8585e-05, "loss": 0.3297, "step": 19727 }, { "epoch": 1.1047149736812634, "grad_norm": 1.5150035619735718, "learning_rate": 9.859e-05, "loss": 0.419, "step": 19728 }, { "epoch": 1.1047709709933924, "grad_norm": 1.659343957901001, "learning_rate": 9.8595e-05, "loss": 0.6311, "step": 19729 }, { "epoch": 1.1048269683055214, "grad_norm": 1.4386720657348633, "learning_rate": 9.86e-05, "loss": 0.4453, "step": 19730 }, { "epoch": 1.1048829656176504, "grad_norm": 1.4890894889831543, "learning_rate": 9.860500000000001e-05, "loss": 0.518, "step": 19731 }, { "epoch": 1.1049389629297794, "grad_norm": 1.4367374181747437, "learning_rate": 9.861e-05, "loss": 0.5873, "step": 19732 }, { "epoch": 1.1049949602419085, "grad_norm": 1.6929882764816284, "learning_rate": 9.8615e-05, "loss": 0.4449, "step": 19733 }, { "epoch": 1.1050509575540375, "grad_norm": 1.3177821636199951, "learning_rate": 9.862e-05, "loss": 0.3507, "step": 19734 }, { "epoch": 1.1051069548661665, "grad_norm": 1.3555843830108643, "learning_rate": 9.8625e-05, "loss": 0.4354, "step": 19735 }, { "epoch": 1.1051629521782955, "grad_norm": 1.4369921684265137, "learning_rate": 9.863e-05, "loss": 0.4411, "step": 19736 }, { "epoch": 1.1052189494904245, "grad_norm": 1.2860368490219116, "learning_rate": 9.8635e-05, "loss": 0.3695, "step": 19737 }, { "epoch": 1.1052749468025536, "grad_norm": 1.36696457862854, "learning_rate": 9.864e-05, "loss": 0.4118, "step": 19738 }, { "epoch": 1.1053309441146826, 
"grad_norm": 1.2955561876296997, "learning_rate": 9.864500000000002e-05, "loss": 0.3879, "step": 19739 }, { "epoch": 1.1053869414268116, "grad_norm": 1.7504791021347046, "learning_rate": 9.865000000000001e-05, "loss": 0.4655, "step": 19740 }, { "epoch": 1.1054429387389406, "grad_norm": 1.4993438720703125, "learning_rate": 9.865500000000001e-05, "loss": 0.4497, "step": 19741 }, { "epoch": 1.1054989360510696, "grad_norm": 1.411922574043274, "learning_rate": 9.866000000000001e-05, "loss": 0.5643, "step": 19742 }, { "epoch": 1.1055549333631987, "grad_norm": 1.3271328210830688, "learning_rate": 9.8665e-05, "loss": 0.4518, "step": 19743 }, { "epoch": 1.1056109306753277, "grad_norm": 1.5522270202636719, "learning_rate": 9.867e-05, "loss": 0.4669, "step": 19744 }, { "epoch": 1.1056669279874567, "grad_norm": 1.6213088035583496, "learning_rate": 9.8675e-05, "loss": 0.39, "step": 19745 }, { "epoch": 1.1057229252995857, "grad_norm": 1.1774853467941284, "learning_rate": 9.868000000000001e-05, "loss": 0.4392, "step": 19746 }, { "epoch": 1.1057789226117147, "grad_norm": 1.3302592039108276, "learning_rate": 9.868500000000001e-05, "loss": 0.4107, "step": 19747 }, { "epoch": 1.1058349199238438, "grad_norm": 1.5091776847839355, "learning_rate": 9.869e-05, "loss": 0.4309, "step": 19748 }, { "epoch": 1.1058909172359728, "grad_norm": 1.4652016162872314, "learning_rate": 9.8695e-05, "loss": 0.4422, "step": 19749 }, { "epoch": 1.1059469145481018, "grad_norm": 1.6019277572631836, "learning_rate": 9.87e-05, "loss": 0.4204, "step": 19750 }, { "epoch": 1.1060029118602308, "grad_norm": 1.4288212060928345, "learning_rate": 9.8705e-05, "loss": 0.4841, "step": 19751 }, { "epoch": 1.1060589091723598, "grad_norm": 1.8394572734832764, "learning_rate": 9.871000000000001e-05, "loss": 0.4747, "step": 19752 }, { "epoch": 1.1061149064844888, "grad_norm": 1.221510648727417, "learning_rate": 9.8715e-05, "loss": 0.463, "step": 19753 }, { "epoch": 1.1061709037966179, "grad_norm": 1.2516493797302246, 
"learning_rate": 9.872e-05, "loss": 0.3969, "step": 19754 }, { "epoch": 1.1062269011087467, "grad_norm": 1.5816762447357178, "learning_rate": 9.8725e-05, "loss": 0.449, "step": 19755 }, { "epoch": 1.1062828984208757, "grad_norm": 1.6973276138305664, "learning_rate": 9.873e-05, "loss": 0.4978, "step": 19756 }, { "epoch": 1.1063388957330047, "grad_norm": 1.1648826599121094, "learning_rate": 9.8735e-05, "loss": 0.3786, "step": 19757 }, { "epoch": 1.1063948930451337, "grad_norm": 1.2789723873138428, "learning_rate": 9.874e-05, "loss": 0.3556, "step": 19758 }, { "epoch": 1.1064508903572627, "grad_norm": 1.9684841632843018, "learning_rate": 9.8745e-05, "loss": 0.4716, "step": 19759 }, { "epoch": 1.1065068876693918, "grad_norm": 1.2908488512039185, "learning_rate": 9.875000000000002e-05, "loss": 0.4018, "step": 19760 }, { "epoch": 1.1065628849815208, "grad_norm": 1.6126617193222046, "learning_rate": 9.875500000000001e-05, "loss": 0.4275, "step": 19761 }, { "epoch": 1.1066188822936498, "grad_norm": 1.5051411390304565, "learning_rate": 9.876000000000001e-05, "loss": 0.4489, "step": 19762 }, { "epoch": 1.1066748796057788, "grad_norm": 1.4963515996932983, "learning_rate": 9.876500000000001e-05, "loss": 0.4229, "step": 19763 }, { "epoch": 1.1067308769179078, "grad_norm": 1.419785499572754, "learning_rate": 9.877e-05, "loss": 0.4226, "step": 19764 }, { "epoch": 1.1067868742300369, "grad_norm": 1.2918188571929932, "learning_rate": 9.8775e-05, "loss": 0.4028, "step": 19765 }, { "epoch": 1.1068428715421659, "grad_norm": 1.3430591821670532, "learning_rate": 9.878e-05, "loss": 0.4244, "step": 19766 }, { "epoch": 1.106898868854295, "grad_norm": 1.2693862915039062, "learning_rate": 9.878500000000001e-05, "loss": 0.4661, "step": 19767 }, { "epoch": 1.106954866166424, "grad_norm": 1.434861421585083, "learning_rate": 9.879000000000001e-05, "loss": 0.5121, "step": 19768 }, { "epoch": 1.107010863478553, "grad_norm": 1.250809669494629, "learning_rate": 9.8795e-05, "loss": 0.3467, "step": 
19769 }, { "epoch": 1.107066860790682, "grad_norm": 1.286453127861023, "learning_rate": 9.88e-05, "loss": 0.5006, "step": 19770 }, { "epoch": 1.107122858102811, "grad_norm": 1.2133138179779053, "learning_rate": 9.8805e-05, "loss": 0.4423, "step": 19771 }, { "epoch": 1.10717885541494, "grad_norm": 1.489060401916504, "learning_rate": 9.881e-05, "loss": 0.3541, "step": 19772 }, { "epoch": 1.107234852727069, "grad_norm": 1.31699800491333, "learning_rate": 9.881500000000001e-05, "loss": 0.3656, "step": 19773 }, { "epoch": 1.107290850039198, "grad_norm": 1.2022706270217896, "learning_rate": 9.882e-05, "loss": 0.4431, "step": 19774 }, { "epoch": 1.107346847351327, "grad_norm": 1.1489864587783813, "learning_rate": 9.8825e-05, "loss": 0.3205, "step": 19775 }, { "epoch": 1.107402844663456, "grad_norm": 1.2494655847549438, "learning_rate": 9.883e-05, "loss": 0.3814, "step": 19776 }, { "epoch": 1.107458841975585, "grad_norm": 1.5846800804138184, "learning_rate": 9.8835e-05, "loss": 0.4948, "step": 19777 }, { "epoch": 1.107514839287714, "grad_norm": 1.374254584312439, "learning_rate": 9.884e-05, "loss": 0.5802, "step": 19778 }, { "epoch": 1.1075708365998431, "grad_norm": 1.5100247859954834, "learning_rate": 9.8845e-05, "loss": 0.4543, "step": 19779 }, { "epoch": 1.1076268339119721, "grad_norm": 1.4424753189086914, "learning_rate": 9.885e-05, "loss": 0.4236, "step": 19780 }, { "epoch": 1.1076828312241012, "grad_norm": 1.4685970544815063, "learning_rate": 9.885500000000001e-05, "loss": 0.4548, "step": 19781 }, { "epoch": 1.1077388285362302, "grad_norm": 1.7806181907653809, "learning_rate": 9.886000000000001e-05, "loss": 0.3814, "step": 19782 }, { "epoch": 1.1077948258483592, "grad_norm": 1.264974594116211, "learning_rate": 9.886500000000001e-05, "loss": 0.3284, "step": 19783 }, { "epoch": 1.1078508231604882, "grad_norm": 1.2328931093215942, "learning_rate": 9.887000000000001e-05, "loss": 0.5104, "step": 19784 }, { "epoch": 1.1079068204726172, "grad_norm": 1.28622567653656, 
"learning_rate": 9.8875e-05, "loss": 0.4541, "step": 19785 }, { "epoch": 1.1079628177847463, "grad_norm": 1.5155245065689087, "learning_rate": 9.888e-05, "loss": 0.4498, "step": 19786 }, { "epoch": 1.1080188150968753, "grad_norm": 1.269075632095337, "learning_rate": 9.8885e-05, "loss": 0.2918, "step": 19787 }, { "epoch": 1.1080748124090043, "grad_norm": 2.4623186588287354, "learning_rate": 9.889000000000001e-05, "loss": 0.4697, "step": 19788 }, { "epoch": 1.1081308097211333, "grad_norm": 1.508831262588501, "learning_rate": 9.889500000000001e-05, "loss": 0.629, "step": 19789 }, { "epoch": 1.1081868070332623, "grad_norm": 1.5116913318634033, "learning_rate": 9.89e-05, "loss": 0.5501, "step": 19790 }, { "epoch": 1.1082428043453914, "grad_norm": 1.5507465600967407, "learning_rate": 9.8905e-05, "loss": 0.443, "step": 19791 }, { "epoch": 1.1082988016575204, "grad_norm": 1.4017212390899658, "learning_rate": 9.891e-05, "loss": 0.4627, "step": 19792 }, { "epoch": 1.1083547989696494, "grad_norm": 1.7601009607315063, "learning_rate": 9.8915e-05, "loss": 0.323, "step": 19793 }, { "epoch": 1.1084107962817784, "grad_norm": 1.5531072616577148, "learning_rate": 9.892e-05, "loss": 0.4752, "step": 19794 }, { "epoch": 1.1084667935939074, "grad_norm": 1.5044505596160889, "learning_rate": 9.8925e-05, "loss": 0.5064, "step": 19795 }, { "epoch": 1.1085227909060364, "grad_norm": 1.2802830934524536, "learning_rate": 9.893e-05, "loss": 0.5113, "step": 19796 }, { "epoch": 1.1085787882181655, "grad_norm": 1.742136001586914, "learning_rate": 9.8935e-05, "loss": 0.7409, "step": 19797 }, { "epoch": 1.1086347855302945, "grad_norm": 1.4399957656860352, "learning_rate": 9.894e-05, "loss": 0.5201, "step": 19798 }, { "epoch": 1.1086907828424235, "grad_norm": 1.4919958114624023, "learning_rate": 9.894500000000001e-05, "loss": 0.4162, "step": 19799 }, { "epoch": 1.1087467801545525, "grad_norm": 1.313231348991394, "learning_rate": 9.895e-05, "loss": 0.377, "step": 19800 }, { "epoch": 1.1088027774666815, 
"grad_norm": 1.7701061964035034, "learning_rate": 9.8955e-05, "loss": 0.4284, "step": 19801 }, { "epoch": 1.1088587747788106, "grad_norm": 1.6026102304458618, "learning_rate": 9.896000000000001e-05, "loss": 0.4224, "step": 19802 }, { "epoch": 1.1089147720909396, "grad_norm": 1.1849709749221802, "learning_rate": 9.896500000000001e-05, "loss": 0.4069, "step": 19803 }, { "epoch": 1.1089707694030686, "grad_norm": 1.3921117782592773, "learning_rate": 9.897000000000001e-05, "loss": 0.4451, "step": 19804 }, { "epoch": 1.1090267667151976, "grad_norm": 1.1940969228744507, "learning_rate": 9.897500000000001e-05, "loss": 0.4032, "step": 19805 }, { "epoch": 1.1090827640273266, "grad_norm": 1.337113857269287, "learning_rate": 9.898e-05, "loss": 0.4439, "step": 19806 }, { "epoch": 1.1091387613394557, "grad_norm": 1.444793701171875, "learning_rate": 9.8985e-05, "loss": 0.494, "step": 19807 }, { "epoch": 1.1091947586515847, "grad_norm": 1.5827836990356445, "learning_rate": 9.899e-05, "loss": 0.4808, "step": 19808 }, { "epoch": 1.1092507559637137, "grad_norm": 1.4604209661483765, "learning_rate": 9.899500000000001e-05, "loss": 0.4782, "step": 19809 }, { "epoch": 1.1093067532758427, "grad_norm": 1.7103921175003052, "learning_rate": 9.900000000000001e-05, "loss": 0.7892, "step": 19810 }, { "epoch": 1.1093627505879717, "grad_norm": 1.7148162126541138, "learning_rate": 9.9005e-05, "loss": 0.3339, "step": 19811 }, { "epoch": 1.1094187479001008, "grad_norm": 1.3999277353286743, "learning_rate": 9.901e-05, "loss": 0.5298, "step": 19812 }, { "epoch": 1.1094747452122298, "grad_norm": 1.165982723236084, "learning_rate": 9.9015e-05, "loss": 0.4168, "step": 19813 }, { "epoch": 1.1095307425243588, "grad_norm": 1.240397334098816, "learning_rate": 9.902e-05, "loss": 0.3886, "step": 19814 }, { "epoch": 1.1095867398364878, "grad_norm": 1.2412090301513672, "learning_rate": 9.9025e-05, "loss": 0.32, "step": 19815 }, { "epoch": 1.1096427371486168, "grad_norm": 1.3912166357040405, "learning_rate": 
9.903e-05, "loss": 0.4508, "step": 19816 }, { "epoch": 1.1096987344607459, "grad_norm": 1.2844611406326294, "learning_rate": 9.9035e-05, "loss": 0.4375, "step": 19817 }, { "epoch": 1.1097547317728749, "grad_norm": 2.117445707321167, "learning_rate": 9.904e-05, "loss": 0.601, "step": 19818 }, { "epoch": 1.1098107290850039, "grad_norm": 1.6978082656860352, "learning_rate": 9.904500000000001e-05, "loss": 0.512, "step": 19819 }, { "epoch": 1.109866726397133, "grad_norm": 1.3439422845840454, "learning_rate": 9.905000000000001e-05, "loss": 0.473, "step": 19820 }, { "epoch": 1.109922723709262, "grad_norm": 1.410443663597107, "learning_rate": 9.9055e-05, "loss": 0.401, "step": 19821 }, { "epoch": 1.109978721021391, "grad_norm": 1.4224430322647095, "learning_rate": 9.906e-05, "loss": 0.4945, "step": 19822 }, { "epoch": 1.11003471833352, "grad_norm": 1.2218562364578247, "learning_rate": 9.9065e-05, "loss": 0.4759, "step": 19823 }, { "epoch": 1.110090715645649, "grad_norm": 1.3283394575119019, "learning_rate": 9.907000000000001e-05, "loss": 0.4862, "step": 19824 }, { "epoch": 1.110146712957778, "grad_norm": 1.2634313106536865, "learning_rate": 9.907500000000001e-05, "loss": 0.4187, "step": 19825 }, { "epoch": 1.110202710269907, "grad_norm": 1.288685917854309, "learning_rate": 9.908000000000001e-05, "loss": 0.3703, "step": 19826 }, { "epoch": 1.110258707582036, "grad_norm": 1.2056894302368164, "learning_rate": 9.9085e-05, "loss": 0.4127, "step": 19827 }, { "epoch": 1.110314704894165, "grad_norm": 1.2373862266540527, "learning_rate": 9.909e-05, "loss": 0.4087, "step": 19828 }, { "epoch": 1.110370702206294, "grad_norm": 1.2632445096969604, "learning_rate": 9.9095e-05, "loss": 0.4093, "step": 19829 }, { "epoch": 1.110426699518423, "grad_norm": 1.4656373262405396, "learning_rate": 9.910000000000001e-05, "loss": 0.5283, "step": 19830 }, { "epoch": 1.1104826968305521, "grad_norm": 1.6199040412902832, "learning_rate": 9.910500000000001e-05, "loss": 0.4204, "step": 19831 }, { "epoch": 
1.1105386941426811, "grad_norm": 1.6431338787078857, "learning_rate": 9.911e-05, "loss": 0.4061, "step": 19832 }, { "epoch": 1.1105946914548102, "grad_norm": 1.3754061460494995, "learning_rate": 9.9115e-05, "loss": 0.3841, "step": 19833 }, { "epoch": 1.1106506887669392, "grad_norm": 1.3536350727081299, "learning_rate": 9.912e-05, "loss": 0.4043, "step": 19834 }, { "epoch": 1.1107066860790682, "grad_norm": 1.239001750946045, "learning_rate": 9.9125e-05, "loss": 0.4993, "step": 19835 }, { "epoch": 1.1107626833911972, "grad_norm": 1.9410868883132935, "learning_rate": 9.913e-05, "loss": 0.5781, "step": 19836 }, { "epoch": 1.1108186807033262, "grad_norm": 1.3264325857162476, "learning_rate": 9.9135e-05, "loss": 0.3521, "step": 19837 }, { "epoch": 1.1108746780154553, "grad_norm": 1.7010470628738403, "learning_rate": 9.914e-05, "loss": 0.4823, "step": 19838 }, { "epoch": 1.1109306753275843, "grad_norm": 1.9255300760269165, "learning_rate": 9.914500000000001e-05, "loss": 0.5195, "step": 19839 }, { "epoch": 1.1109866726397133, "grad_norm": 1.4976234436035156, "learning_rate": 9.915000000000001e-05, "loss": 0.4206, "step": 19840 }, { "epoch": 1.1110426699518423, "grad_norm": 1.4784563779830933, "learning_rate": 9.915500000000001e-05, "loss": 0.3892, "step": 19841 }, { "epoch": 1.1110986672639713, "grad_norm": 1.702567458152771, "learning_rate": 9.916e-05, "loss": 0.467, "step": 19842 }, { "epoch": 1.1111546645761003, "grad_norm": 1.2280988693237305, "learning_rate": 9.9165e-05, "loss": 0.4062, "step": 19843 }, { "epoch": 1.1112106618882294, "grad_norm": 1.4703607559204102, "learning_rate": 9.917e-05, "loss": 0.5394, "step": 19844 }, { "epoch": 1.1112666592003584, "grad_norm": 1.282779335975647, "learning_rate": 9.917500000000001e-05, "loss": 0.3558, "step": 19845 }, { "epoch": 1.1113226565124874, "grad_norm": 1.4535640478134155, "learning_rate": 9.918000000000001e-05, "loss": 0.4454, "step": 19846 }, { "epoch": 1.1113786538246164, "grad_norm": 1.484246850013733, 
"learning_rate": 9.9185e-05, "loss": 0.4246, "step": 19847 }, { "epoch": 1.1114346511367454, "grad_norm": 1.4134314060211182, "learning_rate": 9.919e-05, "loss": 0.3681, "step": 19848 }, { "epoch": 1.1114906484488745, "grad_norm": 1.7610067129135132, "learning_rate": 9.9195e-05, "loss": 0.5749, "step": 19849 }, { "epoch": 1.1115466457610035, "grad_norm": 1.642107605934143, "learning_rate": 9.92e-05, "loss": 0.48, "step": 19850 }, { "epoch": 1.1116026430731325, "grad_norm": 1.2779661417007446, "learning_rate": 9.920500000000001e-05, "loss": 0.3769, "step": 19851 }, { "epoch": 1.1116586403852615, "grad_norm": 1.3027851581573486, "learning_rate": 9.921000000000001e-05, "loss": 0.4104, "step": 19852 }, { "epoch": 1.1117146376973905, "grad_norm": 1.3508186340332031, "learning_rate": 9.9215e-05, "loss": 0.4403, "step": 19853 }, { "epoch": 1.1117706350095196, "grad_norm": 1.39118230342865, "learning_rate": 9.922e-05, "loss": 0.4971, "step": 19854 }, { "epoch": 1.1118266323216486, "grad_norm": 1.4822081327438354, "learning_rate": 9.9225e-05, "loss": 0.4282, "step": 19855 }, { "epoch": 1.1118826296337776, "grad_norm": 1.4425870180130005, "learning_rate": 9.923e-05, "loss": 0.3867, "step": 19856 }, { "epoch": 1.1119386269459066, "grad_norm": 1.5058995485305786, "learning_rate": 9.9235e-05, "loss": 0.3943, "step": 19857 }, { "epoch": 1.1119946242580356, "grad_norm": 1.1833412647247314, "learning_rate": 9.924e-05, "loss": 0.384, "step": 19858 }, { "epoch": 1.1120506215701647, "grad_norm": 1.3850737810134888, "learning_rate": 9.924500000000002e-05, "loss": 0.4203, "step": 19859 }, { "epoch": 1.1121066188822937, "grad_norm": 1.5803650617599487, "learning_rate": 9.925000000000001e-05, "loss": 0.4912, "step": 19860 }, { "epoch": 1.1121626161944227, "grad_norm": 1.3808683156967163, "learning_rate": 9.925500000000001e-05, "loss": 0.4308, "step": 19861 }, { "epoch": 1.1122186135065517, "grad_norm": 1.4314091205596924, "learning_rate": 9.926000000000001e-05, "loss": 0.5991, "step": 
19862 }, { "epoch": 1.1122746108186807, "grad_norm": 1.2249228954315186, "learning_rate": 9.9265e-05, "loss": 0.5417, "step": 19863 }, { "epoch": 1.1123306081308098, "grad_norm": 1.2787408828735352, "learning_rate": 9.927e-05, "loss": 0.3841, "step": 19864 }, { "epoch": 1.1123866054429388, "grad_norm": 1.3893275260925293, "learning_rate": 9.9275e-05, "loss": 0.467, "step": 19865 }, { "epoch": 1.1124426027550678, "grad_norm": 1.149018406867981, "learning_rate": 9.928000000000001e-05, "loss": 0.39, "step": 19866 }, { "epoch": 1.1124986000671968, "grad_norm": 1.4450526237487793, "learning_rate": 9.928500000000001e-05, "loss": 0.371, "step": 19867 }, { "epoch": 1.1125545973793258, "grad_norm": 1.4963886737823486, "learning_rate": 9.929e-05, "loss": 0.4029, "step": 19868 }, { "epoch": 1.1126105946914548, "grad_norm": 1.2389373779296875, "learning_rate": 9.9295e-05, "loss": 0.4354, "step": 19869 }, { "epoch": 1.1126665920035839, "grad_norm": 1.412307858467102, "learning_rate": 9.93e-05, "loss": 0.4996, "step": 19870 }, { "epoch": 1.1127225893157129, "grad_norm": 1.8511191606521606, "learning_rate": 9.9305e-05, "loss": 0.4936, "step": 19871 }, { "epoch": 1.112778586627842, "grad_norm": 1.1849406957626343, "learning_rate": 9.931000000000001e-05, "loss": 0.3702, "step": 19872 }, { "epoch": 1.112834583939971, "grad_norm": 1.2159291505813599, "learning_rate": 9.931500000000001e-05, "loss": 0.414, "step": 19873 }, { "epoch": 1.1128905812521, "grad_norm": 1.3139448165893555, "learning_rate": 9.932e-05, "loss": 0.3703, "step": 19874 }, { "epoch": 1.112946578564229, "grad_norm": 1.3350764513015747, "learning_rate": 9.9325e-05, "loss": 0.6695, "step": 19875 }, { "epoch": 1.113002575876358, "grad_norm": 1.1906070709228516, "learning_rate": 9.933e-05, "loss": 0.4074, "step": 19876 }, { "epoch": 1.113058573188487, "grad_norm": 1.3370988368988037, "learning_rate": 9.9335e-05, "loss": 0.553, "step": 19877 }, { "epoch": 1.113114570500616, "grad_norm": 1.351703405380249, "learning_rate": 
9.934e-05, "loss": 0.4098, "step": 19878 }, { "epoch": 1.113170567812745, "grad_norm": 1.4156376123428345, "learning_rate": 9.9345e-05, "loss": 0.4852, "step": 19879 }, { "epoch": 1.113226565124874, "grad_norm": 1.2463830709457397, "learning_rate": 9.935000000000002e-05, "loss": 0.3755, "step": 19880 }, { "epoch": 1.113282562437003, "grad_norm": 1.4752771854400635, "learning_rate": 9.935500000000001e-05, "loss": 0.4217, "step": 19881 }, { "epoch": 1.113338559749132, "grad_norm": 1.4596432447433472, "learning_rate": 9.936000000000001e-05, "loss": 0.4529, "step": 19882 }, { "epoch": 1.1133945570612611, "grad_norm": 1.6576263904571533, "learning_rate": 9.936500000000001e-05, "loss": 0.5571, "step": 19883 }, { "epoch": 1.1134505543733901, "grad_norm": 1.398348331451416, "learning_rate": 9.937e-05, "loss": 0.4008, "step": 19884 }, { "epoch": 1.1135065516855192, "grad_norm": 1.4356671571731567, "learning_rate": 9.9375e-05, "loss": 0.4391, "step": 19885 }, { "epoch": 1.1135625489976482, "grad_norm": 1.416871190071106, "learning_rate": 9.938e-05, "loss": 0.5502, "step": 19886 }, { "epoch": 1.1136185463097772, "grad_norm": 1.1983762979507446, "learning_rate": 9.938500000000001e-05, "loss": 0.4317, "step": 19887 }, { "epoch": 1.1136745436219062, "grad_norm": 1.3804419040679932, "learning_rate": 9.939000000000001e-05, "loss": 0.4029, "step": 19888 }, { "epoch": 1.1137305409340352, "grad_norm": 1.318977952003479, "learning_rate": 9.9395e-05, "loss": 0.5044, "step": 19889 }, { "epoch": 1.1137865382461642, "grad_norm": 1.2622560262680054, "learning_rate": 9.94e-05, "loss": 0.3935, "step": 19890 }, { "epoch": 1.1138425355582933, "grad_norm": 1.322582483291626, "learning_rate": 9.9405e-05, "loss": 0.4329, "step": 19891 }, { "epoch": 1.1138985328704223, "grad_norm": 1.4327465295791626, "learning_rate": 9.941e-05, "loss": 0.4451, "step": 19892 }, { "epoch": 1.1139545301825513, "grad_norm": 1.2685604095458984, "learning_rate": 9.9415e-05, "loss": 0.3814, "step": 19893 }, { "epoch": 
1.1140105274946803, "grad_norm": 1.1945087909698486, "learning_rate": 9.942000000000001e-05, "loss": 0.4192, "step": 19894 }, { "epoch": 1.1140665248068093, "grad_norm": 1.2283843755722046, "learning_rate": 9.9425e-05, "loss": 0.5873, "step": 19895 }, { "epoch": 1.1141225221189384, "grad_norm": 1.350475788116455, "learning_rate": 9.943e-05, "loss": 0.3933, "step": 19896 }, { "epoch": 1.1141785194310674, "grad_norm": 1.5221939086914062, "learning_rate": 9.9435e-05, "loss": 0.4215, "step": 19897 }, { "epoch": 1.1142345167431964, "grad_norm": 1.5168911218643188, "learning_rate": 9.944e-05, "loss": 0.4496, "step": 19898 }, { "epoch": 1.1142905140553254, "grad_norm": 1.6667182445526123, "learning_rate": 9.9445e-05, "loss": 0.4945, "step": 19899 }, { "epoch": 1.1143465113674544, "grad_norm": 1.4806978702545166, "learning_rate": 9.945e-05, "loss": 0.3982, "step": 19900 }, { "epoch": 1.1144025086795835, "grad_norm": 1.5881752967834473, "learning_rate": 9.945500000000002e-05, "loss": 0.4349, "step": 19901 }, { "epoch": 1.1144585059917125, "grad_norm": 1.4468464851379395, "learning_rate": 9.946000000000001e-05, "loss": 0.5028, "step": 19902 }, { "epoch": 1.1145145033038415, "grad_norm": 1.4290721416473389, "learning_rate": 9.946500000000001e-05, "loss": 0.5494, "step": 19903 }, { "epoch": 1.1145705006159705, "grad_norm": 1.4594035148620605, "learning_rate": 9.947000000000001e-05, "loss": 0.4004, "step": 19904 }, { "epoch": 1.1146264979280995, "grad_norm": 1.3635236024856567, "learning_rate": 9.9475e-05, "loss": 0.5512, "step": 19905 }, { "epoch": 1.1146824952402286, "grad_norm": 1.4220784902572632, "learning_rate": 9.948e-05, "loss": 0.3574, "step": 19906 }, { "epoch": 1.1147384925523576, "grad_norm": 1.4105898141860962, "learning_rate": 9.9485e-05, "loss": 0.5975, "step": 19907 }, { "epoch": 1.1147944898644866, "grad_norm": 1.3315365314483643, "learning_rate": 9.949000000000001e-05, "loss": 0.4772, "step": 19908 }, { "epoch": 1.1148504871766156, "grad_norm": 
1.4663732051849365, "learning_rate": 9.949500000000001e-05, "loss": 0.4859, "step": 19909 }, { "epoch": 1.1149064844887446, "grad_norm": 1.4048585891723633, "learning_rate": 9.95e-05, "loss": 0.4054, "step": 19910 }, { "epoch": 1.1149624818008737, "grad_norm": 1.4173774719238281, "learning_rate": 9.9505e-05, "loss": 0.395, "step": 19911 }, { "epoch": 1.1150184791130027, "grad_norm": 1.297888994216919, "learning_rate": 9.951e-05, "loss": 0.3928, "step": 19912 }, { "epoch": 1.1150744764251317, "grad_norm": 1.144591212272644, "learning_rate": 9.9515e-05, "loss": 0.3601, "step": 19913 }, { "epoch": 1.1151304737372607, "grad_norm": 1.7306883335113525, "learning_rate": 9.952e-05, "loss": 0.4475, "step": 19914 }, { "epoch": 1.1151864710493897, "grad_norm": 1.7416670322418213, "learning_rate": 9.952500000000001e-05, "loss": 0.5564, "step": 19915 }, { "epoch": 1.1152424683615187, "grad_norm": 1.2478415966033936, "learning_rate": 9.953e-05, "loss": 0.5177, "step": 19916 }, { "epoch": 1.1152984656736478, "grad_norm": 1.2330882549285889, "learning_rate": 9.9535e-05, "loss": 0.4486, "step": 19917 }, { "epoch": 1.1153544629857768, "grad_norm": 1.2023470401763916, "learning_rate": 9.954e-05, "loss": 0.5095, "step": 19918 }, { "epoch": 1.1154104602979058, "grad_norm": 1.6836299896240234, "learning_rate": 9.9545e-05, "loss": 0.5091, "step": 19919 }, { "epoch": 1.1154664576100348, "grad_norm": 1.297433614730835, "learning_rate": 9.955000000000001e-05, "loss": 0.3423, "step": 19920 }, { "epoch": 1.1155224549221638, "grad_norm": 1.5119662284851074, "learning_rate": 9.9555e-05, "loss": 0.4965, "step": 19921 }, { "epoch": 1.1155784522342929, "grad_norm": 1.4331485033035278, "learning_rate": 9.956e-05, "loss": 0.4122, "step": 19922 }, { "epoch": 1.1156344495464219, "grad_norm": 1.4502153396606445, "learning_rate": 9.956500000000001e-05, "loss": 0.4129, "step": 19923 }, { "epoch": 1.115690446858551, "grad_norm": 1.3752232789993286, "learning_rate": 9.957000000000001e-05, "loss": 0.408, 
"step": 19924 }, { "epoch": 1.11574644417068, "grad_norm": 1.5951974391937256, "learning_rate": 9.957500000000001e-05, "loss": 0.5723, "step": 19925 }, { "epoch": 1.115802441482809, "grad_norm": 1.2176891565322876, "learning_rate": 9.958e-05, "loss": 0.4047, "step": 19926 }, { "epoch": 1.115858438794938, "grad_norm": 1.3480055332183838, "learning_rate": 9.9585e-05, "loss": 0.4002, "step": 19927 }, { "epoch": 1.115914436107067, "grad_norm": 1.6599169969558716, "learning_rate": 9.959e-05, "loss": 0.6165, "step": 19928 }, { "epoch": 1.115970433419196, "grad_norm": 1.3681401014328003, "learning_rate": 9.959500000000001e-05, "loss": 0.7238, "step": 19929 }, { "epoch": 1.116026430731325, "grad_norm": 1.1669130325317383, "learning_rate": 9.960000000000001e-05, "loss": 0.3322, "step": 19930 }, { "epoch": 1.1160824280434538, "grad_norm": 1.4666856527328491, "learning_rate": 9.9605e-05, "loss": 0.5052, "step": 19931 }, { "epoch": 1.1161384253555828, "grad_norm": 1.4829007387161255, "learning_rate": 9.961e-05, "loss": 0.3775, "step": 19932 }, { "epoch": 1.1161944226677118, "grad_norm": 1.5757139921188354, "learning_rate": 9.9615e-05, "loss": 0.3391, "step": 19933 }, { "epoch": 1.1162504199798409, "grad_norm": 1.6869962215423584, "learning_rate": 9.962e-05, "loss": 0.6753, "step": 19934 }, { "epoch": 1.1163064172919699, "grad_norm": 1.349286675453186, "learning_rate": 9.9625e-05, "loss": 0.4492, "step": 19935 }, { "epoch": 1.116362414604099, "grad_norm": 1.3248045444488525, "learning_rate": 9.963e-05, "loss": 0.3965, "step": 19936 }, { "epoch": 1.116418411916228, "grad_norm": 1.3681490421295166, "learning_rate": 9.9635e-05, "loss": 0.4602, "step": 19937 }, { "epoch": 1.116474409228357, "grad_norm": 1.4884835481643677, "learning_rate": 9.964e-05, "loss": 0.4888, "step": 19938 }, { "epoch": 1.116530406540486, "grad_norm": 1.4044023752212524, "learning_rate": 9.9645e-05, "loss": 0.6349, "step": 19939 }, { "epoch": 1.116586403852615, "grad_norm": 1.4194635152816772, 
"learning_rate": 9.965000000000001e-05, "loss": 0.5979, "step": 19940 }, { "epoch": 1.116642401164744, "grad_norm": 1.5223853588104248, "learning_rate": 9.965500000000001e-05, "loss": 0.4812, "step": 19941 }, { "epoch": 1.116698398476873, "grad_norm": 1.4153226613998413, "learning_rate": 9.966e-05, "loss": 0.4602, "step": 19942 }, { "epoch": 1.116754395789002, "grad_norm": 1.2923203706741333, "learning_rate": 9.9665e-05, "loss": 0.4366, "step": 19943 }, { "epoch": 1.116810393101131, "grad_norm": 1.382320523262024, "learning_rate": 9.967000000000001e-05, "loss": 0.4938, "step": 19944 }, { "epoch": 1.11686639041326, "grad_norm": 1.4697731733322144, "learning_rate": 9.967500000000001e-05, "loss": 0.4776, "step": 19945 }, { "epoch": 1.116922387725389, "grad_norm": 1.3057854175567627, "learning_rate": 9.968000000000001e-05, "loss": 0.489, "step": 19946 }, { "epoch": 1.1169783850375181, "grad_norm": 1.5513603687286377, "learning_rate": 9.9685e-05, "loss": 0.4308, "step": 19947 }, { "epoch": 1.1170343823496471, "grad_norm": 1.413629174232483, "learning_rate": 9.969e-05, "loss": 0.413, "step": 19948 }, { "epoch": 1.1170903796617762, "grad_norm": 1.5906438827514648, "learning_rate": 9.9695e-05, "loss": 0.3332, "step": 19949 }, { "epoch": 1.1171463769739052, "grad_norm": 1.3415582180023193, "learning_rate": 9.970000000000001e-05, "loss": 0.5089, "step": 19950 }, { "epoch": 1.1172023742860342, "grad_norm": 1.3672477006912231, "learning_rate": 9.970500000000001e-05, "loss": 0.4277, "step": 19951 }, { "epoch": 1.1172583715981632, "grad_norm": 1.3722107410430908, "learning_rate": 9.971e-05, "loss": 0.4536, "step": 19952 }, { "epoch": 1.1173143689102922, "grad_norm": 1.2795250415802002, "learning_rate": 9.9715e-05, "loss": 0.4078, "step": 19953 }, { "epoch": 1.1173703662224213, "grad_norm": 1.5060938596725464, "learning_rate": 9.972e-05, "loss": 0.4411, "step": 19954 }, { "epoch": 1.1174263635345503, "grad_norm": 1.2826478481292725, "learning_rate": 9.9725e-05, "loss": 0.4602, 
"step": 19955 }, { "epoch": 1.1174823608466793, "grad_norm": 1.434885859489441, "learning_rate": 9.973e-05, "loss": 0.453, "step": 19956 }, { "epoch": 1.1175383581588083, "grad_norm": 1.210551381111145, "learning_rate": 9.9735e-05, "loss": 0.4582, "step": 19957 }, { "epoch": 1.1175943554709373, "grad_norm": 1.3893661499023438, "learning_rate": 9.974e-05, "loss": 0.4822, "step": 19958 }, { "epoch": 1.1176503527830663, "grad_norm": 1.260366678237915, "learning_rate": 9.9745e-05, "loss": 0.3964, "step": 19959 }, { "epoch": 1.1177063500951954, "grad_norm": 1.0513705015182495, "learning_rate": 9.975000000000001e-05, "loss": 0.3309, "step": 19960 }, { "epoch": 1.1177623474073244, "grad_norm": 4.4490790367126465, "learning_rate": 9.975500000000001e-05, "loss": 0.5086, "step": 19961 }, { "epoch": 1.1178183447194534, "grad_norm": 1.1814807653427124, "learning_rate": 9.976000000000001e-05, "loss": 0.4661, "step": 19962 }, { "epoch": 1.1178743420315824, "grad_norm": 1.4177594184875488, "learning_rate": 9.9765e-05, "loss": 0.454, "step": 19963 }, { "epoch": 1.1179303393437114, "grad_norm": 1.2462059259414673, "learning_rate": 9.977e-05, "loss": 0.5212, "step": 19964 }, { "epoch": 1.1179863366558405, "grad_norm": 1.2088459730148315, "learning_rate": 9.977500000000001e-05, "loss": 0.3805, "step": 19965 }, { "epoch": 1.1180423339679695, "grad_norm": 1.2562636137008667, "learning_rate": 9.978000000000001e-05, "loss": 0.4543, "step": 19966 }, { "epoch": 1.1180983312800985, "grad_norm": 1.3372293710708618, "learning_rate": 9.978500000000001e-05, "loss": 0.639, "step": 19967 }, { "epoch": 1.1181543285922275, "grad_norm": 1.5341663360595703, "learning_rate": 9.979e-05, "loss": 0.4539, "step": 19968 }, { "epoch": 1.1182103259043565, "grad_norm": 1.4747451543807983, "learning_rate": 9.9795e-05, "loss": 0.4447, "step": 19969 }, { "epoch": 1.1182663232164856, "grad_norm": 1.4931836128234863, "learning_rate": 9.98e-05, "loss": 0.4791, "step": 19970 }, { "epoch": 1.1183223205286146, 
"grad_norm": 1.6403145790100098, "learning_rate": 9.9805e-05, "loss": 0.4906, "step": 19971 }, { "epoch": 1.1183783178407436, "grad_norm": 1.296626091003418, "learning_rate": 9.981000000000001e-05, "loss": 0.4241, "step": 19972 }, { "epoch": 1.1184343151528726, "grad_norm": 1.3703420162200928, "learning_rate": 9.9815e-05, "loss": 0.3721, "step": 19973 }, { "epoch": 1.1184903124650016, "grad_norm": 1.6404469013214111, "learning_rate": 9.982e-05, "loss": 0.4843, "step": 19974 }, { "epoch": 1.1185463097771307, "grad_norm": 1.3929030895233154, "learning_rate": 9.9825e-05, "loss": 0.518, "step": 19975 }, { "epoch": 1.1186023070892597, "grad_norm": 1.3429993391036987, "learning_rate": 9.983e-05, "loss": 0.3587, "step": 19976 }, { "epoch": 1.1186583044013887, "grad_norm": 1.367019534111023, "learning_rate": 9.9835e-05, "loss": 0.6241, "step": 19977 }, { "epoch": 1.1187143017135177, "grad_norm": 1.4297236204147339, "learning_rate": 9.984e-05, "loss": 0.557, "step": 19978 }, { "epoch": 1.1187702990256467, "grad_norm": 1.4048442840576172, "learning_rate": 9.9845e-05, "loss": 0.4619, "step": 19979 }, { "epoch": 1.1188262963377757, "grad_norm": 1.6790038347244263, "learning_rate": 9.985000000000001e-05, "loss": 0.5104, "step": 19980 }, { "epoch": 1.1188822936499048, "grad_norm": 1.2666529417037964, "learning_rate": 9.985500000000001e-05, "loss": 0.4194, "step": 19981 }, { "epoch": 1.1189382909620338, "grad_norm": 1.4653370380401611, "learning_rate": 9.986000000000001e-05, "loss": 0.3837, "step": 19982 }, { "epoch": 1.1189942882741628, "grad_norm": 1.3848682641983032, "learning_rate": 9.986500000000001e-05, "loss": 0.5892, "step": 19983 }, { "epoch": 1.1190502855862918, "grad_norm": 1.2159497737884521, "learning_rate": 9.987e-05, "loss": 0.4306, "step": 19984 }, { "epoch": 1.1191062828984208, "grad_norm": 1.0413435697555542, "learning_rate": 9.9875e-05, "loss": 0.3403, "step": 19985 }, { "epoch": 1.1191622802105499, "grad_norm": 1.1079907417297363, "learning_rate": 
9.988000000000001e-05, "loss": 0.3271, "step": 19986 }, { "epoch": 1.1192182775226789, "grad_norm": 1.1294519901275635, "learning_rate": 9.988500000000001e-05, "loss": 0.3808, "step": 19987 }, { "epoch": 1.119274274834808, "grad_norm": 1.372883677482605, "learning_rate": 9.989000000000001e-05, "loss": 0.4385, "step": 19988 }, { "epoch": 1.119330272146937, "grad_norm": 1.23002028465271, "learning_rate": 9.9895e-05, "loss": 0.4712, "step": 19989 }, { "epoch": 1.119386269459066, "grad_norm": 1.3323321342468262, "learning_rate": 9.99e-05, "loss": 0.3743, "step": 19990 }, { "epoch": 1.119442266771195, "grad_norm": 1.47197425365448, "learning_rate": 9.9905e-05, "loss": 0.4424, "step": 19991 }, { "epoch": 1.119498264083324, "grad_norm": 1.7073791027069092, "learning_rate": 9.991e-05, "loss": 0.4088, "step": 19992 }, { "epoch": 1.119554261395453, "grad_norm": 1.2094861268997192, "learning_rate": 9.991500000000001e-05, "loss": 0.3556, "step": 19993 }, { "epoch": 1.119610258707582, "grad_norm": 1.3336609601974487, "learning_rate": 9.992e-05, "loss": 0.4196, "step": 19994 }, { "epoch": 1.119666256019711, "grad_norm": 1.3385021686553955, "learning_rate": 9.9925e-05, "loss": 0.4877, "step": 19995 }, { "epoch": 1.11972225333184, "grad_norm": 1.3331232070922852, "learning_rate": 9.993e-05, "loss": 0.429, "step": 19996 }, { "epoch": 1.119778250643969, "grad_norm": 1.5061585903167725, "learning_rate": 9.9935e-05, "loss": 0.4839, "step": 19997 }, { "epoch": 1.119834247956098, "grad_norm": 1.2313766479492188, "learning_rate": 9.994e-05, "loss": 0.4743, "step": 19998 }, { "epoch": 1.1198902452682271, "grad_norm": 1.3281816244125366, "learning_rate": 9.9945e-05, "loss": 0.4664, "step": 19999 }, { "epoch": 1.1199462425803561, "grad_norm": 1.4939744472503662, "learning_rate": 9.995e-05, "loss": 0.6259, "step": 20000 }, { "epoch": 1.1200022398924852, "grad_norm": 1.670356035232544, "learning_rate": 9.995500000000001e-05, "loss": 0.3992, "step": 20001 }, { "epoch": 1.1200582372046142, 
"grad_norm": 1.2830830812454224, "learning_rate": 9.996000000000001e-05, "loss": 0.4945, "step": 20002 }, { "epoch": 1.1201142345167432, "grad_norm": 1.5486961603164673, "learning_rate": 9.996500000000001e-05, "loss": 0.3299, "step": 20003 }, { "epoch": 1.1201702318288722, "grad_norm": 1.5304434299468994, "learning_rate": 9.997e-05, "loss": 0.5126, "step": 20004 }, { "epoch": 1.1202262291410012, "grad_norm": 1.4435508251190186, "learning_rate": 9.9975e-05, "loss": 0.4213, "step": 20005 }, { "epoch": 1.1202822264531302, "grad_norm": 1.157757043838501, "learning_rate": 9.998e-05, "loss": 0.34, "step": 20006 }, { "epoch": 1.1203382237652593, "grad_norm": 1.3835796117782593, "learning_rate": 9.998500000000001e-05, "loss": 0.5336, "step": 20007 }, { "epoch": 1.1203942210773883, "grad_norm": 1.747640609741211, "learning_rate": 9.999000000000001e-05, "loss": 0.4485, "step": 20008 }, { "epoch": 1.1204502183895173, "grad_norm": 1.5142403841018677, "learning_rate": 9.999500000000001e-05, "loss": 0.6584, "step": 20009 }, { "epoch": 1.1205062157016463, "grad_norm": 1.684420108795166, "learning_rate": 0.0001, "loss": 0.5178, "step": 20010 }, { "epoch": 1.1205622130137753, "grad_norm": 1.2592800855636597, "learning_rate": 9.999973684210526e-05, "loss": 0.4609, "step": 20011 }, { "epoch": 1.1206182103259044, "grad_norm": 1.562971830368042, "learning_rate": 9.999947368421054e-05, "loss": 0.5383, "step": 20012 }, { "epoch": 1.1206742076380334, "grad_norm": 1.318636417388916, "learning_rate": 9.999921052631578e-05, "loss": 0.3895, "step": 20013 }, { "epoch": 1.1207302049501624, "grad_norm": 1.1930203437805176, "learning_rate": 9.999894736842106e-05, "loss": 0.3013, "step": 20014 }, { "epoch": 1.1207862022622914, "grad_norm": 1.4527639150619507, "learning_rate": 9.999868421052632e-05, "loss": 0.3946, "step": 20015 }, { "epoch": 1.1208421995744204, "grad_norm": 1.5577555894851685, "learning_rate": 9.999842105263159e-05, "loss": 0.5031, "step": 20016 }, { "epoch": 1.1208981968865495, 
"grad_norm": 1.3654688596725464, "learning_rate": 9.999815789473685e-05, "loss": 0.4687, "step": 20017 }, { "epoch": 1.1209541941986785, "grad_norm": 1.3246850967407227, "learning_rate": 9.999789473684211e-05, "loss": 0.4485, "step": 20018 }, { "epoch": 1.1210101915108075, "grad_norm": 1.4335442781448364, "learning_rate": 9.999763157894737e-05, "loss": 0.4369, "step": 20019 }, { "epoch": 1.1210661888229365, "grad_norm": 1.2487568855285645, "learning_rate": 9.999736842105264e-05, "loss": 0.4527, "step": 20020 }, { "epoch": 1.1211221861350655, "grad_norm": 1.457713007926941, "learning_rate": 9.99971052631579e-05, "loss": 0.4996, "step": 20021 }, { "epoch": 1.1211781834471946, "grad_norm": 1.7885032892227173, "learning_rate": 9.999684210526316e-05, "loss": 0.456, "step": 20022 }, { "epoch": 1.1212341807593236, "grad_norm": 1.3303115367889404, "learning_rate": 9.999657894736842e-05, "loss": 0.3477, "step": 20023 }, { "epoch": 1.1212901780714526, "grad_norm": 1.3550374507904053, "learning_rate": 9.99963157894737e-05, "loss": 0.3813, "step": 20024 }, { "epoch": 1.1213461753835816, "grad_norm": 1.4085867404937744, "learning_rate": 9.999605263157895e-05, "loss": 0.4964, "step": 20025 }, { "epoch": 1.1214021726957106, "grad_norm": 1.7398312091827393, "learning_rate": 9.999578947368421e-05, "loss": 0.4673, "step": 20026 }, { "epoch": 1.1214581700078396, "grad_norm": 1.3218722343444824, "learning_rate": 9.999552631578947e-05, "loss": 0.3901, "step": 20027 }, { "epoch": 1.1215141673199687, "grad_norm": 1.3908706903457642, "learning_rate": 9.999526315789473e-05, "loss": 0.5142, "step": 20028 }, { "epoch": 1.1215701646320977, "grad_norm": 1.4564788341522217, "learning_rate": 9.999500000000001e-05, "loss": 0.4245, "step": 20029 }, { "epoch": 1.1216261619442267, "grad_norm": 1.5256010293960571, "learning_rate": 9.999473684210527e-05, "loss": 0.3972, "step": 20030 }, { "epoch": 1.1216821592563557, "grad_norm": 1.153856873512268, "learning_rate": 9.999447368421053e-05, "loss": 
0.3373, "step": 20031 }, { "epoch": 1.1217381565684847, "grad_norm": 1.2020797729492188, "learning_rate": 9.999421052631579e-05, "loss": 0.3982, "step": 20032 }, { "epoch": 1.1217941538806138, "grad_norm": 1.2495990991592407, "learning_rate": 9.999394736842106e-05, "loss": 0.2986, "step": 20033 }, { "epoch": 1.1218501511927428, "grad_norm": 1.400131106376648, "learning_rate": 9.999368421052632e-05, "loss": 0.3613, "step": 20034 }, { "epoch": 1.1219061485048718, "grad_norm": 1.214959979057312, "learning_rate": 9.999342105263159e-05, "loss": 0.44, "step": 20035 }, { "epoch": 1.1219621458170008, "grad_norm": 1.2081224918365479, "learning_rate": 9.999315789473684e-05, "loss": 0.3817, "step": 20036 }, { "epoch": 1.1220181431291298, "grad_norm": 1.3629376888275146, "learning_rate": 9.999289473684211e-05, "loss": 0.5077, "step": 20037 }, { "epoch": 1.1220741404412589, "grad_norm": 1.4008897542953491, "learning_rate": 9.999263157894737e-05, "loss": 0.47, "step": 20038 }, { "epoch": 1.1221301377533879, "grad_norm": 1.4684243202209473, "learning_rate": 9.999236842105265e-05, "loss": 0.3975, "step": 20039 }, { "epoch": 1.122186135065517, "grad_norm": 1.8027886152267456, "learning_rate": 9.999210526315789e-05, "loss": 0.4434, "step": 20040 }, { "epoch": 1.122242132377646, "grad_norm": 1.8867990970611572, "learning_rate": 9.999184210526316e-05, "loss": 0.362, "step": 20041 }, { "epoch": 1.122298129689775, "grad_norm": 1.4636904001235962, "learning_rate": 9.999157894736842e-05, "loss": 0.4318, "step": 20042 }, { "epoch": 1.122354127001904, "grad_norm": 1.3516290187835693, "learning_rate": 9.99913157894737e-05, "loss": 0.5048, "step": 20043 }, { "epoch": 1.122410124314033, "grad_norm": 1.6282988786697388, "learning_rate": 9.999105263157896e-05, "loss": 0.643, "step": 20044 }, { "epoch": 1.122466121626162, "grad_norm": 1.0735915899276733, "learning_rate": 9.99907894736842e-05, "loss": 0.3689, "step": 20045 }, { "epoch": 1.122522118938291, "grad_norm": 1.4593976736068726, 
"learning_rate": 9.999052631578948e-05, "loss": 0.3651, "step": 20046 }, { "epoch": 1.12257811625042, "grad_norm": 1.146026849746704, "learning_rate": 9.999026315789474e-05, "loss": 0.3888, "step": 20047 }, { "epoch": 1.122634113562549, "grad_norm": 1.2889912128448486, "learning_rate": 9.999000000000001e-05, "loss": 0.3881, "step": 20048 }, { "epoch": 1.122690110874678, "grad_norm": 1.4708292484283447, "learning_rate": 9.998973684210527e-05, "loss": 0.4643, "step": 20049 }, { "epoch": 1.122746108186807, "grad_norm": 1.4523096084594727, "learning_rate": 9.998947368421053e-05, "loss": 0.5212, "step": 20050 }, { "epoch": 1.122802105498936, "grad_norm": 1.3572546243667603, "learning_rate": 9.998921052631579e-05, "loss": 0.485, "step": 20051 }, { "epoch": 1.1228581028110651, "grad_norm": 1.9971601963043213, "learning_rate": 9.998894736842106e-05, "loss": 0.3975, "step": 20052 }, { "epoch": 1.1229141001231941, "grad_norm": 1.4978028535842896, "learning_rate": 9.998868421052632e-05, "loss": 0.4827, "step": 20053 }, { "epoch": 1.1229700974353232, "grad_norm": 1.2745866775512695, "learning_rate": 9.998842105263158e-05, "loss": 0.3795, "step": 20054 }, { "epoch": 1.1230260947474522, "grad_norm": 1.484288215637207, "learning_rate": 9.998815789473684e-05, "loss": 0.3018, "step": 20055 }, { "epoch": 1.1230820920595812, "grad_norm": 1.2436482906341553, "learning_rate": 9.998789473684211e-05, "loss": 0.3835, "step": 20056 }, { "epoch": 1.1231380893717102, "grad_norm": 1.1885489225387573, "learning_rate": 9.998763157894737e-05, "loss": 0.3402, "step": 20057 }, { "epoch": 1.1231940866838392, "grad_norm": 1.3961933851242065, "learning_rate": 9.998736842105263e-05, "loss": 0.5356, "step": 20058 }, { "epoch": 1.1232500839959683, "grad_norm": 1.2171860933303833, "learning_rate": 9.99871052631579e-05, "loss": 0.4313, "step": 20059 }, { "epoch": 1.1233060813080973, "grad_norm": 1.3281238079071045, "learning_rate": 9.998684210526317e-05, "loss": 0.5049, "step": 20060 }, { "epoch": 
1.1233620786202263, "grad_norm": 1.1421741247177124, "learning_rate": 9.998657894736843e-05, "loss": 0.3011, "step": 20061 }, { "epoch": 1.1234180759323553, "grad_norm": 1.3167433738708496, "learning_rate": 9.998631578947369e-05, "loss": 0.6157, "step": 20062 }, { "epoch": 1.1234740732444843, "grad_norm": 1.498759150505066, "learning_rate": 9.998605263157895e-05, "loss": 0.3853, "step": 20063 }, { "epoch": 1.1235300705566134, "grad_norm": 1.7297914028167725, "learning_rate": 9.99857894736842e-05, "loss": 0.4637, "step": 20064 }, { "epoch": 1.1235860678687424, "grad_norm": 1.525895357131958, "learning_rate": 9.998552631578948e-05, "loss": 0.5171, "step": 20065 }, { "epoch": 1.1236420651808714, "grad_norm": 1.3542238473892212, "learning_rate": 9.998526315789474e-05, "loss": 0.3932, "step": 20066 }, { "epoch": 1.1236980624930004, "grad_norm": 1.4197075366973877, "learning_rate": 9.998500000000001e-05, "loss": 0.406, "step": 20067 }, { "epoch": 1.1237540598051294, "grad_norm": 1.2412537336349487, "learning_rate": 9.998473684210526e-05, "loss": 0.3571, "step": 20068 }, { "epoch": 1.1238100571172585, "grad_norm": 1.256371021270752, "learning_rate": 9.998447368421053e-05, "loss": 0.3646, "step": 20069 }, { "epoch": 1.1238660544293875, "grad_norm": 1.223979115486145, "learning_rate": 9.998421052631579e-05, "loss": 0.4346, "step": 20070 }, { "epoch": 1.1239220517415165, "grad_norm": 1.237862229347229, "learning_rate": 9.998394736842107e-05, "loss": 0.4622, "step": 20071 }, { "epoch": 1.1239780490536455, "grad_norm": 1.4763144254684448, "learning_rate": 9.998368421052632e-05, "loss": 0.4447, "step": 20072 }, { "epoch": 1.1240340463657745, "grad_norm": 1.2453192472457886, "learning_rate": 9.998342105263158e-05, "loss": 0.4321, "step": 20073 }, { "epoch": 1.1240900436779035, "grad_norm": 1.6521111726760864, "learning_rate": 9.998315789473684e-05, "loss": 0.4248, "step": 20074 }, { "epoch": 1.1241460409900326, "grad_norm": 1.4103846549987793, "learning_rate": 
9.998289473684212e-05, "loss": 0.5108, "step": 20075 }, { "epoch": 1.1242020383021616, "grad_norm": 1.3992470502853394, "learning_rate": 9.998263157894738e-05, "loss": 0.432, "step": 20076 }, { "epoch": 1.1242580356142906, "grad_norm": 1.2738618850708008, "learning_rate": 9.998236842105264e-05, "loss": 0.4344, "step": 20077 }, { "epoch": 1.1243140329264196, "grad_norm": 1.4086074829101562, "learning_rate": 9.99821052631579e-05, "loss": 0.4177, "step": 20078 }, { "epoch": 1.1243700302385486, "grad_norm": 1.2592560052871704, "learning_rate": 9.998184210526317e-05, "loss": 0.3631, "step": 20079 }, { "epoch": 1.1244260275506777, "grad_norm": 1.4146602153778076, "learning_rate": 9.998157894736843e-05, "loss": 0.4133, "step": 20080 }, { "epoch": 1.1244820248628067, "grad_norm": 1.422480821609497, "learning_rate": 9.998131578947369e-05, "loss": 0.4386, "step": 20081 }, { "epoch": 1.1245380221749357, "grad_norm": 1.299598217010498, "learning_rate": 9.998105263157895e-05, "loss": 0.4574, "step": 20082 }, { "epoch": 1.1245940194870647, "grad_norm": 1.1920071840286255, "learning_rate": 9.998078947368421e-05, "loss": 0.4934, "step": 20083 }, { "epoch": 1.1246500167991937, "grad_norm": 1.2635859251022339, "learning_rate": 9.998052631578948e-05, "loss": 0.4196, "step": 20084 }, { "epoch": 1.1247060141113228, "grad_norm": 1.3033627271652222, "learning_rate": 9.998026315789474e-05, "loss": 0.3937, "step": 20085 }, { "epoch": 1.1247620114234516, "grad_norm": 1.5347754955291748, "learning_rate": 9.998e-05, "loss": 0.5463, "step": 20086 }, { "epoch": 1.1248180087355806, "grad_norm": 1.1636805534362793, "learning_rate": 9.997973684210526e-05, "loss": 0.3372, "step": 20087 }, { "epoch": 1.1248740060477096, "grad_norm": 1.404335379600525, "learning_rate": 9.997947368421053e-05, "loss": 0.3842, "step": 20088 }, { "epoch": 1.1249300033598386, "grad_norm": 1.210408329963684, "learning_rate": 9.99792105263158e-05, "loss": 0.4594, "step": 20089 }, { "epoch": 1.1249860006719676, "grad_norm": 
1.3475546836853027, "learning_rate": 9.997894736842107e-05, "loss": 0.4462, "step": 20090 }, { "epoch": 1.1250419979840967, "grad_norm": 1.3855321407318115, "learning_rate": 9.997868421052631e-05, "loss": 0.4473, "step": 20091 }, { "epoch": 1.1250979952962257, "grad_norm": 1.3210816383361816, "learning_rate": 9.997842105263159e-05, "loss": 0.4109, "step": 20092 }, { "epoch": 1.1251539926083547, "grad_norm": 1.2670806646347046, "learning_rate": 9.997815789473685e-05, "loss": 0.3964, "step": 20093 }, { "epoch": 1.1252099899204837, "grad_norm": 1.292494297027588, "learning_rate": 9.997789473684212e-05, "loss": 0.3616, "step": 20094 }, { "epoch": 1.1252659872326127, "grad_norm": 1.4878965616226196, "learning_rate": 9.997763157894737e-05, "loss": 0.4488, "step": 20095 }, { "epoch": 1.1253219845447417, "grad_norm": 1.2568440437316895, "learning_rate": 9.997736842105264e-05, "loss": 0.425, "step": 20096 }, { "epoch": 1.1253779818568708, "grad_norm": 1.6238255500793457, "learning_rate": 9.99771052631579e-05, "loss": 0.4368, "step": 20097 }, { "epoch": 1.1254339791689998, "grad_norm": 1.3254094123840332, "learning_rate": 9.997684210526316e-05, "loss": 0.3848, "step": 20098 }, { "epoch": 1.1254899764811288, "grad_norm": 1.2998703718185425, "learning_rate": 9.997657894736843e-05, "loss": 0.3981, "step": 20099 }, { "epoch": 1.1255459737932578, "grad_norm": 1.4188164472579956, "learning_rate": 9.997631578947368e-05, "loss": 0.569, "step": 20100 }, { "epoch": 1.1256019711053868, "grad_norm": 1.50592839717865, "learning_rate": 9.997605263157895e-05, "loss": 0.665, "step": 20101 }, { "epoch": 1.1256579684175159, "grad_norm": 1.3922353982925415, "learning_rate": 9.997578947368421e-05, "loss": 0.5353, "step": 20102 }, { "epoch": 1.1257139657296449, "grad_norm": 1.3044599294662476, "learning_rate": 9.997552631578948e-05, "loss": 0.439, "step": 20103 }, { "epoch": 1.125769963041774, "grad_norm": 1.473437786102295, "learning_rate": 9.997526315789474e-05, "loss": 0.5626, "step": 20104 
}, { "epoch": 1.125825960353903, "grad_norm": 1.2697563171386719, "learning_rate": 9.9975e-05, "loss": 0.501, "step": 20105 }, { "epoch": 1.125881957666032, "grad_norm": 1.4197789430618286, "learning_rate": 9.997473684210526e-05, "loss": 0.5738, "step": 20106 }, { "epoch": 1.125937954978161, "grad_norm": 1.2517338991165161, "learning_rate": 9.997447368421054e-05, "loss": 0.434, "step": 20107 }, { "epoch": 1.12599395229029, "grad_norm": 1.3196479082107544, "learning_rate": 9.99742105263158e-05, "loss": 0.5725, "step": 20108 }, { "epoch": 1.126049949602419, "grad_norm": 1.2089532613754272, "learning_rate": 9.997394736842106e-05, "loss": 0.4245, "step": 20109 }, { "epoch": 1.126105946914548, "grad_norm": 1.2290751934051514, "learning_rate": 9.997368421052632e-05, "loss": 0.5232, "step": 20110 }, { "epoch": 1.126161944226677, "grad_norm": 1.5362193584442139, "learning_rate": 9.997342105263159e-05, "loss": 0.5124, "step": 20111 }, { "epoch": 1.126217941538806, "grad_norm": 1.1183627843856812, "learning_rate": 9.997315789473685e-05, "loss": 0.3339, "step": 20112 }, { "epoch": 1.126273938850935, "grad_norm": 1.3534480333328247, "learning_rate": 9.997289473684211e-05, "loss": 0.37, "step": 20113 }, { "epoch": 1.126329936163064, "grad_norm": 1.407397747039795, "learning_rate": 9.997263157894737e-05, "loss": 0.3819, "step": 20114 }, { "epoch": 1.1263859334751931, "grad_norm": 1.2968662977218628, "learning_rate": 9.997236842105263e-05, "loss": 0.4112, "step": 20115 }, { "epoch": 1.1264419307873221, "grad_norm": 1.645027756690979, "learning_rate": 9.99721052631579e-05, "loss": 0.5773, "step": 20116 }, { "epoch": 1.1264979280994512, "grad_norm": 1.0771293640136719, "learning_rate": 9.997184210526316e-05, "loss": 0.3627, "step": 20117 }, { "epoch": 1.1265539254115802, "grad_norm": 1.7107199430465698, "learning_rate": 9.997157894736842e-05, "loss": 0.6184, "step": 20118 }, { "epoch": 1.1266099227237092, "grad_norm": 1.67527174949646, "learning_rate": 9.997131578947368e-05, 
"loss": 0.6594, "step": 20119 }, { "epoch": 1.1266659200358382, "grad_norm": 1.3696786165237427, "learning_rate": 9.997105263157895e-05, "loss": 0.3945, "step": 20120 }, { "epoch": 1.1267219173479672, "grad_norm": 1.4254775047302246, "learning_rate": 9.997078947368421e-05, "loss": 0.5711, "step": 20121 }, { "epoch": 1.1267779146600962, "grad_norm": 1.559033989906311, "learning_rate": 9.997052631578949e-05, "loss": 0.5888, "step": 20122 }, { "epoch": 1.1268339119722253, "grad_norm": 1.3969683647155762, "learning_rate": 9.997026315789473e-05, "loss": 0.4524, "step": 20123 }, { "epoch": 1.1268899092843543, "grad_norm": 1.4289509057998657, "learning_rate": 9.997e-05, "loss": 0.4408, "step": 20124 }, { "epoch": 1.1269459065964833, "grad_norm": 1.3135063648223877, "learning_rate": 9.996973684210527e-05, "loss": 0.5331, "step": 20125 }, { "epoch": 1.1270019039086123, "grad_norm": 1.2597249746322632, "learning_rate": 9.996947368421054e-05, "loss": 0.3825, "step": 20126 }, { "epoch": 1.1270579012207413, "grad_norm": 1.3031622171401978, "learning_rate": 9.99692105263158e-05, "loss": 0.5424, "step": 20127 }, { "epoch": 1.1271138985328704, "grad_norm": 1.5318948030471802, "learning_rate": 9.996894736842106e-05, "loss": 0.4662, "step": 20128 }, { "epoch": 1.1271698958449994, "grad_norm": 1.6408461332321167, "learning_rate": 9.996868421052632e-05, "loss": 0.4568, "step": 20129 }, { "epoch": 1.1272258931571284, "grad_norm": 1.4029548168182373, "learning_rate": 9.996842105263159e-05, "loss": 0.5072, "step": 20130 }, { "epoch": 1.1272818904692574, "grad_norm": 1.8679447174072266, "learning_rate": 9.996815789473685e-05, "loss": 0.6695, "step": 20131 }, { "epoch": 1.1273378877813864, "grad_norm": 1.3182162046432495, "learning_rate": 9.99678947368421e-05, "loss": 0.477, "step": 20132 }, { "epoch": 1.1273938850935155, "grad_norm": 1.8410577774047852, "learning_rate": 9.996763157894737e-05, "loss": 0.5332, "step": 20133 }, { "epoch": 1.1274498824056445, "grad_norm": 1.2833086252212524, 
"learning_rate": 9.996736842105263e-05, "loss": 0.4246, "step": 20134 }, { "epoch": 1.1275058797177735, "grad_norm": 1.196503758430481, "learning_rate": 9.99671052631579e-05, "loss": 0.4582, "step": 20135 }, { "epoch": 1.1275618770299025, "grad_norm": 1.2653039693832397, "learning_rate": 9.996684210526316e-05, "loss": 0.4371, "step": 20136 }, { "epoch": 1.1276178743420315, "grad_norm": 1.2870479822158813, "learning_rate": 9.996657894736842e-05, "loss": 0.3413, "step": 20137 }, { "epoch": 1.1276738716541606, "grad_norm": 1.4446899890899658, "learning_rate": 9.996631578947368e-05, "loss": 0.3806, "step": 20138 }, { "epoch": 1.1277298689662896, "grad_norm": 1.706510066986084, "learning_rate": 9.996605263157896e-05, "loss": 0.6093, "step": 20139 }, { "epoch": 1.1277858662784186, "grad_norm": 1.2224639654159546, "learning_rate": 9.996578947368422e-05, "loss": 0.4163, "step": 20140 }, { "epoch": 1.1278418635905476, "grad_norm": 1.2918223142623901, "learning_rate": 9.996552631578948e-05, "loss": 0.4171, "step": 20141 }, { "epoch": 1.1278978609026766, "grad_norm": 1.895620346069336, "learning_rate": 9.996526315789474e-05, "loss": 0.434, "step": 20142 }, { "epoch": 1.1279538582148056, "grad_norm": 1.5262978076934814, "learning_rate": 9.996500000000001e-05, "loss": 0.4186, "step": 20143 }, { "epoch": 1.1280098555269347, "grad_norm": 1.4467686414718628, "learning_rate": 9.996473684210527e-05, "loss": 0.4421, "step": 20144 }, { "epoch": 1.1280658528390637, "grad_norm": 1.2918657064437866, "learning_rate": 9.996447368421054e-05, "loss": 0.4428, "step": 20145 }, { "epoch": 1.1281218501511927, "grad_norm": 1.3140455484390259, "learning_rate": 9.996421052631579e-05, "loss": 0.4878, "step": 20146 }, { "epoch": 1.1281778474633217, "grad_norm": 1.4031810760498047, "learning_rate": 9.996394736842106e-05, "loss": 0.5236, "step": 20147 }, { "epoch": 1.1282338447754507, "grad_norm": 1.2455710172653198, "learning_rate": 9.996368421052632e-05, "loss": 0.4509, "step": 20148 }, { "epoch": 
1.1282898420875798, "grad_norm": 1.3842425346374512, "learning_rate": 9.996342105263158e-05, "loss": 0.4766, "step": 20149 }, { "epoch": 1.1283458393997088, "grad_norm": 1.1807523965835571, "learning_rate": 9.996315789473684e-05, "loss": 0.401, "step": 20150 }, { "epoch": 1.1284018367118378, "grad_norm": 1.1878485679626465, "learning_rate": 9.99628947368421e-05, "loss": 0.455, "step": 20151 }, { "epoch": 1.1284578340239668, "grad_norm": 1.2592793703079224, "learning_rate": 9.996263157894737e-05, "loss": 0.4084, "step": 20152 }, { "epoch": 1.1285138313360958, "grad_norm": 1.6455912590026855, "learning_rate": 9.996236842105263e-05, "loss": 0.4427, "step": 20153 }, { "epoch": 1.1285698286482249, "grad_norm": 1.357524037361145, "learning_rate": 9.996210526315791e-05, "loss": 0.3725, "step": 20154 }, { "epoch": 1.1286258259603539, "grad_norm": 1.5846613645553589, "learning_rate": 9.996184210526315e-05, "loss": 0.4286, "step": 20155 }, { "epoch": 1.128681823272483, "grad_norm": 1.5454031229019165, "learning_rate": 9.996157894736843e-05, "loss": 0.5492, "step": 20156 }, { "epoch": 1.128737820584612, "grad_norm": 1.4770495891571045, "learning_rate": 9.996131578947369e-05, "loss": 0.4957, "step": 20157 }, { "epoch": 1.128793817896741, "grad_norm": 1.6208133697509766, "learning_rate": 9.996105263157896e-05, "loss": 0.4944, "step": 20158 }, { "epoch": 1.12884981520887, "grad_norm": 1.5294944047927856, "learning_rate": 9.996078947368422e-05, "loss": 0.5993, "step": 20159 }, { "epoch": 1.128905812520999, "grad_norm": 1.2093846797943115, "learning_rate": 9.996052631578948e-05, "loss": 0.3749, "step": 20160 }, { "epoch": 1.128961809833128, "grad_norm": 1.3696269989013672, "learning_rate": 9.996026315789474e-05, "loss": 0.4462, "step": 20161 }, { "epoch": 1.129017807145257, "grad_norm": 1.315756916999817, "learning_rate": 9.996000000000001e-05, "loss": 0.4758, "step": 20162 }, { "epoch": 1.129073804457386, "grad_norm": 1.4041450023651123, "learning_rate": 9.995973684210527e-05, 
"loss": 0.3999, "step": 20163 }, { "epoch": 1.129129801769515, "grad_norm": 1.455556035041809, "learning_rate": 9.995947368421053e-05, "loss": 0.4594, "step": 20164 }, { "epoch": 1.129185799081644, "grad_norm": 1.2051758766174316, "learning_rate": 9.995921052631579e-05, "loss": 0.3883, "step": 20165 }, { "epoch": 1.129241796393773, "grad_norm": 1.2870639562606812, "learning_rate": 9.995894736842105e-05, "loss": 0.5278, "step": 20166 }, { "epoch": 1.129297793705902, "grad_norm": 1.8627378940582275, "learning_rate": 9.995868421052632e-05, "loss": 0.4947, "step": 20167 }, { "epoch": 1.1293537910180311, "grad_norm": 1.3675086498260498, "learning_rate": 9.995842105263158e-05, "loss": 0.3386, "step": 20168 }, { "epoch": 1.1294097883301601, "grad_norm": 1.2064725160598755, "learning_rate": 9.995815789473684e-05, "loss": 0.4058, "step": 20169 }, { "epoch": 1.1294657856422892, "grad_norm": 1.3493707180023193, "learning_rate": 9.99578947368421e-05, "loss": 0.4858, "step": 20170 }, { "epoch": 1.1295217829544182, "grad_norm": 1.31057870388031, "learning_rate": 9.995763157894738e-05, "loss": 0.3552, "step": 20171 }, { "epoch": 1.1295777802665472, "grad_norm": 1.3686400651931763, "learning_rate": 9.995736842105264e-05, "loss": 0.4329, "step": 20172 }, { "epoch": 1.1296337775786762, "grad_norm": 1.170465350151062, "learning_rate": 9.99571052631579e-05, "loss": 0.3591, "step": 20173 }, { "epoch": 1.1296897748908052, "grad_norm": 1.2574832439422607, "learning_rate": 9.995684210526316e-05, "loss": 0.4382, "step": 20174 }, { "epoch": 1.1297457722029343, "grad_norm": 1.652482509613037, "learning_rate": 9.995657894736843e-05, "loss": 0.4608, "step": 20175 }, { "epoch": 1.1298017695150633, "grad_norm": 1.4721181392669678, "learning_rate": 9.995631578947369e-05, "loss": 0.4936, "step": 20176 }, { "epoch": 1.1298577668271923, "grad_norm": 1.2871087789535522, "learning_rate": 9.995605263157896e-05, "loss": 0.3359, "step": 20177 }, { "epoch": 1.1299137641393213, "grad_norm": 
1.4253180027008057, "learning_rate": 9.995578947368421e-05, "loss": 0.4732, "step": 20178 }, { "epoch": 1.1299697614514503, "grad_norm": 1.4310779571533203, "learning_rate": 9.995552631578948e-05, "loss": 0.45, "step": 20179 }, { "epoch": 1.1300257587635794, "grad_norm": 1.3896969556808472, "learning_rate": 9.995526315789474e-05, "loss": 0.4549, "step": 20180 }, { "epoch": 1.1300817560757084, "grad_norm": 4.925571441650391, "learning_rate": 9.995500000000001e-05, "loss": 0.4441, "step": 20181 }, { "epoch": 1.1301377533878374, "grad_norm": 1.9883801937103271, "learning_rate": 9.995473684210527e-05, "loss": 0.431, "step": 20182 }, { "epoch": 1.1301937506999664, "grad_norm": 1.221846580505371, "learning_rate": 9.995447368421052e-05, "loss": 0.3792, "step": 20183 }, { "epoch": 1.1302497480120954, "grad_norm": 1.3770896196365356, "learning_rate": 9.99542105263158e-05, "loss": 0.5133, "step": 20184 }, { "epoch": 1.1303057453242245, "grad_norm": 1.4019097089767456, "learning_rate": 9.995394736842105e-05, "loss": 0.5023, "step": 20185 }, { "epoch": 1.1303617426363535, "grad_norm": 1.2293190956115723, "learning_rate": 9.995368421052633e-05, "loss": 0.4406, "step": 20186 }, { "epoch": 1.1304177399484825, "grad_norm": 1.328400731086731, "learning_rate": 9.995342105263157e-05, "loss": 0.4856, "step": 20187 }, { "epoch": 1.1304737372606115, "grad_norm": 1.5837477445602417, "learning_rate": 9.995315789473685e-05, "loss": 0.4598, "step": 20188 }, { "epoch": 1.1305297345727405, "grad_norm": 1.52598237991333, "learning_rate": 9.99528947368421e-05, "loss": 0.4943, "step": 20189 }, { "epoch": 1.1305857318848695, "grad_norm": 1.2836613655090332, "learning_rate": 9.995263157894738e-05, "loss": 0.431, "step": 20190 }, { "epoch": 1.1306417291969986, "grad_norm": 2.122936487197876, "learning_rate": 9.995236842105264e-05, "loss": 0.5085, "step": 20191 }, { "epoch": 1.1306977265091276, "grad_norm": 1.7128816843032837, "learning_rate": 9.99521052631579e-05, "loss": 0.4088, "step": 20192 }, { 
"epoch": 1.1307537238212566, "grad_norm": 2.03788685798645, "learning_rate": 9.995184210526316e-05, "loss": 0.6117, "step": 20193 }, { "epoch": 1.1308097211333856, "grad_norm": 1.322121024131775, "learning_rate": 9.995157894736843e-05, "loss": 0.323, "step": 20194 }, { "epoch": 1.1308657184455146, "grad_norm": 1.515406847000122, "learning_rate": 9.995131578947369e-05, "loss": 0.4084, "step": 20195 }, { "epoch": 1.1309217157576437, "grad_norm": 1.3654356002807617, "learning_rate": 9.995105263157895e-05, "loss": 0.3751, "step": 20196 }, { "epoch": 1.1309777130697727, "grad_norm": 1.6777520179748535, "learning_rate": 9.995078947368421e-05, "loss": 0.5004, "step": 20197 }, { "epoch": 1.1310337103819017, "grad_norm": 1.3237303495407104, "learning_rate": 9.995052631578948e-05, "loss": 0.4237, "step": 20198 }, { "epoch": 1.1310897076940307, "grad_norm": 1.4779011011123657, "learning_rate": 9.995026315789474e-05, "loss": 0.3831, "step": 20199 }, { "epoch": 1.1311457050061597, "grad_norm": 1.2313202619552612, "learning_rate": 9.995e-05, "loss": 0.3035, "step": 20200 }, { "epoch": 1.1312017023182888, "grad_norm": 1.7041383981704712, "learning_rate": 9.994973684210526e-05, "loss": 0.4756, "step": 20201 }, { "epoch": 1.1312576996304178, "grad_norm": 1.3493001461029053, "learning_rate": 9.994947368421052e-05, "loss": 0.6141, "step": 20202 }, { "epoch": 1.1313136969425468, "grad_norm": 1.3731756210327148, "learning_rate": 9.99492105263158e-05, "loss": 0.3948, "step": 20203 }, { "epoch": 1.1313696942546758, "grad_norm": 1.9412846565246582, "learning_rate": 9.994894736842106e-05, "loss": 0.5277, "step": 20204 }, { "epoch": 1.1314256915668048, "grad_norm": 1.3892247676849365, "learning_rate": 9.994868421052632e-05, "loss": 0.5429, "step": 20205 }, { "epoch": 1.1314816888789339, "grad_norm": 1.6253856420516968, "learning_rate": 9.994842105263158e-05, "loss": 0.611, "step": 20206 }, { "epoch": 1.1315376861910629, "grad_norm": 1.4898595809936523, "learning_rate": 
9.994815789473685e-05, "loss": 0.3932, "step": 20207 }, { "epoch": 1.131593683503192, "grad_norm": 1.2663226127624512, "learning_rate": 9.994789473684211e-05, "loss": 0.5623, "step": 20208 }, { "epoch": 1.131649680815321, "grad_norm": 1.4919707775115967, "learning_rate": 9.994763157894738e-05, "loss": 0.4961, "step": 20209 }, { "epoch": 1.13170567812745, "grad_norm": 1.314507007598877, "learning_rate": 9.994736842105263e-05, "loss": 0.503, "step": 20210 }, { "epoch": 1.131761675439579, "grad_norm": 1.7011282444000244, "learning_rate": 9.99471052631579e-05, "loss": 0.4651, "step": 20211 }, { "epoch": 1.131817672751708, "grad_norm": 1.2699919939041138, "learning_rate": 9.994684210526316e-05, "loss": 0.493, "step": 20212 }, { "epoch": 1.131873670063837, "grad_norm": 1.4917173385620117, "learning_rate": 9.994657894736843e-05, "loss": 0.4717, "step": 20213 }, { "epoch": 1.131929667375966, "grad_norm": 1.2997092008590698, "learning_rate": 9.99463157894737e-05, "loss": 0.5401, "step": 20214 }, { "epoch": 1.131985664688095, "grad_norm": 2.0021791458129883, "learning_rate": 9.994605263157895e-05, "loss": 0.6804, "step": 20215 }, { "epoch": 1.132041662000224, "grad_norm": 1.6106153726577759, "learning_rate": 9.994578947368421e-05, "loss": 0.3436, "step": 20216 }, { "epoch": 1.132097659312353, "grad_norm": 1.16148841381073, "learning_rate": 9.994552631578949e-05, "loss": 0.4427, "step": 20217 }, { "epoch": 1.132153656624482, "grad_norm": 1.8368303775787354, "learning_rate": 9.994526315789475e-05, "loss": 0.5691, "step": 20218 }, { "epoch": 1.132209653936611, "grad_norm": 1.274310827255249, "learning_rate": 9.9945e-05, "loss": 0.4706, "step": 20219 }, { "epoch": 1.1322656512487401, "grad_norm": 1.1377419233322144, "learning_rate": 9.994473684210527e-05, "loss": 0.4518, "step": 20220 }, { "epoch": 1.1323216485608691, "grad_norm": 1.1848833560943604, "learning_rate": 9.994447368421053e-05, "loss": 0.3248, "step": 20221 }, { "epoch": 1.1323776458729982, "grad_norm": 
1.4867243766784668, "learning_rate": 9.99442105263158e-05, "loss": 0.4576, "step": 20222 }, { "epoch": 1.1324336431851272, "grad_norm": 1.7923411130905151, "learning_rate": 9.994394736842106e-05, "loss": 0.4153, "step": 20223 }, { "epoch": 1.1324896404972562, "grad_norm": 1.1907631158828735, "learning_rate": 9.994368421052632e-05, "loss": 0.3663, "step": 20224 }, { "epoch": 1.1325456378093852, "grad_norm": 1.2464754581451416, "learning_rate": 9.994342105263158e-05, "loss": 0.4706, "step": 20225 }, { "epoch": 1.1326016351215142, "grad_norm": 1.1599808931350708, "learning_rate": 9.994315789473685e-05, "loss": 0.4196, "step": 20226 }, { "epoch": 1.1326576324336433, "grad_norm": 1.3001772165298462, "learning_rate": 9.994289473684211e-05, "loss": 0.4365, "step": 20227 }, { "epoch": 1.1327136297457723, "grad_norm": 1.241241455078125, "learning_rate": 9.994263157894737e-05, "loss": 0.3607, "step": 20228 }, { "epoch": 1.1327696270579013, "grad_norm": 1.176579475402832, "learning_rate": 9.994236842105263e-05, "loss": 0.2961, "step": 20229 }, { "epoch": 1.1328256243700303, "grad_norm": 1.525162696838379, "learning_rate": 9.99421052631579e-05, "loss": 0.5938, "step": 20230 }, { "epoch": 1.1328816216821593, "grad_norm": 1.3000282049179077, "learning_rate": 9.994184210526316e-05, "loss": 0.4874, "step": 20231 }, { "epoch": 1.1329376189942884, "grad_norm": 1.2870616912841797, "learning_rate": 9.994157894736844e-05, "loss": 0.4187, "step": 20232 }, { "epoch": 1.1329936163064174, "grad_norm": 1.3188973665237427, "learning_rate": 9.994131578947368e-05, "loss": 0.4117, "step": 20233 }, { "epoch": 1.1330496136185464, "grad_norm": 1.2505483627319336, "learning_rate": 9.994105263157896e-05, "loss": 0.302, "step": 20234 }, { "epoch": 1.1331056109306754, "grad_norm": 1.1003673076629639, "learning_rate": 9.994078947368422e-05, "loss": 0.383, "step": 20235 }, { "epoch": 1.1331616082428044, "grad_norm": 1.0437648296356201, "learning_rate": 9.994052631578948e-05, "loss": 0.4162, "step": 
20236 }, { "epoch": 1.1332176055549334, "grad_norm": 1.2069073915481567, "learning_rate": 9.994026315789475e-05, "loss": 0.3115, "step": 20237 }, { "epoch": 1.1332736028670625, "grad_norm": 1.2497919797897339, "learning_rate": 9.994e-05, "loss": 0.4764, "step": 20238 }, { "epoch": 1.1333296001791915, "grad_norm": 1.428944706916809, "learning_rate": 9.993973684210527e-05, "loss": 0.5107, "step": 20239 }, { "epoch": 1.1333855974913205, "grad_norm": 1.3723713159561157, "learning_rate": 9.993947368421053e-05, "loss": 0.5085, "step": 20240 }, { "epoch": 1.1334415948034495, "grad_norm": 1.3960775136947632, "learning_rate": 9.99392105263158e-05, "loss": 0.5389, "step": 20241 }, { "epoch": 1.1334975921155785, "grad_norm": 1.1459434032440186, "learning_rate": 9.993894736842105e-05, "loss": 0.3664, "step": 20242 }, { "epoch": 1.1335535894277076, "grad_norm": 1.4901725053787231, "learning_rate": 9.993868421052632e-05, "loss": 0.5682, "step": 20243 }, { "epoch": 1.1336095867398366, "grad_norm": 1.2028594017028809, "learning_rate": 9.993842105263158e-05, "loss": 0.2462, "step": 20244 }, { "epoch": 1.1336655840519656, "grad_norm": 1.3778281211853027, "learning_rate": 9.993815789473685e-05, "loss": 0.4629, "step": 20245 }, { "epoch": 1.1337215813640946, "grad_norm": 1.487456202507019, "learning_rate": 9.993789473684211e-05, "loss": 0.4372, "step": 20246 }, { "epoch": 1.1337775786762236, "grad_norm": 2.2966628074645996, "learning_rate": 9.993763157894737e-05, "loss": 0.6028, "step": 20247 }, { "epoch": 1.1338335759883527, "grad_norm": 1.343872308731079, "learning_rate": 9.993736842105263e-05, "loss": 0.3569, "step": 20248 }, { "epoch": 1.1338895733004817, "grad_norm": 1.3263475894927979, "learning_rate": 9.99371052631579e-05, "loss": 0.4684, "step": 20249 }, { "epoch": 1.1339455706126107, "grad_norm": 1.538833737373352, "learning_rate": 9.993684210526317e-05, "loss": 0.5163, "step": 20250 }, { "epoch": 1.1340015679247397, "grad_norm": 1.429449200630188, "learning_rate": 
9.993657894736843e-05, "loss": 0.454, "step": 20251 }, { "epoch": 1.1340575652368687, "grad_norm": 1.3572133779525757, "learning_rate": 9.993631578947369e-05, "loss": 0.6305, "step": 20252 }, { "epoch": 1.1341135625489978, "grad_norm": 1.3160983324050903, "learning_rate": 9.993605263157895e-05, "loss": 0.3628, "step": 20253 }, { "epoch": 1.1341695598611268, "grad_norm": 1.3186285495758057, "learning_rate": 9.993578947368422e-05, "loss": 0.467, "step": 20254 }, { "epoch": 1.1342255571732558, "grad_norm": 1.1341521739959717, "learning_rate": 9.993552631578948e-05, "loss": 0.3895, "step": 20255 }, { "epoch": 1.1342815544853848, "grad_norm": 1.5238111019134521, "learning_rate": 9.993526315789474e-05, "loss": 0.3964, "step": 20256 }, { "epoch": 1.1343375517975138, "grad_norm": 1.2447681427001953, "learning_rate": 9.9935e-05, "loss": 0.4356, "step": 20257 }, { "epoch": 1.1343935491096429, "grad_norm": 1.2781203985214233, "learning_rate": 9.993473684210527e-05, "loss": 0.3634, "step": 20258 }, { "epoch": 1.1344495464217719, "grad_norm": 1.5016010999679565, "learning_rate": 9.993447368421053e-05, "loss": 0.4863, "step": 20259 }, { "epoch": 1.134505543733901, "grad_norm": 1.2829502820968628, "learning_rate": 9.993421052631579e-05, "loss": 0.465, "step": 20260 }, { "epoch": 1.13456154104603, "grad_norm": 1.2866705656051636, "learning_rate": 9.993394736842105e-05, "loss": 0.3207, "step": 20261 }, { "epoch": 1.134617538358159, "grad_norm": 1.5260947942733765, "learning_rate": 9.993368421052632e-05, "loss": 0.4029, "step": 20262 }, { "epoch": 1.134673535670288, "grad_norm": 1.3858051300048828, "learning_rate": 9.993342105263158e-05, "loss": 0.3886, "step": 20263 }, { "epoch": 1.134729532982417, "grad_norm": 1.4056178331375122, "learning_rate": 9.993315789473686e-05, "loss": 0.6166, "step": 20264 }, { "epoch": 1.134785530294546, "grad_norm": 2.4712114334106445, "learning_rate": 9.99328947368421e-05, "loss": 0.4633, "step": 20265 }, { "epoch": 1.1348415276066748, "grad_norm": 
1.2079639434814453, "learning_rate": 9.993263157894738e-05, "loss": 0.385, "step": 20266 }, { "epoch": 1.1348975249188038, "grad_norm": 1.3643262386322021, "learning_rate": 9.993236842105264e-05, "loss": 0.4943, "step": 20267 }, { "epoch": 1.1349535222309328, "grad_norm": 1.1451470851898193, "learning_rate": 9.993210526315791e-05, "loss": 0.3125, "step": 20268 }, { "epoch": 1.1350095195430618, "grad_norm": 1.769522786140442, "learning_rate": 9.993184210526317e-05, "loss": 0.4586, "step": 20269 }, { "epoch": 1.1350655168551909, "grad_norm": 9.267542839050293, "learning_rate": 9.993157894736841e-05, "loss": 0.4237, "step": 20270 }, { "epoch": 1.1351215141673199, "grad_norm": 3.177318572998047, "learning_rate": 9.993131578947369e-05, "loss": 0.4726, "step": 20271 }, { "epoch": 1.135177511479449, "grad_norm": 1.3887708187103271, "learning_rate": 9.993105263157895e-05, "loss": 0.4423, "step": 20272 }, { "epoch": 1.135233508791578, "grad_norm": 1.4719115495681763, "learning_rate": 9.993078947368422e-05, "loss": 0.449, "step": 20273 }, { "epoch": 1.135289506103707, "grad_norm": 1.4405137300491333, "learning_rate": 9.993052631578948e-05, "loss": 0.4776, "step": 20274 }, { "epoch": 1.135345503415836, "grad_norm": 1.2587157487869263, "learning_rate": 9.993026315789474e-05, "loss": 0.4237, "step": 20275 }, { "epoch": 1.135401500727965, "grad_norm": 1.2803078889846802, "learning_rate": 9.993e-05, "loss": 0.4338, "step": 20276 }, { "epoch": 1.135457498040094, "grad_norm": 1.1684249639511108, "learning_rate": 9.992973684210527e-05, "loss": 0.3643, "step": 20277 }, { "epoch": 1.135513495352223, "grad_norm": 1.356095314025879, "learning_rate": 9.992947368421053e-05, "loss": 0.5039, "step": 20278 }, { "epoch": 1.135569492664352, "grad_norm": 1.1681362390518188, "learning_rate": 9.992921052631579e-05, "loss": 0.4442, "step": 20279 }, { "epoch": 1.135625489976481, "grad_norm": 1.3019719123840332, "learning_rate": 9.992894736842105e-05, "loss": 0.4194, "step": 20280 }, { "epoch": 
1.13568148728861, "grad_norm": 1.1020950078964233, "learning_rate": 9.992868421052633e-05, "loss": 0.3683, "step": 20281 }, { "epoch": 1.135737484600739, "grad_norm": 1.5293177366256714, "learning_rate": 9.992842105263159e-05, "loss": 0.494, "step": 20282 }, { "epoch": 1.135793481912868, "grad_norm": 1.2927979230880737, "learning_rate": 9.992815789473685e-05, "loss": 0.4483, "step": 20283 }, { "epoch": 1.1358494792249971, "grad_norm": 1.9020200967788696, "learning_rate": 9.99278947368421e-05, "loss": 0.5442, "step": 20284 }, { "epoch": 1.1359054765371261, "grad_norm": 1.4373677968978882, "learning_rate": 9.992763157894738e-05, "loss": 0.4161, "step": 20285 }, { "epoch": 1.1359614738492552, "grad_norm": 1.4182528257369995, "learning_rate": 9.992736842105264e-05, "loss": 0.4502, "step": 20286 }, { "epoch": 1.1360174711613842, "grad_norm": 1.112560510635376, "learning_rate": 9.99271052631579e-05, "loss": 0.3414, "step": 20287 }, { "epoch": 1.1360734684735132, "grad_norm": 1.262067198753357, "learning_rate": 9.992684210526316e-05, "loss": 0.4059, "step": 20288 }, { "epoch": 1.1361294657856422, "grad_norm": 1.3725446462631226, "learning_rate": 9.992657894736842e-05, "loss": 0.44, "step": 20289 }, { "epoch": 1.1361854630977712, "grad_norm": 1.2827565670013428, "learning_rate": 9.992631578947369e-05, "loss": 0.4307, "step": 20290 }, { "epoch": 1.1362414604099003, "grad_norm": 1.291059136390686, "learning_rate": 9.992605263157895e-05, "loss": 0.4441, "step": 20291 }, { "epoch": 1.1362974577220293, "grad_norm": 1.5054551362991333, "learning_rate": 9.992578947368422e-05, "loss": 0.477, "step": 20292 }, { "epoch": 1.1363534550341583, "grad_norm": 1.8364559412002563, "learning_rate": 9.992552631578947e-05, "loss": 0.4822, "step": 20293 }, { "epoch": 1.1364094523462873, "grad_norm": 1.191460371017456, "learning_rate": 9.992526315789474e-05, "loss": 0.4269, "step": 20294 }, { "epoch": 1.1364654496584163, "grad_norm": 3.196669578552246, "learning_rate": 9.9925e-05, "loss": 
0.5738, "step": 20295 }, { "epoch": 1.1365214469705454, "grad_norm": 1.3577088117599487, "learning_rate": 9.992473684210528e-05, "loss": 0.4304, "step": 20296 }, { "epoch": 1.1365774442826744, "grad_norm": 1.5136973857879639, "learning_rate": 9.992447368421052e-05, "loss": 0.4075, "step": 20297 }, { "epoch": 1.1366334415948034, "grad_norm": 1.4799716472625732, "learning_rate": 9.99242105263158e-05, "loss": 0.4673, "step": 20298 }, { "epoch": 1.1366894389069324, "grad_norm": 1.289697527885437, "learning_rate": 9.992394736842106e-05, "loss": 0.3812, "step": 20299 }, { "epoch": 1.1367454362190614, "grad_norm": 1.0879672765731812, "learning_rate": 9.992368421052633e-05, "loss": 0.3818, "step": 20300 }, { "epoch": 1.1368014335311905, "grad_norm": 1.3830888271331787, "learning_rate": 9.992342105263159e-05, "loss": 0.4774, "step": 20301 }, { "epoch": 1.1368574308433195, "grad_norm": 1.3574031591415405, "learning_rate": 9.992315789473685e-05, "loss": 0.6394, "step": 20302 }, { "epoch": 1.1369134281554485, "grad_norm": 1.266954779624939, "learning_rate": 9.992289473684211e-05, "loss": 0.4814, "step": 20303 }, { "epoch": 1.1369694254675775, "grad_norm": 1.6034536361694336, "learning_rate": 9.992263157894737e-05, "loss": 0.5257, "step": 20304 }, { "epoch": 1.1370254227797065, "grad_norm": 1.2043932676315308, "learning_rate": 9.992236842105264e-05, "loss": 0.4585, "step": 20305 }, { "epoch": 1.1370814200918355, "grad_norm": 1.4146957397460938, "learning_rate": 9.99221052631579e-05, "loss": 0.4716, "step": 20306 }, { "epoch": 1.1371374174039646, "grad_norm": 1.373159646987915, "learning_rate": 9.992184210526316e-05, "loss": 0.3746, "step": 20307 }, { "epoch": 1.1371934147160936, "grad_norm": 1.1979814767837524, "learning_rate": 9.992157894736842e-05, "loss": 0.3212, "step": 20308 }, { "epoch": 1.1372494120282226, "grad_norm": 1.7314233779907227, "learning_rate": 9.99213157894737e-05, "loss": 0.3733, "step": 20309 }, { "epoch": 1.1373054093403516, "grad_norm": 
1.1924324035644531, "learning_rate": 9.992105263157895e-05, "loss": 0.4887, "step": 20310 }, { "epoch": 1.1373614066524806, "grad_norm": 1.3776707649230957, "learning_rate": 9.992078947368421e-05, "loss": 0.3593, "step": 20311 }, { "epoch": 1.1374174039646097, "grad_norm": 1.463600754737854, "learning_rate": 9.992052631578947e-05, "loss": 0.4736, "step": 20312 }, { "epoch": 1.1374734012767387, "grad_norm": 1.376007080078125, "learning_rate": 9.992026315789475e-05, "loss": 0.4454, "step": 20313 }, { "epoch": 1.1375293985888677, "grad_norm": 1.5093474388122559, "learning_rate": 9.992e-05, "loss": 0.4242, "step": 20314 }, { "epoch": 1.1375853959009967, "grad_norm": 1.3137797117233276, "learning_rate": 9.991973684210527e-05, "loss": 0.4289, "step": 20315 }, { "epoch": 1.1376413932131257, "grad_norm": 1.4704359769821167, "learning_rate": 9.991947368421053e-05, "loss": 0.4248, "step": 20316 }, { "epoch": 1.1376973905252548, "grad_norm": 1.3900424242019653, "learning_rate": 9.99192105263158e-05, "loss": 0.412, "step": 20317 }, { "epoch": 1.1377533878373838, "grad_norm": 1.325015664100647, "learning_rate": 9.991894736842106e-05, "loss": 0.5808, "step": 20318 }, { "epoch": 1.1378093851495128, "grad_norm": 1.2525914907455444, "learning_rate": 9.991868421052633e-05, "loss": 0.5415, "step": 20319 }, { "epoch": 1.1378653824616418, "grad_norm": 1.4956260919570923, "learning_rate": 9.991842105263158e-05, "loss": 0.4928, "step": 20320 }, { "epoch": 1.1379213797737708, "grad_norm": 1.1873290538787842, "learning_rate": 9.991815789473685e-05, "loss": 0.3795, "step": 20321 }, { "epoch": 1.1379773770858999, "grad_norm": 1.2872254848480225, "learning_rate": 9.991789473684211e-05, "loss": 0.4509, "step": 20322 }, { "epoch": 1.1380333743980289, "grad_norm": 1.3473589420318604, "learning_rate": 9.991763157894737e-05, "loss": 0.343, "step": 20323 }, { "epoch": 1.138089371710158, "grad_norm": 1.6654990911483765, "learning_rate": 9.991736842105264e-05, "loss": 0.5751, "step": 20324 }, { 
"epoch": 1.138145369022287, "grad_norm": 1.8152382373809814, "learning_rate": 9.991710526315789e-05, "loss": 0.3431, "step": 20325 }, { "epoch": 1.138201366334416, "grad_norm": 1.5003809928894043, "learning_rate": 9.991684210526316e-05, "loss": 0.4206, "step": 20326 }, { "epoch": 1.138257363646545, "grad_norm": 1.2405860424041748, "learning_rate": 9.991657894736842e-05, "loss": 0.3798, "step": 20327 }, { "epoch": 1.138313360958674, "grad_norm": 1.306026577949524, "learning_rate": 9.99163157894737e-05, "loss": 0.4388, "step": 20328 }, { "epoch": 1.138369358270803, "grad_norm": 1.517681360244751, "learning_rate": 9.991605263157896e-05, "loss": 0.5016, "step": 20329 }, { "epoch": 1.138425355582932, "grad_norm": 2.5119831562042236, "learning_rate": 9.991578947368422e-05, "loss": 0.4232, "step": 20330 }, { "epoch": 1.138481352895061, "grad_norm": 1.238312005996704, "learning_rate": 9.991552631578948e-05, "loss": 0.3336, "step": 20331 }, { "epoch": 1.13853735020719, "grad_norm": 1.3370847702026367, "learning_rate": 9.991526315789475e-05, "loss": 0.474, "step": 20332 }, { "epoch": 1.138593347519319, "grad_norm": 3.627495527267456, "learning_rate": 9.991500000000001e-05, "loss": 0.5222, "step": 20333 }, { "epoch": 1.138649344831448, "grad_norm": 1.314353585243225, "learning_rate": 9.991473684210527e-05, "loss": 0.4736, "step": 20334 }, { "epoch": 1.138705342143577, "grad_norm": 1.693156361579895, "learning_rate": 9.991447368421053e-05, "loss": 0.5348, "step": 20335 }, { "epoch": 1.1387613394557061, "grad_norm": 5.299622535705566, "learning_rate": 9.99142105263158e-05, "loss": 0.4973, "step": 20336 }, { "epoch": 1.1388173367678351, "grad_norm": 1.2720335721969604, "learning_rate": 9.991394736842106e-05, "loss": 0.3379, "step": 20337 }, { "epoch": 1.1388733340799642, "grad_norm": 1.6211950778961182, "learning_rate": 9.991368421052632e-05, "loss": 0.4027, "step": 20338 }, { "epoch": 1.1389293313920932, "grad_norm": 1.5061919689178467, "learning_rate": 9.991342105263158e-05, 
"loss": 0.6308, "step": 20339 }, { "epoch": 1.1389853287042222, "grad_norm": 1.3513188362121582, "learning_rate": 9.991315789473684e-05, "loss": 0.4841, "step": 20340 }, { "epoch": 1.1390413260163512, "grad_norm": 1.410709023475647, "learning_rate": 9.991289473684211e-05, "loss": 0.3373, "step": 20341 }, { "epoch": 1.1390973233284802, "grad_norm": 1.2556793689727783, "learning_rate": 9.991263157894737e-05, "loss": 0.4272, "step": 20342 }, { "epoch": 1.1391533206406093, "grad_norm": 1.5008658170700073, "learning_rate": 9.991236842105263e-05, "loss": 0.5107, "step": 20343 }, { "epoch": 1.1392093179527383, "grad_norm": 1.5143998861312866, "learning_rate": 9.991210526315789e-05, "loss": 0.6184, "step": 20344 }, { "epoch": 1.1392653152648673, "grad_norm": 1.224623441696167, "learning_rate": 9.991184210526317e-05, "loss": 0.4284, "step": 20345 }, { "epoch": 1.1393213125769963, "grad_norm": 1.4190151691436768, "learning_rate": 9.991157894736843e-05, "loss": 0.4862, "step": 20346 }, { "epoch": 1.1393773098891253, "grad_norm": 1.2998725175857544, "learning_rate": 9.991131578947368e-05, "loss": 0.4061, "step": 20347 }, { "epoch": 1.1394333072012544, "grad_norm": 1.2712948322296143, "learning_rate": 9.991105263157894e-05, "loss": 0.4702, "step": 20348 }, { "epoch": 1.1394893045133834, "grad_norm": 1.305171012878418, "learning_rate": 9.991078947368422e-05, "loss": 0.3515, "step": 20349 }, { "epoch": 1.1395453018255124, "grad_norm": 1.5488324165344238, "learning_rate": 9.991052631578948e-05, "loss": 0.4834, "step": 20350 }, { "epoch": 1.1396012991376414, "grad_norm": 1.2901452779769897, "learning_rate": 9.991026315789475e-05, "loss": 0.4051, "step": 20351 }, { "epoch": 1.1396572964497704, "grad_norm": 1.3085746765136719, "learning_rate": 9.991e-05, "loss": 0.4558, "step": 20352 }, { "epoch": 1.1397132937618994, "grad_norm": 1.3028777837753296, "learning_rate": 9.990973684210527e-05, "loss": 0.5685, "step": 20353 }, { "epoch": 1.1397692910740285, "grad_norm": 1.3169550895690918, 
"learning_rate": 9.990947368421053e-05, "loss": 0.3991, "step": 20354 }, { "epoch": 1.1398252883861575, "grad_norm": 1.3387531042099, "learning_rate": 9.99092105263158e-05, "loss": 0.3692, "step": 20355 }, { "epoch": 1.1398812856982865, "grad_norm": 1.3168439865112305, "learning_rate": 9.990894736842106e-05, "loss": 0.3662, "step": 20356 }, { "epoch": 1.1399372830104155, "grad_norm": 1.3575280904769897, "learning_rate": 9.990868421052631e-05, "loss": 0.4071, "step": 20357 }, { "epoch": 1.1399932803225445, "grad_norm": 1.6750175952911377, "learning_rate": 9.990842105263158e-05, "loss": 0.6067, "step": 20358 }, { "epoch": 1.1400492776346736, "grad_norm": 1.2230628728866577, "learning_rate": 9.990815789473684e-05, "loss": 0.4325, "step": 20359 }, { "epoch": 1.1401052749468026, "grad_norm": 1.4564143419265747, "learning_rate": 9.990789473684212e-05, "loss": 0.3646, "step": 20360 }, { "epoch": 1.1401612722589316, "grad_norm": 1.7707886695861816, "learning_rate": 9.990763157894738e-05, "loss": 0.3448, "step": 20361 }, { "epoch": 1.1402172695710606, "grad_norm": 1.2103395462036133, "learning_rate": 9.990736842105264e-05, "loss": 0.4444, "step": 20362 }, { "epoch": 1.1402732668831896, "grad_norm": 1.2842963933944702, "learning_rate": 9.99071052631579e-05, "loss": 0.5412, "step": 20363 }, { "epoch": 1.1403292641953187, "grad_norm": 1.1807801723480225, "learning_rate": 9.990684210526317e-05, "loss": 0.4317, "step": 20364 }, { "epoch": 1.1403852615074477, "grad_norm": 1.2823046445846558, "learning_rate": 9.990657894736843e-05, "loss": 0.5272, "step": 20365 }, { "epoch": 1.1404412588195767, "grad_norm": 1.4724619388580322, "learning_rate": 9.990631578947369e-05, "loss": 0.4194, "step": 20366 }, { "epoch": 1.1404972561317057, "grad_norm": 1.0696430206298828, "learning_rate": 9.990605263157895e-05, "loss": 0.311, "step": 20367 }, { "epoch": 1.1405532534438347, "grad_norm": 1.2921104431152344, "learning_rate": 9.990578947368422e-05, "loss": 0.434, "step": 20368 }, { "epoch": 
1.1406092507559638, "grad_norm": 1.2019782066345215, "learning_rate": 9.990552631578948e-05, "loss": 0.3851, "step": 20369 }, { "epoch": 1.1406652480680928, "grad_norm": 1.3826487064361572, "learning_rate": 9.990526315789474e-05, "loss": 0.5371, "step": 20370 }, { "epoch": 1.1407212453802218, "grad_norm": 1.5002570152282715, "learning_rate": 9.9905e-05, "loss": 0.357, "step": 20371 }, { "epoch": 1.1407772426923508, "grad_norm": 1.426370620727539, "learning_rate": 9.990473684210527e-05, "loss": 0.5019, "step": 20372 }, { "epoch": 1.1408332400044798, "grad_norm": 1.5056532621383667, "learning_rate": 9.990447368421053e-05, "loss": 0.5796, "step": 20373 }, { "epoch": 1.1408892373166089, "grad_norm": 1.2421802282333374, "learning_rate": 9.990421052631579e-05, "loss": 0.3594, "step": 20374 }, { "epoch": 1.1409452346287379, "grad_norm": 1.8825669288635254, "learning_rate": 9.990394736842105e-05, "loss": 0.4201, "step": 20375 }, { "epoch": 1.141001231940867, "grad_norm": 1.3323009014129639, "learning_rate": 9.990368421052631e-05, "loss": 0.4136, "step": 20376 }, { "epoch": 1.141057229252996, "grad_norm": 1.405066728591919, "learning_rate": 9.990342105263159e-05, "loss": 0.4292, "step": 20377 }, { "epoch": 1.141113226565125, "grad_norm": 1.2221298217773438, "learning_rate": 9.990315789473684e-05, "loss": 0.3495, "step": 20378 }, { "epoch": 1.141169223877254, "grad_norm": 1.2865514755249023, "learning_rate": 9.990289473684212e-05, "loss": 0.3648, "step": 20379 }, { "epoch": 1.141225221189383, "grad_norm": 1.406097173690796, "learning_rate": 9.990263157894736e-05, "loss": 0.428, "step": 20380 }, { "epoch": 1.141281218501512, "grad_norm": 2.4247918128967285, "learning_rate": 9.990236842105264e-05, "loss": 0.4855, "step": 20381 }, { "epoch": 1.141337215813641, "grad_norm": 1.4736703634262085, "learning_rate": 9.99021052631579e-05, "loss": 0.4213, "step": 20382 }, { "epoch": 1.14139321312577, "grad_norm": 1.3291274309158325, "learning_rate": 9.990184210526317e-05, "loss": 
0.3984, "step": 20383 }, { "epoch": 1.141449210437899, "grad_norm": 1.3679965734481812, "learning_rate": 9.990157894736843e-05, "loss": 0.4346, "step": 20384 }, { "epoch": 1.141505207750028, "grad_norm": 1.220797061920166, "learning_rate": 9.990131578947369e-05, "loss": 0.4462, "step": 20385 }, { "epoch": 1.141561205062157, "grad_norm": 1.4401963949203491, "learning_rate": 9.990105263157895e-05, "loss": 0.357, "step": 20386 }, { "epoch": 1.141617202374286, "grad_norm": 1.3059804439544678, "learning_rate": 9.990078947368422e-05, "loss": 0.4317, "step": 20387 }, { "epoch": 1.1416731996864151, "grad_norm": 2.1954119205474854, "learning_rate": 9.990052631578948e-05, "loss": 0.576, "step": 20388 }, { "epoch": 1.1417291969985441, "grad_norm": 1.312447190284729, "learning_rate": 9.990026315789474e-05, "loss": 0.4316, "step": 20389 }, { "epoch": 1.1417851943106732, "grad_norm": 1.3905044794082642, "learning_rate": 9.99e-05, "loss": 0.6226, "step": 20390 }, { "epoch": 1.1418411916228022, "grad_norm": 1.4395277500152588, "learning_rate": 9.989973684210526e-05, "loss": 0.6674, "step": 20391 }, { "epoch": 1.1418971889349312, "grad_norm": 1.7190442085266113, "learning_rate": 9.989947368421054e-05, "loss": 0.4215, "step": 20392 }, { "epoch": 1.1419531862470602, "grad_norm": 1.292906641960144, "learning_rate": 9.98992105263158e-05, "loss": 0.4206, "step": 20393 }, { "epoch": 1.1420091835591892, "grad_norm": 1.3469399213790894, "learning_rate": 9.989894736842105e-05, "loss": 0.3768, "step": 20394 }, { "epoch": 1.1420651808713183, "grad_norm": 2.9747273921966553, "learning_rate": 9.989868421052631e-05, "loss": 0.4534, "step": 20395 }, { "epoch": 1.1421211781834473, "grad_norm": 1.2159113883972168, "learning_rate": 9.989842105263159e-05, "loss": 0.4651, "step": 20396 }, { "epoch": 1.1421771754955763, "grad_norm": 1.318577527999878, "learning_rate": 9.989815789473685e-05, "loss": 0.412, "step": 20397 }, { "epoch": 1.1422331728077053, "grad_norm": 1.2219117879867554, "learning_rate": 
9.989789473684211e-05, "loss": 0.4052, "step": 20398 }, { "epoch": 1.1422891701198343, "grad_norm": 1.5057100057601929, "learning_rate": 9.989763157894737e-05, "loss": 0.4254, "step": 20399 }, { "epoch": 1.1423451674319633, "grad_norm": 1.3558597564697266, "learning_rate": 9.989736842105264e-05, "loss": 0.4651, "step": 20400 }, { "epoch": 1.1424011647440924, "grad_norm": 9.762716293334961, "learning_rate": 9.98971052631579e-05, "loss": 0.4503, "step": 20401 }, { "epoch": 1.1424571620562214, "grad_norm": 1.5793808698654175, "learning_rate": 9.989684210526316e-05, "loss": 0.4069, "step": 20402 }, { "epoch": 1.1425131593683504, "grad_norm": 1.2100074291229248, "learning_rate": 9.989657894736842e-05, "loss": 0.33, "step": 20403 }, { "epoch": 1.1425691566804794, "grad_norm": 1.594765543937683, "learning_rate": 9.989631578947369e-05, "loss": 0.5182, "step": 20404 }, { "epoch": 1.1426251539926084, "grad_norm": 1.6750227212905884, "learning_rate": 9.989605263157895e-05, "loss": 0.4615, "step": 20405 }, { "epoch": 1.1426811513047375, "grad_norm": 1.3448501825332642, "learning_rate": 9.989578947368423e-05, "loss": 0.512, "step": 20406 }, { "epoch": 1.1427371486168665, "grad_norm": 1.508351445198059, "learning_rate": 9.989552631578947e-05, "loss": 0.5387, "step": 20407 }, { "epoch": 1.1427931459289955, "grad_norm": 1.4007755517959595, "learning_rate": 9.989526315789473e-05, "loss": 0.4336, "step": 20408 }, { "epoch": 1.1428491432411245, "grad_norm": 1.5024051666259766, "learning_rate": 9.9895e-05, "loss": 0.3761, "step": 20409 }, { "epoch": 1.1429051405532535, "grad_norm": 1.227728009223938, "learning_rate": 9.989473684210526e-05, "loss": 0.5032, "step": 20410 }, { "epoch": 1.1429611378653823, "grad_norm": 1.4767719507217407, "learning_rate": 9.989447368421054e-05, "loss": 0.4621, "step": 20411 }, { "epoch": 1.1430171351775114, "grad_norm": 1.2244586944580078, "learning_rate": 9.989421052631578e-05, "loss": 0.4291, "step": 20412 }, { "epoch": 1.1430731324896404, "grad_norm": 
1.3493458032608032, "learning_rate": 9.989394736842106e-05, "loss": 0.3902, "step": 20413 }, { "epoch": 1.1431291298017694, "grad_norm": 1.400938868522644, "learning_rate": 9.989368421052632e-05, "loss": 0.421, "step": 20414 }, { "epoch": 1.1431851271138984, "grad_norm": 1.3430699110031128, "learning_rate": 9.989342105263159e-05, "loss": 0.429, "step": 20415 }, { "epoch": 1.1432411244260274, "grad_norm": 1.5283825397491455, "learning_rate": 9.989315789473685e-05, "loss": 0.5953, "step": 20416 }, { "epoch": 1.1432971217381565, "grad_norm": 1.3737848997116089, "learning_rate": 9.989289473684211e-05, "loss": 0.4791, "step": 20417 }, { "epoch": 1.1433531190502855, "grad_norm": 1.2857928276062012, "learning_rate": 9.989263157894737e-05, "loss": 0.5354, "step": 20418 }, { "epoch": 1.1434091163624145, "grad_norm": 1.208808422088623, "learning_rate": 9.989236842105264e-05, "loss": 0.4551, "step": 20419 }, { "epoch": 1.1434651136745435, "grad_norm": 1.372432827949524, "learning_rate": 9.98921052631579e-05, "loss": 0.3604, "step": 20420 }, { "epoch": 1.1435211109866725, "grad_norm": 1.3018593788146973, "learning_rate": 9.989184210526316e-05, "loss": 0.3871, "step": 20421 }, { "epoch": 1.1435771082988015, "grad_norm": 1.326655387878418, "learning_rate": 9.989157894736842e-05, "loss": 0.3472, "step": 20422 }, { "epoch": 1.1436331056109306, "grad_norm": 1.8529084920883179, "learning_rate": 9.98913157894737e-05, "loss": 0.5448, "step": 20423 }, { "epoch": 1.1436891029230596, "grad_norm": 1.4630001783370972, "learning_rate": 9.989105263157896e-05, "loss": 0.4667, "step": 20424 }, { "epoch": 1.1437451002351886, "grad_norm": 1.5406907796859741, "learning_rate": 9.989078947368421e-05, "loss": 0.4948, "step": 20425 }, { "epoch": 1.1438010975473176, "grad_norm": 1.7303768396377563, "learning_rate": 9.989052631578947e-05, "loss": 0.4362, "step": 20426 }, { "epoch": 1.1438570948594466, "grad_norm": 1.3066902160644531, "learning_rate": 9.989026315789473e-05, "loss": 0.4374, "step": 20427 
}, { "epoch": 1.1439130921715757, "grad_norm": 1.3579692840576172, "learning_rate": 9.989000000000001e-05, "loss": 0.4402, "step": 20428 }, { "epoch": 1.1439690894837047, "grad_norm": 1.7817784547805786, "learning_rate": 9.988973684210527e-05, "loss": 0.5121, "step": 20429 }, { "epoch": 1.1440250867958337, "grad_norm": 1.3389939069747925, "learning_rate": 9.988947368421053e-05, "loss": 0.4115, "step": 20430 }, { "epoch": 1.1440810841079627, "grad_norm": 1.320111632347107, "learning_rate": 9.988921052631579e-05, "loss": 0.4343, "step": 20431 }, { "epoch": 1.1441370814200917, "grad_norm": 1.242687463760376, "learning_rate": 9.988894736842106e-05, "loss": 0.3491, "step": 20432 }, { "epoch": 1.1441930787322208, "grad_norm": 1.3836536407470703, "learning_rate": 9.988868421052632e-05, "loss": 0.3486, "step": 20433 }, { "epoch": 1.1442490760443498, "grad_norm": 1.419258952140808, "learning_rate": 9.988842105263159e-05, "loss": 0.4177, "step": 20434 }, { "epoch": 1.1443050733564788, "grad_norm": 1.140465259552002, "learning_rate": 9.988815789473684e-05, "loss": 0.4119, "step": 20435 }, { "epoch": 1.1443610706686078, "grad_norm": 1.4027020931243896, "learning_rate": 9.988789473684211e-05, "loss": 0.3921, "step": 20436 }, { "epoch": 1.1444170679807368, "grad_norm": 1.3744571208953857, "learning_rate": 9.988763157894737e-05, "loss": 0.4064, "step": 20437 }, { "epoch": 1.1444730652928659, "grad_norm": 1.6248918771743774, "learning_rate": 9.988736842105265e-05, "loss": 0.5104, "step": 20438 }, { "epoch": 1.1445290626049949, "grad_norm": 1.2133638858795166, "learning_rate": 9.98871052631579e-05, "loss": 0.3985, "step": 20439 }, { "epoch": 1.144585059917124, "grad_norm": 1.2253282070159912, "learning_rate": 9.988684210526316e-05, "loss": 0.4253, "step": 20440 }, { "epoch": 1.144641057229253, "grad_norm": 1.3476512432098389, "learning_rate": 9.988657894736842e-05, "loss": 0.389, "step": 20441 }, { "epoch": 1.144697054541382, "grad_norm": 1.5335700511932373, "learning_rate": 
9.98863157894737e-05, "loss": 0.3119, "step": 20442 }, { "epoch": 1.144753051853511, "grad_norm": 1.3555032014846802, "learning_rate": 9.988605263157896e-05, "loss": 0.5063, "step": 20443 }, { "epoch": 1.14480904916564, "grad_norm": 1.432375431060791, "learning_rate": 9.98857894736842e-05, "loss": 0.5124, "step": 20444 }, { "epoch": 1.144865046477769, "grad_norm": 1.4518640041351318, "learning_rate": 9.988552631578948e-05, "loss": 0.426, "step": 20445 }, { "epoch": 1.144921043789898, "grad_norm": 1.2916922569274902, "learning_rate": 9.988526315789474e-05, "loss": 0.4223, "step": 20446 }, { "epoch": 1.144977041102027, "grad_norm": 1.2688654661178589, "learning_rate": 9.988500000000001e-05, "loss": 0.3681, "step": 20447 }, { "epoch": 1.145033038414156, "grad_norm": 2.6093363761901855, "learning_rate": 9.988473684210527e-05, "loss": 0.4633, "step": 20448 }, { "epoch": 1.145089035726285, "grad_norm": 1.2063078880310059, "learning_rate": 9.988447368421053e-05, "loss": 0.3911, "step": 20449 }, { "epoch": 1.145145033038414, "grad_norm": 1.247310757637024, "learning_rate": 9.988421052631579e-05, "loss": 0.3752, "step": 20450 }, { "epoch": 1.145201030350543, "grad_norm": 1.5782006978988647, "learning_rate": 9.988394736842106e-05, "loss": 0.4034, "step": 20451 }, { "epoch": 1.1452570276626721, "grad_norm": 1.9347234964370728, "learning_rate": 9.988368421052632e-05, "loss": 0.5993, "step": 20452 }, { "epoch": 1.1453130249748011, "grad_norm": 1.2036774158477783, "learning_rate": 9.988342105263158e-05, "loss": 0.4467, "step": 20453 }, { "epoch": 1.1453690222869302, "grad_norm": 1.3685601949691772, "learning_rate": 9.988315789473684e-05, "loss": 0.4881, "step": 20454 }, { "epoch": 1.1454250195990592, "grad_norm": 2.390435218811035, "learning_rate": 9.988289473684212e-05, "loss": 0.4933, "step": 20455 }, { "epoch": 1.1454810169111882, "grad_norm": 1.2845882177352905, "learning_rate": 9.988263157894737e-05, "loss": 0.4687, "step": 20456 }, { "epoch": 1.1455370142233172, 
"grad_norm": 1.82760751247406, "learning_rate": 9.988236842105263e-05, "loss": 0.4768, "step": 20457 }, { "epoch": 1.1455930115354462, "grad_norm": 1.1389309167861938, "learning_rate": 9.98821052631579e-05, "loss": 0.4183, "step": 20458 }, { "epoch": 1.1456490088475753, "grad_norm": 1.4520819187164307, "learning_rate": 9.988184210526317e-05, "loss": 0.3848, "step": 20459 }, { "epoch": 1.1457050061597043, "grad_norm": 1.26947021484375, "learning_rate": 9.988157894736843e-05, "loss": 0.4503, "step": 20460 }, { "epoch": 1.1457610034718333, "grad_norm": 1.2558846473693848, "learning_rate": 9.988131578947369e-05, "loss": 0.3364, "step": 20461 }, { "epoch": 1.1458170007839623, "grad_norm": 1.8046354055404663, "learning_rate": 9.988105263157895e-05, "loss": 0.6396, "step": 20462 }, { "epoch": 1.1458729980960913, "grad_norm": 1.3201208114624023, "learning_rate": 9.98807894736842e-05, "loss": 0.4427, "step": 20463 }, { "epoch": 1.1459289954082204, "grad_norm": 1.6858083009719849, "learning_rate": 9.988052631578948e-05, "loss": 0.5229, "step": 20464 }, { "epoch": 1.1459849927203494, "grad_norm": 1.1688151359558105, "learning_rate": 9.988026315789474e-05, "loss": 0.43, "step": 20465 }, { "epoch": 1.1460409900324784, "grad_norm": 1.6450614929199219, "learning_rate": 9.988000000000001e-05, "loss": 0.4932, "step": 20466 }, { "epoch": 1.1460969873446074, "grad_norm": 1.4939018487930298, "learning_rate": 9.987973684210526e-05, "loss": 0.4432, "step": 20467 }, { "epoch": 1.1461529846567364, "grad_norm": 1.5850712060928345, "learning_rate": 9.987947368421053e-05, "loss": 0.4831, "step": 20468 }, { "epoch": 1.1462089819688654, "grad_norm": 1.2346611022949219, "learning_rate": 9.987921052631579e-05, "loss": 0.4935, "step": 20469 }, { "epoch": 1.1462649792809945, "grad_norm": 1.5039423704147339, "learning_rate": 9.987894736842107e-05, "loss": 0.4986, "step": 20470 }, { "epoch": 1.1463209765931235, "grad_norm": 1.4204386472702026, "learning_rate": 9.987868421052632e-05, "loss": 0.3778, 
"step": 20471 }, { "epoch": 1.1463769739052525, "grad_norm": 1.4285898208618164, "learning_rate": 9.987842105263158e-05, "loss": 0.4692, "step": 20472 }, { "epoch": 1.1464329712173815, "grad_norm": 1.6953669786453247, "learning_rate": 9.987815789473684e-05, "loss": 0.3309, "step": 20473 }, { "epoch": 1.1464889685295105, "grad_norm": 1.5123811960220337, "learning_rate": 9.987789473684212e-05, "loss": 0.4395, "step": 20474 }, { "epoch": 1.1465449658416396, "grad_norm": 1.2125428915023804, "learning_rate": 9.987763157894738e-05, "loss": 0.3578, "step": 20475 }, { "epoch": 1.1466009631537686, "grad_norm": 1.2554445266723633, "learning_rate": 9.987736842105264e-05, "loss": 0.41, "step": 20476 }, { "epoch": 1.1466569604658976, "grad_norm": 1.1073169708251953, "learning_rate": 9.98771052631579e-05, "loss": 0.3199, "step": 20477 }, { "epoch": 1.1467129577780266, "grad_norm": 1.5877275466918945, "learning_rate": 9.987684210526316e-05, "loss": 0.4738, "step": 20478 }, { "epoch": 1.1467689550901556, "grad_norm": 1.2053771018981934, "learning_rate": 9.987657894736843e-05, "loss": 0.4658, "step": 20479 }, { "epoch": 1.1468249524022847, "grad_norm": 1.2012308835983276, "learning_rate": 9.987631578947369e-05, "loss": 0.4932, "step": 20480 }, { "epoch": 1.1468809497144137, "grad_norm": 1.4577291011810303, "learning_rate": 9.987605263157895e-05, "loss": 0.4037, "step": 20481 }, { "epoch": 1.1469369470265427, "grad_norm": 1.980048656463623, "learning_rate": 9.987578947368421e-05, "loss": 0.5397, "step": 20482 }, { "epoch": 1.1469929443386717, "grad_norm": 1.3027416467666626, "learning_rate": 9.987552631578948e-05, "loss": 0.4341, "step": 20483 }, { "epoch": 1.1470489416508007, "grad_norm": 1.3045016527175903, "learning_rate": 9.987526315789474e-05, "loss": 0.4104, "step": 20484 }, { "epoch": 1.1471049389629298, "grad_norm": 1.537992000579834, "learning_rate": 9.9875e-05, "loss": 0.554, "step": 20485 }, { "epoch": 1.1471609362750588, "grad_norm": 1.3715176582336426, "learning_rate": 
9.987473684210526e-05, "loss": 0.4554, "step": 20486 }, { "epoch": 1.1472169335871878, "grad_norm": 1.564469337463379, "learning_rate": 9.987447368421053e-05, "loss": 0.4798, "step": 20487 }, { "epoch": 1.1472729308993168, "grad_norm": 1.3704088926315308, "learning_rate": 9.98742105263158e-05, "loss": 0.4266, "step": 20488 }, { "epoch": 1.1473289282114458, "grad_norm": 1.655909538269043, "learning_rate": 9.987394736842107e-05, "loss": 0.5037, "step": 20489 }, { "epoch": 1.1473849255235749, "grad_norm": 1.3380017280578613, "learning_rate": 9.987368421052631e-05, "loss": 0.5295, "step": 20490 }, { "epoch": 1.1474409228357039, "grad_norm": 1.4491338729858398, "learning_rate": 9.987342105263159e-05, "loss": 0.3792, "step": 20491 }, { "epoch": 1.1474969201478329, "grad_norm": 1.2327592372894287, "learning_rate": 9.987315789473685e-05, "loss": 0.3807, "step": 20492 }, { "epoch": 1.147552917459962, "grad_norm": 1.2314577102661133, "learning_rate": 9.987289473684212e-05, "loss": 0.3485, "step": 20493 }, { "epoch": 1.147608914772091, "grad_norm": 1.2795847654342651, "learning_rate": 9.987263157894738e-05, "loss": 0.4447, "step": 20494 }, { "epoch": 1.14766491208422, "grad_norm": 1.2632372379302979, "learning_rate": 9.987236842105263e-05, "loss": 0.4008, "step": 20495 }, { "epoch": 1.147720909396349, "grad_norm": 1.2883623838424683, "learning_rate": 9.98721052631579e-05, "loss": 0.4505, "step": 20496 }, { "epoch": 1.147776906708478, "grad_norm": 1.2738240957260132, "learning_rate": 9.987184210526316e-05, "loss": 0.3776, "step": 20497 }, { "epoch": 1.147832904020607, "grad_norm": 1.2178534269332886, "learning_rate": 9.987157894736843e-05, "loss": 0.3194, "step": 20498 }, { "epoch": 1.147888901332736, "grad_norm": 1.4835726022720337, "learning_rate": 9.987131578947368e-05, "loss": 0.383, "step": 20499 }, { "epoch": 1.147944898644865, "grad_norm": 1.263297438621521, "learning_rate": 9.987105263157895e-05, "loss": 0.3836, "step": 20500 }, { "epoch": 1.148000895956994, 
"grad_norm": 1.3925824165344238, "learning_rate": 9.987078947368421e-05, "loss": 0.4921, "step": 20501 }, { "epoch": 1.148056893269123, "grad_norm": 1.2069382667541504, "learning_rate": 9.987052631578948e-05, "loss": 0.4491, "step": 20502 }, { "epoch": 1.148112890581252, "grad_norm": 1.632684588432312, "learning_rate": 9.987026315789474e-05, "loss": 0.5105, "step": 20503 }, { "epoch": 1.1481688878933811, "grad_norm": 1.347937822341919, "learning_rate": 9.987e-05, "loss": 0.4321, "step": 20504 }, { "epoch": 1.1482248852055101, "grad_norm": 1.5664825439453125, "learning_rate": 9.986973684210526e-05, "loss": 0.4648, "step": 20505 }, { "epoch": 1.1482808825176392, "grad_norm": 1.4892882108688354, "learning_rate": 9.986947368421054e-05, "loss": 0.4631, "step": 20506 }, { "epoch": 1.1483368798297682, "grad_norm": 1.3442851305007935, "learning_rate": 9.98692105263158e-05, "loss": 0.5604, "step": 20507 }, { "epoch": 1.1483928771418972, "grad_norm": 1.3856630325317383, "learning_rate": 9.986894736842106e-05, "loss": 0.3672, "step": 20508 }, { "epoch": 1.1484488744540262, "grad_norm": 1.2508807182312012, "learning_rate": 9.986868421052632e-05, "loss": 0.4337, "step": 20509 }, { "epoch": 1.1485048717661552, "grad_norm": 1.9127460718154907, "learning_rate": 9.986842105263159e-05, "loss": 0.4016, "step": 20510 }, { "epoch": 1.1485608690782843, "grad_norm": 1.5735092163085938, "learning_rate": 9.986815789473685e-05, "loss": 0.5388, "step": 20511 }, { "epoch": 1.1486168663904133, "grad_norm": 1.1594423055648804, "learning_rate": 9.986789473684211e-05, "loss": 0.3916, "step": 20512 }, { "epoch": 1.1486728637025423, "grad_norm": 1.6421160697937012, "learning_rate": 9.986763157894737e-05, "loss": 0.5958, "step": 20513 }, { "epoch": 1.1487288610146713, "grad_norm": 1.5584092140197754, "learning_rate": 9.986736842105263e-05, "loss": 0.4277, "step": 20514 }, { "epoch": 1.1487848583268003, "grad_norm": 1.321160078048706, "learning_rate": 9.98671052631579e-05, "loss": 0.4582, "step": 
20515 }, { "epoch": 1.1488408556389293, "grad_norm": 2.9948270320892334, "learning_rate": 9.986684210526316e-05, "loss": 0.4916, "step": 20516 }, { "epoch": 1.1488968529510584, "grad_norm": 1.7630518674850464, "learning_rate": 9.986657894736842e-05, "loss": 0.4982, "step": 20517 }, { "epoch": 1.1489528502631874, "grad_norm": 1.5841964483261108, "learning_rate": 9.986631578947368e-05, "loss": 0.4953, "step": 20518 }, { "epoch": 1.1490088475753164, "grad_norm": 1.4199410676956177, "learning_rate": 9.986605263157895e-05, "loss": 0.3478, "step": 20519 }, { "epoch": 1.1490648448874454, "grad_norm": 1.2151843309402466, "learning_rate": 9.986578947368421e-05, "loss": 0.3648, "step": 20520 }, { "epoch": 1.1491208421995744, "grad_norm": 1.4104688167572021, "learning_rate": 9.986552631578949e-05, "loss": 0.4732, "step": 20521 }, { "epoch": 1.1491768395117035, "grad_norm": 1.6134860515594482, "learning_rate": 9.986526315789473e-05, "loss": 0.4832, "step": 20522 }, { "epoch": 1.1492328368238325, "grad_norm": 1.2508913278579712, "learning_rate": 9.986500000000001e-05, "loss": 0.4126, "step": 20523 }, { "epoch": 1.1492888341359615, "grad_norm": 7.151427268981934, "learning_rate": 9.986473684210527e-05, "loss": 0.5626, "step": 20524 }, { "epoch": 1.1493448314480905, "grad_norm": 1.5135068893432617, "learning_rate": 9.986447368421054e-05, "loss": 0.5287, "step": 20525 }, { "epoch": 1.1494008287602195, "grad_norm": 1.3105688095092773, "learning_rate": 9.98642105263158e-05, "loss": 0.453, "step": 20526 }, { "epoch": 1.1494568260723486, "grad_norm": 1.422707438468933, "learning_rate": 9.986394736842106e-05, "loss": 0.4645, "step": 20527 }, { "epoch": 1.1495128233844776, "grad_norm": 1.54299795627594, "learning_rate": 9.986368421052632e-05, "loss": 0.497, "step": 20528 }, { "epoch": 1.1495688206966066, "grad_norm": 1.470493197441101, "learning_rate": 9.986342105263158e-05, "loss": 0.4095, "step": 20529 }, { "epoch": 1.1496248180087356, "grad_norm": 1.6525027751922607, "learning_rate": 
9.986315789473685e-05, "loss": 0.4128, "step": 20530 }, { "epoch": 1.1496808153208646, "grad_norm": 1.5500452518463135, "learning_rate": 9.986289473684211e-05, "loss": 0.3531, "step": 20531 }, { "epoch": 1.1497368126329937, "grad_norm": 1.5278432369232178, "learning_rate": 9.986263157894737e-05, "loss": 0.6385, "step": 20532 }, { "epoch": 1.1497928099451227, "grad_norm": 1.5010216236114502, "learning_rate": 9.986236842105263e-05, "loss": 0.4497, "step": 20533 }, { "epoch": 1.1498488072572517, "grad_norm": 1.275917649269104, "learning_rate": 9.98621052631579e-05, "loss": 0.4798, "step": 20534 }, { "epoch": 1.1499048045693807, "grad_norm": 1.313026785850525, "learning_rate": 9.986184210526316e-05, "loss": 0.4179, "step": 20535 }, { "epoch": 1.1499608018815097, "grad_norm": 1.8314224481582642, "learning_rate": 9.986157894736842e-05, "loss": 0.3713, "step": 20536 }, { "epoch": 1.1500167991936388, "grad_norm": 1.3292884826660156, "learning_rate": 9.986131578947368e-05, "loss": 0.4248, "step": 20537 }, { "epoch": 1.1500727965057678, "grad_norm": 1.2726421356201172, "learning_rate": 9.986105263157896e-05, "loss": 0.3801, "step": 20538 }, { "epoch": 1.1501287938178968, "grad_norm": 1.2855806350708008, "learning_rate": 9.986078947368422e-05, "loss": 0.4511, "step": 20539 }, { "epoch": 1.1501847911300258, "grad_norm": 1.3955669403076172, "learning_rate": 9.986052631578948e-05, "loss": 0.5053, "step": 20540 }, { "epoch": 1.1502407884421548, "grad_norm": 1.2548118829727173, "learning_rate": 9.986026315789474e-05, "loss": 0.3955, "step": 20541 }, { "epoch": 1.1502967857542838, "grad_norm": 1.2880767583847046, "learning_rate": 9.986000000000001e-05, "loss": 0.3904, "step": 20542 }, { "epoch": 1.1503527830664129, "grad_norm": 1.262155294418335, "learning_rate": 9.985973684210527e-05, "loss": 0.4347, "step": 20543 }, { "epoch": 1.1504087803785419, "grad_norm": 1.4613949060440063, "learning_rate": 9.985947368421054e-05, "loss": 0.47, "step": 20544 }, { "epoch": 1.150464777690671, 
"grad_norm": 1.3043702840805054, "learning_rate": 9.985921052631579e-05, "loss": 0.3711, "step": 20545 }, { "epoch": 1.1505207750028, "grad_norm": 1.5589770078659058, "learning_rate": 9.985894736842105e-05, "loss": 0.3877, "step": 20546 }, { "epoch": 1.150576772314929, "grad_norm": 1.381598711013794, "learning_rate": 9.985868421052632e-05, "loss": 0.4932, "step": 20547 }, { "epoch": 1.150632769627058, "grad_norm": 1.4977377653121948, "learning_rate": 9.985842105263158e-05, "loss": 0.5111, "step": 20548 }, { "epoch": 1.150688766939187, "grad_norm": 2.3996832370758057, "learning_rate": 9.985815789473684e-05, "loss": 0.4615, "step": 20549 }, { "epoch": 1.150744764251316, "grad_norm": 1.307060956954956, "learning_rate": 9.98578947368421e-05, "loss": 0.5004, "step": 20550 }, { "epoch": 1.150800761563445, "grad_norm": 1.3533759117126465, "learning_rate": 9.985763157894737e-05, "loss": 0.3433, "step": 20551 }, { "epoch": 1.150856758875574, "grad_norm": 1.4080413579940796, "learning_rate": 9.985736842105263e-05, "loss": 0.3815, "step": 20552 }, { "epoch": 1.150912756187703, "grad_norm": 1.3012210130691528, "learning_rate": 9.985710526315791e-05, "loss": 0.529, "step": 20553 }, { "epoch": 1.150968753499832, "grad_norm": 1.5659222602844238, "learning_rate": 9.985684210526315e-05, "loss": 0.4356, "step": 20554 }, { "epoch": 1.151024750811961, "grad_norm": 1.316042423248291, "learning_rate": 9.985657894736843e-05, "loss": 0.4283, "step": 20555 }, { "epoch": 1.1510807481240901, "grad_norm": 1.5393595695495605, "learning_rate": 9.985631578947369e-05, "loss": 0.4821, "step": 20556 }, { "epoch": 1.1511367454362191, "grad_norm": 1.5968881845474243, "learning_rate": 9.985605263157896e-05, "loss": 0.5475, "step": 20557 }, { "epoch": 1.1511927427483482, "grad_norm": 1.4276247024536133, "learning_rate": 9.985578947368422e-05, "loss": 0.4981, "step": 20558 }, { "epoch": 1.1512487400604772, "grad_norm": 1.418346881866455, "learning_rate": 9.985552631578948e-05, "loss": 0.4105, "step": 
20559 }, { "epoch": 1.1513047373726062, "grad_norm": 1.4498636722564697, "learning_rate": 9.985526315789474e-05, "loss": 0.4128, "step": 20560 }, { "epoch": 1.1513607346847352, "grad_norm": 2.0232760906219482, "learning_rate": 9.985500000000001e-05, "loss": 0.6095, "step": 20561 }, { "epoch": 1.1514167319968642, "grad_norm": 1.7056657075881958, "learning_rate": 9.985473684210527e-05, "loss": 0.3965, "step": 20562 }, { "epoch": 1.1514727293089932, "grad_norm": 1.3510469198226929, "learning_rate": 9.985447368421053e-05, "loss": 0.3811, "step": 20563 }, { "epoch": 1.1515287266211223, "grad_norm": 1.7105083465576172, "learning_rate": 9.985421052631579e-05, "loss": 0.4305, "step": 20564 }, { "epoch": 1.1515847239332513, "grad_norm": 1.1439239978790283, "learning_rate": 9.985394736842105e-05, "loss": 0.3628, "step": 20565 }, { "epoch": 1.1516407212453803, "grad_norm": 1.3918046951293945, "learning_rate": 9.985368421052632e-05, "loss": 0.4256, "step": 20566 }, { "epoch": 1.1516967185575093, "grad_norm": 1.1805944442749023, "learning_rate": 9.985342105263158e-05, "loss": 0.4436, "step": 20567 }, { "epoch": 1.1517527158696383, "grad_norm": 1.303356409072876, "learning_rate": 9.985315789473684e-05, "loss": 0.363, "step": 20568 }, { "epoch": 1.1518087131817674, "grad_norm": 9.980767250061035, "learning_rate": 9.98528947368421e-05, "loss": 0.4461, "step": 20569 }, { "epoch": 1.1518647104938964, "grad_norm": 1.2744040489196777, "learning_rate": 9.985263157894738e-05, "loss": 0.4167, "step": 20570 }, { "epoch": 1.1519207078060254, "grad_norm": 1.841589331626892, "learning_rate": 9.985236842105264e-05, "loss": 0.3982, "step": 20571 }, { "epoch": 1.1519767051181544, "grad_norm": 1.4686000347137451, "learning_rate": 9.98521052631579e-05, "loss": 0.4736, "step": 20572 }, { "epoch": 1.1520327024302834, "grad_norm": 2.5662789344787598, "learning_rate": 9.985184210526316e-05, "loss": 0.3899, "step": 20573 }, { "epoch": 1.1520886997424125, "grad_norm": 3.3797099590301514, 
"learning_rate": 9.985157894736843e-05, "loss": 0.661, "step": 20574 }, { "epoch": 1.1521446970545415, "grad_norm": 1.292691946029663, "learning_rate": 9.985131578947369e-05, "loss": 0.5272, "step": 20575 }, { "epoch": 1.1522006943666705, "grad_norm": 1.3117496967315674, "learning_rate": 9.985105263157896e-05, "loss": 0.42, "step": 20576 }, { "epoch": 1.1522566916787995, "grad_norm": 1.5081813335418701, "learning_rate": 9.985078947368421e-05, "loss": 0.6265, "step": 20577 }, { "epoch": 1.1523126889909285, "grad_norm": 1.4189059734344482, "learning_rate": 9.985052631578948e-05, "loss": 0.5678, "step": 20578 }, { "epoch": 1.1523686863030576, "grad_norm": 1.3685775995254517, "learning_rate": 9.985026315789474e-05, "loss": 0.449, "step": 20579 }, { "epoch": 1.1524246836151866, "grad_norm": 1.1283231973648071, "learning_rate": 9.985000000000001e-05, "loss": 0.3494, "step": 20580 }, { "epoch": 1.1524806809273156, "grad_norm": 1.373646855354309, "learning_rate": 9.984973684210527e-05, "loss": 0.5035, "step": 20581 }, { "epoch": 1.1525366782394446, "grad_norm": 1.3119111061096191, "learning_rate": 9.984947368421052e-05, "loss": 0.4533, "step": 20582 }, { "epoch": 1.1525926755515736, "grad_norm": 1.592946171760559, "learning_rate": 9.98492105263158e-05, "loss": 0.4196, "step": 20583 }, { "epoch": 1.1526486728637027, "grad_norm": 1.5488710403442383, "learning_rate": 9.984894736842105e-05, "loss": 0.5623, "step": 20584 }, { "epoch": 1.1527046701758317, "grad_norm": 1.075756549835205, "learning_rate": 9.984868421052633e-05, "loss": 0.295, "step": 20585 }, { "epoch": 1.1527606674879607, "grad_norm": 1.412971019744873, "learning_rate": 9.984842105263159e-05, "loss": 0.3412, "step": 20586 }, { "epoch": 1.1528166648000897, "grad_norm": 1.327127456665039, "learning_rate": 9.984815789473685e-05, "loss": 0.3978, "step": 20587 }, { "epoch": 1.1528726621122187, "grad_norm": 1.4128966331481934, "learning_rate": 9.98478947368421e-05, "loss": 0.513, "step": 20588 }, { "epoch": 
1.1529286594243477, "grad_norm": 1.3311222791671753, "learning_rate": 9.984763157894738e-05, "loss": 0.3648, "step": 20589 }, { "epoch": 1.1529846567364768, "grad_norm": 1.5850287675857544, "learning_rate": 9.984736842105264e-05, "loss": 0.6009, "step": 20590 }, { "epoch": 1.1530406540486058, "grad_norm": 1.4272699356079102, "learning_rate": 9.98471052631579e-05, "loss": 0.4599, "step": 20591 }, { "epoch": 1.1530966513607348, "grad_norm": 1.2315454483032227, "learning_rate": 9.984684210526316e-05, "loss": 0.4101, "step": 20592 }, { "epoch": 1.1531526486728638, "grad_norm": 1.3531090021133423, "learning_rate": 9.984657894736843e-05, "loss": 0.5647, "step": 20593 }, { "epoch": 1.1532086459849928, "grad_norm": 1.4263765811920166, "learning_rate": 9.984631578947369e-05, "loss": 0.4509, "step": 20594 }, { "epoch": 1.1532646432971219, "grad_norm": 1.655713438987732, "learning_rate": 9.984605263157895e-05, "loss": 0.4888, "step": 20595 }, { "epoch": 1.1533206406092509, "grad_norm": 1.4959297180175781, "learning_rate": 9.984578947368421e-05, "loss": 0.4943, "step": 20596 }, { "epoch": 1.1533766379213797, "grad_norm": 1.14749276638031, "learning_rate": 9.984552631578948e-05, "loss": 0.3827, "step": 20597 }, { "epoch": 1.1534326352335087, "grad_norm": 1.2836191654205322, "learning_rate": 9.984526315789474e-05, "loss": 0.4814, "step": 20598 }, { "epoch": 1.1534886325456377, "grad_norm": 1.748325228691101, "learning_rate": 9.9845e-05, "loss": 0.4994, "step": 20599 }, { "epoch": 1.1535446298577667, "grad_norm": 1.253800392150879, "learning_rate": 9.984473684210526e-05, "loss": 0.3932, "step": 20600 }, { "epoch": 1.1536006271698958, "grad_norm": 1.2215180397033691, "learning_rate": 9.984447368421052e-05, "loss": 0.4599, "step": 20601 }, { "epoch": 1.1536566244820248, "grad_norm": 1.1674331426620483, "learning_rate": 9.98442105263158e-05, "loss": 0.3978, "step": 20602 }, { "epoch": 1.1537126217941538, "grad_norm": 1.3800455331802368, "learning_rate": 9.984394736842106e-05, 
"loss": 0.4135, "step": 20603 }, { "epoch": 1.1537686191062828, "grad_norm": 1.2307895421981812, "learning_rate": 9.984368421052632e-05, "loss": 0.4272, "step": 20604 }, { "epoch": 1.1538246164184118, "grad_norm": 1.4166638851165771, "learning_rate": 9.984342105263158e-05, "loss": 0.4664, "step": 20605 }, { "epoch": 1.1538806137305408, "grad_norm": 1.3545113801956177, "learning_rate": 9.984315789473685e-05, "loss": 0.4026, "step": 20606 }, { "epoch": 1.1539366110426699, "grad_norm": 1.3365097045898438, "learning_rate": 9.984289473684211e-05, "loss": 0.4322, "step": 20607 }, { "epoch": 1.1539926083547989, "grad_norm": 1.1657264232635498, "learning_rate": 9.984263157894738e-05, "loss": 0.3561, "step": 20608 }, { "epoch": 1.154048605666928, "grad_norm": 1.6107335090637207, "learning_rate": 9.984236842105263e-05, "loss": 0.4779, "step": 20609 }, { "epoch": 1.154104602979057, "grad_norm": 1.2185475826263428, "learning_rate": 9.98421052631579e-05, "loss": 0.4163, "step": 20610 }, { "epoch": 1.154160600291186, "grad_norm": 1.1416774988174438, "learning_rate": 9.984184210526316e-05, "loss": 0.3986, "step": 20611 }, { "epoch": 1.154216597603315, "grad_norm": 1.601789951324463, "learning_rate": 9.984157894736843e-05, "loss": 0.5496, "step": 20612 }, { "epoch": 1.154272594915444, "grad_norm": 1.754992127418518, "learning_rate": 9.98413157894737e-05, "loss": 0.4872, "step": 20613 }, { "epoch": 1.154328592227573, "grad_norm": 1.2180742025375366, "learning_rate": 9.984105263157895e-05, "loss": 0.5108, "step": 20614 }, { "epoch": 1.154384589539702, "grad_norm": 1.204809546470642, "learning_rate": 9.984078947368421e-05, "loss": 0.4099, "step": 20615 }, { "epoch": 1.154440586851831, "grad_norm": 1.3774548768997192, "learning_rate": 9.984052631578947e-05, "loss": 0.5555, "step": 20616 }, { "epoch": 1.15449658416396, "grad_norm": 1.548224925994873, "learning_rate": 9.984026315789475e-05, "loss": 0.4562, "step": 20617 }, { "epoch": 1.154552581476089, "grad_norm": 1.4371669292449951, 
"learning_rate": 9.984e-05, "loss": 0.4748, "step": 20618 }, { "epoch": 1.154608578788218, "grad_norm": 1.6385565996170044, "learning_rate": 9.983973684210527e-05, "loss": 0.5372, "step": 20619 }, { "epoch": 1.1546645761003471, "grad_norm": 1.3957754373550415, "learning_rate": 9.983947368421053e-05, "loss": 0.3931, "step": 20620 }, { "epoch": 1.1547205734124761, "grad_norm": 1.3508726358413696, "learning_rate": 9.98392105263158e-05, "loss": 0.591, "step": 20621 }, { "epoch": 1.1547765707246052, "grad_norm": 1.2779326438903809, "learning_rate": 9.983894736842106e-05, "loss": 0.4228, "step": 20622 }, { "epoch": 1.1548325680367342, "grad_norm": 1.6317874193191528, "learning_rate": 9.983868421052632e-05, "loss": 0.7641, "step": 20623 }, { "epoch": 1.1548885653488632, "grad_norm": 1.2334160804748535, "learning_rate": 9.983842105263158e-05, "loss": 0.3859, "step": 20624 }, { "epoch": 1.1549445626609922, "grad_norm": 1.157307744026184, "learning_rate": 9.983815789473685e-05, "loss": 0.3748, "step": 20625 }, { "epoch": 1.1550005599731212, "grad_norm": 1.2137049436569214, "learning_rate": 9.983789473684211e-05, "loss": 0.3819, "step": 20626 }, { "epoch": 1.1550565572852503, "grad_norm": 1.1553492546081543, "learning_rate": 9.983763157894737e-05, "loss": 0.3774, "step": 20627 }, { "epoch": 1.1551125545973793, "grad_norm": 1.3425161838531494, "learning_rate": 9.983736842105263e-05, "loss": 0.359, "step": 20628 }, { "epoch": 1.1551685519095083, "grad_norm": 1.3786944150924683, "learning_rate": 9.98371052631579e-05, "loss": 0.4054, "step": 20629 }, { "epoch": 1.1552245492216373, "grad_norm": 1.623252511024475, "learning_rate": 9.983684210526316e-05, "loss": 0.3692, "step": 20630 }, { "epoch": 1.1552805465337663, "grad_norm": 1.4085582494735718, "learning_rate": 9.983657894736844e-05, "loss": 0.4427, "step": 20631 }, { "epoch": 1.1553365438458953, "grad_norm": 1.2663112878799438, "learning_rate": 9.983631578947368e-05, "loss": 0.3932, "step": 20632 }, { "epoch": 
1.1553925411580244, "grad_norm": 1.4995399713516235, "learning_rate": 9.983605263157894e-05, "loss": 0.5565, "step": 20633 }, { "epoch": 1.1554485384701534, "grad_norm": 1.4257159233093262, "learning_rate": 9.983578947368422e-05, "loss": 0.5338, "step": 20634 }, { "epoch": 1.1555045357822824, "grad_norm": 1.286906361579895, "learning_rate": 9.983552631578948e-05, "loss": 0.4541, "step": 20635 }, { "epoch": 1.1555605330944114, "grad_norm": 1.3742042779922485, "learning_rate": 9.983526315789475e-05, "loss": 0.4161, "step": 20636 }, { "epoch": 1.1556165304065404, "grad_norm": 1.2167344093322754, "learning_rate": 9.9835e-05, "loss": 0.3699, "step": 20637 }, { "epoch": 1.1556725277186695, "grad_norm": 2.8422434329986572, "learning_rate": 9.983473684210527e-05, "loss": 0.4744, "step": 20638 }, { "epoch": 1.1557285250307985, "grad_norm": 1.3396075963974, "learning_rate": 9.983447368421053e-05, "loss": 0.4409, "step": 20639 }, { "epoch": 1.1557845223429275, "grad_norm": 1.3033193349838257, "learning_rate": 9.98342105263158e-05, "loss": 0.3278, "step": 20640 }, { "epoch": 1.1558405196550565, "grad_norm": 1.7807316780090332, "learning_rate": 9.983394736842106e-05, "loss": 0.407, "step": 20641 }, { "epoch": 1.1558965169671855, "grad_norm": 1.249908208847046, "learning_rate": 9.983368421052632e-05, "loss": 0.386, "step": 20642 }, { "epoch": 1.1559525142793146, "grad_norm": 1.384415864944458, "learning_rate": 9.983342105263158e-05, "loss": 0.4447, "step": 20643 }, { "epoch": 1.1560085115914436, "grad_norm": 1.1966683864593506, "learning_rate": 9.983315789473685e-05, "loss": 0.3824, "step": 20644 }, { "epoch": 1.1560645089035726, "grad_norm": 1.3831819295883179, "learning_rate": 9.983289473684211e-05, "loss": 0.4426, "step": 20645 }, { "epoch": 1.1561205062157016, "grad_norm": 1.4994510412216187, "learning_rate": 9.983263157894737e-05, "loss": 0.4747, "step": 20646 }, { "epoch": 1.1561765035278306, "grad_norm": 1.6412901878356934, "learning_rate": 9.983236842105263e-05, "loss": 
0.4636, "step": 20647 }, { "epoch": 1.1562325008399597, "grad_norm": 1.3096891641616821, "learning_rate": 9.98321052631579e-05, "loss": 0.5052, "step": 20648 }, { "epoch": 1.1562884981520887, "grad_norm": 1.3484374284744263, "learning_rate": 9.983184210526317e-05, "loss": 0.453, "step": 20649 }, { "epoch": 1.1563444954642177, "grad_norm": 1.302478551864624, "learning_rate": 9.983157894736843e-05, "loss": 0.3929, "step": 20650 }, { "epoch": 1.1564004927763467, "grad_norm": 1.4896066188812256, "learning_rate": 9.983131578947369e-05, "loss": 0.5056, "step": 20651 }, { "epoch": 1.1564564900884757, "grad_norm": 1.3241114616394043, "learning_rate": 9.983105263157895e-05, "loss": 0.3247, "step": 20652 }, { "epoch": 1.1565124874006047, "grad_norm": 1.3620420694351196, "learning_rate": 9.983078947368422e-05, "loss": 0.4644, "step": 20653 }, { "epoch": 1.1565684847127338, "grad_norm": 1.547397255897522, "learning_rate": 9.983052631578948e-05, "loss": 0.346, "step": 20654 }, { "epoch": 1.1566244820248628, "grad_norm": 1.4957448244094849, "learning_rate": 9.983026315789474e-05, "loss": 0.5126, "step": 20655 }, { "epoch": 1.1566804793369918, "grad_norm": 1.281531810760498, "learning_rate": 9.983e-05, "loss": 0.3975, "step": 20656 }, { "epoch": 1.1567364766491208, "grad_norm": 1.444127082824707, "learning_rate": 9.982973684210527e-05, "loss": 0.4188, "step": 20657 }, { "epoch": 1.1567924739612498, "grad_norm": 1.3772447109222412, "learning_rate": 9.982947368421053e-05, "loss": 0.3654, "step": 20658 }, { "epoch": 1.1568484712733789, "grad_norm": 1.1936354637145996, "learning_rate": 9.982921052631579e-05, "loss": 0.3566, "step": 20659 }, { "epoch": 1.1569044685855079, "grad_norm": 1.5570268630981445, "learning_rate": 9.982894736842105e-05, "loss": 0.3684, "step": 20660 }, { "epoch": 1.156960465897637, "grad_norm": 1.8814806938171387, "learning_rate": 9.982868421052632e-05, "loss": 0.3629, "step": 20661 }, { "epoch": 1.157016463209766, "grad_norm": 1.587518334388733, 
"learning_rate": 9.982842105263158e-05, "loss": 0.4118, "step": 20662 }, { "epoch": 1.157072460521895, "grad_norm": 1.2009578943252563, "learning_rate": 9.982815789473686e-05, "loss": 0.359, "step": 20663 }, { "epoch": 1.157128457834024, "grad_norm": 1.4607332944869995, "learning_rate": 9.98278947368421e-05, "loss": 0.4445, "step": 20664 }, { "epoch": 1.157184455146153, "grad_norm": 2.0582501888275146, "learning_rate": 9.982763157894738e-05, "loss": 0.4486, "step": 20665 }, { "epoch": 1.157240452458282, "grad_norm": 1.9901195764541626, "learning_rate": 9.982736842105264e-05, "loss": 0.4279, "step": 20666 }, { "epoch": 1.157296449770411, "grad_norm": 2.280203342437744, "learning_rate": 9.98271052631579e-05, "loss": 0.5867, "step": 20667 }, { "epoch": 1.15735244708254, "grad_norm": 1.4749000072479248, "learning_rate": 9.982684210526317e-05, "loss": 0.4305, "step": 20668 }, { "epoch": 1.157408444394669, "grad_norm": 1.3941112756729126, "learning_rate": 9.982657894736842e-05, "loss": 0.5329, "step": 20669 }, { "epoch": 1.157464441706798, "grad_norm": 1.2100077867507935, "learning_rate": 9.982631578947369e-05, "loss": 0.4537, "step": 20670 }, { "epoch": 1.157520439018927, "grad_norm": 1.497073769569397, "learning_rate": 9.982605263157895e-05, "loss": 0.4575, "step": 20671 }, { "epoch": 1.1575764363310561, "grad_norm": 1.3474293947219849, "learning_rate": 9.982578947368422e-05, "loss": 0.4577, "step": 20672 }, { "epoch": 1.1576324336431851, "grad_norm": 1.2917795181274414, "learning_rate": 9.982552631578948e-05, "loss": 0.3241, "step": 20673 }, { "epoch": 1.1576884309553142, "grad_norm": 1.353052020072937, "learning_rate": 9.982526315789474e-05, "loss": 0.5881, "step": 20674 }, { "epoch": 1.1577444282674432, "grad_norm": 1.6958577632904053, "learning_rate": 9.9825e-05, "loss": 0.7388, "step": 20675 }, { "epoch": 1.1578004255795722, "grad_norm": 1.5526212453842163, "learning_rate": 9.982473684210527e-05, "loss": 0.4998, "step": 20676 }, { "epoch": 1.1578564228917012, 
"grad_norm": 1.227476716041565, "learning_rate": 9.982447368421053e-05, "loss": 0.4287, "step": 20677 }, { "epoch": 1.1579124202038302, "grad_norm": 1.4508161544799805, "learning_rate": 9.98242105263158e-05, "loss": 0.4634, "step": 20678 }, { "epoch": 1.1579684175159592, "grad_norm": 1.591030240058899, "learning_rate": 9.982394736842105e-05, "loss": 0.5235, "step": 20679 }, { "epoch": 1.1580244148280883, "grad_norm": 1.2589104175567627, "learning_rate": 9.982368421052633e-05, "loss": 0.4743, "step": 20680 }, { "epoch": 1.1580804121402173, "grad_norm": 1.3506782054901123, "learning_rate": 9.982342105263159e-05, "loss": 0.4832, "step": 20681 }, { "epoch": 1.1581364094523463, "grad_norm": 2.7089169025421143, "learning_rate": 9.982315789473685e-05, "loss": 0.3896, "step": 20682 }, { "epoch": 1.1581924067644753, "grad_norm": 1.3554126024246216, "learning_rate": 9.98228947368421e-05, "loss": 0.4197, "step": 20683 }, { "epoch": 1.1582484040766043, "grad_norm": 1.5139764547348022, "learning_rate": 9.982263157894738e-05, "loss": 0.4632, "step": 20684 }, { "epoch": 1.1583044013887334, "grad_norm": 1.5954136848449707, "learning_rate": 9.982236842105264e-05, "loss": 0.5863, "step": 20685 }, { "epoch": 1.1583603987008624, "grad_norm": 1.5175155401229858, "learning_rate": 9.98221052631579e-05, "loss": 0.3828, "step": 20686 }, { "epoch": 1.1584163960129914, "grad_norm": 1.2836077213287354, "learning_rate": 9.982184210526316e-05, "loss": 0.4393, "step": 20687 }, { "epoch": 1.1584723933251204, "grad_norm": 1.2348936796188354, "learning_rate": 9.982157894736842e-05, "loss": 0.4897, "step": 20688 }, { "epoch": 1.1585283906372494, "grad_norm": 1.135157823562622, "learning_rate": 9.982131578947369e-05, "loss": 0.375, "step": 20689 }, { "epoch": 1.1585843879493785, "grad_norm": 1.1472995281219482, "learning_rate": 9.982105263157895e-05, "loss": 0.4163, "step": 20690 }, { "epoch": 1.1586403852615075, "grad_norm": 1.7743520736694336, "learning_rate": 9.982078947368422e-05, "loss": 0.5812, 
"step": 20691 }, { "epoch": 1.1586963825736365, "grad_norm": 1.519546627998352, "learning_rate": 9.982052631578947e-05, "loss": 0.4854, "step": 20692 }, { "epoch": 1.1587523798857655, "grad_norm": 1.6588484048843384, "learning_rate": 9.982026315789474e-05, "loss": 0.4428, "step": 20693 }, { "epoch": 1.1588083771978945, "grad_norm": 1.4660861492156982, "learning_rate": 9.982e-05, "loss": 0.5322, "step": 20694 }, { "epoch": 1.1588643745100236, "grad_norm": 1.6412279605865479, "learning_rate": 9.981973684210528e-05, "loss": 0.3968, "step": 20695 }, { "epoch": 1.1589203718221526, "grad_norm": 1.2905727624893188, "learning_rate": 9.981947368421054e-05, "loss": 0.3578, "step": 20696 }, { "epoch": 1.1589763691342816, "grad_norm": 1.4095821380615234, "learning_rate": 9.98192105263158e-05, "loss": 0.5055, "step": 20697 }, { "epoch": 1.1590323664464106, "grad_norm": 1.3190431594848633, "learning_rate": 9.981894736842106e-05, "loss": 0.4439, "step": 20698 }, { "epoch": 1.1590883637585396, "grad_norm": 1.1977996826171875, "learning_rate": 9.981868421052633e-05, "loss": 0.3916, "step": 20699 }, { "epoch": 1.1591443610706686, "grad_norm": 1.425007700920105, "learning_rate": 9.981842105263159e-05, "loss": 0.3826, "step": 20700 }, { "epoch": 1.1592003583827977, "grad_norm": 1.4595378637313843, "learning_rate": 9.981815789473685e-05, "loss": 0.4243, "step": 20701 }, { "epoch": 1.1592563556949267, "grad_norm": 1.5491729974746704, "learning_rate": 9.981789473684211e-05, "loss": 0.5941, "step": 20702 }, { "epoch": 1.1593123530070557, "grad_norm": 1.4100964069366455, "learning_rate": 9.981763157894737e-05, "loss": 0.5043, "step": 20703 }, { "epoch": 1.1593683503191847, "grad_norm": 1.2770140171051025, "learning_rate": 9.981736842105264e-05, "loss": 0.4207, "step": 20704 }, { "epoch": 1.1594243476313137, "grad_norm": 2.4358294010162354, "learning_rate": 9.98171052631579e-05, "loss": 0.3678, "step": 20705 }, { "epoch": 1.1594803449434428, "grad_norm": 1.441752314567566, "learning_rate": 
9.981684210526316e-05, "loss": 0.5353, "step": 20706 }, { "epoch": 1.1595363422555718, "grad_norm": 1.4326294660568237, "learning_rate": 9.981657894736842e-05, "loss": 0.4606, "step": 20707 }, { "epoch": 1.1595923395677008, "grad_norm": 1.4000003337860107, "learning_rate": 9.98163157894737e-05, "loss": 0.442, "step": 20708 }, { "epoch": 1.1596483368798298, "grad_norm": 1.3965154886245728, "learning_rate": 9.981605263157895e-05, "loss": 0.4879, "step": 20709 }, { "epoch": 1.1597043341919588, "grad_norm": 1.3141099214553833, "learning_rate": 9.981578947368421e-05, "loss": 0.3911, "step": 20710 }, { "epoch": 1.1597603315040879, "grad_norm": 1.27518630027771, "learning_rate": 9.981552631578947e-05, "loss": 0.3508, "step": 20711 }, { "epoch": 1.1598163288162169, "grad_norm": 1.3997254371643066, "learning_rate": 9.981526315789475e-05, "loss": 0.3492, "step": 20712 }, { "epoch": 1.159872326128346, "grad_norm": 1.1966441869735718, "learning_rate": 9.9815e-05, "loss": 0.4952, "step": 20713 }, { "epoch": 1.159928323440475, "grad_norm": 1.563307762145996, "learning_rate": 9.981473684210527e-05, "loss": 0.4078, "step": 20714 }, { "epoch": 1.159984320752604, "grad_norm": 1.7178142070770264, "learning_rate": 9.981447368421053e-05, "loss": 0.4521, "step": 20715 }, { "epoch": 1.160040318064733, "grad_norm": 1.086503028869629, "learning_rate": 9.98142105263158e-05, "loss": 0.3867, "step": 20716 }, { "epoch": 1.160096315376862, "grad_norm": 1.123307466506958, "learning_rate": 9.981394736842106e-05, "loss": 0.346, "step": 20717 }, { "epoch": 1.160152312688991, "grad_norm": 1.4775654077529907, "learning_rate": 9.981368421052633e-05, "loss": 0.5772, "step": 20718 }, { "epoch": 1.16020831000112, "grad_norm": 1.3776569366455078, "learning_rate": 9.981342105263158e-05, "loss": 0.441, "step": 20719 }, { "epoch": 1.160264307313249, "grad_norm": 1.5174689292907715, "learning_rate": 9.981315789473684e-05, "loss": 0.4709, "step": 20720 }, { "epoch": 1.160320304625378, "grad_norm": 
1.0853722095489502, "learning_rate": 9.981289473684211e-05, "loss": 0.4545, "step": 20721 }, { "epoch": 1.160376301937507, "grad_norm": 1.148170828819275, "learning_rate": 9.981263157894737e-05, "loss": 0.368, "step": 20722 }, { "epoch": 1.160432299249636, "grad_norm": 1.2531620264053345, "learning_rate": 9.981236842105264e-05, "loss": 0.395, "step": 20723 }, { "epoch": 1.160488296561765, "grad_norm": 1.188633680343628, "learning_rate": 9.981210526315789e-05, "loss": 0.3, "step": 20724 }, { "epoch": 1.1605442938738941, "grad_norm": 1.3251160383224487, "learning_rate": 9.981184210526316e-05, "loss": 0.4416, "step": 20725 }, { "epoch": 1.1606002911860231, "grad_norm": 1.6037322282791138, "learning_rate": 9.981157894736842e-05, "loss": 0.4821, "step": 20726 }, { "epoch": 1.1606562884981522, "grad_norm": 1.3819395303726196, "learning_rate": 9.98113157894737e-05, "loss": 0.4847, "step": 20727 }, { "epoch": 1.1607122858102812, "grad_norm": 1.2603572607040405, "learning_rate": 9.981105263157896e-05, "loss": 0.401, "step": 20728 }, { "epoch": 1.1607682831224102, "grad_norm": 1.2100732326507568, "learning_rate": 9.981078947368422e-05, "loss": 0.5267, "step": 20729 }, { "epoch": 1.1608242804345392, "grad_norm": 2.2614517211914062, "learning_rate": 9.981052631578948e-05, "loss": 0.4493, "step": 20730 }, { "epoch": 1.1608802777466682, "grad_norm": 1.3757879734039307, "learning_rate": 9.981026315789475e-05, "loss": 0.4935, "step": 20731 }, { "epoch": 1.1609362750587973, "grad_norm": 1.8969056606292725, "learning_rate": 9.981000000000001e-05, "loss": 0.3687, "step": 20732 }, { "epoch": 1.1609922723709263, "grad_norm": 1.234013319015503, "learning_rate": 9.980973684210527e-05, "loss": 0.3627, "step": 20733 }, { "epoch": 1.1610482696830553, "grad_norm": 1.4058195352554321, "learning_rate": 9.980947368421053e-05, "loss": 0.4655, "step": 20734 }, { "epoch": 1.1611042669951843, "grad_norm": 1.357609748840332, "learning_rate": 9.98092105263158e-05, "loss": 0.4541, "step": 20735 }, { 
"epoch": 1.1611602643073133, "grad_norm": 1.443774938583374, "learning_rate": 9.980894736842106e-05, "loss": 0.4308, "step": 20736 }, { "epoch": 1.1612162616194424, "grad_norm": 1.4773916006088257, "learning_rate": 9.980868421052632e-05, "loss": 0.4524, "step": 20737 }, { "epoch": 1.1612722589315714, "grad_norm": 1.1345545053482056, "learning_rate": 9.980842105263158e-05, "loss": 0.489, "step": 20738 }, { "epoch": 1.1613282562437004, "grad_norm": 1.2463173866271973, "learning_rate": 9.980815789473684e-05, "loss": 0.3472, "step": 20739 }, { "epoch": 1.1613842535558294, "grad_norm": 1.7290700674057007, "learning_rate": 9.980789473684211e-05, "loss": 0.424, "step": 20740 }, { "epoch": 1.1614402508679584, "grad_norm": 1.5474966764450073, "learning_rate": 9.980763157894737e-05, "loss": 0.415, "step": 20741 }, { "epoch": 1.1614962481800872, "grad_norm": 1.5076913833618164, "learning_rate": 9.980736842105263e-05, "loss": 0.4521, "step": 20742 }, { "epoch": 1.1615522454922163, "grad_norm": 1.579359531402588, "learning_rate": 9.980710526315789e-05, "loss": 0.5414, "step": 20743 }, { "epoch": 1.1616082428043453, "grad_norm": 1.6798981428146362, "learning_rate": 9.980684210526317e-05, "loss": 0.3569, "step": 20744 }, { "epoch": 1.1616642401164743, "grad_norm": 1.8302743434906006, "learning_rate": 9.980657894736843e-05, "loss": 0.4196, "step": 20745 }, { "epoch": 1.1617202374286033, "grad_norm": 1.3661141395568848, "learning_rate": 9.98063157894737e-05, "loss": 0.4603, "step": 20746 }, { "epoch": 1.1617762347407323, "grad_norm": 1.3627281188964844, "learning_rate": 9.980605263157894e-05, "loss": 0.3453, "step": 20747 }, { "epoch": 1.1618322320528613, "grad_norm": 1.2853434085845947, "learning_rate": 9.980578947368422e-05, "loss": 0.3704, "step": 20748 }, { "epoch": 1.1618882293649904, "grad_norm": 1.2826396226882935, "learning_rate": 9.980552631578948e-05, "loss": 0.4098, "step": 20749 }, { "epoch": 1.1619442266771194, "grad_norm": 1.340430736541748, "learning_rate": 
9.980526315789475e-05, "loss": 0.5137, "step": 20750 }, { "epoch": 1.1620002239892484, "grad_norm": 1.456680417060852, "learning_rate": 9.9805e-05, "loss": 0.4012, "step": 20751 }, { "epoch": 1.1620562213013774, "grad_norm": 1.3966798782348633, "learning_rate": 9.980473684210527e-05, "loss": 0.4541, "step": 20752 }, { "epoch": 1.1621122186135064, "grad_norm": 1.2240643501281738, "learning_rate": 9.980447368421053e-05, "loss": 0.51, "step": 20753 }, { "epoch": 1.1621682159256355, "grad_norm": 1.1132729053497314, "learning_rate": 9.980421052631579e-05, "loss": 0.3773, "step": 20754 }, { "epoch": 1.1622242132377645, "grad_norm": 1.4580998420715332, "learning_rate": 9.980394736842106e-05, "loss": 0.4872, "step": 20755 }, { "epoch": 1.1622802105498935, "grad_norm": 1.3250617980957031, "learning_rate": 9.980368421052631e-05, "loss": 0.3912, "step": 20756 }, { "epoch": 1.1623362078620225, "grad_norm": 1.5409741401672363, "learning_rate": 9.980342105263158e-05, "loss": 0.3819, "step": 20757 }, { "epoch": 1.1623922051741515, "grad_norm": 1.1932024955749512, "learning_rate": 9.980315789473684e-05, "loss": 0.4859, "step": 20758 }, { "epoch": 1.1624482024862806, "grad_norm": 1.4459530115127563, "learning_rate": 9.980289473684212e-05, "loss": 0.4208, "step": 20759 }, { "epoch": 1.1625041997984096, "grad_norm": 1.2691309452056885, "learning_rate": 9.980263157894738e-05, "loss": 0.4219, "step": 20760 }, { "epoch": 1.1625601971105386, "grad_norm": 1.4836251735687256, "learning_rate": 9.980236842105264e-05, "loss": 0.3515, "step": 20761 }, { "epoch": 1.1626161944226676, "grad_norm": 1.3650662899017334, "learning_rate": 9.98021052631579e-05, "loss": 0.4355, "step": 20762 }, { "epoch": 1.1626721917347966, "grad_norm": 1.3967193365097046, "learning_rate": 9.980184210526317e-05, "loss": 0.4403, "step": 20763 }, { "epoch": 1.1627281890469257, "grad_norm": 1.680420160293579, "learning_rate": 9.980157894736843e-05, "loss": 0.6033, "step": 20764 }, { "epoch": 1.1627841863590547, 
"grad_norm": 1.2812414169311523, "learning_rate": 9.980131578947369e-05, "loss": 0.3003, "step": 20765 }, { "epoch": 1.1628401836711837, "grad_norm": 1.4538753032684326, "learning_rate": 9.980105263157895e-05, "loss": 0.4283, "step": 20766 }, { "epoch": 1.1628961809833127, "grad_norm": 1.4691946506500244, "learning_rate": 9.980078947368422e-05, "loss": 0.4283, "step": 20767 }, { "epoch": 1.1629521782954417, "grad_norm": 1.609779953956604, "learning_rate": 9.980052631578948e-05, "loss": 0.4358, "step": 20768 }, { "epoch": 1.1630081756075707, "grad_norm": 1.4415971040725708, "learning_rate": 9.980026315789474e-05, "loss": 0.5102, "step": 20769 }, { "epoch": 1.1630641729196998, "grad_norm": 1.41593337059021, "learning_rate": 9.98e-05, "loss": 0.4327, "step": 20770 }, { "epoch": 1.1631201702318288, "grad_norm": 5.720890045166016, "learning_rate": 9.979973684210526e-05, "loss": 0.5614, "step": 20771 }, { "epoch": 1.1631761675439578, "grad_norm": 1.302211880683899, "learning_rate": 9.979947368421053e-05, "loss": 0.4625, "step": 20772 }, { "epoch": 1.1632321648560868, "grad_norm": 1.50058114528656, "learning_rate": 9.979921052631579e-05, "loss": 0.4799, "step": 20773 }, { "epoch": 1.1632881621682158, "grad_norm": 1.2870792150497437, "learning_rate": 9.979894736842105e-05, "loss": 0.4244, "step": 20774 }, { "epoch": 1.1633441594803449, "grad_norm": 1.3122467994689941, "learning_rate": 9.979868421052631e-05, "loss": 0.3458, "step": 20775 }, { "epoch": 1.1634001567924739, "grad_norm": 2.0894978046417236, "learning_rate": 9.979842105263159e-05, "loss": 0.5191, "step": 20776 }, { "epoch": 1.163456154104603, "grad_norm": 1.4296149015426636, "learning_rate": 9.979815789473685e-05, "loss": 0.4258, "step": 20777 }, { "epoch": 1.163512151416732, "grad_norm": 1.4373489618301392, "learning_rate": 9.979789473684212e-05, "loss": 0.4261, "step": 20778 }, { "epoch": 1.163568148728861, "grad_norm": 1.5073524713516235, "learning_rate": 9.979763157894736e-05, "loss": 0.4356, "step": 20779 
}, { "epoch": 1.16362414604099, "grad_norm": 1.6112574338912964, "learning_rate": 9.979736842105264e-05, "loss": 0.606, "step": 20780 }, { "epoch": 1.163680143353119, "grad_norm": 1.4365694522857666, "learning_rate": 9.97971052631579e-05, "loss": 0.454, "step": 20781 }, { "epoch": 1.163736140665248, "grad_norm": 1.2466692924499512, "learning_rate": 9.979684210526317e-05, "loss": 0.3782, "step": 20782 }, { "epoch": 1.163792137977377, "grad_norm": 1.4135067462921143, "learning_rate": 9.979657894736843e-05, "loss": 0.5012, "step": 20783 }, { "epoch": 1.163848135289506, "grad_norm": 1.2118281126022339, "learning_rate": 9.979631578947369e-05, "loss": 0.4247, "step": 20784 }, { "epoch": 1.163904132601635, "grad_norm": 1.41605544090271, "learning_rate": 9.979605263157895e-05, "loss": 0.4478, "step": 20785 }, { "epoch": 1.163960129913764, "grad_norm": 1.5420175790786743, "learning_rate": 9.979578947368422e-05, "loss": 0.4679, "step": 20786 }, { "epoch": 1.164016127225893, "grad_norm": 1.2664000988006592, "learning_rate": 9.979552631578948e-05, "loss": 0.4349, "step": 20787 }, { "epoch": 1.1640721245380221, "grad_norm": 1.2067443132400513, "learning_rate": 9.979526315789474e-05, "loss": 0.4742, "step": 20788 }, { "epoch": 1.1641281218501511, "grad_norm": 1.4331861734390259, "learning_rate": 9.9795e-05, "loss": 0.4219, "step": 20789 }, { "epoch": 1.1641841191622802, "grad_norm": 2.042576789855957, "learning_rate": 9.979473684210526e-05, "loss": 0.4347, "step": 20790 }, { "epoch": 1.1642401164744092, "grad_norm": 1.2423555850982666, "learning_rate": 9.979447368421054e-05, "loss": 0.4861, "step": 20791 }, { "epoch": 1.1642961137865382, "grad_norm": 1.7242858409881592, "learning_rate": 9.97942105263158e-05, "loss": 0.6151, "step": 20792 }, { "epoch": 1.1643521110986672, "grad_norm": 1.4775879383087158, "learning_rate": 9.979394736842105e-05, "loss": 0.4475, "step": 20793 }, { "epoch": 1.1644081084107962, "grad_norm": 1.215835452079773, "learning_rate": 9.979368421052631e-05, 
"loss": 0.4152, "step": 20794 }, { "epoch": 1.1644641057229252, "grad_norm": 1.7902518510818481, "learning_rate": 9.979342105263159e-05, "loss": 0.5051, "step": 20795 }, { "epoch": 1.1645201030350543, "grad_norm": 1.3555628061294556, "learning_rate": 9.979315789473685e-05, "loss": 0.4597, "step": 20796 }, { "epoch": 1.1645761003471833, "grad_norm": 1.3614206314086914, "learning_rate": 9.979289473684211e-05, "loss": 0.4464, "step": 20797 }, { "epoch": 1.1646320976593123, "grad_norm": 1.5785999298095703, "learning_rate": 9.979263157894737e-05, "loss": 0.494, "step": 20798 }, { "epoch": 1.1646880949714413, "grad_norm": 1.6326556205749512, "learning_rate": 9.979236842105264e-05, "loss": 0.5103, "step": 20799 }, { "epoch": 1.1647440922835703, "grad_norm": 1.3406559228897095, "learning_rate": 9.97921052631579e-05, "loss": 0.4389, "step": 20800 }, { "epoch": 1.1648000895956994, "grad_norm": 1.7493377923965454, "learning_rate": 9.979184210526317e-05, "loss": 0.5527, "step": 20801 }, { "epoch": 1.1648560869078284, "grad_norm": 1.2731801271438599, "learning_rate": 9.979157894736842e-05, "loss": 0.4271, "step": 20802 }, { "epoch": 1.1649120842199574, "grad_norm": 1.9941129684448242, "learning_rate": 9.979131578947369e-05, "loss": 0.4659, "step": 20803 }, { "epoch": 1.1649680815320864, "grad_norm": 1.557101845741272, "learning_rate": 9.979105263157895e-05, "loss": 0.5432, "step": 20804 }, { "epoch": 1.1650240788442154, "grad_norm": 1.1992847919464111, "learning_rate": 9.979078947368423e-05, "loss": 0.4119, "step": 20805 }, { "epoch": 1.1650800761563445, "grad_norm": 1.6048921346664429, "learning_rate": 9.979052631578947e-05, "loss": 0.5382, "step": 20806 }, { "epoch": 1.1651360734684735, "grad_norm": 1.4330283403396606, "learning_rate": 9.979026315789473e-05, "loss": 0.498, "step": 20807 }, { "epoch": 1.1651920707806025, "grad_norm": 1.2458750009536743, "learning_rate": 9.979e-05, "loss": 0.3971, "step": 20808 }, { "epoch": 1.1652480680927315, "grad_norm": 1.2125712633132935, 
"learning_rate": 9.978973684210526e-05, "loss": 0.3569, "step": 20809 }, { "epoch": 1.1653040654048605, "grad_norm": 1.4834809303283691, "learning_rate": 9.978947368421054e-05, "loss": 0.3777, "step": 20810 }, { "epoch": 1.1653600627169896, "grad_norm": 1.3419028520584106, "learning_rate": 9.978921052631578e-05, "loss": 0.4946, "step": 20811 }, { "epoch": 1.1654160600291186, "grad_norm": 1.4450846910476685, "learning_rate": 9.978894736842106e-05, "loss": 0.4897, "step": 20812 }, { "epoch": 1.1654720573412476, "grad_norm": 1.1625169515609741, "learning_rate": 9.978868421052632e-05, "loss": 0.3192, "step": 20813 }, { "epoch": 1.1655280546533766, "grad_norm": 1.422334909439087, "learning_rate": 9.978842105263159e-05, "loss": 0.413, "step": 20814 }, { "epoch": 1.1655840519655056, "grad_norm": 1.3449368476867676, "learning_rate": 9.978815789473685e-05, "loss": 0.477, "step": 20815 }, { "epoch": 1.1656400492776346, "grad_norm": 1.451908826828003, "learning_rate": 9.978789473684211e-05, "loss": 0.4372, "step": 20816 }, { "epoch": 1.1656960465897637, "grad_norm": 1.3651716709136963, "learning_rate": 9.978763157894737e-05, "loss": 0.4729, "step": 20817 }, { "epoch": 1.1657520439018927, "grad_norm": 1.708986759185791, "learning_rate": 9.978736842105264e-05, "loss": 0.4606, "step": 20818 }, { "epoch": 1.1658080412140217, "grad_norm": 1.6787852048873901, "learning_rate": 9.97871052631579e-05, "loss": 0.496, "step": 20819 }, { "epoch": 1.1658640385261507, "grad_norm": 1.1393404006958008, "learning_rate": 9.978684210526316e-05, "loss": 0.3712, "step": 20820 }, { "epoch": 1.1659200358382797, "grad_norm": 1.606389045715332, "learning_rate": 9.978657894736842e-05, "loss": 0.468, "step": 20821 }, { "epoch": 1.1659760331504088, "grad_norm": 1.4294898509979248, "learning_rate": 9.97863157894737e-05, "loss": 0.4755, "step": 20822 }, { "epoch": 1.1660320304625378, "grad_norm": 1.9825870990753174, "learning_rate": 9.978605263157896e-05, "loss": 0.4481, "step": 20823 }, { "epoch": 
1.1660880277746668, "grad_norm": 1.1417534351348877, "learning_rate": 9.978578947368421e-05, "loss": 0.3835, "step": 20824 }, { "epoch": 1.1661440250867958, "grad_norm": 1.6894428730010986, "learning_rate": 9.978552631578947e-05, "loss": 0.6405, "step": 20825 }, { "epoch": 1.1662000223989248, "grad_norm": 1.5436031818389893, "learning_rate": 9.978526315789473e-05, "loss": 0.5195, "step": 20826 }, { "epoch": 1.1662560197110539, "grad_norm": 1.8321113586425781, "learning_rate": 9.978500000000001e-05, "loss": 0.4712, "step": 20827 }, { "epoch": 1.1663120170231829, "grad_norm": 1.4048885107040405, "learning_rate": 9.978473684210527e-05, "loss": 0.4554, "step": 20828 }, { "epoch": 1.166368014335312, "grad_norm": 1.2561533451080322, "learning_rate": 9.978447368421053e-05, "loss": 0.4446, "step": 20829 }, { "epoch": 1.166424011647441, "grad_norm": 1.5617958307266235, "learning_rate": 9.978421052631579e-05, "loss": 0.4572, "step": 20830 }, { "epoch": 1.16648000895957, "grad_norm": 2.4134650230407715, "learning_rate": 9.978394736842106e-05, "loss": 0.3244, "step": 20831 }, { "epoch": 1.166536006271699, "grad_norm": 2.2136783599853516, "learning_rate": 9.978368421052632e-05, "loss": 0.5679, "step": 20832 }, { "epoch": 1.166592003583828, "grad_norm": 1.5566372871398926, "learning_rate": 9.978342105263159e-05, "loss": 0.3798, "step": 20833 }, { "epoch": 1.166648000895957, "grad_norm": 1.5471076965332031, "learning_rate": 9.978315789473684e-05, "loss": 0.2983, "step": 20834 }, { "epoch": 1.166703998208086, "grad_norm": 1.1122899055480957, "learning_rate": 9.978289473684211e-05, "loss": 0.3883, "step": 20835 }, { "epoch": 1.166759995520215, "grad_norm": 1.3770607709884644, "learning_rate": 9.978263157894737e-05, "loss": 0.414, "step": 20836 }, { "epoch": 1.166815992832344, "grad_norm": 1.6381101608276367, "learning_rate": 9.978236842105265e-05, "loss": 0.4451, "step": 20837 }, { "epoch": 1.166871990144473, "grad_norm": 1.3374508619308472, "learning_rate": 9.97821052631579e-05, 
"loss": 0.6068, "step": 20838 }, { "epoch": 1.166927987456602, "grad_norm": 1.1922324895858765, "learning_rate": 9.978184210526317e-05, "loss": 0.4348, "step": 20839 }, { "epoch": 1.166983984768731, "grad_norm": 1.3648905754089355, "learning_rate": 9.978157894736842e-05, "loss": 0.5869, "step": 20840 }, { "epoch": 1.1670399820808601, "grad_norm": 1.3370119333267212, "learning_rate": 9.978131578947368e-05, "loss": 0.3223, "step": 20841 }, { "epoch": 1.1670959793929891, "grad_norm": 1.3336783647537231, "learning_rate": 9.978105263157896e-05, "loss": 0.3605, "step": 20842 }, { "epoch": 1.1671519767051182, "grad_norm": 1.1952753067016602, "learning_rate": 9.978078947368422e-05, "loss": 0.409, "step": 20843 }, { "epoch": 1.1672079740172472, "grad_norm": 1.3847600221633911, "learning_rate": 9.978052631578948e-05, "loss": 0.473, "step": 20844 }, { "epoch": 1.1672639713293762, "grad_norm": 1.280964970588684, "learning_rate": 9.978026315789474e-05, "loss": 0.4478, "step": 20845 }, { "epoch": 1.1673199686415052, "grad_norm": 1.550473928451538, "learning_rate": 9.978000000000001e-05, "loss": 0.3973, "step": 20846 }, { "epoch": 1.1673759659536342, "grad_norm": 1.2537569999694824, "learning_rate": 9.977973684210527e-05, "loss": 0.4855, "step": 20847 }, { "epoch": 1.1674319632657633, "grad_norm": 1.225132703781128, "learning_rate": 9.977947368421053e-05, "loss": 0.3958, "step": 20848 }, { "epoch": 1.1674879605778923, "grad_norm": 1.2652547359466553, "learning_rate": 9.977921052631579e-05, "loss": 0.4369, "step": 20849 }, { "epoch": 1.1675439578900213, "grad_norm": 1.440535306930542, "learning_rate": 9.977894736842106e-05, "loss": 0.5253, "step": 20850 }, { "epoch": 1.1675999552021503, "grad_norm": 1.3133693933486938, "learning_rate": 9.977868421052632e-05, "loss": 0.3711, "step": 20851 }, { "epoch": 1.1676559525142793, "grad_norm": 1.2892177104949951, "learning_rate": 9.977842105263158e-05, "loss": 0.4003, "step": 20852 }, { "epoch": 1.1677119498264084, "grad_norm": 
1.1579002141952515, "learning_rate": 9.977815789473684e-05, "loss": 0.4041, "step": 20853 }, { "epoch": 1.1677679471385374, "grad_norm": 1.5492885112762451, "learning_rate": 9.977789473684212e-05, "loss": 0.4028, "step": 20854 }, { "epoch": 1.1678239444506664, "grad_norm": 1.4084981679916382, "learning_rate": 9.977763157894737e-05, "loss": 0.5682, "step": 20855 }, { "epoch": 1.1678799417627954, "grad_norm": 1.1969753503799438, "learning_rate": 9.977736842105265e-05, "loss": 0.4436, "step": 20856 }, { "epoch": 1.1679359390749244, "grad_norm": 1.3001867532730103, "learning_rate": 9.97771052631579e-05, "loss": 0.3737, "step": 20857 }, { "epoch": 1.1679919363870535, "grad_norm": 1.2896686792373657, "learning_rate": 9.977684210526315e-05, "loss": 0.3692, "step": 20858 }, { "epoch": 1.1680479336991825, "grad_norm": 1.4139043092727661, "learning_rate": 9.977657894736843e-05, "loss": 0.4735, "step": 20859 }, { "epoch": 1.1681039310113115, "grad_norm": 1.5470494031906128, "learning_rate": 9.977631578947369e-05, "loss": 0.5146, "step": 20860 }, { "epoch": 1.1681599283234405, "grad_norm": 1.162338376045227, "learning_rate": 9.977605263157895e-05, "loss": 0.3699, "step": 20861 }, { "epoch": 1.1682159256355695, "grad_norm": 1.4471659660339355, "learning_rate": 9.97757894736842e-05, "loss": 0.571, "step": 20862 }, { "epoch": 1.1682719229476985, "grad_norm": 1.1782408952713013, "learning_rate": 9.977552631578948e-05, "loss": 0.4531, "step": 20863 }, { "epoch": 1.1683279202598276, "grad_norm": 1.2059881687164307, "learning_rate": 9.977526315789474e-05, "loss": 0.4108, "step": 20864 }, { "epoch": 1.1683839175719566, "grad_norm": 1.3328951597213745, "learning_rate": 9.977500000000001e-05, "loss": 0.5086, "step": 20865 }, { "epoch": 1.1684399148840856, "grad_norm": 1.190344214439392, "learning_rate": 9.977473684210526e-05, "loss": 0.4355, "step": 20866 }, { "epoch": 1.1684959121962146, "grad_norm": 2.1711649894714355, "learning_rate": 9.977447368421053e-05, "loss": 0.404, "step": 
20867 }, { "epoch": 1.1685519095083436, "grad_norm": 1.2574708461761475, "learning_rate": 9.977421052631579e-05, "loss": 0.3654, "step": 20868 }, { "epoch": 1.1686079068204727, "grad_norm": 1.353613018989563, "learning_rate": 9.977394736842107e-05, "loss": 0.396, "step": 20869 }, { "epoch": 1.1686639041326017, "grad_norm": 1.3925970792770386, "learning_rate": 9.977368421052633e-05, "loss": 0.5186, "step": 20870 }, { "epoch": 1.1687199014447307, "grad_norm": 1.6180205345153809, "learning_rate": 9.977342105263158e-05, "loss": 0.4202, "step": 20871 }, { "epoch": 1.1687758987568597, "grad_norm": 1.2593497037887573, "learning_rate": 9.977315789473684e-05, "loss": 0.4175, "step": 20872 }, { "epoch": 1.1688318960689887, "grad_norm": 1.4117871522903442, "learning_rate": 9.977289473684212e-05, "loss": 0.3986, "step": 20873 }, { "epoch": 1.1688878933811178, "grad_norm": 1.1125867366790771, "learning_rate": 9.977263157894738e-05, "loss": 0.296, "step": 20874 }, { "epoch": 1.1689438906932468, "grad_norm": 1.1166563034057617, "learning_rate": 9.977236842105264e-05, "loss": 0.3429, "step": 20875 }, { "epoch": 1.1689998880053758, "grad_norm": 1.1332788467407227, "learning_rate": 9.97721052631579e-05, "loss": 0.3374, "step": 20876 }, { "epoch": 1.1690558853175048, "grad_norm": 1.4194594621658325, "learning_rate": 9.977184210526316e-05, "loss": 0.3917, "step": 20877 }, { "epoch": 1.1691118826296338, "grad_norm": 1.3464772701263428, "learning_rate": 9.977157894736843e-05, "loss": 0.5524, "step": 20878 }, { "epoch": 1.1691678799417629, "grad_norm": 1.4248733520507812, "learning_rate": 9.977131578947369e-05, "loss": 0.6119, "step": 20879 }, { "epoch": 1.1692238772538919, "grad_norm": 1.4053601026535034, "learning_rate": 9.977105263157895e-05, "loss": 0.4178, "step": 20880 }, { "epoch": 1.169279874566021, "grad_norm": 1.308698058128357, "learning_rate": 9.977078947368421e-05, "loss": 0.4344, "step": 20881 }, { "epoch": 1.16933587187815, "grad_norm": 1.3528685569763184, "learning_rate": 
9.977052631578948e-05, "loss": 0.5386, "step": 20882 }, { "epoch": 1.169391869190279, "grad_norm": 1.090651273727417, "learning_rate": 9.977026315789474e-05, "loss": 0.4617, "step": 20883 }, { "epoch": 1.169447866502408, "grad_norm": 2.5601022243499756, "learning_rate": 9.977e-05, "loss": 0.4759, "step": 20884 }, { "epoch": 1.169503863814537, "grad_norm": 1.2338849306106567, "learning_rate": 9.976973684210526e-05, "loss": 0.3925, "step": 20885 }, { "epoch": 1.169559861126666, "grad_norm": 1.5448415279388428, "learning_rate": 9.976947368421053e-05, "loss": 0.6681, "step": 20886 }, { "epoch": 1.169615858438795, "grad_norm": 1.7976655960083008, "learning_rate": 9.97692105263158e-05, "loss": 0.4577, "step": 20887 }, { "epoch": 1.169671855750924, "grad_norm": 1.1426849365234375, "learning_rate": 9.976894736842107e-05, "loss": 0.4016, "step": 20888 }, { "epoch": 1.169727853063053, "grad_norm": 1.3843919038772583, "learning_rate": 9.976868421052631e-05, "loss": 0.5273, "step": 20889 }, { "epoch": 1.169783850375182, "grad_norm": 1.341097116470337, "learning_rate": 9.976842105263159e-05, "loss": 0.4748, "step": 20890 }, { "epoch": 1.169839847687311, "grad_norm": 1.3938333988189697, "learning_rate": 9.976815789473685e-05, "loss": 0.586, "step": 20891 }, { "epoch": 1.16989584499944, "grad_norm": 1.2222189903259277, "learning_rate": 9.976789473684211e-05, "loss": 0.4304, "step": 20892 }, { "epoch": 1.1699518423115691, "grad_norm": 1.2287477254867554, "learning_rate": 9.976763157894738e-05, "loss": 0.3932, "step": 20893 }, { "epoch": 1.1700078396236981, "grad_norm": 1.4622381925582886, "learning_rate": 9.976736842105263e-05, "loss": 0.4012, "step": 20894 }, { "epoch": 1.1700638369358272, "grad_norm": 1.5046672821044922, "learning_rate": 9.97671052631579e-05, "loss": 0.5457, "step": 20895 }, { "epoch": 1.1701198342479562, "grad_norm": 1.8150392770767212, "learning_rate": 9.976684210526316e-05, "loss": 0.6839, "step": 20896 }, { "epoch": 1.1701758315600852, "grad_norm": 
1.2193825244903564, "learning_rate": 9.976657894736843e-05, "loss": 0.4728, "step": 20897 }, { "epoch": 1.1702318288722142, "grad_norm": 1.1769912242889404, "learning_rate": 9.976631578947368e-05, "loss": 0.3759, "step": 20898 }, { "epoch": 1.1702878261843432, "grad_norm": 1.4072461128234863, "learning_rate": 9.976605263157895e-05, "loss": 0.3223, "step": 20899 }, { "epoch": 1.1703438234964723, "grad_norm": 1.457567572593689, "learning_rate": 9.976578947368421e-05, "loss": 0.5848, "step": 20900 }, { "epoch": 1.1703998208086013, "grad_norm": 1.1242045164108276, "learning_rate": 9.976552631578949e-05, "loss": 0.3526, "step": 20901 }, { "epoch": 1.1704558181207303, "grad_norm": 5.152798175811768, "learning_rate": 9.976526315789474e-05, "loss": 0.5271, "step": 20902 }, { "epoch": 1.1705118154328593, "grad_norm": 1.4303909540176392, "learning_rate": 9.9765e-05, "loss": 0.4677, "step": 20903 }, { "epoch": 1.1705678127449883, "grad_norm": 1.189496397972107, "learning_rate": 9.976473684210526e-05, "loss": 0.3779, "step": 20904 }, { "epoch": 1.1706238100571174, "grad_norm": 1.820021629333496, "learning_rate": 9.976447368421054e-05, "loss": 0.5416, "step": 20905 }, { "epoch": 1.1706798073692464, "grad_norm": 2.0237650871276855, "learning_rate": 9.97642105263158e-05, "loss": 0.6019, "step": 20906 }, { "epoch": 1.1707358046813754, "grad_norm": 1.4961506128311157, "learning_rate": 9.976394736842106e-05, "loss": 0.4167, "step": 20907 }, { "epoch": 1.1707918019935044, "grad_norm": 1.4235213994979858, "learning_rate": 9.976368421052632e-05, "loss": 0.4684, "step": 20908 }, { "epoch": 1.1708477993056334, "grad_norm": 1.7053412199020386, "learning_rate": 9.976342105263158e-05, "loss": 0.404, "step": 20909 }, { "epoch": 1.1709037966177624, "grad_norm": 1.4406343698501587, "learning_rate": 9.976315789473685e-05, "loss": 0.4105, "step": 20910 }, { "epoch": 1.1709597939298915, "grad_norm": 1.739396572113037, "learning_rate": 9.976289473684211e-05, "loss": 0.551, "step": 20911 }, { 
"epoch": 1.1710157912420205, "grad_norm": 1.2890076637268066, "learning_rate": 9.976263157894737e-05, "loss": 0.4523, "step": 20912 }, { "epoch": 1.1710717885541495, "grad_norm": 1.667888879776001, "learning_rate": 9.976236842105263e-05, "loss": 0.3999, "step": 20913 }, { "epoch": 1.1711277858662785, "grad_norm": 1.4856075048446655, "learning_rate": 9.97621052631579e-05, "loss": 0.5666, "step": 20914 }, { "epoch": 1.1711837831784075, "grad_norm": 1.2786290645599365, "learning_rate": 9.976184210526316e-05, "loss": 0.4664, "step": 20915 }, { "epoch": 1.1712397804905366, "grad_norm": 1.2476434707641602, "learning_rate": 9.976157894736842e-05, "loss": 0.4253, "step": 20916 }, { "epoch": 1.1712957778026656, "grad_norm": 1.4082708358764648, "learning_rate": 9.976131578947368e-05, "loss": 0.4569, "step": 20917 }, { "epoch": 1.1713517751147946, "grad_norm": 4.578914642333984, "learning_rate": 9.976105263157895e-05, "loss": 0.3723, "step": 20918 }, { "epoch": 1.1714077724269236, "grad_norm": 1.360572338104248, "learning_rate": 9.976078947368421e-05, "loss": 0.4925, "step": 20919 }, { "epoch": 1.1714637697390526, "grad_norm": 1.3907374143600464, "learning_rate": 9.976052631578949e-05, "loss": 0.5032, "step": 20920 }, { "epoch": 1.1715197670511817, "grad_norm": 1.2744126319885254, "learning_rate": 9.976026315789473e-05, "loss": 0.3994, "step": 20921 }, { "epoch": 1.1715757643633107, "grad_norm": 1.6579738855361938, "learning_rate": 9.976000000000001e-05, "loss": 0.5932, "step": 20922 }, { "epoch": 1.1716317616754397, "grad_norm": 1.3585381507873535, "learning_rate": 9.975973684210527e-05, "loss": 0.4995, "step": 20923 }, { "epoch": 1.1716877589875687, "grad_norm": 1.6113086938858032, "learning_rate": 9.975947368421054e-05, "loss": 0.3571, "step": 20924 }, { "epoch": 1.1717437562996977, "grad_norm": 1.8635375499725342, "learning_rate": 9.97592105263158e-05, "loss": 0.6094, "step": 20925 }, { "epoch": 1.1717997536118268, "grad_norm": 1.3696166276931763, "learning_rate": 
9.975894736842106e-05, "loss": 0.5199, "step": 20926 }, { "epoch": 1.1718557509239558, "grad_norm": 1.7059210538864136, "learning_rate": 9.975868421052632e-05, "loss": 0.6103, "step": 20927 }, { "epoch": 1.1719117482360846, "grad_norm": 1.4262621402740479, "learning_rate": 9.975842105263158e-05, "loss": 0.5573, "step": 20928 }, { "epoch": 1.1719677455482136, "grad_norm": 1.6460988521575928, "learning_rate": 9.975815789473685e-05, "loss": 0.6694, "step": 20929 }, { "epoch": 1.1720237428603426, "grad_norm": 1.3685556650161743, "learning_rate": 9.975789473684211e-05, "loss": 0.4285, "step": 20930 }, { "epoch": 1.1720797401724716, "grad_norm": 1.4430515766143799, "learning_rate": 9.975763157894737e-05, "loss": 0.4029, "step": 20931 }, { "epoch": 1.1721357374846006, "grad_norm": 1.3129043579101562, "learning_rate": 9.975736842105263e-05, "loss": 0.4033, "step": 20932 }, { "epoch": 1.1721917347967297, "grad_norm": 1.3137214183807373, "learning_rate": 9.97571052631579e-05, "loss": 0.3447, "step": 20933 }, { "epoch": 1.1722477321088587, "grad_norm": 1.2377527952194214, "learning_rate": 9.975684210526316e-05, "loss": 0.4837, "step": 20934 }, { "epoch": 1.1723037294209877, "grad_norm": 1.4682475328445435, "learning_rate": 9.975657894736842e-05, "loss": 0.3254, "step": 20935 }, { "epoch": 1.1723597267331167, "grad_norm": 1.4442527294158936, "learning_rate": 9.975631578947368e-05, "loss": 0.5485, "step": 20936 }, { "epoch": 1.1724157240452457, "grad_norm": 1.2747081518173218, "learning_rate": 9.975605263157896e-05, "loss": 0.3679, "step": 20937 }, { "epoch": 1.1724717213573748, "grad_norm": 1.3651938438415527, "learning_rate": 9.975578947368422e-05, "loss": 0.5902, "step": 20938 }, { "epoch": 1.1725277186695038, "grad_norm": 1.3734140396118164, "learning_rate": 9.975552631578948e-05, "loss": 0.4437, "step": 20939 }, { "epoch": 1.1725837159816328, "grad_norm": 3.9022696018218994, "learning_rate": 9.975526315789474e-05, "loss": 0.4237, "step": 20940 }, { "epoch": 
1.1726397132937618, "grad_norm": 1.5584402084350586, "learning_rate": 9.975500000000001e-05, "loss": 0.496, "step": 20941 }, { "epoch": 1.1726957106058908, "grad_norm": 1.225854516029358, "learning_rate": 9.975473684210527e-05, "loss": 0.4197, "step": 20942 }, { "epoch": 1.1727517079180199, "grad_norm": 1.3228957653045654, "learning_rate": 9.975447368421054e-05, "loss": 0.4086, "step": 20943 }, { "epoch": 1.1728077052301489, "grad_norm": 1.4241491556167603, "learning_rate": 9.975421052631579e-05, "loss": 0.5034, "step": 20944 }, { "epoch": 1.172863702542278, "grad_norm": 1.351749300956726, "learning_rate": 9.975394736842105e-05, "loss": 0.4078, "step": 20945 }, { "epoch": 1.172919699854407, "grad_norm": 1.266481637954712, "learning_rate": 9.975368421052632e-05, "loss": 0.4345, "step": 20946 }, { "epoch": 1.172975697166536, "grad_norm": 1.4176220893859863, "learning_rate": 9.975342105263158e-05, "loss": 0.5739, "step": 20947 }, { "epoch": 1.173031694478665, "grad_norm": 1.3308656215667725, "learning_rate": 9.975315789473685e-05, "loss": 0.4177, "step": 20948 }, { "epoch": 1.173087691790794, "grad_norm": 1.5689510107040405, "learning_rate": 9.97528947368421e-05, "loss": 0.49, "step": 20949 }, { "epoch": 1.173143689102923, "grad_norm": 1.4418233633041382, "learning_rate": 9.975263157894737e-05, "loss": 0.4703, "step": 20950 }, { "epoch": 1.173199686415052, "grad_norm": 1.4436392784118652, "learning_rate": 9.975236842105263e-05, "loss": 0.4242, "step": 20951 }, { "epoch": 1.173255683727181, "grad_norm": 1.423810362815857, "learning_rate": 9.975210526315791e-05, "loss": 0.3812, "step": 20952 }, { "epoch": 1.17331168103931, "grad_norm": 1.2401375770568848, "learning_rate": 9.975184210526315e-05, "loss": 0.4388, "step": 20953 }, { "epoch": 1.173367678351439, "grad_norm": 1.3293055295944214, "learning_rate": 9.975157894736843e-05, "loss": 0.4964, "step": 20954 }, { "epoch": 1.173423675663568, "grad_norm": 1.3621879816055298, "learning_rate": 9.975131578947369e-05, "loss": 
0.4066, "step": 20955 }, { "epoch": 1.173479672975697, "grad_norm": 1.1478829383850098, "learning_rate": 9.975105263157896e-05, "loss": 0.3469, "step": 20956 }, { "epoch": 1.1735356702878261, "grad_norm": 1.63117253780365, "learning_rate": 9.975078947368422e-05, "loss": 0.6342, "step": 20957 }, { "epoch": 1.1735916675999551, "grad_norm": 1.3149467706680298, "learning_rate": 9.975052631578948e-05, "loss": 0.3948, "step": 20958 }, { "epoch": 1.1736476649120842, "grad_norm": 1.46638822555542, "learning_rate": 9.975026315789474e-05, "loss": 0.4828, "step": 20959 }, { "epoch": 1.1737036622242132, "grad_norm": 1.3919765949249268, "learning_rate": 9.975000000000001e-05, "loss": 0.4549, "step": 20960 }, { "epoch": 1.1737596595363422, "grad_norm": 1.3568085432052612, "learning_rate": 9.974973684210527e-05, "loss": 0.4909, "step": 20961 }, { "epoch": 1.1738156568484712, "grad_norm": 1.5416512489318848, "learning_rate": 9.974947368421053e-05, "loss": 0.6284, "step": 20962 }, { "epoch": 1.1738716541606002, "grad_norm": 1.1301103830337524, "learning_rate": 9.974921052631579e-05, "loss": 0.4017, "step": 20963 }, { "epoch": 1.1739276514727293, "grad_norm": 1.399775505065918, "learning_rate": 9.974894736842105e-05, "loss": 0.49, "step": 20964 }, { "epoch": 1.1739836487848583, "grad_norm": 1.6052110195159912, "learning_rate": 9.974868421052632e-05, "loss": 0.5099, "step": 20965 }, { "epoch": 1.1740396460969873, "grad_norm": 1.214698314666748, "learning_rate": 9.974842105263158e-05, "loss": 0.4041, "step": 20966 }, { "epoch": 1.1740956434091163, "grad_norm": 1.6586273908615112, "learning_rate": 9.974815789473684e-05, "loss": 0.4254, "step": 20967 }, { "epoch": 1.1741516407212453, "grad_norm": 1.307340383529663, "learning_rate": 9.97478947368421e-05, "loss": 0.4385, "step": 20968 }, { "epoch": 1.1742076380333744, "grad_norm": 1.3171969652175903, "learning_rate": 9.974763157894738e-05, "loss": 0.5378, "step": 20969 }, { "epoch": 1.1742636353455034, "grad_norm": 1.87435781955719, 
"learning_rate": 9.974736842105264e-05, "loss": 0.5037, "step": 20970 }, { "epoch": 1.1743196326576324, "grad_norm": 1.3954530954360962, "learning_rate": 9.97471052631579e-05, "loss": 0.5401, "step": 20971 }, { "epoch": 1.1743756299697614, "grad_norm": 1.3167064189910889, "learning_rate": 9.974684210526316e-05, "loss": 0.4602, "step": 20972 }, { "epoch": 1.1744316272818904, "grad_norm": 1.5326517820358276, "learning_rate": 9.974657894736843e-05, "loss": 0.4996, "step": 20973 }, { "epoch": 1.1744876245940195, "grad_norm": 1.471760869026184, "learning_rate": 9.974631578947369e-05, "loss": 0.3486, "step": 20974 }, { "epoch": 1.1745436219061485, "grad_norm": 1.2557377815246582, "learning_rate": 9.974605263157896e-05, "loss": 0.4018, "step": 20975 }, { "epoch": 1.1745996192182775, "grad_norm": 1.3838822841644287, "learning_rate": 9.974578947368421e-05, "loss": 0.4234, "step": 20976 }, { "epoch": 1.1746556165304065, "grad_norm": 1.2633038759231567, "learning_rate": 9.974552631578948e-05, "loss": 0.4, "step": 20977 }, { "epoch": 1.1747116138425355, "grad_norm": 1.2640166282653809, "learning_rate": 9.974526315789474e-05, "loss": 0.3977, "step": 20978 }, { "epoch": 1.1747676111546645, "grad_norm": 1.1059231758117676, "learning_rate": 9.9745e-05, "loss": 0.3712, "step": 20979 }, { "epoch": 1.1748236084667936, "grad_norm": 1.2637633085250854, "learning_rate": 9.974473684210527e-05, "loss": 0.5684, "step": 20980 }, { "epoch": 1.1748796057789226, "grad_norm": 1.5723506212234497, "learning_rate": 9.974447368421052e-05, "loss": 0.4166, "step": 20981 }, { "epoch": 1.1749356030910516, "grad_norm": 1.3939402103424072, "learning_rate": 9.97442105263158e-05, "loss": 0.4191, "step": 20982 }, { "epoch": 1.1749916004031806, "grad_norm": 1.2307891845703125, "learning_rate": 9.974394736842105e-05, "loss": 0.3587, "step": 20983 }, { "epoch": 1.1750475977153096, "grad_norm": 1.5806177854537964, "learning_rate": 9.974368421052633e-05, "loss": 0.5389, "step": 20984 }, { "epoch": 
1.1751035950274387, "grad_norm": 1.2895103693008423, "learning_rate": 9.974342105263159e-05, "loss": 0.469, "step": 20985 }, { "epoch": 1.1751595923395677, "grad_norm": 1.590126395225525, "learning_rate": 9.974315789473685e-05, "loss": 0.5257, "step": 20986 }, { "epoch": 1.1752155896516967, "grad_norm": 1.3139359951019287, "learning_rate": 9.97428947368421e-05, "loss": 0.4209, "step": 20987 }, { "epoch": 1.1752715869638257, "grad_norm": 1.2274155616760254, "learning_rate": 9.974263157894738e-05, "loss": 0.4931, "step": 20988 }, { "epoch": 1.1753275842759547, "grad_norm": 1.2153725624084473, "learning_rate": 9.974236842105264e-05, "loss": 0.5214, "step": 20989 }, { "epoch": 1.1753835815880838, "grad_norm": 1.3513826131820679, "learning_rate": 9.97421052631579e-05, "loss": 0.4725, "step": 20990 }, { "epoch": 1.1754395789002128, "grad_norm": 1.201668381690979, "learning_rate": 9.974184210526316e-05, "loss": 0.5036, "step": 20991 }, { "epoch": 1.1754955762123418, "grad_norm": 1.4883041381835938, "learning_rate": 9.974157894736843e-05, "loss": 0.4364, "step": 20992 }, { "epoch": 1.1755515735244708, "grad_norm": 1.4872490167617798, "learning_rate": 9.974131578947369e-05, "loss": 0.446, "step": 20993 }, { "epoch": 1.1756075708365998, "grad_norm": 1.2378628253936768, "learning_rate": 9.974105263157895e-05, "loss": 0.4013, "step": 20994 }, { "epoch": 1.1756635681487289, "grad_norm": 1.235908031463623, "learning_rate": 9.974078947368421e-05, "loss": 0.3845, "step": 20995 }, { "epoch": 1.1757195654608579, "grad_norm": 1.1609922647476196, "learning_rate": 9.974052631578947e-05, "loss": 0.5331, "step": 20996 }, { "epoch": 1.175775562772987, "grad_norm": 1.4406769275665283, "learning_rate": 9.974026315789474e-05, "loss": 0.4935, "step": 20997 }, { "epoch": 1.175831560085116, "grad_norm": 1.3535735607147217, "learning_rate": 9.974e-05, "loss": 0.3864, "step": 20998 }, { "epoch": 1.175887557397245, "grad_norm": 1.3318134546279907, "learning_rate": 9.973973684210526e-05, "loss": 
0.4113, "step": 20999 }, { "epoch": 1.175943554709374, "grad_norm": 1.5921144485473633, "learning_rate": 9.973947368421052e-05, "loss": 0.3982, "step": 21000 }, { "epoch": 1.175999552021503, "grad_norm": 1.7537912130355835, "learning_rate": 9.97392105263158e-05, "loss": 0.4266, "step": 21001 }, { "epoch": 1.176055549333632, "grad_norm": 1.2887009382247925, "learning_rate": 9.973894736842106e-05, "loss": 0.4111, "step": 21002 }, { "epoch": 1.176111546645761, "grad_norm": 1.4059815406799316, "learning_rate": 9.973868421052633e-05, "loss": 0.3887, "step": 21003 }, { "epoch": 1.17616754395789, "grad_norm": 1.3824046850204468, "learning_rate": 9.973842105263158e-05, "loss": 0.4825, "step": 21004 }, { "epoch": 1.176223541270019, "grad_norm": 2.390695571899414, "learning_rate": 9.973815789473685e-05, "loss": 0.5852, "step": 21005 }, { "epoch": 1.176279538582148, "grad_norm": 1.2309800386428833, "learning_rate": 9.973789473684211e-05, "loss": 0.3685, "step": 21006 }, { "epoch": 1.176335535894277, "grad_norm": 1.358850121498108, "learning_rate": 9.973763157894738e-05, "loss": 0.3702, "step": 21007 }, { "epoch": 1.176391533206406, "grad_norm": 2.115166187286377, "learning_rate": 9.973736842105263e-05, "loss": 0.5503, "step": 21008 }, { "epoch": 1.1764475305185351, "grad_norm": 1.2767928838729858, "learning_rate": 9.97371052631579e-05, "loss": 0.4729, "step": 21009 }, { "epoch": 1.1765035278306641, "grad_norm": 1.5280072689056396, "learning_rate": 9.973684210526316e-05, "loss": 0.3253, "step": 21010 }, { "epoch": 1.1765595251427932, "grad_norm": 1.5275877714157104, "learning_rate": 9.973657894736843e-05, "loss": 0.4214, "step": 21011 }, { "epoch": 1.1766155224549222, "grad_norm": 1.173607587814331, "learning_rate": 9.97363157894737e-05, "loss": 0.3094, "step": 21012 }, { "epoch": 1.1766715197670512, "grad_norm": 1.2253994941711426, "learning_rate": 9.973605263157894e-05, "loss": 0.4247, "step": 21013 }, { "epoch": 1.1767275170791802, "grad_norm": 1.1790021657943726, 
"learning_rate": 9.973578947368421e-05, "loss": 0.353, "step": 21014 }, { "epoch": 1.1767835143913092, "grad_norm": 1.2789949178695679, "learning_rate": 9.973552631578947e-05, "loss": 0.3371, "step": 21015 }, { "epoch": 1.1768395117034383, "grad_norm": 1.2025184631347656, "learning_rate": 9.973526315789475e-05, "loss": 0.474, "step": 21016 }, { "epoch": 1.1768955090155673, "grad_norm": 1.3859963417053223, "learning_rate": 9.9735e-05, "loss": 0.5285, "step": 21017 }, { "epoch": 1.1769515063276963, "grad_norm": 1.4734511375427246, "learning_rate": 9.973473684210527e-05, "loss": 0.4577, "step": 21018 }, { "epoch": 1.1770075036398253, "grad_norm": 1.6155064105987549, "learning_rate": 9.973447368421053e-05, "loss": 0.4629, "step": 21019 }, { "epoch": 1.1770635009519543, "grad_norm": 1.3375678062438965, "learning_rate": 9.97342105263158e-05, "loss": 0.3894, "step": 21020 }, { "epoch": 1.1771194982640834, "grad_norm": 1.2337805032730103, "learning_rate": 9.973394736842106e-05, "loss": 0.3927, "step": 21021 }, { "epoch": 1.1771754955762124, "grad_norm": 1.4827440977096558, "learning_rate": 9.973368421052632e-05, "loss": 0.4476, "step": 21022 }, { "epoch": 1.1772314928883414, "grad_norm": 1.2658737897872925, "learning_rate": 9.973342105263158e-05, "loss": 0.4027, "step": 21023 }, { "epoch": 1.1772874902004704, "grad_norm": 1.3625433444976807, "learning_rate": 9.973315789473685e-05, "loss": 0.4178, "step": 21024 }, { "epoch": 1.1773434875125994, "grad_norm": 1.4328525066375732, "learning_rate": 9.973289473684211e-05, "loss": 0.4612, "step": 21025 }, { "epoch": 1.1773994848247284, "grad_norm": 1.4753912687301636, "learning_rate": 9.973263157894737e-05, "loss": 0.4367, "step": 21026 }, { "epoch": 1.1774554821368575, "grad_norm": 1.2543789148330688, "learning_rate": 9.973236842105263e-05, "loss": 0.4802, "step": 21027 }, { "epoch": 1.1775114794489865, "grad_norm": 1.206464409828186, "learning_rate": 9.97321052631579e-05, "loss": 0.4139, "step": 21028 }, { "epoch": 
1.1775674767611155, "grad_norm": 1.302475094795227, "learning_rate": 9.973184210526316e-05, "loss": 0.4461, "step": 21029 }, { "epoch": 1.1776234740732445, "grad_norm": 1.0936827659606934, "learning_rate": 9.973157894736842e-05, "loss": 0.4318, "step": 21030 }, { "epoch": 1.1776794713853735, "grad_norm": 1.2795578241348267, "learning_rate": 9.973131578947368e-05, "loss": 0.3422, "step": 21031 }, { "epoch": 1.1777354686975026, "grad_norm": 1.5770725011825562, "learning_rate": 9.973105263157894e-05, "loss": 0.4887, "step": 21032 }, { "epoch": 1.1777914660096316, "grad_norm": 1.2700650691986084, "learning_rate": 9.973078947368422e-05, "loss": 0.5678, "step": 21033 }, { "epoch": 1.1778474633217606, "grad_norm": 1.371291160583496, "learning_rate": 9.973052631578948e-05, "loss": 0.4998, "step": 21034 }, { "epoch": 1.1779034606338896, "grad_norm": 1.370296597480774, "learning_rate": 9.973026315789475e-05, "loss": 0.4893, "step": 21035 }, { "epoch": 1.1779594579460186, "grad_norm": 1.5093014240264893, "learning_rate": 9.973e-05, "loss": 0.4992, "step": 21036 }, { "epoch": 1.1780154552581477, "grad_norm": 1.3816587924957275, "learning_rate": 9.972973684210527e-05, "loss": 0.3892, "step": 21037 }, { "epoch": 1.1780714525702767, "grad_norm": 1.5106549263000488, "learning_rate": 9.972947368421053e-05, "loss": 0.4294, "step": 21038 }, { "epoch": 1.1781274498824057, "grad_norm": 1.2390880584716797, "learning_rate": 9.97292105263158e-05, "loss": 0.3547, "step": 21039 }, { "epoch": 1.1781834471945347, "grad_norm": 1.52642822265625, "learning_rate": 9.972894736842106e-05, "loss": 0.5257, "step": 21040 }, { "epoch": 1.1782394445066637, "grad_norm": 1.2675113677978516, "learning_rate": 9.972868421052632e-05, "loss": 0.4144, "step": 21041 }, { "epoch": 1.1782954418187928, "grad_norm": 1.4749233722686768, "learning_rate": 9.972842105263158e-05, "loss": 0.4099, "step": 21042 }, { "epoch": 1.1783514391309218, "grad_norm": 1.2252509593963623, "learning_rate": 9.972815789473685e-05, 
"loss": 0.4446, "step": 21043 }, { "epoch": 1.1784074364430508, "grad_norm": 1.230692982673645, "learning_rate": 9.972789473684211e-05, "loss": 0.4159, "step": 21044 }, { "epoch": 1.1784634337551798, "grad_norm": 1.4128847122192383, "learning_rate": 9.972763157894737e-05, "loss": 0.4486, "step": 21045 }, { "epoch": 1.1785194310673088, "grad_norm": 1.276092290878296, "learning_rate": 9.972736842105263e-05, "loss": 0.4334, "step": 21046 }, { "epoch": 1.1785754283794379, "grad_norm": 1.8333954811096191, "learning_rate": 9.97271052631579e-05, "loss": 0.5329, "step": 21047 }, { "epoch": 1.1786314256915669, "grad_norm": 1.220476746559143, "learning_rate": 9.972684210526317e-05, "loss": 0.425, "step": 21048 }, { "epoch": 1.1786874230036959, "grad_norm": 1.54896879196167, "learning_rate": 9.972657894736843e-05, "loss": 0.5269, "step": 21049 }, { "epoch": 1.178743420315825, "grad_norm": 1.5744624137878418, "learning_rate": 9.972631578947369e-05, "loss": 0.3989, "step": 21050 }, { "epoch": 1.178799417627954, "grad_norm": 1.5536669492721558, "learning_rate": 9.972605263157895e-05, "loss": 0.4169, "step": 21051 }, { "epoch": 1.178855414940083, "grad_norm": 1.5690683126449585, "learning_rate": 9.972578947368422e-05, "loss": 0.4538, "step": 21052 }, { "epoch": 1.178911412252212, "grad_norm": 1.5268265008926392, "learning_rate": 9.972552631578948e-05, "loss": 0.3558, "step": 21053 }, { "epoch": 1.178967409564341, "grad_norm": 1.5727897882461548, "learning_rate": 9.972526315789474e-05, "loss": 0.6762, "step": 21054 }, { "epoch": 1.17902340687647, "grad_norm": 1.347290277481079, "learning_rate": 9.9725e-05, "loss": 0.4515, "step": 21055 }, { "epoch": 1.179079404188599, "grad_norm": 1.4351496696472168, "learning_rate": 9.972473684210527e-05, "loss": 0.5595, "step": 21056 }, { "epoch": 1.179135401500728, "grad_norm": 1.2999906539916992, "learning_rate": 9.972447368421053e-05, "loss": 0.478, "step": 21057 }, { "epoch": 1.179191398812857, "grad_norm": 1.2803971767425537, 
"learning_rate": 9.97242105263158e-05, "loss": 0.503, "step": 21058 }, { "epoch": 1.179247396124986, "grad_norm": 2.6762804985046387, "learning_rate": 9.972394736842105e-05, "loss": 0.5161, "step": 21059 }, { "epoch": 1.179303393437115, "grad_norm": 1.2523714303970337, "learning_rate": 9.972368421052632e-05, "loss": 0.3979, "step": 21060 }, { "epoch": 1.1793593907492441, "grad_norm": 1.3335241079330444, "learning_rate": 9.972342105263158e-05, "loss": 0.424, "step": 21061 }, { "epoch": 1.1794153880613731, "grad_norm": 1.3811438083648682, "learning_rate": 9.972315789473686e-05, "loss": 0.4412, "step": 21062 }, { "epoch": 1.1794713853735022, "grad_norm": 1.3896058797836304, "learning_rate": 9.97228947368421e-05, "loss": 0.5693, "step": 21063 }, { "epoch": 1.1795273826856312, "grad_norm": 1.2304675579071045, "learning_rate": 9.972263157894738e-05, "loss": 0.3832, "step": 21064 }, { "epoch": 1.1795833799977602, "grad_norm": 1.9563837051391602, "learning_rate": 9.972236842105264e-05, "loss": 0.4691, "step": 21065 }, { "epoch": 1.1796393773098892, "grad_norm": 1.3286216259002686, "learning_rate": 9.97221052631579e-05, "loss": 0.446, "step": 21066 }, { "epoch": 1.1796953746220182, "grad_norm": 1.3801134824752808, "learning_rate": 9.972184210526317e-05, "loss": 0.5168, "step": 21067 }, { "epoch": 1.1797513719341473, "grad_norm": 1.3077819347381592, "learning_rate": 9.972157894736842e-05, "loss": 0.4792, "step": 21068 }, { "epoch": 1.1798073692462763, "grad_norm": 1.4988596439361572, "learning_rate": 9.972131578947369e-05, "loss": 0.5545, "step": 21069 }, { "epoch": 1.1798633665584053, "grad_norm": 1.228618860244751, "learning_rate": 9.972105263157895e-05, "loss": 0.407, "step": 21070 }, { "epoch": 1.1799193638705343, "grad_norm": 1.342016339302063, "learning_rate": 9.972078947368422e-05, "loss": 0.4125, "step": 21071 }, { "epoch": 1.1799753611826633, "grad_norm": 1.7181639671325684, "learning_rate": 9.972052631578948e-05, "loss": 0.3964, "step": 21072 }, { "epoch": 
1.1800313584947921, "grad_norm": 1.7656135559082031, "learning_rate": 9.972026315789474e-05, "loss": 0.532, "step": 21073 }, { "epoch": 1.1800873558069211, "grad_norm": 1.4386123418807983, "learning_rate": 9.972e-05, "loss": 0.4891, "step": 21074 }, { "epoch": 1.1801433531190502, "grad_norm": 1.3285212516784668, "learning_rate": 9.971973684210527e-05, "loss": 0.4108, "step": 21075 }, { "epoch": 1.1801993504311792, "grad_norm": 1.3960325717926025, "learning_rate": 9.971947368421053e-05, "loss": 0.3639, "step": 21076 }, { "epoch": 1.1802553477433082, "grad_norm": 1.492347002029419, "learning_rate": 9.97192105263158e-05, "loss": 0.5306, "step": 21077 }, { "epoch": 1.1803113450554372, "grad_norm": 1.1340129375457764, "learning_rate": 9.971894736842105e-05, "loss": 0.3658, "step": 21078 }, { "epoch": 1.1803673423675662, "grad_norm": 1.175807237625122, "learning_rate": 9.971868421052633e-05, "loss": 0.344, "step": 21079 }, { "epoch": 1.1804233396796953, "grad_norm": 1.6824955940246582, "learning_rate": 9.971842105263159e-05, "loss": 0.5288, "step": 21080 }, { "epoch": 1.1804793369918243, "grad_norm": 1.2158433198928833, "learning_rate": 9.971815789473685e-05, "loss": 0.3972, "step": 21081 }, { "epoch": 1.1805353343039533, "grad_norm": 1.6033775806427002, "learning_rate": 9.97178947368421e-05, "loss": 0.4921, "step": 21082 }, { "epoch": 1.1805913316160823, "grad_norm": 1.1867610216140747, "learning_rate": 9.971763157894737e-05, "loss": 0.3832, "step": 21083 }, { "epoch": 1.1806473289282113, "grad_norm": 1.357165813446045, "learning_rate": 9.971736842105264e-05, "loss": 0.4266, "step": 21084 }, { "epoch": 1.1807033262403404, "grad_norm": 1.2187163829803467, "learning_rate": 9.97171052631579e-05, "loss": 0.4727, "step": 21085 }, { "epoch": 1.1807593235524694, "grad_norm": 1.299239993095398, "learning_rate": 9.971684210526316e-05, "loss": 0.4482, "step": 21086 }, { "epoch": 1.1808153208645984, "grad_norm": 1.2567377090454102, "learning_rate": 9.971657894736842e-05, "loss": 
0.445, "step": 21087 }, { "epoch": 1.1808713181767274, "grad_norm": 1.2322558164596558, "learning_rate": 9.971631578947369e-05, "loss": 0.3458, "step": 21088 }, { "epoch": 1.1809273154888564, "grad_norm": 1.7484732866287231, "learning_rate": 9.971605263157895e-05, "loss": 0.4452, "step": 21089 }, { "epoch": 1.1809833128009855, "grad_norm": 1.2590219974517822, "learning_rate": 9.971578947368422e-05, "loss": 0.4281, "step": 21090 }, { "epoch": 1.1810393101131145, "grad_norm": 1.654700756072998, "learning_rate": 9.971552631578947e-05, "loss": 0.5242, "step": 21091 }, { "epoch": 1.1810953074252435, "grad_norm": 1.3631558418273926, "learning_rate": 9.971526315789474e-05, "loss": 0.4196, "step": 21092 }, { "epoch": 1.1811513047373725, "grad_norm": 1.3610966205596924, "learning_rate": 9.9715e-05, "loss": 0.4374, "step": 21093 }, { "epoch": 1.1812073020495015, "grad_norm": 1.3292502164840698, "learning_rate": 9.971473684210528e-05, "loss": 0.5613, "step": 21094 }, { "epoch": 1.1812632993616305, "grad_norm": 1.3234503269195557, "learning_rate": 9.971447368421054e-05, "loss": 0.4301, "step": 21095 }, { "epoch": 1.1813192966737596, "grad_norm": 1.2691986560821533, "learning_rate": 9.97142105263158e-05, "loss": 0.3603, "step": 21096 }, { "epoch": 1.1813752939858886, "grad_norm": 1.2768298387527466, "learning_rate": 9.971394736842106e-05, "loss": 0.3793, "step": 21097 }, { "epoch": 1.1814312912980176, "grad_norm": 1.067457914352417, "learning_rate": 9.971368421052633e-05, "loss": 0.5021, "step": 21098 }, { "epoch": 1.1814872886101466, "grad_norm": 1.585533857345581, "learning_rate": 9.971342105263159e-05, "loss": 0.7983, "step": 21099 }, { "epoch": 1.1815432859222756, "grad_norm": 1.301196575164795, "learning_rate": 9.971315789473683e-05, "loss": 0.4705, "step": 21100 }, { "epoch": 1.1815992832344047, "grad_norm": 1.3239070177078247, "learning_rate": 9.971289473684211e-05, "loss": 0.4195, "step": 21101 }, { "epoch": 1.1816552805465337, "grad_norm": 1.5636088848114014, 
"learning_rate": 9.971263157894737e-05, "loss": 0.544, "step": 21102 }, { "epoch": 1.1817112778586627, "grad_norm": 1.20651113986969, "learning_rate": 9.971236842105264e-05, "loss": 0.4805, "step": 21103 }, { "epoch": 1.1817672751707917, "grad_norm": 1.2414714097976685, "learning_rate": 9.97121052631579e-05, "loss": 0.3596, "step": 21104 }, { "epoch": 1.1818232724829207, "grad_norm": 1.429947853088379, "learning_rate": 9.971184210526316e-05, "loss": 0.4822, "step": 21105 }, { "epoch": 1.1818792697950498, "grad_norm": 1.3667641878128052, "learning_rate": 9.971157894736842e-05, "loss": 0.3992, "step": 21106 }, { "epoch": 1.1819352671071788, "grad_norm": 1.1207997798919678, "learning_rate": 9.97113157894737e-05, "loss": 0.3619, "step": 21107 }, { "epoch": 1.1819912644193078, "grad_norm": 1.050496220588684, "learning_rate": 9.971105263157895e-05, "loss": 0.3202, "step": 21108 }, { "epoch": 1.1820472617314368, "grad_norm": 1.368922472000122, "learning_rate": 9.971078947368421e-05, "loss": 0.585, "step": 21109 }, { "epoch": 1.1821032590435658, "grad_norm": 1.4292868375778198, "learning_rate": 9.971052631578947e-05, "loss": 0.3907, "step": 21110 }, { "epoch": 1.1821592563556949, "grad_norm": 1.2813223600387573, "learning_rate": 9.971026315789475e-05, "loss": 0.3623, "step": 21111 }, { "epoch": 1.1822152536678239, "grad_norm": 1.4284342527389526, "learning_rate": 9.971e-05, "loss": 0.4354, "step": 21112 }, { "epoch": 1.182271250979953, "grad_norm": 1.565657615661621, "learning_rate": 9.970973684210528e-05, "loss": 0.4779, "step": 21113 }, { "epoch": 1.182327248292082, "grad_norm": 1.3310754299163818, "learning_rate": 9.970947368421053e-05, "loss": 0.4205, "step": 21114 }, { "epoch": 1.182383245604211, "grad_norm": 1.2147763967514038, "learning_rate": 9.97092105263158e-05, "loss": 0.4249, "step": 21115 }, { "epoch": 1.18243924291634, "grad_norm": 1.3342591524124146, "learning_rate": 9.970894736842106e-05, "loss": 0.4208, "step": 21116 }, { "epoch": 1.182495240228469, 
"grad_norm": 1.507914662361145, "learning_rate": 9.970868421052632e-05, "loss": 0.6189, "step": 21117 }, { "epoch": 1.182551237540598, "grad_norm": 1.5464972257614136, "learning_rate": 9.970842105263158e-05, "loss": 0.5017, "step": 21118 }, { "epoch": 1.182607234852727, "grad_norm": 1.3173556327819824, "learning_rate": 9.970815789473684e-05, "loss": 0.4078, "step": 21119 }, { "epoch": 1.182663232164856, "grad_norm": 1.3701661825180054, "learning_rate": 9.970789473684211e-05, "loss": 0.5615, "step": 21120 }, { "epoch": 1.182719229476985, "grad_norm": 1.4368195533752441, "learning_rate": 9.970763157894737e-05, "loss": 0.4509, "step": 21121 }, { "epoch": 1.182775226789114, "grad_norm": 1.326619029045105, "learning_rate": 9.970736842105264e-05, "loss": 0.4387, "step": 21122 }, { "epoch": 1.182831224101243, "grad_norm": 1.341138243675232, "learning_rate": 9.970710526315789e-05, "loss": 0.4683, "step": 21123 }, { "epoch": 1.182887221413372, "grad_norm": 1.3173452615737915, "learning_rate": 9.970684210526316e-05, "loss": 0.4314, "step": 21124 }, { "epoch": 1.1829432187255011, "grad_norm": 1.32731032371521, "learning_rate": 9.970657894736842e-05, "loss": 0.431, "step": 21125 }, { "epoch": 1.1829992160376301, "grad_norm": 1.4874069690704346, "learning_rate": 9.97063157894737e-05, "loss": 0.5038, "step": 21126 }, { "epoch": 1.1830552133497592, "grad_norm": 1.3662054538726807, "learning_rate": 9.970605263157896e-05, "loss": 0.4337, "step": 21127 }, { "epoch": 1.1831112106618882, "grad_norm": 1.2005529403686523, "learning_rate": 9.970578947368422e-05, "loss": 0.3957, "step": 21128 }, { "epoch": 1.1831672079740172, "grad_norm": 1.4747321605682373, "learning_rate": 9.970552631578948e-05, "loss": 0.4642, "step": 21129 }, { "epoch": 1.1832232052861462, "grad_norm": 1.3802555799484253, "learning_rate": 9.970526315789475e-05, "loss": 0.4484, "step": 21130 }, { "epoch": 1.1832792025982752, "grad_norm": 1.442726731300354, "learning_rate": 9.970500000000001e-05, "loss": 0.4193, "step": 
21131 }, { "epoch": 1.1833351999104043, "grad_norm": 1.507625937461853, "learning_rate": 9.970473684210527e-05, "loss": 0.5281, "step": 21132 }, { "epoch": 1.1833911972225333, "grad_norm": 1.5044076442718506, "learning_rate": 9.970447368421053e-05, "loss": 0.4702, "step": 21133 }, { "epoch": 1.1834471945346623, "grad_norm": 2.6730031967163086, "learning_rate": 9.970421052631579e-05, "loss": 0.338, "step": 21134 }, { "epoch": 1.1835031918467913, "grad_norm": 1.2748754024505615, "learning_rate": 9.970394736842106e-05, "loss": 0.5369, "step": 21135 }, { "epoch": 1.1835591891589203, "grad_norm": 1.3605180978775024, "learning_rate": 9.970368421052632e-05, "loss": 0.6243, "step": 21136 }, { "epoch": 1.1836151864710494, "grad_norm": 1.4342129230499268, "learning_rate": 9.970342105263158e-05, "loss": 0.3888, "step": 21137 }, { "epoch": 1.1836711837831784, "grad_norm": 1.2869508266448975, "learning_rate": 9.970315789473684e-05, "loss": 0.5126, "step": 21138 }, { "epoch": 1.1837271810953074, "grad_norm": 1.6351805925369263, "learning_rate": 9.970289473684211e-05, "loss": 0.5752, "step": 21139 }, { "epoch": 1.1837831784074364, "grad_norm": 1.45833158493042, "learning_rate": 9.970263157894737e-05, "loss": 0.3881, "step": 21140 }, { "epoch": 1.1838391757195654, "grad_norm": 1.2941596508026123, "learning_rate": 9.970236842105263e-05, "loss": 0.5191, "step": 21141 }, { "epoch": 1.1838951730316944, "grad_norm": 1.3377100229263306, "learning_rate": 9.970210526315789e-05, "loss": 0.3507, "step": 21142 }, { "epoch": 1.1839511703438235, "grad_norm": 1.2845380306243896, "learning_rate": 9.970184210526317e-05, "loss": 0.4726, "step": 21143 }, { "epoch": 1.1840071676559525, "grad_norm": 1.2385921478271484, "learning_rate": 9.970157894736843e-05, "loss": 0.4044, "step": 21144 }, { "epoch": 1.1840631649680815, "grad_norm": 1.1426398754119873, "learning_rate": 9.97013157894737e-05, "loss": 0.4968, "step": 21145 }, { "epoch": 1.1841191622802105, "grad_norm": 1.7909942865371704, 
"learning_rate": 9.970105263157895e-05, "loss": 0.3966, "step": 21146 }, { "epoch": 1.1841751595923395, "grad_norm": 2.319905996322632, "learning_rate": 9.970078947368422e-05, "loss": 0.5612, "step": 21147 }, { "epoch": 1.1842311569044686, "grad_norm": 1.4069215059280396, "learning_rate": 9.970052631578948e-05, "loss": 0.5357, "step": 21148 }, { "epoch": 1.1842871542165976, "grad_norm": 1.180469036102295, "learning_rate": 9.970026315789475e-05, "loss": 0.3794, "step": 21149 }, { "epoch": 1.1843431515287266, "grad_norm": 1.1504889726638794, "learning_rate": 9.970000000000001e-05, "loss": 0.3408, "step": 21150 }, { "epoch": 1.1843991488408556, "grad_norm": 1.4989124536514282, "learning_rate": 9.969973684210526e-05, "loss": 0.5002, "step": 21151 }, { "epoch": 1.1844551461529846, "grad_norm": 1.8440818786621094, "learning_rate": 9.969947368421053e-05, "loss": 0.6285, "step": 21152 }, { "epoch": 1.1845111434651137, "grad_norm": 7.958072662353516, "learning_rate": 9.969921052631579e-05, "loss": 0.3883, "step": 21153 }, { "epoch": 1.1845671407772427, "grad_norm": 1.494621992111206, "learning_rate": 9.969894736842106e-05, "loss": 0.5401, "step": 21154 }, { "epoch": 1.1846231380893717, "grad_norm": 1.2599050998687744, "learning_rate": 9.969868421052631e-05, "loss": 0.4195, "step": 21155 }, { "epoch": 1.1846791354015007, "grad_norm": 1.1688024997711182, "learning_rate": 9.969842105263158e-05, "loss": 0.3738, "step": 21156 }, { "epoch": 1.1847351327136297, "grad_norm": 1.5886930227279663, "learning_rate": 9.969815789473684e-05, "loss": 0.4661, "step": 21157 }, { "epoch": 1.1847911300257588, "grad_norm": 1.2828972339630127, "learning_rate": 9.969789473684212e-05, "loss": 0.3685, "step": 21158 }, { "epoch": 1.1848471273378878, "grad_norm": 1.2016386985778809, "learning_rate": 9.969763157894738e-05, "loss": 0.3928, "step": 21159 }, { "epoch": 1.1849031246500168, "grad_norm": 1.6821802854537964, "learning_rate": 9.969736842105264e-05, "loss": 0.4608, "step": 21160 }, { "epoch": 
1.1849591219621458, "grad_norm": 1.7325512170791626, "learning_rate": 9.96971052631579e-05, "loss": 0.376, "step": 21161 }, { "epoch": 1.1850151192742748, "grad_norm": 1.6614699363708496, "learning_rate": 9.969684210526317e-05, "loss": 0.3121, "step": 21162 }, { "epoch": 1.1850711165864038, "grad_norm": 1.2217885255813599, "learning_rate": 9.969657894736843e-05, "loss": 0.3949, "step": 21163 }, { "epoch": 1.1851271138985329, "grad_norm": 1.9034477472305298, "learning_rate": 9.969631578947369e-05, "loss": 0.6209, "step": 21164 }, { "epoch": 1.1851831112106619, "grad_norm": 1.560486078262329, "learning_rate": 9.969605263157895e-05, "loss": 0.5011, "step": 21165 }, { "epoch": 1.185239108522791, "grad_norm": 1.3662017583847046, "learning_rate": 9.969578947368422e-05, "loss": 0.4632, "step": 21166 }, { "epoch": 1.18529510583492, "grad_norm": 1.2816357612609863, "learning_rate": 9.969552631578948e-05, "loss": 0.371, "step": 21167 }, { "epoch": 1.185351103147049, "grad_norm": 1.4631057977676392, "learning_rate": 9.969526315789474e-05, "loss": 0.491, "step": 21168 }, { "epoch": 1.185407100459178, "grad_norm": 1.1913096904754639, "learning_rate": 9.9695e-05, "loss": 0.4349, "step": 21169 }, { "epoch": 1.185463097771307, "grad_norm": 1.2965353727340698, "learning_rate": 9.969473684210526e-05, "loss": 0.4607, "step": 21170 }, { "epoch": 1.185519095083436, "grad_norm": 1.4136563539505005, "learning_rate": 9.969447368421053e-05, "loss": 0.4314, "step": 21171 }, { "epoch": 1.185575092395565, "grad_norm": 1.3343579769134521, "learning_rate": 9.969421052631579e-05, "loss": 0.4353, "step": 21172 }, { "epoch": 1.185631089707694, "grad_norm": 1.5607622861862183, "learning_rate": 9.969394736842105e-05, "loss": 0.4919, "step": 21173 }, { "epoch": 1.185687087019823, "grad_norm": 1.2649791240692139, "learning_rate": 9.969368421052631e-05, "loss": 0.5258, "step": 21174 }, { "epoch": 1.185743084331952, "grad_norm": 1.422687292098999, "learning_rate": 9.969342105263159e-05, "loss": 0.556, 
"step": 21175 }, { "epoch": 1.185799081644081, "grad_norm": 1.6056827306747437, "learning_rate": 9.969315789473685e-05, "loss": 0.439, "step": 21176 }, { "epoch": 1.1858550789562101, "grad_norm": 1.2976654767990112, "learning_rate": 9.969289473684212e-05, "loss": 0.4113, "step": 21177 }, { "epoch": 1.1859110762683391, "grad_norm": 1.3225038051605225, "learning_rate": 9.969263157894736e-05, "loss": 0.4313, "step": 21178 }, { "epoch": 1.1859670735804682, "grad_norm": 1.2887746095657349, "learning_rate": 9.969236842105264e-05, "loss": 0.4083, "step": 21179 }, { "epoch": 1.1860230708925972, "grad_norm": 1.6592974662780762, "learning_rate": 9.96921052631579e-05, "loss": 0.4067, "step": 21180 }, { "epoch": 1.1860790682047262, "grad_norm": 1.1698036193847656, "learning_rate": 9.969184210526317e-05, "loss": 0.4029, "step": 21181 }, { "epoch": 1.1861350655168552, "grad_norm": 1.4141706228256226, "learning_rate": 9.969157894736843e-05, "loss": 0.4824, "step": 21182 }, { "epoch": 1.1861910628289842, "grad_norm": 1.3743826150894165, "learning_rate": 9.969131578947369e-05, "loss": 0.4966, "step": 21183 }, { "epoch": 1.1862470601411133, "grad_norm": 1.381693720817566, "learning_rate": 9.969105263157895e-05, "loss": 0.5144, "step": 21184 }, { "epoch": 1.1863030574532423, "grad_norm": 1.380395770072937, "learning_rate": 9.969078947368422e-05, "loss": 0.3171, "step": 21185 }, { "epoch": 1.1863590547653713, "grad_norm": 1.3432729244232178, "learning_rate": 9.969052631578948e-05, "loss": 0.4076, "step": 21186 }, { "epoch": 1.1864150520775003, "grad_norm": 1.2328461408615112, "learning_rate": 9.969026315789474e-05, "loss": 0.3554, "step": 21187 }, { "epoch": 1.1864710493896293, "grad_norm": 1.296217918395996, "learning_rate": 9.969e-05, "loss": 0.5135, "step": 21188 }, { "epoch": 1.1865270467017583, "grad_norm": 35.66089630126953, "learning_rate": 9.968973684210526e-05, "loss": 0.6095, "step": 21189 }, { "epoch": 1.1865830440138874, "grad_norm": 1.3872572183609009, "learning_rate": 
9.968947368421054e-05, "loss": 0.4508, "step": 21190 }, { "epoch": 1.1866390413260164, "grad_norm": 2.5166049003601074, "learning_rate": 9.96892105263158e-05, "loss": 0.6397, "step": 21191 }, { "epoch": 1.1866950386381454, "grad_norm": 1.4581425189971924, "learning_rate": 9.968894736842106e-05, "loss": 0.4259, "step": 21192 }, { "epoch": 1.1867510359502744, "grad_norm": 2.6536355018615723, "learning_rate": 9.968868421052631e-05, "loss": 0.5135, "step": 21193 }, { "epoch": 1.1868070332624034, "grad_norm": 1.2793033123016357, "learning_rate": 9.968842105263159e-05, "loss": 0.3964, "step": 21194 }, { "epoch": 1.1868630305745325, "grad_norm": 1.4136197566986084, "learning_rate": 9.968815789473685e-05, "loss": 0.5737, "step": 21195 }, { "epoch": 1.1869190278866615, "grad_norm": 1.2036255598068237, "learning_rate": 9.968789473684211e-05, "loss": 0.359, "step": 21196 }, { "epoch": 1.1869750251987905, "grad_norm": 1.5432047843933105, "learning_rate": 9.968763157894737e-05, "loss": 0.4279, "step": 21197 }, { "epoch": 1.1870310225109195, "grad_norm": 1.3293156623840332, "learning_rate": 9.968736842105264e-05, "loss": 0.3859, "step": 21198 }, { "epoch": 1.1870870198230485, "grad_norm": 1.7360200881958008, "learning_rate": 9.96871052631579e-05, "loss": 0.4776, "step": 21199 }, { "epoch": 1.1871430171351776, "grad_norm": 2.022270917892456, "learning_rate": 9.968684210526317e-05, "loss": 0.3391, "step": 21200 }, { "epoch": 1.1871990144473066, "grad_norm": 1.334336280822754, "learning_rate": 9.968657894736842e-05, "loss": 0.5049, "step": 21201 }, { "epoch": 1.1872550117594356, "grad_norm": 1.9071433544158936, "learning_rate": 9.968631578947369e-05, "loss": 0.3772, "step": 21202 }, { "epoch": 1.1873110090715646, "grad_norm": 1.2202825546264648, "learning_rate": 9.968605263157895e-05, "loss": 0.4446, "step": 21203 }, { "epoch": 1.1873670063836936, "grad_norm": 1.5728347301483154, "learning_rate": 9.968578947368421e-05, "loss": 0.5384, "step": 21204 }, { "epoch": 1.1874230036958227, 
"grad_norm": 1.3353360891342163, "learning_rate": 9.968552631578949e-05, "loss": 0.3864, "step": 21205 }, { "epoch": 1.1874790010079517, "grad_norm": 1.5640684366226196, "learning_rate": 9.968526315789473e-05, "loss": 0.439, "step": 21206 }, { "epoch": 1.1875349983200807, "grad_norm": 1.3092772960662842, "learning_rate": 9.9685e-05, "loss": 0.4114, "step": 21207 }, { "epoch": 1.1875909956322097, "grad_norm": 1.6550471782684326, "learning_rate": 9.968473684210526e-05, "loss": 0.4563, "step": 21208 }, { "epoch": 1.1876469929443387, "grad_norm": 1.2956535816192627, "learning_rate": 9.968447368421054e-05, "loss": 0.4703, "step": 21209 }, { "epoch": 1.1877029902564677, "grad_norm": 1.3555691242218018, "learning_rate": 9.968421052631578e-05, "loss": 0.5481, "step": 21210 }, { "epoch": 1.1877589875685968, "grad_norm": 1.2641892433166504, "learning_rate": 9.968394736842106e-05, "loss": 0.4387, "step": 21211 }, { "epoch": 1.1878149848807258, "grad_norm": 1.3119490146636963, "learning_rate": 9.968368421052632e-05, "loss": 0.3682, "step": 21212 }, { "epoch": 1.1878709821928548, "grad_norm": 1.429458737373352, "learning_rate": 9.968342105263159e-05, "loss": 0.4616, "step": 21213 }, { "epoch": 1.1879269795049838, "grad_norm": 1.39130437374115, "learning_rate": 9.968315789473685e-05, "loss": 0.4945, "step": 21214 }, { "epoch": 1.1879829768171128, "grad_norm": 1.6254644393920898, "learning_rate": 9.968289473684211e-05, "loss": 0.5361, "step": 21215 }, { "epoch": 1.1880389741292419, "grad_norm": 1.241886854171753, "learning_rate": 9.968263157894737e-05, "loss": 0.4756, "step": 21216 }, { "epoch": 1.1880949714413709, "grad_norm": 1.2532511949539185, "learning_rate": 9.968236842105264e-05, "loss": 0.4426, "step": 21217 }, { "epoch": 1.1881509687535, "grad_norm": 1.3908519744873047, "learning_rate": 9.96821052631579e-05, "loss": 0.5752, "step": 21218 }, { "epoch": 1.188206966065629, "grad_norm": 1.4657741785049438, "learning_rate": 9.968184210526316e-05, "loss": 0.4851, "step": 21219 
}, { "epoch": 1.188262963377758, "grad_norm": 1.380887746810913, "learning_rate": 9.968157894736842e-05, "loss": 0.4347, "step": 21220 }, { "epoch": 1.188318960689887, "grad_norm": 1.3606361150741577, "learning_rate": 9.968131578947368e-05, "loss": 0.4537, "step": 21221 }, { "epoch": 1.188374958002016, "grad_norm": 1.447104573249817, "learning_rate": 9.968105263157896e-05, "loss": 0.4163, "step": 21222 }, { "epoch": 1.188430955314145, "grad_norm": 8.842304229736328, "learning_rate": 9.968078947368422e-05, "loss": 0.4515, "step": 21223 }, { "epoch": 1.188486952626274, "grad_norm": 1.6093858480453491, "learning_rate": 9.968052631578947e-05, "loss": 0.5563, "step": 21224 }, { "epoch": 1.188542949938403, "grad_norm": 1.502869963645935, "learning_rate": 9.968026315789473e-05, "loss": 0.4019, "step": 21225 }, { "epoch": 1.188598947250532, "grad_norm": 1.334153413772583, "learning_rate": 9.968000000000001e-05, "loss": 0.5447, "step": 21226 }, { "epoch": 1.188654944562661, "grad_norm": 1.394298791885376, "learning_rate": 9.967973684210527e-05, "loss": 0.4455, "step": 21227 }, { "epoch": 1.18871094187479, "grad_norm": 1.8458468914031982, "learning_rate": 9.967947368421053e-05, "loss": 0.4829, "step": 21228 }, { "epoch": 1.1887669391869191, "grad_norm": 1.3631985187530518, "learning_rate": 9.967921052631579e-05, "loss": 0.4603, "step": 21229 }, { "epoch": 1.1888229364990481, "grad_norm": 1.7145485877990723, "learning_rate": 9.967894736842106e-05, "loss": 0.4807, "step": 21230 }, { "epoch": 1.1888789338111772, "grad_norm": 1.5248175859451294, "learning_rate": 9.967868421052632e-05, "loss": 0.5424, "step": 21231 }, { "epoch": 1.1889349311233062, "grad_norm": 1.4054481983184814, "learning_rate": 9.96784210526316e-05, "loss": 0.3944, "step": 21232 }, { "epoch": 1.1889909284354352, "grad_norm": 1.3018110990524292, "learning_rate": 9.967815789473684e-05, "loss": 0.5388, "step": 21233 }, { "epoch": 1.1890469257475642, "grad_norm": 1.7539788484573364, "learning_rate": 
9.967789473684211e-05, "loss": 0.5211, "step": 21234 }, { "epoch": 1.1891029230596932, "grad_norm": 1.2612608671188354, "learning_rate": 9.967763157894737e-05, "loss": 0.3915, "step": 21235 }, { "epoch": 1.1891589203718222, "grad_norm": 1.3768184185028076, "learning_rate": 9.967736842105265e-05, "loss": 0.4413, "step": 21236 }, { "epoch": 1.1892149176839513, "grad_norm": 1.557417392730713, "learning_rate": 9.96771052631579e-05, "loss": 0.5665, "step": 21237 }, { "epoch": 1.1892709149960803, "grad_norm": 1.3070082664489746, "learning_rate": 9.967684210526315e-05, "loss": 0.4983, "step": 21238 }, { "epoch": 1.1893269123082093, "grad_norm": 1.38764226436615, "learning_rate": 9.967657894736842e-05, "loss": 0.4311, "step": 21239 }, { "epoch": 1.1893829096203383, "grad_norm": 1.4594128131866455, "learning_rate": 9.967631578947368e-05, "loss": 0.5161, "step": 21240 }, { "epoch": 1.1894389069324673, "grad_norm": 1.1985621452331543, "learning_rate": 9.967605263157896e-05, "loss": 0.403, "step": 21241 }, { "epoch": 1.1894949042445964, "grad_norm": 1.1932530403137207, "learning_rate": 9.967578947368422e-05, "loss": 0.4045, "step": 21242 }, { "epoch": 1.1895509015567254, "grad_norm": 1.3379814624786377, "learning_rate": 9.967552631578948e-05, "loss": 0.42, "step": 21243 }, { "epoch": 1.1896068988688544, "grad_norm": 1.2524373531341553, "learning_rate": 9.967526315789474e-05, "loss": 0.3603, "step": 21244 }, { "epoch": 1.1896628961809834, "grad_norm": 1.932081699371338, "learning_rate": 9.967500000000001e-05, "loss": 0.5632, "step": 21245 }, { "epoch": 1.1897188934931124, "grad_norm": 1.4900124073028564, "learning_rate": 9.967473684210527e-05, "loss": 0.6733, "step": 21246 }, { "epoch": 1.1897748908052415, "grad_norm": 1.4772049188613892, "learning_rate": 9.967447368421053e-05, "loss": 0.5363, "step": 21247 }, { "epoch": 1.1898308881173705, "grad_norm": 1.3865453004837036, "learning_rate": 9.967421052631579e-05, "loss": 0.5008, "step": 21248 }, { "epoch": 1.1898868854294995, 
"grad_norm": 1.3593593835830688, "learning_rate": 9.967394736842106e-05, "loss": 0.439, "step": 21249 }, { "epoch": 1.1899428827416285, "grad_norm": 1.2697149515151978, "learning_rate": 9.967368421052632e-05, "loss": 0.391, "step": 21250 }, { "epoch": 1.1899988800537575, "grad_norm": 1.3371034860610962, "learning_rate": 9.967342105263158e-05, "loss": 0.447, "step": 21251 }, { "epoch": 1.1900548773658866, "grad_norm": 1.3540842533111572, "learning_rate": 9.967315789473684e-05, "loss": 0.466, "step": 21252 }, { "epoch": 1.1901108746780156, "grad_norm": 1.2161946296691895, "learning_rate": 9.967289473684212e-05, "loss": 0.4371, "step": 21253 }, { "epoch": 1.1901668719901446, "grad_norm": 1.0837620496749878, "learning_rate": 9.967263157894738e-05, "loss": 0.406, "step": 21254 }, { "epoch": 1.1902228693022736, "grad_norm": 1.256539225578308, "learning_rate": 9.967236842105263e-05, "loss": 0.4907, "step": 21255 }, { "epoch": 1.1902788666144026, "grad_norm": 1.5777192115783691, "learning_rate": 9.96721052631579e-05, "loss": 0.3872, "step": 21256 }, { "epoch": 1.1903348639265316, "grad_norm": 1.4282094240188599, "learning_rate": 9.967184210526315e-05, "loss": 0.5761, "step": 21257 }, { "epoch": 1.1903908612386607, "grad_norm": 1.4186253547668457, "learning_rate": 9.967157894736843e-05, "loss": 0.4186, "step": 21258 }, { "epoch": 1.1904468585507895, "grad_norm": 1.409191370010376, "learning_rate": 9.967131578947369e-05, "loss": 0.4498, "step": 21259 }, { "epoch": 1.1905028558629185, "grad_norm": 1.305984377861023, "learning_rate": 9.967105263157896e-05, "loss": 0.4489, "step": 21260 }, { "epoch": 1.1905588531750475, "grad_norm": 1.7411454916000366, "learning_rate": 9.96707894736842e-05, "loss": 0.4493, "step": 21261 }, { "epoch": 1.1906148504871765, "grad_norm": 1.277187466621399, "learning_rate": 9.967052631578948e-05, "loss": 0.4765, "step": 21262 }, { "epoch": 1.1906708477993055, "grad_norm": 1.2747749090194702, "learning_rate": 9.967026315789474e-05, "loss": 0.4391, 
"step": 21263 }, { "epoch": 1.1907268451114346, "grad_norm": 1.30585515499115, "learning_rate": 9.967000000000001e-05, "loss": 0.4503, "step": 21264 }, { "epoch": 1.1907828424235636, "grad_norm": 1.7725613117218018, "learning_rate": 9.966973684210526e-05, "loss": 0.6786, "step": 21265 }, { "epoch": 1.1908388397356926, "grad_norm": 1.4894185066223145, "learning_rate": 9.966947368421053e-05, "loss": 0.4221, "step": 21266 }, { "epoch": 1.1908948370478216, "grad_norm": 1.205344319343567, "learning_rate": 9.966921052631579e-05, "loss": 0.4327, "step": 21267 }, { "epoch": 1.1909508343599506, "grad_norm": 1.4172561168670654, "learning_rate": 9.966894736842107e-05, "loss": 0.4166, "step": 21268 }, { "epoch": 1.1910068316720797, "grad_norm": 1.1164103746414185, "learning_rate": 9.966868421052633e-05, "loss": 0.316, "step": 21269 }, { "epoch": 1.1910628289842087, "grad_norm": 1.3036106824874878, "learning_rate": 9.966842105263158e-05, "loss": 0.4415, "step": 21270 }, { "epoch": 1.1911188262963377, "grad_norm": 1.145293951034546, "learning_rate": 9.966815789473684e-05, "loss": 0.335, "step": 21271 }, { "epoch": 1.1911748236084667, "grad_norm": 1.4188669919967651, "learning_rate": 9.96678947368421e-05, "loss": 0.5391, "step": 21272 }, { "epoch": 1.1912308209205957, "grad_norm": 1.3428798913955688, "learning_rate": 9.966763157894738e-05, "loss": 0.4446, "step": 21273 }, { "epoch": 1.1912868182327248, "grad_norm": 1.8040522336959839, "learning_rate": 9.966736842105264e-05, "loss": 0.5103, "step": 21274 }, { "epoch": 1.1913428155448538, "grad_norm": 1.2655612230300903, "learning_rate": 9.96671052631579e-05, "loss": 0.4005, "step": 21275 }, { "epoch": 1.1913988128569828, "grad_norm": 1.1684056520462036, "learning_rate": 9.966684210526316e-05, "loss": 0.2977, "step": 21276 }, { "epoch": 1.1914548101691118, "grad_norm": 1.3208248615264893, "learning_rate": 9.966657894736843e-05, "loss": 0.5333, "step": 21277 }, { "epoch": 1.1915108074812408, "grad_norm": 1.7194039821624756, 
"learning_rate": 9.966631578947369e-05, "loss": 0.4584, "step": 21278 }, { "epoch": 1.1915668047933698, "grad_norm": 1.319769263267517, "learning_rate": 9.966605263157895e-05, "loss": 0.5436, "step": 21279 }, { "epoch": 1.1916228021054989, "grad_norm": 1.57979416847229, "learning_rate": 9.966578947368421e-05, "loss": 0.3921, "step": 21280 }, { "epoch": 1.1916787994176279, "grad_norm": 1.232108235359192, "learning_rate": 9.966552631578948e-05, "loss": 0.3542, "step": 21281 }, { "epoch": 1.191734796729757, "grad_norm": 1.295562505722046, "learning_rate": 9.966526315789474e-05, "loss": 0.4108, "step": 21282 }, { "epoch": 1.191790794041886, "grad_norm": 1.457898736000061, "learning_rate": 9.9665e-05, "loss": 0.4532, "step": 21283 }, { "epoch": 1.191846791354015, "grad_norm": 1.6455161571502686, "learning_rate": 9.966473684210526e-05, "loss": 0.5247, "step": 21284 }, { "epoch": 1.191902788666144, "grad_norm": 1.4352648258209229, "learning_rate": 9.966447368421054e-05, "loss": 0.4688, "step": 21285 }, { "epoch": 1.191958785978273, "grad_norm": 42.5954475402832, "learning_rate": 9.96642105263158e-05, "loss": 0.5555, "step": 21286 }, { "epoch": 1.192014783290402, "grad_norm": 1.550161361694336, "learning_rate": 9.966394736842107e-05, "loss": 0.493, "step": 21287 }, { "epoch": 1.192070780602531, "grad_norm": 1.437830924987793, "learning_rate": 9.966368421052631e-05, "loss": 0.4395, "step": 21288 }, { "epoch": 1.19212677791466, "grad_norm": 1.3734545707702637, "learning_rate": 9.966342105263159e-05, "loss": 0.432, "step": 21289 }, { "epoch": 1.192182775226789, "grad_norm": 1.2367222309112549, "learning_rate": 9.966315789473685e-05, "loss": 0.3989, "step": 21290 }, { "epoch": 1.192238772538918, "grad_norm": 1.257411003112793, "learning_rate": 9.966289473684211e-05, "loss": 0.4927, "step": 21291 }, { "epoch": 1.192294769851047, "grad_norm": 1.535960078239441, "learning_rate": 9.966263157894738e-05, "loss": 0.3534, "step": 21292 }, { "epoch": 1.1923507671631761, "grad_norm": 
1.664744257926941, "learning_rate": 9.966236842105263e-05, "loss": 0.4783, "step": 21293 }, { "epoch": 1.1924067644753051, "grad_norm": 1.3406223058700562, "learning_rate": 9.96621052631579e-05, "loss": 0.5224, "step": 21294 }, { "epoch": 1.1924627617874342, "grad_norm": 1.4745821952819824, "learning_rate": 9.966184210526316e-05, "loss": 0.3627, "step": 21295 }, { "epoch": 1.1925187590995632, "grad_norm": 1.4468576908111572, "learning_rate": 9.966157894736843e-05, "loss": 0.4381, "step": 21296 }, { "epoch": 1.1925747564116922, "grad_norm": 1.48208749294281, "learning_rate": 9.966131578947369e-05, "loss": 0.4918, "step": 21297 }, { "epoch": 1.1926307537238212, "grad_norm": 1.2047662734985352, "learning_rate": 9.966105263157895e-05, "loss": 0.5098, "step": 21298 }, { "epoch": 1.1926867510359502, "grad_norm": 1.2078361511230469, "learning_rate": 9.966078947368421e-05, "loss": 0.4489, "step": 21299 }, { "epoch": 1.1927427483480793, "grad_norm": 1.2705317735671997, "learning_rate": 9.966052631578949e-05, "loss": 0.3713, "step": 21300 }, { "epoch": 1.1927987456602083, "grad_norm": 1.3581575155258179, "learning_rate": 9.966026315789474e-05, "loss": 0.376, "step": 21301 }, { "epoch": 1.1928547429723373, "grad_norm": 1.405099630355835, "learning_rate": 9.966e-05, "loss": 0.4952, "step": 21302 }, { "epoch": 1.1929107402844663, "grad_norm": 1.1661505699157715, "learning_rate": 9.965973684210526e-05, "loss": 0.3296, "step": 21303 }, { "epoch": 1.1929667375965953, "grad_norm": 1.4038792848587036, "learning_rate": 9.965947368421054e-05, "loss": 0.3139, "step": 21304 }, { "epoch": 1.1930227349087243, "grad_norm": 1.3428869247436523, "learning_rate": 9.96592105263158e-05, "loss": 0.4924, "step": 21305 }, { "epoch": 1.1930787322208534, "grad_norm": 1.3574388027191162, "learning_rate": 9.965894736842106e-05, "loss": 0.5391, "step": 21306 }, { "epoch": 1.1931347295329824, "grad_norm": 1.272050142288208, "learning_rate": 9.965868421052632e-05, "loss": 0.401, "step": 21307 }, { 
"epoch": 1.1931907268451114, "grad_norm": 1.4629734754562378, "learning_rate": 9.965842105263158e-05, "loss": 0.4211, "step": 21308 }, { "epoch": 1.1932467241572404, "grad_norm": 1.3369213342666626, "learning_rate": 9.965815789473685e-05, "loss": 0.3077, "step": 21309 }, { "epoch": 1.1933027214693694, "grad_norm": 1.5254815816879272, "learning_rate": 9.965789473684211e-05, "loss": 0.6437, "step": 21310 }, { "epoch": 1.1933587187814985, "grad_norm": 1.391893744468689, "learning_rate": 9.965763157894737e-05, "loss": 0.4797, "step": 21311 }, { "epoch": 1.1934147160936275, "grad_norm": 1.3041632175445557, "learning_rate": 9.965736842105263e-05, "loss": 0.4406, "step": 21312 }, { "epoch": 1.1934707134057565, "grad_norm": 1.2722002267837524, "learning_rate": 9.96571052631579e-05, "loss": 0.3874, "step": 21313 }, { "epoch": 1.1935267107178855, "grad_norm": 1.2253620624542236, "learning_rate": 9.965684210526316e-05, "loss": 0.4311, "step": 21314 }, { "epoch": 1.1935827080300145, "grad_norm": 1.3320913314819336, "learning_rate": 9.965657894736844e-05, "loss": 0.3835, "step": 21315 }, { "epoch": 1.1936387053421436, "grad_norm": 1.3502252101898193, "learning_rate": 9.965631578947368e-05, "loss": 0.4198, "step": 21316 }, { "epoch": 1.1936947026542726, "grad_norm": 1.4054888486862183, "learning_rate": 9.965605263157895e-05, "loss": 0.4118, "step": 21317 }, { "epoch": 1.1937506999664016, "grad_norm": 1.4755635261535645, "learning_rate": 9.965578947368421e-05, "loss": 0.3879, "step": 21318 }, { "epoch": 1.1938066972785306, "grad_norm": 1.1939069032669067, "learning_rate": 9.965552631578949e-05, "loss": 0.4434, "step": 21319 }, { "epoch": 1.1938626945906596, "grad_norm": 1.3770779371261597, "learning_rate": 9.965526315789473e-05, "loss": 0.4295, "step": 21320 }, { "epoch": 1.1939186919027887, "grad_norm": 1.2577886581420898, "learning_rate": 9.965500000000001e-05, "loss": 0.3482, "step": 21321 }, { "epoch": 1.1939746892149177, "grad_norm": 1.3705942630767822, "learning_rate": 
9.965473684210527e-05, "loss": 0.4651, "step": 21322 }, { "epoch": 1.1940306865270467, "grad_norm": 1.2908939123153687, "learning_rate": 9.965447368421054e-05, "loss": 0.3609, "step": 21323 }, { "epoch": 1.1940866838391757, "grad_norm": 1.2212295532226562, "learning_rate": 9.96542105263158e-05, "loss": 0.4819, "step": 21324 }, { "epoch": 1.1941426811513047, "grad_norm": 2.922239303588867, "learning_rate": 9.965394736842105e-05, "loss": 0.3876, "step": 21325 }, { "epoch": 1.1941986784634337, "grad_norm": 1.2339801788330078, "learning_rate": 9.965368421052632e-05, "loss": 0.3714, "step": 21326 }, { "epoch": 1.1942546757755628, "grad_norm": 1.2695056200027466, "learning_rate": 9.965342105263158e-05, "loss": 0.4169, "step": 21327 }, { "epoch": 1.1943106730876918, "grad_norm": 1.566595196723938, "learning_rate": 9.965315789473685e-05, "loss": 0.4286, "step": 21328 }, { "epoch": 1.1943666703998208, "grad_norm": 1.6339783668518066, "learning_rate": 9.965289473684211e-05, "loss": 0.5141, "step": 21329 }, { "epoch": 1.1944226677119498, "grad_norm": 1.6444056034088135, "learning_rate": 9.965263157894737e-05, "loss": 0.4442, "step": 21330 }, { "epoch": 1.1944786650240788, "grad_norm": 9.891744613647461, "learning_rate": 9.965236842105263e-05, "loss": 0.6255, "step": 21331 }, { "epoch": 1.1945346623362079, "grad_norm": 1.4509602785110474, "learning_rate": 9.96521052631579e-05, "loss": 0.5928, "step": 21332 }, { "epoch": 1.1945906596483369, "grad_norm": 1.338374376296997, "learning_rate": 9.965184210526316e-05, "loss": 0.4418, "step": 21333 }, { "epoch": 1.194646656960466, "grad_norm": 2.459693670272827, "learning_rate": 9.965157894736842e-05, "loss": 0.4032, "step": 21334 }, { "epoch": 1.194702654272595, "grad_norm": 1.3252931833267212, "learning_rate": 9.965131578947368e-05, "loss": 0.3934, "step": 21335 }, { "epoch": 1.194758651584724, "grad_norm": 1.5309665203094482, "learning_rate": 9.965105263157896e-05, "loss": 0.5197, "step": 21336 }, { "epoch": 1.194814648896853, 
"grad_norm": 1.3036658763885498, "learning_rate": 9.965078947368422e-05, "loss": 0.4037, "step": 21337 }, { "epoch": 1.194870646208982, "grad_norm": 1.296344518661499, "learning_rate": 9.965052631578948e-05, "loss": 0.5048, "step": 21338 }, { "epoch": 1.194926643521111, "grad_norm": 1.0616509914398193, "learning_rate": 9.965026315789474e-05, "loss": 0.339, "step": 21339 }, { "epoch": 1.19498264083324, "grad_norm": 1.7376258373260498, "learning_rate": 9.965000000000001e-05, "loss": 0.5438, "step": 21340 }, { "epoch": 1.195038638145369, "grad_norm": 1.2771480083465576, "learning_rate": 9.964973684210527e-05, "loss": 0.4788, "step": 21341 }, { "epoch": 1.195094635457498, "grad_norm": 1.6966956853866577, "learning_rate": 9.964947368421053e-05, "loss": 0.477, "step": 21342 }, { "epoch": 1.195150632769627, "grad_norm": 1.2666939496994019, "learning_rate": 9.964921052631579e-05, "loss": 0.6467, "step": 21343 }, { "epoch": 1.195206630081756, "grad_norm": 1.2245177030563354, "learning_rate": 9.964894736842105e-05, "loss": 0.4658, "step": 21344 }, { "epoch": 1.1952626273938851, "grad_norm": 1.3075501918792725, "learning_rate": 9.964868421052632e-05, "loss": 0.4255, "step": 21345 }, { "epoch": 1.1953186247060141, "grad_norm": 1.3158923387527466, "learning_rate": 9.964842105263158e-05, "loss": 0.4028, "step": 21346 }, { "epoch": 1.1953746220181432, "grad_norm": 1.310836911201477, "learning_rate": 9.964815789473686e-05, "loss": 0.4359, "step": 21347 }, { "epoch": 1.1954306193302722, "grad_norm": 1.4936611652374268, "learning_rate": 9.96478947368421e-05, "loss": 0.5623, "step": 21348 }, { "epoch": 1.1954866166424012, "grad_norm": 1.1484125852584839, "learning_rate": 9.964763157894737e-05, "loss": 0.3725, "step": 21349 }, { "epoch": 1.1955426139545302, "grad_norm": 1.3295791149139404, "learning_rate": 9.964736842105263e-05, "loss": 0.5219, "step": 21350 }, { "epoch": 1.1955986112666592, "grad_norm": 1.4058802127838135, "learning_rate": 9.964710526315791e-05, "loss": 0.4106, 
"step": 21351 }, { "epoch": 1.1956546085787882, "grad_norm": 1.1733869314193726, "learning_rate": 9.964684210526317e-05, "loss": 0.3727, "step": 21352 }, { "epoch": 1.1957106058909173, "grad_norm": 1.3899941444396973, "learning_rate": 9.964657894736843e-05, "loss": 0.4666, "step": 21353 }, { "epoch": 1.1957666032030463, "grad_norm": 1.5182164907455444, "learning_rate": 9.964631578947369e-05, "loss": 0.6035, "step": 21354 }, { "epoch": 1.1958226005151753, "grad_norm": 1.3368122577667236, "learning_rate": 9.964605263157896e-05, "loss": 0.4835, "step": 21355 }, { "epoch": 1.1958785978273043, "grad_norm": 1.2818577289581299, "learning_rate": 9.964578947368422e-05, "loss": 0.5112, "step": 21356 }, { "epoch": 1.1959345951394333, "grad_norm": 1.1480575799942017, "learning_rate": 9.964552631578948e-05, "loss": 0.4568, "step": 21357 }, { "epoch": 1.1959905924515624, "grad_norm": 1.4644219875335693, "learning_rate": 9.964526315789474e-05, "loss": 0.4418, "step": 21358 }, { "epoch": 1.1960465897636914, "grad_norm": 1.3618824481964111, "learning_rate": 9.9645e-05, "loss": 0.4163, "step": 21359 }, { "epoch": 1.1961025870758204, "grad_norm": 1.3760063648223877, "learning_rate": 9.964473684210527e-05, "loss": 0.4738, "step": 21360 }, { "epoch": 1.1961585843879494, "grad_norm": 1.3598028421401978, "learning_rate": 9.964447368421053e-05, "loss": 0.3665, "step": 21361 }, { "epoch": 1.1962145817000784, "grad_norm": 1.2918466329574585, "learning_rate": 9.964421052631579e-05, "loss": 0.4522, "step": 21362 }, { "epoch": 1.1962705790122075, "grad_norm": 1.4884247779846191, "learning_rate": 9.964394736842105e-05, "loss": 0.4283, "step": 21363 }, { "epoch": 1.1963265763243365, "grad_norm": 1.1714822053909302, "learning_rate": 9.964368421052632e-05, "loss": 0.4868, "step": 21364 }, { "epoch": 1.1963825736364655, "grad_norm": 1.141059160232544, "learning_rate": 9.964342105263158e-05, "loss": 0.3575, "step": 21365 }, { "epoch": 1.1964385709485945, "grad_norm": 1.2665141820907593, 
"learning_rate": 9.964315789473684e-05, "loss": 0.4476, "step": 21366 }, { "epoch": 1.1964945682607235, "grad_norm": 1.3462717533111572, "learning_rate": 9.96428947368421e-05, "loss": 0.3294, "step": 21367 }, { "epoch": 1.1965505655728526, "grad_norm": 1.5221977233886719, "learning_rate": 9.964263157894738e-05, "loss": 0.4968, "step": 21368 }, { "epoch": 1.1966065628849816, "grad_norm": 2.3991973400115967, "learning_rate": 9.964236842105264e-05, "loss": 0.555, "step": 21369 }, { "epoch": 1.1966625601971106, "grad_norm": 1.454770565032959, "learning_rate": 9.96421052631579e-05, "loss": 0.4786, "step": 21370 }, { "epoch": 1.1967185575092396, "grad_norm": 1.3468774557113647, "learning_rate": 9.964184210526316e-05, "loss": 0.4394, "step": 21371 }, { "epoch": 1.1967745548213686, "grad_norm": 1.3988748788833618, "learning_rate": 9.964157894736843e-05, "loss": 0.4111, "step": 21372 }, { "epoch": 1.1968305521334976, "grad_norm": 1.39087975025177, "learning_rate": 9.964131578947369e-05, "loss": 0.4374, "step": 21373 }, { "epoch": 1.1968865494456267, "grad_norm": 1.1370114088058472, "learning_rate": 9.964105263157896e-05, "loss": 0.4175, "step": 21374 }, { "epoch": 1.1969425467577557, "grad_norm": 1.2697925567626953, "learning_rate": 9.964078947368421e-05, "loss": 0.3974, "step": 21375 }, { "epoch": 1.1969985440698847, "grad_norm": 1.4969922304153442, "learning_rate": 9.964052631578947e-05, "loss": 0.6954, "step": 21376 }, { "epoch": 1.1970545413820137, "grad_norm": 1.474172592163086, "learning_rate": 9.964026315789474e-05, "loss": 0.4591, "step": 21377 }, { "epoch": 1.1971105386941427, "grad_norm": 1.2626137733459473, "learning_rate": 9.964e-05, "loss": 0.4082, "step": 21378 }, { "epoch": 1.1971665360062718, "grad_norm": 1.0900942087173462, "learning_rate": 9.963973684210527e-05, "loss": 0.3125, "step": 21379 }, { "epoch": 1.1972225333184008, "grad_norm": 1.2060264348983765, "learning_rate": 9.963947368421052e-05, "loss": 0.561, "step": 21380 }, { "epoch": 
1.1972785306305298, "grad_norm": 1.5008060932159424, "learning_rate": 9.96392105263158e-05, "loss": 0.4794, "step": 21381 }, { "epoch": 1.1973345279426588, "grad_norm": 1.6069817543029785, "learning_rate": 9.963894736842105e-05, "loss": 0.7367, "step": 21382 }, { "epoch": 1.1973905252547878, "grad_norm": 1.6059280633926392, "learning_rate": 9.963868421052633e-05, "loss": 0.4607, "step": 21383 }, { "epoch": 1.1974465225669169, "grad_norm": 1.3171371221542358, "learning_rate": 9.963842105263159e-05, "loss": 0.4136, "step": 21384 }, { "epoch": 1.1975025198790459, "grad_norm": 1.3216816186904907, "learning_rate": 9.963815789473685e-05, "loss": 0.3876, "step": 21385 }, { "epoch": 1.197558517191175, "grad_norm": 1.4280669689178467, "learning_rate": 9.96378947368421e-05, "loss": 0.3501, "step": 21386 }, { "epoch": 1.197614514503304, "grad_norm": 1.2969985008239746, "learning_rate": 9.963763157894738e-05, "loss": 0.4646, "step": 21387 }, { "epoch": 1.197670511815433, "grad_norm": 1.5294933319091797, "learning_rate": 9.963736842105264e-05, "loss": 0.4396, "step": 21388 }, { "epoch": 1.197726509127562, "grad_norm": 1.4797515869140625, "learning_rate": 9.96371052631579e-05, "loss": 0.5789, "step": 21389 }, { "epoch": 1.197782506439691, "grad_norm": 1.872753381729126, "learning_rate": 9.963684210526316e-05, "loss": 0.4848, "step": 21390 }, { "epoch": 1.19783850375182, "grad_norm": 1.5574126243591309, "learning_rate": 9.963657894736843e-05, "loss": 0.4941, "step": 21391 }, { "epoch": 1.197894501063949, "grad_norm": 12.988001823425293, "learning_rate": 9.963631578947369e-05, "loss": 0.5403, "step": 21392 }, { "epoch": 1.197950498376078, "grad_norm": 1.631577730178833, "learning_rate": 9.963605263157895e-05, "loss": 0.4207, "step": 21393 }, { "epoch": 1.198006495688207, "grad_norm": 1.3914836645126343, "learning_rate": 9.963578947368421e-05, "loss": 0.4603, "step": 21394 }, { "epoch": 1.198062493000336, "grad_norm": 1.272395133972168, "learning_rate": 9.963552631578947e-05, 
"loss": 0.3924, "step": 21395 }, { "epoch": 1.198118490312465, "grad_norm": 1.0716838836669922, "learning_rate": 9.963526315789474e-05, "loss": 0.3835, "step": 21396 }, { "epoch": 1.198174487624594, "grad_norm": 1.6219170093536377, "learning_rate": 9.9635e-05, "loss": 0.6136, "step": 21397 }, { "epoch": 1.1982304849367231, "grad_norm": 1.3890399932861328, "learning_rate": 9.963473684210526e-05, "loss": 0.4818, "step": 21398 }, { "epoch": 1.1982864822488521, "grad_norm": 1.3197038173675537, "learning_rate": 9.963447368421052e-05, "loss": 0.3899, "step": 21399 }, { "epoch": 1.1983424795609812, "grad_norm": 1.3574610948562622, "learning_rate": 9.96342105263158e-05, "loss": 0.4244, "step": 21400 }, { "epoch": 1.1983984768731102, "grad_norm": 1.9468427896499634, "learning_rate": 9.963394736842106e-05, "loss": 0.4961, "step": 21401 }, { "epoch": 1.1984544741852392, "grad_norm": 1.6920266151428223, "learning_rate": 9.963368421052633e-05, "loss": 0.446, "step": 21402 }, { "epoch": 1.198510471497368, "grad_norm": 1.560967206954956, "learning_rate": 9.963342105263158e-05, "loss": 0.4072, "step": 21403 }, { "epoch": 1.198566468809497, "grad_norm": 1.432548999786377, "learning_rate": 9.963315789473685e-05, "loss": 0.4393, "step": 21404 }, { "epoch": 1.198622466121626, "grad_norm": 1.4270511865615845, "learning_rate": 9.963289473684211e-05, "loss": 0.3981, "step": 21405 }, { "epoch": 1.198678463433755, "grad_norm": 1.488874912261963, "learning_rate": 9.963263157894738e-05, "loss": 0.4374, "step": 21406 }, { "epoch": 1.198734460745884, "grad_norm": 1.3100860118865967, "learning_rate": 9.963236842105264e-05, "loss": 0.5362, "step": 21407 }, { "epoch": 1.198790458058013, "grad_norm": 1.2270326614379883, "learning_rate": 9.96321052631579e-05, "loss": 0.3289, "step": 21408 }, { "epoch": 1.1988464553701421, "grad_norm": 1.1580663919448853, "learning_rate": 9.963184210526316e-05, "loss": 0.4817, "step": 21409 }, { "epoch": 1.1989024526822711, "grad_norm": 1.5381953716278076, 
"learning_rate": 9.963157894736843e-05, "loss": 0.4355, "step": 21410 }, { "epoch": 1.1989584499944002, "grad_norm": 1.052895188331604, "learning_rate": 9.96313157894737e-05, "loss": 0.3743, "step": 21411 }, { "epoch": 1.1990144473065292, "grad_norm": 1.3846203088760376, "learning_rate": 9.963105263157894e-05, "loss": 0.4153, "step": 21412 }, { "epoch": 1.1990704446186582, "grad_norm": 1.3448079824447632, "learning_rate": 9.963078947368421e-05, "loss": 0.3799, "step": 21413 }, { "epoch": 1.1991264419307872, "grad_norm": 1.3910526037216187, "learning_rate": 9.963052631578947e-05, "loss": 0.3322, "step": 21414 }, { "epoch": 1.1991824392429162, "grad_norm": 1.280060052871704, "learning_rate": 9.963026315789475e-05, "loss": 0.4396, "step": 21415 }, { "epoch": 1.1992384365550453, "grad_norm": 1.3302288055419922, "learning_rate": 9.963e-05, "loss": 0.3988, "step": 21416 }, { "epoch": 1.1992944338671743, "grad_norm": 1.322451114654541, "learning_rate": 9.962973684210527e-05, "loss": 0.3296, "step": 21417 }, { "epoch": 1.1993504311793033, "grad_norm": 1.6919171810150146, "learning_rate": 9.962947368421053e-05, "loss": 0.4415, "step": 21418 }, { "epoch": 1.1994064284914323, "grad_norm": 1.2231345176696777, "learning_rate": 9.96292105263158e-05, "loss": 0.3932, "step": 21419 }, { "epoch": 1.1994624258035613, "grad_norm": 1.4488109350204468, "learning_rate": 9.962894736842106e-05, "loss": 0.4924, "step": 21420 }, { "epoch": 1.1995184231156903, "grad_norm": 1.2579987049102783, "learning_rate": 9.962868421052632e-05, "loss": 0.4286, "step": 21421 }, { "epoch": 1.1995744204278194, "grad_norm": 1.4669923782348633, "learning_rate": 9.962842105263158e-05, "loss": 0.4557, "step": 21422 }, { "epoch": 1.1996304177399484, "grad_norm": 1.3621535301208496, "learning_rate": 9.962815789473685e-05, "loss": 0.454, "step": 21423 }, { "epoch": 1.1996864150520774, "grad_norm": 1.6935299634933472, "learning_rate": 9.962789473684211e-05, "loss": 0.4028, "step": 21424 }, { "epoch": 
1.1997424123642064, "grad_norm": 1.788367509841919, "learning_rate": 9.962763157894737e-05, "loss": 0.4214, "step": 21425 }, { "epoch": 1.1997984096763354, "grad_norm": 1.615911841392517, "learning_rate": 9.962736842105263e-05, "loss": 0.4857, "step": 21426 }, { "epoch": 1.1998544069884645, "grad_norm": 1.5796549320220947, "learning_rate": 9.96271052631579e-05, "loss": 0.4454, "step": 21427 }, { "epoch": 1.1999104043005935, "grad_norm": 1.3333300352096558, "learning_rate": 9.962684210526316e-05, "loss": 0.4156, "step": 21428 }, { "epoch": 1.1999664016127225, "grad_norm": 1.4734907150268555, "learning_rate": 9.962657894736842e-05, "loss": 0.5741, "step": 21429 }, { "epoch": 1.2000223989248515, "grad_norm": 1.3020298480987549, "learning_rate": 9.962631578947368e-05, "loss": 0.4521, "step": 21430 }, { "epoch": 1.2000783962369805, "grad_norm": 1.2103228569030762, "learning_rate": 9.962605263157894e-05, "loss": 0.3992, "step": 21431 }, { "epoch": 1.2001343935491096, "grad_norm": 1.1992607116699219, "learning_rate": 9.962578947368422e-05, "loss": 0.3216, "step": 21432 }, { "epoch": 1.2001903908612386, "grad_norm": 1.7995073795318604, "learning_rate": 9.962552631578948e-05, "loss": 0.5035, "step": 21433 }, { "epoch": 1.2002463881733676, "grad_norm": 1.3206636905670166, "learning_rate": 9.962526315789475e-05, "loss": 0.3423, "step": 21434 }, { "epoch": 1.2003023854854966, "grad_norm": 1.1745672225952148, "learning_rate": 9.9625e-05, "loss": 0.4342, "step": 21435 }, { "epoch": 1.2003583827976256, "grad_norm": 1.5004053115844727, "learning_rate": 9.962473684210527e-05, "loss": 0.5099, "step": 21436 }, { "epoch": 1.2004143801097547, "grad_norm": 1.3555021286010742, "learning_rate": 9.962447368421053e-05, "loss": 0.4008, "step": 21437 }, { "epoch": 1.2004703774218837, "grad_norm": 1.3807787895202637, "learning_rate": 9.96242105263158e-05, "loss": 0.4379, "step": 21438 }, { "epoch": 1.2005263747340127, "grad_norm": 1.484658122062683, "learning_rate": 9.962394736842106e-05, 
"loss": 0.5916, "step": 21439 }, { "epoch": 1.2005823720461417, "grad_norm": 1.4990845918655396, "learning_rate": 9.962368421052632e-05, "loss": 0.3455, "step": 21440 }, { "epoch": 1.2006383693582707, "grad_norm": 1.7396429777145386, "learning_rate": 9.962342105263158e-05, "loss": 0.4284, "step": 21441 }, { "epoch": 1.2006943666703997, "grad_norm": 1.1902375221252441, "learning_rate": 9.962315789473685e-05, "loss": 0.3986, "step": 21442 }, { "epoch": 1.2007503639825288, "grad_norm": 1.521207571029663, "learning_rate": 9.962289473684211e-05, "loss": 0.4777, "step": 21443 }, { "epoch": 1.2008063612946578, "grad_norm": 1.5875146389007568, "learning_rate": 9.962263157894737e-05, "loss": 0.6852, "step": 21444 }, { "epoch": 1.2008623586067868, "grad_norm": 1.0622169971466064, "learning_rate": 9.962236842105263e-05, "loss": 0.3953, "step": 21445 }, { "epoch": 1.2009183559189158, "grad_norm": 1.5218700170516968, "learning_rate": 9.96221052631579e-05, "loss": 0.5035, "step": 21446 }, { "epoch": 1.2009743532310448, "grad_norm": 1.2983635663986206, "learning_rate": 9.962184210526317e-05, "loss": 0.3688, "step": 21447 }, { "epoch": 1.2010303505431739, "grad_norm": 1.4676125049591064, "learning_rate": 9.962157894736843e-05, "loss": 0.4088, "step": 21448 }, { "epoch": 1.2010863478553029, "grad_norm": 1.672722578048706, "learning_rate": 9.962131578947369e-05, "loss": 0.5554, "step": 21449 }, { "epoch": 1.201142345167432, "grad_norm": 1.4020755290985107, "learning_rate": 9.962105263157895e-05, "loss": 0.3693, "step": 21450 }, { "epoch": 1.201198342479561, "grad_norm": 2.088850498199463, "learning_rate": 9.962078947368422e-05, "loss": 0.3458, "step": 21451 }, { "epoch": 1.20125433979169, "grad_norm": 1.5886541604995728, "learning_rate": 9.962052631578948e-05, "loss": 0.5027, "step": 21452 }, { "epoch": 1.201310337103819, "grad_norm": 1.2782281637191772, "learning_rate": 9.962026315789474e-05, "loss": 0.4541, "step": 21453 }, { "epoch": 1.201366334415948, "grad_norm": 
1.2737568616867065, "learning_rate": 9.962e-05, "loss": 0.4547, "step": 21454 }, { "epoch": 1.201422331728077, "grad_norm": 1.5377733707427979, "learning_rate": 9.961973684210527e-05, "loss": 0.5331, "step": 21455 }, { "epoch": 1.201478329040206, "grad_norm": 1.5774884223937988, "learning_rate": 9.961947368421053e-05, "loss": 0.4701, "step": 21456 }, { "epoch": 1.201534326352335, "grad_norm": 1.1816591024398804, "learning_rate": 9.96192105263158e-05, "loss": 0.3861, "step": 21457 }, { "epoch": 1.201590323664464, "grad_norm": 1.193567156791687, "learning_rate": 9.961894736842105e-05, "loss": 0.408, "step": 21458 }, { "epoch": 1.201646320976593, "grad_norm": 1.3032221794128418, "learning_rate": 9.961868421052632e-05, "loss": 0.4111, "step": 21459 }, { "epoch": 1.201702318288722, "grad_norm": 1.4719740152359009, "learning_rate": 9.961842105263158e-05, "loss": 0.5088, "step": 21460 }, { "epoch": 1.2017583156008511, "grad_norm": 1.2805112600326538, "learning_rate": 9.961815789473686e-05, "loss": 0.381, "step": 21461 }, { "epoch": 1.2018143129129801, "grad_norm": 1.557629108428955, "learning_rate": 9.961789473684212e-05, "loss": 0.4849, "step": 21462 }, { "epoch": 1.2018703102251092, "grad_norm": 1.222686767578125, "learning_rate": 9.961763157894736e-05, "loss": 0.4187, "step": 21463 }, { "epoch": 1.2019263075372382, "grad_norm": 1.6230337619781494, "learning_rate": 9.961736842105264e-05, "loss": 0.5211, "step": 21464 }, { "epoch": 1.2019823048493672, "grad_norm": 1.3267515897750854, "learning_rate": 9.96171052631579e-05, "loss": 0.4405, "step": 21465 }, { "epoch": 1.2020383021614962, "grad_norm": 1.1997919082641602, "learning_rate": 9.961684210526317e-05, "loss": 0.307, "step": 21466 }, { "epoch": 1.2020942994736252, "grad_norm": 1.6164220571517944, "learning_rate": 9.961657894736842e-05, "loss": 0.4621, "step": 21467 }, { "epoch": 1.2021502967857542, "grad_norm": 1.169376254081726, "learning_rate": 9.961631578947369e-05, "loss": 0.366, "step": 21468 }, { "epoch": 
1.2022062940978833, "grad_norm": 1.418872594833374, "learning_rate": 9.961605263157895e-05, "loss": 0.3817, "step": 21469 }, { "epoch": 1.2022622914100123, "grad_norm": 1.3538204431533813, "learning_rate": 9.961578947368422e-05, "loss": 0.4116, "step": 21470 }, { "epoch": 1.2023182887221413, "grad_norm": 1.2791597843170166, "learning_rate": 9.961552631578948e-05, "loss": 0.4707, "step": 21471 }, { "epoch": 1.2023742860342703, "grad_norm": 1.051046371459961, "learning_rate": 9.961526315789474e-05, "loss": 0.3611, "step": 21472 }, { "epoch": 1.2024302833463993, "grad_norm": 1.4842638969421387, "learning_rate": 9.9615e-05, "loss": 0.4924, "step": 21473 }, { "epoch": 1.2024862806585284, "grad_norm": 1.563080072402954, "learning_rate": 9.961473684210527e-05, "loss": 0.5495, "step": 21474 }, { "epoch": 1.2025422779706574, "grad_norm": 1.3472084999084473, "learning_rate": 9.961447368421053e-05, "loss": 0.3709, "step": 21475 }, { "epoch": 1.2025982752827864, "grad_norm": 1.3370013236999512, "learning_rate": 9.96142105263158e-05, "loss": 0.3595, "step": 21476 }, { "epoch": 1.2026542725949154, "grad_norm": 1.3887287378311157, "learning_rate": 9.961394736842105e-05, "loss": 0.4461, "step": 21477 }, { "epoch": 1.2027102699070444, "grad_norm": 1.4351294040679932, "learning_rate": 9.961368421052633e-05, "loss": 0.4641, "step": 21478 }, { "epoch": 1.2027662672191735, "grad_norm": 1.1647791862487793, "learning_rate": 9.961342105263159e-05, "loss": 0.3386, "step": 21479 }, { "epoch": 1.2028222645313025, "grad_norm": 1.3226819038391113, "learning_rate": 9.961315789473685e-05, "loss": 0.4429, "step": 21480 }, { "epoch": 1.2028782618434315, "grad_norm": 2.2665774822235107, "learning_rate": 9.96128947368421e-05, "loss": 0.441, "step": 21481 }, { "epoch": 1.2029342591555605, "grad_norm": 1.6629362106323242, "learning_rate": 9.961263157894737e-05, "loss": 0.3264, "step": 21482 }, { "epoch": 1.2029902564676895, "grad_norm": 1.1915266513824463, "learning_rate": 9.961236842105264e-05, 
"loss": 0.323, "step": 21483 }, { "epoch": 1.2030462537798186, "grad_norm": 1.7877452373504639, "learning_rate": 9.96121052631579e-05, "loss": 0.4892, "step": 21484 }, { "epoch": 1.2031022510919476, "grad_norm": 1.3100731372833252, "learning_rate": 9.961184210526316e-05, "loss": 0.4047, "step": 21485 }, { "epoch": 1.2031582484040766, "grad_norm": 1.5466324090957642, "learning_rate": 9.961157894736842e-05, "loss": 0.4025, "step": 21486 }, { "epoch": 1.2032142457162056, "grad_norm": 1.2910239696502686, "learning_rate": 9.961131578947369e-05, "loss": 0.4358, "step": 21487 }, { "epoch": 1.2032702430283346, "grad_norm": 1.1227089166641235, "learning_rate": 9.961105263157895e-05, "loss": 0.3257, "step": 21488 }, { "epoch": 1.2033262403404636, "grad_norm": 3.1409835815429688, "learning_rate": 9.961078947368422e-05, "loss": 0.4212, "step": 21489 }, { "epoch": 1.2033822376525927, "grad_norm": 1.5216270685195923, "learning_rate": 9.961052631578947e-05, "loss": 0.6025, "step": 21490 }, { "epoch": 1.2034382349647217, "grad_norm": 1.5169122219085693, "learning_rate": 9.961026315789474e-05, "loss": 0.6018, "step": 21491 }, { "epoch": 1.2034942322768507, "grad_norm": 1.2840487957000732, "learning_rate": 9.961e-05, "loss": 0.3487, "step": 21492 }, { "epoch": 1.2035502295889797, "grad_norm": 1.481377363204956, "learning_rate": 9.960973684210528e-05, "loss": 0.5206, "step": 21493 }, { "epoch": 1.2036062269011087, "grad_norm": 1.334641456604004, "learning_rate": 9.960947368421054e-05, "loss": 0.3421, "step": 21494 }, { "epoch": 1.2036622242132378, "grad_norm": 1.4304063320159912, "learning_rate": 9.96092105263158e-05, "loss": 0.5398, "step": 21495 }, { "epoch": 1.2037182215253668, "grad_norm": 1.308811902999878, "learning_rate": 9.960894736842106e-05, "loss": 0.4299, "step": 21496 }, { "epoch": 1.2037742188374958, "grad_norm": 1.3681529760360718, "learning_rate": 9.960868421052632e-05, "loss": 0.5409, "step": 21497 }, { "epoch": 1.2038302161496248, "grad_norm": 1.987178921699524, 
"learning_rate": 9.960842105263159e-05, "loss": 0.5736, "step": 21498 }, { "epoch": 1.2038862134617538, "grad_norm": 1.3422547578811646, "learning_rate": 9.960815789473685e-05, "loss": 0.4158, "step": 21499 }, { "epoch": 1.2039422107738829, "grad_norm": 1.2379730939865112, "learning_rate": 9.960789473684211e-05, "loss": 0.3255, "step": 21500 }, { "epoch": 1.2039982080860119, "grad_norm": 1.7198050022125244, "learning_rate": 9.960763157894737e-05, "loss": 0.561, "step": 21501 }, { "epoch": 1.204054205398141, "grad_norm": 1.3510574102401733, "learning_rate": 9.960736842105264e-05, "loss": 0.4318, "step": 21502 }, { "epoch": 1.20411020271027, "grad_norm": 1.4553757905960083, "learning_rate": 9.96071052631579e-05, "loss": 0.5253, "step": 21503 }, { "epoch": 1.204166200022399, "grad_norm": 1.3594117164611816, "learning_rate": 9.960684210526316e-05, "loss": 0.3898, "step": 21504 }, { "epoch": 1.204222197334528, "grad_norm": 1.6740353107452393, "learning_rate": 9.960657894736842e-05, "loss": 0.4418, "step": 21505 }, { "epoch": 1.204278194646657, "grad_norm": 1.3242052793502808, "learning_rate": 9.96063157894737e-05, "loss": 0.4278, "step": 21506 }, { "epoch": 1.204334191958786, "grad_norm": 1.1198610067367554, "learning_rate": 9.960605263157895e-05, "loss": 0.3511, "step": 21507 }, { "epoch": 1.204390189270915, "grad_norm": 1.5841436386108398, "learning_rate": 9.960578947368421e-05, "loss": 0.4644, "step": 21508 }, { "epoch": 1.204446186583044, "grad_norm": 1.480082392692566, "learning_rate": 9.960552631578947e-05, "loss": 0.4577, "step": 21509 }, { "epoch": 1.204502183895173, "grad_norm": 1.6126313209533691, "learning_rate": 9.960526315789475e-05, "loss": 0.5515, "step": 21510 }, { "epoch": 1.204558181207302, "grad_norm": 1.250475287437439, "learning_rate": 9.9605e-05, "loss": 0.4454, "step": 21511 }, { "epoch": 1.204614178519431, "grad_norm": 1.662462830543518, "learning_rate": 9.960473684210528e-05, "loss": 0.5816, "step": 21512 }, { "epoch": 1.20467017583156, 
"grad_norm": 1.361572504043579, "learning_rate": 9.960447368421053e-05, "loss": 0.4464, "step": 21513 }, { "epoch": 1.2047261731436891, "grad_norm": 1.2813810110092163, "learning_rate": 9.960421052631579e-05, "loss": 0.4901, "step": 21514 }, { "epoch": 1.2047821704558181, "grad_norm": 1.5056607723236084, "learning_rate": 9.960394736842106e-05, "loss": 0.5114, "step": 21515 }, { "epoch": 1.2048381677679472, "grad_norm": 1.4565441608428955, "learning_rate": 9.960368421052632e-05, "loss": 0.5469, "step": 21516 }, { "epoch": 1.2048941650800762, "grad_norm": 1.654646635055542, "learning_rate": 9.960342105263159e-05, "loss": 0.4382, "step": 21517 }, { "epoch": 1.2049501623922052, "grad_norm": 1.1464526653289795, "learning_rate": 9.960315789473684e-05, "loss": 0.381, "step": 21518 }, { "epoch": 1.2050061597043342, "grad_norm": 2.7664570808410645, "learning_rate": 9.960289473684211e-05, "loss": 0.6341, "step": 21519 }, { "epoch": 1.2050621570164632, "grad_norm": 1.6704425811767578, "learning_rate": 9.960263157894737e-05, "loss": 0.4385, "step": 21520 }, { "epoch": 1.2051181543285923, "grad_norm": 1.5679821968078613, "learning_rate": 9.960236842105264e-05, "loss": 0.4312, "step": 21521 }, { "epoch": 1.2051741516407213, "grad_norm": 1.2296874523162842, "learning_rate": 9.960210526315789e-05, "loss": 0.4045, "step": 21522 }, { "epoch": 1.2052301489528503, "grad_norm": 1.1888378858566284, "learning_rate": 9.960184210526316e-05, "loss": 0.4166, "step": 21523 }, { "epoch": 1.2052861462649793, "grad_norm": 1.4093694686889648, "learning_rate": 9.960157894736842e-05, "loss": 0.4597, "step": 21524 }, { "epoch": 1.2053421435771083, "grad_norm": 1.5005062818527222, "learning_rate": 9.96013157894737e-05, "loss": 0.4724, "step": 21525 }, { "epoch": 1.2053981408892374, "grad_norm": 1.2124828100204468, "learning_rate": 9.960105263157896e-05, "loss": 0.3277, "step": 21526 }, { "epoch": 1.2054541382013664, "grad_norm": 1.685762882232666, "learning_rate": 9.960078947368422e-05, "loss": 
0.3987, "step": 21527 }, { "epoch": 1.2055101355134954, "grad_norm": 1.3396176099777222, "learning_rate": 9.960052631578948e-05, "loss": 0.511, "step": 21528 }, { "epoch": 1.2055661328256244, "grad_norm": 1.2800570726394653, "learning_rate": 9.960026315789475e-05, "loss": 0.3879, "step": 21529 }, { "epoch": 1.2056221301377534, "grad_norm": 1.1974067687988281, "learning_rate": 9.960000000000001e-05, "loss": 0.3899, "step": 21530 }, { "epoch": 1.2056781274498825, "grad_norm": 1.2989071607589722, "learning_rate": 9.959973684210527e-05, "loss": 0.4172, "step": 21531 }, { "epoch": 1.2057341247620115, "grad_norm": 1.431694507598877, "learning_rate": 9.959947368421053e-05, "loss": 0.4452, "step": 21532 }, { "epoch": 1.2057901220741405, "grad_norm": 1.1873540878295898, "learning_rate": 9.959921052631579e-05, "loss": 0.4166, "step": 21533 }, { "epoch": 1.2058461193862695, "grad_norm": 1.2419899702072144, "learning_rate": 9.959894736842106e-05, "loss": 0.4983, "step": 21534 }, { "epoch": 1.2059021166983985, "grad_norm": 1.3938467502593994, "learning_rate": 9.959868421052632e-05, "loss": 0.5308, "step": 21535 }, { "epoch": 1.2059581140105275, "grad_norm": 1.1994540691375732, "learning_rate": 9.959842105263158e-05, "loss": 0.4284, "step": 21536 }, { "epoch": 1.2060141113226566, "grad_norm": 1.2388249635696411, "learning_rate": 9.959815789473684e-05, "loss": 0.3959, "step": 21537 }, { "epoch": 1.2060701086347856, "grad_norm": 1.3360904455184937, "learning_rate": 9.959789473684211e-05, "loss": 0.6121, "step": 21538 }, { "epoch": 1.2061261059469146, "grad_norm": 1.4434248208999634, "learning_rate": 9.959763157894737e-05, "loss": 0.4103, "step": 21539 }, { "epoch": 1.2061821032590436, "grad_norm": 1.458721399307251, "learning_rate": 9.959736842105263e-05, "loss": 0.444, "step": 21540 }, { "epoch": 1.2062381005711726, "grad_norm": 1.3926796913146973, "learning_rate": 9.959710526315789e-05, "loss": 0.5739, "step": 21541 }, { "epoch": 1.2062940978833017, "grad_norm": 
1.5073621273040771, "learning_rate": 9.959684210526317e-05, "loss": 0.4406, "step": 21542 }, { "epoch": 1.2063500951954307, "grad_norm": 1.4399853944778442, "learning_rate": 9.959657894736843e-05, "loss": 0.3999, "step": 21543 }, { "epoch": 1.2064060925075597, "grad_norm": 1.4274659156799316, "learning_rate": 9.95963157894737e-05, "loss": 0.4341, "step": 21544 }, { "epoch": 1.2064620898196887, "grad_norm": 1.4583518505096436, "learning_rate": 9.959605263157895e-05, "loss": 0.4265, "step": 21545 }, { "epoch": 1.2065180871318177, "grad_norm": 1.3247216939926147, "learning_rate": 9.959578947368422e-05, "loss": 0.4782, "step": 21546 }, { "epoch": 1.2065740844439468, "grad_norm": 1.3564420938491821, "learning_rate": 9.959552631578948e-05, "loss": 0.5076, "step": 21547 }, { "epoch": 1.2066300817560758, "grad_norm": 1.6099863052368164, "learning_rate": 9.959526315789475e-05, "loss": 0.5902, "step": 21548 }, { "epoch": 1.2066860790682048, "grad_norm": 1.083328127861023, "learning_rate": 9.959500000000001e-05, "loss": 0.3665, "step": 21549 }, { "epoch": 1.2067420763803338, "grad_norm": 1.2699973583221436, "learning_rate": 9.959473684210526e-05, "loss": 0.4768, "step": 21550 }, { "epoch": 1.2067980736924628, "grad_norm": 1.5290131568908691, "learning_rate": 9.959447368421053e-05, "loss": 0.5987, "step": 21551 }, { "epoch": 1.2068540710045919, "grad_norm": 1.3370295763015747, "learning_rate": 9.959421052631579e-05, "loss": 0.3452, "step": 21552 }, { "epoch": 1.2069100683167209, "grad_norm": 1.2521337270736694, "learning_rate": 9.959394736842106e-05, "loss": 0.3931, "step": 21553 }, { "epoch": 1.20696606562885, "grad_norm": 1.3776090145111084, "learning_rate": 9.959368421052632e-05, "loss": 0.4932, "step": 21554 }, { "epoch": 1.207022062940979, "grad_norm": 1.7571406364440918, "learning_rate": 9.959342105263158e-05, "loss": 0.3998, "step": 21555 }, { "epoch": 1.207078060253108, "grad_norm": 1.3792510032653809, "learning_rate": 9.959315789473684e-05, "loss": 0.4431, "step": 
21556 }, { "epoch": 1.207134057565237, "grad_norm": 1.34672212600708, "learning_rate": 9.959289473684212e-05, "loss": 0.3362, "step": 21557 }, { "epoch": 1.207190054877366, "grad_norm": 1.3030239343643188, "learning_rate": 9.959263157894738e-05, "loss": 0.4635, "step": 21558 }, { "epoch": 1.207246052189495, "grad_norm": 1.6461795568466187, "learning_rate": 9.959236842105264e-05, "loss": 0.4938, "step": 21559 }, { "epoch": 1.207302049501624, "grad_norm": 1.3412219285964966, "learning_rate": 9.95921052631579e-05, "loss": 0.4992, "step": 21560 }, { "epoch": 1.207358046813753, "grad_norm": 1.4476515054702759, "learning_rate": 9.959184210526317e-05, "loss": 0.4607, "step": 21561 }, { "epoch": 1.207414044125882, "grad_norm": 1.3024920225143433, "learning_rate": 9.959157894736843e-05, "loss": 0.447, "step": 21562 }, { "epoch": 1.207470041438011, "grad_norm": 1.2919872999191284, "learning_rate": 9.959131578947369e-05, "loss": 0.5535, "step": 21563 }, { "epoch": 1.20752603875014, "grad_norm": 1.3789916038513184, "learning_rate": 9.959105263157895e-05, "loss": 0.4159, "step": 21564 }, { "epoch": 1.207582036062269, "grad_norm": 1.3930158615112305, "learning_rate": 9.959078947368422e-05, "loss": 0.3977, "step": 21565 }, { "epoch": 1.2076380333743981, "grad_norm": 1.4170221090316772, "learning_rate": 9.959052631578948e-05, "loss": 0.5159, "step": 21566 }, { "epoch": 1.2076940306865271, "grad_norm": 1.5150773525238037, "learning_rate": 9.959026315789474e-05, "loss": 0.429, "step": 21567 }, { "epoch": 1.2077500279986562, "grad_norm": 6.028048992156982, "learning_rate": 9.959e-05, "loss": 0.5761, "step": 21568 }, { "epoch": 1.2078060253107852, "grad_norm": 1.591076135635376, "learning_rate": 9.958973684210526e-05, "loss": 0.4305, "step": 21569 }, { "epoch": 1.2078620226229142, "grad_norm": 1.4415366649627686, "learning_rate": 9.958947368421053e-05, "loss": 0.3811, "step": 21570 }, { "epoch": 1.2079180199350432, "grad_norm": 1.2981065511703491, "learning_rate": 
9.958921052631579e-05, "loss": 0.4176, "step": 21571 }, { "epoch": 1.2079740172471722, "grad_norm": 1.2798056602478027, "learning_rate": 9.958894736842105e-05, "loss": 0.4618, "step": 21572 }, { "epoch": 1.2080300145593013, "grad_norm": 1.2739033699035645, "learning_rate": 9.958868421052631e-05, "loss": 0.4072, "step": 21573 }, { "epoch": 1.2080860118714303, "grad_norm": 1.510717749595642, "learning_rate": 9.958842105263159e-05, "loss": 0.6019, "step": 21574 }, { "epoch": 1.2081420091835593, "grad_norm": 1.3855395317077637, "learning_rate": 9.958815789473685e-05, "loss": 0.5853, "step": 21575 }, { "epoch": 1.2081980064956883, "grad_norm": 1.42533540725708, "learning_rate": 9.958789473684212e-05, "loss": 0.4465, "step": 21576 }, { "epoch": 1.2082540038078173, "grad_norm": 1.3876395225524902, "learning_rate": 9.958763157894736e-05, "loss": 0.4108, "step": 21577 }, { "epoch": 1.2083100011199464, "grad_norm": 1.4974334239959717, "learning_rate": 9.958736842105264e-05, "loss": 0.6935, "step": 21578 }, { "epoch": 1.2083659984320754, "grad_norm": 1.5912235975265503, "learning_rate": 9.95871052631579e-05, "loss": 0.5145, "step": 21579 }, { "epoch": 1.2084219957442044, "grad_norm": 1.3313535451889038, "learning_rate": 9.958684210526317e-05, "loss": 0.4336, "step": 21580 }, { "epoch": 1.2084779930563334, "grad_norm": 1.5140681266784668, "learning_rate": 9.958657894736843e-05, "loss": 0.4945, "step": 21581 }, { "epoch": 1.2085339903684624, "grad_norm": 1.1587097644805908, "learning_rate": 9.958631578947369e-05, "loss": 0.3736, "step": 21582 }, { "epoch": 1.2085899876805914, "grad_norm": 1.2712211608886719, "learning_rate": 9.958605263157895e-05, "loss": 0.3106, "step": 21583 }, { "epoch": 1.2086459849927205, "grad_norm": 1.2500139474868774, "learning_rate": 9.958578947368421e-05, "loss": 0.4155, "step": 21584 }, { "epoch": 1.2087019823048495, "grad_norm": 1.386947751045227, "learning_rate": 9.958552631578948e-05, "loss": 0.4118, "step": 21585 }, { "epoch": 1.2087579796169785, 
"grad_norm": 1.6933709383010864, "learning_rate": 9.958526315789474e-05, "loss": 0.4991, "step": 21586 }, { "epoch": 1.2088139769291075, "grad_norm": 1.3202474117279053, "learning_rate": 9.9585e-05, "loss": 0.4367, "step": 21587 }, { "epoch": 1.2088699742412365, "grad_norm": 1.3380085229873657, "learning_rate": 9.958473684210526e-05, "loss": 0.4499, "step": 21588 }, { "epoch": 1.2089259715533653, "grad_norm": 1.1275161504745483, "learning_rate": 9.958447368421054e-05, "loss": 0.3378, "step": 21589 }, { "epoch": 1.2089819688654944, "grad_norm": 1.6810859441757202, "learning_rate": 9.95842105263158e-05, "loss": 0.4943, "step": 21590 }, { "epoch": 1.2090379661776234, "grad_norm": 1.4383162260055542, "learning_rate": 9.958394736842106e-05, "loss": 0.541, "step": 21591 }, { "epoch": 1.2090939634897524, "grad_norm": 1.5488122701644897, "learning_rate": 9.958368421052632e-05, "loss": 0.661, "step": 21592 }, { "epoch": 1.2091499608018814, "grad_norm": 1.4834942817687988, "learning_rate": 9.958342105263159e-05, "loss": 0.4147, "step": 21593 }, { "epoch": 1.2092059581140104, "grad_norm": 1.313153624534607, "learning_rate": 9.958315789473685e-05, "loss": 0.3932, "step": 21594 }, { "epoch": 1.2092619554261395, "grad_norm": 1.1389399766921997, "learning_rate": 9.958289473684211e-05, "loss": 0.3893, "step": 21595 }, { "epoch": 1.2093179527382685, "grad_norm": 1.2442681789398193, "learning_rate": 9.958263157894737e-05, "loss": 0.4184, "step": 21596 }, { "epoch": 1.2093739500503975, "grad_norm": 1.3375062942504883, "learning_rate": 9.958236842105264e-05, "loss": 0.4146, "step": 21597 }, { "epoch": 1.2094299473625265, "grad_norm": 1.2993109226226807, "learning_rate": 9.95821052631579e-05, "loss": 0.4562, "step": 21598 }, { "epoch": 1.2094859446746555, "grad_norm": 1.506517767906189, "learning_rate": 9.958184210526317e-05, "loss": 0.5449, "step": 21599 }, { "epoch": 1.2095419419867846, "grad_norm": 1.4779539108276367, "learning_rate": 9.958157894736842e-05, "loss": 0.401, "step": 
21600 }, { "epoch": 1.2095979392989136, "grad_norm": 1.145774483680725, "learning_rate": 9.958131578947368e-05, "loss": 0.446, "step": 21601 }, { "epoch": 1.2096539366110426, "grad_norm": 1.577304482460022, "learning_rate": 9.958105263157895e-05, "loss": 0.4408, "step": 21602 }, { "epoch": 1.2097099339231716, "grad_norm": 1.4342930316925049, "learning_rate": 9.958078947368421e-05, "loss": 0.4376, "step": 21603 }, { "epoch": 1.2097659312353006, "grad_norm": 1.370580792427063, "learning_rate": 9.958052631578949e-05, "loss": 0.3853, "step": 21604 }, { "epoch": 1.2098219285474296, "grad_norm": 1.2701548337936401, "learning_rate": 9.958026315789473e-05, "loss": 0.5224, "step": 21605 }, { "epoch": 1.2098779258595587, "grad_norm": 1.2481516599655151, "learning_rate": 9.958e-05, "loss": 0.3483, "step": 21606 }, { "epoch": 1.2099339231716877, "grad_norm": 1.2778574228286743, "learning_rate": 9.957973684210527e-05, "loss": 0.4317, "step": 21607 }, { "epoch": 1.2099899204838167, "grad_norm": 1.218406081199646, "learning_rate": 9.957947368421054e-05, "loss": 0.432, "step": 21608 }, { "epoch": 1.2100459177959457, "grad_norm": 1.25705885887146, "learning_rate": 9.95792105263158e-05, "loss": 0.4008, "step": 21609 }, { "epoch": 1.2101019151080747, "grad_norm": 1.8609923124313354, "learning_rate": 9.957894736842106e-05, "loss": 0.5195, "step": 21610 }, { "epoch": 1.2101579124202038, "grad_norm": 1.0980687141418457, "learning_rate": 9.957868421052632e-05, "loss": 0.3524, "step": 21611 }, { "epoch": 1.2102139097323328, "grad_norm": 1.4224040508270264, "learning_rate": 9.957842105263159e-05, "loss": 0.4607, "step": 21612 }, { "epoch": 1.2102699070444618, "grad_norm": 1.319006323814392, "learning_rate": 9.957815789473685e-05, "loss": 0.5207, "step": 21613 }, { "epoch": 1.2103259043565908, "grad_norm": 1.6345844268798828, "learning_rate": 9.957789473684211e-05, "loss": 0.5688, "step": 21614 }, { "epoch": 1.2103819016687198, "grad_norm": 1.1791088581085205, "learning_rate": 
9.957763157894737e-05, "loss": 0.3316, "step": 21615 }, { "epoch": 1.2104378989808489, "grad_norm": 1.996338129043579, "learning_rate": 9.957736842105264e-05, "loss": 0.3288, "step": 21616 }, { "epoch": 1.2104938962929779, "grad_norm": 1.5343947410583496, "learning_rate": 9.95771052631579e-05, "loss": 0.3911, "step": 21617 }, { "epoch": 1.210549893605107, "grad_norm": 1.213384747505188, "learning_rate": 9.957684210526316e-05, "loss": 0.4806, "step": 21618 }, { "epoch": 1.210605890917236, "grad_norm": 1.6705191135406494, "learning_rate": 9.957657894736842e-05, "loss": 0.4419, "step": 21619 }, { "epoch": 1.210661888229365, "grad_norm": 1.6513222455978394, "learning_rate": 9.957631578947368e-05, "loss": 0.4492, "step": 21620 }, { "epoch": 1.210717885541494, "grad_norm": 1.4181777238845825, "learning_rate": 9.957605263157896e-05, "loss": 0.4674, "step": 21621 }, { "epoch": 1.210773882853623, "grad_norm": 4.974144458770752, "learning_rate": 9.957578947368422e-05, "loss": 0.5406, "step": 21622 }, { "epoch": 1.210829880165752, "grad_norm": 1.1779018640518188, "learning_rate": 9.957552631578948e-05, "loss": 0.4889, "step": 21623 }, { "epoch": 1.210885877477881, "grad_norm": 1.3417086601257324, "learning_rate": 9.957526315789473e-05, "loss": 0.3843, "step": 21624 }, { "epoch": 1.21094187479001, "grad_norm": 1.226450800895691, "learning_rate": 9.957500000000001e-05, "loss": 0.3566, "step": 21625 }, { "epoch": 1.210997872102139, "grad_norm": 1.1275731325149536, "learning_rate": 9.957473684210527e-05, "loss": 0.3272, "step": 21626 }, { "epoch": 1.211053869414268, "grad_norm": 1.4575562477111816, "learning_rate": 9.957447368421053e-05, "loss": 0.5366, "step": 21627 }, { "epoch": 1.211109866726397, "grad_norm": 1.3971645832061768, "learning_rate": 9.957421052631579e-05, "loss": 0.442, "step": 21628 }, { "epoch": 1.211165864038526, "grad_norm": 1.5425329208374023, "learning_rate": 9.957394736842106e-05, "loss": 0.5183, "step": 21629 }, { "epoch": 1.2112218613506551, "grad_norm": 
1.0971078872680664, "learning_rate": 9.957368421052632e-05, "loss": 0.3486, "step": 21630 }, { "epoch": 1.2112778586627841, "grad_norm": 1.5646895170211792, "learning_rate": 9.95734210526316e-05, "loss": 0.4559, "step": 21631 }, { "epoch": 1.2113338559749132, "grad_norm": 1.5675113201141357, "learning_rate": 9.957315789473684e-05, "loss": 0.423, "step": 21632 }, { "epoch": 1.2113898532870422, "grad_norm": 1.320599913597107, "learning_rate": 9.957289473684211e-05, "loss": 0.3928, "step": 21633 }, { "epoch": 1.2114458505991712, "grad_norm": 1.4620963335037231, "learning_rate": 9.957263157894737e-05, "loss": 0.4002, "step": 21634 }, { "epoch": 1.2115018479113002, "grad_norm": 1.4255797863006592, "learning_rate": 9.957236842105263e-05, "loss": 0.4461, "step": 21635 }, { "epoch": 1.2115578452234292, "grad_norm": 1.2193968296051025, "learning_rate": 9.95721052631579e-05, "loss": 0.4334, "step": 21636 }, { "epoch": 1.2116138425355583, "grad_norm": 1.4193572998046875, "learning_rate": 9.957184210526315e-05, "loss": 0.5235, "step": 21637 }, { "epoch": 1.2116698398476873, "grad_norm": 1.3480080366134644, "learning_rate": 9.957157894736843e-05, "loss": 0.4982, "step": 21638 }, { "epoch": 1.2117258371598163, "grad_norm": 1.2971700429916382, "learning_rate": 9.957131578947368e-05, "loss": 0.5002, "step": 21639 }, { "epoch": 1.2117818344719453, "grad_norm": 4.556559085845947, "learning_rate": 9.957105263157896e-05, "loss": 0.4989, "step": 21640 }, { "epoch": 1.2118378317840743, "grad_norm": 1.9515577554702759, "learning_rate": 9.957078947368422e-05, "loss": 0.4755, "step": 21641 }, { "epoch": 1.2118938290962034, "grad_norm": 1.8058103322982788, "learning_rate": 9.957052631578948e-05, "loss": 0.4307, "step": 21642 }, { "epoch": 1.2119498264083324, "grad_norm": 1.3275461196899414, "learning_rate": 9.957026315789474e-05, "loss": 0.43, "step": 21643 }, { "epoch": 1.2120058237204614, "grad_norm": 3.464611530303955, "learning_rate": 9.957000000000001e-05, "loss": 0.4449, "step": 21644 
}, { "epoch": 1.2120618210325904, "grad_norm": 1.3496110439300537, "learning_rate": 9.956973684210527e-05, "loss": 0.396, "step": 21645 }, { "epoch": 1.2121178183447194, "grad_norm": 1.2601943016052246, "learning_rate": 9.956947368421053e-05, "loss": 0.3952, "step": 21646 }, { "epoch": 1.2121738156568485, "grad_norm": 1.7452715635299683, "learning_rate": 9.956921052631579e-05, "loss": 0.3525, "step": 21647 }, { "epoch": 1.2122298129689775, "grad_norm": 1.5159343481063843, "learning_rate": 9.956894736842106e-05, "loss": 0.504, "step": 21648 }, { "epoch": 1.2122858102811065, "grad_norm": 1.2624573707580566, "learning_rate": 9.956868421052632e-05, "loss": 0.3595, "step": 21649 }, { "epoch": 1.2123418075932355, "grad_norm": 1.3922098875045776, "learning_rate": 9.956842105263158e-05, "loss": 0.2768, "step": 21650 }, { "epoch": 1.2123978049053645, "grad_norm": 1.1745513677597046, "learning_rate": 9.956815789473684e-05, "loss": 0.327, "step": 21651 }, { "epoch": 1.2124538022174935, "grad_norm": 1.3081787824630737, "learning_rate": 9.956789473684212e-05, "loss": 0.3648, "step": 21652 }, { "epoch": 1.2125097995296226, "grad_norm": 1.6047451496124268, "learning_rate": 9.956763157894738e-05, "loss": 0.3854, "step": 21653 }, { "epoch": 1.2125657968417516, "grad_norm": 1.5055924654006958, "learning_rate": 9.956736842105263e-05, "loss": 0.4896, "step": 21654 }, { "epoch": 1.2126217941538806, "grad_norm": 1.1641325950622559, "learning_rate": 9.95671052631579e-05, "loss": 0.3244, "step": 21655 }, { "epoch": 1.2126777914660096, "grad_norm": 1.4093161821365356, "learning_rate": 9.956684210526315e-05, "loss": 0.6209, "step": 21656 }, { "epoch": 1.2127337887781386, "grad_norm": 1.447231411933899, "learning_rate": 9.956657894736843e-05, "loss": 0.5382, "step": 21657 }, { "epoch": 1.2127897860902677, "grad_norm": 1.467686653137207, "learning_rate": 9.956631578947369e-05, "loss": 0.5004, "step": 21658 }, { "epoch": 1.2128457834023967, "grad_norm": 1.2182942628860474, "learning_rate": 
9.956605263157896e-05, "loss": 0.3318, "step": 21659 }, { "epoch": 1.2129017807145257, "grad_norm": 1.369750738143921, "learning_rate": 9.956578947368421e-05, "loss": 0.4887, "step": 21660 }, { "epoch": 1.2129577780266547, "grad_norm": 1.306819200515747, "learning_rate": 9.956552631578948e-05, "loss": 0.4644, "step": 21661 }, { "epoch": 1.2130137753387837, "grad_norm": 1.2664053440093994, "learning_rate": 9.956526315789474e-05, "loss": 0.3991, "step": 21662 }, { "epoch": 1.2130697726509128, "grad_norm": 1.3251110315322876, "learning_rate": 9.956500000000001e-05, "loss": 0.4963, "step": 21663 }, { "epoch": 1.2131257699630418, "grad_norm": 1.359578013420105, "learning_rate": 9.956473684210527e-05, "loss": 0.4374, "step": 21664 }, { "epoch": 1.2131817672751708, "grad_norm": 1.6653258800506592, "learning_rate": 9.956447368421053e-05, "loss": 0.4607, "step": 21665 }, { "epoch": 1.2132377645872998, "grad_norm": 1.531223177909851, "learning_rate": 9.956421052631579e-05, "loss": 0.4853, "step": 21666 }, { "epoch": 1.2132937618994288, "grad_norm": 1.707360863685608, "learning_rate": 9.956394736842107e-05, "loss": 0.5206, "step": 21667 }, { "epoch": 1.2133497592115579, "grad_norm": 1.4159029722213745, "learning_rate": 9.956368421052633e-05, "loss": 0.4003, "step": 21668 }, { "epoch": 1.2134057565236869, "grad_norm": 1.3171870708465576, "learning_rate": 9.956342105263159e-05, "loss": 0.4966, "step": 21669 }, { "epoch": 1.213461753835816, "grad_norm": 1.4929555654525757, "learning_rate": 9.956315789473684e-05, "loss": 0.4954, "step": 21670 }, { "epoch": 1.213517751147945, "grad_norm": 1.3464287519454956, "learning_rate": 9.95628947368421e-05, "loss": 0.3948, "step": 21671 }, { "epoch": 1.213573748460074, "grad_norm": 1.375730037689209, "learning_rate": 9.956263157894738e-05, "loss": 0.4981, "step": 21672 }, { "epoch": 1.213629745772203, "grad_norm": 1.1861927509307861, "learning_rate": 9.956236842105264e-05, "loss": 0.3404, "step": 21673 }, { "epoch": 1.213685743084332, 
"grad_norm": 1.2284128665924072, "learning_rate": 9.95621052631579e-05, "loss": 0.4488, "step": 21674 }, { "epoch": 1.213741740396461, "grad_norm": 1.2582566738128662, "learning_rate": 9.956184210526316e-05, "loss": 0.3798, "step": 21675 }, { "epoch": 1.21379773770859, "grad_norm": 1.329533576965332, "learning_rate": 9.956157894736843e-05, "loss": 0.5515, "step": 21676 }, { "epoch": 1.213853735020719, "grad_norm": 1.1948121786117554, "learning_rate": 9.956131578947369e-05, "loss": 0.4594, "step": 21677 }, { "epoch": 1.213909732332848, "grad_norm": 1.0382745265960693, "learning_rate": 9.956105263157895e-05, "loss": 0.3061, "step": 21678 }, { "epoch": 1.213965729644977, "grad_norm": 1.280571699142456, "learning_rate": 9.956078947368421e-05, "loss": 0.5167, "step": 21679 }, { "epoch": 1.214021726957106, "grad_norm": 1.3734811544418335, "learning_rate": 9.956052631578948e-05, "loss": 0.5118, "step": 21680 }, { "epoch": 1.214077724269235, "grad_norm": 1.4955744743347168, "learning_rate": 9.956026315789474e-05, "loss": 0.3749, "step": 21681 }, { "epoch": 1.2141337215813641, "grad_norm": 1.4537034034729004, "learning_rate": 9.956e-05, "loss": 0.4924, "step": 21682 }, { "epoch": 1.2141897188934931, "grad_norm": 1.1773545742034912, "learning_rate": 9.955973684210526e-05, "loss": 0.3595, "step": 21683 }, { "epoch": 1.2142457162056222, "grad_norm": 1.5288422107696533, "learning_rate": 9.955947368421054e-05, "loss": 0.4485, "step": 21684 }, { "epoch": 1.2143017135177512, "grad_norm": 1.2528376579284668, "learning_rate": 9.95592105263158e-05, "loss": 0.4384, "step": 21685 }, { "epoch": 1.2143577108298802, "grad_norm": 1.4522837400436401, "learning_rate": 9.955894736842107e-05, "loss": 0.4189, "step": 21686 }, { "epoch": 1.2144137081420092, "grad_norm": 2.0988106727600098, "learning_rate": 9.955868421052631e-05, "loss": 0.5293, "step": 21687 }, { "epoch": 1.2144697054541382, "grad_norm": 1.4099358320236206, "learning_rate": 9.955842105263157e-05, "loss": 0.4423, "step": 21688 }, 
{ "epoch": 1.2145257027662673, "grad_norm": 1.4312374591827393, "learning_rate": 9.955815789473685e-05, "loss": 0.5194, "step": 21689 }, { "epoch": 1.2145817000783963, "grad_norm": 1.713837742805481, "learning_rate": 9.955789473684211e-05, "loss": 0.3867, "step": 21690 }, { "epoch": 1.2146376973905253, "grad_norm": 1.3737242221832275, "learning_rate": 9.955763157894738e-05, "loss": 0.4443, "step": 21691 }, { "epoch": 1.2146936947026543, "grad_norm": 1.7588526010513306, "learning_rate": 9.955736842105263e-05, "loss": 0.7413, "step": 21692 }, { "epoch": 1.2147496920147833, "grad_norm": 1.221935749053955, "learning_rate": 9.95571052631579e-05, "loss": 0.4119, "step": 21693 }, { "epoch": 1.2148056893269124, "grad_norm": 1.4457955360412598, "learning_rate": 9.955684210526316e-05, "loss": 0.4465, "step": 21694 }, { "epoch": 1.2148616866390414, "grad_norm": 1.3118256330490112, "learning_rate": 9.955657894736843e-05, "loss": 0.4991, "step": 21695 }, { "epoch": 1.2149176839511704, "grad_norm": 1.287116527557373, "learning_rate": 9.955631578947369e-05, "loss": 0.5206, "step": 21696 }, { "epoch": 1.2149736812632994, "grad_norm": 1.5391192436218262, "learning_rate": 9.955605263157895e-05, "loss": 0.4731, "step": 21697 }, { "epoch": 1.2150296785754284, "grad_norm": 1.3973290920257568, "learning_rate": 9.955578947368421e-05, "loss": 0.4245, "step": 21698 }, { "epoch": 1.2150856758875574, "grad_norm": 1.5534621477127075, "learning_rate": 9.955552631578949e-05, "loss": 0.5042, "step": 21699 }, { "epoch": 1.2151416731996865, "grad_norm": 1.383452296257019, "learning_rate": 9.955526315789475e-05, "loss": 0.4167, "step": 21700 }, { "epoch": 1.2151976705118155, "grad_norm": 1.4800420999526978, "learning_rate": 9.9555e-05, "loss": 0.4413, "step": 21701 }, { "epoch": 1.2152536678239445, "grad_norm": 1.3420456647872925, "learning_rate": 9.955473684210526e-05, "loss": 0.5102, "step": 21702 }, { "epoch": 1.2153096651360735, "grad_norm": 1.287355661392212, "learning_rate": 
9.955447368421054e-05, "loss": 0.3622, "step": 21703 }, { "epoch": 1.2153656624482025, "grad_norm": 1.442916750907898, "learning_rate": 9.95542105263158e-05, "loss": 0.443, "step": 21704 }, { "epoch": 1.2154216597603316, "grad_norm": 1.3073047399520874, "learning_rate": 9.955394736842106e-05, "loss": 0.4629, "step": 21705 }, { "epoch": 1.2154776570724606, "grad_norm": 1.3350088596343994, "learning_rate": 9.955368421052632e-05, "loss": 0.4702, "step": 21706 }, { "epoch": 1.2155336543845896, "grad_norm": 1.529811143875122, "learning_rate": 9.955342105263158e-05, "loss": 0.4841, "step": 21707 }, { "epoch": 1.2155896516967186, "grad_norm": 1.3264755010604858, "learning_rate": 9.955315789473685e-05, "loss": 0.4314, "step": 21708 }, { "epoch": 1.2156456490088476, "grad_norm": 1.4241228103637695, "learning_rate": 9.955289473684211e-05, "loss": 0.5245, "step": 21709 }, { "epoch": 1.2157016463209767, "grad_norm": 3.6906840801239014, "learning_rate": 9.955263157894737e-05, "loss": 0.4509, "step": 21710 }, { "epoch": 1.2157576436331057, "grad_norm": 1.7241957187652588, "learning_rate": 9.955236842105263e-05, "loss": 0.42, "step": 21711 }, { "epoch": 1.2158136409452347, "grad_norm": 1.3581733703613281, "learning_rate": 9.95521052631579e-05, "loss": 0.4015, "step": 21712 }, { "epoch": 1.2158696382573637, "grad_norm": 1.3444708585739136, "learning_rate": 9.955184210526316e-05, "loss": 0.5752, "step": 21713 }, { "epoch": 1.2159256355694927, "grad_norm": 1.5004737377166748, "learning_rate": 9.955157894736844e-05, "loss": 0.4007, "step": 21714 }, { "epoch": 1.2159816328816218, "grad_norm": 1.3935128450393677, "learning_rate": 9.955131578947368e-05, "loss": 0.4082, "step": 21715 }, { "epoch": 1.2160376301937508, "grad_norm": 1.5477619171142578, "learning_rate": 9.955105263157895e-05, "loss": 0.6332, "step": 21716 }, { "epoch": 1.2160936275058798, "grad_norm": 1.4518890380859375, "learning_rate": 9.955078947368421e-05, "loss": 0.3699, "step": 21717 }, { "epoch": 1.2161496248180088, 
"grad_norm": 1.3490943908691406, "learning_rate": 9.955052631578949e-05, "loss": 0.4337, "step": 21718 }, { "epoch": 1.2162056221301378, "grad_norm": 1.44427490234375, "learning_rate": 9.955026315789473e-05, "loss": 0.4376, "step": 21719 }, { "epoch": 1.2162616194422669, "grad_norm": 1.369771122932434, "learning_rate": 9.955000000000001e-05, "loss": 0.389, "step": 21720 }, { "epoch": 1.2163176167543959, "grad_norm": 1.5623319149017334, "learning_rate": 9.954973684210527e-05, "loss": 0.4553, "step": 21721 }, { "epoch": 1.2163736140665249, "grad_norm": 1.4721201658248901, "learning_rate": 9.954947368421053e-05, "loss": 0.5529, "step": 21722 }, { "epoch": 1.216429611378654, "grad_norm": 1.42538583278656, "learning_rate": 9.95492105263158e-05, "loss": 0.4611, "step": 21723 }, { "epoch": 1.216485608690783, "grad_norm": 1.4518930912017822, "learning_rate": 9.954894736842105e-05, "loss": 0.4414, "step": 21724 }, { "epoch": 1.216541606002912, "grad_norm": 1.5610616207122803, "learning_rate": 9.954868421052632e-05, "loss": 0.3666, "step": 21725 }, { "epoch": 1.216597603315041, "grad_norm": 1.145919680595398, "learning_rate": 9.954842105263158e-05, "loss": 0.444, "step": 21726 }, { "epoch": 1.21665360062717, "grad_norm": 1.3703927993774414, "learning_rate": 9.954815789473685e-05, "loss": 0.4152, "step": 21727 }, { "epoch": 1.216709597939299, "grad_norm": 1.3518468141555786, "learning_rate": 9.954789473684211e-05, "loss": 0.4267, "step": 21728 }, { "epoch": 1.216765595251428, "grad_norm": 1.1545908451080322, "learning_rate": 9.954763157894737e-05, "loss": 0.4011, "step": 21729 }, { "epoch": 1.216821592563557, "grad_norm": 1.2211787700653076, "learning_rate": 9.954736842105263e-05, "loss": 0.3923, "step": 21730 }, { "epoch": 1.216877589875686, "grad_norm": 1.3977153301239014, "learning_rate": 9.95471052631579e-05, "loss": 0.5393, "step": 21731 }, { "epoch": 1.216933587187815, "grad_norm": 1.1466578245162964, "learning_rate": 9.954684210526316e-05, "loss": 0.5073, "step": 21732 
}, { "epoch": 1.216989584499944, "grad_norm": 1.2234030961990356, "learning_rate": 9.954657894736842e-05, "loss": 0.3789, "step": 21733 }, { "epoch": 1.217045581812073, "grad_norm": 1.222891926765442, "learning_rate": 9.954631578947368e-05, "loss": 0.4936, "step": 21734 }, { "epoch": 1.217101579124202, "grad_norm": 1.2851600646972656, "learning_rate": 9.954605263157896e-05, "loss": 0.5131, "step": 21735 }, { "epoch": 1.217157576436331, "grad_norm": 1.6744606494903564, "learning_rate": 9.954578947368422e-05, "loss": 0.5656, "step": 21736 }, { "epoch": 1.21721357374846, "grad_norm": 1.4397356510162354, "learning_rate": 9.954552631578948e-05, "loss": 0.4175, "step": 21737 }, { "epoch": 1.217269571060589, "grad_norm": 1.3127284049987793, "learning_rate": 9.954526315789474e-05, "loss": 0.6474, "step": 21738 }, { "epoch": 1.217325568372718, "grad_norm": 1.4042620658874512, "learning_rate": 9.9545e-05, "loss": 0.4045, "step": 21739 }, { "epoch": 1.217381565684847, "grad_norm": 1.4031119346618652, "learning_rate": 9.954473684210527e-05, "loss": 0.4204, "step": 21740 }, { "epoch": 1.217437562996976, "grad_norm": 1.441322684288025, "learning_rate": 9.954447368421053e-05, "loss": 0.3801, "step": 21741 }, { "epoch": 1.217493560309105, "grad_norm": 1.4328351020812988, "learning_rate": 9.954421052631579e-05, "loss": 0.4234, "step": 21742 }, { "epoch": 1.217549557621234, "grad_norm": 1.3555734157562256, "learning_rate": 9.954394736842105e-05, "loss": 0.418, "step": 21743 }, { "epoch": 1.217605554933363, "grad_norm": 1.1974542140960693, "learning_rate": 9.954368421052632e-05, "loss": 0.4094, "step": 21744 }, { "epoch": 1.217661552245492, "grad_norm": 1.3164422512054443, "learning_rate": 9.954342105263158e-05, "loss": 0.5754, "step": 21745 }, { "epoch": 1.2177175495576211, "grad_norm": 1.6550158262252808, "learning_rate": 9.954315789473686e-05, "loss": 0.4881, "step": 21746 }, { "epoch": 1.2177735468697501, "grad_norm": 1.514971375465393, "learning_rate": 9.95428947368421e-05, 
"loss": 0.6, "step": 21747 }, { "epoch": 1.2178295441818792, "grad_norm": 1.312474012374878, "learning_rate": 9.954263157894737e-05, "loss": 0.3611, "step": 21748 }, { "epoch": 1.2178855414940082, "grad_norm": 1.4859702587127686, "learning_rate": 9.954236842105263e-05, "loss": 0.4517, "step": 21749 }, { "epoch": 1.2179415388061372, "grad_norm": 1.599524736404419, "learning_rate": 9.954210526315791e-05, "loss": 0.486, "step": 21750 }, { "epoch": 1.2179975361182662, "grad_norm": 1.16690993309021, "learning_rate": 9.954184210526317e-05, "loss": 0.33, "step": 21751 }, { "epoch": 1.2180535334303952, "grad_norm": 1.520194172859192, "learning_rate": 9.954157894736843e-05, "loss": 0.4749, "step": 21752 }, { "epoch": 1.2181095307425243, "grad_norm": 1.2532674074172974, "learning_rate": 9.954131578947369e-05, "loss": 0.375, "step": 21753 }, { "epoch": 1.2181655280546533, "grad_norm": 1.128656029701233, "learning_rate": 9.954105263157896e-05, "loss": 0.359, "step": 21754 }, { "epoch": 1.2182215253667823, "grad_norm": 1.3241829872131348, "learning_rate": 9.954078947368422e-05, "loss": 0.4322, "step": 21755 }, { "epoch": 1.2182775226789113, "grad_norm": 1.3982354402542114, "learning_rate": 9.954052631578948e-05, "loss": 0.6009, "step": 21756 }, { "epoch": 1.2183335199910403, "grad_norm": 1.492138147354126, "learning_rate": 9.954026315789474e-05, "loss": 0.4982, "step": 21757 }, { "epoch": 1.2183895173031694, "grad_norm": 1.5098954439163208, "learning_rate": 9.954e-05, "loss": 0.5411, "step": 21758 }, { "epoch": 1.2184455146152984, "grad_norm": 1.4105461835861206, "learning_rate": 9.953973684210527e-05, "loss": 0.4803, "step": 21759 }, { "epoch": 1.2185015119274274, "grad_norm": 1.2827019691467285, "learning_rate": 9.953947368421053e-05, "loss": 0.416, "step": 21760 }, { "epoch": 1.2185575092395564, "grad_norm": 1.4231181144714355, "learning_rate": 9.953921052631579e-05, "loss": 0.4757, "step": 21761 }, { "epoch": 1.2186135065516854, "grad_norm": 1.319778323173523, 
"learning_rate": 9.953894736842105e-05, "loss": 0.4043, "step": 21762 }, { "epoch": 1.2186695038638145, "grad_norm": 1.1663310527801514, "learning_rate": 9.953868421052632e-05, "loss": 0.3814, "step": 21763 }, { "epoch": 1.2187255011759435, "grad_norm": 1.4952927827835083, "learning_rate": 9.953842105263158e-05, "loss": 0.4432, "step": 21764 }, { "epoch": 1.2187814984880725, "grad_norm": 1.2660924196243286, "learning_rate": 9.953815789473684e-05, "loss": 0.6356, "step": 21765 }, { "epoch": 1.2188374958002015, "grad_norm": 1.4471004009246826, "learning_rate": 9.95378947368421e-05, "loss": 0.4191, "step": 21766 }, { "epoch": 1.2188934931123305, "grad_norm": 1.4613450765609741, "learning_rate": 9.953763157894738e-05, "loss": 0.487, "step": 21767 }, { "epoch": 1.2189494904244595, "grad_norm": 2.181941032409668, "learning_rate": 9.953736842105264e-05, "loss": 0.3376, "step": 21768 }, { "epoch": 1.2190054877365886, "grad_norm": 1.267209529876709, "learning_rate": 9.953710526315791e-05, "loss": 0.3515, "step": 21769 }, { "epoch": 1.2190614850487176, "grad_norm": 1.8140164613723755, "learning_rate": 9.953684210526316e-05, "loss": 0.5538, "step": 21770 }, { "epoch": 1.2191174823608466, "grad_norm": 1.7960492372512817, "learning_rate": 9.953657894736843e-05, "loss": 0.5585, "step": 21771 }, { "epoch": 1.2191734796729756, "grad_norm": 1.3258659839630127, "learning_rate": 9.953631578947369e-05, "loss": 0.4138, "step": 21772 }, { "epoch": 1.2192294769851046, "grad_norm": 1.4085932970046997, "learning_rate": 9.953605263157896e-05, "loss": 0.5124, "step": 21773 }, { "epoch": 1.2192854742972337, "grad_norm": 1.3527405261993408, "learning_rate": 9.953578947368421e-05, "loss": 0.3983, "step": 21774 }, { "epoch": 1.2193414716093627, "grad_norm": 1.5936415195465088, "learning_rate": 9.953552631578947e-05, "loss": 0.4506, "step": 21775 }, { "epoch": 1.2193974689214917, "grad_norm": 1.4233524799346924, "learning_rate": 9.953526315789474e-05, "loss": 0.5028, "step": 21776 }, { "epoch": 
1.2194534662336207, "grad_norm": 1.5624884366989136, "learning_rate": 9.9535e-05, "loss": 0.5677, "step": 21777 }, { "epoch": 1.2195094635457497, "grad_norm": 1.445286750793457, "learning_rate": 9.953473684210527e-05, "loss": 0.4837, "step": 21778 }, { "epoch": 1.2195654608578788, "grad_norm": 1.3730077743530273, "learning_rate": 9.953447368421052e-05, "loss": 0.4522, "step": 21779 }, { "epoch": 1.2196214581700078, "grad_norm": 1.4525657892227173, "learning_rate": 9.95342105263158e-05, "loss": 0.5774, "step": 21780 }, { "epoch": 1.2196774554821368, "grad_norm": 1.1443114280700684, "learning_rate": 9.953394736842105e-05, "loss": 0.3527, "step": 21781 }, { "epoch": 1.2197334527942658, "grad_norm": 1.6584999561309814, "learning_rate": 9.953368421052633e-05, "loss": 0.3506, "step": 21782 }, { "epoch": 1.2197894501063948, "grad_norm": 1.3194817304611206, "learning_rate": 9.953342105263159e-05, "loss": 0.3406, "step": 21783 }, { "epoch": 1.2198454474185239, "grad_norm": 1.5065053701400757, "learning_rate": 9.953315789473685e-05, "loss": 0.4697, "step": 21784 }, { "epoch": 1.2199014447306529, "grad_norm": 1.257745623588562, "learning_rate": 9.95328947368421e-05, "loss": 0.3553, "step": 21785 }, { "epoch": 1.219957442042782, "grad_norm": 1.4275087118148804, "learning_rate": 9.953263157894738e-05, "loss": 0.5639, "step": 21786 }, { "epoch": 1.220013439354911, "grad_norm": 1.3018772602081299, "learning_rate": 9.953236842105264e-05, "loss": 0.5343, "step": 21787 }, { "epoch": 1.22006943666704, "grad_norm": 1.5407583713531494, "learning_rate": 9.95321052631579e-05, "loss": 0.5871, "step": 21788 }, { "epoch": 1.220125433979169, "grad_norm": 1.4225414991378784, "learning_rate": 9.953184210526316e-05, "loss": 0.4552, "step": 21789 }, { "epoch": 1.220181431291298, "grad_norm": 1.134092926979065, "learning_rate": 9.953157894736843e-05, "loss": 0.4514, "step": 21790 }, { "epoch": 1.220237428603427, "grad_norm": 1.4558898210525513, "learning_rate": 9.953131578947369e-05, "loss": 
0.4356, "step": 21791 }, { "epoch": 1.220293425915556, "grad_norm": 1.0426757335662842, "learning_rate": 9.953105263157895e-05, "loss": 0.4332, "step": 21792 }, { "epoch": 1.220349423227685, "grad_norm": 1.164726972579956, "learning_rate": 9.953078947368421e-05, "loss": 0.3746, "step": 21793 }, { "epoch": 1.220405420539814, "grad_norm": 1.221907138824463, "learning_rate": 9.953052631578947e-05, "loss": 0.5337, "step": 21794 }, { "epoch": 1.220461417851943, "grad_norm": 1.24656343460083, "learning_rate": 9.953026315789474e-05, "loss": 0.3993, "step": 21795 }, { "epoch": 1.220517415164072, "grad_norm": 1.378005862236023, "learning_rate": 9.953e-05, "loss": 0.4511, "step": 21796 }, { "epoch": 1.220573412476201, "grad_norm": 1.2552727460861206, "learning_rate": 9.952973684210526e-05, "loss": 0.3287, "step": 21797 }, { "epoch": 1.2206294097883301, "grad_norm": 1.2627837657928467, "learning_rate": 9.952947368421052e-05, "loss": 0.4697, "step": 21798 }, { "epoch": 1.2206854071004591, "grad_norm": 1.59735906124115, "learning_rate": 9.95292105263158e-05, "loss": 0.6023, "step": 21799 }, { "epoch": 1.2207414044125882, "grad_norm": 1.2051788568496704, "learning_rate": 9.952894736842106e-05, "loss": 0.4292, "step": 21800 }, { "epoch": 1.2207974017247172, "grad_norm": 1.4525799751281738, "learning_rate": 9.952868421052633e-05, "loss": 0.4569, "step": 21801 }, { "epoch": 1.2208533990368462, "grad_norm": 1.2709426879882812, "learning_rate": 9.952842105263158e-05, "loss": 0.34, "step": 21802 }, { "epoch": 1.2209093963489752, "grad_norm": 1.4722206592559814, "learning_rate": 9.952815789473685e-05, "loss": 0.4586, "step": 21803 }, { "epoch": 1.2209653936611042, "grad_norm": 1.3889596462249756, "learning_rate": 9.952789473684211e-05, "loss": 0.4202, "step": 21804 }, { "epoch": 1.2210213909732333, "grad_norm": 1.5910038948059082, "learning_rate": 9.952763157894738e-05, "loss": 0.4474, "step": 21805 }, { "epoch": 1.2210773882853623, "grad_norm": 1.3301708698272705, "learning_rate": 
9.952736842105264e-05, "loss": 0.467, "step": 21806 }, { "epoch": 1.2211333855974913, "grad_norm": 1.9921144247055054, "learning_rate": 9.95271052631579e-05, "loss": 0.3968, "step": 21807 }, { "epoch": 1.2211893829096203, "grad_norm": 1.4369726181030273, "learning_rate": 9.952684210526316e-05, "loss": 0.4611, "step": 21808 }, { "epoch": 1.2212453802217493, "grad_norm": 1.7992271184921265, "learning_rate": 9.952657894736842e-05, "loss": 0.5521, "step": 21809 }, { "epoch": 1.2213013775338784, "grad_norm": 1.1561758518218994, "learning_rate": 9.95263157894737e-05, "loss": 0.3971, "step": 21810 }, { "epoch": 1.2213573748460074, "grad_norm": 1.366234540939331, "learning_rate": 9.952605263157895e-05, "loss": 0.5476, "step": 21811 }, { "epoch": 1.2214133721581364, "grad_norm": 1.1349620819091797, "learning_rate": 9.952578947368421e-05, "loss": 0.4166, "step": 21812 }, { "epoch": 1.2214693694702654, "grad_norm": 1.4356046915054321, "learning_rate": 9.952552631578947e-05, "loss": 0.4494, "step": 21813 }, { "epoch": 1.2215253667823944, "grad_norm": 1.5290894508361816, "learning_rate": 9.952526315789475e-05, "loss": 0.4659, "step": 21814 }, { "epoch": 1.2215813640945234, "grad_norm": 2.1164944171905518, "learning_rate": 9.952500000000001e-05, "loss": 0.4189, "step": 21815 }, { "epoch": 1.2216373614066525, "grad_norm": 1.3442543745040894, "learning_rate": 9.952473684210527e-05, "loss": 0.4612, "step": 21816 }, { "epoch": 1.2216933587187815, "grad_norm": 1.2771326303482056, "learning_rate": 9.952447368421053e-05, "loss": 0.4856, "step": 21817 }, { "epoch": 1.2217493560309105, "grad_norm": 1.2080665826797485, "learning_rate": 9.95242105263158e-05, "loss": 0.32, "step": 21818 }, { "epoch": 1.2218053533430395, "grad_norm": 1.4783048629760742, "learning_rate": 9.952394736842106e-05, "loss": 0.4008, "step": 21819 }, { "epoch": 1.2218613506551685, "grad_norm": 1.331005573272705, "learning_rate": 9.952368421052632e-05, "loss": 0.4148, "step": 21820 }, { "epoch": 1.2219173479672976, 
"grad_norm": 1.3131736516952515, "learning_rate": 9.952342105263158e-05, "loss": 0.5158, "step": 21821 }, { "epoch": 1.2219733452794266, "grad_norm": 1.3083288669586182, "learning_rate": 9.952315789473685e-05, "loss": 0.4283, "step": 21822 }, { "epoch": 1.2220293425915556, "grad_norm": 1.2380086183547974, "learning_rate": 9.952289473684211e-05, "loss": 0.505, "step": 21823 }, { "epoch": 1.2220853399036846, "grad_norm": 1.2146620750427246, "learning_rate": 9.952263157894739e-05, "loss": 0.4575, "step": 21824 }, { "epoch": 1.2221413372158136, "grad_norm": 1.1539934873580933, "learning_rate": 9.952236842105263e-05, "loss": 0.4541, "step": 21825 }, { "epoch": 1.2221973345279427, "grad_norm": 1.4234150648117065, "learning_rate": 9.952210526315789e-05, "loss": 0.3875, "step": 21826 }, { "epoch": 1.2222533318400717, "grad_norm": 1.3258788585662842, "learning_rate": 9.952184210526316e-05, "loss": 0.3533, "step": 21827 }, { "epoch": 1.2223093291522007, "grad_norm": 1.063740611076355, "learning_rate": 9.952157894736842e-05, "loss": 0.3701, "step": 21828 }, { "epoch": 1.2223653264643297, "grad_norm": 1.3509206771850586, "learning_rate": 9.952131578947368e-05, "loss": 0.5831, "step": 21829 }, { "epoch": 1.2224213237764587, "grad_norm": 1.35112464427948, "learning_rate": 9.952105263157894e-05, "loss": 0.3951, "step": 21830 }, { "epoch": 1.2224773210885878, "grad_norm": 1.0923048257827759, "learning_rate": 9.952078947368422e-05, "loss": 0.2773, "step": 21831 }, { "epoch": 1.2225333184007168, "grad_norm": 1.3539552688598633, "learning_rate": 9.952052631578948e-05, "loss": 0.4607, "step": 21832 }, { "epoch": 1.2225893157128458, "grad_norm": 2.1112303733825684, "learning_rate": 9.952026315789475e-05, "loss": 0.4197, "step": 21833 }, { "epoch": 1.2226453130249748, "grad_norm": 1.36155366897583, "learning_rate": 9.952e-05, "loss": 0.4435, "step": 21834 }, { "epoch": 1.2227013103371038, "grad_norm": 1.6987247467041016, "learning_rate": 9.951973684210527e-05, "loss": 0.6736, "step": 
21835 }, { "epoch": 1.2227573076492328, "grad_norm": 1.2438112497329712, "learning_rate": 9.951947368421053e-05, "loss": 0.3778, "step": 21836 }, { "epoch": 1.2228133049613619, "grad_norm": 1.5742439031600952, "learning_rate": 9.95192105263158e-05, "loss": 0.4305, "step": 21837 }, { "epoch": 1.2228693022734909, "grad_norm": 1.335503101348877, "learning_rate": 9.951894736842106e-05, "loss": 0.4913, "step": 21838 }, { "epoch": 1.22292529958562, "grad_norm": 1.3608275651931763, "learning_rate": 9.951868421052632e-05, "loss": 0.3757, "step": 21839 }, { "epoch": 1.222981296897749, "grad_norm": 1.3223425149917603, "learning_rate": 9.951842105263158e-05, "loss": 0.3729, "step": 21840 }, { "epoch": 1.223037294209878, "grad_norm": 1.2835404872894287, "learning_rate": 9.951815789473685e-05, "loss": 0.3632, "step": 21841 }, { "epoch": 1.223093291522007, "grad_norm": 1.4302679300308228, "learning_rate": 9.951789473684211e-05, "loss": 0.496, "step": 21842 }, { "epoch": 1.223149288834136, "grad_norm": 2.2417383193969727, "learning_rate": 9.951763157894737e-05, "loss": 0.4393, "step": 21843 }, { "epoch": 1.223205286146265, "grad_norm": 1.3884906768798828, "learning_rate": 9.951736842105263e-05, "loss": 0.5005, "step": 21844 }, { "epoch": 1.223261283458394, "grad_norm": 1.4113117456436157, "learning_rate": 9.95171052631579e-05, "loss": 0.4553, "step": 21845 }, { "epoch": 1.223317280770523, "grad_norm": 1.2487066984176636, "learning_rate": 9.951684210526317e-05, "loss": 0.3879, "step": 21846 }, { "epoch": 1.223373278082652, "grad_norm": 1.2708920240402222, "learning_rate": 9.951657894736843e-05, "loss": 0.4038, "step": 21847 }, { "epoch": 1.223429275394781, "grad_norm": 1.1820656061172485, "learning_rate": 9.951631578947369e-05, "loss": 0.6154, "step": 21848 }, { "epoch": 1.22348527270691, "grad_norm": 1.363383173942566, "learning_rate": 9.951605263157895e-05, "loss": 0.4523, "step": 21849 }, { "epoch": 1.2235412700190391, "grad_norm": 1.3615005016326904, "learning_rate": 
9.951578947368422e-05, "loss": 0.4351, "step": 21850 }, { "epoch": 1.2235972673311681, "grad_norm": 1.5357478857040405, "learning_rate": 9.951552631578948e-05, "loss": 0.3839, "step": 21851 }, { "epoch": 1.2236532646432972, "grad_norm": 1.2987799644470215, "learning_rate": 9.951526315789474e-05, "loss": 0.4115, "step": 21852 }, { "epoch": 1.2237092619554262, "grad_norm": 1.168209195137024, "learning_rate": 9.9515e-05, "loss": 0.3717, "step": 21853 }, { "epoch": 1.2237652592675552, "grad_norm": 2.50730562210083, "learning_rate": 9.951473684210527e-05, "loss": 0.4838, "step": 21854 }, { "epoch": 1.2238212565796842, "grad_norm": 1.7884670495986938, "learning_rate": 9.951447368421053e-05, "loss": 0.5113, "step": 21855 }, { "epoch": 1.2238772538918132, "grad_norm": 1.4803868532180786, "learning_rate": 9.95142105263158e-05, "loss": 0.4637, "step": 21856 }, { "epoch": 1.2239332512039423, "grad_norm": 1.1885650157928467, "learning_rate": 9.951394736842105e-05, "loss": 0.4261, "step": 21857 }, { "epoch": 1.2239892485160713, "grad_norm": 1.30832839012146, "learning_rate": 9.951368421052632e-05, "loss": 0.4534, "step": 21858 }, { "epoch": 1.2240452458282003, "grad_norm": 1.4795804023742676, "learning_rate": 9.951342105263158e-05, "loss": 0.4565, "step": 21859 }, { "epoch": 1.2241012431403293, "grad_norm": 1.636049509048462, "learning_rate": 9.951315789473684e-05, "loss": 0.5965, "step": 21860 }, { "epoch": 1.2241572404524583, "grad_norm": 1.4008960723876953, "learning_rate": 9.951289473684212e-05, "loss": 0.4041, "step": 21861 }, { "epoch": 1.2242132377645873, "grad_norm": 1.2367534637451172, "learning_rate": 9.951263157894736e-05, "loss": 0.4344, "step": 21862 }, { "epoch": 1.2242692350767164, "grad_norm": 1.3793079853057861, "learning_rate": 9.951236842105264e-05, "loss": 0.367, "step": 21863 }, { "epoch": 1.2243252323888454, "grad_norm": 1.6336742639541626, "learning_rate": 9.95121052631579e-05, "loss": 0.4368, "step": 21864 }, { "epoch": 1.2243812297009744, "grad_norm": 
1.6996160745620728, "learning_rate": 9.951184210526317e-05, "loss": 0.4245, "step": 21865 }, { "epoch": 1.2244372270131034, "grad_norm": 1.3252843618392944, "learning_rate": 9.951157894736843e-05, "loss": 0.3982, "step": 21866 }, { "epoch": 1.2244932243252324, "grad_norm": 1.583376407623291, "learning_rate": 9.951131578947369e-05, "loss": 0.4504, "step": 21867 }, { "epoch": 1.2245492216373615, "grad_norm": 1.4512195587158203, "learning_rate": 9.951105263157895e-05, "loss": 0.481, "step": 21868 }, { "epoch": 1.2246052189494905, "grad_norm": 1.6560012102127075, "learning_rate": 9.951078947368422e-05, "loss": 0.4535, "step": 21869 }, { "epoch": 1.2246612162616195, "grad_norm": 3.5533268451690674, "learning_rate": 9.951052631578948e-05, "loss": 0.4402, "step": 21870 }, { "epoch": 1.2247172135737485, "grad_norm": 1.4186865091323853, "learning_rate": 9.951026315789474e-05, "loss": 0.5834, "step": 21871 }, { "epoch": 1.2247732108858775, "grad_norm": 1.329943299293518, "learning_rate": 9.951e-05, "loss": 0.4864, "step": 21872 }, { "epoch": 1.2248292081980066, "grad_norm": 1.423318862915039, "learning_rate": 9.950973684210527e-05, "loss": 0.4489, "step": 21873 }, { "epoch": 1.2248852055101356, "grad_norm": 1.3300644159317017, "learning_rate": 9.950947368421053e-05, "loss": 0.4682, "step": 21874 }, { "epoch": 1.2249412028222646, "grad_norm": 1.6161000728607178, "learning_rate": 9.95092105263158e-05, "loss": 0.4761, "step": 21875 }, { "epoch": 1.2249972001343936, "grad_norm": 1.3090307712554932, "learning_rate": 9.950894736842105e-05, "loss": 0.3507, "step": 21876 }, { "epoch": 1.2250531974465226, "grad_norm": 1.590484380722046, "learning_rate": 9.950868421052631e-05, "loss": 0.5039, "step": 21877 }, { "epoch": 1.2251091947586517, "grad_norm": 1.5652788877487183, "learning_rate": 9.950842105263159e-05, "loss": 0.409, "step": 21878 }, { "epoch": 1.2251651920707807, "grad_norm": 1.3590055704116821, "learning_rate": 9.950815789473685e-05, "loss": 0.4055, "step": 21879 }, { 
"epoch": 1.2252211893829097, "grad_norm": 7.608685493469238, "learning_rate": 9.95078947368421e-05, "loss": 0.3679, "step": 21880 }, { "epoch": 1.2252771866950387, "grad_norm": 1.1881346702575684, "learning_rate": 9.950763157894737e-05, "loss": 0.4288, "step": 21881 }, { "epoch": 1.2253331840071677, "grad_norm": 1.3057074546813965, "learning_rate": 9.950736842105264e-05, "loss": 0.519, "step": 21882 }, { "epoch": 1.2253891813192967, "grad_norm": 1.5817879438400269, "learning_rate": 9.95071052631579e-05, "loss": 0.6551, "step": 21883 }, { "epoch": 1.2254451786314258, "grad_norm": 1.1580766439437866, "learning_rate": 9.950684210526316e-05, "loss": 0.4142, "step": 21884 }, { "epoch": 1.2255011759435548, "grad_norm": 1.2933225631713867, "learning_rate": 9.950657894736842e-05, "loss": 0.3733, "step": 21885 }, { "epoch": 1.2255571732556838, "grad_norm": 1.273256540298462, "learning_rate": 9.950631578947369e-05, "loss": 0.346, "step": 21886 }, { "epoch": 1.2256131705678128, "grad_norm": 1.346725583076477, "learning_rate": 9.950605263157895e-05, "loss": 0.4856, "step": 21887 }, { "epoch": 1.2256691678799418, "grad_norm": 1.4436297416687012, "learning_rate": 9.950578947368422e-05, "loss": 0.3798, "step": 21888 }, { "epoch": 1.2257251651920709, "grad_norm": 1.3356685638427734, "learning_rate": 9.950552631578947e-05, "loss": 0.4504, "step": 21889 }, { "epoch": 1.2257811625041999, "grad_norm": 1.2018934488296509, "learning_rate": 9.950526315789474e-05, "loss": 0.5068, "step": 21890 }, { "epoch": 1.225837159816329, "grad_norm": 1.6328685283660889, "learning_rate": 9.9505e-05, "loss": 0.4376, "step": 21891 }, { "epoch": 1.225893157128458, "grad_norm": 1.0057116746902466, "learning_rate": 9.950473684210528e-05, "loss": 0.3189, "step": 21892 }, { "epoch": 1.225949154440587, "grad_norm": 1.5768277645111084, "learning_rate": 9.950447368421054e-05, "loss": 0.4603, "step": 21893 }, { "epoch": 1.226005151752716, "grad_norm": 1.7225438356399536, "learning_rate": 9.95042105263158e-05, 
"loss": 0.4968, "step": 21894 }, { "epoch": 1.226061149064845, "grad_norm": 1.4130609035491943, "learning_rate": 9.950394736842106e-05, "loss": 0.4008, "step": 21895 }, { "epoch": 1.226117146376974, "grad_norm": 1.2787760496139526, "learning_rate": 9.950368421052632e-05, "loss": 0.4211, "step": 21896 }, { "epoch": 1.226173143689103, "grad_norm": 1.7041151523590088, "learning_rate": 9.950342105263159e-05, "loss": 0.4104, "step": 21897 }, { "epoch": 1.226229141001232, "grad_norm": 1.1386144161224365, "learning_rate": 9.950315789473685e-05, "loss": 0.3393, "step": 21898 }, { "epoch": 1.226285138313361, "grad_norm": 1.2944810390472412, "learning_rate": 9.950289473684211e-05, "loss": 0.3709, "step": 21899 }, { "epoch": 1.22634113562549, "grad_norm": 1.467772126197815, "learning_rate": 9.950263157894737e-05, "loss": 0.5673, "step": 21900 }, { "epoch": 1.226397132937619, "grad_norm": 1.4492416381835938, "learning_rate": 9.950236842105264e-05, "loss": 0.5022, "step": 21901 }, { "epoch": 1.2264531302497481, "grad_norm": 1.3122985363006592, "learning_rate": 9.95021052631579e-05, "loss": 0.38, "step": 21902 }, { "epoch": 1.2265091275618771, "grad_norm": 1.4372916221618652, "learning_rate": 9.950184210526316e-05, "loss": 0.4658, "step": 21903 }, { "epoch": 1.2265651248740062, "grad_norm": 1.2543593645095825, "learning_rate": 9.950157894736842e-05, "loss": 0.4077, "step": 21904 }, { "epoch": 1.2266211221861352, "grad_norm": 1.4239617586135864, "learning_rate": 9.95013157894737e-05, "loss": 0.4194, "step": 21905 }, { "epoch": 1.2266771194982642, "grad_norm": 1.496111273765564, "learning_rate": 9.950105263157895e-05, "loss": 0.3906, "step": 21906 }, { "epoch": 1.2267331168103932, "grad_norm": 1.2875492572784424, "learning_rate": 9.950078947368421e-05, "loss": 0.4025, "step": 21907 }, { "epoch": 1.2267891141225222, "grad_norm": 1.4934632778167725, "learning_rate": 9.950052631578947e-05, "loss": 0.4507, "step": 21908 }, { "epoch": 1.2268451114346512, "grad_norm": 1.660032033920288, 
"learning_rate": 9.950026315789475e-05, "loss": 0.5312, "step": 21909 }, { "epoch": 1.2269011087467803, "grad_norm": 1.4773995876312256, "learning_rate": 9.95e-05, "loss": 0.498, "step": 21910 }, { "epoch": 1.2269571060589093, "grad_norm": 1.4536614418029785, "learning_rate": 9.949973684210528e-05, "loss": 0.4295, "step": 21911 }, { "epoch": 1.2270131033710383, "grad_norm": 1.2980177402496338, "learning_rate": 9.949947368421053e-05, "loss": 0.3971, "step": 21912 }, { "epoch": 1.2270691006831673, "grad_norm": 1.3747931718826294, "learning_rate": 9.949921052631579e-05, "loss": 0.458, "step": 21913 }, { "epoch": 1.2271250979952963, "grad_norm": 1.1824984550476074, "learning_rate": 9.949894736842106e-05, "loss": 0.4842, "step": 21914 }, { "epoch": 1.2271810953074254, "grad_norm": 1.315635323524475, "learning_rate": 9.949868421052632e-05, "loss": 0.4605, "step": 21915 }, { "epoch": 1.2272370926195544, "grad_norm": 1.3485517501831055, "learning_rate": 9.949842105263159e-05, "loss": 0.4314, "step": 21916 }, { "epoch": 1.2272930899316834, "grad_norm": 1.3250218629837036, "learning_rate": 9.949815789473684e-05, "loss": 0.3913, "step": 21917 }, { "epoch": 1.2273490872438124, "grad_norm": 1.4313737154006958, "learning_rate": 9.949789473684211e-05, "loss": 0.4219, "step": 21918 }, { "epoch": 1.2274050845559414, "grad_norm": 1.1929755210876465, "learning_rate": 9.949763157894737e-05, "loss": 0.4118, "step": 21919 }, { "epoch": 1.2274610818680702, "grad_norm": 1.24820876121521, "learning_rate": 9.949736842105264e-05, "loss": 0.363, "step": 21920 }, { "epoch": 1.2275170791801993, "grad_norm": 1.482637643814087, "learning_rate": 9.949710526315789e-05, "loss": 0.413, "step": 21921 }, { "epoch": 1.2275730764923283, "grad_norm": 1.4789204597473145, "learning_rate": 9.949684210526316e-05, "loss": 0.4624, "step": 21922 }, { "epoch": 1.2276290738044573, "grad_norm": 1.3046389818191528, "learning_rate": 9.949657894736842e-05, "loss": 0.385, "step": 21923 }, { "epoch": 1.2276850711165863, 
"grad_norm": 1.3254011869430542, "learning_rate": 9.94963157894737e-05, "loss": 0.4519, "step": 21924 }, { "epoch": 1.2277410684287153, "grad_norm": 1.443664789199829, "learning_rate": 9.949605263157896e-05, "loss": 0.5582, "step": 21925 }, { "epoch": 1.2277970657408444, "grad_norm": 1.4166078567504883, "learning_rate": 9.949578947368422e-05, "loss": 0.5071, "step": 21926 }, { "epoch": 1.2278530630529734, "grad_norm": 1.342435598373413, "learning_rate": 9.949552631578948e-05, "loss": 0.4589, "step": 21927 }, { "epoch": 1.2279090603651024, "grad_norm": 1.318955898284912, "learning_rate": 9.949526315789475e-05, "loss": 0.3732, "step": 21928 }, { "epoch": 1.2279650576772314, "grad_norm": 1.3455950021743774, "learning_rate": 9.949500000000001e-05, "loss": 0.3828, "step": 21929 }, { "epoch": 1.2280210549893604, "grad_norm": 1.5169085264205933, "learning_rate": 9.949473684210527e-05, "loss": 0.3917, "step": 21930 }, { "epoch": 1.2280770523014894, "grad_norm": 1.2915533781051636, "learning_rate": 9.949447368421053e-05, "loss": 0.396, "step": 21931 }, { "epoch": 1.2281330496136185, "grad_norm": 1.4210155010223389, "learning_rate": 9.949421052631579e-05, "loss": 0.5223, "step": 21932 }, { "epoch": 1.2281890469257475, "grad_norm": 1.20391845703125, "learning_rate": 9.949394736842106e-05, "loss": 0.3596, "step": 21933 }, { "epoch": 1.2282450442378765, "grad_norm": 1.4098650217056274, "learning_rate": 9.949368421052632e-05, "loss": 0.6011, "step": 21934 }, { "epoch": 1.2283010415500055, "grad_norm": 1.37461256980896, "learning_rate": 9.949342105263158e-05, "loss": 0.495, "step": 21935 }, { "epoch": 1.2283570388621345, "grad_norm": 1.2328859567642212, "learning_rate": 9.949315789473684e-05, "loss": 0.4217, "step": 21936 }, { "epoch": 1.2284130361742636, "grad_norm": 1.310872197151184, "learning_rate": 9.949289473684211e-05, "loss": 0.3175, "step": 21937 }, { "epoch": 1.2284690334863926, "grad_norm": 1.284334421157837, "learning_rate": 9.949263157894737e-05, "loss": 0.4362, 
"step": 21938 }, { "epoch": 1.2285250307985216, "grad_norm": 1.304630160331726, "learning_rate": 9.949236842105263e-05, "loss": 0.3955, "step": 21939 }, { "epoch": 1.2285810281106506, "grad_norm": 1.5838603973388672, "learning_rate": 9.949210526315789e-05, "loss": 0.5161, "step": 21940 }, { "epoch": 1.2286370254227796, "grad_norm": 1.4570293426513672, "learning_rate": 9.949184210526317e-05, "loss": 0.4528, "step": 21941 }, { "epoch": 1.2286930227349087, "grad_norm": 1.5105780363082886, "learning_rate": 9.949157894736843e-05, "loss": 0.3747, "step": 21942 }, { "epoch": 1.2287490200470377, "grad_norm": 1.4986460208892822, "learning_rate": 9.94913157894737e-05, "loss": 0.4594, "step": 21943 }, { "epoch": 1.2288050173591667, "grad_norm": 1.5359302759170532, "learning_rate": 9.949105263157895e-05, "loss": 0.4651, "step": 21944 }, { "epoch": 1.2288610146712957, "grad_norm": 1.283356785774231, "learning_rate": 9.949078947368422e-05, "loss": 0.416, "step": 21945 }, { "epoch": 1.2289170119834247, "grad_norm": 1.3096617460250854, "learning_rate": 9.949052631578948e-05, "loss": 0.3662, "step": 21946 }, { "epoch": 1.2289730092955538, "grad_norm": 1.2936625480651855, "learning_rate": 9.949026315789474e-05, "loss": 0.4686, "step": 21947 }, { "epoch": 1.2290290066076828, "grad_norm": 1.3283549547195435, "learning_rate": 9.949000000000001e-05, "loss": 0.4289, "step": 21948 }, { "epoch": 1.2290850039198118, "grad_norm": 1.4151219129562378, "learning_rate": 9.948973684210526e-05, "loss": 0.62, "step": 21949 }, { "epoch": 1.2291410012319408, "grad_norm": 1.4237349033355713, "learning_rate": 9.948947368421053e-05, "loss": 0.5798, "step": 21950 }, { "epoch": 1.2291969985440698, "grad_norm": 1.4631147384643555, "learning_rate": 9.948921052631579e-05, "loss": 0.485, "step": 21951 }, { "epoch": 1.2292529958561988, "grad_norm": 1.5169850587844849, "learning_rate": 9.948894736842106e-05, "loss": 0.5413, "step": 21952 }, { "epoch": 1.2293089931683279, "grad_norm": 1.290627121925354, 
"learning_rate": 9.948868421052632e-05, "loss": 0.3879, "step": 21953 }, { "epoch": 1.2293649904804569, "grad_norm": 1.3381317853927612, "learning_rate": 9.948842105263158e-05, "loss": 0.4044, "step": 21954 }, { "epoch": 1.229420987792586, "grad_norm": 1.0008268356323242, "learning_rate": 9.948815789473684e-05, "loss": 0.3143, "step": 21955 }, { "epoch": 1.229476985104715, "grad_norm": 2.7601494789123535, "learning_rate": 9.948789473684212e-05, "loss": 0.4273, "step": 21956 }, { "epoch": 1.229532982416844, "grad_norm": 1.354504108428955, "learning_rate": 9.948763157894738e-05, "loss": 0.3963, "step": 21957 }, { "epoch": 1.229588979728973, "grad_norm": 1.7259458303451538, "learning_rate": 9.948736842105264e-05, "loss": 0.5035, "step": 21958 }, { "epoch": 1.229644977041102, "grad_norm": 1.5698692798614502, "learning_rate": 9.94871052631579e-05, "loss": 0.4932, "step": 21959 }, { "epoch": 1.229700974353231, "grad_norm": 1.169247031211853, "learning_rate": 9.948684210526317e-05, "loss": 0.3707, "step": 21960 }, { "epoch": 1.22975697166536, "grad_norm": 1.2915327548980713, "learning_rate": 9.948657894736843e-05, "loss": 0.5059, "step": 21961 }, { "epoch": 1.229812968977489, "grad_norm": 1.2802354097366333, "learning_rate": 9.948631578947369e-05, "loss": 0.4147, "step": 21962 }, { "epoch": 1.229868966289618, "grad_norm": 1.6147280931472778, "learning_rate": 9.948605263157895e-05, "loss": 0.4679, "step": 21963 }, { "epoch": 1.229924963601747, "grad_norm": 1.5009760856628418, "learning_rate": 9.948578947368421e-05, "loss": 0.4806, "step": 21964 }, { "epoch": 1.229980960913876, "grad_norm": 1.4112187623977661, "learning_rate": 9.948552631578948e-05, "loss": 0.5722, "step": 21965 }, { "epoch": 1.2300369582260051, "grad_norm": 1.3325297832489014, "learning_rate": 9.948526315789474e-05, "loss": 0.4073, "step": 21966 }, { "epoch": 1.2300929555381341, "grad_norm": 1.1587578058242798, "learning_rate": 9.9485e-05, "loss": 0.3948, "step": 21967 }, { "epoch": 1.2301489528502632, 
"grad_norm": 1.315110683441162, "learning_rate": 9.948473684210526e-05, "loss": 0.3932, "step": 21968 }, { "epoch": 1.2302049501623922, "grad_norm": 1.4653820991516113, "learning_rate": 9.948447368421053e-05, "loss": 0.4289, "step": 21969 }, { "epoch": 1.2302609474745212, "grad_norm": 1.9477462768554688, "learning_rate": 9.948421052631579e-05, "loss": 0.7315, "step": 21970 }, { "epoch": 1.2303169447866502, "grad_norm": 1.3053503036499023, "learning_rate": 9.948394736842107e-05, "loss": 0.5627, "step": 21971 }, { "epoch": 1.2303729420987792, "grad_norm": 1.2114213705062866, "learning_rate": 9.948368421052631e-05, "loss": 0.4151, "step": 21972 }, { "epoch": 1.2304289394109083, "grad_norm": 1.4587870836257935, "learning_rate": 9.948342105263159e-05, "loss": 0.4482, "step": 21973 }, { "epoch": 1.2304849367230373, "grad_norm": 1.4069160223007202, "learning_rate": 9.948315789473685e-05, "loss": 0.4085, "step": 21974 }, { "epoch": 1.2305409340351663, "grad_norm": 1.241890549659729, "learning_rate": 9.948289473684212e-05, "loss": 0.4834, "step": 21975 }, { "epoch": 1.2305969313472953, "grad_norm": 1.5539815425872803, "learning_rate": 9.948263157894737e-05, "loss": 0.6285, "step": 21976 }, { "epoch": 1.2306529286594243, "grad_norm": 1.247409462928772, "learning_rate": 9.948236842105264e-05, "loss": 0.4226, "step": 21977 }, { "epoch": 1.2307089259715533, "grad_norm": 1.646337866783142, "learning_rate": 9.94821052631579e-05, "loss": 0.4629, "step": 21978 }, { "epoch": 1.2307649232836824, "grad_norm": 1.3517980575561523, "learning_rate": 9.948184210526317e-05, "loss": 0.3765, "step": 21979 }, { "epoch": 1.2308209205958114, "grad_norm": 1.4111981391906738, "learning_rate": 9.948157894736843e-05, "loss": 0.4171, "step": 21980 }, { "epoch": 1.2308769179079404, "grad_norm": 1.6455378532409668, "learning_rate": 9.948131578947368e-05, "loss": 0.5553, "step": 21981 }, { "epoch": 1.2309329152200694, "grad_norm": 1.2768588066101074, "learning_rate": 9.948105263157895e-05, "loss": 
0.432, "step": 21982 }, { "epoch": 1.2309889125321984, "grad_norm": 1.9461045265197754, "learning_rate": 9.948078947368421e-05, "loss": 0.5182, "step": 21983 }, { "epoch": 1.2310449098443275, "grad_norm": 1.5526859760284424, "learning_rate": 9.948052631578948e-05, "loss": 0.5356, "step": 21984 }, { "epoch": 1.2311009071564565, "grad_norm": 1.4912222623825073, "learning_rate": 9.948026315789474e-05, "loss": 0.5068, "step": 21985 }, { "epoch": 1.2311569044685855, "grad_norm": 1.3449803590774536, "learning_rate": 9.948e-05, "loss": 0.4846, "step": 21986 }, { "epoch": 1.2312129017807145, "grad_norm": 1.4552032947540283, "learning_rate": 9.947973684210526e-05, "loss": 0.4275, "step": 21987 }, { "epoch": 1.2312688990928435, "grad_norm": 1.3703938722610474, "learning_rate": 9.947947368421054e-05, "loss": 0.5228, "step": 21988 }, { "epoch": 1.2313248964049726, "grad_norm": 2.451890230178833, "learning_rate": 9.94792105263158e-05, "loss": 0.3579, "step": 21989 }, { "epoch": 1.2313808937171016, "grad_norm": 1.6656391620635986, "learning_rate": 9.947894736842106e-05, "loss": 0.5006, "step": 21990 }, { "epoch": 1.2314368910292306, "grad_norm": 1.4365131855010986, "learning_rate": 9.947868421052632e-05, "loss": 0.3193, "step": 21991 }, { "epoch": 1.2314928883413596, "grad_norm": 1.176457405090332, "learning_rate": 9.947842105263159e-05, "loss": 0.335, "step": 21992 }, { "epoch": 1.2315488856534886, "grad_norm": 1.615265130996704, "learning_rate": 9.947815789473685e-05, "loss": 0.4727, "step": 21993 }, { "epoch": 1.2316048829656177, "grad_norm": 1.2442609071731567, "learning_rate": 9.947789473684211e-05, "loss": 0.5069, "step": 21994 }, { "epoch": 1.2316608802777467, "grad_norm": 1.1496793031692505, "learning_rate": 9.947763157894737e-05, "loss": 0.4064, "step": 21995 }, { "epoch": 1.2317168775898757, "grad_norm": 1.6059843301773071, "learning_rate": 9.947736842105264e-05, "loss": 0.385, "step": 21996 }, { "epoch": 1.2317728749020047, "grad_norm": 1.0907304286956787, 
"learning_rate": 9.94771052631579e-05, "loss": 0.3182, "step": 21997 }, { "epoch": 1.2318288722141337, "grad_norm": 1.3871437311172485, "learning_rate": 9.947684210526316e-05, "loss": 0.432, "step": 21998 }, { "epoch": 1.2318848695262627, "grad_norm": 1.5002268552780151, "learning_rate": 9.947657894736842e-05, "loss": 0.4025, "step": 21999 }, { "epoch": 1.2319408668383918, "grad_norm": 1.7191928625106812, "learning_rate": 9.947631578947368e-05, "loss": 0.459, "step": 22000 }, { "epoch": 1.2319968641505208, "grad_norm": 1.2988009452819824, "learning_rate": 9.947605263157895e-05, "loss": 0.3909, "step": 22001 }, { "epoch": 1.2320528614626498, "grad_norm": 1.4330321550369263, "learning_rate": 9.947578947368421e-05, "loss": 0.4795, "step": 22002 }, { "epoch": 1.2321088587747788, "grad_norm": 1.443952202796936, "learning_rate": 9.947552631578949e-05, "loss": 0.4142, "step": 22003 }, { "epoch": 1.2321648560869078, "grad_norm": 1.4141242504119873, "learning_rate": 9.947526315789473e-05, "loss": 0.4104, "step": 22004 }, { "epoch": 1.2322208533990369, "grad_norm": 1.3222407102584839, "learning_rate": 9.9475e-05, "loss": 0.5119, "step": 22005 }, { "epoch": 1.2322768507111659, "grad_norm": 1.3565367460250854, "learning_rate": 9.947473684210527e-05, "loss": 0.537, "step": 22006 }, { "epoch": 1.232332848023295, "grad_norm": 1.2684520483016968, "learning_rate": 9.947447368421054e-05, "loss": 0.5206, "step": 22007 }, { "epoch": 1.232388845335424, "grad_norm": 2.818842887878418, "learning_rate": 9.94742105263158e-05, "loss": 0.43, "step": 22008 }, { "epoch": 1.232444842647553, "grad_norm": 1.4474869966506958, "learning_rate": 9.947394736842106e-05, "loss": 0.5481, "step": 22009 }, { "epoch": 1.232500839959682, "grad_norm": 1.3364468812942505, "learning_rate": 9.947368421052632e-05, "loss": 0.417, "step": 22010 }, { "epoch": 1.232556837271811, "grad_norm": 1.1697131395339966, "learning_rate": 9.947342105263159e-05, "loss": 0.392, "step": 22011 }, { "epoch": 1.23261283458394, 
"grad_norm": 1.129098653793335, "learning_rate": 9.947315789473685e-05, "loss": 0.3236, "step": 22012 }, { "epoch": 1.232668831896069, "grad_norm": 1.2740800380706787, "learning_rate": 9.947289473684211e-05, "loss": 0.4697, "step": 22013 }, { "epoch": 1.232724829208198, "grad_norm": 1.2584391832351685, "learning_rate": 9.947263157894737e-05, "loss": 0.4366, "step": 22014 }, { "epoch": 1.232780826520327, "grad_norm": 1.5128530263900757, "learning_rate": 9.947236842105264e-05, "loss": 0.4795, "step": 22015 }, { "epoch": 1.232836823832456, "grad_norm": 1.2469381093978882, "learning_rate": 9.94721052631579e-05, "loss": 0.3744, "step": 22016 }, { "epoch": 1.232892821144585, "grad_norm": 1.313928246498108, "learning_rate": 9.947184210526316e-05, "loss": 0.5378, "step": 22017 }, { "epoch": 1.2329488184567141, "grad_norm": 1.3469971418380737, "learning_rate": 9.947157894736842e-05, "loss": 0.3226, "step": 22018 }, { "epoch": 1.2330048157688431, "grad_norm": 1.7495861053466797, "learning_rate": 9.947131578947368e-05, "loss": 0.4469, "step": 22019 }, { "epoch": 1.2330608130809722, "grad_norm": 1.542587399482727, "learning_rate": 9.947105263157896e-05, "loss": 0.4056, "step": 22020 }, { "epoch": 1.2331168103931012, "grad_norm": 1.398809552192688, "learning_rate": 9.947078947368422e-05, "loss": 0.5727, "step": 22021 }, { "epoch": 1.2331728077052302, "grad_norm": 1.3981959819793701, "learning_rate": 9.947052631578948e-05, "loss": 0.4962, "step": 22022 }, { "epoch": 1.2332288050173592, "grad_norm": 1.6644326448440552, "learning_rate": 9.947026315789473e-05, "loss": 0.7883, "step": 22023 }, { "epoch": 1.2332848023294882, "grad_norm": 1.3118822574615479, "learning_rate": 9.947000000000001e-05, "loss": 0.4914, "step": 22024 }, { "epoch": 1.2333407996416172, "grad_norm": 1.5341873168945312, "learning_rate": 9.946973684210527e-05, "loss": 0.478, "step": 22025 }, { "epoch": 1.2333967969537463, "grad_norm": 1.3558776378631592, "learning_rate": 9.946947368421054e-05, "loss": 0.4463, 
"step": 22026 }, { "epoch": 1.2334527942658753, "grad_norm": 1.348998785018921, "learning_rate": 9.946921052631579e-05, "loss": 0.4356, "step": 22027 }, { "epoch": 1.2335087915780043, "grad_norm": 1.3207428455352783, "learning_rate": 9.946894736842106e-05, "loss": 0.5011, "step": 22028 }, { "epoch": 1.2335647888901333, "grad_norm": 1.0505967140197754, "learning_rate": 9.946868421052632e-05, "loss": 0.3547, "step": 22029 }, { "epoch": 1.2336207862022623, "grad_norm": 1.4877405166625977, "learning_rate": 9.94684210526316e-05, "loss": 0.5126, "step": 22030 }, { "epoch": 1.2336767835143914, "grad_norm": 1.4617226123809814, "learning_rate": 9.946815789473684e-05, "loss": 0.457, "step": 22031 }, { "epoch": 1.2337327808265204, "grad_norm": 1.4387129545211792, "learning_rate": 9.946789473684211e-05, "loss": 0.5297, "step": 22032 }, { "epoch": 1.2337887781386494, "grad_norm": 1.2057104110717773, "learning_rate": 9.946763157894737e-05, "loss": 0.44, "step": 22033 }, { "epoch": 1.2338447754507784, "grad_norm": 1.140297532081604, "learning_rate": 9.946736842105263e-05, "loss": 0.4564, "step": 22034 }, { "epoch": 1.2339007727629074, "grad_norm": 1.2437033653259277, "learning_rate": 9.94671052631579e-05, "loss": 0.4692, "step": 22035 }, { "epoch": 1.2339567700750365, "grad_norm": 1.368577003479004, "learning_rate": 9.946684210526315e-05, "loss": 0.4024, "step": 22036 }, { "epoch": 1.2340127673871655, "grad_norm": 1.985155463218689, "learning_rate": 9.946657894736843e-05, "loss": 0.5369, "step": 22037 }, { "epoch": 1.2340687646992945, "grad_norm": 1.2214103937149048, "learning_rate": 9.946631578947369e-05, "loss": 0.3905, "step": 22038 }, { "epoch": 1.2341247620114235, "grad_norm": 1.4224534034729004, "learning_rate": 9.946605263157896e-05, "loss": 0.4445, "step": 22039 }, { "epoch": 1.2341807593235525, "grad_norm": 1.625382423400879, "learning_rate": 9.946578947368422e-05, "loss": 0.4275, "step": 22040 }, { "epoch": 1.2342367566356816, "grad_norm": 1.3813602924346924, 
"learning_rate": 9.946552631578948e-05, "loss": 0.4503, "step": 22041 }, { "epoch": 1.2342927539478106, "grad_norm": 1.353810429573059, "learning_rate": 9.946526315789474e-05, "loss": 0.5219, "step": 22042 }, { "epoch": 1.2343487512599396, "grad_norm": 1.115501880645752, "learning_rate": 9.946500000000001e-05, "loss": 0.2978, "step": 22043 }, { "epoch": 1.2344047485720686, "grad_norm": 1.4730665683746338, "learning_rate": 9.946473684210527e-05, "loss": 0.3858, "step": 22044 }, { "epoch": 1.2344607458841976, "grad_norm": 5.164679527282715, "learning_rate": 9.946447368421053e-05, "loss": 0.3937, "step": 22045 }, { "epoch": 1.2345167431963266, "grad_norm": 1.3251657485961914, "learning_rate": 9.946421052631579e-05, "loss": 0.4165, "step": 22046 }, { "epoch": 1.2345727405084557, "grad_norm": 2.141411781311035, "learning_rate": 9.946394736842106e-05, "loss": 0.593, "step": 22047 }, { "epoch": 1.2346287378205847, "grad_norm": 1.614847183227539, "learning_rate": 9.946368421052632e-05, "loss": 0.3216, "step": 22048 }, { "epoch": 1.2346847351327137, "grad_norm": 1.5325751304626465, "learning_rate": 9.946342105263158e-05, "loss": 0.5522, "step": 22049 }, { "epoch": 1.2347407324448427, "grad_norm": 1.457053542137146, "learning_rate": 9.946315789473684e-05, "loss": 0.5598, "step": 22050 }, { "epoch": 1.2347967297569717, "grad_norm": 1.3280045986175537, "learning_rate": 9.94628947368421e-05, "loss": 0.4834, "step": 22051 }, { "epoch": 1.2348527270691008, "grad_norm": 1.3620203733444214, "learning_rate": 9.946263157894738e-05, "loss": 0.4212, "step": 22052 }, { "epoch": 1.2349087243812298, "grad_norm": 1.6541097164154053, "learning_rate": 9.946236842105264e-05, "loss": 0.5099, "step": 22053 }, { "epoch": 1.2349647216933588, "grad_norm": 1.9816346168518066, "learning_rate": 9.94621052631579e-05, "loss": 0.6487, "step": 22054 }, { "epoch": 1.2350207190054878, "grad_norm": 1.4045215845108032, "learning_rate": 9.946184210526315e-05, "loss": 0.4277, "step": 22055 }, { "epoch": 
1.2350767163176168, "grad_norm": 1.0655875205993652, "learning_rate": 9.946157894736843e-05, "loss": 0.3683, "step": 22056 }, { "epoch": 1.2351327136297459, "grad_norm": 1.6678571701049805, "learning_rate": 9.946131578947369e-05, "loss": 0.5642, "step": 22057 }, { "epoch": 1.2351887109418749, "grad_norm": 1.317575216293335, "learning_rate": 9.946105263157896e-05, "loss": 0.3587, "step": 22058 }, { "epoch": 1.235244708254004, "grad_norm": 1.7448569536209106, "learning_rate": 9.946078947368421e-05, "loss": 0.5373, "step": 22059 }, { "epoch": 1.235300705566133, "grad_norm": 1.504167914390564, "learning_rate": 9.946052631578948e-05, "loss": 0.4347, "step": 22060 }, { "epoch": 1.235356702878262, "grad_norm": 1.193869709968567, "learning_rate": 9.946026315789474e-05, "loss": 0.3772, "step": 22061 }, { "epoch": 1.235412700190391, "grad_norm": 1.2586314678192139, "learning_rate": 9.946000000000001e-05, "loss": 0.4539, "step": 22062 }, { "epoch": 1.23546869750252, "grad_norm": 1.4514977931976318, "learning_rate": 9.945973684210527e-05, "loss": 0.3762, "step": 22063 }, { "epoch": 1.235524694814649, "grad_norm": 2.0244147777557373, "learning_rate": 9.945947368421053e-05, "loss": 0.5464, "step": 22064 }, { "epoch": 1.2355806921267778, "grad_norm": 1.2562710046768188, "learning_rate": 9.945921052631579e-05, "loss": 0.3524, "step": 22065 }, { "epoch": 1.2356366894389068, "grad_norm": 1.542137861251831, "learning_rate": 9.945894736842107e-05, "loss": 0.552, "step": 22066 }, { "epoch": 1.2356926867510358, "grad_norm": 1.2267441749572754, "learning_rate": 9.945868421052633e-05, "loss": 0.4243, "step": 22067 }, { "epoch": 1.2357486840631648, "grad_norm": 1.5006121397018433, "learning_rate": 9.945842105263159e-05, "loss": 0.4032, "step": 22068 }, { "epoch": 1.2358046813752939, "grad_norm": 2.746145725250244, "learning_rate": 9.945815789473684e-05, "loss": 0.4965, "step": 22069 }, { "epoch": 1.2358606786874229, "grad_norm": 1.4521000385284424, "learning_rate": 9.94578947368421e-05, 
"loss": 0.5064, "step": 22070 }, { "epoch": 1.235916675999552, "grad_norm": 1.5078973770141602, "learning_rate": 9.945763157894738e-05, "loss": 0.4564, "step": 22071 }, { "epoch": 1.235972673311681, "grad_norm": 1.340214490890503, "learning_rate": 9.945736842105264e-05, "loss": 0.3385, "step": 22072 }, { "epoch": 1.23602867062381, "grad_norm": 2.683298110961914, "learning_rate": 9.94571052631579e-05, "loss": 0.6248, "step": 22073 }, { "epoch": 1.236084667935939, "grad_norm": 2.3832719326019287, "learning_rate": 9.945684210526316e-05, "loss": 0.3919, "step": 22074 }, { "epoch": 1.236140665248068, "grad_norm": 1.521113634109497, "learning_rate": 9.945657894736843e-05, "loss": 0.4484, "step": 22075 }, { "epoch": 1.236196662560197, "grad_norm": 1.3267568349838257, "learning_rate": 9.945631578947369e-05, "loss": 0.4635, "step": 22076 }, { "epoch": 1.236252659872326, "grad_norm": 1.3356620073318481, "learning_rate": 9.945605263157895e-05, "loss": 0.438, "step": 22077 }, { "epoch": 1.236308657184455, "grad_norm": 1.4089046716690063, "learning_rate": 9.945578947368421e-05, "loss": 0.6273, "step": 22078 }, { "epoch": 1.236364654496584, "grad_norm": 1.6194113492965698, "learning_rate": 9.945552631578948e-05, "loss": 0.4205, "step": 22079 }, { "epoch": 1.236420651808713, "grad_norm": 1.4121274948120117, "learning_rate": 9.945526315789474e-05, "loss": 0.453, "step": 22080 }, { "epoch": 1.236476649120842, "grad_norm": 1.3679141998291016, "learning_rate": 9.945500000000002e-05, "loss": 0.484, "step": 22081 }, { "epoch": 1.2365326464329711, "grad_norm": 1.5797394514083862, "learning_rate": 9.945473684210526e-05, "loss": 0.5339, "step": 22082 }, { "epoch": 1.2365886437451001, "grad_norm": 1.3631484508514404, "learning_rate": 9.945447368421054e-05, "loss": 0.3056, "step": 22083 }, { "epoch": 1.2366446410572292, "grad_norm": 2.1892387866973877, "learning_rate": 9.94542105263158e-05, "loss": 0.4002, "step": 22084 }, { "epoch": 1.2367006383693582, "grad_norm": 1.414676547050476, 
"learning_rate": 9.945394736842105e-05, "loss": 0.5185, "step": 22085 }, { "epoch": 1.2367566356814872, "grad_norm": 1.4846559762954712, "learning_rate": 9.945368421052631e-05, "loss": 0.5629, "step": 22086 }, { "epoch": 1.2368126329936162, "grad_norm": 1.285040259361267, "learning_rate": 9.945342105263157e-05, "loss": 0.42, "step": 22087 }, { "epoch": 1.2368686303057452, "grad_norm": 1.1735033988952637, "learning_rate": 9.945315789473685e-05, "loss": 0.4051, "step": 22088 }, { "epoch": 1.2369246276178743, "grad_norm": 1.2226616144180298, "learning_rate": 9.945289473684211e-05, "loss": 0.3276, "step": 22089 }, { "epoch": 1.2369806249300033, "grad_norm": 1.4109970331192017, "learning_rate": 9.945263157894738e-05, "loss": 0.4347, "step": 22090 }, { "epoch": 1.2370366222421323, "grad_norm": 1.3883955478668213, "learning_rate": 9.945236842105263e-05, "loss": 0.3999, "step": 22091 }, { "epoch": 1.2370926195542613, "grad_norm": 1.646531105041504, "learning_rate": 9.94521052631579e-05, "loss": 0.5099, "step": 22092 }, { "epoch": 1.2371486168663903, "grad_norm": 1.2651643753051758, "learning_rate": 9.945184210526316e-05, "loss": 0.3708, "step": 22093 }, { "epoch": 1.2372046141785193, "grad_norm": 1.265564203262329, "learning_rate": 9.945157894736843e-05, "loss": 0.398, "step": 22094 }, { "epoch": 1.2372606114906484, "grad_norm": 1.3503236770629883, "learning_rate": 9.945131578947369e-05, "loss": 0.3481, "step": 22095 }, { "epoch": 1.2373166088027774, "grad_norm": 1.5110692977905273, "learning_rate": 9.945105263157895e-05, "loss": 0.4045, "step": 22096 }, { "epoch": 1.2373726061149064, "grad_norm": 1.2529535293579102, "learning_rate": 9.945078947368421e-05, "loss": 0.4833, "step": 22097 }, { "epoch": 1.2374286034270354, "grad_norm": 1.2186033725738525, "learning_rate": 9.945052631578949e-05, "loss": 0.408, "step": 22098 }, { "epoch": 1.2374846007391644, "grad_norm": 1.1480300426483154, "learning_rate": 9.945026315789475e-05, "loss": 0.4217, "step": 22099 }, { "epoch": 
1.2375405980512935, "grad_norm": 1.8707743883132935, "learning_rate": 9.945e-05, "loss": 0.4986, "step": 22100 }, { "epoch": 1.2375965953634225, "grad_norm": 1.02902090549469, "learning_rate": 9.944973684210526e-05, "loss": 0.3439, "step": 22101 }, { "epoch": 1.2376525926755515, "grad_norm": 1.3396210670471191, "learning_rate": 9.944947368421052e-05, "loss": 0.4738, "step": 22102 }, { "epoch": 1.2377085899876805, "grad_norm": 1.529415488243103, "learning_rate": 9.94492105263158e-05, "loss": 0.562, "step": 22103 }, { "epoch": 1.2377645872998095, "grad_norm": 1.4021307229995728, "learning_rate": 9.944894736842106e-05, "loss": 0.5112, "step": 22104 }, { "epoch": 1.2378205846119386, "grad_norm": 1.6247341632843018, "learning_rate": 9.944868421052632e-05, "loss": 0.4893, "step": 22105 }, { "epoch": 1.2378765819240676, "grad_norm": 2.6440608501434326, "learning_rate": 9.944842105263158e-05, "loss": 0.3807, "step": 22106 }, { "epoch": 1.2379325792361966, "grad_norm": 1.268375277519226, "learning_rate": 9.944815789473685e-05, "loss": 0.3197, "step": 22107 }, { "epoch": 1.2379885765483256, "grad_norm": 1.6202696561813354, "learning_rate": 9.944789473684211e-05, "loss": 0.5831, "step": 22108 }, { "epoch": 1.2380445738604546, "grad_norm": 1.4476566314697266, "learning_rate": 9.944763157894737e-05, "loss": 0.5444, "step": 22109 }, { "epoch": 1.2381005711725837, "grad_norm": 1.3280572891235352, "learning_rate": 9.944736842105263e-05, "loss": 0.3884, "step": 22110 }, { "epoch": 1.2381565684847127, "grad_norm": 1.6256791353225708, "learning_rate": 9.94471052631579e-05, "loss": 0.496, "step": 22111 }, { "epoch": 1.2382125657968417, "grad_norm": 1.2308534383773804, "learning_rate": 9.944684210526316e-05, "loss": 0.498, "step": 22112 }, { "epoch": 1.2382685631089707, "grad_norm": 1.2840787172317505, "learning_rate": 9.944657894736844e-05, "loss": 0.478, "step": 22113 }, { "epoch": 1.2383245604210997, "grad_norm": 1.9302207231521606, "learning_rate": 9.944631578947368e-05, "loss": 
0.3156, "step": 22114 }, { "epoch": 1.2383805577332287, "grad_norm": 1.466124415397644, "learning_rate": 9.944605263157896e-05, "loss": 0.5097, "step": 22115 }, { "epoch": 1.2384365550453578, "grad_norm": 1.376786470413208, "learning_rate": 9.944578947368421e-05, "loss": 0.5116, "step": 22116 }, { "epoch": 1.2384925523574868, "grad_norm": 1.3685163259506226, "learning_rate": 9.944552631578949e-05, "loss": 0.5624, "step": 22117 }, { "epoch": 1.2385485496696158, "grad_norm": 1.5305886268615723, "learning_rate": 9.944526315789475e-05, "loss": 0.4855, "step": 22118 }, { "epoch": 1.2386045469817448, "grad_norm": 1.2085416316986084, "learning_rate": 9.9445e-05, "loss": 0.382, "step": 22119 }, { "epoch": 1.2386605442938738, "grad_norm": 1.3168268203735352, "learning_rate": 9.944473684210527e-05, "loss": 0.5213, "step": 22120 }, { "epoch": 1.2387165416060029, "grad_norm": 1.3097447156906128, "learning_rate": 9.944447368421053e-05, "loss": 0.4341, "step": 22121 }, { "epoch": 1.2387725389181319, "grad_norm": 1.3421961069107056, "learning_rate": 9.94442105263158e-05, "loss": 0.3893, "step": 22122 }, { "epoch": 1.238828536230261, "grad_norm": 1.2957892417907715, "learning_rate": 9.944394736842105e-05, "loss": 0.4352, "step": 22123 }, { "epoch": 1.23888453354239, "grad_norm": 1.2423800230026245, "learning_rate": 9.944368421052632e-05, "loss": 0.454, "step": 22124 }, { "epoch": 1.238940530854519, "grad_norm": 1.5311256647109985, "learning_rate": 9.944342105263158e-05, "loss": 0.5097, "step": 22125 }, { "epoch": 1.238996528166648, "grad_norm": 1.586444616317749, "learning_rate": 9.944315789473685e-05, "loss": 0.4933, "step": 22126 }, { "epoch": 1.239052525478777, "grad_norm": 1.3216956853866577, "learning_rate": 9.944289473684211e-05, "loss": 0.3512, "step": 22127 }, { "epoch": 1.239108522790906, "grad_norm": 1.4504178762435913, "learning_rate": 9.944263157894737e-05, "loss": 0.4363, "step": 22128 }, { "epoch": 1.239164520103035, "grad_norm": 1.3706461191177368, "learning_rate": 
9.944236842105263e-05, "loss": 0.3395, "step": 22129 }, { "epoch": 1.239220517415164, "grad_norm": 1.4141967296600342, "learning_rate": 9.94421052631579e-05, "loss": 0.3825, "step": 22130 }, { "epoch": 1.239276514727293, "grad_norm": 1.3471068143844604, "learning_rate": 9.944184210526316e-05, "loss": 0.4097, "step": 22131 }, { "epoch": 1.239332512039422, "grad_norm": 1.5354331731796265, "learning_rate": 9.944157894736842e-05, "loss": 0.4644, "step": 22132 }, { "epoch": 1.239388509351551, "grad_norm": 1.3198585510253906, "learning_rate": 9.944131578947368e-05, "loss": 0.4278, "step": 22133 }, { "epoch": 1.2394445066636801, "grad_norm": 1.4136360883712769, "learning_rate": 9.944105263157896e-05, "loss": 0.3762, "step": 22134 }, { "epoch": 1.2395005039758091, "grad_norm": 1.4282978773117065, "learning_rate": 9.944078947368422e-05, "loss": 0.4303, "step": 22135 }, { "epoch": 1.2395565012879382, "grad_norm": 1.3022985458374023, "learning_rate": 9.944052631578949e-05, "loss": 0.4176, "step": 22136 }, { "epoch": 1.2396124986000672, "grad_norm": 1.4591301679611206, "learning_rate": 9.944026315789474e-05, "loss": 0.4183, "step": 22137 }, { "epoch": 1.2396684959121962, "grad_norm": 1.5927734375, "learning_rate": 9.944e-05, "loss": 0.5347, "step": 22138 }, { "epoch": 1.2397244932243252, "grad_norm": 1.2869540452957153, "learning_rate": 9.943973684210527e-05, "loss": 0.4606, "step": 22139 }, { "epoch": 1.2397804905364542, "grad_norm": 1.3185064792633057, "learning_rate": 9.943947368421053e-05, "loss": 0.6397, "step": 22140 }, { "epoch": 1.2398364878485832, "grad_norm": 1.3768068552017212, "learning_rate": 9.943921052631579e-05, "loss": 0.3936, "step": 22141 }, { "epoch": 1.2398924851607123, "grad_norm": 1.537149429321289, "learning_rate": 9.943894736842105e-05, "loss": 0.4421, "step": 22142 }, { "epoch": 1.2399484824728413, "grad_norm": 1.7025221586227417, "learning_rate": 9.943868421052632e-05, "loss": 0.6484, "step": 22143 }, { "epoch": 1.2400044797849703, "grad_norm": 
1.379907488822937, "learning_rate": 9.943842105263158e-05, "loss": 0.4589, "step": 22144 }, { "epoch": 1.2400604770970993, "grad_norm": 1.6071739196777344, "learning_rate": 9.943815789473686e-05, "loss": 0.5187, "step": 22145 }, { "epoch": 1.2401164744092283, "grad_norm": 1.2255569696426392, "learning_rate": 9.94378947368421e-05, "loss": 0.5301, "step": 22146 }, { "epoch": 1.2401724717213574, "grad_norm": 1.5059415102005005, "learning_rate": 9.943763157894737e-05, "loss": 0.5854, "step": 22147 }, { "epoch": 1.2402284690334864, "grad_norm": 1.4591748714447021, "learning_rate": 9.943736842105263e-05, "loss": 0.3683, "step": 22148 }, { "epoch": 1.2402844663456154, "grad_norm": 1.818745493888855, "learning_rate": 9.943710526315791e-05, "loss": 0.5289, "step": 22149 }, { "epoch": 1.2403404636577444, "grad_norm": 1.3020381927490234, "learning_rate": 9.943684210526317e-05, "loss": 0.4695, "step": 22150 }, { "epoch": 1.2403964609698734, "grad_norm": 1.2086421251296997, "learning_rate": 9.943657894736843e-05, "loss": 0.3829, "step": 22151 }, { "epoch": 1.2404524582820025, "grad_norm": 1.4827592372894287, "learning_rate": 9.943631578947369e-05, "loss": 0.6491, "step": 22152 }, { "epoch": 1.2405084555941315, "grad_norm": 1.2764463424682617, "learning_rate": 9.943605263157896e-05, "loss": 0.4088, "step": 22153 }, { "epoch": 1.2405644529062605, "grad_norm": 1.2118462324142456, "learning_rate": 9.943578947368422e-05, "loss": 0.3283, "step": 22154 }, { "epoch": 1.2406204502183895, "grad_norm": 1.3163617849349976, "learning_rate": 9.943552631578948e-05, "loss": 0.454, "step": 22155 }, { "epoch": 1.2406764475305185, "grad_norm": 1.503927230834961, "learning_rate": 9.943526315789474e-05, "loss": 0.5271, "step": 22156 }, { "epoch": 1.2407324448426476, "grad_norm": 1.8561662435531616, "learning_rate": 9.9435e-05, "loss": 0.5902, "step": 22157 }, { "epoch": 1.2407884421547766, "grad_norm": 1.3484762907028198, "learning_rate": 9.943473684210527e-05, "loss": 0.44, "step": 22158 }, { 
"epoch": 1.2408444394669056, "grad_norm": 1.370915412902832, "learning_rate": 9.943447368421053e-05, "loss": 0.5732, "step": 22159 }, { "epoch": 1.2409004367790346, "grad_norm": 1.4066773653030396, "learning_rate": 9.943421052631579e-05, "loss": 0.45, "step": 22160 }, { "epoch": 1.2409564340911636, "grad_norm": 1.4867023229599, "learning_rate": 9.943394736842105e-05, "loss": 0.4574, "step": 22161 }, { "epoch": 1.2410124314032926, "grad_norm": 1.3713905811309814, "learning_rate": 9.943368421052632e-05, "loss": 0.4768, "step": 22162 }, { "epoch": 1.2410684287154217, "grad_norm": 1.5865164995193481, "learning_rate": 9.943342105263158e-05, "loss": 0.4932, "step": 22163 }, { "epoch": 1.2411244260275507, "grad_norm": 1.3177303075790405, "learning_rate": 9.943315789473684e-05, "loss": 0.4736, "step": 22164 }, { "epoch": 1.2411804233396797, "grad_norm": 1.3435002565383911, "learning_rate": 9.94328947368421e-05, "loss": 0.5665, "step": 22165 }, { "epoch": 1.2412364206518087, "grad_norm": 2.1346640586853027, "learning_rate": 9.943263157894738e-05, "loss": 0.4935, "step": 22166 }, { "epoch": 1.2412924179639377, "grad_norm": 1.5035032033920288, "learning_rate": 9.943236842105264e-05, "loss": 0.4118, "step": 22167 }, { "epoch": 1.2413484152760668, "grad_norm": 1.4672106504440308, "learning_rate": 9.943210526315791e-05, "loss": 0.3559, "step": 22168 }, { "epoch": 1.2414044125881958, "grad_norm": 1.371329426765442, "learning_rate": 9.943184210526316e-05, "loss": 0.4685, "step": 22169 }, { "epoch": 1.2414604099003248, "grad_norm": 1.3162622451782227, "learning_rate": 9.943157894736843e-05, "loss": 0.4055, "step": 22170 }, { "epoch": 1.2415164072124538, "grad_norm": 1.2451874017715454, "learning_rate": 9.943131578947369e-05, "loss": 0.5376, "step": 22171 }, { "epoch": 1.2415724045245828, "grad_norm": 1.2837245464324951, "learning_rate": 9.943105263157895e-05, "loss": 0.473, "step": 22172 }, { "epoch": 1.2416284018367119, "grad_norm": 1.3146480321884155, "learning_rate": 
9.943078947368422e-05, "loss": 0.3934, "step": 22173 }, { "epoch": 1.2416843991488409, "grad_norm": 1.25737464427948, "learning_rate": 9.943052631578947e-05, "loss": 0.3653, "step": 22174 }, { "epoch": 1.24174039646097, "grad_norm": 1.4885339736938477, "learning_rate": 9.943026315789474e-05, "loss": 0.3836, "step": 22175 }, { "epoch": 1.241796393773099, "grad_norm": 1.474367380142212, "learning_rate": 9.943e-05, "loss": 0.5566, "step": 22176 }, { "epoch": 1.241852391085228, "grad_norm": 1.4683201313018799, "learning_rate": 9.942973684210528e-05, "loss": 0.4191, "step": 22177 }, { "epoch": 1.241908388397357, "grad_norm": 1.3633314371109009, "learning_rate": 9.942947368421052e-05, "loss": 0.4509, "step": 22178 }, { "epoch": 1.241964385709486, "grad_norm": 1.3684865236282349, "learning_rate": 9.94292105263158e-05, "loss": 0.4024, "step": 22179 }, { "epoch": 1.242020383021615, "grad_norm": 1.5431170463562012, "learning_rate": 9.942894736842105e-05, "loss": 0.5027, "step": 22180 }, { "epoch": 1.242076380333744, "grad_norm": 1.3154023885726929, "learning_rate": 9.942868421052633e-05, "loss": 0.5557, "step": 22181 }, { "epoch": 1.242132377645873, "grad_norm": 1.136061429977417, "learning_rate": 9.942842105263159e-05, "loss": 0.4273, "step": 22182 }, { "epoch": 1.242188374958002, "grad_norm": 1.4089622497558594, "learning_rate": 9.942815789473685e-05, "loss": 0.4442, "step": 22183 }, { "epoch": 1.242244372270131, "grad_norm": 1.4251272678375244, "learning_rate": 9.94278947368421e-05, "loss": 0.4511, "step": 22184 }, { "epoch": 1.24230036958226, "grad_norm": 1.441521406173706, "learning_rate": 9.942763157894738e-05, "loss": 0.4659, "step": 22185 }, { "epoch": 1.242356366894389, "grad_norm": 1.539623737335205, "learning_rate": 9.942736842105264e-05, "loss": 0.5026, "step": 22186 }, { "epoch": 1.2424123642065181, "grad_norm": 1.4089666604995728, "learning_rate": 9.94271052631579e-05, "loss": 0.3992, "step": 22187 }, { "epoch": 1.2424683615186471, "grad_norm": 
1.3999193906784058, "learning_rate": 9.942684210526316e-05, "loss": 0.6125, "step": 22188 }, { "epoch": 1.2425243588307762, "grad_norm": 1.5073438882827759, "learning_rate": 9.942657894736842e-05, "loss": 0.4619, "step": 22189 }, { "epoch": 1.2425803561429052, "grad_norm": 1.6267902851104736, "learning_rate": 9.942631578947369e-05, "loss": 0.5663, "step": 22190 }, { "epoch": 1.2426363534550342, "grad_norm": 1.5895700454711914, "learning_rate": 9.942605263157895e-05, "loss": 0.4105, "step": 22191 }, { "epoch": 1.2426923507671632, "grad_norm": 1.6286300420761108, "learning_rate": 9.942578947368421e-05, "loss": 0.5225, "step": 22192 }, { "epoch": 1.2427483480792922, "grad_norm": 1.1963218450546265, "learning_rate": 9.942552631578947e-05, "loss": 0.3773, "step": 22193 }, { "epoch": 1.2428043453914213, "grad_norm": 1.268042802810669, "learning_rate": 9.942526315789474e-05, "loss": 0.4605, "step": 22194 }, { "epoch": 1.2428603427035503, "grad_norm": 1.5324139595031738, "learning_rate": 9.9425e-05, "loss": 0.4588, "step": 22195 }, { "epoch": 1.2429163400156793, "grad_norm": 1.2663476467132568, "learning_rate": 9.942473684210526e-05, "loss": 0.4409, "step": 22196 }, { "epoch": 1.2429723373278083, "grad_norm": 1.4255861043930054, "learning_rate": 9.942447368421052e-05, "loss": 0.4213, "step": 22197 }, { "epoch": 1.2430283346399373, "grad_norm": 1.4221341609954834, "learning_rate": 9.94242105263158e-05, "loss": 0.4848, "step": 22198 }, { "epoch": 1.2430843319520664, "grad_norm": 1.505860686302185, "learning_rate": 9.942394736842106e-05, "loss": 0.6597, "step": 22199 }, { "epoch": 1.2431403292641954, "grad_norm": 1.4762026071548462, "learning_rate": 9.942368421052633e-05, "loss": 0.4869, "step": 22200 }, { "epoch": 1.2431963265763244, "grad_norm": 1.6010220050811768, "learning_rate": 9.942342105263158e-05, "loss": 0.5208, "step": 22201 }, { "epoch": 1.2432523238884534, "grad_norm": 1.171999216079712, "learning_rate": 9.942315789473685e-05, "loss": 0.3945, "step": 22202 }, { 
"epoch": 1.2433083212005824, "grad_norm": 1.299700379371643, "learning_rate": 9.942289473684211e-05, "loss": 0.4803, "step": 22203 }, { "epoch": 1.2433643185127115, "grad_norm": 1.5711313486099243, "learning_rate": 9.942263157894738e-05, "loss": 0.4983, "step": 22204 }, { "epoch": 1.2434203158248405, "grad_norm": 1.389089822769165, "learning_rate": 9.942236842105264e-05, "loss": 0.4457, "step": 22205 }, { "epoch": 1.2434763131369695, "grad_norm": 1.341274380683899, "learning_rate": 9.942210526315789e-05, "loss": 0.3424, "step": 22206 }, { "epoch": 1.2435323104490985, "grad_norm": 1.1177542209625244, "learning_rate": 9.942184210526316e-05, "loss": 0.3997, "step": 22207 }, { "epoch": 1.2435883077612275, "grad_norm": 1.4258272647857666, "learning_rate": 9.942157894736842e-05, "loss": 0.4808, "step": 22208 }, { "epoch": 1.2436443050733565, "grad_norm": 1.605284571647644, "learning_rate": 9.94213157894737e-05, "loss": 0.4504, "step": 22209 }, { "epoch": 1.2437003023854856, "grad_norm": 1.4095648527145386, "learning_rate": 9.942105263157895e-05, "loss": 0.3706, "step": 22210 }, { "epoch": 1.2437562996976146, "grad_norm": 1.2311546802520752, "learning_rate": 9.942078947368421e-05, "loss": 0.3636, "step": 22211 }, { "epoch": 1.2438122970097436, "grad_norm": 1.4106169939041138, "learning_rate": 9.942052631578947e-05, "loss": 0.5184, "step": 22212 }, { "epoch": 1.2438682943218726, "grad_norm": 1.5023138523101807, "learning_rate": 9.942026315789475e-05, "loss": 0.3936, "step": 22213 }, { "epoch": 1.2439242916340016, "grad_norm": 1.2082806825637817, "learning_rate": 9.942000000000001e-05, "loss": 0.5197, "step": 22214 }, { "epoch": 1.2439802889461307, "grad_norm": 1.411442756652832, "learning_rate": 9.941973684210527e-05, "loss": 0.4805, "step": 22215 }, { "epoch": 1.2440362862582597, "grad_norm": 1.3924330472946167, "learning_rate": 9.941947368421053e-05, "loss": 0.3279, "step": 22216 }, { "epoch": 1.2440922835703887, "grad_norm": 1.2783757448196411, "learning_rate": 
9.94192105263158e-05, "loss": 0.4446, "step": 22217 }, { "epoch": 1.2441482808825177, "grad_norm": 1.6165719032287598, "learning_rate": 9.941894736842106e-05, "loss": 0.5575, "step": 22218 }, { "epoch": 1.2442042781946467, "grad_norm": 1.1490000486373901, "learning_rate": 9.941868421052632e-05, "loss": 0.4069, "step": 22219 }, { "epoch": 1.2442602755067758, "grad_norm": 1.1328785419464111, "learning_rate": 9.941842105263158e-05, "loss": 0.3499, "step": 22220 }, { "epoch": 1.2443162728189048, "grad_norm": 1.3411613702774048, "learning_rate": 9.941815789473685e-05, "loss": 0.4329, "step": 22221 }, { "epoch": 1.2443722701310338, "grad_norm": 1.3682115077972412, "learning_rate": 9.941789473684211e-05, "loss": 0.4656, "step": 22222 }, { "epoch": 1.2444282674431628, "grad_norm": 1.3170279264450073, "learning_rate": 9.941763157894737e-05, "loss": 0.5161, "step": 22223 }, { "epoch": 1.2444842647552918, "grad_norm": 1.417770504951477, "learning_rate": 9.941736842105263e-05, "loss": 0.3697, "step": 22224 }, { "epoch": 1.2445402620674209, "grad_norm": 1.6053637266159058, "learning_rate": 9.941710526315789e-05, "loss": 0.4575, "step": 22225 }, { "epoch": 1.2445962593795499, "grad_norm": 1.4814941883087158, "learning_rate": 9.941684210526316e-05, "loss": 0.3615, "step": 22226 }, { "epoch": 1.244652256691679, "grad_norm": 1.3396278619766235, "learning_rate": 9.941657894736842e-05, "loss": 0.4416, "step": 22227 }, { "epoch": 1.244708254003808, "grad_norm": 1.1928372383117676, "learning_rate": 9.94163157894737e-05, "loss": 0.4007, "step": 22228 }, { "epoch": 1.244764251315937, "grad_norm": 1.2904865741729736, "learning_rate": 9.941605263157894e-05, "loss": 0.7117, "step": 22229 }, { "epoch": 1.244820248628066, "grad_norm": 1.3465505838394165, "learning_rate": 9.941578947368422e-05, "loss": 0.4399, "step": 22230 }, { "epoch": 1.244876245940195, "grad_norm": 1.3889199495315552, "learning_rate": 9.941552631578948e-05, "loss": 0.4516, "step": 22231 }, { "epoch": 1.244932243252324, 
"grad_norm": 1.256568193435669, "learning_rate": 9.941526315789475e-05, "loss": 0.4759, "step": 22232 }, { "epoch": 1.244988240564453, "grad_norm": 1.5486410856246948, "learning_rate": 9.9415e-05, "loss": 0.4454, "step": 22233 }, { "epoch": 1.245044237876582, "grad_norm": 1.2451177835464478, "learning_rate": 9.941473684210527e-05, "loss": 0.3403, "step": 22234 }, { "epoch": 1.245100235188711, "grad_norm": 1.2428570985794067, "learning_rate": 9.941447368421053e-05, "loss": 0.377, "step": 22235 }, { "epoch": 1.24515623250084, "grad_norm": 1.336918830871582, "learning_rate": 9.94142105263158e-05, "loss": 0.4067, "step": 22236 }, { "epoch": 1.245212229812969, "grad_norm": 1.4773956537246704, "learning_rate": 9.941394736842106e-05, "loss": 0.4488, "step": 22237 }, { "epoch": 1.245268227125098, "grad_norm": 1.2210487127304077, "learning_rate": 9.941368421052632e-05, "loss": 0.4032, "step": 22238 }, { "epoch": 1.2453242244372271, "grad_norm": 1.3869651556015015, "learning_rate": 9.941342105263158e-05, "loss": 0.4829, "step": 22239 }, { "epoch": 1.2453802217493561, "grad_norm": 1.6309747695922852, "learning_rate": 9.941315789473684e-05, "loss": 0.4308, "step": 22240 }, { "epoch": 1.2454362190614852, "grad_norm": 1.2051997184753418, "learning_rate": 9.941289473684211e-05, "loss": 0.5023, "step": 22241 }, { "epoch": 1.2454922163736142, "grad_norm": 1.7952145338058472, "learning_rate": 9.941263157894737e-05, "loss": 0.3756, "step": 22242 }, { "epoch": 1.2455482136857432, "grad_norm": 1.40574049949646, "learning_rate": 9.941236842105263e-05, "loss": 0.4318, "step": 22243 }, { "epoch": 1.2456042109978722, "grad_norm": 1.3252264261245728, "learning_rate": 9.94121052631579e-05, "loss": 0.6048, "step": 22244 }, { "epoch": 1.2456602083100012, "grad_norm": 1.380267858505249, "learning_rate": 9.941184210526317e-05, "loss": 0.4548, "step": 22245 }, { "epoch": 1.2457162056221303, "grad_norm": 1.1676914691925049, "learning_rate": 9.941157894736843e-05, "loss": 0.4655, "step": 22246 }, { 
"epoch": 1.2457722029342593, "grad_norm": 1.3717881441116333, "learning_rate": 9.941131578947369e-05, "loss": 0.3673, "step": 22247 }, { "epoch": 1.2458282002463883, "grad_norm": 1.1353402137756348, "learning_rate": 9.941105263157895e-05, "loss": 0.355, "step": 22248 }, { "epoch": 1.2458841975585173, "grad_norm": 1.4019495248794556, "learning_rate": 9.941078947368422e-05, "loss": 0.3895, "step": 22249 }, { "epoch": 1.2459401948706463, "grad_norm": 1.2970120906829834, "learning_rate": 9.941052631578948e-05, "loss": 0.3942, "step": 22250 }, { "epoch": 1.2459961921827751, "grad_norm": 1.249554991722107, "learning_rate": 9.941026315789474e-05, "loss": 0.3587, "step": 22251 }, { "epoch": 1.2460521894949041, "grad_norm": 1.4984300136566162, "learning_rate": 9.941e-05, "loss": 0.5534, "step": 22252 }, { "epoch": 1.2461081868070332, "grad_norm": 1.3894104957580566, "learning_rate": 9.940973684210527e-05, "loss": 0.4597, "step": 22253 }, { "epoch": 1.2461641841191622, "grad_norm": 1.3068922758102417, "learning_rate": 9.940947368421053e-05, "loss": 0.3798, "step": 22254 }, { "epoch": 1.2462201814312912, "grad_norm": 1.3988882303237915, "learning_rate": 9.94092105263158e-05, "loss": 0.4954, "step": 22255 }, { "epoch": 1.2462761787434202, "grad_norm": 1.2204664945602417, "learning_rate": 9.940894736842105e-05, "loss": 0.4786, "step": 22256 }, { "epoch": 1.2463321760555492, "grad_norm": 1.2899380922317505, "learning_rate": 9.940868421052632e-05, "loss": 0.4025, "step": 22257 }, { "epoch": 1.2463881733676783, "grad_norm": 1.3493857383728027, "learning_rate": 9.940842105263158e-05, "loss": 0.5435, "step": 22258 }, { "epoch": 1.2464441706798073, "grad_norm": 1.234354853630066, "learning_rate": 9.940815789473684e-05, "loss": 0.3658, "step": 22259 }, { "epoch": 1.2465001679919363, "grad_norm": 1.7639458179473877, "learning_rate": 9.940789473684212e-05, "loss": 0.4752, "step": 22260 }, { "epoch": 1.2465561653040653, "grad_norm": 1.5952787399291992, "learning_rate": 
9.940763157894736e-05, "loss": 0.508, "step": 22261 }, { "epoch": 1.2466121626161943, "grad_norm": 1.4668351411819458, "learning_rate": 9.940736842105264e-05, "loss": 0.4437, "step": 22262 }, { "epoch": 1.2466681599283234, "grad_norm": 1.3444815874099731, "learning_rate": 9.94071052631579e-05, "loss": 0.3824, "step": 22263 }, { "epoch": 1.2467241572404524, "grad_norm": 1.4767779111862183, "learning_rate": 9.940684210526317e-05, "loss": 0.4853, "step": 22264 }, { "epoch": 1.2467801545525814, "grad_norm": 1.5499085187911987, "learning_rate": 9.940657894736843e-05, "loss": 0.4013, "step": 22265 }, { "epoch": 1.2468361518647104, "grad_norm": 1.2688138484954834, "learning_rate": 9.940631578947369e-05, "loss": 0.4274, "step": 22266 }, { "epoch": 1.2468921491768394, "grad_norm": 1.3768786191940308, "learning_rate": 9.940605263157895e-05, "loss": 0.4536, "step": 22267 }, { "epoch": 1.2469481464889685, "grad_norm": 1.2985590696334839, "learning_rate": 9.940578947368422e-05, "loss": 0.4155, "step": 22268 }, { "epoch": 1.2470041438010975, "grad_norm": 1.2233731746673584, "learning_rate": 9.940552631578948e-05, "loss": 0.3946, "step": 22269 }, { "epoch": 1.2470601411132265, "grad_norm": 1.357564091682434, "learning_rate": 9.940526315789474e-05, "loss": 0.3937, "step": 22270 }, { "epoch": 1.2471161384253555, "grad_norm": 1.3891246318817139, "learning_rate": 9.9405e-05, "loss": 0.4024, "step": 22271 }, { "epoch": 1.2471721357374845, "grad_norm": 1.4539968967437744, "learning_rate": 9.940473684210527e-05, "loss": 0.4038, "step": 22272 }, { "epoch": 1.2472281330496136, "grad_norm": 1.7275545597076416, "learning_rate": 9.940447368421053e-05, "loss": 0.5758, "step": 22273 }, { "epoch": 1.2472841303617426, "grad_norm": 1.1803683042526245, "learning_rate": 9.94042105263158e-05, "loss": 0.4849, "step": 22274 }, { "epoch": 1.2473401276738716, "grad_norm": 1.2474204301834106, "learning_rate": 9.940394736842105e-05, "loss": 0.4421, "step": 22275 }, { "epoch": 1.2473961249860006, 
"grad_norm": 1.5943682193756104, "learning_rate": 9.940368421052631e-05, "loss": 0.5201, "step": 22276 }, { "epoch": 1.2474521222981296, "grad_norm": 1.2650312185287476, "learning_rate": 9.940342105263159e-05, "loss": 0.3833, "step": 22277 }, { "epoch": 1.2475081196102586, "grad_norm": 4.03690242767334, "learning_rate": 9.940315789473685e-05, "loss": 0.4693, "step": 22278 }, { "epoch": 1.2475641169223877, "grad_norm": 1.3714172840118408, "learning_rate": 9.94028947368421e-05, "loss": 0.5724, "step": 22279 }, { "epoch": 1.2476201142345167, "grad_norm": 1.2636523246765137, "learning_rate": 9.940263157894737e-05, "loss": 0.4871, "step": 22280 }, { "epoch": 1.2476761115466457, "grad_norm": 1.3796244859695435, "learning_rate": 9.940236842105264e-05, "loss": 0.4291, "step": 22281 }, { "epoch": 1.2477321088587747, "grad_norm": 1.2901058197021484, "learning_rate": 9.94021052631579e-05, "loss": 0.5145, "step": 22282 }, { "epoch": 1.2477881061709037, "grad_norm": 1.2571921348571777, "learning_rate": 9.940184210526317e-05, "loss": 0.3346, "step": 22283 }, { "epoch": 1.2478441034830328, "grad_norm": 1.4464619159698486, "learning_rate": 9.940157894736842e-05, "loss": 0.5793, "step": 22284 }, { "epoch": 1.2479001007951618, "grad_norm": 1.5002973079681396, "learning_rate": 9.940131578947369e-05, "loss": 0.4873, "step": 22285 }, { "epoch": 1.2479560981072908, "grad_norm": 1.4570118188858032, "learning_rate": 9.940105263157895e-05, "loss": 0.3904, "step": 22286 }, { "epoch": 1.2480120954194198, "grad_norm": 1.5554147958755493, "learning_rate": 9.940078947368422e-05, "loss": 0.4778, "step": 22287 }, { "epoch": 1.2480680927315488, "grad_norm": 8.259346961975098, "learning_rate": 9.940052631578947e-05, "loss": 0.4362, "step": 22288 }, { "epoch": 1.2481240900436779, "grad_norm": 1.4304662942886353, "learning_rate": 9.940026315789474e-05, "loss": 0.5186, "step": 22289 }, { "epoch": 1.2481800873558069, "grad_norm": 1.2556445598602295, "learning_rate": 9.94e-05, "loss": 0.3436, "step": 
22290 }, { "epoch": 1.248236084667936, "grad_norm": 1.1076174974441528, "learning_rate": 9.939973684210528e-05, "loss": 0.3614, "step": 22291 }, { "epoch": 1.248292081980065, "grad_norm": 1.23257315158844, "learning_rate": 9.939947368421054e-05, "loss": 0.4392, "step": 22292 }, { "epoch": 1.248348079292194, "grad_norm": 1.283825397491455, "learning_rate": 9.939921052631578e-05, "loss": 0.4014, "step": 22293 }, { "epoch": 1.248404076604323, "grad_norm": 1.176995038986206, "learning_rate": 9.939894736842106e-05, "loss": 0.3408, "step": 22294 }, { "epoch": 1.248460073916452, "grad_norm": 1.4385688304901123, "learning_rate": 9.939868421052632e-05, "loss": 0.3907, "step": 22295 }, { "epoch": 1.248516071228581, "grad_norm": 1.8741458654403687, "learning_rate": 9.939842105263159e-05, "loss": 0.8512, "step": 22296 }, { "epoch": 1.24857206854071, "grad_norm": 1.3005434274673462, "learning_rate": 9.939815789473685e-05, "loss": 0.5882, "step": 22297 }, { "epoch": 1.248628065852839, "grad_norm": 1.5760449171066284, "learning_rate": 9.939789473684211e-05, "loss": 0.3411, "step": 22298 }, { "epoch": 1.248684063164968, "grad_norm": 1.1352293491363525, "learning_rate": 9.939763157894737e-05, "loss": 0.4053, "step": 22299 }, { "epoch": 1.248740060477097, "grad_norm": 1.546398639678955, "learning_rate": 9.939736842105264e-05, "loss": 0.4613, "step": 22300 }, { "epoch": 1.248796057789226, "grad_norm": 1.702652096748352, "learning_rate": 9.93971052631579e-05, "loss": 0.5096, "step": 22301 }, { "epoch": 1.248852055101355, "grad_norm": 1.2364565134048462, "learning_rate": 9.939684210526316e-05, "loss": 0.3482, "step": 22302 }, { "epoch": 1.2489080524134841, "grad_norm": 1.5198919773101807, "learning_rate": 9.939657894736842e-05, "loss": 0.585, "step": 22303 }, { "epoch": 1.2489640497256131, "grad_norm": 1.2503507137298584, "learning_rate": 9.93963157894737e-05, "loss": 0.3867, "step": 22304 }, { "epoch": 1.2490200470377422, "grad_norm": 1.4735126495361328, "learning_rate": 
9.939605263157895e-05, "loss": 0.4261, "step": 22305 }, { "epoch": 1.2490760443498712, "grad_norm": 1.3438857793807983, "learning_rate": 9.939578947368421e-05, "loss": 0.4776, "step": 22306 }, { "epoch": 1.2491320416620002, "grad_norm": 1.3483028411865234, "learning_rate": 9.939552631578947e-05, "loss": 0.5217, "step": 22307 }, { "epoch": 1.2491880389741292, "grad_norm": 1.2666211128234863, "learning_rate": 9.939526315789475e-05, "loss": 0.4004, "step": 22308 }, { "epoch": 1.2492440362862582, "grad_norm": 1.336466670036316, "learning_rate": 9.9395e-05, "loss": 0.4599, "step": 22309 }, { "epoch": 1.2493000335983873, "grad_norm": 1.5537548065185547, "learning_rate": 9.939473684210527e-05, "loss": 0.6118, "step": 22310 }, { "epoch": 1.2493560309105163, "grad_norm": 1.4111347198486328, "learning_rate": 9.939447368421053e-05, "loss": 0.4152, "step": 22311 }, { "epoch": 1.2494120282226453, "grad_norm": 1.4069594144821167, "learning_rate": 9.939421052631579e-05, "loss": 0.4386, "step": 22312 }, { "epoch": 1.2494680255347743, "grad_norm": 1.263898253440857, "learning_rate": 9.939394736842106e-05, "loss": 0.4336, "step": 22313 }, { "epoch": 1.2495240228469033, "grad_norm": 1.617743730545044, "learning_rate": 9.939368421052632e-05, "loss": 0.6002, "step": 22314 }, { "epoch": 1.2495800201590324, "grad_norm": 1.2375640869140625, "learning_rate": 9.939342105263159e-05, "loss": 0.4006, "step": 22315 }, { "epoch": 1.2496360174711614, "grad_norm": 1.4769809246063232, "learning_rate": 9.939315789473684e-05, "loss": 0.4202, "step": 22316 }, { "epoch": 1.2496920147832904, "grad_norm": 1.7477962970733643, "learning_rate": 9.939289473684211e-05, "loss": 0.5736, "step": 22317 }, { "epoch": 1.2497480120954194, "grad_norm": 1.5742456912994385, "learning_rate": 9.939263157894737e-05, "loss": 0.4195, "step": 22318 }, { "epoch": 1.2498040094075484, "grad_norm": 1.5923107862472534, "learning_rate": 9.939236842105264e-05, "loss": 0.4855, "step": 22319 }, { "epoch": 1.2498600067196775, 
"grad_norm": 1.0994890928268433, "learning_rate": 9.93921052631579e-05, "loss": 0.2878, "step": 22320 }, { "epoch": 1.2499160040318065, "grad_norm": 1.2776751518249512, "learning_rate": 9.939184210526316e-05, "loss": 0.5888, "step": 22321 }, { "epoch": 1.2499720013439355, "grad_norm": 1.275792121887207, "learning_rate": 9.939157894736842e-05, "loss": 0.4022, "step": 22322 }, { "epoch": 1.2500279986560645, "grad_norm": 1.2924615144729614, "learning_rate": 9.93913157894737e-05, "loss": 0.4109, "step": 22323 }, { "epoch": 1.2500839959681935, "grad_norm": 1.181273341178894, "learning_rate": 9.939105263157896e-05, "loss": 0.4517, "step": 22324 }, { "epoch": 1.2501399932803225, "grad_norm": 1.4977375268936157, "learning_rate": 9.939078947368422e-05, "loss": 0.4974, "step": 22325 }, { "epoch": 1.2501959905924516, "grad_norm": 1.384088158607483, "learning_rate": 9.939052631578948e-05, "loss": 0.3408, "step": 22326 }, { "epoch": 1.2502519879045806, "grad_norm": 1.5096538066864014, "learning_rate": 9.939026315789474e-05, "loss": 0.5548, "step": 22327 }, { "epoch": 1.2503079852167096, "grad_norm": 1.2352845668792725, "learning_rate": 9.939000000000001e-05, "loss": 0.3673, "step": 22328 }, { "epoch": 1.2503639825288386, "grad_norm": 1.3208589553833008, "learning_rate": 9.938973684210527e-05, "loss": 0.6064, "step": 22329 }, { "epoch": 1.2504199798409676, "grad_norm": 2.8895554542541504, "learning_rate": 9.938947368421053e-05, "loss": 0.4623, "step": 22330 }, { "epoch": 1.2504759771530967, "grad_norm": 1.7574106454849243, "learning_rate": 9.938921052631579e-05, "loss": 0.5314, "step": 22331 }, { "epoch": 1.2505319744652257, "grad_norm": 1.6027345657348633, "learning_rate": 9.938894736842106e-05, "loss": 0.442, "step": 22332 }, { "epoch": 1.2505879717773547, "grad_norm": 2.04962158203125, "learning_rate": 9.938868421052632e-05, "loss": 0.5385, "step": 22333 }, { "epoch": 1.2506439690894837, "grad_norm": 1.3174147605895996, "learning_rate": 9.938842105263158e-05, "loss": 0.569, 
"step": 22334 }, { "epoch": 1.2506999664016127, "grad_norm": 1.7815295457839966, "learning_rate": 9.938815789473684e-05, "loss": 0.5231, "step": 22335 }, { "epoch": 1.2507559637137418, "grad_norm": 1.4336323738098145, "learning_rate": 9.938789473684211e-05, "loss": 0.6225, "step": 22336 }, { "epoch": 1.2508119610258708, "grad_norm": 1.342552661895752, "learning_rate": 9.938763157894737e-05, "loss": 0.4109, "step": 22337 }, { "epoch": 1.2508679583379998, "grad_norm": 1.2027740478515625, "learning_rate": 9.938736842105265e-05, "loss": 0.3825, "step": 22338 }, { "epoch": 1.2509239556501288, "grad_norm": 1.3601100444793701, "learning_rate": 9.938710526315789e-05, "loss": 0.448, "step": 22339 }, { "epoch": 1.2509799529622578, "grad_norm": 1.4747097492218018, "learning_rate": 9.938684210526317e-05, "loss": 0.4084, "step": 22340 }, { "epoch": 1.2510359502743869, "grad_norm": 1.3954651355743408, "learning_rate": 9.938657894736843e-05, "loss": 0.4549, "step": 22341 }, { "epoch": 1.2510919475865159, "grad_norm": 1.2875089645385742, "learning_rate": 9.93863157894737e-05, "loss": 0.3957, "step": 22342 }, { "epoch": 1.251147944898645, "grad_norm": 1.372676968574524, "learning_rate": 9.938605263157895e-05, "loss": 0.4569, "step": 22343 }, { "epoch": 1.251203942210774, "grad_norm": 1.1782981157302856, "learning_rate": 9.93857894736842e-05, "loss": 0.4145, "step": 22344 }, { "epoch": 1.251259939522903, "grad_norm": 1.5319457054138184, "learning_rate": 9.938552631578948e-05, "loss": 0.5579, "step": 22345 }, { "epoch": 1.251315936835032, "grad_norm": 1.3733348846435547, "learning_rate": 9.938526315789474e-05, "loss": 0.4205, "step": 22346 }, { "epoch": 1.251371934147161, "grad_norm": 1.3196403980255127, "learning_rate": 9.938500000000001e-05, "loss": 0.3781, "step": 22347 }, { "epoch": 1.25142793145929, "grad_norm": 1.1703699827194214, "learning_rate": 9.938473684210526e-05, "loss": 0.3795, "step": 22348 }, { "epoch": 1.251483928771419, "grad_norm": 1.5440356731414795, 
"learning_rate": 9.938447368421053e-05, "loss": 0.4559, "step": 22349 }, { "epoch": 1.251539926083548, "grad_norm": 1.190883755683899, "learning_rate": 9.938421052631579e-05, "loss": 0.3914, "step": 22350 }, { "epoch": 1.251595923395677, "grad_norm": 1.065455436706543, "learning_rate": 9.938394736842106e-05, "loss": 0.3576, "step": 22351 }, { "epoch": 1.251651920707806, "grad_norm": 1.5451136827468872, "learning_rate": 9.938368421052632e-05, "loss": 0.5983, "step": 22352 }, { "epoch": 1.251707918019935, "grad_norm": 1.3333629369735718, "learning_rate": 9.938342105263158e-05, "loss": 0.4733, "step": 22353 }, { "epoch": 1.251763915332064, "grad_norm": 1.1256550550460815, "learning_rate": 9.938315789473684e-05, "loss": 0.3686, "step": 22354 }, { "epoch": 1.2518199126441931, "grad_norm": 1.2385841608047485, "learning_rate": 9.938289473684212e-05, "loss": 0.4191, "step": 22355 }, { "epoch": 1.2518759099563221, "grad_norm": 1.2452980279922485, "learning_rate": 9.938263157894738e-05, "loss": 0.4756, "step": 22356 }, { "epoch": 1.2519319072684512, "grad_norm": 1.3275479078292847, "learning_rate": 9.938236842105264e-05, "loss": 0.3779, "step": 22357 }, { "epoch": 1.2519879045805802, "grad_norm": 1.409300446510315, "learning_rate": 9.93821052631579e-05, "loss": 0.3782, "step": 22358 }, { "epoch": 1.2520439018927092, "grad_norm": 1.6561707258224487, "learning_rate": 9.938184210526317e-05, "loss": 0.5491, "step": 22359 }, { "epoch": 1.2520998992048382, "grad_norm": 1.3989508152008057, "learning_rate": 9.938157894736843e-05, "loss": 0.4917, "step": 22360 }, { "epoch": 1.2521558965169672, "grad_norm": 1.2363028526306152, "learning_rate": 9.938131578947369e-05, "loss": 0.4661, "step": 22361 }, { "epoch": 1.2522118938290963, "grad_norm": 1.3518849611282349, "learning_rate": 9.938105263157895e-05, "loss": 0.4335, "step": 22362 }, { "epoch": 1.2522678911412253, "grad_norm": 1.5282328128814697, "learning_rate": 9.938078947368421e-05, "loss": 0.5125, "step": 22363 }, { "epoch": 
1.2523238884533543, "grad_norm": 1.4124184846878052, "learning_rate": 9.938052631578948e-05, "loss": 0.5615, "step": 22364 }, { "epoch": 1.2523798857654833, "grad_norm": 1.2635563611984253, "learning_rate": 9.938026315789474e-05, "loss": 0.403, "step": 22365 }, { "epoch": 1.2524358830776123, "grad_norm": 1.281112551689148, "learning_rate": 9.938e-05, "loss": 0.3814, "step": 22366 }, { "epoch": 1.2524918803897414, "grad_norm": 1.242078423500061, "learning_rate": 9.937973684210526e-05, "loss": 0.4, "step": 22367 }, { "epoch": 1.2525478777018704, "grad_norm": 1.2379521131515503, "learning_rate": 9.937947368421053e-05, "loss": 0.4875, "step": 22368 }, { "epoch": 1.2526038750139994, "grad_norm": 1.6423168182373047, "learning_rate": 9.93792105263158e-05, "loss": 0.4695, "step": 22369 }, { "epoch": 1.2526598723261284, "grad_norm": 1.422355055809021, "learning_rate": 9.937894736842107e-05, "loss": 0.5009, "step": 22370 }, { "epoch": 1.2527158696382574, "grad_norm": 1.112548828125, "learning_rate": 9.937868421052631e-05, "loss": 0.3949, "step": 22371 }, { "epoch": 1.2527718669503864, "grad_norm": 1.3380110263824463, "learning_rate": 9.937842105263159e-05, "loss": 0.458, "step": 22372 }, { "epoch": 1.2528278642625155, "grad_norm": 1.386528730392456, "learning_rate": 9.937815789473685e-05, "loss": 0.4417, "step": 22373 }, { "epoch": 1.2528838615746445, "grad_norm": 1.596781849861145, "learning_rate": 9.937789473684212e-05, "loss": 0.6139, "step": 22374 }, { "epoch": 1.2529398588867735, "grad_norm": 1.2813609838485718, "learning_rate": 9.937763157894738e-05, "loss": 0.41, "step": 22375 }, { "epoch": 1.2529958561989025, "grad_norm": 2.168900728225708, "learning_rate": 9.937736842105264e-05, "loss": 0.6009, "step": 22376 }, { "epoch": 1.2530518535110315, "grad_norm": 1.4229382276535034, "learning_rate": 9.93771052631579e-05, "loss": 0.4844, "step": 22377 }, { "epoch": 1.2531078508231606, "grad_norm": 1.288794755935669, "learning_rate": 9.937684210526317e-05, "loss": 0.4613, 
"step": 22378 }, { "epoch": 1.2531638481352896, "grad_norm": 1.322526216506958, "learning_rate": 9.937657894736843e-05, "loss": 0.4081, "step": 22379 }, { "epoch": 1.2532198454474186, "grad_norm": 1.375086784362793, "learning_rate": 9.937631578947368e-05, "loss": 0.3796, "step": 22380 }, { "epoch": 1.2532758427595476, "grad_norm": 1.1509244441986084, "learning_rate": 9.937605263157895e-05, "loss": 0.4033, "step": 22381 }, { "epoch": 1.2533318400716766, "grad_norm": 1.2860116958618164, "learning_rate": 9.937578947368421e-05, "loss": 0.462, "step": 22382 }, { "epoch": 1.2533878373838057, "grad_norm": 1.4602913856506348, "learning_rate": 9.937552631578948e-05, "loss": 0.4564, "step": 22383 }, { "epoch": 1.2534438346959347, "grad_norm": 1.9868360757827759, "learning_rate": 9.937526315789474e-05, "loss": 0.4496, "step": 22384 }, { "epoch": 1.2534998320080635, "grad_norm": 1.5726895332336426, "learning_rate": 9.9375e-05, "loss": 0.4239, "step": 22385 }, { "epoch": 1.2535558293201925, "grad_norm": 2.0822389125823975, "learning_rate": 9.937473684210526e-05, "loss": 0.4286, "step": 22386 }, { "epoch": 1.2536118266323215, "grad_norm": 1.3133800029754639, "learning_rate": 9.937447368421054e-05, "loss": 0.5122, "step": 22387 }, { "epoch": 1.2536678239444505, "grad_norm": 1.394006371498108, "learning_rate": 9.93742105263158e-05, "loss": 0.4558, "step": 22388 }, { "epoch": 1.2537238212565796, "grad_norm": 1.356560230255127, "learning_rate": 9.937394736842106e-05, "loss": 0.424, "step": 22389 }, { "epoch": 1.2537798185687086, "grad_norm": 1.328401803970337, "learning_rate": 9.937368421052632e-05, "loss": 0.5114, "step": 22390 }, { "epoch": 1.2538358158808376, "grad_norm": 1.2721163034439087, "learning_rate": 9.937342105263159e-05, "loss": 0.4165, "step": 22391 }, { "epoch": 1.2538918131929666, "grad_norm": 3.9038705825805664, "learning_rate": 9.937315789473685e-05, "loss": 0.3836, "step": 22392 }, { "epoch": 1.2539478105050956, "grad_norm": 1.5038775205612183, "learning_rate": 
9.937289473684211e-05, "loss": 0.6409, "step": 22393 }, { "epoch": 1.2540038078172246, "grad_norm": 1.7684061527252197, "learning_rate": 9.937263157894737e-05, "loss": 0.4112, "step": 22394 }, { "epoch": 1.2540598051293537, "grad_norm": 1.331947922706604, "learning_rate": 9.937236842105264e-05, "loss": 0.4065, "step": 22395 }, { "epoch": 1.2541158024414827, "grad_norm": 1.4933711290359497, "learning_rate": 9.93721052631579e-05, "loss": 0.384, "step": 22396 }, { "epoch": 1.2541717997536117, "grad_norm": 1.411307454109192, "learning_rate": 9.937184210526316e-05, "loss": 0.4413, "step": 22397 }, { "epoch": 1.2542277970657407, "grad_norm": 1.5018677711486816, "learning_rate": 9.937157894736842e-05, "loss": 0.4437, "step": 22398 }, { "epoch": 1.2542837943778697, "grad_norm": 1.2441569566726685, "learning_rate": 9.937131578947368e-05, "loss": 0.404, "step": 22399 }, { "epoch": 1.2543397916899988, "grad_norm": 1.35121488571167, "learning_rate": 9.937105263157895e-05, "loss": 0.4505, "step": 22400 }, { "epoch": 1.2543957890021278, "grad_norm": 1.5654253959655762, "learning_rate": 9.937078947368421e-05, "loss": 0.4104, "step": 22401 }, { "epoch": 1.2544517863142568, "grad_norm": 1.331819772720337, "learning_rate": 9.937052631578949e-05, "loss": 0.3457, "step": 22402 }, { "epoch": 1.2545077836263858, "grad_norm": 1.3496921062469482, "learning_rate": 9.937026315789473e-05, "loss": 0.4175, "step": 22403 }, { "epoch": 1.2545637809385148, "grad_norm": 1.254142165184021, "learning_rate": 9.937e-05, "loss": 0.5591, "step": 22404 }, { "epoch": 1.2546197782506439, "grad_norm": 1.2177461385726929, "learning_rate": 9.936973684210527e-05, "loss": 0.3637, "step": 22405 }, { "epoch": 1.2546757755627729, "grad_norm": 1.4695017337799072, "learning_rate": 9.936947368421054e-05, "loss": 0.4761, "step": 22406 }, { "epoch": 1.254731772874902, "grad_norm": 1.5132089853286743, "learning_rate": 9.93692105263158e-05, "loss": 0.634, "step": 22407 }, { "epoch": 1.254787770187031, "grad_norm": 
1.3467780351638794, "learning_rate": 9.936894736842106e-05, "loss": 0.6429, "step": 22408 }, { "epoch": 1.25484376749916, "grad_norm": 1.2279789447784424, "learning_rate": 9.936868421052632e-05, "loss": 0.4997, "step": 22409 }, { "epoch": 1.254899764811289, "grad_norm": 1.2986432313919067, "learning_rate": 9.936842105263159e-05, "loss": 0.4413, "step": 22410 }, { "epoch": 1.254955762123418, "grad_norm": 1.3174965381622314, "learning_rate": 9.936815789473685e-05, "loss": 0.3556, "step": 22411 }, { "epoch": 1.255011759435547, "grad_norm": 1.1024069786071777, "learning_rate": 9.936789473684211e-05, "loss": 0.3554, "step": 22412 }, { "epoch": 1.255067756747676, "grad_norm": 13.55655574798584, "learning_rate": 9.936763157894737e-05, "loss": 0.4215, "step": 22413 }, { "epoch": 1.255123754059805, "grad_norm": 1.4097927808761597, "learning_rate": 9.936736842105263e-05, "loss": 0.441, "step": 22414 }, { "epoch": 1.255179751371934, "grad_norm": 1.3753725290298462, "learning_rate": 9.93671052631579e-05, "loss": 0.5994, "step": 22415 }, { "epoch": 1.255235748684063, "grad_norm": 1.4475756883621216, "learning_rate": 9.936684210526316e-05, "loss": 0.3789, "step": 22416 }, { "epoch": 1.255291745996192, "grad_norm": 2.690183401107788, "learning_rate": 9.936657894736842e-05, "loss": 0.445, "step": 22417 }, { "epoch": 1.255347743308321, "grad_norm": 1.5758112668991089, "learning_rate": 9.936631578947368e-05, "loss": 0.4321, "step": 22418 }, { "epoch": 1.2554037406204501, "grad_norm": 1.377387285232544, "learning_rate": 9.936605263157896e-05, "loss": 0.5935, "step": 22419 }, { "epoch": 1.2554597379325791, "grad_norm": 1.4579378366470337, "learning_rate": 9.936578947368422e-05, "loss": 0.3929, "step": 22420 }, { "epoch": 1.2555157352447082, "grad_norm": 1.174188494682312, "learning_rate": 9.936552631578948e-05, "loss": 0.3638, "step": 22421 }, { "epoch": 1.2555717325568372, "grad_norm": 1.19265615940094, "learning_rate": 9.936526315789474e-05, "loss": 0.3975, "step": 22422 }, { 
"epoch": 1.2556277298689662, "grad_norm": 1.4435083866119385, "learning_rate": 9.936500000000001e-05, "loss": 0.4415, "step": 22423 }, { "epoch": 1.2556837271810952, "grad_norm": 1.4970040321350098, "learning_rate": 9.936473684210527e-05, "loss": 0.5084, "step": 22424 }, { "epoch": 1.2557397244932242, "grad_norm": 1.7829281091690063, "learning_rate": 9.936447368421054e-05, "loss": 0.4626, "step": 22425 }, { "epoch": 1.2557957218053533, "grad_norm": 1.4064826965332031, "learning_rate": 9.936421052631579e-05, "loss": 0.4295, "step": 22426 }, { "epoch": 1.2558517191174823, "grad_norm": 1.2395046949386597, "learning_rate": 9.936394736842106e-05, "loss": 0.5363, "step": 22427 }, { "epoch": 1.2559077164296113, "grad_norm": 1.4189103841781616, "learning_rate": 9.936368421052632e-05, "loss": 0.346, "step": 22428 }, { "epoch": 1.2559637137417403, "grad_norm": 1.219213843345642, "learning_rate": 9.93634210526316e-05, "loss": 0.4594, "step": 22429 }, { "epoch": 1.2560197110538693, "grad_norm": 1.280315637588501, "learning_rate": 9.936315789473685e-05, "loss": 0.3594, "step": 22430 }, { "epoch": 1.2560757083659984, "grad_norm": 1.197350025177002, "learning_rate": 9.93628947368421e-05, "loss": 0.4468, "step": 22431 }, { "epoch": 1.2561317056781274, "grad_norm": 1.1670315265655518, "learning_rate": 9.936263157894737e-05, "loss": 0.3612, "step": 22432 }, { "epoch": 1.2561877029902564, "grad_norm": 1.382169485092163, "learning_rate": 9.936236842105263e-05, "loss": 0.4715, "step": 22433 }, { "epoch": 1.2562437003023854, "grad_norm": 1.1867884397506714, "learning_rate": 9.93621052631579e-05, "loss": 0.3865, "step": 22434 }, { "epoch": 1.2562996976145144, "grad_norm": 1.3531599044799805, "learning_rate": 9.936184210526315e-05, "loss": 0.4135, "step": 22435 }, { "epoch": 1.2563556949266435, "grad_norm": 1.5227432250976562, "learning_rate": 9.936157894736843e-05, "loss": 0.4965, "step": 22436 }, { "epoch": 1.2564116922387725, "grad_norm": 1.5554322004318237, "learning_rate": 
9.936131578947369e-05, "loss": 0.63, "step": 22437 }, { "epoch": 1.2564676895509015, "grad_norm": 1.5383269786834717, "learning_rate": 9.936105263157896e-05, "loss": 0.5737, "step": 22438 }, { "epoch": 1.2565236868630305, "grad_norm": 1.520043134689331, "learning_rate": 9.936078947368422e-05, "loss": 0.5204, "step": 22439 }, { "epoch": 1.2565796841751595, "grad_norm": 1.1973000764846802, "learning_rate": 9.936052631578948e-05, "loss": 0.3516, "step": 22440 }, { "epoch": 1.2566356814872885, "grad_norm": 1.0638693571090698, "learning_rate": 9.936026315789474e-05, "loss": 0.3651, "step": 22441 }, { "epoch": 1.2566916787994176, "grad_norm": 3.457150936126709, "learning_rate": 9.936000000000001e-05, "loss": 0.4624, "step": 22442 }, { "epoch": 1.2567476761115466, "grad_norm": 1.3856191635131836, "learning_rate": 9.935973684210527e-05, "loss": 0.4131, "step": 22443 }, { "epoch": 1.2568036734236756, "grad_norm": 1.7095839977264404, "learning_rate": 9.935947368421053e-05, "loss": 0.4786, "step": 22444 }, { "epoch": 1.2568596707358046, "grad_norm": 1.5006550550460815, "learning_rate": 9.935921052631579e-05, "loss": 0.4271, "step": 22445 }, { "epoch": 1.2569156680479336, "grad_norm": 1.334295392036438, "learning_rate": 9.935894736842106e-05, "loss": 0.4931, "step": 22446 }, { "epoch": 1.2569716653600627, "grad_norm": 1.4089369773864746, "learning_rate": 9.935868421052632e-05, "loss": 0.5179, "step": 22447 }, { "epoch": 1.2570276626721917, "grad_norm": 1.551820993423462, "learning_rate": 9.935842105263158e-05, "loss": 0.6057, "step": 22448 }, { "epoch": 1.2570836599843207, "grad_norm": 1.658945083618164, "learning_rate": 9.935815789473684e-05, "loss": 0.3865, "step": 22449 }, { "epoch": 1.2571396572964497, "grad_norm": 1.4844506978988647, "learning_rate": 9.93578947368421e-05, "loss": 0.4183, "step": 22450 }, { "epoch": 1.2571956546085787, "grad_norm": 1.2960132360458374, "learning_rate": 9.935763157894738e-05, "loss": 0.4867, "step": 22451 }, { "epoch": 1.2572516519207078, 
"grad_norm": 1.2157917022705078, "learning_rate": 9.935736842105264e-05, "loss": 0.4369, "step": 22452 }, { "epoch": 1.2573076492328368, "grad_norm": 1.3133894205093384, "learning_rate": 9.93571052631579e-05, "loss": 0.3787, "step": 22453 }, { "epoch": 1.2573636465449658, "grad_norm": 1.1887586116790771, "learning_rate": 9.935684210526315e-05, "loss": 0.3378, "step": 22454 }, { "epoch": 1.2574196438570948, "grad_norm": 1.3167790174484253, "learning_rate": 9.935657894736843e-05, "loss": 0.3459, "step": 22455 }, { "epoch": 1.2574756411692238, "grad_norm": 1.3035658597946167, "learning_rate": 9.935631578947369e-05, "loss": 0.4213, "step": 22456 }, { "epoch": 1.2575316384813529, "grad_norm": 1.229737401008606, "learning_rate": 9.935605263157896e-05, "loss": 0.2927, "step": 22457 }, { "epoch": 1.2575876357934819, "grad_norm": 1.2345038652420044, "learning_rate": 9.935578947368421e-05, "loss": 0.4091, "step": 22458 }, { "epoch": 1.257643633105611, "grad_norm": 1.5438814163208008, "learning_rate": 9.935552631578948e-05, "loss": 0.5458, "step": 22459 }, { "epoch": 1.25769963041774, "grad_norm": 1.3399029970169067, "learning_rate": 9.935526315789474e-05, "loss": 0.4387, "step": 22460 }, { "epoch": 1.257755627729869, "grad_norm": 1.5059328079223633, "learning_rate": 9.935500000000001e-05, "loss": 0.5205, "step": 22461 }, { "epoch": 1.257811625041998, "grad_norm": 1.3784549236297607, "learning_rate": 9.935473684210527e-05, "loss": 0.4553, "step": 22462 }, { "epoch": 1.257867622354127, "grad_norm": 1.2881489992141724, "learning_rate": 9.935447368421053e-05, "loss": 0.4272, "step": 22463 }, { "epoch": 1.257923619666256, "grad_norm": 1.2905099391937256, "learning_rate": 9.935421052631579e-05, "loss": 0.4378, "step": 22464 }, { "epoch": 1.257979616978385, "grad_norm": 1.4473720788955688, "learning_rate": 9.935394736842105e-05, "loss": 0.4647, "step": 22465 }, { "epoch": 1.258035614290514, "grad_norm": 1.42154860496521, "learning_rate": 9.935368421052633e-05, "loss": 0.3559, 
"step": 22466 }, { "epoch": 1.258091611602643, "grad_norm": 1.2381058931350708, "learning_rate": 9.935342105263159e-05, "loss": 0.4903, "step": 22467 }, { "epoch": 1.258147608914772, "grad_norm": 1.1547553539276123, "learning_rate": 9.935315789473685e-05, "loss": 0.2718, "step": 22468 }, { "epoch": 1.258203606226901, "grad_norm": 1.392749547958374, "learning_rate": 9.93528947368421e-05, "loss": 0.5243, "step": 22469 }, { "epoch": 1.25825960353903, "grad_norm": 1.1658364534378052, "learning_rate": 9.935263157894738e-05, "loss": 0.4561, "step": 22470 }, { "epoch": 1.2583156008511591, "grad_norm": 1.4107457399368286, "learning_rate": 9.935236842105264e-05, "loss": 0.3249, "step": 22471 }, { "epoch": 1.2583715981632881, "grad_norm": 1.2678806781768799, "learning_rate": 9.93521052631579e-05, "loss": 0.3788, "step": 22472 }, { "epoch": 1.2584275954754172, "grad_norm": 1.3851932287216187, "learning_rate": 9.935184210526316e-05, "loss": 0.4094, "step": 22473 }, { "epoch": 1.2584835927875462, "grad_norm": 1.5530339479446411, "learning_rate": 9.935157894736843e-05, "loss": 0.6043, "step": 22474 }, { "epoch": 1.2585395900996752, "grad_norm": 1.711771845817566, "learning_rate": 9.935131578947369e-05, "loss": 0.6201, "step": 22475 }, { "epoch": 1.2585955874118042, "grad_norm": 1.614958643913269, "learning_rate": 9.935105263157895e-05, "loss": 0.5384, "step": 22476 }, { "epoch": 1.2586515847239332, "grad_norm": 1.2900781631469727, "learning_rate": 9.935078947368421e-05, "loss": 0.3236, "step": 22477 }, { "epoch": 1.2587075820360623, "grad_norm": 1.4119040966033936, "learning_rate": 9.935052631578948e-05, "loss": 0.4346, "step": 22478 }, { "epoch": 1.2587635793481913, "grad_norm": 1.2123664617538452, "learning_rate": 9.935026315789474e-05, "loss": 0.3862, "step": 22479 }, { "epoch": 1.2588195766603203, "grad_norm": 1.2141798734664917, "learning_rate": 9.935000000000002e-05, "loss": 0.4515, "step": 22480 }, { "epoch": 1.2588755739724493, "grad_norm": 1.382216453552246, 
"learning_rate": 9.934973684210526e-05, "loss": 0.4536, "step": 22481 }, { "epoch": 1.2589315712845783, "grad_norm": 1.378426432609558, "learning_rate": 9.934947368421052e-05, "loss": 0.581, "step": 22482 }, { "epoch": 1.2589875685967074, "grad_norm": 1.417020320892334, "learning_rate": 9.93492105263158e-05, "loss": 0.3848, "step": 22483 }, { "epoch": 1.2590435659088364, "grad_norm": 1.6204237937927246, "learning_rate": 9.934894736842106e-05, "loss": 0.5406, "step": 22484 }, { "epoch": 1.2590995632209654, "grad_norm": 1.4758626222610474, "learning_rate": 9.934868421052633e-05, "loss": 0.5592, "step": 22485 }, { "epoch": 1.2591555605330944, "grad_norm": 3.563157320022583, "learning_rate": 9.934842105263157e-05, "loss": 0.3563, "step": 22486 }, { "epoch": 1.2592115578452234, "grad_norm": 1.2028464078903198, "learning_rate": 9.934815789473685e-05, "loss": 0.3935, "step": 22487 }, { "epoch": 1.2592675551573524, "grad_norm": 1.3662872314453125, "learning_rate": 9.934789473684211e-05, "loss": 0.3752, "step": 22488 }, { "epoch": 1.2593235524694815, "grad_norm": 1.7939461469650269, "learning_rate": 9.934763157894738e-05, "loss": 0.4497, "step": 22489 }, { "epoch": 1.2593795497816105, "grad_norm": 1.3583811521530151, "learning_rate": 9.934736842105263e-05, "loss": 0.4819, "step": 22490 }, { "epoch": 1.2594355470937395, "grad_norm": 1.4784542322158813, "learning_rate": 9.93471052631579e-05, "loss": 0.512, "step": 22491 }, { "epoch": 1.2594915444058685, "grad_norm": 1.7761197090148926, "learning_rate": 9.934684210526316e-05, "loss": 0.4361, "step": 22492 }, { "epoch": 1.2595475417179975, "grad_norm": 1.5552124977111816, "learning_rate": 9.934657894736843e-05, "loss": 0.4968, "step": 22493 }, { "epoch": 1.2596035390301266, "grad_norm": 1.419356107711792, "learning_rate": 9.934631578947369e-05, "loss": 0.4412, "step": 22494 }, { "epoch": 1.2596595363422556, "grad_norm": 1.3594200611114502, "learning_rate": 9.934605263157895e-05, "loss": 0.4545, "step": 22495 }, { "epoch": 
1.2597155336543846, "grad_norm": 1.2832539081573486, "learning_rate": 9.934578947368421e-05, "loss": 0.4305, "step": 22496 }, { "epoch": 1.2597715309665136, "grad_norm": 1.2215479612350464, "learning_rate": 9.934552631578949e-05, "loss": 0.4875, "step": 22497 }, { "epoch": 1.2598275282786426, "grad_norm": 1.4554733037948608, "learning_rate": 9.934526315789475e-05, "loss": 0.4148, "step": 22498 }, { "epoch": 1.2598835255907717, "grad_norm": 1.5905406475067139, "learning_rate": 9.9345e-05, "loss": 0.4645, "step": 22499 }, { "epoch": 1.2599395229029007, "grad_norm": 1.4548741579055786, "learning_rate": 9.934473684210526e-05, "loss": 0.4613, "step": 22500 }, { "epoch": 1.2599955202150297, "grad_norm": 1.1292834281921387, "learning_rate": 9.934447368421052e-05, "loss": 0.3886, "step": 22501 }, { "epoch": 1.2600515175271587, "grad_norm": 1.3880661725997925, "learning_rate": 9.93442105263158e-05, "loss": 0.4223, "step": 22502 }, { "epoch": 1.2601075148392877, "grad_norm": 1.3671998977661133, "learning_rate": 9.934394736842106e-05, "loss": 0.4806, "step": 22503 }, { "epoch": 1.2601635121514168, "grad_norm": 4.363045692443848, "learning_rate": 9.934368421052632e-05, "loss": 0.4813, "step": 22504 }, { "epoch": 1.2602195094635458, "grad_norm": 1.4290465116500854, "learning_rate": 9.934342105263158e-05, "loss": 0.5236, "step": 22505 }, { "epoch": 1.2602755067756748, "grad_norm": 2.196024179458618, "learning_rate": 9.934315789473685e-05, "loss": 0.3131, "step": 22506 }, { "epoch": 1.2603315040878038, "grad_norm": 1.4151439666748047, "learning_rate": 9.934289473684211e-05, "loss": 0.4163, "step": 22507 }, { "epoch": 1.2603875013999328, "grad_norm": 1.2434852123260498, "learning_rate": 9.934263157894737e-05, "loss": 0.4072, "step": 22508 }, { "epoch": 1.2604434987120618, "grad_norm": 1.2777451276779175, "learning_rate": 9.934236842105263e-05, "loss": 0.4306, "step": 22509 }, { "epoch": 1.2604994960241909, "grad_norm": 1.3524788618087769, "learning_rate": 9.93421052631579e-05, 
"loss": 0.4388, "step": 22510 }, { "epoch": 1.2605554933363199, "grad_norm": 1.5618058443069458, "learning_rate": 9.934184210526316e-05, "loss": 0.6227, "step": 22511 }, { "epoch": 1.260611490648449, "grad_norm": 1.243468165397644, "learning_rate": 9.934157894736844e-05, "loss": 0.4222, "step": 22512 }, { "epoch": 1.260667487960578, "grad_norm": 2.1506240367889404, "learning_rate": 9.934131578947368e-05, "loss": 0.4433, "step": 22513 }, { "epoch": 1.260723485272707, "grad_norm": 3.119558572769165, "learning_rate": 9.934105263157896e-05, "loss": 0.4319, "step": 22514 }, { "epoch": 1.260779482584836, "grad_norm": 1.3329764604568481, "learning_rate": 9.934078947368421e-05, "loss": 0.4438, "step": 22515 }, { "epoch": 1.260835479896965, "grad_norm": 1.4162479639053345, "learning_rate": 9.934052631578949e-05, "loss": 0.4017, "step": 22516 }, { "epoch": 1.260891477209094, "grad_norm": 1.6753971576690674, "learning_rate": 9.934026315789475e-05, "loss": 0.5185, "step": 22517 }, { "epoch": 1.260947474521223, "grad_norm": 1.3797465562820435, "learning_rate": 9.934e-05, "loss": 0.3933, "step": 22518 }, { "epoch": 1.261003471833352, "grad_norm": 1.4132909774780273, "learning_rate": 9.933973684210527e-05, "loss": 0.4236, "step": 22519 }, { "epoch": 1.261059469145481, "grad_norm": 1.2999058961868286, "learning_rate": 9.933947368421053e-05, "loss": 0.3285, "step": 22520 }, { "epoch": 1.26111546645761, "grad_norm": 1.4819581508636475, "learning_rate": 9.93392105263158e-05, "loss": 0.3924, "step": 22521 }, { "epoch": 1.261171463769739, "grad_norm": 2.1245462894439697, "learning_rate": 9.933894736842106e-05, "loss": 0.4404, "step": 22522 }, { "epoch": 1.2612274610818681, "grad_norm": 1.2867658138275146, "learning_rate": 9.933868421052632e-05, "loss": 0.5039, "step": 22523 }, { "epoch": 1.2612834583939971, "grad_norm": 1.4297866821289062, "learning_rate": 9.933842105263158e-05, "loss": 0.4432, "step": 22524 }, { "epoch": 1.2613394557061262, "grad_norm": 1.3429068326950073, 
"learning_rate": 9.933815789473685e-05, "loss": 0.5242, "step": 22525 }, { "epoch": 1.2613954530182552, "grad_norm": 2.5976576805114746, "learning_rate": 9.933789473684211e-05, "loss": 0.5616, "step": 22526 }, { "epoch": 1.2614514503303842, "grad_norm": 1.0975341796875, "learning_rate": 9.933763157894737e-05, "loss": 0.3919, "step": 22527 }, { "epoch": 1.2615074476425132, "grad_norm": 1.465174913406372, "learning_rate": 9.933736842105263e-05, "loss": 0.5706, "step": 22528 }, { "epoch": 1.2615634449546422, "grad_norm": 1.300743579864502, "learning_rate": 9.93371052631579e-05, "loss": 0.4728, "step": 22529 }, { "epoch": 1.2616194422667713, "grad_norm": 2.122889757156372, "learning_rate": 9.933684210526317e-05, "loss": 0.3431, "step": 22530 }, { "epoch": 1.2616754395789003, "grad_norm": 1.3574353456497192, "learning_rate": 9.933657894736842e-05, "loss": 0.4763, "step": 22531 }, { "epoch": 1.2617314368910293, "grad_norm": 1.1337097883224487, "learning_rate": 9.933631578947368e-05, "loss": 0.3846, "step": 22532 }, { "epoch": 1.2617874342031583, "grad_norm": 1.2418605089187622, "learning_rate": 9.933605263157896e-05, "loss": 0.4468, "step": 22533 }, { "epoch": 1.2618434315152873, "grad_norm": 1.3232368230819702, "learning_rate": 9.933578947368422e-05, "loss": 0.5621, "step": 22534 }, { "epoch": 1.2618994288274163, "grad_norm": 1.1776726245880127, "learning_rate": 9.933552631578948e-05, "loss": 0.4511, "step": 22535 }, { "epoch": 1.2619554261395454, "grad_norm": 1.4379807710647583, "learning_rate": 9.933526315789474e-05, "loss": 0.4569, "step": 22536 }, { "epoch": 1.2620114234516744, "grad_norm": 1.3012895584106445, "learning_rate": 9.9335e-05, "loss": 0.3947, "step": 22537 }, { "epoch": 1.2620674207638034, "grad_norm": 1.8353028297424316, "learning_rate": 9.933473684210527e-05, "loss": 0.3924, "step": 22538 }, { "epoch": 1.2621234180759324, "grad_norm": 1.2095950841903687, "learning_rate": 9.933447368421053e-05, "loss": 0.4401, "step": 22539 }, { "epoch": 
1.2621794153880614, "grad_norm": 1.2847932577133179, "learning_rate": 9.93342105263158e-05, "loss": 0.4456, "step": 22540 }, { "epoch": 1.2622354127001905, "grad_norm": 1.5020012855529785, "learning_rate": 9.933394736842105e-05, "loss": 0.5991, "step": 22541 }, { "epoch": 1.2622914100123195, "grad_norm": 1.4112101793289185, "learning_rate": 9.933368421052632e-05, "loss": 0.4483, "step": 22542 }, { "epoch": 1.2623474073244485, "grad_norm": 3.058119058609009, "learning_rate": 9.933342105263158e-05, "loss": 0.3954, "step": 22543 }, { "epoch": 1.2624034046365775, "grad_norm": 1.421589732170105, "learning_rate": 9.933315789473686e-05, "loss": 0.4271, "step": 22544 }, { "epoch": 1.2624594019487065, "grad_norm": 1.2064660787582397, "learning_rate": 9.93328947368421e-05, "loss": 0.3404, "step": 22545 }, { "epoch": 1.2625153992608356, "grad_norm": 1.434069037437439, "learning_rate": 9.933263157894737e-05, "loss": 0.4127, "step": 22546 }, { "epoch": 1.2625713965729646, "grad_norm": 1.3939950466156006, "learning_rate": 9.933236842105263e-05, "loss": 0.4086, "step": 22547 }, { "epoch": 1.2626273938850936, "grad_norm": 1.2280699014663696, "learning_rate": 9.933210526315791e-05, "loss": 0.5022, "step": 22548 }, { "epoch": 1.2626833911972226, "grad_norm": 1.229111909866333, "learning_rate": 9.933184210526317e-05, "loss": 0.4686, "step": 22549 }, { "epoch": 1.2627393885093516, "grad_norm": 1.3211842775344849, "learning_rate": 9.933157894736843e-05, "loss": 0.3912, "step": 22550 }, { "epoch": 1.2627953858214807, "grad_norm": 1.461344838142395, "learning_rate": 9.933131578947369e-05, "loss": 0.5204, "step": 22551 }, { "epoch": 1.2628513831336097, "grad_norm": 1.4679924249649048, "learning_rate": 9.933105263157895e-05, "loss": 0.4477, "step": 22552 }, { "epoch": 1.2629073804457387, "grad_norm": 1.2944622039794922, "learning_rate": 9.933078947368422e-05, "loss": 0.3761, "step": 22553 }, { "epoch": 1.2629633777578677, "grad_norm": 1.4670464992523193, "learning_rate": 
9.933052631578948e-05, "loss": 0.4294, "step": 22554 }, { "epoch": 1.2630193750699967, "grad_norm": 1.3800796270370483, "learning_rate": 9.933026315789474e-05, "loss": 0.4694, "step": 22555 }, { "epoch": 1.2630753723821257, "grad_norm": 1.2836834192276, "learning_rate": 9.933e-05, "loss": 0.437, "step": 22556 }, { "epoch": 1.2631313696942548, "grad_norm": 1.2517718076705933, "learning_rate": 9.932973684210527e-05, "loss": 0.446, "step": 22557 }, { "epoch": 1.2631873670063838, "grad_norm": 1.3426647186279297, "learning_rate": 9.932947368421053e-05, "loss": 0.4189, "step": 22558 }, { "epoch": 1.2632433643185128, "grad_norm": 1.2995389699935913, "learning_rate": 9.932921052631579e-05, "loss": 0.3016, "step": 22559 }, { "epoch": 1.2632993616306418, "grad_norm": 1.4668916463851929, "learning_rate": 9.932894736842105e-05, "loss": 0.3776, "step": 22560 }, { "epoch": 1.2633553589427708, "grad_norm": 1.3852781057357788, "learning_rate": 9.932868421052633e-05, "loss": 0.5454, "step": 22561 }, { "epoch": 1.2634113562548999, "grad_norm": 1.3087972402572632, "learning_rate": 9.932842105263158e-05, "loss": 0.4569, "step": 22562 }, { "epoch": 1.2634673535670289, "grad_norm": 1.356679916381836, "learning_rate": 9.932815789473684e-05, "loss": 0.4116, "step": 22563 }, { "epoch": 1.263523350879158, "grad_norm": 1.1689960956573486, "learning_rate": 9.93278947368421e-05, "loss": 0.3984, "step": 22564 }, { "epoch": 1.263579348191287, "grad_norm": 1.408976674079895, "learning_rate": 9.932763157894738e-05, "loss": 0.5703, "step": 22565 }, { "epoch": 1.263635345503416, "grad_norm": 1.362504243850708, "learning_rate": 9.932736842105264e-05, "loss": 0.3978, "step": 22566 }, { "epoch": 1.263691342815545, "grad_norm": 1.2659173011779785, "learning_rate": 9.932710526315791e-05, "loss": 0.4344, "step": 22567 }, { "epoch": 1.263747340127674, "grad_norm": 1.2167366743087769, "learning_rate": 9.932684210526316e-05, "loss": 0.4085, "step": 22568 }, { "epoch": 1.263803337439803, "grad_norm": 
1.1932754516601562, "learning_rate": 9.932657894736842e-05, "loss": 0.3856, "step": 22569 }, { "epoch": 1.263859334751932, "grad_norm": 1.2302398681640625, "learning_rate": 9.932631578947369e-05, "loss": 0.3514, "step": 22570 }, { "epoch": 1.263915332064061, "grad_norm": 1.4113627672195435, "learning_rate": 9.932605263157895e-05, "loss": 0.562, "step": 22571 }, { "epoch": 1.26397132937619, "grad_norm": 1.2027363777160645, "learning_rate": 9.932578947368422e-05, "loss": 0.4555, "step": 22572 }, { "epoch": 1.264027326688319, "grad_norm": 1.3833410739898682, "learning_rate": 9.932552631578947e-05, "loss": 0.4095, "step": 22573 }, { "epoch": 1.264083324000448, "grad_norm": 1.2301329374313354, "learning_rate": 9.932526315789474e-05, "loss": 0.3541, "step": 22574 }, { "epoch": 1.2641393213125771, "grad_norm": 1.1803874969482422, "learning_rate": 9.9325e-05, "loss": 0.3284, "step": 22575 }, { "epoch": 1.2641953186247061, "grad_norm": 1.8832403421401978, "learning_rate": 9.932473684210528e-05, "loss": 0.4084, "step": 22576 }, { "epoch": 1.2642513159368352, "grad_norm": 1.1612879037857056, "learning_rate": 9.932447368421053e-05, "loss": 0.3429, "step": 22577 }, { "epoch": 1.2643073132489642, "grad_norm": 1.7041854858398438, "learning_rate": 9.93242105263158e-05, "loss": 0.3767, "step": 22578 }, { "epoch": 1.2643633105610932, "grad_norm": 1.5245976448059082, "learning_rate": 9.932394736842105e-05, "loss": 0.4273, "step": 22579 }, { "epoch": 1.2644193078732222, "grad_norm": 1.4128202199935913, "learning_rate": 9.932368421052633e-05, "loss": 0.3816, "step": 22580 }, { "epoch": 1.2644753051853512, "grad_norm": 1.1555976867675781, "learning_rate": 9.932342105263159e-05, "loss": 0.4031, "step": 22581 }, { "epoch": 1.2645313024974802, "grad_norm": 1.5287786722183228, "learning_rate": 9.932315789473685e-05, "loss": 0.4134, "step": 22582 }, { "epoch": 1.2645872998096093, "grad_norm": 1.2848385572433472, "learning_rate": 9.932289473684211e-05, "loss": 0.3846, "step": 22583 }, { 
"epoch": 1.2646432971217383, "grad_norm": 4.332828521728516, "learning_rate": 9.932263157894738e-05, "loss": 0.5469, "step": 22584 }, { "epoch": 1.2646992944338673, "grad_norm": 1.4228366613388062, "learning_rate": 9.932236842105264e-05, "loss": 0.6276, "step": 22585 }, { "epoch": 1.2647552917459963, "grad_norm": 1.3125123977661133, "learning_rate": 9.93221052631579e-05, "loss": 0.5502, "step": 22586 }, { "epoch": 1.2648112890581253, "grad_norm": 1.3094737529754639, "learning_rate": 9.932184210526316e-05, "loss": 0.4707, "step": 22587 }, { "epoch": 1.2648672863702544, "grad_norm": 1.190169334411621, "learning_rate": 9.932157894736842e-05, "loss": 0.4817, "step": 22588 }, { "epoch": 1.2649232836823834, "grad_norm": 1.2978438138961792, "learning_rate": 9.932131578947369e-05, "loss": 0.4921, "step": 22589 }, { "epoch": 1.2649792809945124, "grad_norm": 1.3994370698928833, "learning_rate": 9.932105263157895e-05, "loss": 0.8326, "step": 22590 }, { "epoch": 1.2650352783066414, "grad_norm": null, "learning_rate": 9.932105263157895e-05, "loss": 0.5344, "step": 22591 }, { "epoch": 1.2650912756187702, "grad_norm": 1.2328864336013794, "learning_rate": 9.932078947368421e-05, "loss": 0.3464, "step": 22592 }, { "epoch": 1.2651472729308992, "grad_norm": 1.366552710533142, "learning_rate": 9.932052631578947e-05, "loss": 0.481, "step": 22593 }, { "epoch": 1.2652032702430283, "grad_norm": 1.4918768405914307, "learning_rate": 9.932026315789474e-05, "loss": 0.5796, "step": 22594 }, { "epoch": 1.2652592675551573, "grad_norm": 1.2238956689834595, "learning_rate": 9.932e-05, "loss": 0.4425, "step": 22595 }, { "epoch": 1.2653152648672863, "grad_norm": 1.2184131145477295, "learning_rate": 9.931973684210526e-05, "loss": 0.3699, "step": 22596 }, { "epoch": 1.2653712621794153, "grad_norm": 1.5443089008331299, "learning_rate": 9.931947368421052e-05, "loss": 0.4648, "step": 22597 }, { "epoch": 1.2654272594915443, "grad_norm": 1.5369009971618652, "learning_rate": 9.93192105263158e-05, "loss": 
0.4838, "step": 22598 }, { "epoch": 1.2654832568036734, "grad_norm": 1.3724949359893799, "learning_rate": 9.931894736842106e-05, "loss": 0.5331, "step": 22599 }, { "epoch": 1.2655392541158024, "grad_norm": 1.2850879430770874, "learning_rate": 9.931868421052633e-05, "loss": 0.4102, "step": 22600 }, { "epoch": 1.2655952514279314, "grad_norm": 1.2060558795928955, "learning_rate": 9.931842105263158e-05, "loss": 0.4479, "step": 22601 }, { "epoch": 1.2656512487400604, "grad_norm": 1.307312250137329, "learning_rate": 9.931815789473685e-05, "loss": 0.4925, "step": 22602 }, { "epoch": 1.2657072460521894, "grad_norm": 1.4637165069580078, "learning_rate": 9.931789473684211e-05, "loss": 0.4648, "step": 22603 }, { "epoch": 1.2657632433643184, "grad_norm": 1.487190842628479, "learning_rate": 9.931763157894737e-05, "loss": 0.4919, "step": 22604 }, { "epoch": 1.2658192406764475, "grad_norm": 1.3126341104507446, "learning_rate": 9.931736842105264e-05, "loss": 0.3671, "step": 22605 }, { "epoch": 1.2658752379885765, "grad_norm": 1.2969690561294556, "learning_rate": 9.931710526315789e-05, "loss": 0.5605, "step": 22606 }, { "epoch": 1.2659312353007055, "grad_norm": 1.6308763027191162, "learning_rate": 9.931684210526316e-05, "loss": 0.4663, "step": 22607 }, { "epoch": 1.2659872326128345, "grad_norm": 1.4421902894973755, "learning_rate": 9.931657894736842e-05, "loss": 0.5375, "step": 22608 }, { "epoch": 1.2660432299249635, "grad_norm": 1.2509255409240723, "learning_rate": 9.93163157894737e-05, "loss": 0.4908, "step": 22609 }, { "epoch": 1.2660992272370926, "grad_norm": 1.2548978328704834, "learning_rate": 9.931605263157895e-05, "loss": 0.4617, "step": 22610 }, { "epoch": 1.2661552245492216, "grad_norm": 1.403206467628479, "learning_rate": 9.931578947368421e-05, "loss": 0.4799, "step": 22611 }, { "epoch": 1.2662112218613506, "grad_norm": 1.8052750825881958, "learning_rate": 9.931552631578947e-05, "loss": 0.5925, "step": 22612 }, { "epoch": 1.2662672191734796, "grad_norm": 
6.041443824768066, "learning_rate": 9.931526315789475e-05, "loss": 0.4942, "step": 22613 }, { "epoch": 1.2663232164856086, "grad_norm": 1.2583448886871338, "learning_rate": 9.931500000000001e-05, "loss": 0.456, "step": 22614 }, { "epoch": 1.2663792137977377, "grad_norm": 1.2603918313980103, "learning_rate": 9.931473684210527e-05, "loss": 0.3992, "step": 22615 }, { "epoch": 1.2664352111098667, "grad_norm": 1.594143271446228, "learning_rate": 9.931447368421053e-05, "loss": 0.6076, "step": 22616 }, { "epoch": 1.2664912084219957, "grad_norm": 1.2620702981948853, "learning_rate": 9.93142105263158e-05, "loss": 0.3765, "step": 22617 }, { "epoch": 1.2665472057341247, "grad_norm": 1.5117707252502441, "learning_rate": 9.931394736842106e-05, "loss": 0.4807, "step": 22618 }, { "epoch": 1.2666032030462537, "grad_norm": 1.297575831413269, "learning_rate": 9.931368421052632e-05, "loss": 0.4377, "step": 22619 }, { "epoch": 1.2666592003583828, "grad_norm": 1.1439862251281738, "learning_rate": 9.931342105263158e-05, "loss": 0.2711, "step": 22620 }, { "epoch": 1.2667151976705118, "grad_norm": 1.2498003244400024, "learning_rate": 9.931315789473685e-05, "loss": 0.4216, "step": 22621 }, { "epoch": 1.2667711949826408, "grad_norm": 1.4508490562438965, "learning_rate": 9.931289473684211e-05, "loss": 0.5733, "step": 22622 }, { "epoch": 1.2668271922947698, "grad_norm": 1.2860407829284668, "learning_rate": 9.931263157894737e-05, "loss": 0.4151, "step": 22623 }, { "epoch": 1.2668831896068988, "grad_norm": 1.059950590133667, "learning_rate": 9.931236842105263e-05, "loss": 0.4224, "step": 22624 }, { "epoch": 1.2669391869190278, "grad_norm": 1.1142617464065552, "learning_rate": 9.931210526315789e-05, "loss": 0.348, "step": 22625 }, { "epoch": 1.2669951842311569, "grad_norm": 2.086233615875244, "learning_rate": 9.931184210526316e-05, "loss": 0.5684, "step": 22626 }, { "epoch": 1.2670511815432859, "grad_norm": 1.5305190086364746, "learning_rate": 9.931157894736842e-05, "loss": 0.4595, "step": 22627 
}, { "epoch": 1.267107178855415, "grad_norm": 1.4111287593841553, "learning_rate": 9.93113157894737e-05, "loss": 0.5267, "step": 22628 }, { "epoch": 1.267163176167544, "grad_norm": 2.5089962482452393, "learning_rate": 9.931105263157894e-05, "loss": 0.4802, "step": 22629 }, { "epoch": 1.267219173479673, "grad_norm": 1.3890100717544556, "learning_rate": 9.931078947368422e-05, "loss": 0.4647, "step": 22630 }, { "epoch": 1.267275170791802, "grad_norm": 1.2288728952407837, "learning_rate": 9.931052631578948e-05, "loss": 0.4682, "step": 22631 }, { "epoch": 1.267331168103931, "grad_norm": 1.0214381217956543, "learning_rate": 9.931026315789475e-05, "loss": 0.3127, "step": 22632 }, { "epoch": 1.26738716541606, "grad_norm": 1.4180487394332886, "learning_rate": 9.931000000000001e-05, "loss": 0.3874, "step": 22633 }, { "epoch": 1.267443162728189, "grad_norm": 1.4683669805526733, "learning_rate": 9.930973684210527e-05, "loss": 0.424, "step": 22634 }, { "epoch": 1.267499160040318, "grad_norm": 1.7716941833496094, "learning_rate": 9.930947368421053e-05, "loss": 0.5721, "step": 22635 }, { "epoch": 1.267555157352447, "grad_norm": 1.7148070335388184, "learning_rate": 9.93092105263158e-05, "loss": 0.5187, "step": 22636 }, { "epoch": 1.267611154664576, "grad_norm": 1.155671238899231, "learning_rate": 9.930894736842106e-05, "loss": 0.2932, "step": 22637 }, { "epoch": 1.267667151976705, "grad_norm": 1.2717502117156982, "learning_rate": 9.930868421052632e-05, "loss": 0.3633, "step": 22638 }, { "epoch": 1.2677231492888341, "grad_norm": 1.1481229066848755, "learning_rate": 9.930842105263158e-05, "loss": 0.3724, "step": 22639 }, { "epoch": 1.2677791466009631, "grad_norm": 1.5281686782836914, "learning_rate": 9.930815789473684e-05, "loss": 0.4342, "step": 22640 }, { "epoch": 1.2678351439130922, "grad_norm": 1.3560229539871216, "learning_rate": 9.930789473684211e-05, "loss": 0.3925, "step": 22641 }, { "epoch": 1.2678911412252212, "grad_norm": 1.318763017654419, "learning_rate": 
9.930763157894737e-05, "loss": 0.3761, "step": 22642 }, { "epoch": 1.2679471385373502, "grad_norm": 1.2733261585235596, "learning_rate": 9.930736842105263e-05, "loss": 0.5068, "step": 22643 }, { "epoch": 1.2680031358494792, "grad_norm": 1.5524920225143433, "learning_rate": 9.93071052631579e-05, "loss": 0.4662, "step": 22644 }, { "epoch": 1.2680591331616082, "grad_norm": 1.2721418142318726, "learning_rate": 9.930684210526317e-05, "loss": 0.4295, "step": 22645 }, { "epoch": 1.2681151304737373, "grad_norm": 1.371688723564148, "learning_rate": 9.930657894736843e-05, "loss": 0.4775, "step": 22646 }, { "epoch": 1.2681711277858663, "grad_norm": 1.32282292842865, "learning_rate": 9.930631578947369e-05, "loss": 0.4748, "step": 22647 }, { "epoch": 1.2682271250979953, "grad_norm": 1.218231439590454, "learning_rate": 9.930605263157895e-05, "loss": 0.4281, "step": 22648 }, { "epoch": 1.2682831224101243, "grad_norm": 1.451771855354309, "learning_rate": 9.930578947368422e-05, "loss": 0.4842, "step": 22649 }, { "epoch": 1.2683391197222533, "grad_norm": 3.515385150909424, "learning_rate": 9.930552631578948e-05, "loss": 0.5745, "step": 22650 }, { "epoch": 1.2683951170343823, "grad_norm": 1.4690711498260498, "learning_rate": 9.930526315789474e-05, "loss": 0.4232, "step": 22651 }, { "epoch": 1.2684511143465114, "grad_norm": 2.0906107425689697, "learning_rate": 9.9305e-05, "loss": 0.3972, "step": 22652 }, { "epoch": 1.2685071116586404, "grad_norm": 1.2995021343231201, "learning_rate": 9.930473684210527e-05, "loss": 0.5252, "step": 22653 }, { "epoch": 1.2685631089707694, "grad_norm": 1.2416428327560425, "learning_rate": 9.930447368421053e-05, "loss": 0.4176, "step": 22654 }, { "epoch": 1.2686191062828984, "grad_norm": 1.553397536277771, "learning_rate": 9.93042105263158e-05, "loss": 0.4392, "step": 22655 }, { "epoch": 1.2686751035950274, "grad_norm": 1.4556041955947876, "learning_rate": 9.930394736842105e-05, "loss": 0.4612, "step": 22656 }, { "epoch": 1.2687311009071565, "grad_norm": 
1.3560270071029663, "learning_rate": 9.930368421052631e-05, "loss": 0.5722, "step": 22657 }, { "epoch": 1.2687870982192855, "grad_norm": 1.7231885194778442, "learning_rate": 9.930342105263158e-05, "loss": 0.5763, "step": 22658 }, { "epoch": 1.2688430955314145, "grad_norm": 1.4385744333267212, "learning_rate": 9.930315789473684e-05, "loss": 0.3599, "step": 22659 }, { "epoch": 1.2688990928435435, "grad_norm": 2.524521589279175, "learning_rate": 9.930289473684212e-05, "loss": 0.6094, "step": 22660 }, { "epoch": 1.2689550901556725, "grad_norm": 3.413239002227783, "learning_rate": 9.930263157894736e-05, "loss": 0.6705, "step": 22661 }, { "epoch": 1.2690110874678016, "grad_norm": 1.4177204370498657, "learning_rate": 9.930236842105264e-05, "loss": 0.4917, "step": 22662 }, { "epoch": 1.2690670847799306, "grad_norm": 1.4034504890441895, "learning_rate": 9.93021052631579e-05, "loss": 0.432, "step": 22663 }, { "epoch": 1.2691230820920596, "grad_norm": 1.50716233253479, "learning_rate": 9.930184210526317e-05, "loss": 0.5209, "step": 22664 }, { "epoch": 1.2691790794041886, "grad_norm": 1.3623316287994385, "learning_rate": 9.930157894736843e-05, "loss": 0.4581, "step": 22665 }, { "epoch": 1.2692350767163176, "grad_norm": 1.5193994045257568, "learning_rate": 9.930131578947369e-05, "loss": 0.3974, "step": 22666 }, { "epoch": 1.2692910740284467, "grad_norm": 1.293651819229126, "learning_rate": 9.930105263157895e-05, "loss": 0.3696, "step": 22667 }, { "epoch": 1.2693470713405757, "grad_norm": 1.8996182680130005, "learning_rate": 9.930078947368422e-05, "loss": 0.4571, "step": 22668 }, { "epoch": 1.2694030686527047, "grad_norm": 1.2406213283538818, "learning_rate": 9.930052631578948e-05, "loss": 0.4494, "step": 22669 }, { "epoch": 1.2694590659648337, "grad_norm": 4.635396957397461, "learning_rate": 9.930026315789474e-05, "loss": 0.5383, "step": 22670 }, { "epoch": 1.2695150632769627, "grad_norm": 1.3629764318466187, "learning_rate": 9.93e-05, "loss": 0.3442, "step": 22671 }, { 
"epoch": 1.2695710605890917, "grad_norm": 1.5809091329574585, "learning_rate": 9.929973684210527e-05, "loss": 0.4282, "step": 22672 }, { "epoch": 1.2696270579012208, "grad_norm": 1.1668684482574463, "learning_rate": 9.929947368421053e-05, "loss": 0.3434, "step": 22673 }, { "epoch": 1.2696830552133498, "grad_norm": 1.1767786741256714, "learning_rate": 9.92992105263158e-05, "loss": 0.3571, "step": 22674 }, { "epoch": 1.2697390525254788, "grad_norm": 1.4719984531402588, "learning_rate": 9.929894736842105e-05, "loss": 0.4222, "step": 22675 }, { "epoch": 1.2697950498376078, "grad_norm": 1.181861162185669, "learning_rate": 9.929868421052631e-05, "loss": 0.4426, "step": 22676 }, { "epoch": 1.2698510471497368, "grad_norm": 1.6003828048706055, "learning_rate": 9.929842105263159e-05, "loss": 0.4858, "step": 22677 }, { "epoch": 1.2699070444618659, "grad_norm": 2.3208487033843994, "learning_rate": 9.929815789473685e-05, "loss": 0.506, "step": 22678 }, { "epoch": 1.2699630417739949, "grad_norm": 1.3948818445205688, "learning_rate": 9.92978947368421e-05, "loss": 0.3287, "step": 22679 }, { "epoch": 1.270019039086124, "grad_norm": 1.5072851181030273, "learning_rate": 9.929763157894737e-05, "loss": 0.4279, "step": 22680 }, { "epoch": 1.270075036398253, "grad_norm": 1.5477532148361206, "learning_rate": 9.929736842105264e-05, "loss": 0.5258, "step": 22681 }, { "epoch": 1.270131033710382, "grad_norm": 1.3021478652954102, "learning_rate": 9.92971052631579e-05, "loss": 0.4106, "step": 22682 }, { "epoch": 1.270187031022511, "grad_norm": 12.972148895263672, "learning_rate": 9.929684210526317e-05, "loss": 0.4896, "step": 22683 }, { "epoch": 1.27024302833464, "grad_norm": 1.3581346273422241, "learning_rate": 9.929657894736842e-05, "loss": 0.5091, "step": 22684 }, { "epoch": 1.270299025646769, "grad_norm": 2.3846018314361572, "learning_rate": 9.929631578947369e-05, "loss": 0.5777, "step": 22685 }, { "epoch": 1.270355022958898, "grad_norm": 1.26251220703125, "learning_rate": 
9.929605263157895e-05, "loss": 0.4046, "step": 22686 }, { "epoch": 1.270411020271027, "grad_norm": 1.7669390439987183, "learning_rate": 9.929578947368422e-05, "loss": 0.3931, "step": 22687 }, { "epoch": 1.270467017583156, "grad_norm": 1.5544970035552979, "learning_rate": 9.929552631578948e-05, "loss": 0.5815, "step": 22688 }, { "epoch": 1.270523014895285, "grad_norm": 1.0569502115249634, "learning_rate": 9.929526315789474e-05, "loss": 0.3724, "step": 22689 }, { "epoch": 1.270579012207414, "grad_norm": 1.8381023406982422, "learning_rate": 9.9295e-05, "loss": 0.5538, "step": 22690 }, { "epoch": 1.2706350095195431, "grad_norm": 1.1855922937393188, "learning_rate": 9.929473684210526e-05, "loss": 0.4204, "step": 22691 }, { "epoch": 1.2706910068316721, "grad_norm": 1.2583905458450317, "learning_rate": 9.929447368421054e-05, "loss": 0.4074, "step": 22692 }, { "epoch": 1.2707470041438012, "grad_norm": 1.2125978469848633, "learning_rate": 9.929421052631578e-05, "loss": 0.4162, "step": 22693 }, { "epoch": 1.2708030014559302, "grad_norm": 1.5250681638717651, "learning_rate": 9.929394736842106e-05, "loss": 0.4806, "step": 22694 }, { "epoch": 1.2708589987680592, "grad_norm": 1.3331366777420044, "learning_rate": 9.929368421052632e-05, "loss": 0.3697, "step": 22695 }, { "epoch": 1.2709149960801882, "grad_norm": 1.1396459341049194, "learning_rate": 9.929342105263159e-05, "loss": 0.4164, "step": 22696 }, { "epoch": 1.2709709933923172, "grad_norm": 1.2768718004226685, "learning_rate": 9.929315789473685e-05, "loss": 0.3931, "step": 22697 }, { "epoch": 1.2710269907044462, "grad_norm": 1.8912655115127563, "learning_rate": 9.929289473684211e-05, "loss": 0.5287, "step": 22698 }, { "epoch": 1.2710829880165753, "grad_norm": 1.3284953832626343, "learning_rate": 9.929263157894737e-05, "loss": 0.4845, "step": 22699 }, { "epoch": 1.2711389853287043, "grad_norm": 1.2338652610778809, "learning_rate": 9.929236842105264e-05, "loss": 0.4182, "step": 22700 }, { "epoch": 1.2711949826408333, 
"grad_norm": 1.208163857460022, "learning_rate": 9.92921052631579e-05, "loss": 0.4332, "step": 22701 }, { "epoch": 1.2712509799529623, "grad_norm": 1.2945038080215454, "learning_rate": 9.929184210526316e-05, "loss": 0.4089, "step": 22702 }, { "epoch": 1.2713069772650913, "grad_norm": 1.4350838661193848, "learning_rate": 9.929157894736842e-05, "loss": 0.5331, "step": 22703 }, { "epoch": 1.2713629745772204, "grad_norm": 1.4823426008224487, "learning_rate": 9.92913157894737e-05, "loss": 0.4397, "step": 22704 }, { "epoch": 1.2714189718893494, "grad_norm": 1.3634966611862183, "learning_rate": 9.929105263157895e-05, "loss": 0.4962, "step": 22705 }, { "epoch": 1.2714749692014784, "grad_norm": 1.4452133178710938, "learning_rate": 9.929078947368421e-05, "loss": 0.5064, "step": 22706 }, { "epoch": 1.2715309665136074, "grad_norm": 1.3377217054367065, "learning_rate": 9.929052631578947e-05, "loss": 0.4391, "step": 22707 }, { "epoch": 1.2715869638257364, "grad_norm": 1.923106074333191, "learning_rate": 9.929026315789473e-05, "loss": 0.5312, "step": 22708 }, { "epoch": 1.2716429611378655, "grad_norm": 1.1288068294525146, "learning_rate": 9.929e-05, "loss": 0.4602, "step": 22709 }, { "epoch": 1.2716989584499945, "grad_norm": 1.2240934371948242, "learning_rate": 9.928973684210527e-05, "loss": 0.4053, "step": 22710 }, { "epoch": 1.2717549557621235, "grad_norm": 1.4392454624176025, "learning_rate": 9.928947368421053e-05, "loss": 0.4006, "step": 22711 }, { "epoch": 1.2718109530742525, "grad_norm": 1.578526496887207, "learning_rate": 9.928921052631579e-05, "loss": 0.5425, "step": 22712 }, { "epoch": 1.2718669503863815, "grad_norm": 2.0550968647003174, "learning_rate": 9.928894736842106e-05, "loss": 0.5106, "step": 22713 }, { "epoch": 1.2719229476985106, "grad_norm": 1.3066192865371704, "learning_rate": 9.928868421052632e-05, "loss": 0.3744, "step": 22714 }, { "epoch": 1.2719789450106396, "grad_norm": 1.4003432989120483, "learning_rate": 9.928842105263159e-05, "loss": 0.4796, "step": 
22715 }, { "epoch": 1.2720349423227684, "grad_norm": 1.2575757503509521, "learning_rate": 9.928815789473684e-05, "loss": 0.3904, "step": 22716 }, { "epoch": 1.2720909396348974, "grad_norm": 1.1841113567352295, "learning_rate": 9.928789473684211e-05, "loss": 0.3849, "step": 22717 }, { "epoch": 1.2721469369470264, "grad_norm": 1.306808352470398, "learning_rate": 9.928763157894737e-05, "loss": 0.434, "step": 22718 }, { "epoch": 1.2722029342591554, "grad_norm": 1.1899160146713257, "learning_rate": 9.928736842105264e-05, "loss": 0.4461, "step": 22719 }, { "epoch": 1.2722589315712844, "grad_norm": 1.0495973825454712, "learning_rate": 9.92871052631579e-05, "loss": 0.4398, "step": 22720 }, { "epoch": 1.2723149288834135, "grad_norm": 1.5488747358322144, "learning_rate": 9.928684210526316e-05, "loss": 0.5062, "step": 22721 }, { "epoch": 1.2723709261955425, "grad_norm": 1.3600432872772217, "learning_rate": 9.928657894736842e-05, "loss": 0.4004, "step": 22722 }, { "epoch": 1.2724269235076715, "grad_norm": 1.1288678646087646, "learning_rate": 9.92863157894737e-05, "loss": 0.4184, "step": 22723 }, { "epoch": 1.2724829208198005, "grad_norm": 1.2847695350646973, "learning_rate": 9.928605263157896e-05, "loss": 0.434, "step": 22724 }, { "epoch": 1.2725389181319295, "grad_norm": 1.3045952320098877, "learning_rate": 9.928578947368422e-05, "loss": 0.4622, "step": 22725 }, { "epoch": 1.2725949154440586, "grad_norm": 1.2724335193634033, "learning_rate": 9.928552631578948e-05, "loss": 0.3853, "step": 22726 }, { "epoch": 1.2726509127561876, "grad_norm": 1.4003771543502808, "learning_rate": 9.928526315789474e-05, "loss": 0.4671, "step": 22727 }, { "epoch": 1.2727069100683166, "grad_norm": 1.9080029726028442, "learning_rate": 9.928500000000001e-05, "loss": 0.6663, "step": 22728 }, { "epoch": 1.2727629073804456, "grad_norm": 1.3403178453445435, "learning_rate": 9.928473684210527e-05, "loss": 0.4044, "step": 22729 }, { "epoch": 1.2728189046925746, "grad_norm": 1.050033450126648, 
"learning_rate": 9.928447368421053e-05, "loss": 0.3496, "step": 22730 }, { "epoch": 1.2728749020047037, "grad_norm": 1.0832682847976685, "learning_rate": 9.928421052631579e-05, "loss": 0.4166, "step": 22731 }, { "epoch": 1.2729308993168327, "grad_norm": 1.425012469291687, "learning_rate": 9.928394736842106e-05, "loss": 0.4182, "step": 22732 }, { "epoch": 1.2729868966289617, "grad_norm": 1.3718820810317993, "learning_rate": 9.928368421052632e-05, "loss": 0.5987, "step": 22733 }, { "epoch": 1.2730428939410907, "grad_norm": 1.3492567539215088, "learning_rate": 9.928342105263158e-05, "loss": 0.4202, "step": 22734 }, { "epoch": 1.2730988912532197, "grad_norm": 9.869452476501465, "learning_rate": 9.928315789473684e-05, "loss": 0.5806, "step": 22735 }, { "epoch": 1.2731548885653488, "grad_norm": 1.481376051902771, "learning_rate": 9.928289473684211e-05, "loss": 0.4489, "step": 22736 }, { "epoch": 1.2732108858774778, "grad_norm": 1.1709439754486084, "learning_rate": 9.928263157894737e-05, "loss": 0.3878, "step": 22737 }, { "epoch": 1.2732668831896068, "grad_norm": 4.314883708953857, "learning_rate": 9.928236842105265e-05, "loss": 0.5497, "step": 22738 }, { "epoch": 1.2733228805017358, "grad_norm": 1.635369062423706, "learning_rate": 9.928210526315789e-05, "loss": 0.4089, "step": 22739 }, { "epoch": 1.2733788778138648, "grad_norm": 1.622637391090393, "learning_rate": 9.928184210526317e-05, "loss": 0.4684, "step": 22740 }, { "epoch": 1.2734348751259938, "grad_norm": 1.3720473051071167, "learning_rate": 9.928157894736843e-05, "loss": 0.3942, "step": 22741 }, { "epoch": 1.2734908724381229, "grad_norm": 1.2264175415039062, "learning_rate": 9.92813157894737e-05, "loss": 0.3322, "step": 22742 }, { "epoch": 1.2735468697502519, "grad_norm": 1.4567515850067139, "learning_rate": 9.928105263157895e-05, "loss": 0.4325, "step": 22743 }, { "epoch": 1.273602867062381, "grad_norm": 1.5054537057876587, "learning_rate": 9.92807894736842e-05, "loss": 0.4869, "step": 22744 }, { "epoch": 
1.27365886437451, "grad_norm": 1.3365561962127686, "learning_rate": 9.928052631578948e-05, "loss": 0.4938, "step": 22745 }, { "epoch": 1.273714861686639, "grad_norm": 1.5146760940551758, "learning_rate": 9.928026315789474e-05, "loss": 0.5028, "step": 22746 }, { "epoch": 1.273770858998768, "grad_norm": 1.0109838247299194, "learning_rate": 9.928000000000001e-05, "loss": 0.3685, "step": 22747 }, { "epoch": 1.273826856310897, "grad_norm": 1.3436310291290283, "learning_rate": 9.927973684210526e-05, "loss": 0.5354, "step": 22748 }, { "epoch": 1.273882853623026, "grad_norm": 1.4745495319366455, "learning_rate": 9.927947368421053e-05, "loss": 0.4171, "step": 22749 }, { "epoch": 1.273938850935155, "grad_norm": 1.4597543478012085, "learning_rate": 9.927921052631579e-05, "loss": 0.3829, "step": 22750 }, { "epoch": 1.273994848247284, "grad_norm": 1.4160975217819214, "learning_rate": 9.927894736842106e-05, "loss": 0.4748, "step": 22751 }, { "epoch": 1.274050845559413, "grad_norm": 1.3686274290084839, "learning_rate": 9.927868421052632e-05, "loss": 0.4098, "step": 22752 }, { "epoch": 1.274106842871542, "grad_norm": 1.3850740194320679, "learning_rate": 9.927842105263158e-05, "loss": 0.4831, "step": 22753 }, { "epoch": 1.274162840183671, "grad_norm": 2.055142402648926, "learning_rate": 9.927815789473684e-05, "loss": 0.4452, "step": 22754 }, { "epoch": 1.2742188374958001, "grad_norm": 1.2674225568771362, "learning_rate": 9.927789473684212e-05, "loss": 0.3733, "step": 22755 }, { "epoch": 1.2742748348079291, "grad_norm": 1.5821294784545898, "learning_rate": 9.927763157894738e-05, "loss": 0.4717, "step": 22756 }, { "epoch": 1.2743308321200582, "grad_norm": 1.2697193622589111, "learning_rate": 9.927736842105264e-05, "loss": 0.3312, "step": 22757 }, { "epoch": 1.2743868294321872, "grad_norm": 1.1778618097305298, "learning_rate": 9.92771052631579e-05, "loss": 0.3977, "step": 22758 }, { "epoch": 1.2744428267443162, "grad_norm": 1.5188071727752686, "learning_rate": 9.927684210526317e-05, 
"loss": 0.5024, "step": 22759 }, { "epoch": 1.2744988240564452, "grad_norm": 2.8332552909851074, "learning_rate": 9.927657894736843e-05, "loss": 0.4515, "step": 22760 }, { "epoch": 1.2745548213685742, "grad_norm": 0.9822652339935303, "learning_rate": 9.927631578947369e-05, "loss": 0.2588, "step": 22761 }, { "epoch": 1.2746108186807033, "grad_norm": 1.1252400875091553, "learning_rate": 9.927605263157895e-05, "loss": 0.4655, "step": 22762 }, { "epoch": 1.2746668159928323, "grad_norm": 1.4667878150939941, "learning_rate": 9.927578947368421e-05, "loss": 0.5684, "step": 22763 }, { "epoch": 1.2747228133049613, "grad_norm": 1.3001575469970703, "learning_rate": 9.927552631578948e-05, "loss": 0.3713, "step": 22764 }, { "epoch": 1.2747788106170903, "grad_norm": 1.185944676399231, "learning_rate": 9.927526315789474e-05, "loss": 0.3744, "step": 22765 }, { "epoch": 1.2748348079292193, "grad_norm": 1.4352209568023682, "learning_rate": 9.9275e-05, "loss": 0.5192, "step": 22766 }, { "epoch": 1.2748908052413483, "grad_norm": 1.1593495607376099, "learning_rate": 9.927473684210526e-05, "loss": 0.3618, "step": 22767 }, { "epoch": 1.2749468025534774, "grad_norm": 1.3951646089553833, "learning_rate": 9.927447368421053e-05, "loss": 0.4428, "step": 22768 }, { "epoch": 1.2750027998656064, "grad_norm": 1.2441003322601318, "learning_rate": 9.92742105263158e-05, "loss": 0.37, "step": 22769 }, { "epoch": 1.2750587971777354, "grad_norm": 1.2693815231323242, "learning_rate": 9.927394736842107e-05, "loss": 0.3531, "step": 22770 }, { "epoch": 1.2751147944898644, "grad_norm": 1.3525625467300415, "learning_rate": 9.927368421052631e-05, "loss": 0.3369, "step": 22771 }, { "epoch": 1.2751707918019934, "grad_norm": 1.563995122909546, "learning_rate": 9.927342105263159e-05, "loss": 0.4396, "step": 22772 }, { "epoch": 1.2752267891141225, "grad_norm": 1.6154165267944336, "learning_rate": 9.927315789473685e-05, "loss": 0.4805, "step": 22773 }, { "epoch": 1.2752827864262515, "grad_norm": 1.5421653985977173, 
"learning_rate": 9.927289473684212e-05, "loss": 0.4318, "step": 22774 }, { "epoch": 1.2753387837383805, "grad_norm": 1.5196174383163452, "learning_rate": 9.927263157894738e-05, "loss": 0.4372, "step": 22775 }, { "epoch": 1.2753947810505095, "grad_norm": 1.498060703277588, "learning_rate": 9.927236842105264e-05, "loss": 0.5817, "step": 22776 }, { "epoch": 1.2754507783626385, "grad_norm": 1.1516247987747192, "learning_rate": 9.92721052631579e-05, "loss": 0.4153, "step": 22777 }, { "epoch": 1.2755067756747676, "grad_norm": 1.5450985431671143, "learning_rate": 9.927184210526316e-05, "loss": 0.3987, "step": 22778 }, { "epoch": 1.2755627729868966, "grad_norm": 1.2992115020751953, "learning_rate": 9.927157894736843e-05, "loss": 0.4102, "step": 22779 }, { "epoch": 1.2756187702990256, "grad_norm": 1.2836987972259521, "learning_rate": 9.927131578947369e-05, "loss": 0.4377, "step": 22780 }, { "epoch": 1.2756747676111546, "grad_norm": 1.2355024814605713, "learning_rate": 9.927105263157895e-05, "loss": 0.3656, "step": 22781 }, { "epoch": 1.2757307649232836, "grad_norm": 1.5421550273895264, "learning_rate": 9.927078947368421e-05, "loss": 0.395, "step": 22782 }, { "epoch": 1.2757867622354127, "grad_norm": 1.3215744495391846, "learning_rate": 9.927052631578948e-05, "loss": 0.3083, "step": 22783 }, { "epoch": 1.2758427595475417, "grad_norm": 1.0425972938537598, "learning_rate": 9.927026315789474e-05, "loss": 0.3134, "step": 22784 }, { "epoch": 1.2758987568596707, "grad_norm": 1.2993683815002441, "learning_rate": 9.927e-05, "loss": 0.4841, "step": 22785 }, { "epoch": 1.2759547541717997, "grad_norm": 1.6722605228424072, "learning_rate": 9.926973684210526e-05, "loss": 0.6632, "step": 22786 }, { "epoch": 1.2760107514839287, "grad_norm": 1.634016513824463, "learning_rate": 9.926947368421054e-05, "loss": 0.4314, "step": 22787 }, { "epoch": 1.2760667487960577, "grad_norm": 1.4416072368621826, "learning_rate": 9.92692105263158e-05, "loss": 0.4843, "step": 22788 }, { "epoch": 
1.2761227461081868, "grad_norm": 1.86212158203125, "learning_rate": 9.926894736842106e-05, "loss": 0.3136, "step": 22789 }, { "epoch": 1.2761787434203158, "grad_norm": 1.419284701347351, "learning_rate": 9.926868421052632e-05, "loss": 0.4174, "step": 22790 }, { "epoch": 1.2762347407324448, "grad_norm": 1.5763509273529053, "learning_rate": 9.926842105263159e-05, "loss": 0.4896, "step": 22791 }, { "epoch": 1.2762907380445738, "grad_norm": 1.4166450500488281, "learning_rate": 9.926815789473685e-05, "loss": 0.5477, "step": 22792 }, { "epoch": 1.2763467353567028, "grad_norm": 1.4179977178573608, "learning_rate": 9.926789473684212e-05, "loss": 0.5523, "step": 22793 }, { "epoch": 1.2764027326688319, "grad_norm": 1.2980576753616333, "learning_rate": 9.926763157894737e-05, "loss": 0.4405, "step": 22794 }, { "epoch": 1.2764587299809609, "grad_norm": 1.8971774578094482, "learning_rate": 9.926736842105263e-05, "loss": 0.3989, "step": 22795 }, { "epoch": 1.27651472729309, "grad_norm": 1.6122095584869385, "learning_rate": 9.92671052631579e-05, "loss": 0.4271, "step": 22796 }, { "epoch": 1.276570724605219, "grad_norm": 1.5946292877197266, "learning_rate": 9.926684210526316e-05, "loss": 0.6605, "step": 22797 }, { "epoch": 1.276626721917348, "grad_norm": 1.4192845821380615, "learning_rate": 9.926657894736842e-05, "loss": 0.4882, "step": 22798 }, { "epoch": 1.276682719229477, "grad_norm": 1.4047622680664062, "learning_rate": 9.926631578947368e-05, "loss": 0.5338, "step": 22799 }, { "epoch": 1.276738716541606, "grad_norm": 2.6966567039489746, "learning_rate": 9.926605263157895e-05, "loss": 0.4442, "step": 22800 }, { "epoch": 1.276794713853735, "grad_norm": 1.3931602239608765, "learning_rate": 9.926578947368421e-05, "loss": 0.3877, "step": 22801 }, { "epoch": 1.276850711165864, "grad_norm": 1.3101481199264526, "learning_rate": 9.926552631578949e-05, "loss": 0.426, "step": 22802 }, { "epoch": 1.276906708477993, "grad_norm": 1.3449358940124512, "learning_rate": 9.926526315789473e-05, 
"loss": 0.3264, "step": 22803 }, { "epoch": 1.276962705790122, "grad_norm": 1.4406251907348633, "learning_rate": 9.9265e-05, "loss": 0.4213, "step": 22804 }, { "epoch": 1.277018703102251, "grad_norm": 1.2217750549316406, "learning_rate": 9.926473684210527e-05, "loss": 0.527, "step": 22805 }, { "epoch": 1.27707470041438, "grad_norm": 1.2291377782821655, "learning_rate": 9.926447368421054e-05, "loss": 0.5167, "step": 22806 }, { "epoch": 1.2771306977265091, "grad_norm": 1.5686665773391724, "learning_rate": 9.92642105263158e-05, "loss": 0.4669, "step": 22807 }, { "epoch": 1.2771866950386381, "grad_norm": 1.1758569478988647, "learning_rate": 9.926394736842106e-05, "loss": 0.3745, "step": 22808 }, { "epoch": 1.2772426923507672, "grad_norm": 1.3279191255569458, "learning_rate": 9.926368421052632e-05, "loss": 0.5415, "step": 22809 }, { "epoch": 1.2772986896628962, "grad_norm": 1.224311351776123, "learning_rate": 9.926342105263159e-05, "loss": 0.4282, "step": 22810 }, { "epoch": 1.2773546869750252, "grad_norm": 1.3506956100463867, "learning_rate": 9.926315789473685e-05, "loss": 0.4217, "step": 22811 }, { "epoch": 1.2774106842871542, "grad_norm": 1.8883819580078125, "learning_rate": 9.926289473684211e-05, "loss": 0.4152, "step": 22812 }, { "epoch": 1.2774666815992832, "grad_norm": 1.3244603872299194, "learning_rate": 9.926263157894737e-05, "loss": 0.3734, "step": 22813 }, { "epoch": 1.2775226789114122, "grad_norm": 1.224166989326477, "learning_rate": 9.926236842105263e-05, "loss": 0.3873, "step": 22814 }, { "epoch": 1.2775786762235413, "grad_norm": 1.5324288606643677, "learning_rate": 9.92621052631579e-05, "loss": 0.5001, "step": 22815 }, { "epoch": 1.2776346735356703, "grad_norm": 1.5295687913894653, "learning_rate": 9.926184210526316e-05, "loss": 0.4734, "step": 22816 }, { "epoch": 1.2776906708477993, "grad_norm": 1.729920744895935, "learning_rate": 9.926157894736842e-05, "loss": 0.5655, "step": 22817 }, { "epoch": 1.2777466681599283, "grad_norm": 1.1620714664459229, 
"learning_rate": 9.926131578947368e-05, "loss": 0.3914, "step": 22818 }, { "epoch": 1.2778026654720573, "grad_norm": 1.2352561950683594, "learning_rate": 9.926105263157896e-05, "loss": 0.4323, "step": 22819 }, { "epoch": 1.2778586627841864, "grad_norm": 1.185273289680481, "learning_rate": 9.926078947368422e-05, "loss": 0.3784, "step": 22820 }, { "epoch": 1.2779146600963154, "grad_norm": 1.7075848579406738, "learning_rate": 9.926052631578948e-05, "loss": 0.4938, "step": 22821 }, { "epoch": 1.2779706574084444, "grad_norm": 1.936911940574646, "learning_rate": 9.926026315789474e-05, "loss": 0.5181, "step": 22822 }, { "epoch": 1.2780266547205734, "grad_norm": 1.4712456464767456, "learning_rate": 9.926000000000001e-05, "loss": 0.482, "step": 22823 }, { "epoch": 1.2780826520327024, "grad_norm": 1.2620044946670532, "learning_rate": 9.925973684210527e-05, "loss": 0.3792, "step": 22824 }, { "epoch": 1.2781386493448315, "grad_norm": 1.149962067604065, "learning_rate": 9.925947368421054e-05, "loss": 0.413, "step": 22825 }, { "epoch": 1.2781946466569605, "grad_norm": 1.4623929262161255, "learning_rate": 9.925921052631579e-05, "loss": 0.4461, "step": 22826 }, { "epoch": 1.2782506439690895, "grad_norm": 1.2333474159240723, "learning_rate": 9.925894736842106e-05, "loss": 0.5105, "step": 22827 }, { "epoch": 1.2783066412812185, "grad_norm": 1.1953965425491333, "learning_rate": 9.925868421052632e-05, "loss": 0.3357, "step": 22828 }, { "epoch": 1.2783626385933475, "grad_norm": 1.5779125690460205, "learning_rate": 9.925842105263158e-05, "loss": 0.4918, "step": 22829 }, { "epoch": 1.2784186359054766, "grad_norm": 1.3370437622070312, "learning_rate": 9.925815789473685e-05, "loss": 0.441, "step": 22830 }, { "epoch": 1.2784746332176056, "grad_norm": 1.671709418296814, "learning_rate": 9.92578947368421e-05, "loss": 0.5659, "step": 22831 }, { "epoch": 1.2785306305297346, "grad_norm": 1.4015928506851196, "learning_rate": 9.925763157894737e-05, "loss": 0.3417, "step": 22832 }, { "epoch": 
1.2785866278418636, "grad_norm": 1.3909051418304443, "learning_rate": 9.925736842105263e-05, "loss": 0.4817, "step": 22833 }, { "epoch": 1.2786426251539926, "grad_norm": 1.496991515159607, "learning_rate": 9.92571052631579e-05, "loss": 0.4239, "step": 22834 }, { "epoch": 1.2786986224661216, "grad_norm": 1.3109850883483887, "learning_rate": 9.925684210526317e-05, "loss": 0.4174, "step": 22835 }, { "epoch": 1.2787546197782507, "grad_norm": 1.5020232200622559, "learning_rate": 9.925657894736843e-05, "loss": 0.3756, "step": 22836 }, { "epoch": 1.2788106170903797, "grad_norm": 1.3662246465682983, "learning_rate": 9.925631578947369e-05, "loss": 0.3375, "step": 22837 }, { "epoch": 1.2788666144025087, "grad_norm": 1.225379228591919, "learning_rate": 9.925605263157896e-05, "loss": 0.3597, "step": 22838 }, { "epoch": 1.2789226117146377, "grad_norm": 1.8974720239639282, "learning_rate": 9.925578947368422e-05, "loss": 0.455, "step": 22839 }, { "epoch": 1.2789786090267667, "grad_norm": 1.445088267326355, "learning_rate": 9.925552631578948e-05, "loss": 0.3239, "step": 22840 }, { "epoch": 1.2790346063388958, "grad_norm": 1.2902582883834839, "learning_rate": 9.925526315789474e-05, "loss": 0.595, "step": 22841 }, { "epoch": 1.2790906036510248, "grad_norm": 1.2275384664535522, "learning_rate": 9.925500000000001e-05, "loss": 0.3344, "step": 22842 }, { "epoch": 1.2791466009631538, "grad_norm": 1.4865615367889404, "learning_rate": 9.925473684210527e-05, "loss": 0.5262, "step": 22843 }, { "epoch": 1.2792025982752828, "grad_norm": 1.4789400100708008, "learning_rate": 9.925447368421053e-05, "loss": 0.4692, "step": 22844 }, { "epoch": 1.2792585955874118, "grad_norm": 1.9096077680587769, "learning_rate": 9.925421052631579e-05, "loss": 0.4197, "step": 22845 }, { "epoch": 1.2793145928995409, "grad_norm": 9.500982284545898, "learning_rate": 9.925394736842105e-05, "loss": 0.3817, "step": 22846 }, { "epoch": 1.2793705902116699, "grad_norm": 1.4734511375427246, "learning_rate": 
9.925368421052632e-05, "loss": 0.4654, "step": 22847 }, { "epoch": 1.279426587523799, "grad_norm": 1.6119033098220825, "learning_rate": 9.925342105263158e-05, "loss": 0.4628, "step": 22848 }, { "epoch": 1.279482584835928, "grad_norm": 1.4294317960739136, "learning_rate": 9.925315789473684e-05, "loss": 0.4772, "step": 22849 }, { "epoch": 1.279538582148057, "grad_norm": 1.3593608140945435, "learning_rate": 9.92528947368421e-05, "loss": 0.478, "step": 22850 }, { "epoch": 1.279594579460186, "grad_norm": 1.2838557958602905, "learning_rate": 9.925263157894738e-05, "loss": 0.4266, "step": 22851 }, { "epoch": 1.279650576772315, "grad_norm": 1.3299751281738281, "learning_rate": 9.925236842105264e-05, "loss": 0.4981, "step": 22852 }, { "epoch": 1.279706574084444, "grad_norm": 1.2452366352081299, "learning_rate": 9.92521052631579e-05, "loss": 0.5585, "step": 22853 }, { "epoch": 1.279762571396573, "grad_norm": 1.288326621055603, "learning_rate": 9.925184210526315e-05, "loss": 0.4661, "step": 22854 }, { "epoch": 1.279818568708702, "grad_norm": 1.3746986389160156, "learning_rate": 9.925157894736843e-05, "loss": 0.4094, "step": 22855 }, { "epoch": 1.279874566020831, "grad_norm": 1.115032434463501, "learning_rate": 9.925131578947369e-05, "loss": 0.4007, "step": 22856 }, { "epoch": 1.27993056333296, "grad_norm": 1.2488641738891602, "learning_rate": 9.925105263157896e-05, "loss": 0.4994, "step": 22857 }, { "epoch": 1.279986560645089, "grad_norm": 1.425959825515747, "learning_rate": 9.925078947368421e-05, "loss": 0.4653, "step": 22858 }, { "epoch": 1.280042557957218, "grad_norm": 1.4689654111862183, "learning_rate": 9.925052631578948e-05, "loss": 0.5289, "step": 22859 }, { "epoch": 1.2800985552693471, "grad_norm": 1.4871768951416016, "learning_rate": 9.925026315789474e-05, "loss": 0.4693, "step": 22860 }, { "epoch": 1.2801545525814761, "grad_norm": 1.5057939291000366, "learning_rate": 9.925000000000001e-05, "loss": 0.5627, "step": 22861 }, { "epoch": 1.2802105498936052, "grad_norm": 
1.2794889211654663, "learning_rate": 9.924973684210527e-05, "loss": 0.4061, "step": 22862 }, { "epoch": 1.2802665472057342, "grad_norm": 1.3119163513183594, "learning_rate": 9.924947368421053e-05, "loss": 0.3685, "step": 22863 }, { "epoch": 1.2803225445178632, "grad_norm": 1.499916672706604, "learning_rate": 9.924921052631579e-05, "loss": 0.4594, "step": 22864 }, { "epoch": 1.2803785418299922, "grad_norm": 1.4060181379318237, "learning_rate": 9.924894736842105e-05, "loss": 0.3679, "step": 22865 }, { "epoch": 1.2804345391421212, "grad_norm": 1.4560726881027222, "learning_rate": 9.924868421052633e-05, "loss": 0.4411, "step": 22866 }, { "epoch": 1.2804905364542503, "grad_norm": 1.4118001461029053, "learning_rate": 9.924842105263159e-05, "loss": 0.4929, "step": 22867 }, { "epoch": 1.2805465337663793, "grad_norm": 1.429483413696289, "learning_rate": 9.924815789473685e-05, "loss": 0.6687, "step": 22868 }, { "epoch": 1.2806025310785083, "grad_norm": 1.3398168087005615, "learning_rate": 9.92478947368421e-05, "loss": 0.4995, "step": 22869 }, { "epoch": 1.2806585283906373, "grad_norm": 1.3888211250305176, "learning_rate": 9.924763157894738e-05, "loss": 0.4382, "step": 22870 }, { "epoch": 1.2807145257027663, "grad_norm": 1.420980453491211, "learning_rate": 9.924736842105264e-05, "loss": 0.5059, "step": 22871 }, { "epoch": 1.2807705230148954, "grad_norm": 1.3897972106933594, "learning_rate": 9.92471052631579e-05, "loss": 0.5511, "step": 22872 }, { "epoch": 1.2808265203270244, "grad_norm": 1.4518322944641113, "learning_rate": 9.924684210526316e-05, "loss": 0.5084, "step": 22873 }, { "epoch": 1.2808825176391534, "grad_norm": 1.3902095556259155, "learning_rate": 9.924657894736843e-05, "loss": 0.7499, "step": 22874 }, { "epoch": 1.2809385149512824, "grad_norm": 1.311393141746521, "learning_rate": 9.924631578947369e-05, "loss": 0.4051, "step": 22875 }, { "epoch": 1.2809945122634114, "grad_norm": 1.9083179235458374, "learning_rate": 9.924605263157895e-05, "loss": 0.4966, "step": 
22876 }, { "epoch": 1.2810505095755405, "grad_norm": 1.2876859903335571, "learning_rate": 9.924578947368421e-05, "loss": 0.4092, "step": 22877 }, { "epoch": 1.2811065068876695, "grad_norm": 1.4630753993988037, "learning_rate": 9.924552631578948e-05, "loss": 0.5745, "step": 22878 }, { "epoch": 1.2811625041997985, "grad_norm": 1.289911150932312, "learning_rate": 9.924526315789474e-05, "loss": 0.4048, "step": 22879 }, { "epoch": 1.2812185015119275, "grad_norm": 1.2508763074874878, "learning_rate": 9.924500000000002e-05, "loss": 0.4841, "step": 22880 }, { "epoch": 1.2812744988240565, "grad_norm": 1.3895246982574463, "learning_rate": 9.924473684210526e-05, "loss": 0.4195, "step": 22881 }, { "epoch": 1.2813304961361855, "grad_norm": 1.732861876487732, "learning_rate": 9.924447368421052e-05, "loss": 0.4791, "step": 22882 }, { "epoch": 1.2813864934483146, "grad_norm": 1.433110237121582, "learning_rate": 9.92442105263158e-05, "loss": 0.4858, "step": 22883 }, { "epoch": 1.2814424907604436, "grad_norm": 1.2530598640441895, "learning_rate": 9.924394736842106e-05, "loss": 0.4106, "step": 22884 }, { "epoch": 1.2814984880725726, "grad_norm": 1.45210599899292, "learning_rate": 9.924368421052633e-05, "loss": 0.4895, "step": 22885 }, { "epoch": 1.2815544853847016, "grad_norm": 1.2971900701522827, "learning_rate": 9.924342105263157e-05, "loss": 0.4203, "step": 22886 }, { "epoch": 1.2816104826968306, "grad_norm": 1.337934136390686, "learning_rate": 9.924315789473685e-05, "loss": 0.438, "step": 22887 }, { "epoch": 1.2816664800089597, "grad_norm": 1.5100983381271362, "learning_rate": 9.924289473684211e-05, "loss": 0.4976, "step": 22888 }, { "epoch": 1.2817224773210887, "grad_norm": 1.305716872215271, "learning_rate": 9.924263157894738e-05, "loss": 0.5341, "step": 22889 }, { "epoch": 1.2817784746332177, "grad_norm": 1.6484627723693848, "learning_rate": 9.924236842105264e-05, "loss": 0.5838, "step": 22890 }, { "epoch": 1.2818344719453467, "grad_norm": 1.4973548650741577, "learning_rate": 
9.92421052631579e-05, "loss": 0.4746, "step": 22891 }, { "epoch": 1.2818904692574757, "grad_norm": 1.2990798950195312, "learning_rate": 9.924184210526316e-05, "loss": 0.3971, "step": 22892 }, { "epoch": 1.2819464665696048, "grad_norm": 1.5169073343276978, "learning_rate": 9.924157894736843e-05, "loss": 0.5484, "step": 22893 }, { "epoch": 1.2820024638817338, "grad_norm": 1.4381048679351807, "learning_rate": 9.924131578947369e-05, "loss": 0.4447, "step": 22894 }, { "epoch": 1.2820584611938628, "grad_norm": 1.9256312847137451, "learning_rate": 9.924105263157895e-05, "loss": 0.3603, "step": 22895 }, { "epoch": 1.2821144585059918, "grad_norm": 1.35012686252594, "learning_rate": 9.924078947368421e-05, "loss": 0.3953, "step": 22896 }, { "epoch": 1.2821704558181208, "grad_norm": 1.3643207550048828, "learning_rate": 9.924052631578949e-05, "loss": 0.4825, "step": 22897 }, { "epoch": 1.2822264531302499, "grad_norm": 1.6131207942962646, "learning_rate": 9.924026315789475e-05, "loss": 0.5158, "step": 22898 }, { "epoch": 1.2822824504423789, "grad_norm": 1.4574543237686157, "learning_rate": 9.924e-05, "loss": 0.485, "step": 22899 }, { "epoch": 1.282338447754508, "grad_norm": 1.2956045866012573, "learning_rate": 9.923973684210527e-05, "loss": 0.4301, "step": 22900 }, { "epoch": 1.282394445066637, "grad_norm": 1.2448629140853882, "learning_rate": 9.923947368421052e-05, "loss": 0.4139, "step": 22901 }, { "epoch": 1.282450442378766, "grad_norm": 1.3458913564682007, "learning_rate": 9.92392105263158e-05, "loss": 0.5027, "step": 22902 }, { "epoch": 1.282506439690895, "grad_norm": 1.2780959606170654, "learning_rate": 9.923894736842106e-05, "loss": 0.348, "step": 22903 }, { "epoch": 1.282562437003024, "grad_norm": 1.369726300239563, "learning_rate": 9.923868421052632e-05, "loss": 0.4936, "step": 22904 }, { "epoch": 1.282618434315153, "grad_norm": 1.415688395500183, "learning_rate": 9.923842105263158e-05, "loss": 0.4766, "step": 22905 }, { "epoch": 1.282674431627282, "grad_norm": 
1.3793411254882812, "learning_rate": 9.923815789473685e-05, "loss": 0.419, "step": 22906 }, { "epoch": 1.282730428939411, "grad_norm": 1.4151756763458252, "learning_rate": 9.923789473684211e-05, "loss": 0.5377, "step": 22907 }, { "epoch": 1.28278642625154, "grad_norm": 1.4490402936935425, "learning_rate": 9.923763157894737e-05, "loss": 0.4964, "step": 22908 }, { "epoch": 1.282842423563669, "grad_norm": 1.4372507333755493, "learning_rate": 9.923736842105263e-05, "loss": 0.455, "step": 22909 }, { "epoch": 1.282898420875798, "grad_norm": 1.2414966821670532, "learning_rate": 9.92371052631579e-05, "loss": 0.438, "step": 22910 }, { "epoch": 1.282954418187927, "grad_norm": 1.3037598133087158, "learning_rate": 9.923684210526316e-05, "loss": 0.4289, "step": 22911 }, { "epoch": 1.2830104155000561, "grad_norm": 2.441774368286133, "learning_rate": 9.923657894736844e-05, "loss": 0.486, "step": 22912 }, { "epoch": 1.2830664128121851, "grad_norm": 1.678240418434143, "learning_rate": 9.923631578947368e-05, "loss": 0.4303, "step": 22913 }, { "epoch": 1.2831224101243142, "grad_norm": 1.2907674312591553, "learning_rate": 9.923605263157896e-05, "loss": 0.4251, "step": 22914 }, { "epoch": 1.2831784074364432, "grad_norm": 1.5444412231445312, "learning_rate": 9.923578947368422e-05, "loss": 0.5689, "step": 22915 }, { "epoch": 1.2832344047485722, "grad_norm": 1.678706169128418, "learning_rate": 9.923552631578947e-05, "loss": 0.5201, "step": 22916 }, { "epoch": 1.2832904020607012, "grad_norm": 1.3194148540496826, "learning_rate": 9.923526315789475e-05, "loss": 0.3818, "step": 22917 }, { "epoch": 1.2833463993728302, "grad_norm": 1.344393014907837, "learning_rate": 9.9235e-05, "loss": 0.4487, "step": 22918 }, { "epoch": 1.2834023966849593, "grad_norm": 1.3960899114608765, "learning_rate": 9.923473684210527e-05, "loss": 0.4525, "step": 22919 }, { "epoch": 1.2834583939970883, "grad_norm": 1.4669640064239502, "learning_rate": 9.923447368421053e-05, "loss": 0.3751, "step": 22920 }, { "epoch": 
1.2835143913092173, "grad_norm": 1.2237739562988281, "learning_rate": 9.92342105263158e-05, "loss": 0.4054, "step": 22921 }, { "epoch": 1.283570388621346, "grad_norm": 1.3809826374053955, "learning_rate": 9.923394736842106e-05, "loss": 0.405, "step": 22922 }, { "epoch": 1.2836263859334751, "grad_norm": 1.3056340217590332, "learning_rate": 9.923368421052632e-05, "loss": 0.4087, "step": 22923 }, { "epoch": 1.2836823832456041, "grad_norm": 1.3003950119018555, "learning_rate": 9.923342105263158e-05, "loss": 0.5366, "step": 22924 }, { "epoch": 1.2837383805577331, "grad_norm": 1.3883391618728638, "learning_rate": 9.923315789473685e-05, "loss": 0.4015, "step": 22925 }, { "epoch": 1.2837943778698622, "grad_norm": 1.3687617778778076, "learning_rate": 9.923289473684211e-05, "loss": 0.5419, "step": 22926 }, { "epoch": 1.2838503751819912, "grad_norm": 1.247031569480896, "learning_rate": 9.923263157894737e-05, "loss": 0.4375, "step": 22927 }, { "epoch": 1.2839063724941202, "grad_norm": 1.4073357582092285, "learning_rate": 9.923236842105263e-05, "loss": 0.4831, "step": 22928 }, { "epoch": 1.2839623698062492, "grad_norm": 1.2339050769805908, "learning_rate": 9.92321052631579e-05, "loss": 0.4407, "step": 22929 }, { "epoch": 1.2840183671183782, "grad_norm": 1.2139928340911865, "learning_rate": 9.923184210526317e-05, "loss": 0.4936, "step": 22930 }, { "epoch": 1.2840743644305073, "grad_norm": 1.5380933284759521, "learning_rate": 9.923157894736842e-05, "loss": 0.4634, "step": 22931 }, { "epoch": 1.2841303617426363, "grad_norm": 1.1507809162139893, "learning_rate": 9.923131578947368e-05, "loss": 0.434, "step": 22932 }, { "epoch": 1.2841863590547653, "grad_norm": 1.1982436180114746, "learning_rate": 9.923105263157894e-05, "loss": 0.3755, "step": 22933 }, { "epoch": 1.2842423563668943, "grad_norm": 1.8436267375946045, "learning_rate": 9.923078947368422e-05, "loss": 0.4652, "step": 22934 }, { "epoch": 1.2842983536790233, "grad_norm": 2.4296042919158936, "learning_rate": 
9.923052631578948e-05, "loss": 0.7161, "step": 22935 }, { "epoch": 1.2843543509911524, "grad_norm": 1.224182367324829, "learning_rate": 9.923026315789474e-05, "loss": 0.5098, "step": 22936 }, { "epoch": 1.2844103483032814, "grad_norm": 1.4148139953613281, "learning_rate": 9.923e-05, "loss": 0.4536, "step": 22937 }, { "epoch": 1.2844663456154104, "grad_norm": 1.2621914148330688, "learning_rate": 9.922973684210527e-05, "loss": 0.4855, "step": 22938 }, { "epoch": 1.2845223429275394, "grad_norm": 1.5679540634155273, "learning_rate": 9.922947368421053e-05, "loss": 0.5845, "step": 22939 }, { "epoch": 1.2845783402396684, "grad_norm": 1.6275293827056885, "learning_rate": 9.92292105263158e-05, "loss": 0.4481, "step": 22940 }, { "epoch": 1.2846343375517975, "grad_norm": 1.2808688879013062, "learning_rate": 9.922894736842105e-05, "loss": 0.4123, "step": 22941 }, { "epoch": 1.2846903348639265, "grad_norm": 1.4030125141143799, "learning_rate": 9.922868421052632e-05, "loss": 0.5084, "step": 22942 }, { "epoch": 1.2847463321760555, "grad_norm": 1.191109299659729, "learning_rate": 9.922842105263158e-05, "loss": 0.4447, "step": 22943 }, { "epoch": 1.2848023294881845, "grad_norm": 1.2105380296707153, "learning_rate": 9.922815789473686e-05, "loss": 0.4081, "step": 22944 }, { "epoch": 1.2848583268003135, "grad_norm": 1.7374064922332764, "learning_rate": 9.92278947368421e-05, "loss": 0.4894, "step": 22945 }, { "epoch": 1.2849143241124426, "grad_norm": 1.5357162952423096, "learning_rate": 9.922763157894738e-05, "loss": 0.4604, "step": 22946 }, { "epoch": 1.2849703214245716, "grad_norm": 1.4711189270019531, "learning_rate": 9.922736842105263e-05, "loss": 0.523, "step": 22947 }, { "epoch": 1.2850263187367006, "grad_norm": 1.3166229724884033, "learning_rate": 9.922710526315791e-05, "loss": 0.4023, "step": 22948 }, { "epoch": 1.2850823160488296, "grad_norm": 1.3460315465927124, "learning_rate": 9.922684210526317e-05, "loss": 0.4786, "step": 22949 }, { "epoch": 1.2851383133609586, 
"grad_norm": 1.4328230619430542, "learning_rate": 9.922657894736841e-05, "loss": 0.6095, "step": 22950 }, { "epoch": 1.2851943106730876, "grad_norm": 1.4445209503173828, "learning_rate": 9.922631578947369e-05, "loss": 0.4819, "step": 22951 }, { "epoch": 1.2852503079852167, "grad_norm": 1.5904972553253174, "learning_rate": 9.922605263157895e-05, "loss": 0.4077, "step": 22952 }, { "epoch": 1.2853063052973457, "grad_norm": 1.2449522018432617, "learning_rate": 9.922578947368422e-05, "loss": 0.4679, "step": 22953 }, { "epoch": 1.2853623026094747, "grad_norm": 1.2776936292648315, "learning_rate": 9.922552631578948e-05, "loss": 0.4167, "step": 22954 }, { "epoch": 1.2854182999216037, "grad_norm": 5.033515930175781, "learning_rate": 9.922526315789474e-05, "loss": 0.4469, "step": 22955 }, { "epoch": 1.2854742972337327, "grad_norm": 1.3768417835235596, "learning_rate": 9.9225e-05, "loss": 0.4248, "step": 22956 }, { "epoch": 1.2855302945458618, "grad_norm": 1.1071889400482178, "learning_rate": 9.922473684210527e-05, "loss": 0.3615, "step": 22957 }, { "epoch": 1.2855862918579908, "grad_norm": 1.2394598722457886, "learning_rate": 9.922447368421053e-05, "loss": 0.4888, "step": 22958 }, { "epoch": 1.2856422891701198, "grad_norm": 1.2708501815795898, "learning_rate": 9.922421052631579e-05, "loss": 0.4206, "step": 22959 }, { "epoch": 1.2856982864822488, "grad_norm": 1.3693835735321045, "learning_rate": 9.922394736842105e-05, "loss": 0.4744, "step": 22960 }, { "epoch": 1.2857542837943778, "grad_norm": 1.2162554264068604, "learning_rate": 9.922368421052633e-05, "loss": 0.317, "step": 22961 }, { "epoch": 1.2858102811065069, "grad_norm": 1.262914776802063, "learning_rate": 9.922342105263158e-05, "loss": 0.4, "step": 22962 }, { "epoch": 1.2858662784186359, "grad_norm": 1.3579424619674683, "learning_rate": 9.922315789473684e-05, "loss": 0.4434, "step": 22963 }, { "epoch": 1.285922275730765, "grad_norm": 1.2783117294311523, "learning_rate": 9.92228947368421e-05, "loss": 0.398, "step": 
22964 }, { "epoch": 1.285978273042894, "grad_norm": 1.3735289573669434, "learning_rate": 9.922263157894738e-05, "loss": 0.4491, "step": 22965 }, { "epoch": 1.286034270355023, "grad_norm": 1.716067910194397, "learning_rate": 9.922236842105264e-05, "loss": 0.5226, "step": 22966 }, { "epoch": 1.286090267667152, "grad_norm": 1.2945685386657715, "learning_rate": 9.92221052631579e-05, "loss": 0.4089, "step": 22967 }, { "epoch": 1.286146264979281, "grad_norm": 1.5746315717697144, "learning_rate": 9.922184210526316e-05, "loss": 0.4545, "step": 22968 }, { "epoch": 1.28620226229141, "grad_norm": 1.5032798051834106, "learning_rate": 9.922157894736842e-05, "loss": 0.4913, "step": 22969 }, { "epoch": 1.286258259603539, "grad_norm": 1.5064482688903809, "learning_rate": 9.922131578947369e-05, "loss": 0.4879, "step": 22970 }, { "epoch": 1.286314256915668, "grad_norm": 1.6576777696609497, "learning_rate": 9.922105263157895e-05, "loss": 0.445, "step": 22971 }, { "epoch": 1.286370254227797, "grad_norm": 1.1466107368469238, "learning_rate": 9.922078947368422e-05, "loss": 0.3226, "step": 22972 }, { "epoch": 1.286426251539926, "grad_norm": 1.3217601776123047, "learning_rate": 9.922052631578947e-05, "loss": 0.4444, "step": 22973 }, { "epoch": 1.286482248852055, "grad_norm": 1.2319965362548828, "learning_rate": 9.922026315789474e-05, "loss": 0.4418, "step": 22974 }, { "epoch": 1.286538246164184, "grad_norm": 1.4229353666305542, "learning_rate": 9.922e-05, "loss": 0.52, "step": 22975 }, { "epoch": 1.2865942434763131, "grad_norm": 1.6926286220550537, "learning_rate": 9.921973684210528e-05, "loss": 0.4617, "step": 22976 }, { "epoch": 1.2866502407884421, "grad_norm": 1.4075435400009155, "learning_rate": 9.921947368421054e-05, "loss": 0.4135, "step": 22977 }, { "epoch": 1.2867062381005712, "grad_norm": 1.431416630744934, "learning_rate": 9.92192105263158e-05, "loss": 0.5034, "step": 22978 }, { "epoch": 1.2867622354127002, "grad_norm": 1.2957077026367188, "learning_rate": 9.921894736842105e-05, 
"loss": 0.4524, "step": 22979 }, { "epoch": 1.2868182327248292, "grad_norm": 1.4212028980255127, "learning_rate": 9.921868421052633e-05, "loss": 0.4904, "step": 22980 }, { "epoch": 1.2868742300369582, "grad_norm": 1.1704585552215576, "learning_rate": 9.921842105263159e-05, "loss": 0.483, "step": 22981 }, { "epoch": 1.2869302273490872, "grad_norm": 1.2489521503448486, "learning_rate": 9.921815789473685e-05, "loss": 0.428, "step": 22982 }, { "epoch": 1.2869862246612163, "grad_norm": 1.6500221490859985, "learning_rate": 9.921789473684211e-05, "loss": 0.5857, "step": 22983 }, { "epoch": 1.2870422219733453, "grad_norm": 1.25044584274292, "learning_rate": 9.921763157894738e-05, "loss": 0.4966, "step": 22984 }, { "epoch": 1.2870982192854743, "grad_norm": 1.3077419996261597, "learning_rate": 9.921736842105264e-05, "loss": 0.4304, "step": 22985 }, { "epoch": 1.2871542165976033, "grad_norm": 1.6721961498260498, "learning_rate": 9.92171052631579e-05, "loss": 0.5613, "step": 22986 }, { "epoch": 1.2872102139097323, "grad_norm": 1.2416599988937378, "learning_rate": 9.921684210526316e-05, "loss": 0.5033, "step": 22987 }, { "epoch": 1.2872662112218614, "grad_norm": 1.3946799039840698, "learning_rate": 9.921657894736842e-05, "loss": 0.4012, "step": 22988 }, { "epoch": 1.2873222085339904, "grad_norm": 1.3088847398757935, "learning_rate": 9.921631578947369e-05, "loss": 0.3774, "step": 22989 }, { "epoch": 1.2873782058461194, "grad_norm": 1.2221100330352783, "learning_rate": 9.921605263157895e-05, "loss": 0.4021, "step": 22990 }, { "epoch": 1.2874342031582484, "grad_norm": 1.2882754802703857, "learning_rate": 9.921578947368421e-05, "loss": 0.4187, "step": 22991 }, { "epoch": 1.2874902004703774, "grad_norm": 1.244855523109436, "learning_rate": 9.921552631578947e-05, "loss": 0.4645, "step": 22992 }, { "epoch": 1.2875461977825065, "grad_norm": 1.4222966432571411, "learning_rate": 9.921526315789474e-05, "loss": 0.4661, "step": 22993 }, { "epoch": 1.2876021950946355, "grad_norm": 
1.1522328853607178, "learning_rate": 9.9215e-05, "loss": 0.35, "step": 22994 }, { "epoch": 1.2876581924067645, "grad_norm": 1.5621757507324219, "learning_rate": 9.921473684210528e-05, "loss": 0.3748, "step": 22995 }, { "epoch": 1.2877141897188935, "grad_norm": 1.5290151834487915, "learning_rate": 9.921447368421052e-05, "loss": 0.5444, "step": 22996 }, { "epoch": 1.2877701870310225, "grad_norm": 1.228925347328186, "learning_rate": 9.92142105263158e-05, "loss": 0.4329, "step": 22997 }, { "epoch": 1.2878261843431515, "grad_norm": 1.1519784927368164, "learning_rate": 9.921394736842106e-05, "loss": 0.3533, "step": 22998 }, { "epoch": 1.2878821816552806, "grad_norm": 1.2597635984420776, "learning_rate": 9.921368421052633e-05, "loss": 0.5011, "step": 22999 }, { "epoch": 1.2879381789674096, "grad_norm": 1.2440520524978638, "learning_rate": 9.921342105263158e-05, "loss": 0.5224, "step": 23000 }, { "epoch": 1.2879941762795386, "grad_norm": 1.4682767391204834, "learning_rate": 9.921315789473685e-05, "loss": 0.3505, "step": 23001 }, { "epoch": 1.2880501735916676, "grad_norm": 1.5257654190063477, "learning_rate": 9.921289473684211e-05, "loss": 0.5408, "step": 23002 }, { "epoch": 1.2881061709037966, "grad_norm": 1.4215542078018188, "learning_rate": 9.921263157894737e-05, "loss": 0.466, "step": 23003 }, { "epoch": 1.2881621682159257, "grad_norm": 1.2798796892166138, "learning_rate": 9.921236842105264e-05, "loss": 0.4863, "step": 23004 }, { "epoch": 1.2882181655280547, "grad_norm": 1.1246417760849, "learning_rate": 9.921210526315789e-05, "loss": 0.4249, "step": 23005 }, { "epoch": 1.2882741628401837, "grad_norm": 1.7751320600509644, "learning_rate": 9.921184210526316e-05, "loss": 0.4136, "step": 23006 }, { "epoch": 1.2883301601523127, "grad_norm": 1.4135159254074097, "learning_rate": 9.921157894736842e-05, "loss": 0.5479, "step": 23007 }, { "epoch": 1.2883861574644417, "grad_norm": 1.7594817876815796, "learning_rate": 9.92113157894737e-05, "loss": 0.4854, "step": 23008 }, { 
"epoch": 1.2884421547765708, "grad_norm": 1.517059564590454, "learning_rate": 9.921105263157895e-05, "loss": 0.8628, "step": 23009 }, { "epoch": 1.2884981520886998, "grad_norm": 1.2950935363769531, "learning_rate": 9.921078947368421e-05, "loss": 0.3913, "step": 23010 }, { "epoch": 1.2885541494008288, "grad_norm": 1.353660225868225, "learning_rate": 9.921052631578947e-05, "loss": 0.4868, "step": 23011 }, { "epoch": 1.2886101467129578, "grad_norm": 0.9766411185264587, "learning_rate": 9.921026315789475e-05, "loss": 0.407, "step": 23012 }, { "epoch": 1.2886661440250868, "grad_norm": 1.3170545101165771, "learning_rate": 9.921000000000001e-05, "loss": 0.5691, "step": 23013 }, { "epoch": 1.2887221413372159, "grad_norm": 5.148563385009766, "learning_rate": 9.920973684210527e-05, "loss": 0.3936, "step": 23014 }, { "epoch": 1.2887781386493449, "grad_norm": 1.4028126001358032, "learning_rate": 9.920947368421053e-05, "loss": 0.4737, "step": 23015 }, { "epoch": 1.288834135961474, "grad_norm": 1.4964874982833862, "learning_rate": 9.92092105263158e-05, "loss": 0.5049, "step": 23016 }, { "epoch": 1.288890133273603, "grad_norm": 2.170239210128784, "learning_rate": 9.920894736842106e-05, "loss": 0.4979, "step": 23017 }, { "epoch": 1.288946130585732, "grad_norm": 1.45847487449646, "learning_rate": 9.920868421052632e-05, "loss": 0.4045, "step": 23018 }, { "epoch": 1.289002127897861, "grad_norm": 1.2943224906921387, "learning_rate": 9.920842105263158e-05, "loss": 0.3783, "step": 23019 }, { "epoch": 1.28905812520999, "grad_norm": 1.3317760229110718, "learning_rate": 9.920815789473684e-05, "loss": 0.4034, "step": 23020 }, { "epoch": 1.289114122522119, "grad_norm": 1.1763025522232056, "learning_rate": 9.920789473684211e-05, "loss": 0.4745, "step": 23021 }, { "epoch": 1.289170119834248, "grad_norm": 1.17929208278656, "learning_rate": 9.920763157894737e-05, "loss": 0.4017, "step": 23022 }, { "epoch": 1.289226117146377, "grad_norm": 1.2791979312896729, "learning_rate": 
9.920736842105263e-05, "loss": 0.4489, "step": 23023 }, { "epoch": 1.289282114458506, "grad_norm": 4.220419406890869, "learning_rate": 9.920710526315789e-05, "loss": 0.3953, "step": 23024 }, { "epoch": 1.289338111770635, "grad_norm": 1.5760184526443481, "learning_rate": 9.920684210526316e-05, "loss": 0.6107, "step": 23025 }, { "epoch": 1.289394109082764, "grad_norm": 1.4542875289916992, "learning_rate": 9.920657894736842e-05, "loss": 0.5295, "step": 23026 }, { "epoch": 1.289450106394893, "grad_norm": 1.0801390409469604, "learning_rate": 9.92063157894737e-05, "loss": 0.4279, "step": 23027 }, { "epoch": 1.2895061037070221, "grad_norm": 1.3517122268676758, "learning_rate": 9.920605263157894e-05, "loss": 0.6221, "step": 23028 }, { "epoch": 1.2895621010191511, "grad_norm": 1.0612396001815796, "learning_rate": 9.920578947368422e-05, "loss": 0.379, "step": 23029 }, { "epoch": 1.2896180983312802, "grad_norm": 1.3833634853363037, "learning_rate": 9.920552631578948e-05, "loss": 0.5444, "step": 23030 }, { "epoch": 1.2896740956434092, "grad_norm": 1.5076874494552612, "learning_rate": 9.920526315789475e-05, "loss": 0.4223, "step": 23031 }, { "epoch": 1.2897300929555382, "grad_norm": 1.2255945205688477, "learning_rate": 9.920500000000001e-05, "loss": 0.4352, "step": 23032 }, { "epoch": 1.2897860902676672, "grad_norm": 16.56756591796875, "learning_rate": 9.920473684210527e-05, "loss": 0.4737, "step": 23033 }, { "epoch": 1.2898420875797962, "grad_norm": 1.5384169816970825, "learning_rate": 9.920447368421053e-05, "loss": 0.5292, "step": 23034 }, { "epoch": 1.2898980848919253, "grad_norm": 1.4383050203323364, "learning_rate": 9.92042105263158e-05, "loss": 0.5717, "step": 23035 }, { "epoch": 1.2899540822040543, "grad_norm": 1.4422905445098877, "learning_rate": 9.920394736842106e-05, "loss": 0.5326, "step": 23036 }, { "epoch": 1.2900100795161833, "grad_norm": 1.395366907119751, "learning_rate": 9.920368421052632e-05, "loss": 0.5032, "step": 23037 }, { "epoch": 1.2900660768283123, 
"grad_norm": 1.51004159450531, "learning_rate": 9.920342105263158e-05, "loss": 0.584, "step": 23038 }, { "epoch": 1.2901220741404413, "grad_norm": 12.25399112701416, "learning_rate": 9.920315789473684e-05, "loss": 0.4347, "step": 23039 }, { "epoch": 1.2901780714525704, "grad_norm": 1.4509413242340088, "learning_rate": 9.920289473684211e-05, "loss": 0.4863, "step": 23040 }, { "epoch": 1.2902340687646994, "grad_norm": 1.5943634510040283, "learning_rate": 9.920263157894737e-05, "loss": 0.47, "step": 23041 }, { "epoch": 1.2902900660768284, "grad_norm": 1.487524390220642, "learning_rate": 9.920236842105263e-05, "loss": 0.4692, "step": 23042 }, { "epoch": 1.2903460633889574, "grad_norm": 1.4222885370254517, "learning_rate": 9.92021052631579e-05, "loss": 0.4326, "step": 23043 }, { "epoch": 1.2904020607010864, "grad_norm": 1.3612477779388428, "learning_rate": 9.920184210526317e-05, "loss": 0.4119, "step": 23044 }, { "epoch": 1.2904580580132154, "grad_norm": 1.4671094417572021, "learning_rate": 9.920157894736843e-05, "loss": 0.4623, "step": 23045 }, { "epoch": 1.2905140553253442, "grad_norm": 1.35136079788208, "learning_rate": 9.920131578947369e-05, "loss": 0.4039, "step": 23046 }, { "epoch": 1.2905700526374733, "grad_norm": 1.349650502204895, "learning_rate": 9.920105263157895e-05, "loss": 0.4961, "step": 23047 }, { "epoch": 1.2906260499496023, "grad_norm": 1.610949158668518, "learning_rate": 9.920078947368422e-05, "loss": 0.4722, "step": 23048 }, { "epoch": 1.2906820472617313, "grad_norm": 1.4275761842727661, "learning_rate": 9.920052631578948e-05, "loss": 0.4182, "step": 23049 }, { "epoch": 1.2907380445738603, "grad_norm": 1.6514980792999268, "learning_rate": 9.920026315789475e-05, "loss": 0.5518, "step": 23050 }, { "epoch": 1.2907940418859893, "grad_norm": 1.4580165147781372, "learning_rate": 9.92e-05, "loss": 0.3794, "step": 23051 }, { "epoch": 1.2908500391981184, "grad_norm": 1.6363708972930908, "learning_rate": 9.919973684210527e-05, "loss": 0.4578, "step": 23052 }, 
{ "epoch": 1.2909060365102474, "grad_norm": 1.6622533798217773, "learning_rate": 9.919947368421053e-05, "loss": 0.7171, "step": 23053 }, { "epoch": 1.2909620338223764, "grad_norm": 1.8035329580307007, "learning_rate": 9.919921052631579e-05, "loss": 0.4881, "step": 23054 }, { "epoch": 1.2910180311345054, "grad_norm": 1.3802659511566162, "learning_rate": 9.919894736842105e-05, "loss": 0.4492, "step": 23055 }, { "epoch": 1.2910740284466344, "grad_norm": 2.016417980194092, "learning_rate": 9.919868421052631e-05, "loss": 0.5521, "step": 23056 }, { "epoch": 1.2911300257587635, "grad_norm": 1.479732632637024, "learning_rate": 9.919842105263158e-05, "loss": 0.4334, "step": 23057 }, { "epoch": 1.2911860230708925, "grad_norm": 1.2081102132797241, "learning_rate": 9.919815789473684e-05, "loss": 0.4467, "step": 23058 }, { "epoch": 1.2912420203830215, "grad_norm": 1.1472679376602173, "learning_rate": 9.919789473684212e-05, "loss": 0.3787, "step": 23059 }, { "epoch": 1.2912980176951505, "grad_norm": 1.3147450685501099, "learning_rate": 9.919763157894736e-05, "loss": 0.3942, "step": 23060 }, { "epoch": 1.2913540150072795, "grad_norm": 1.5052769184112549, "learning_rate": 9.919736842105264e-05, "loss": 0.4573, "step": 23061 }, { "epoch": 1.2914100123194086, "grad_norm": 1.358510136604309, "learning_rate": 9.91971052631579e-05, "loss": 0.5059, "step": 23062 }, { "epoch": 1.2914660096315376, "grad_norm": 1.321942925453186, "learning_rate": 9.919684210526317e-05, "loss": 0.5401, "step": 23063 }, { "epoch": 1.2915220069436666, "grad_norm": 1.363074779510498, "learning_rate": 9.919657894736843e-05, "loss": 0.4345, "step": 23064 }, { "epoch": 1.2915780042557956, "grad_norm": 1.3865578174591064, "learning_rate": 9.919631578947369e-05, "loss": 0.5602, "step": 23065 }, { "epoch": 1.2916340015679246, "grad_norm": 1.3393489122390747, "learning_rate": 9.919605263157895e-05, "loss": 0.3869, "step": 23066 }, { "epoch": 1.2916899988800536, "grad_norm": 1.5695127248764038, "learning_rate": 
9.919578947368422e-05, "loss": 0.5572, "step": 23067 }, { "epoch": 1.2917459961921827, "grad_norm": 1.1903185844421387, "learning_rate": 9.919552631578948e-05, "loss": 0.4136, "step": 23068 }, { "epoch": 1.2918019935043117, "grad_norm": 1.7370045185089111, "learning_rate": 9.919526315789474e-05, "loss": 0.6811, "step": 23069 }, { "epoch": 1.2918579908164407, "grad_norm": 1.3773322105407715, "learning_rate": 9.9195e-05, "loss": 0.4945, "step": 23070 }, { "epoch": 1.2919139881285697, "grad_norm": 1.3980776071548462, "learning_rate": 9.919473684210526e-05, "loss": 0.4945, "step": 23071 }, { "epoch": 1.2919699854406987, "grad_norm": 1.73771071434021, "learning_rate": 9.919447368421053e-05, "loss": 0.4702, "step": 23072 }, { "epoch": 1.2920259827528278, "grad_norm": 1.491405963897705, "learning_rate": 9.91942105263158e-05, "loss": 0.5025, "step": 23073 }, { "epoch": 1.2920819800649568, "grad_norm": 1.2875639200210571, "learning_rate": 9.919394736842105e-05, "loss": 0.3839, "step": 23074 }, { "epoch": 1.2921379773770858, "grad_norm": 1.4439479112625122, "learning_rate": 9.919368421052631e-05, "loss": 0.4741, "step": 23075 }, { "epoch": 1.2921939746892148, "grad_norm": 1.5555182695388794, "learning_rate": 9.919342105263159e-05, "loss": 0.547, "step": 23076 }, { "epoch": 1.2922499720013438, "grad_norm": 1.3721479177474976, "learning_rate": 9.919315789473685e-05, "loss": 0.3663, "step": 23077 }, { "epoch": 1.2923059693134729, "grad_norm": 4.716894626617432, "learning_rate": 9.91928947368421e-05, "loss": 0.4126, "step": 23078 }, { "epoch": 1.2923619666256019, "grad_norm": 1.2825888395309448, "learning_rate": 9.919263157894737e-05, "loss": 0.4151, "step": 23079 }, { "epoch": 1.292417963937731, "grad_norm": 1.594100832939148, "learning_rate": 9.919236842105264e-05, "loss": 0.5598, "step": 23080 }, { "epoch": 1.29247396124986, "grad_norm": 1.3940268754959106, "learning_rate": 9.91921052631579e-05, "loss": 0.4255, "step": 23081 }, { "epoch": 1.292529958561989, "grad_norm": 
1.435741901397705, "learning_rate": 9.919184210526317e-05, "loss": 0.5045, "step": 23082 }, { "epoch": 1.292585955874118, "grad_norm": 1.2281544208526611, "learning_rate": 9.919157894736842e-05, "loss": 0.4039, "step": 23083 }, { "epoch": 1.292641953186247, "grad_norm": 1.472573161125183, "learning_rate": 9.919131578947369e-05, "loss": 0.5481, "step": 23084 }, { "epoch": 1.292697950498376, "grad_norm": 1.5225231647491455, "learning_rate": 9.919105263157895e-05, "loss": 0.5484, "step": 23085 }, { "epoch": 1.292753947810505, "grad_norm": 1.4332185983657837, "learning_rate": 9.919078947368422e-05, "loss": 0.5739, "step": 23086 }, { "epoch": 1.292809945122634, "grad_norm": 1.1747983694076538, "learning_rate": 9.919052631578948e-05, "loss": 0.3832, "step": 23087 }, { "epoch": 1.292865942434763, "grad_norm": 1.521575927734375, "learning_rate": 9.919026315789473e-05, "loss": 0.4869, "step": 23088 }, { "epoch": 1.292921939746892, "grad_norm": 1.1403270959854126, "learning_rate": 9.919e-05, "loss": 0.3319, "step": 23089 }, { "epoch": 1.292977937059021, "grad_norm": 2.0384562015533447, "learning_rate": 9.918973684210526e-05, "loss": 0.4807, "step": 23090 }, { "epoch": 1.29303393437115, "grad_norm": 1.4302958250045776, "learning_rate": 9.918947368421054e-05, "loss": 0.5648, "step": 23091 }, { "epoch": 1.2930899316832791, "grad_norm": 1.2359665632247925, "learning_rate": 9.918921052631578e-05, "loss": 0.5447, "step": 23092 }, { "epoch": 1.2931459289954081, "grad_norm": 1.3534879684448242, "learning_rate": 9.918894736842106e-05, "loss": 0.6683, "step": 23093 }, { "epoch": 1.2932019263075372, "grad_norm": 1.5503642559051514, "learning_rate": 9.918868421052632e-05, "loss": 0.5193, "step": 23094 }, { "epoch": 1.2932579236196662, "grad_norm": 1.4643770456314087, "learning_rate": 9.918842105263159e-05, "loss": 0.4131, "step": 23095 }, { "epoch": 1.2933139209317952, "grad_norm": 2.064157247543335, "learning_rate": 9.918815789473685e-05, "loss": 0.4761, "step": 23096 }, { "epoch": 
1.2933699182439242, "grad_norm": 1.501900553703308, "learning_rate": 9.918789473684211e-05, "loss": 0.4655, "step": 23097 }, { "epoch": 1.2934259155560532, "grad_norm": 1.5062413215637207, "learning_rate": 9.918763157894737e-05, "loss": 0.4824, "step": 23098 }, { "epoch": 1.2934819128681823, "grad_norm": 2.126584053039551, "learning_rate": 9.918736842105264e-05, "loss": 0.4974, "step": 23099 }, { "epoch": 1.2935379101803113, "grad_norm": 1.206376075744629, "learning_rate": 9.91871052631579e-05, "loss": 0.4751, "step": 23100 }, { "epoch": 1.2935939074924403, "grad_norm": 1.2563661336898804, "learning_rate": 9.918684210526316e-05, "loss": 0.4306, "step": 23101 }, { "epoch": 1.2936499048045693, "grad_norm": 1.3478997945785522, "learning_rate": 9.918657894736842e-05, "loss": 0.4101, "step": 23102 }, { "epoch": 1.2937059021166983, "grad_norm": 1.3861345052719116, "learning_rate": 9.91863157894737e-05, "loss": 0.4732, "step": 23103 }, { "epoch": 1.2937618994288274, "grad_norm": 1.8398720026016235, "learning_rate": 9.918605263157895e-05, "loss": 0.5285, "step": 23104 }, { "epoch": 1.2938178967409564, "grad_norm": 1.252652645111084, "learning_rate": 9.918578947368423e-05, "loss": 0.4509, "step": 23105 }, { "epoch": 1.2938738940530854, "grad_norm": 1.211471676826477, "learning_rate": 9.918552631578947e-05, "loss": 0.4291, "step": 23106 }, { "epoch": 1.2939298913652144, "grad_norm": 1.5865445137023926, "learning_rate": 9.918526315789473e-05, "loss": 0.6073, "step": 23107 }, { "epoch": 1.2939858886773434, "grad_norm": 1.4167582988739014, "learning_rate": 9.9185e-05, "loss": 0.4617, "step": 23108 }, { "epoch": 1.2940418859894725, "grad_norm": 1.1195567846298218, "learning_rate": 9.918473684210527e-05, "loss": 0.4275, "step": 23109 }, { "epoch": 1.2940978833016015, "grad_norm": 1.261151909828186, "learning_rate": 9.918447368421053e-05, "loss": 0.4719, "step": 23110 }, { "epoch": 1.2941538806137305, "grad_norm": 1.4415532350540161, "learning_rate": 9.918421052631579e-05, "loss": 
0.4583, "step": 23111 }, { "epoch": 1.2942098779258595, "grad_norm": 1.394088864326477, "learning_rate": 9.918394736842106e-05, "loss": 0.488, "step": 23112 }, { "epoch": 1.2942658752379885, "grad_norm": 1.8100812435150146, "learning_rate": 9.918368421052632e-05, "loss": 0.6291, "step": 23113 }, { "epoch": 1.2943218725501175, "grad_norm": 1.3433568477630615, "learning_rate": 9.918342105263159e-05, "loss": 0.5986, "step": 23114 }, { "epoch": 1.2943778698622466, "grad_norm": 1.442548155784607, "learning_rate": 9.918315789473684e-05, "loss": 0.412, "step": 23115 }, { "epoch": 1.2944338671743756, "grad_norm": 1.5105018615722656, "learning_rate": 9.918289473684211e-05, "loss": 0.3668, "step": 23116 }, { "epoch": 1.2944898644865046, "grad_norm": 1.1323000192642212, "learning_rate": 9.918263157894737e-05, "loss": 0.4888, "step": 23117 }, { "epoch": 1.2945458617986336, "grad_norm": 1.354244589805603, "learning_rate": 9.918236842105264e-05, "loss": 0.4052, "step": 23118 }, { "epoch": 1.2946018591107626, "grad_norm": 1.3402349948883057, "learning_rate": 9.91821052631579e-05, "loss": 0.3926, "step": 23119 }, { "epoch": 1.2946578564228917, "grad_norm": 1.3419550657272339, "learning_rate": 9.918184210526316e-05, "loss": 0.437, "step": 23120 }, { "epoch": 1.2947138537350207, "grad_norm": 2.1664059162139893, "learning_rate": 9.918157894736842e-05, "loss": 0.6513, "step": 23121 }, { "epoch": 1.2947698510471497, "grad_norm": 1.4331457614898682, "learning_rate": 9.91813157894737e-05, "loss": 0.4336, "step": 23122 }, { "epoch": 1.2948258483592787, "grad_norm": 1.2470271587371826, "learning_rate": 9.918105263157896e-05, "loss": 0.4391, "step": 23123 }, { "epoch": 1.2948818456714077, "grad_norm": 1.4891743659973145, "learning_rate": 9.918078947368422e-05, "loss": 0.4758, "step": 23124 }, { "epoch": 1.2949378429835368, "grad_norm": 1.537827491760254, "learning_rate": 9.918052631578948e-05, "loss": 0.5983, "step": 23125 }, { "epoch": 1.2949938402956658, "grad_norm": 1.628988265991211, 
"learning_rate": 9.918026315789474e-05, "loss": 0.4892, "step": 23126 }, { "epoch": 1.2950498376077948, "grad_norm": 1.4828397035598755, "learning_rate": 9.918000000000001e-05, "loss": 0.4178, "step": 23127 }, { "epoch": 1.2951058349199238, "grad_norm": 1.4297609329223633, "learning_rate": 9.917973684210527e-05, "loss": 0.6309, "step": 23128 }, { "epoch": 1.2951618322320528, "grad_norm": 1.074594497680664, "learning_rate": 9.917947368421053e-05, "loss": 0.3676, "step": 23129 }, { "epoch": 1.2952178295441819, "grad_norm": 1.2312418222427368, "learning_rate": 9.917921052631579e-05, "loss": 0.3847, "step": 23130 }, { "epoch": 1.2952738268563109, "grad_norm": 1.3568131923675537, "learning_rate": 9.917894736842106e-05, "loss": 0.5376, "step": 23131 }, { "epoch": 1.29532982416844, "grad_norm": 1.3103629350662231, "learning_rate": 9.917868421052632e-05, "loss": 0.5244, "step": 23132 }, { "epoch": 1.295385821480569, "grad_norm": 1.4107483625411987, "learning_rate": 9.917842105263158e-05, "loss": 0.462, "step": 23133 }, { "epoch": 1.295441818792698, "grad_norm": 1.229911208152771, "learning_rate": 9.917815789473684e-05, "loss": 0.303, "step": 23134 }, { "epoch": 1.295497816104827, "grad_norm": 2.078338623046875, "learning_rate": 9.917789473684211e-05, "loss": 0.4735, "step": 23135 }, { "epoch": 1.295553813416956, "grad_norm": 1.6818737983703613, "learning_rate": 9.917763157894737e-05, "loss": 0.4912, "step": 23136 }, { "epoch": 1.295609810729085, "grad_norm": 1.308260202407837, "learning_rate": 9.917736842105265e-05, "loss": 0.5235, "step": 23137 }, { "epoch": 1.295665808041214, "grad_norm": 1.2334599494934082, "learning_rate": 9.91771052631579e-05, "loss": 0.3955, "step": 23138 }, { "epoch": 1.295721805353343, "grad_norm": 1.2334140539169312, "learning_rate": 9.917684210526317e-05, "loss": 0.3996, "step": 23139 }, { "epoch": 1.295777802665472, "grad_norm": 1.3915631771087646, "learning_rate": 9.917657894736843e-05, "loss": 0.4741, "step": 23140 }, { "epoch": 
1.295833799977601, "grad_norm": 1.263722538948059, "learning_rate": 9.917631578947369e-05, "loss": 0.5301, "step": 23141 }, { "epoch": 1.29588979728973, "grad_norm": 1.5782407522201538, "learning_rate": 9.917605263157896e-05, "loss": 0.5274, "step": 23142 }, { "epoch": 1.295945794601859, "grad_norm": 1.4976212978363037, "learning_rate": 9.91757894736842e-05, "loss": 0.4581, "step": 23143 }, { "epoch": 1.2960017919139881, "grad_norm": 1.3090317249298096, "learning_rate": 9.917552631578948e-05, "loss": 0.4289, "step": 23144 }, { "epoch": 1.2960577892261171, "grad_norm": 1.5705372095108032, "learning_rate": 9.917526315789474e-05, "loss": 0.4602, "step": 23145 }, { "epoch": 1.2961137865382462, "grad_norm": 1.349718451499939, "learning_rate": 9.917500000000001e-05, "loss": 0.5322, "step": 23146 }, { "epoch": 1.2961697838503752, "grad_norm": 1.5553501844406128, "learning_rate": 9.917473684210526e-05, "loss": 0.5053, "step": 23147 }, { "epoch": 1.2962257811625042, "grad_norm": 1.4202433824539185, "learning_rate": 9.917447368421053e-05, "loss": 0.4849, "step": 23148 }, { "epoch": 1.2962817784746332, "grad_norm": 10.982044219970703, "learning_rate": 9.917421052631579e-05, "loss": 0.4594, "step": 23149 }, { "epoch": 1.2963377757867622, "grad_norm": 2.137157917022705, "learning_rate": 9.917394736842106e-05, "loss": 0.4211, "step": 23150 }, { "epoch": 1.2963937730988913, "grad_norm": 1.470205545425415, "learning_rate": 9.917368421052632e-05, "loss": 0.5466, "step": 23151 }, { "epoch": 1.2964497704110203, "grad_norm": 1.2946199178695679, "learning_rate": 9.917342105263158e-05, "loss": 0.3932, "step": 23152 }, { "epoch": 1.2965057677231493, "grad_norm": 1.8522518873214722, "learning_rate": 9.917315789473684e-05, "loss": 0.5165, "step": 23153 }, { "epoch": 1.2965617650352783, "grad_norm": 1.4067450761795044, "learning_rate": 9.917289473684212e-05, "loss": 0.4807, "step": 23154 }, { "epoch": 1.2966177623474073, "grad_norm": 1.2604808807373047, "learning_rate": 
9.917263157894738e-05, "loss": 0.4159, "step": 23155 }, { "epoch": 1.2966737596595364, "grad_norm": 1.3013297319412231, "learning_rate": 9.917236842105264e-05, "loss": 0.4577, "step": 23156 }, { "epoch": 1.2967297569716654, "grad_norm": 1.9174307584762573, "learning_rate": 9.91721052631579e-05, "loss": 0.5487, "step": 23157 }, { "epoch": 1.2967857542837944, "grad_norm": 1.2069087028503418, "learning_rate": 9.917184210526316e-05, "loss": 0.4228, "step": 23158 }, { "epoch": 1.2968417515959234, "grad_norm": 1.5480200052261353, "learning_rate": 9.917157894736843e-05, "loss": 0.5662, "step": 23159 }, { "epoch": 1.2968977489080524, "grad_norm": 1.130193829536438, "learning_rate": 9.917131578947369e-05, "loss": 0.4401, "step": 23160 }, { "epoch": 1.2969537462201814, "grad_norm": 1.2581520080566406, "learning_rate": 9.917105263157895e-05, "loss": 0.3823, "step": 23161 }, { "epoch": 1.2970097435323105, "grad_norm": 1.2995219230651855, "learning_rate": 9.917078947368421e-05, "loss": 0.39, "step": 23162 }, { "epoch": 1.2970657408444395, "grad_norm": 1.1968741416931152, "learning_rate": 9.917052631578948e-05, "loss": 0.371, "step": 23163 }, { "epoch": 1.2971217381565685, "grad_norm": 1.3204034566879272, "learning_rate": 9.917026315789474e-05, "loss": 0.4821, "step": 23164 }, { "epoch": 1.2971777354686975, "grad_norm": 1.4278463125228882, "learning_rate": 9.917e-05, "loss": 0.4841, "step": 23165 }, { "epoch": 1.2972337327808265, "grad_norm": 1.7300227880477905, "learning_rate": 9.916973684210526e-05, "loss": 0.4935, "step": 23166 }, { "epoch": 1.2972897300929556, "grad_norm": 1.3219255208969116, "learning_rate": 9.916947368421053e-05, "loss": 0.4569, "step": 23167 }, { "epoch": 1.2973457274050846, "grad_norm": 1.4268701076507568, "learning_rate": 9.91692105263158e-05, "loss": 0.5079, "step": 23168 }, { "epoch": 1.2974017247172136, "grad_norm": 1.519562005996704, "learning_rate": 9.916894736842107e-05, "loss": 0.4115, "step": 23169 }, { "epoch": 1.2974577220293426, "grad_norm": 
1.5569610595703125, "learning_rate": 9.916868421052631e-05, "loss": 0.4973, "step": 23170 }, { "epoch": 1.2975137193414716, "grad_norm": 1.2861106395721436, "learning_rate": 9.916842105263159e-05, "loss": 0.4534, "step": 23171 }, { "epoch": 1.2975697166536007, "grad_norm": 1.3012754917144775, "learning_rate": 9.916815789473685e-05, "loss": 0.5036, "step": 23172 }, { "epoch": 1.2976257139657297, "grad_norm": 1.5721306800842285, "learning_rate": 9.916789473684212e-05, "loss": 0.5698, "step": 23173 }, { "epoch": 1.2976817112778587, "grad_norm": 1.498849868774414, "learning_rate": 9.916763157894738e-05, "loss": 0.5996, "step": 23174 }, { "epoch": 1.2977377085899877, "grad_norm": 1.4276922941207886, "learning_rate": 9.916736842105263e-05, "loss": 0.4091, "step": 23175 }, { "epoch": 1.2977937059021167, "grad_norm": 1.5499815940856934, "learning_rate": 9.91671052631579e-05, "loss": 0.3802, "step": 23176 }, { "epoch": 1.2978497032142458, "grad_norm": 1.2192739248275757, "learning_rate": 9.916684210526316e-05, "loss": 0.5103, "step": 23177 }, { "epoch": 1.2979057005263748, "grad_norm": 1.1659903526306152, "learning_rate": 9.916657894736843e-05, "loss": 0.4654, "step": 23178 }, { "epoch": 1.2979616978385038, "grad_norm": 1.4987658262252808, "learning_rate": 9.916631578947369e-05, "loss": 0.4804, "step": 23179 }, { "epoch": 1.2980176951506328, "grad_norm": 2.007099151611328, "learning_rate": 9.916605263157895e-05, "loss": 0.5708, "step": 23180 }, { "epoch": 1.2980736924627618, "grad_norm": 1.3453506231307983, "learning_rate": 9.916578947368421e-05, "loss": 0.4868, "step": 23181 }, { "epoch": 1.2981296897748908, "grad_norm": 1.3088680505752563, "learning_rate": 9.916552631578948e-05, "loss": 0.4905, "step": 23182 }, { "epoch": 1.2981856870870199, "grad_norm": 1.4027286767959595, "learning_rate": 9.916526315789474e-05, "loss": 0.6571, "step": 23183 }, { "epoch": 1.2982416843991489, "grad_norm": 1.422242522239685, "learning_rate": 9.9165e-05, "loss": 0.4173, "step": 23184 }, { 
"epoch": 1.298297681711278, "grad_norm": 1.295256495475769, "learning_rate": 9.916473684210526e-05, "loss": 0.4624, "step": 23185 }, { "epoch": 1.298353679023407, "grad_norm": 1.3469479084014893, "learning_rate": 9.916447368421054e-05, "loss": 0.6286, "step": 23186 }, { "epoch": 1.298409676335536, "grad_norm": 1.4076720476150513, "learning_rate": 9.91642105263158e-05, "loss": 0.4399, "step": 23187 }, { "epoch": 1.298465673647665, "grad_norm": 1.4321870803833008, "learning_rate": 9.916394736842106e-05, "loss": 0.4432, "step": 23188 }, { "epoch": 1.298521670959794, "grad_norm": 1.5671515464782715, "learning_rate": 9.916368421052632e-05, "loss": 0.4282, "step": 23189 }, { "epoch": 1.298577668271923, "grad_norm": 1.2739958763122559, "learning_rate": 9.916342105263159e-05, "loss": 0.3489, "step": 23190 }, { "epoch": 1.298633665584052, "grad_norm": 1.3998924493789673, "learning_rate": 9.916315789473685e-05, "loss": 0.4127, "step": 23191 }, { "epoch": 1.298689662896181, "grad_norm": 1.3785351514816284, "learning_rate": 9.916289473684211e-05, "loss": 0.452, "step": 23192 }, { "epoch": 1.29874566020831, "grad_norm": 1.5077903270721436, "learning_rate": 9.916263157894737e-05, "loss": 0.4126, "step": 23193 }, { "epoch": 1.298801657520439, "grad_norm": 1.72370445728302, "learning_rate": 9.916236842105263e-05, "loss": 0.513, "step": 23194 }, { "epoch": 1.298857654832568, "grad_norm": 1.2757314443588257, "learning_rate": 9.91621052631579e-05, "loss": 0.4414, "step": 23195 }, { "epoch": 1.2989136521446971, "grad_norm": 6.817765712738037, "learning_rate": 9.916184210526316e-05, "loss": 0.4587, "step": 23196 }, { "epoch": 1.2989696494568261, "grad_norm": 1.236328363418579, "learning_rate": 9.916157894736843e-05, "loss": 0.5042, "step": 23197 }, { "epoch": 1.2990256467689552, "grad_norm": 1.5084718465805054, "learning_rate": 9.916131578947368e-05, "loss": 0.6613, "step": 23198 }, { "epoch": 1.2990816440810842, "grad_norm": 1.5099937915802002, "learning_rate": 9.916105263157895e-05, 
"loss": 0.4576, "step": 23199 }, { "epoch": 1.2991376413932132, "grad_norm": 1.9682420492172241, "learning_rate": 9.916078947368421e-05, "loss": 0.508, "step": 23200 }, { "epoch": 1.2991936387053422, "grad_norm": 1.282922625541687, "learning_rate": 9.916052631578949e-05, "loss": 0.5232, "step": 23201 }, { "epoch": 1.2992496360174712, "grad_norm": 1.4363484382629395, "learning_rate": 9.916026315789473e-05, "loss": 0.5648, "step": 23202 }, { "epoch": 1.2993056333296003, "grad_norm": 1.1488429307937622, "learning_rate": 9.916e-05, "loss": 0.5025, "step": 23203 }, { "epoch": 1.2993616306417293, "grad_norm": 1.7818005084991455, "learning_rate": 9.915973684210527e-05, "loss": 0.7077, "step": 23204 }, { "epoch": 1.2994176279538583, "grad_norm": 1.6766014099121094, "learning_rate": 9.915947368421054e-05, "loss": 0.4379, "step": 23205 }, { "epoch": 1.2994736252659873, "grad_norm": 1.5596418380737305, "learning_rate": 9.91592105263158e-05, "loss": 0.4459, "step": 23206 }, { "epoch": 1.2995296225781163, "grad_norm": 1.570223093032837, "learning_rate": 9.915894736842106e-05, "loss": 0.5508, "step": 23207 }, { "epoch": 1.2995856198902453, "grad_norm": 1.3504315614700317, "learning_rate": 9.915868421052632e-05, "loss": 0.4035, "step": 23208 }, { "epoch": 1.2996416172023744, "grad_norm": 1.6046736240386963, "learning_rate": 9.915842105263158e-05, "loss": 0.3972, "step": 23209 }, { "epoch": 1.2996976145145034, "grad_norm": 1.485824465751648, "learning_rate": 9.915815789473685e-05, "loss": 0.4442, "step": 23210 }, { "epoch": 1.2997536118266324, "grad_norm": 1.156082272529602, "learning_rate": 9.915789473684211e-05, "loss": 0.4251, "step": 23211 }, { "epoch": 1.2998096091387614, "grad_norm": 1.5914150476455688, "learning_rate": 9.915763157894737e-05, "loss": 0.5364, "step": 23212 }, { "epoch": 1.2998656064508904, "grad_norm": 1.5587588548660278, "learning_rate": 9.915736842105263e-05, "loss": 0.5468, "step": 23213 }, { "epoch": 1.2999216037630195, "grad_norm": 1.4740647077560425, 
"learning_rate": 9.91571052631579e-05, "loss": 0.4464, "step": 23214 }, { "epoch": 1.2999776010751485, "grad_norm": 1.7286256551742554, "learning_rate": 9.915684210526316e-05, "loss": 0.5414, "step": 23215 }, { "epoch": 1.3000335983872775, "grad_norm": 1.2766107320785522, "learning_rate": 9.915657894736842e-05, "loss": 0.4173, "step": 23216 }, { "epoch": 1.3000895956994065, "grad_norm": 6.632284641265869, "learning_rate": 9.915631578947368e-05, "loss": 0.5423, "step": 23217 }, { "epoch": 1.3001455930115355, "grad_norm": 1.4115962982177734, "learning_rate": 9.915605263157896e-05, "loss": 0.5377, "step": 23218 }, { "epoch": 1.3002015903236646, "grad_norm": 1.3499791622161865, "learning_rate": 9.915578947368422e-05, "loss": 0.4549, "step": 23219 }, { "epoch": 1.3002575876357936, "grad_norm": 1.404026746749878, "learning_rate": 9.915552631578948e-05, "loss": 0.5127, "step": 23220 }, { "epoch": 1.3003135849479226, "grad_norm": 8.091444969177246, "learning_rate": 9.915526315789474e-05, "loss": 0.3913, "step": 23221 }, { "epoch": 1.3003695822600516, "grad_norm": 1.43260657787323, "learning_rate": 9.915500000000001e-05, "loss": 0.4801, "step": 23222 }, { "epoch": 1.3004255795721806, "grad_norm": 1.3025494813919067, "learning_rate": 9.915473684210527e-05, "loss": 0.3282, "step": 23223 }, { "epoch": 1.3004815768843097, "grad_norm": 1.3490900993347168, "learning_rate": 9.915447368421054e-05, "loss": 0.4077, "step": 23224 }, { "epoch": 1.3005375741964387, "grad_norm": 1.1420457363128662, "learning_rate": 9.915421052631579e-05, "loss": 0.3791, "step": 23225 }, { "epoch": 1.3005935715085677, "grad_norm": 1.450870156288147, "learning_rate": 9.915394736842106e-05, "loss": 0.3893, "step": 23226 }, { "epoch": 1.3006495688206967, "grad_norm": 1.3151649236679077, "learning_rate": 9.915368421052632e-05, "loss": 0.4269, "step": 23227 }, { "epoch": 1.3007055661328257, "grad_norm": 1.3947206735610962, "learning_rate": 9.915342105263158e-05, "loss": 0.5421, "step": 23228 }, { "epoch": 
1.3007615634449547, "grad_norm": 1.2942920923233032, "learning_rate": 9.915315789473685e-05, "loss": 0.4889, "step": 23229 }, { "epoch": 1.3008175607570838, "grad_norm": 1.3855652809143066, "learning_rate": 9.91528947368421e-05, "loss": 0.4664, "step": 23230 }, { "epoch": 1.3008735580692128, "grad_norm": 1.5210803747177124, "learning_rate": 9.915263157894737e-05, "loss": 0.447, "step": 23231 }, { "epoch": 1.3009295553813418, "grad_norm": 1.2672810554504395, "learning_rate": 9.915236842105263e-05, "loss": 0.4659, "step": 23232 }, { "epoch": 1.3009855526934708, "grad_norm": 1.268456220626831, "learning_rate": 9.91521052631579e-05, "loss": 0.3986, "step": 23233 }, { "epoch": 1.3010415500055998, "grad_norm": 1.3206121921539307, "learning_rate": 9.915184210526317e-05, "loss": 0.4101, "step": 23234 }, { "epoch": 1.3010975473177289, "grad_norm": 1.2786929607391357, "learning_rate": 9.915157894736843e-05, "loss": 0.412, "step": 23235 }, { "epoch": 1.3011535446298579, "grad_norm": 1.324182152748108, "learning_rate": 9.915131578947369e-05, "loss": 0.5193, "step": 23236 }, { "epoch": 1.301209541941987, "grad_norm": 1.262039303779602, "learning_rate": 9.915105263157896e-05, "loss": 0.3655, "step": 23237 }, { "epoch": 1.301265539254116, "grad_norm": 1.12849760055542, "learning_rate": 9.915078947368422e-05, "loss": 0.371, "step": 23238 }, { "epoch": 1.301321536566245, "grad_norm": 1.2479850053787231, "learning_rate": 9.915052631578948e-05, "loss": 0.4755, "step": 23239 }, { "epoch": 1.301377533878374, "grad_norm": 1.3527796268463135, "learning_rate": 9.915026315789474e-05, "loss": 0.5213, "step": 23240 }, { "epoch": 1.301433531190503, "grad_norm": 1.407912015914917, "learning_rate": 9.915000000000001e-05, "loss": 0.4465, "step": 23241 }, { "epoch": 1.301489528502632, "grad_norm": 1.2366207838058472, "learning_rate": 9.914973684210527e-05, "loss": 0.3874, "step": 23242 }, { "epoch": 1.301545525814761, "grad_norm": 1.6261301040649414, "learning_rate": 9.914947368421053e-05, 
"loss": 0.4496, "step": 23243 }, { "epoch": 1.30160152312689, "grad_norm": 2.06445574760437, "learning_rate": 9.914921052631579e-05, "loss": 0.7028, "step": 23244 }, { "epoch": 1.301657520439019, "grad_norm": 1.4373807907104492, "learning_rate": 9.914894736842105e-05, "loss": 0.4088, "step": 23245 }, { "epoch": 1.301713517751148, "grad_norm": 1.5163663625717163, "learning_rate": 9.914868421052632e-05, "loss": 0.4876, "step": 23246 }, { "epoch": 1.301769515063277, "grad_norm": 1.2399814128875732, "learning_rate": 9.914842105263158e-05, "loss": 0.4709, "step": 23247 }, { "epoch": 1.3018255123754061, "grad_norm": 1.331455945968628, "learning_rate": 9.914815789473684e-05, "loss": 0.4659, "step": 23248 }, { "epoch": 1.3018815096875351, "grad_norm": 1.4218822717666626, "learning_rate": 9.91478947368421e-05, "loss": 0.4753, "step": 23249 }, { "epoch": 1.3019375069996642, "grad_norm": 1.5947976112365723, "learning_rate": 9.914763157894738e-05, "loss": 0.5404, "step": 23250 }, { "epoch": 1.3019935043117932, "grad_norm": 1.4025335311889648, "learning_rate": 9.914736842105264e-05, "loss": 0.4188, "step": 23251 }, { "epoch": 1.3020495016239222, "grad_norm": 1.6437119245529175, "learning_rate": 9.914710526315791e-05, "loss": 0.5713, "step": 23252 }, { "epoch": 1.302105498936051, "grad_norm": 1.9255142211914062, "learning_rate": 9.914684210526316e-05, "loss": 0.6225, "step": 23253 }, { "epoch": 1.30216149624818, "grad_norm": 1.7305785417556763, "learning_rate": 9.914657894736843e-05, "loss": 0.4303, "step": 23254 }, { "epoch": 1.302217493560309, "grad_norm": 1.2466832399368286, "learning_rate": 9.914631578947369e-05, "loss": 0.4593, "step": 23255 }, { "epoch": 1.302273490872438, "grad_norm": 1.3282427787780762, "learning_rate": 9.914605263157896e-05, "loss": 0.4284, "step": 23256 }, { "epoch": 1.302329488184567, "grad_norm": 1.3135789632797241, "learning_rate": 9.914578947368421e-05, "loss": 0.5275, "step": 23257 }, { "epoch": 1.302385485496696, "grad_norm": 1.4246985912322998, 
"learning_rate": 9.914552631578948e-05, "loss": 0.4657, "step": 23258 }, { "epoch": 1.302441482808825, "grad_norm": 1.2614291906356812, "learning_rate": 9.914526315789474e-05, "loss": 0.4454, "step": 23259 }, { "epoch": 1.3024974801209541, "grad_norm": 1.3540866374969482, "learning_rate": 9.914500000000001e-05, "loss": 0.5905, "step": 23260 }, { "epoch": 1.3025534774330831, "grad_norm": 1.4114477634429932, "learning_rate": 9.914473684210527e-05, "loss": 0.4183, "step": 23261 }, { "epoch": 1.3026094747452122, "grad_norm": 1.3629343509674072, "learning_rate": 9.914447368421052e-05, "loss": 0.5109, "step": 23262 }, { "epoch": 1.3026654720573412, "grad_norm": 1.3114348649978638, "learning_rate": 9.914421052631579e-05, "loss": 0.404, "step": 23263 }, { "epoch": 1.3027214693694702, "grad_norm": 1.1841384172439575, "learning_rate": 9.914394736842105e-05, "loss": 0.4458, "step": 23264 }, { "epoch": 1.3027774666815992, "grad_norm": 1.4203022718429565, "learning_rate": 9.914368421052633e-05, "loss": 0.5124, "step": 23265 }, { "epoch": 1.3028334639937282, "grad_norm": 1.2591602802276611, "learning_rate": 9.914342105263159e-05, "loss": 0.3617, "step": 23266 }, { "epoch": 1.3028894613058573, "grad_norm": 1.407633900642395, "learning_rate": 9.914315789473685e-05, "loss": 0.5957, "step": 23267 }, { "epoch": 1.3029454586179863, "grad_norm": 1.4555846452713013, "learning_rate": 9.91428947368421e-05, "loss": 0.5717, "step": 23268 }, { "epoch": 1.3030014559301153, "grad_norm": 1.4491373300552368, "learning_rate": 9.914263157894738e-05, "loss": 0.5847, "step": 23269 }, { "epoch": 1.3030574532422443, "grad_norm": 1.156622052192688, "learning_rate": 9.914236842105264e-05, "loss": 0.4499, "step": 23270 }, { "epoch": 1.3031134505543733, "grad_norm": 1.415259599685669, "learning_rate": 9.91421052631579e-05, "loss": 0.4797, "step": 23271 }, { "epoch": 1.3031694478665024, "grad_norm": 1.3011866807937622, "learning_rate": 9.914184210526316e-05, "loss": 0.5682, "step": 23272 }, { "epoch": 
1.3032254451786314, "grad_norm": 1.2020739316940308, "learning_rate": 9.914157894736843e-05, "loss": 0.4119, "step": 23273 }, { "epoch": 1.3032814424907604, "grad_norm": 1.252511739730835, "learning_rate": 9.914131578947369e-05, "loss": 0.4318, "step": 23274 }, { "epoch": 1.3033374398028894, "grad_norm": 1.3260678052902222, "learning_rate": 9.914105263157895e-05, "loss": 0.456, "step": 23275 }, { "epoch": 1.3033934371150184, "grad_norm": 1.4194862842559814, "learning_rate": 9.914078947368421e-05, "loss": 0.3703, "step": 23276 }, { "epoch": 1.3034494344271474, "grad_norm": 4.396236419677734, "learning_rate": 9.914052631578948e-05, "loss": 0.5069, "step": 23277 }, { "epoch": 1.3035054317392765, "grad_norm": 1.5681681632995605, "learning_rate": 9.914026315789474e-05, "loss": 0.3922, "step": 23278 }, { "epoch": 1.3035614290514055, "grad_norm": 1.3979411125183105, "learning_rate": 9.914e-05, "loss": 0.4714, "step": 23279 }, { "epoch": 1.3036174263635345, "grad_norm": 1.3500561714172363, "learning_rate": 9.913973684210526e-05, "loss": 0.4894, "step": 23280 }, { "epoch": 1.3036734236756635, "grad_norm": 1.3158615827560425, "learning_rate": 9.913947368421052e-05, "loss": 0.4118, "step": 23281 }, { "epoch": 1.3037294209877925, "grad_norm": 1.5024524927139282, "learning_rate": 9.91392105263158e-05, "loss": 0.5543, "step": 23282 }, { "epoch": 1.3037854182999216, "grad_norm": 1.415050745010376, "learning_rate": 9.913894736842106e-05, "loss": 0.4649, "step": 23283 }, { "epoch": 1.3038414156120506, "grad_norm": 1.1837997436523438, "learning_rate": 9.913868421052633e-05, "loss": 0.3367, "step": 23284 }, { "epoch": 1.3038974129241796, "grad_norm": 1.5360347032546997, "learning_rate": 9.913842105263157e-05, "loss": 0.4749, "step": 23285 }, { "epoch": 1.3039534102363086, "grad_norm": 1.4858312606811523, "learning_rate": 9.913815789473685e-05, "loss": 0.5332, "step": 23286 }, { "epoch": 1.3040094075484376, "grad_norm": 1.4720791578292847, "learning_rate": 9.913789473684211e-05, 
"loss": 0.4504, "step": 23287 }, { "epoch": 1.3040654048605667, "grad_norm": 1.3621578216552734, "learning_rate": 9.913763157894738e-05, "loss": 0.4723, "step": 23288 }, { "epoch": 1.3041214021726957, "grad_norm": 1.3581650257110596, "learning_rate": 9.913736842105264e-05, "loss": 0.4798, "step": 23289 }, { "epoch": 1.3041773994848247, "grad_norm": 1.228894591331482, "learning_rate": 9.91371052631579e-05, "loss": 0.4839, "step": 23290 }, { "epoch": 1.3042333967969537, "grad_norm": 1.6072814464569092, "learning_rate": 9.913684210526316e-05, "loss": 0.5408, "step": 23291 }, { "epoch": 1.3042893941090827, "grad_norm": 1.314273476600647, "learning_rate": 9.913657894736843e-05, "loss": 0.428, "step": 23292 }, { "epoch": 1.3043453914212118, "grad_norm": 1.6539702415466309, "learning_rate": 9.91363157894737e-05, "loss": 0.4635, "step": 23293 }, { "epoch": 1.3044013887333408, "grad_norm": 1.5021799802780151, "learning_rate": 9.913605263157895e-05, "loss": 0.4711, "step": 23294 }, { "epoch": 1.3044573860454698, "grad_norm": 1.4261759519577026, "learning_rate": 9.913578947368421e-05, "loss": 0.3836, "step": 23295 }, { "epoch": 1.3045133833575988, "grad_norm": 1.695491909980774, "learning_rate": 9.913552631578947e-05, "loss": 0.43, "step": 23296 }, { "epoch": 1.3045693806697278, "grad_norm": 1.4659150838851929, "learning_rate": 9.913526315789475e-05, "loss": 0.4848, "step": 23297 }, { "epoch": 1.3046253779818568, "grad_norm": 1.2196158170700073, "learning_rate": 9.9135e-05, "loss": 0.479, "step": 23298 }, { "epoch": 1.3046813752939859, "grad_norm": 1.092777967453003, "learning_rate": 9.913473684210527e-05, "loss": 0.3985, "step": 23299 }, { "epoch": 1.3047373726061149, "grad_norm": 1.4441514015197754, "learning_rate": 9.913447368421052e-05, "loss": 0.5424, "step": 23300 }, { "epoch": 1.304793369918244, "grad_norm": 1.565144658088684, "learning_rate": 9.91342105263158e-05, "loss": 0.4983, "step": 23301 }, { "epoch": 1.304849367230373, "grad_norm": 1.3440790176391602, 
"learning_rate": 9.913394736842106e-05, "loss": 0.3815, "step": 23302 }, { "epoch": 1.304905364542502, "grad_norm": 1.4669320583343506, "learning_rate": 9.913368421052632e-05, "loss": 0.5202, "step": 23303 }, { "epoch": 1.304961361854631, "grad_norm": 1.23827064037323, "learning_rate": 9.913342105263158e-05, "loss": 0.4864, "step": 23304 }, { "epoch": 1.30501735916676, "grad_norm": 1.6211961507797241, "learning_rate": 9.913315789473685e-05, "loss": 0.6918, "step": 23305 }, { "epoch": 1.305073356478889, "grad_norm": 1.5507512092590332, "learning_rate": 9.913289473684211e-05, "loss": 0.4171, "step": 23306 }, { "epoch": 1.305129353791018, "grad_norm": 1.3851318359375, "learning_rate": 9.913263157894738e-05, "loss": 0.59, "step": 23307 }, { "epoch": 1.305185351103147, "grad_norm": 1.2288877964019775, "learning_rate": 9.913236842105263e-05, "loss": 0.574, "step": 23308 }, { "epoch": 1.305241348415276, "grad_norm": 1.1446152925491333, "learning_rate": 9.91321052631579e-05, "loss": 0.4483, "step": 23309 }, { "epoch": 1.305297345727405, "grad_norm": 1.4239884614944458, "learning_rate": 9.913184210526316e-05, "loss": 0.5982, "step": 23310 }, { "epoch": 1.305353343039534, "grad_norm": 1.4093072414398193, "learning_rate": 9.913157894736844e-05, "loss": 0.5805, "step": 23311 }, { "epoch": 1.3054093403516631, "grad_norm": 1.4945576190948486, "learning_rate": 9.913131578947368e-05, "loss": 0.4839, "step": 23312 }, { "epoch": 1.3054653376637921, "grad_norm": 1.3525145053863525, "learning_rate": 9.913105263157894e-05, "loss": 0.4899, "step": 23313 }, { "epoch": 1.3055213349759212, "grad_norm": 3.119349241256714, "learning_rate": 9.913078947368422e-05, "loss": 0.5881, "step": 23314 }, { "epoch": 1.3055773322880502, "grad_norm": 1.3717396259307861, "learning_rate": 9.913052631578948e-05, "loss": 0.4957, "step": 23315 }, { "epoch": 1.3056333296001792, "grad_norm": 1.430946707725525, "learning_rate": 9.913026315789475e-05, "loss": 0.5012, "step": 23316 }, { "epoch": 
1.3056893269123082, "grad_norm": 1.260886788368225, "learning_rate": 9.913e-05, "loss": 0.365, "step": 23317 }, { "epoch": 1.3057453242244372, "grad_norm": 1.356167197227478, "learning_rate": 9.912973684210527e-05, "loss": 0.4189, "step": 23318 }, { "epoch": 1.3058013215365663, "grad_norm": 1.2889000177383423, "learning_rate": 9.912947368421053e-05, "loss": 0.4207, "step": 23319 }, { "epoch": 1.3058573188486953, "grad_norm": 1.3411356210708618, "learning_rate": 9.91292105263158e-05, "loss": 0.5744, "step": 23320 }, { "epoch": 1.3059133161608243, "grad_norm": 1.5483306646347046, "learning_rate": 9.912894736842106e-05, "loss": 0.4928, "step": 23321 }, { "epoch": 1.3059693134729533, "grad_norm": 1.1878876686096191, "learning_rate": 9.912868421052632e-05, "loss": 0.4499, "step": 23322 }, { "epoch": 1.3060253107850823, "grad_norm": 1.2884507179260254, "learning_rate": 9.912842105263158e-05, "loss": 0.4424, "step": 23323 }, { "epoch": 1.3060813080972113, "grad_norm": 1.4340940713882446, "learning_rate": 9.912815789473685e-05, "loss": 0.3464, "step": 23324 }, { "epoch": 1.3061373054093404, "grad_norm": 1.3988069295883179, "learning_rate": 9.912789473684211e-05, "loss": 0.4857, "step": 23325 }, { "epoch": 1.3061933027214694, "grad_norm": 1.6385242938995361, "learning_rate": 9.912763157894737e-05, "loss": 0.4739, "step": 23326 }, { "epoch": 1.3062493000335984, "grad_norm": 1.5921891927719116, "learning_rate": 9.912736842105263e-05, "loss": 0.4359, "step": 23327 }, { "epoch": 1.3063052973457274, "grad_norm": 1.787548303604126, "learning_rate": 9.91271052631579e-05, "loss": 0.6647, "step": 23328 }, { "epoch": 1.3063612946578564, "grad_norm": 1.3793574571609497, "learning_rate": 9.912684210526317e-05, "loss": 0.5143, "step": 23329 }, { "epoch": 1.3064172919699855, "grad_norm": 1.6733791828155518, "learning_rate": 9.912657894736843e-05, "loss": 0.3727, "step": 23330 }, { "epoch": 1.3064732892821145, "grad_norm": 1.2901532649993896, "learning_rate": 9.912631578947368e-05, 
"loss": 0.3639, "step": 23331 }, { "epoch": 1.3065292865942435, "grad_norm": 1.5952303409576416, "learning_rate": 9.912605263157894e-05, "loss": 0.4403, "step": 23332 }, { "epoch": 1.3065852839063725, "grad_norm": 1.2913167476654053, "learning_rate": 9.912578947368422e-05, "loss": 0.4291, "step": 23333 }, { "epoch": 1.3066412812185015, "grad_norm": 1.5069353580474854, "learning_rate": 9.912552631578948e-05, "loss": 0.5659, "step": 23334 }, { "epoch": 1.3066972785306306, "grad_norm": 1.4820927381515503, "learning_rate": 9.912526315789474e-05, "loss": 0.6137, "step": 23335 }, { "epoch": 1.3067532758427596, "grad_norm": 1.560227870941162, "learning_rate": 9.9125e-05, "loss": 0.5614, "step": 23336 }, { "epoch": 1.3068092731548886, "grad_norm": 1.298736572265625, "learning_rate": 9.912473684210527e-05, "loss": 0.6134, "step": 23337 }, { "epoch": 1.3068652704670176, "grad_norm": 1.2767888307571411, "learning_rate": 9.912447368421053e-05, "loss": 0.4978, "step": 23338 }, { "epoch": 1.3069212677791466, "grad_norm": 1.344883680343628, "learning_rate": 9.91242105263158e-05, "loss": 0.4717, "step": 23339 }, { "epoch": 1.3069772650912757, "grad_norm": 1.1105804443359375, "learning_rate": 9.912394736842105e-05, "loss": 0.4326, "step": 23340 }, { "epoch": 1.3070332624034047, "grad_norm": 1.3717206716537476, "learning_rate": 9.912368421052632e-05, "loss": 0.4515, "step": 23341 }, { "epoch": 1.3070892597155337, "grad_norm": 1.207067847251892, "learning_rate": 9.912342105263158e-05, "loss": 0.3183, "step": 23342 }, { "epoch": 1.3071452570276627, "grad_norm": 1.5931912660598755, "learning_rate": 9.912315789473686e-05, "loss": 0.5249, "step": 23343 }, { "epoch": 1.3072012543397917, "grad_norm": 1.3497254848480225, "learning_rate": 9.912289473684212e-05, "loss": 0.4493, "step": 23344 }, { "epoch": 1.3072572516519207, "grad_norm": 1.2425850629806519, "learning_rate": 9.912263157894738e-05, "loss": 0.5717, "step": 23345 }, { "epoch": 1.3073132489640498, "grad_norm": 1.7057499885559082, 
"learning_rate": 9.912236842105264e-05, "loss": 0.6663, "step": 23346 }, { "epoch": 1.3073692462761788, "grad_norm": 1.1396676301956177, "learning_rate": 9.912210526315791e-05, "loss": 0.4206, "step": 23347 }, { "epoch": 1.3074252435883078, "grad_norm": 1.438910961151123, "learning_rate": 9.912184210526317e-05, "loss": 0.521, "step": 23348 }, { "epoch": 1.3074812409004368, "grad_norm": 1.5462173223495483, "learning_rate": 9.912157894736841e-05, "loss": 0.5551, "step": 23349 }, { "epoch": 1.3075372382125658, "grad_norm": 1.5258138179779053, "learning_rate": 9.912131578947369e-05, "loss": 0.4648, "step": 23350 }, { "epoch": 1.3075932355246949, "grad_norm": 1.5073356628417969, "learning_rate": 9.912105263157895e-05, "loss": 0.4733, "step": 23351 }, { "epoch": 1.3076492328368239, "grad_norm": 1.3298369646072388, "learning_rate": 9.912078947368422e-05, "loss": 0.5597, "step": 23352 }, { "epoch": 1.307705230148953, "grad_norm": 1.5825093984603882, "learning_rate": 9.912052631578948e-05, "loss": 0.5574, "step": 23353 }, { "epoch": 1.307761227461082, "grad_norm": 1.3182618618011475, "learning_rate": 9.912026315789474e-05, "loss": 0.5032, "step": 23354 }, { "epoch": 1.307817224773211, "grad_norm": 1.4643890857696533, "learning_rate": 9.912e-05, "loss": 0.6005, "step": 23355 }, { "epoch": 1.30787322208534, "grad_norm": 1.3440163135528564, "learning_rate": 9.911973684210527e-05, "loss": 0.6323, "step": 23356 }, { "epoch": 1.307929219397469, "grad_norm": 1.2643388509750366, "learning_rate": 9.911947368421053e-05, "loss": 0.3923, "step": 23357 }, { "epoch": 1.307985216709598, "grad_norm": 1.3173118829727173, "learning_rate": 9.911921052631579e-05, "loss": 0.5506, "step": 23358 }, { "epoch": 1.308041214021727, "grad_norm": 1.325744867324829, "learning_rate": 9.911894736842105e-05, "loss": 0.3308, "step": 23359 }, { "epoch": 1.308097211333856, "grad_norm": 1.4909019470214844, "learning_rate": 9.911868421052633e-05, "loss": 0.4128, "step": 23360 }, { "epoch": 1.308153208645985, 
"grad_norm": 1.180491328239441, "learning_rate": 9.911842105263159e-05, "loss": 0.5487, "step": 23361 }, { "epoch": 1.308209205958114, "grad_norm": 1.4190880060195923, "learning_rate": 9.911815789473686e-05, "loss": 0.3859, "step": 23362 }, { "epoch": 1.308265203270243, "grad_norm": 1.1913870573043823, "learning_rate": 9.91178947368421e-05, "loss": 0.4741, "step": 23363 }, { "epoch": 1.3083212005823721, "grad_norm": 1.2166470289230347, "learning_rate": 9.911763157894738e-05, "loss": 0.3879, "step": 23364 }, { "epoch": 1.3083771978945011, "grad_norm": 1.200237512588501, "learning_rate": 9.911736842105264e-05, "loss": 0.4206, "step": 23365 }, { "epoch": 1.3084331952066302, "grad_norm": 1.4765000343322754, "learning_rate": 9.91171052631579e-05, "loss": 0.4388, "step": 23366 }, { "epoch": 1.3084891925187592, "grad_norm": 1.7343289852142334, "learning_rate": 9.911684210526316e-05, "loss": 0.5143, "step": 23367 }, { "epoch": 1.3085451898308882, "grad_norm": 1.2377667427062988, "learning_rate": 9.911657894736842e-05, "loss": 0.4297, "step": 23368 }, { "epoch": 1.3086011871430172, "grad_norm": 1.5643452405929565, "learning_rate": 9.911631578947369e-05, "loss": 0.4753, "step": 23369 }, { "epoch": 1.3086571844551462, "grad_norm": 1.6922138929367065, "learning_rate": 9.911605263157895e-05, "loss": 0.4719, "step": 23370 }, { "epoch": 1.3087131817672752, "grad_norm": 1.2513607740402222, "learning_rate": 9.911578947368422e-05, "loss": 0.401, "step": 23371 }, { "epoch": 1.3087691790794043, "grad_norm": 1.266663908958435, "learning_rate": 9.911552631578947e-05, "loss": 0.4367, "step": 23372 }, { "epoch": 1.3088251763915333, "grad_norm": 1.1690346002578735, "learning_rate": 9.911526315789474e-05, "loss": 0.3916, "step": 23373 }, { "epoch": 1.3088811737036623, "grad_norm": 1.3350450992584229, "learning_rate": 9.9115e-05, "loss": 0.4463, "step": 23374 }, { "epoch": 1.3089371710157913, "grad_norm": 1.3662041425704956, "learning_rate": 9.911473684210528e-05, "loss": 0.4163, "step": 
23375 }, { "epoch": 1.3089931683279203, "grad_norm": 1.4900022745132446, "learning_rate": 9.911447368421054e-05, "loss": 0.4521, "step": 23376 }, { "epoch": 1.3090491656400491, "grad_norm": 1.3184181451797485, "learning_rate": 9.91142105263158e-05, "loss": 0.4926, "step": 23377 }, { "epoch": 1.3091051629521782, "grad_norm": 1.287375807762146, "learning_rate": 9.911394736842105e-05, "loss": 0.4877, "step": 23378 }, { "epoch": 1.3091611602643072, "grad_norm": 1.5189950466156006, "learning_rate": 9.911368421052633e-05, "loss": 0.5017, "step": 23379 }, { "epoch": 1.3092171575764362, "grad_norm": 1.565103530883789, "learning_rate": 9.911342105263159e-05, "loss": 0.6216, "step": 23380 }, { "epoch": 1.3092731548885652, "grad_norm": 1.495690941810608, "learning_rate": 9.911315789473685e-05, "loss": 0.5092, "step": 23381 }, { "epoch": 1.3093291522006942, "grad_norm": 1.5382283926010132, "learning_rate": 9.911289473684211e-05, "loss": 0.7018, "step": 23382 }, { "epoch": 1.3093851495128233, "grad_norm": 1.5031582117080688, "learning_rate": 9.911263157894737e-05, "loss": 0.5123, "step": 23383 }, { "epoch": 1.3094411468249523, "grad_norm": 1.1811401844024658, "learning_rate": 9.911236842105264e-05, "loss": 0.5173, "step": 23384 }, { "epoch": 1.3094971441370813, "grad_norm": 1.1787773370742798, "learning_rate": 9.91121052631579e-05, "loss": 0.4346, "step": 23385 }, { "epoch": 1.3095531414492103, "grad_norm": 1.3113309144973755, "learning_rate": 9.911184210526316e-05, "loss": 0.3488, "step": 23386 }, { "epoch": 1.3096091387613393, "grad_norm": 1.4525781869888306, "learning_rate": 9.911157894736842e-05, "loss": 0.4528, "step": 23387 }, { "epoch": 1.3096651360734684, "grad_norm": 1.2588186264038086, "learning_rate": 9.911131578947369e-05, "loss": 0.4124, "step": 23388 }, { "epoch": 1.3097211333855974, "grad_norm": 1.700707197189331, "learning_rate": 9.911105263157895e-05, "loss": 0.4042, "step": 23389 }, { "epoch": 1.3097771306977264, "grad_norm": 2.0704498291015625, 
"learning_rate": 9.911078947368421e-05, "loss": 0.5692, "step": 23390 }, { "epoch": 1.3098331280098554, "grad_norm": 1.415603756904602, "learning_rate": 9.911052631578947e-05, "loss": 0.376, "step": 23391 }, { "epoch": 1.3098891253219844, "grad_norm": 1.505025029182434, "learning_rate": 9.911026315789475e-05, "loss": 0.4533, "step": 23392 }, { "epoch": 1.3099451226341134, "grad_norm": 1.4242792129516602, "learning_rate": 9.911e-05, "loss": 0.4603, "step": 23393 }, { "epoch": 1.3100011199462425, "grad_norm": 1.1377774477005005, "learning_rate": 9.910973684210528e-05, "loss": 0.3962, "step": 23394 }, { "epoch": 1.3100571172583715, "grad_norm": 1.831237554550171, "learning_rate": 9.910947368421052e-05, "loss": 0.4864, "step": 23395 }, { "epoch": 1.3101131145705005, "grad_norm": 1.5969734191894531, "learning_rate": 9.91092105263158e-05, "loss": 0.4875, "step": 23396 }, { "epoch": 1.3101691118826295, "grad_norm": 1.4190731048583984, "learning_rate": 9.910894736842106e-05, "loss": 0.4684, "step": 23397 }, { "epoch": 1.3102251091947585, "grad_norm": 1.3128427267074585, "learning_rate": 9.910868421052633e-05, "loss": 0.5009, "step": 23398 }, { "epoch": 1.3102811065068876, "grad_norm": 1.2901273965835571, "learning_rate": 9.910842105263159e-05, "loss": 0.5086, "step": 23399 }, { "epoch": 1.3103371038190166, "grad_norm": 1.7611383199691772, "learning_rate": 9.910815789473684e-05, "loss": 0.461, "step": 23400 }, { "epoch": 1.3103931011311456, "grad_norm": 1.4367409944534302, "learning_rate": 9.910789473684211e-05, "loss": 0.6223, "step": 23401 }, { "epoch": 1.3104490984432746, "grad_norm": 1.338951587677002, "learning_rate": 9.910763157894737e-05, "loss": 0.4048, "step": 23402 }, { "epoch": 1.3105050957554036, "grad_norm": 1.4415680170059204, "learning_rate": 9.910736842105264e-05, "loss": 0.5566, "step": 23403 }, { "epoch": 1.3105610930675327, "grad_norm": 1.296931266784668, "learning_rate": 9.910710526315789e-05, "loss": 0.5373, "step": 23404 }, { "epoch": 
1.3106170903796617, "grad_norm": 1.7117139101028442, "learning_rate": 9.910684210526316e-05, "loss": 0.4762, "step": 23405 }, { "epoch": 1.3106730876917907, "grad_norm": 1.2670321464538574, "learning_rate": 9.910657894736842e-05, "loss": 0.4376, "step": 23406 }, { "epoch": 1.3107290850039197, "grad_norm": 1.2396507263183594, "learning_rate": 9.91063157894737e-05, "loss": 0.5328, "step": 23407 }, { "epoch": 1.3107850823160487, "grad_norm": 1.573288917541504, "learning_rate": 9.910605263157895e-05, "loss": 0.7282, "step": 23408 }, { "epoch": 1.3108410796281778, "grad_norm": 1.1891690492630005, "learning_rate": 9.910578947368421e-05, "loss": 0.4956, "step": 23409 }, { "epoch": 1.3108970769403068, "grad_norm": 1.2904771566390991, "learning_rate": 9.910552631578947e-05, "loss": 0.5151, "step": 23410 }, { "epoch": 1.3109530742524358, "grad_norm": 1.278806209564209, "learning_rate": 9.910526315789475e-05, "loss": 0.5051, "step": 23411 }, { "epoch": 1.3110090715645648, "grad_norm": 1.4368467330932617, "learning_rate": 9.910500000000001e-05, "loss": 0.447, "step": 23412 }, { "epoch": 1.3110650688766938, "grad_norm": 1.5844480991363525, "learning_rate": 9.910473684210527e-05, "loss": 0.6849, "step": 23413 }, { "epoch": 1.3111210661888228, "grad_norm": 1.3631752729415894, "learning_rate": 9.910447368421053e-05, "loss": 0.5, "step": 23414 }, { "epoch": 1.3111770635009519, "grad_norm": 1.7622642517089844, "learning_rate": 9.91042105263158e-05, "loss": 0.7695, "step": 23415 }, { "epoch": 1.3112330608130809, "grad_norm": 1.1892203092575073, "learning_rate": 9.910394736842106e-05, "loss": 0.3501, "step": 23416 }, { "epoch": 1.31128905812521, "grad_norm": 1.4493550062179565, "learning_rate": 9.910368421052632e-05, "loss": 0.5148, "step": 23417 }, { "epoch": 1.311345055437339, "grad_norm": 2.0674352645874023, "learning_rate": 9.910342105263158e-05, "loss": 0.3919, "step": 23418 }, { "epoch": 1.311401052749468, "grad_norm": 1.2321887016296387, "learning_rate": 9.910315789473684e-05, 
"loss": 0.4374, "step": 23419 }, { "epoch": 1.311457050061597, "grad_norm": 1.5014137029647827, "learning_rate": 9.910289473684211e-05, "loss": 0.5516, "step": 23420 }, { "epoch": 1.311513047373726, "grad_norm": 1.337637186050415, "learning_rate": 9.910263157894737e-05, "loss": 0.4281, "step": 23421 }, { "epoch": 1.311569044685855, "grad_norm": 1.747450351715088, "learning_rate": 9.910236842105263e-05, "loss": 0.594, "step": 23422 }, { "epoch": 1.311625041997984, "grad_norm": 1.2766363620758057, "learning_rate": 9.910210526315789e-05, "loss": 0.4447, "step": 23423 }, { "epoch": 1.311681039310113, "grad_norm": 1.3487766981124878, "learning_rate": 9.910184210526316e-05, "loss": 0.5729, "step": 23424 }, { "epoch": 1.311737036622242, "grad_norm": 1.3309218883514404, "learning_rate": 9.910157894736842e-05, "loss": 0.5082, "step": 23425 }, { "epoch": 1.311793033934371, "grad_norm": 1.2421057224273682, "learning_rate": 9.91013157894737e-05, "loss": 0.423, "step": 23426 }, { "epoch": 1.3118490312465, "grad_norm": 1.2512723207473755, "learning_rate": 9.910105263157894e-05, "loss": 0.3813, "step": 23427 }, { "epoch": 1.3119050285586291, "grad_norm": 1.492981195449829, "learning_rate": 9.910078947368422e-05, "loss": 0.5883, "step": 23428 }, { "epoch": 1.3119610258707581, "grad_norm": 1.3836313486099243, "learning_rate": 9.910052631578948e-05, "loss": 0.4316, "step": 23429 }, { "epoch": 1.3120170231828872, "grad_norm": 1.6830849647521973, "learning_rate": 9.910026315789475e-05, "loss": 0.5704, "step": 23430 }, { "epoch": 1.3120730204950162, "grad_norm": 1.978153944015503, "learning_rate": 9.910000000000001e-05, "loss": 0.5258, "step": 23431 }, { "epoch": 1.3121290178071452, "grad_norm": 1.2363853454589844, "learning_rate": 9.909973684210527e-05, "loss": 0.4237, "step": 23432 }, { "epoch": 1.3121850151192742, "grad_norm": 1.2318207025527954, "learning_rate": 9.909947368421053e-05, "loss": 0.4274, "step": 23433 }, { "epoch": 1.3122410124314032, "grad_norm": 1.1699894666671753, 
"learning_rate": 9.909921052631579e-05, "loss": 0.4445, "step": 23434 }, { "epoch": 1.3122970097435323, "grad_norm": 1.2759298086166382, "learning_rate": 9.909894736842106e-05, "loss": 0.4688, "step": 23435 }, { "epoch": 1.3123530070556613, "grad_norm": 1.3733446598052979, "learning_rate": 9.909868421052632e-05, "loss": 0.4503, "step": 23436 }, { "epoch": 1.3124090043677903, "grad_norm": 1.1985161304473877, "learning_rate": 9.909842105263158e-05, "loss": 0.469, "step": 23437 }, { "epoch": 1.3124650016799193, "grad_norm": 1.4328134059906006, "learning_rate": 9.909815789473684e-05, "loss": 0.4739, "step": 23438 }, { "epoch": 1.3125209989920483, "grad_norm": 1.2340866327285767, "learning_rate": 9.909789473684211e-05, "loss": 0.415, "step": 23439 }, { "epoch": 1.3125769963041773, "grad_norm": 1.4035005569458008, "learning_rate": 9.909763157894737e-05, "loss": 0.3763, "step": 23440 }, { "epoch": 1.3126329936163064, "grad_norm": 1.3108558654785156, "learning_rate": 9.909736842105263e-05, "loss": 0.371, "step": 23441 }, { "epoch": 1.3126889909284354, "grad_norm": 1.7617404460906982, "learning_rate": 9.90971052631579e-05, "loss": 0.5191, "step": 23442 }, { "epoch": 1.3127449882405644, "grad_norm": 1.3765901327133179, "learning_rate": 9.909684210526317e-05, "loss": 0.6761, "step": 23443 }, { "epoch": 1.3128009855526934, "grad_norm": 1.166473150253296, "learning_rate": 9.909657894736843e-05, "loss": 0.4358, "step": 23444 }, { "epoch": 1.3128569828648224, "grad_norm": 1.3755375146865845, "learning_rate": 9.909631578947369e-05, "loss": 0.4297, "step": 23445 }, { "epoch": 1.3129129801769515, "grad_norm": 1.3300050497055054, "learning_rate": 9.909605263157895e-05, "loss": 0.4262, "step": 23446 }, { "epoch": 1.3129689774890805, "grad_norm": 1.282021403312683, "learning_rate": 9.909578947368422e-05, "loss": 0.4449, "step": 23447 }, { "epoch": 1.3130249748012095, "grad_norm": 1.2514032125473022, "learning_rate": 9.909552631578948e-05, "loss": 0.5067, "step": 23448 }, { "epoch": 
1.3130809721133385, "grad_norm": 1.432243824005127, "learning_rate": 9.909526315789475e-05, "loss": 0.4014, "step": 23449 }, { "epoch": 1.3131369694254675, "grad_norm": 1.4824650287628174, "learning_rate": 9.9095e-05, "loss": 0.4567, "step": 23450 }, { "epoch": 1.3131929667375966, "grad_norm": 2.4755280017852783, "learning_rate": 9.909473684210526e-05, "loss": 0.5068, "step": 23451 }, { "epoch": 1.3132489640497256, "grad_norm": 1.7152857780456543, "learning_rate": 9.909447368421053e-05, "loss": 0.4997, "step": 23452 }, { "epoch": 1.3133049613618546, "grad_norm": 1.4264469146728516, "learning_rate": 9.909421052631579e-05, "loss": 0.475, "step": 23453 }, { "epoch": 1.3133609586739836, "grad_norm": 1.1713494062423706, "learning_rate": 9.909394736842107e-05, "loss": 0.317, "step": 23454 }, { "epoch": 1.3134169559861126, "grad_norm": 1.0777666568756104, "learning_rate": 9.909368421052631e-05, "loss": 0.3438, "step": 23455 }, { "epoch": 1.3134729532982417, "grad_norm": 1.5504034757614136, "learning_rate": 9.909342105263158e-05, "loss": 0.7497, "step": 23456 }, { "epoch": 1.3135289506103707, "grad_norm": 1.3663454055786133, "learning_rate": 9.909315789473684e-05, "loss": 0.4087, "step": 23457 }, { "epoch": 1.3135849479224997, "grad_norm": 1.5986995697021484, "learning_rate": 9.909289473684212e-05, "loss": 0.5057, "step": 23458 }, { "epoch": 1.3136409452346287, "grad_norm": 1.3435286283493042, "learning_rate": 9.909263157894736e-05, "loss": 0.3877, "step": 23459 }, { "epoch": 1.3136969425467577, "grad_norm": 1.3977755308151245, "learning_rate": 9.909236842105264e-05, "loss": 0.5025, "step": 23460 }, { "epoch": 1.3137529398588867, "grad_norm": 1.5699728727340698, "learning_rate": 9.90921052631579e-05, "loss": 0.4892, "step": 23461 }, { "epoch": 1.3138089371710158, "grad_norm": 1.2305196523666382, "learning_rate": 9.909184210526317e-05, "loss": 0.4396, "step": 23462 }, { "epoch": 1.3138649344831448, "grad_norm": 1.4703571796417236, "learning_rate": 9.909157894736843e-05, 
"loss": 0.4488, "step": 23463 }, { "epoch": 1.3139209317952738, "grad_norm": 2.4165940284729004, "learning_rate": 9.909131578947369e-05, "loss": 0.4089, "step": 23464 }, { "epoch": 1.3139769291074028, "grad_norm": 1.211911916732788, "learning_rate": 9.909105263157895e-05, "loss": 0.4259, "step": 23465 }, { "epoch": 1.3140329264195318, "grad_norm": 1.1673394441604614, "learning_rate": 9.909078947368422e-05, "loss": 0.3949, "step": 23466 }, { "epoch": 1.3140889237316609, "grad_norm": 1.3991273641586304, "learning_rate": 9.909052631578948e-05, "loss": 0.4416, "step": 23467 }, { "epoch": 1.3141449210437899, "grad_norm": 9.993056297302246, "learning_rate": 9.909026315789474e-05, "loss": 0.5116, "step": 23468 }, { "epoch": 1.314200918355919, "grad_norm": 1.3935984373092651, "learning_rate": 9.909e-05, "loss": 0.4229, "step": 23469 }, { "epoch": 1.314256915668048, "grad_norm": 1.576490044593811, "learning_rate": 9.908973684210526e-05, "loss": 0.5644, "step": 23470 }, { "epoch": 1.314312912980177, "grad_norm": 1.3270258903503418, "learning_rate": 9.908947368421053e-05, "loss": 0.4396, "step": 23471 }, { "epoch": 1.314368910292306, "grad_norm": 1.487135410308838, "learning_rate": 9.90892105263158e-05, "loss": 0.6049, "step": 23472 }, { "epoch": 1.314424907604435, "grad_norm": 1.3678137063980103, "learning_rate": 9.908894736842105e-05, "loss": 0.5267, "step": 23473 }, { "epoch": 1.314480904916564, "grad_norm": 2.1125478744506836, "learning_rate": 9.908868421052631e-05, "loss": 0.4868, "step": 23474 }, { "epoch": 1.314536902228693, "grad_norm": 1.335037112236023, "learning_rate": 9.908842105263159e-05, "loss": 0.5048, "step": 23475 }, { "epoch": 1.314592899540822, "grad_norm": 1.3628175258636475, "learning_rate": 9.908815789473685e-05, "loss": 0.4408, "step": 23476 }, { "epoch": 1.314648896852951, "grad_norm": 1.4586461782455444, "learning_rate": 9.90878947368421e-05, "loss": 0.5362, "step": 23477 }, { "epoch": 1.31470489416508, "grad_norm": 1.2521742582321167, 
"learning_rate": 9.908763157894737e-05, "loss": 0.4385, "step": 23478 }, { "epoch": 1.314760891477209, "grad_norm": 1.889302134513855, "learning_rate": 9.908736842105264e-05, "loss": 0.7015, "step": 23479 }, { "epoch": 1.3148168887893381, "grad_norm": 1.3009964227676392, "learning_rate": 9.90871052631579e-05, "loss": 0.4561, "step": 23480 }, { "epoch": 1.3148728861014671, "grad_norm": 1.9913792610168457, "learning_rate": 9.908684210526317e-05, "loss": 0.4249, "step": 23481 }, { "epoch": 1.3149288834135962, "grad_norm": 1.304996132850647, "learning_rate": 9.908657894736842e-05, "loss": 0.5047, "step": 23482 }, { "epoch": 1.3149848807257252, "grad_norm": 1.3131805658340454, "learning_rate": 9.908631578947369e-05, "loss": 0.4517, "step": 23483 }, { "epoch": 1.3150408780378542, "grad_norm": 1.6353704929351807, "learning_rate": 9.908605263157895e-05, "loss": 0.396, "step": 23484 }, { "epoch": 1.3150968753499832, "grad_norm": 1.500420093536377, "learning_rate": 9.908578947368423e-05, "loss": 0.5526, "step": 23485 }, { "epoch": 1.3151528726621122, "grad_norm": 1.3532007932662964, "learning_rate": 9.908552631578948e-05, "loss": 0.4128, "step": 23486 }, { "epoch": 1.3152088699742412, "grad_norm": 1.5647958517074585, "learning_rate": 9.908526315789473e-05, "loss": 0.5633, "step": 23487 }, { "epoch": 1.3152648672863703, "grad_norm": 1.302091121673584, "learning_rate": 9.9085e-05, "loss": 0.4586, "step": 23488 }, { "epoch": 1.3153208645984993, "grad_norm": 1.4732964038848877, "learning_rate": 9.908473684210526e-05, "loss": 0.5431, "step": 23489 }, { "epoch": 1.3153768619106283, "grad_norm": 1.4219179153442383, "learning_rate": 9.908447368421054e-05, "loss": 0.5483, "step": 23490 }, { "epoch": 1.3154328592227573, "grad_norm": 1.3076339960098267, "learning_rate": 9.90842105263158e-05, "loss": 0.4174, "step": 23491 }, { "epoch": 1.3154888565348863, "grad_norm": 1.2017974853515625, "learning_rate": 9.908394736842106e-05, "loss": 0.4726, "step": 23492 }, { "epoch": 
1.3155448538470154, "grad_norm": 1.4847337007522583, "learning_rate": 9.908368421052632e-05, "loss": 0.5449, "step": 23493 }, { "epoch": 1.3156008511591444, "grad_norm": 1.6240051984786987, "learning_rate": 9.908342105263159e-05, "loss": 0.4613, "step": 23494 }, { "epoch": 1.3156568484712734, "grad_norm": 1.3387105464935303, "learning_rate": 9.908315789473685e-05, "loss": 0.5762, "step": 23495 }, { "epoch": 1.3157128457834024, "grad_norm": 1.5709962844848633, "learning_rate": 9.908289473684211e-05, "loss": 0.4868, "step": 23496 }, { "epoch": 1.3157688430955314, "grad_norm": 1.442383885383606, "learning_rate": 9.908263157894737e-05, "loss": 0.516, "step": 23497 }, { "epoch": 1.3158248404076605, "grad_norm": 1.1911067962646484, "learning_rate": 9.908236842105264e-05, "loss": 0.4622, "step": 23498 }, { "epoch": 1.3158808377197895, "grad_norm": 1.2875454425811768, "learning_rate": 9.90821052631579e-05, "loss": 0.3478, "step": 23499 }, { "epoch": 1.3159368350319185, "grad_norm": 1.450999140739441, "learning_rate": 9.908184210526316e-05, "loss": 0.3776, "step": 23500 }, { "epoch": 1.3159928323440475, "grad_norm": 1.3420605659484863, "learning_rate": 9.908157894736842e-05, "loss": 0.3914, "step": 23501 }, { "epoch": 1.3160488296561765, "grad_norm": 1.3781684637069702, "learning_rate": 9.90813157894737e-05, "loss": 0.4826, "step": 23502 }, { "epoch": 1.3161048269683056, "grad_norm": 1.4783337116241455, "learning_rate": 9.908105263157895e-05, "loss": 0.4982, "step": 23503 }, { "epoch": 1.3161608242804346, "grad_norm": 1.4523431062698364, "learning_rate": 9.908078947368421e-05, "loss": 0.4266, "step": 23504 }, { "epoch": 1.3162168215925636, "grad_norm": 1.678612470626831, "learning_rate": 9.908052631578947e-05, "loss": 0.5663, "step": 23505 }, { "epoch": 1.3162728189046926, "grad_norm": 1.7157851457595825, "learning_rate": 9.908026315789473e-05, "loss": 0.4389, "step": 23506 }, { "epoch": 1.3163288162168216, "grad_norm": 1.1345454454421997, "learning_rate": 
9.908000000000001e-05, "loss": 0.3387, "step": 23507 }, { "epoch": 1.3163848135289506, "grad_norm": 1.5600314140319824, "learning_rate": 9.907973684210527e-05, "loss": 0.4027, "step": 23508 }, { "epoch": 1.3164408108410797, "grad_norm": 1.4446918964385986, "learning_rate": 9.907947368421054e-05, "loss": 0.3934, "step": 23509 }, { "epoch": 1.3164968081532087, "grad_norm": 1.4340838193893433, "learning_rate": 9.907921052631579e-05, "loss": 0.4871, "step": 23510 }, { "epoch": 1.3165528054653377, "grad_norm": 1.360880732536316, "learning_rate": 9.907894736842106e-05, "loss": 0.4362, "step": 23511 }, { "epoch": 1.3166088027774667, "grad_norm": 1.3731484413146973, "learning_rate": 9.907868421052632e-05, "loss": 0.3982, "step": 23512 }, { "epoch": 1.3166648000895957, "grad_norm": 1.2852195501327515, "learning_rate": 9.907842105263159e-05, "loss": 0.4248, "step": 23513 }, { "epoch": 1.3167207974017248, "grad_norm": 1.6905219554901123, "learning_rate": 9.907815789473684e-05, "loss": 0.466, "step": 23514 }, { "epoch": 1.3167767947138538, "grad_norm": 1.1920872926712036, "learning_rate": 9.907789473684211e-05, "loss": 0.3411, "step": 23515 }, { "epoch": 1.3168327920259828, "grad_norm": 1.4498956203460693, "learning_rate": 9.907763157894737e-05, "loss": 0.4507, "step": 23516 }, { "epoch": 1.3168887893381118, "grad_norm": 1.4717071056365967, "learning_rate": 9.907736842105264e-05, "loss": 0.635, "step": 23517 }, { "epoch": 1.3169447866502408, "grad_norm": 1.6787028312683105, "learning_rate": 9.90771052631579e-05, "loss": 0.4732, "step": 23518 }, { "epoch": 1.3170007839623699, "grad_norm": 1.3689689636230469, "learning_rate": 9.907684210526316e-05, "loss": 0.3653, "step": 23519 }, { "epoch": 1.3170567812744989, "grad_norm": 1.5186647176742554, "learning_rate": 9.907657894736842e-05, "loss": 0.5114, "step": 23520 }, { "epoch": 1.317112778586628, "grad_norm": 1.6663926839828491, "learning_rate": 9.907631578947368e-05, "loss": 0.5142, "step": 23521 }, { "epoch": 1.317168775898757, 
"grad_norm": 1.1840616464614868, "learning_rate": 9.907605263157896e-05, "loss": 0.3912, "step": 23522 }, { "epoch": 1.317224773210886, "grad_norm": 1.6792411804199219, "learning_rate": 9.907578947368422e-05, "loss": 0.4961, "step": 23523 }, { "epoch": 1.317280770523015, "grad_norm": 1.4093555212020874, "learning_rate": 9.907552631578948e-05, "loss": 0.4001, "step": 23524 }, { "epoch": 1.317336767835144, "grad_norm": 1.30805242061615, "learning_rate": 9.907526315789474e-05, "loss": 0.4575, "step": 23525 }, { "epoch": 1.317392765147273, "grad_norm": 1.1401379108428955, "learning_rate": 9.907500000000001e-05, "loss": 0.2999, "step": 23526 }, { "epoch": 1.317448762459402, "grad_norm": 1.3849704265594482, "learning_rate": 9.907473684210527e-05, "loss": 0.419, "step": 23527 }, { "epoch": 1.317504759771531, "grad_norm": 1.422619104385376, "learning_rate": 9.907447368421053e-05, "loss": 0.4429, "step": 23528 }, { "epoch": 1.31756075708366, "grad_norm": 2.4414379596710205, "learning_rate": 9.907421052631579e-05, "loss": 0.5035, "step": 23529 }, { "epoch": 1.317616754395789, "grad_norm": 1.6046645641326904, "learning_rate": 9.907394736842106e-05, "loss": 0.4716, "step": 23530 }, { "epoch": 1.317672751707918, "grad_norm": 1.6596109867095947, "learning_rate": 9.907368421052632e-05, "loss": 0.6331, "step": 23531 }, { "epoch": 1.317728749020047, "grad_norm": 1.4059385061264038, "learning_rate": 9.907342105263158e-05, "loss": 0.6395, "step": 23532 }, { "epoch": 1.3177847463321761, "grad_norm": 1.3787490129470825, "learning_rate": 9.907315789473684e-05, "loss": 0.4719, "step": 23533 }, { "epoch": 1.3178407436443051, "grad_norm": 1.3036671876907349, "learning_rate": 9.907289473684211e-05, "loss": 0.5903, "step": 23534 }, { "epoch": 1.3178967409564342, "grad_norm": 1.2987549304962158, "learning_rate": 9.907263157894737e-05, "loss": 0.4609, "step": 23535 }, { "epoch": 1.3179527382685632, "grad_norm": 1.3021806478500366, "learning_rate": 9.907236842105265e-05, "loss": 0.4061, "step": 
23536 }, { "epoch": 1.3180087355806922, "grad_norm": 1.3273011445999146, "learning_rate": 9.90721052631579e-05, "loss": 0.5937, "step": 23537 }, { "epoch": 1.3180647328928212, "grad_norm": 1.6703534126281738, "learning_rate": 9.907184210526315e-05, "loss": 0.6635, "step": 23538 }, { "epoch": 1.3181207302049502, "grad_norm": 1.2274681329727173, "learning_rate": 9.907157894736843e-05, "loss": 0.418, "step": 23539 }, { "epoch": 1.3181767275170793, "grad_norm": 1.3198310136795044, "learning_rate": 9.907131578947369e-05, "loss": 0.516, "step": 23540 }, { "epoch": 1.3182327248292083, "grad_norm": 0.9746121168136597, "learning_rate": 9.907105263157896e-05, "loss": 0.3675, "step": 23541 }, { "epoch": 1.3182887221413373, "grad_norm": 1.48617684841156, "learning_rate": 9.90707894736842e-05, "loss": 0.4091, "step": 23542 }, { "epoch": 1.3183447194534663, "grad_norm": 1.5356407165527344, "learning_rate": 9.907052631578948e-05, "loss": 0.5064, "step": 23543 }, { "epoch": 1.3184007167655953, "grad_norm": 1.353983998298645, "learning_rate": 9.907026315789474e-05, "loss": 0.5867, "step": 23544 }, { "epoch": 1.3184567140777244, "grad_norm": 1.3710651397705078, "learning_rate": 9.907000000000001e-05, "loss": 0.515, "step": 23545 }, { "epoch": 1.3185127113898534, "grad_norm": 1.3370361328125, "learning_rate": 9.906973684210527e-05, "loss": 0.3541, "step": 23546 }, { "epoch": 1.3185687087019824, "grad_norm": 1.3643617630004883, "learning_rate": 9.906947368421053e-05, "loss": 0.3977, "step": 23547 }, { "epoch": 1.3186247060141114, "grad_norm": 1.2992652654647827, "learning_rate": 9.906921052631579e-05, "loss": 0.5181, "step": 23548 }, { "epoch": 1.3186807033262404, "grad_norm": 2.7935121059417725, "learning_rate": 9.906894736842106e-05, "loss": 0.4438, "step": 23549 }, { "epoch": 1.3187367006383695, "grad_norm": 1.092849612236023, "learning_rate": 9.906868421052632e-05, "loss": 0.3636, "step": 23550 }, { "epoch": 1.3187926979504985, "grad_norm": 1.3572300672531128, "learning_rate": 
9.906842105263158e-05, "loss": 0.3727, "step": 23551 }, { "epoch": 1.3188486952626275, "grad_norm": 1.3177443742752075, "learning_rate": 9.906815789473684e-05, "loss": 0.4337, "step": 23552 }, { "epoch": 1.3189046925747565, "grad_norm": 1.4217324256896973, "learning_rate": 9.906789473684212e-05, "loss": 0.4441, "step": 23553 }, { "epoch": 1.3189606898868855, "grad_norm": 1.5611952543258667, "learning_rate": 9.906763157894738e-05, "loss": 0.4666, "step": 23554 }, { "epoch": 1.3190166871990145, "grad_norm": 1.3093900680541992, "learning_rate": 9.906736842105264e-05, "loss": 0.4245, "step": 23555 }, { "epoch": 1.3190726845111436, "grad_norm": 1.3968415260314941, "learning_rate": 9.90671052631579e-05, "loss": 0.4779, "step": 23556 }, { "epoch": 1.3191286818232726, "grad_norm": 1.3176441192626953, "learning_rate": 9.906684210526316e-05, "loss": 0.5966, "step": 23557 }, { "epoch": 1.3191846791354016, "grad_norm": 1.1914488077163696, "learning_rate": 9.906657894736843e-05, "loss": 0.3269, "step": 23558 }, { "epoch": 1.3192406764475306, "grad_norm": 1.5402307510375977, "learning_rate": 9.906631578947369e-05, "loss": 0.4766, "step": 23559 }, { "epoch": 1.3192966737596596, "grad_norm": 1.4681423902511597, "learning_rate": 9.906605263157895e-05, "loss": 0.6561, "step": 23560 }, { "epoch": 1.3193526710717887, "grad_norm": 1.2147117853164673, "learning_rate": 9.906578947368421e-05, "loss": 0.3518, "step": 23561 }, { "epoch": 1.3194086683839177, "grad_norm": 1.1922838687896729, "learning_rate": 9.906552631578948e-05, "loss": 0.4168, "step": 23562 }, { "epoch": 1.3194646656960467, "grad_norm": 1.4813134670257568, "learning_rate": 9.906526315789474e-05, "loss": 0.4051, "step": 23563 }, { "epoch": 1.3195206630081757, "grad_norm": 1.6255486011505127, "learning_rate": 9.9065e-05, "loss": 0.6109, "step": 23564 }, { "epoch": 1.3195766603203047, "grad_norm": 1.5070154666900635, "learning_rate": 9.906473684210526e-05, "loss": 0.5359, "step": 23565 }, { "epoch": 1.3196326576324338, 
"grad_norm": 1.2078232765197754, "learning_rate": 9.906447368421053e-05, "loss": 0.5, "step": 23566 }, { "epoch": 1.3196886549445628, "grad_norm": 1.221826195716858, "learning_rate": 9.90642105263158e-05, "loss": 0.4715, "step": 23567 }, { "epoch": 1.3197446522566918, "grad_norm": 1.1665562391281128, "learning_rate": 9.906394736842107e-05, "loss": 0.4743, "step": 23568 }, { "epoch": 1.3198006495688208, "grad_norm": 1.7769436836242676, "learning_rate": 9.906368421052631e-05, "loss": 0.651, "step": 23569 }, { "epoch": 1.3198566468809498, "grad_norm": 1.6282001733779907, "learning_rate": 9.906342105263159e-05, "loss": 0.5087, "step": 23570 }, { "epoch": 1.3199126441930789, "grad_norm": 1.4367485046386719, "learning_rate": 9.906315789473685e-05, "loss": 0.5163, "step": 23571 }, { "epoch": 1.3199686415052079, "grad_norm": 1.1818881034851074, "learning_rate": 9.90628947368421e-05, "loss": 0.4329, "step": 23572 }, { "epoch": 1.320024638817337, "grad_norm": 1.3425483703613281, "learning_rate": 9.906263157894738e-05, "loss": 0.4593, "step": 23573 }, { "epoch": 1.320080636129466, "grad_norm": 1.2086644172668457, "learning_rate": 9.906236842105263e-05, "loss": 0.497, "step": 23574 }, { "epoch": 1.320136633441595, "grad_norm": 1.4390406608581543, "learning_rate": 9.90621052631579e-05, "loss": 0.607, "step": 23575 }, { "epoch": 1.320192630753724, "grad_norm": 1.5852943658828735, "learning_rate": 9.906184210526316e-05, "loss": 0.4744, "step": 23576 }, { "epoch": 1.320248628065853, "grad_norm": 1.4295347929000854, "learning_rate": 9.906157894736843e-05, "loss": 0.3604, "step": 23577 }, { "epoch": 1.320304625377982, "grad_norm": 1.4102352857589722, "learning_rate": 9.906131578947369e-05, "loss": 0.4666, "step": 23578 }, { "epoch": 1.320360622690111, "grad_norm": 1.2955905199050903, "learning_rate": 9.906105263157895e-05, "loss": 0.3701, "step": 23579 }, { "epoch": 1.32041662000224, "grad_norm": 1.466480016708374, "learning_rate": 9.906078947368421e-05, "loss": 0.7739, "step": 
23580 }, { "epoch": 1.320472617314369, "grad_norm": 1.4667768478393555, "learning_rate": 9.906052631578948e-05, "loss": 0.6104, "step": 23581 }, { "epoch": 1.320528614626498, "grad_norm": 1.45987868309021, "learning_rate": 9.906026315789474e-05, "loss": 0.5134, "step": 23582 }, { "epoch": 1.320584611938627, "grad_norm": 1.2193151712417603, "learning_rate": 9.906e-05, "loss": 0.5586, "step": 23583 }, { "epoch": 1.3206406092507559, "grad_norm": 1.5995523929595947, "learning_rate": 9.905973684210526e-05, "loss": 0.4621, "step": 23584 }, { "epoch": 1.320696606562885, "grad_norm": 1.2353379726409912, "learning_rate": 9.905947368421054e-05, "loss": 0.514, "step": 23585 }, { "epoch": 1.320752603875014, "grad_norm": 1.2061514854431152, "learning_rate": 9.90592105263158e-05, "loss": 0.4795, "step": 23586 }, { "epoch": 1.320808601187143, "grad_norm": 1.2943384647369385, "learning_rate": 9.905894736842106e-05, "loss": 0.3866, "step": 23587 }, { "epoch": 1.320864598499272, "grad_norm": 1.351973056793213, "learning_rate": 9.905868421052632e-05, "loss": 0.4919, "step": 23588 }, { "epoch": 1.320920595811401, "grad_norm": 1.2900298833847046, "learning_rate": 9.905842105263159e-05, "loss": 0.3786, "step": 23589 }, { "epoch": 1.32097659312353, "grad_norm": 1.6199102401733398, "learning_rate": 9.905815789473685e-05, "loss": 0.523, "step": 23590 }, { "epoch": 1.321032590435659, "grad_norm": 1.2421436309814453, "learning_rate": 9.905789473684211e-05, "loss": 0.5326, "step": 23591 }, { "epoch": 1.321088587747788, "grad_norm": 1.2802703380584717, "learning_rate": 9.905763157894737e-05, "loss": 0.4736, "step": 23592 }, { "epoch": 1.321144585059917, "grad_norm": 1.3789219856262207, "learning_rate": 9.905736842105263e-05, "loss": 0.5246, "step": 23593 }, { "epoch": 1.321200582372046, "grad_norm": 2.457754373550415, "learning_rate": 9.90571052631579e-05, "loss": 0.6164, "step": 23594 }, { "epoch": 1.321256579684175, "grad_norm": 1.5327216386795044, "learning_rate": 9.905684210526316e-05, 
"loss": 0.4155, "step": 23595 }, { "epoch": 1.3213125769963041, "grad_norm": 1.3117456436157227, "learning_rate": 9.905657894736843e-05, "loss": 0.502, "step": 23596 }, { "epoch": 1.3213685743084331, "grad_norm": 1.27993905544281, "learning_rate": 9.905631578947368e-05, "loss": 0.4094, "step": 23597 }, { "epoch": 1.3214245716205621, "grad_norm": 1.54668128490448, "learning_rate": 9.905605263157895e-05, "loss": 0.3735, "step": 23598 }, { "epoch": 1.3214805689326912, "grad_norm": 2.8389892578125, "learning_rate": 9.905578947368421e-05, "loss": 0.5307, "step": 23599 }, { "epoch": 1.3215365662448202, "grad_norm": 1.3819185495376587, "learning_rate": 9.905552631578949e-05, "loss": 0.5652, "step": 23600 }, { "epoch": 1.3215925635569492, "grad_norm": 1.5571867227554321, "learning_rate": 9.905526315789475e-05, "loss": 0.5315, "step": 23601 }, { "epoch": 1.3216485608690782, "grad_norm": 1.5231329202651978, "learning_rate": 9.9055e-05, "loss": 0.4304, "step": 23602 }, { "epoch": 1.3217045581812072, "grad_norm": 1.5173362493515015, "learning_rate": 9.905473684210527e-05, "loss": 0.3471, "step": 23603 }, { "epoch": 1.3217605554933363, "grad_norm": 1.2694761753082275, "learning_rate": 9.905447368421054e-05, "loss": 0.5271, "step": 23604 }, { "epoch": 1.3218165528054653, "grad_norm": 1.2435005903244019, "learning_rate": 9.90542105263158e-05, "loss": 0.3912, "step": 23605 }, { "epoch": 1.3218725501175943, "grad_norm": 1.2682331800460815, "learning_rate": 9.905394736842106e-05, "loss": 0.4828, "step": 23606 }, { "epoch": 1.3219285474297233, "grad_norm": 1.5103245973587036, "learning_rate": 9.905368421052632e-05, "loss": 0.4391, "step": 23607 }, { "epoch": 1.3219845447418523, "grad_norm": 1.295146107673645, "learning_rate": 9.905342105263158e-05, "loss": 0.3748, "step": 23608 }, { "epoch": 1.3220405420539814, "grad_norm": 1.4384539127349854, "learning_rate": 9.905315789473685e-05, "loss": 0.5867, "step": 23609 }, { "epoch": 1.3220965393661104, "grad_norm": 1.448047399520874, 
"learning_rate": 9.905289473684211e-05, "loss": 0.4441, "step": 23610 }, { "epoch": 1.3221525366782394, "grad_norm": 1.8027104139328003, "learning_rate": 9.905263157894737e-05, "loss": 0.5361, "step": 23611 }, { "epoch": 1.3222085339903684, "grad_norm": 1.6681878566741943, "learning_rate": 9.905236842105263e-05, "loss": 0.5267, "step": 23612 }, { "epoch": 1.3222645313024974, "grad_norm": 1.4393134117126465, "learning_rate": 9.90521052631579e-05, "loss": 0.7328, "step": 23613 }, { "epoch": 1.3223205286146265, "grad_norm": 1.2529213428497314, "learning_rate": 9.905184210526316e-05, "loss": 0.4681, "step": 23614 }, { "epoch": 1.3223765259267555, "grad_norm": 1.8222781419754028, "learning_rate": 9.905157894736842e-05, "loss": 0.5243, "step": 23615 }, { "epoch": 1.3224325232388845, "grad_norm": 1.4331270456314087, "learning_rate": 9.905131578947368e-05, "loss": 0.4813, "step": 23616 }, { "epoch": 1.3224885205510135, "grad_norm": 1.3592449426651, "learning_rate": 9.905105263157896e-05, "loss": 0.5771, "step": 23617 }, { "epoch": 1.3225445178631425, "grad_norm": 1.7799861431121826, "learning_rate": 9.905078947368422e-05, "loss": 0.6256, "step": 23618 }, { "epoch": 1.3226005151752716, "grad_norm": 1.8847559690475464, "learning_rate": 9.905052631578948e-05, "loss": 0.6124, "step": 23619 }, { "epoch": 1.3226565124874006, "grad_norm": 1.9002000093460083, "learning_rate": 9.905026315789474e-05, "loss": 0.5131, "step": 23620 }, { "epoch": 1.3227125097995296, "grad_norm": 1.2628037929534912, "learning_rate": 9.905000000000001e-05, "loss": 0.5107, "step": 23621 }, { "epoch": 1.3227685071116586, "grad_norm": 1.39218270778656, "learning_rate": 9.904973684210527e-05, "loss": 0.4912, "step": 23622 }, { "epoch": 1.3228245044237876, "grad_norm": 1.3301101922988892, "learning_rate": 9.904947368421054e-05, "loss": 0.4268, "step": 23623 }, { "epoch": 1.3228805017359166, "grad_norm": 1.2446775436401367, "learning_rate": 9.904921052631579e-05, "loss": 0.3518, "step": 23624 }, { "epoch": 
1.3229364990480457, "grad_norm": 1.6048548221588135, "learning_rate": 9.904894736842105e-05, "loss": 0.5149, "step": 23625 }, { "epoch": 1.3229924963601747, "grad_norm": 1.354109764099121, "learning_rate": 9.904868421052632e-05, "loss": 0.4675, "step": 23626 }, { "epoch": 1.3230484936723037, "grad_norm": 1.8846253156661987, "learning_rate": 9.904842105263158e-05, "loss": 0.48, "step": 23627 }, { "epoch": 1.3231044909844327, "grad_norm": 1.7262190580368042, "learning_rate": 9.904815789473685e-05, "loss": 0.6207, "step": 23628 }, { "epoch": 1.3231604882965617, "grad_norm": 1.422507405281067, "learning_rate": 9.90478947368421e-05, "loss": 0.4041, "step": 23629 }, { "epoch": 1.3232164856086908, "grad_norm": 1.5060955286026, "learning_rate": 9.904763157894737e-05, "loss": 0.454, "step": 23630 }, { "epoch": 1.3232724829208198, "grad_norm": 1.2696168422698975, "learning_rate": 9.904736842105263e-05, "loss": 0.4178, "step": 23631 }, { "epoch": 1.3233284802329488, "grad_norm": 18.032329559326172, "learning_rate": 9.90471052631579e-05, "loss": 0.4343, "step": 23632 }, { "epoch": 1.3233844775450778, "grad_norm": 1.286111831665039, "learning_rate": 9.904684210526317e-05, "loss": 0.5, "step": 23633 }, { "epoch": 1.3234404748572068, "grad_norm": 1.2645292282104492, "learning_rate": 9.904657894736843e-05, "loss": 0.3458, "step": 23634 }, { "epoch": 1.3234964721693359, "grad_norm": 1.7569935321807861, "learning_rate": 9.904631578947369e-05, "loss": 0.4716, "step": 23635 }, { "epoch": 1.3235524694814649, "grad_norm": 5.75446891784668, "learning_rate": 9.904605263157896e-05, "loss": 0.4636, "step": 23636 }, { "epoch": 1.323608466793594, "grad_norm": 1.3120683431625366, "learning_rate": 9.904578947368422e-05, "loss": 0.5186, "step": 23637 }, { "epoch": 1.323664464105723, "grad_norm": 1.477587103843689, "learning_rate": 9.904552631578948e-05, "loss": 0.3833, "step": 23638 }, { "epoch": 1.323720461417852, "grad_norm": 2.1010634899139404, "learning_rate": 9.904526315789474e-05, "loss": 
0.5562, "step": 23639 }, { "epoch": 1.323776458729981, "grad_norm": 1.5339431762695312, "learning_rate": 9.904500000000001e-05, "loss": 0.4913, "step": 23640 }, { "epoch": 1.32383245604211, "grad_norm": 1.4985063076019287, "learning_rate": 9.904473684210527e-05, "loss": 0.4716, "step": 23641 }, { "epoch": 1.323888453354239, "grad_norm": 1.2650494575500488, "learning_rate": 9.904447368421053e-05, "loss": 0.4502, "step": 23642 }, { "epoch": 1.323944450666368, "grad_norm": 1.399677038192749, "learning_rate": 9.904421052631579e-05, "loss": 0.453, "step": 23643 }, { "epoch": 1.324000447978497, "grad_norm": 1.3583924770355225, "learning_rate": 9.904394736842105e-05, "loss": 0.4261, "step": 23644 }, { "epoch": 1.324056445290626, "grad_norm": 1.2457411289215088, "learning_rate": 9.904368421052632e-05, "loss": 0.4543, "step": 23645 }, { "epoch": 1.324112442602755, "grad_norm": 1.3103523254394531, "learning_rate": 9.904342105263158e-05, "loss": 0.4961, "step": 23646 }, { "epoch": 1.324168439914884, "grad_norm": 2.0466365814208984, "learning_rate": 9.904315789473684e-05, "loss": 0.5445, "step": 23647 }, { "epoch": 1.324224437227013, "grad_norm": 1.5205439329147339, "learning_rate": 9.90428947368421e-05, "loss": 0.4668, "step": 23648 }, { "epoch": 1.3242804345391421, "grad_norm": 1.236509919166565, "learning_rate": 9.904263157894738e-05, "loss": 0.428, "step": 23649 }, { "epoch": 1.3243364318512711, "grad_norm": 1.7488242387771606, "learning_rate": 9.904236842105264e-05, "loss": 0.523, "step": 23650 }, { "epoch": 1.3243924291634002, "grad_norm": 1.2344638109207153, "learning_rate": 9.904210526315791e-05, "loss": 0.6077, "step": 23651 }, { "epoch": 1.3244484264755292, "grad_norm": 1.4946460723876953, "learning_rate": 9.904184210526316e-05, "loss": 0.4272, "step": 23652 }, { "epoch": 1.3245044237876582, "grad_norm": 1.2750486135482788, "learning_rate": 9.904157894736843e-05, "loss": 0.3809, "step": 23653 }, { "epoch": 1.3245604210997872, "grad_norm": 1.2810460329055786, 
"learning_rate": 9.904131578947369e-05, "loss": 0.45, "step": 23654 }, { "epoch": 1.3246164184119162, "grad_norm": 1.2462742328643799, "learning_rate": 9.904105263157896e-05, "loss": 0.452, "step": 23655 }, { "epoch": 1.3246724157240453, "grad_norm": 1.3404079675674438, "learning_rate": 9.904078947368422e-05, "loss": 0.426, "step": 23656 }, { "epoch": 1.3247284130361743, "grad_norm": 2.8471829891204834, "learning_rate": 9.904052631578948e-05, "loss": 0.4118, "step": 23657 }, { "epoch": 1.3247844103483033, "grad_norm": 1.2869397401809692, "learning_rate": 9.904026315789474e-05, "loss": 0.3979, "step": 23658 }, { "epoch": 1.3248404076604323, "grad_norm": 1.3661129474639893, "learning_rate": 9.904e-05, "loss": 0.5137, "step": 23659 }, { "epoch": 1.3248964049725613, "grad_norm": 1.7654578685760498, "learning_rate": 9.903973684210527e-05, "loss": 0.5692, "step": 23660 }, { "epoch": 1.3249524022846904, "grad_norm": 1.4775665998458862, "learning_rate": 9.903947368421052e-05, "loss": 0.4675, "step": 23661 }, { "epoch": 1.3250083995968194, "grad_norm": 1.3470083475112915, "learning_rate": 9.903921052631579e-05, "loss": 0.461, "step": 23662 }, { "epoch": 1.3250643969089484, "grad_norm": 1.2037103176116943, "learning_rate": 9.903894736842105e-05, "loss": 0.3816, "step": 23663 }, { "epoch": 1.3251203942210774, "grad_norm": 1.2058035135269165, "learning_rate": 9.903868421052633e-05, "loss": 0.372, "step": 23664 }, { "epoch": 1.3251763915332064, "grad_norm": 1.8195207118988037, "learning_rate": 9.903842105263159e-05, "loss": 0.5475, "step": 23665 }, { "epoch": 1.3252323888453355, "grad_norm": 1.3020286560058594, "learning_rate": 9.903815789473685e-05, "loss": 0.4242, "step": 23666 }, { "epoch": 1.3252883861574645, "grad_norm": 1.6143763065338135, "learning_rate": 9.90378947368421e-05, "loss": 0.4134, "step": 23667 }, { "epoch": 1.3253443834695935, "grad_norm": 1.2927172183990479, "learning_rate": 9.903763157894738e-05, "loss": 0.3848, "step": 23668 }, { "epoch": 
1.3254003807817225, "grad_norm": 1.150124430656433, "learning_rate": 9.903736842105264e-05, "loss": 0.44, "step": 23669 }, { "epoch": 1.3254563780938515, "grad_norm": 1.4438631534576416, "learning_rate": 9.90371052631579e-05, "loss": 0.4656, "step": 23670 }, { "epoch": 1.3255123754059805, "grad_norm": 1.6583176851272583, "learning_rate": 9.903684210526316e-05, "loss": 0.5393, "step": 23671 }, { "epoch": 1.3255683727181096, "grad_norm": 1.572582721710205, "learning_rate": 9.903657894736843e-05, "loss": 0.4246, "step": 23672 }, { "epoch": 1.3256243700302386, "grad_norm": 1.8211469650268555, "learning_rate": 9.903631578947369e-05, "loss": 0.5976, "step": 23673 }, { "epoch": 1.3256803673423676, "grad_norm": 1.1383578777313232, "learning_rate": 9.903605263157895e-05, "loss": 0.3202, "step": 23674 }, { "epoch": 1.3257363646544966, "grad_norm": 1.207336664199829, "learning_rate": 9.903578947368421e-05, "loss": 0.4443, "step": 23675 }, { "epoch": 1.3257923619666256, "grad_norm": 1.387158751487732, "learning_rate": 9.903552631578947e-05, "loss": 0.5514, "step": 23676 }, { "epoch": 1.3258483592787547, "grad_norm": 1.4295538663864136, "learning_rate": 9.903526315789474e-05, "loss": 0.4565, "step": 23677 }, { "epoch": 1.3259043565908837, "grad_norm": 1.262218952178955, "learning_rate": 9.9035e-05, "loss": 0.3952, "step": 23678 }, { "epoch": 1.3259603539030127, "grad_norm": 2.4699628353118896, "learning_rate": 9.903473684210526e-05, "loss": 0.571, "step": 23679 }, { "epoch": 1.3260163512151417, "grad_norm": 1.5111093521118164, "learning_rate": 9.903447368421052e-05, "loss": 0.4995, "step": 23680 }, { "epoch": 1.3260723485272707, "grad_norm": 1.4468804597854614, "learning_rate": 9.90342105263158e-05, "loss": 0.4995, "step": 23681 }, { "epoch": 1.3261283458393998, "grad_norm": 1.4177590608596802, "learning_rate": 9.903394736842106e-05, "loss": 0.4481, "step": 23682 }, { "epoch": 1.3261843431515288, "grad_norm": 1.2426563501358032, "learning_rate": 9.903368421052633e-05, "loss": 
0.6114, "step": 23683 }, { "epoch": 1.3262403404636578, "grad_norm": 1.3937419652938843, "learning_rate": 9.903342105263157e-05, "loss": 0.5069, "step": 23684 }, { "epoch": 1.3262963377757868, "grad_norm": 1.3842486143112183, "learning_rate": 9.903315789473685e-05, "loss": 0.5955, "step": 23685 }, { "epoch": 1.3263523350879158, "grad_norm": 1.3546769618988037, "learning_rate": 9.903289473684211e-05, "loss": 0.4685, "step": 23686 }, { "epoch": 1.3264083324000449, "grad_norm": 1.254425287246704, "learning_rate": 9.903263157894738e-05, "loss": 0.5517, "step": 23687 }, { "epoch": 1.3264643297121739, "grad_norm": 1.3651771545410156, "learning_rate": 9.903236842105264e-05, "loss": 0.4507, "step": 23688 }, { "epoch": 1.326520327024303, "grad_norm": 1.5061709880828857, "learning_rate": 9.90321052631579e-05, "loss": 0.5048, "step": 23689 }, { "epoch": 1.326576324336432, "grad_norm": 1.3575242757797241, "learning_rate": 9.903184210526316e-05, "loss": 0.5553, "step": 23690 }, { "epoch": 1.326632321648561, "grad_norm": 1.389123558998108, "learning_rate": 9.903157894736843e-05, "loss": 0.5087, "step": 23691 }, { "epoch": 1.32668831896069, "grad_norm": 1.5058927536010742, "learning_rate": 9.90313157894737e-05, "loss": 0.4397, "step": 23692 }, { "epoch": 1.326744316272819, "grad_norm": 1.1221836805343628, "learning_rate": 9.903105263157895e-05, "loss": 0.4602, "step": 23693 }, { "epoch": 1.326800313584948, "grad_norm": 1.5292891263961792, "learning_rate": 9.903078947368421e-05, "loss": 0.6224, "step": 23694 }, { "epoch": 1.326856310897077, "grad_norm": 1.6511558294296265, "learning_rate": 9.903052631578947e-05, "loss": 0.4988, "step": 23695 }, { "epoch": 1.326912308209206, "grad_norm": 1.4084031581878662, "learning_rate": 9.903026315789475e-05, "loss": 0.4657, "step": 23696 }, { "epoch": 1.326968305521335, "grad_norm": 1.500809669494629, "learning_rate": 9.903e-05, "loss": 0.5624, "step": 23697 }, { "epoch": 1.327024302833464, "grad_norm": 1.824904441833496, "learning_rate": 
9.902973684210527e-05, "loss": 0.5121, "step": 23698 }, { "epoch": 1.327080300145593, "grad_norm": 1.3979873657226562, "learning_rate": 9.902947368421053e-05, "loss": 0.3647, "step": 23699 }, { "epoch": 1.327136297457722, "grad_norm": 1.4668703079223633, "learning_rate": 9.90292105263158e-05, "loss": 0.5255, "step": 23700 }, { "epoch": 1.3271922947698511, "grad_norm": 1.708409070968628, "learning_rate": 9.902894736842106e-05, "loss": 0.5224, "step": 23701 }, { "epoch": 1.3272482920819801, "grad_norm": 1.5218987464904785, "learning_rate": 9.902868421052632e-05, "loss": 0.373, "step": 23702 }, { "epoch": 1.3273042893941092, "grad_norm": 1.4584457874298096, "learning_rate": 9.902842105263158e-05, "loss": 0.4978, "step": 23703 }, { "epoch": 1.3273602867062382, "grad_norm": 1.9057399034500122, "learning_rate": 9.902815789473685e-05, "loss": 0.4955, "step": 23704 }, { "epoch": 1.3274162840183672, "grad_norm": 1.1189838647842407, "learning_rate": 9.902789473684211e-05, "loss": 0.3878, "step": 23705 }, { "epoch": 1.3274722813304962, "grad_norm": 1.8672751188278198, "learning_rate": 9.902763157894738e-05, "loss": 0.4425, "step": 23706 }, { "epoch": 1.3275282786426252, "grad_norm": 1.2922204732894897, "learning_rate": 9.902736842105263e-05, "loss": 0.4279, "step": 23707 }, { "epoch": 1.327584275954754, "grad_norm": 1.1615028381347656, "learning_rate": 9.90271052631579e-05, "loss": 0.3908, "step": 23708 }, { "epoch": 1.327640273266883, "grad_norm": 1.163578748703003, "learning_rate": 9.902684210526316e-05, "loss": 0.4818, "step": 23709 }, { "epoch": 1.327696270579012, "grad_norm": 1.2902050018310547, "learning_rate": 9.902657894736844e-05, "loss": 0.4775, "step": 23710 }, { "epoch": 1.327752267891141, "grad_norm": 1.1960757970809937, "learning_rate": 9.90263157894737e-05, "loss": 0.3787, "step": 23711 }, { "epoch": 1.32780826520327, "grad_norm": 1.5162867307662964, "learning_rate": 9.902605263157894e-05, "loss": 0.5849, "step": 23712 }, { "epoch": 1.3278642625153991, 
"grad_norm": 1.2826370000839233, "learning_rate": 9.902578947368422e-05, "loss": 0.4882, "step": 23713 }, { "epoch": 1.3279202598275281, "grad_norm": 1.2803281545639038, "learning_rate": 9.902552631578948e-05, "loss": 0.4433, "step": 23714 }, { "epoch": 1.3279762571396572, "grad_norm": 2.0065600872039795, "learning_rate": 9.902526315789475e-05, "loss": 0.5693, "step": 23715 }, { "epoch": 1.3280322544517862, "grad_norm": 1.1913907527923584, "learning_rate": 9.9025e-05, "loss": 0.4042, "step": 23716 }, { "epoch": 1.3280882517639152, "grad_norm": 1.5494247674942017, "learning_rate": 9.902473684210527e-05, "loss": 0.4673, "step": 23717 }, { "epoch": 1.3281442490760442, "grad_norm": 1.230475664138794, "learning_rate": 9.902447368421053e-05, "loss": 0.4799, "step": 23718 }, { "epoch": 1.3282002463881732, "grad_norm": 1.455629587173462, "learning_rate": 9.90242105263158e-05, "loss": 0.5006, "step": 23719 }, { "epoch": 1.3282562437003023, "grad_norm": 1.6144088506698608, "learning_rate": 9.902394736842106e-05, "loss": 0.4582, "step": 23720 }, { "epoch": 1.3283122410124313, "grad_norm": 1.2861231565475464, "learning_rate": 9.902368421052632e-05, "loss": 0.4438, "step": 23721 }, { "epoch": 1.3283682383245603, "grad_norm": 1.314123272895813, "learning_rate": 9.902342105263158e-05, "loss": 0.4057, "step": 23722 }, { "epoch": 1.3284242356366893, "grad_norm": 1.452102780342102, "learning_rate": 9.902315789473685e-05, "loss": 0.4223, "step": 23723 }, { "epoch": 1.3284802329488183, "grad_norm": 1.2253562211990356, "learning_rate": 9.902289473684211e-05, "loss": 0.501, "step": 23724 }, { "epoch": 1.3285362302609474, "grad_norm": 1.557931661605835, "learning_rate": 9.902263157894737e-05, "loss": 0.4189, "step": 23725 }, { "epoch": 1.3285922275730764, "grad_norm": 1.4173012971878052, "learning_rate": 9.902236842105263e-05, "loss": 0.4374, "step": 23726 }, { "epoch": 1.3286482248852054, "grad_norm": 1.6278053522109985, "learning_rate": 9.90221052631579e-05, "loss": 0.4733, "step": 
23727 }, { "epoch": 1.3287042221973344, "grad_norm": 2.0490424633026123, "learning_rate": 9.902184210526317e-05, "loss": 0.4737, "step": 23728 }, { "epoch": 1.3287602195094634, "grad_norm": 1.7847868204116821, "learning_rate": 9.902157894736843e-05, "loss": 0.4162, "step": 23729 }, { "epoch": 1.3288162168215925, "grad_norm": 1.4404356479644775, "learning_rate": 9.902131578947369e-05, "loss": 0.3731, "step": 23730 }, { "epoch": 1.3288722141337215, "grad_norm": 1.6273163557052612, "learning_rate": 9.902105263157894e-05, "loss": 0.5964, "step": 23731 }, { "epoch": 1.3289282114458505, "grad_norm": 1.505690097808838, "learning_rate": 9.902078947368422e-05, "loss": 0.5087, "step": 23732 }, { "epoch": 1.3289842087579795, "grad_norm": 1.3277822732925415, "learning_rate": 9.902052631578948e-05, "loss": 0.6378, "step": 23733 }, { "epoch": 1.3290402060701085, "grad_norm": 1.4372085332870483, "learning_rate": 9.902026315789474e-05, "loss": 0.6282, "step": 23734 }, { "epoch": 1.3290962033822376, "grad_norm": 1.8132154941558838, "learning_rate": 9.902e-05, "loss": 0.5666, "step": 23735 }, { "epoch": 1.3291522006943666, "grad_norm": 1.291549563407898, "learning_rate": 9.901973684210527e-05, "loss": 0.3794, "step": 23736 }, { "epoch": 1.3292081980064956, "grad_norm": 1.598890781402588, "learning_rate": 9.901947368421053e-05, "loss": 0.5991, "step": 23737 }, { "epoch": 1.3292641953186246, "grad_norm": 1.4591436386108398, "learning_rate": 9.90192105263158e-05, "loss": 0.4907, "step": 23738 }, { "epoch": 1.3293201926307536, "grad_norm": 1.3021881580352783, "learning_rate": 9.901894736842105e-05, "loss": 0.458, "step": 23739 }, { "epoch": 1.3293761899428826, "grad_norm": 1.9970439672470093, "learning_rate": 9.901868421052632e-05, "loss": 0.5475, "step": 23740 }, { "epoch": 1.3294321872550117, "grad_norm": 1.3215672969818115, "learning_rate": 9.901842105263158e-05, "loss": 0.4985, "step": 23741 }, { "epoch": 1.3294881845671407, "grad_norm": 1.1670761108398438, "learning_rate": 
9.901815789473686e-05, "loss": 0.4236, "step": 23742 }, { "epoch": 1.3295441818792697, "grad_norm": 1.6619517803192139, "learning_rate": 9.901789473684212e-05, "loss": 0.5797, "step": 23743 }, { "epoch": 1.3296001791913987, "grad_norm": 1.4194841384887695, "learning_rate": 9.901763157894738e-05, "loss": 0.4307, "step": 23744 }, { "epoch": 1.3296561765035277, "grad_norm": 1.455501914024353, "learning_rate": 9.901736842105264e-05, "loss": 0.4563, "step": 23745 }, { "epoch": 1.3297121738156568, "grad_norm": 1.743912696838379, "learning_rate": 9.90171052631579e-05, "loss": 0.5251, "step": 23746 }, { "epoch": 1.3297681711277858, "grad_norm": 1.3530521392822266, "learning_rate": 9.901684210526317e-05, "loss": 0.412, "step": 23747 }, { "epoch": 1.3298241684399148, "grad_norm": 1.2381137609481812, "learning_rate": 9.901657894736843e-05, "loss": 0.4692, "step": 23748 }, { "epoch": 1.3298801657520438, "grad_norm": 1.2860863208770752, "learning_rate": 9.901631578947369e-05, "loss": 0.396, "step": 23749 }, { "epoch": 1.3299361630641728, "grad_norm": 1.500382423400879, "learning_rate": 9.901605263157895e-05, "loss": 0.6196, "step": 23750 }, { "epoch": 1.3299921603763019, "grad_norm": 1.6573114395141602, "learning_rate": 9.901578947368422e-05, "loss": 0.4758, "step": 23751 }, { "epoch": 1.3300481576884309, "grad_norm": 1.65608811378479, "learning_rate": 9.901552631578948e-05, "loss": 0.5383, "step": 23752 }, { "epoch": 1.33010415500056, "grad_norm": 1.4041887521743774, "learning_rate": 9.901526315789474e-05, "loss": 0.5479, "step": 23753 }, { "epoch": 1.330160152312689, "grad_norm": 1.302161455154419, "learning_rate": 9.9015e-05, "loss": 0.4635, "step": 23754 }, { "epoch": 1.330216149624818, "grad_norm": 1.4354580640792847, "learning_rate": 9.901473684210527e-05, "loss": 0.4113, "step": 23755 }, { "epoch": 1.330272146936947, "grad_norm": 1.409231424331665, "learning_rate": 9.901447368421053e-05, "loss": 0.4626, "step": 23756 }, { "epoch": 1.330328144249076, "grad_norm": 
1.9290640354156494, "learning_rate": 9.901421052631579e-05, "loss": 0.6919, "step": 23757 }, { "epoch": 1.330384141561205, "grad_norm": 1.1375243663787842, "learning_rate": 9.901394736842105e-05, "loss": 0.4853, "step": 23758 }, { "epoch": 1.330440138873334, "grad_norm": 1.466916561126709, "learning_rate": 9.901368421052633e-05, "loss": 0.4337, "step": 23759 }, { "epoch": 1.330496136185463, "grad_norm": 1.4821162223815918, "learning_rate": 9.901342105263159e-05, "loss": 0.4767, "step": 23760 }, { "epoch": 1.330552133497592, "grad_norm": 2.9241535663604736, "learning_rate": 9.901315789473686e-05, "loss": 0.4131, "step": 23761 }, { "epoch": 1.330608130809721, "grad_norm": 1.6557778120040894, "learning_rate": 9.90128947368421e-05, "loss": 0.4093, "step": 23762 }, { "epoch": 1.33066412812185, "grad_norm": 1.8381612300872803, "learning_rate": 9.901263157894736e-05, "loss": 1.0502, "step": 23763 }, { "epoch": 1.330720125433979, "grad_norm": 1.4598925113677979, "learning_rate": 9.901236842105264e-05, "loss": 0.5094, "step": 23764 }, { "epoch": 1.3307761227461081, "grad_norm": 1.5245411396026611, "learning_rate": 9.90121052631579e-05, "loss": 0.4248, "step": 23765 }, { "epoch": 1.3308321200582371, "grad_norm": 1.3863434791564941, "learning_rate": 9.901184210526316e-05, "loss": 0.4848, "step": 23766 }, { "epoch": 1.3308881173703662, "grad_norm": 1.2266355752944946, "learning_rate": 9.901157894736842e-05, "loss": 0.5014, "step": 23767 }, { "epoch": 1.3309441146824952, "grad_norm": 1.5366392135620117, "learning_rate": 9.901131578947369e-05, "loss": 0.4406, "step": 23768 }, { "epoch": 1.3310001119946242, "grad_norm": 1.3355239629745483, "learning_rate": 9.901105263157895e-05, "loss": 0.5142, "step": 23769 }, { "epoch": 1.3310561093067532, "grad_norm": 1.3259963989257812, "learning_rate": 9.901078947368422e-05, "loss": 0.4757, "step": 23770 }, { "epoch": 1.3311121066188822, "grad_norm": 1.2686805725097656, "learning_rate": 9.901052631578947e-05, "loss": 0.4744, "step": 23771 }, 
{ "epoch": 1.3311681039310113, "grad_norm": 1.6363102197647095, "learning_rate": 9.901026315789474e-05, "loss": 0.5022, "step": 23772 }, { "epoch": 1.3312241012431403, "grad_norm": 1.3323304653167725, "learning_rate": 9.901e-05, "loss": 0.4078, "step": 23773 }, { "epoch": 1.3312800985552693, "grad_norm": 1.2837271690368652, "learning_rate": 9.900973684210528e-05, "loss": 0.4101, "step": 23774 }, { "epoch": 1.3313360958673983, "grad_norm": 1.3309253454208374, "learning_rate": 9.900947368421054e-05, "loss": 0.5137, "step": 23775 }, { "epoch": 1.3313920931795273, "grad_norm": 1.3191558122634888, "learning_rate": 9.90092105263158e-05, "loss": 0.4689, "step": 23776 }, { "epoch": 1.3314480904916564, "grad_norm": 1.7781141996383667, "learning_rate": 9.900894736842105e-05, "loss": 0.6415, "step": 23777 }, { "epoch": 1.3315040878037854, "grad_norm": 1.3525689840316772, "learning_rate": 9.900868421052633e-05, "loss": 0.4296, "step": 23778 }, { "epoch": 1.3315600851159144, "grad_norm": 1.1907037496566772, "learning_rate": 9.900842105263159e-05, "loss": 0.3696, "step": 23779 }, { "epoch": 1.3316160824280434, "grad_norm": 1.3596187829971313, "learning_rate": 9.900815789473685e-05, "loss": 0.451, "step": 23780 }, { "epoch": 1.3316720797401724, "grad_norm": 1.2794840335845947, "learning_rate": 9.900789473684211e-05, "loss": 0.5895, "step": 23781 }, { "epoch": 1.3317280770523015, "grad_norm": 1.1582374572753906, "learning_rate": 9.900763157894737e-05, "loss": 0.4681, "step": 23782 }, { "epoch": 1.3317840743644305, "grad_norm": 1.742173433303833, "learning_rate": 9.900736842105264e-05, "loss": 0.5168, "step": 23783 }, { "epoch": 1.3318400716765595, "grad_norm": 1.382517695426941, "learning_rate": 9.90071052631579e-05, "loss": 0.4484, "step": 23784 }, { "epoch": 1.3318960689886885, "grad_norm": 1.4867075681686401, "learning_rate": 9.900684210526316e-05, "loss": 0.5246, "step": 23785 }, { "epoch": 1.3319520663008175, "grad_norm": 1.6939916610717773, "learning_rate": 
9.900657894736842e-05, "loss": 0.5886, "step": 23786 }, { "epoch": 1.3320080636129465, "grad_norm": 1.1807746887207031, "learning_rate": 9.900631578947369e-05, "loss": 0.3459, "step": 23787 }, { "epoch": 1.3320640609250756, "grad_norm": 1.394364595413208, "learning_rate": 9.900605263157895e-05, "loss": 0.4881, "step": 23788 }, { "epoch": 1.3321200582372046, "grad_norm": 1.2194969654083252, "learning_rate": 9.900578947368421e-05, "loss": 0.4091, "step": 23789 }, { "epoch": 1.3321760555493336, "grad_norm": 1.711050033569336, "learning_rate": 9.900552631578947e-05, "loss": 0.5224, "step": 23790 }, { "epoch": 1.3322320528614626, "grad_norm": 1.814692497253418, "learning_rate": 9.900526315789475e-05, "loss": 0.4065, "step": 23791 }, { "epoch": 1.3322880501735916, "grad_norm": 1.7624183893203735, "learning_rate": 9.9005e-05, "loss": 0.4169, "step": 23792 }, { "epoch": 1.3323440474857207, "grad_norm": 1.291270136833191, "learning_rate": 9.900473684210528e-05, "loss": 0.4397, "step": 23793 }, { "epoch": 1.3324000447978497, "grad_norm": 2.6617846488952637, "learning_rate": 9.900447368421052e-05, "loss": 0.681, "step": 23794 }, { "epoch": 1.3324560421099787, "grad_norm": 1.305954933166504, "learning_rate": 9.90042105263158e-05, "loss": 0.5125, "step": 23795 }, { "epoch": 1.3325120394221077, "grad_norm": 1.3788312673568726, "learning_rate": 9.900394736842106e-05, "loss": 0.4138, "step": 23796 }, { "epoch": 1.3325680367342367, "grad_norm": 1.4607813358306885, "learning_rate": 9.900368421052632e-05, "loss": 0.4552, "step": 23797 }, { "epoch": 1.3326240340463658, "grad_norm": 1.2364460229873657, "learning_rate": 9.900342105263159e-05, "loss": 0.3507, "step": 23798 }, { "epoch": 1.3326800313584948, "grad_norm": 1.564463496208191, "learning_rate": 9.900315789473684e-05, "loss": 0.4571, "step": 23799 }, { "epoch": 1.3327360286706238, "grad_norm": 1.3653931617736816, "learning_rate": 9.900289473684211e-05, "loss": 0.4893, "step": 23800 }, { "epoch": 1.3327920259827528, "grad_norm": 
1.6454015970230103, "learning_rate": 9.900263157894737e-05, "loss": 0.7647, "step": 23801 }, { "epoch": 1.3328480232948818, "grad_norm": 2.1272964477539062, "learning_rate": 9.900236842105264e-05, "loss": 0.7298, "step": 23802 }, { "epoch": 1.3329040206070109, "grad_norm": 1.3992019891738892, "learning_rate": 9.90021052631579e-05, "loss": 0.4638, "step": 23803 }, { "epoch": 1.3329600179191399, "grad_norm": 1.2260433435440063, "learning_rate": 9.900184210526316e-05, "loss": 0.457, "step": 23804 }, { "epoch": 1.333016015231269, "grad_norm": 1.2614973783493042, "learning_rate": 9.900157894736842e-05, "loss": 0.5395, "step": 23805 }, { "epoch": 1.333072012543398, "grad_norm": 1.2293323278427124, "learning_rate": 9.90013157894737e-05, "loss": 0.4591, "step": 23806 }, { "epoch": 1.333128009855527, "grad_norm": 1.2137826681137085, "learning_rate": 9.900105263157896e-05, "loss": 0.4168, "step": 23807 }, { "epoch": 1.333184007167656, "grad_norm": 1.2654191255569458, "learning_rate": 9.900078947368421e-05, "loss": 0.4625, "step": 23808 }, { "epoch": 1.333240004479785, "grad_norm": 1.5558730363845825, "learning_rate": 9.900052631578947e-05, "loss": 0.5058, "step": 23809 }, { "epoch": 1.333296001791914, "grad_norm": 1.408207893371582, "learning_rate": 9.900026315789475e-05, "loss": 0.466, "step": 23810 }, { "epoch": 1.333351999104043, "grad_norm": 1.3477212190628052, "learning_rate": 9.900000000000001e-05, "loss": 0.4401, "step": 23811 }, { "epoch": 1.333407996416172, "grad_norm": 1.4396218061447144, "learning_rate": 9.899973684210527e-05, "loss": 0.4345, "step": 23812 }, { "epoch": 1.333463993728301, "grad_norm": 1.7757333517074585, "learning_rate": 9.899947368421053e-05, "loss": 0.4976, "step": 23813 }, { "epoch": 1.33351999104043, "grad_norm": 1.4348074197769165, "learning_rate": 9.899921052631579e-05, "loss": 0.5476, "step": 23814 }, { "epoch": 1.333575988352559, "grad_norm": 1.3225101232528687, "learning_rate": 9.899894736842106e-05, "loss": 0.371, "step": 23815 }, { 
"epoch": 1.333631985664688, "grad_norm": 1.611476182937622, "learning_rate": 9.899868421052632e-05, "loss": 0.6114, "step": 23816 }, { "epoch": 1.3336879829768171, "grad_norm": 1.424462080001831, "learning_rate": 9.899842105263158e-05, "loss": 0.3367, "step": 23817 }, { "epoch": 1.3337439802889461, "grad_norm": 1.14649498462677, "learning_rate": 9.899815789473684e-05, "loss": 0.3709, "step": 23818 }, { "epoch": 1.3337999776010752, "grad_norm": 1.7224383354187012, "learning_rate": 9.899789473684211e-05, "loss": 0.6526, "step": 23819 }, { "epoch": 1.3338559749132042, "grad_norm": 1.3974765539169312, "learning_rate": 9.899763157894737e-05, "loss": 0.6072, "step": 23820 }, { "epoch": 1.3339119722253332, "grad_norm": 1.3082847595214844, "learning_rate": 9.899736842105263e-05, "loss": 0.4481, "step": 23821 }, { "epoch": 1.3339679695374622, "grad_norm": 1.349003791809082, "learning_rate": 9.899710526315789e-05, "loss": 0.5265, "step": 23822 }, { "epoch": 1.3340239668495912, "grad_norm": 1.996715784072876, "learning_rate": 9.899684210526316e-05, "loss": 0.5606, "step": 23823 }, { "epoch": 1.3340799641617203, "grad_norm": 1.4019711017608643, "learning_rate": 9.899657894736842e-05, "loss": 0.4384, "step": 23824 }, { "epoch": 1.3341359614738493, "grad_norm": 1.392748236656189, "learning_rate": 9.89963157894737e-05, "loss": 0.5603, "step": 23825 }, { "epoch": 1.3341919587859783, "grad_norm": 1.4841468334197998, "learning_rate": 9.899605263157894e-05, "loss": 0.4993, "step": 23826 }, { "epoch": 1.3342479560981073, "grad_norm": 1.6232941150665283, "learning_rate": 9.899578947368422e-05, "loss": 0.5431, "step": 23827 }, { "epoch": 1.3343039534102363, "grad_norm": 1.5400526523590088, "learning_rate": 9.899552631578948e-05, "loss": 0.436, "step": 23828 }, { "epoch": 1.3343599507223654, "grad_norm": 1.1462382078170776, "learning_rate": 9.899526315789475e-05, "loss": 0.4438, "step": 23829 }, { "epoch": 1.3344159480344944, "grad_norm": 1.3616477251052856, "learning_rate": 
9.899500000000001e-05, "loss": 0.3233, "step": 23830 }, { "epoch": 1.3344719453466234, "grad_norm": 2.2361109256744385, "learning_rate": 9.899473684210527e-05, "loss": 0.7127, "step": 23831 }, { "epoch": 1.3345279426587524, "grad_norm": 1.9489185810089111, "learning_rate": 9.899447368421053e-05, "loss": 0.4546, "step": 23832 }, { "epoch": 1.3345839399708814, "grad_norm": 7.203779220581055, "learning_rate": 9.899421052631579e-05, "loss": 0.5088, "step": 23833 }, { "epoch": 1.3346399372830104, "grad_norm": 1.516080617904663, "learning_rate": 9.899394736842106e-05, "loss": 0.4153, "step": 23834 }, { "epoch": 1.3346959345951395, "grad_norm": 1.2757153511047363, "learning_rate": 9.899368421052632e-05, "loss": 0.4722, "step": 23835 }, { "epoch": 1.3347519319072685, "grad_norm": 1.1644984483718872, "learning_rate": 9.899342105263158e-05, "loss": 0.4474, "step": 23836 }, { "epoch": 1.3348079292193975, "grad_norm": 1.3059358596801758, "learning_rate": 9.899315789473684e-05, "loss": 0.5506, "step": 23837 }, { "epoch": 1.3348639265315265, "grad_norm": 1.5706517696380615, "learning_rate": 9.899289473684212e-05, "loss": 0.4251, "step": 23838 }, { "epoch": 1.3349199238436555, "grad_norm": 1.316448450088501, "learning_rate": 9.899263157894737e-05, "loss": 0.4417, "step": 23839 }, { "epoch": 1.3349759211557846, "grad_norm": 1.5210957527160645, "learning_rate": 9.899236842105263e-05, "loss": 0.4646, "step": 23840 }, { "epoch": 1.3350319184679136, "grad_norm": 1.292188286781311, "learning_rate": 9.89921052631579e-05, "loss": 0.4646, "step": 23841 }, { "epoch": 1.3350879157800426, "grad_norm": 1.5652449131011963, "learning_rate": 9.899184210526317e-05, "loss": 0.3949, "step": 23842 }, { "epoch": 1.3351439130921716, "grad_norm": 2.441153049468994, "learning_rate": 9.899157894736843e-05, "loss": 0.4277, "step": 23843 }, { "epoch": 1.3351999104043006, "grad_norm": 1.1771906614303589, "learning_rate": 9.899131578947369e-05, "loss": 0.4003, "step": 23844 }, { "epoch": 1.3352559077164297, 
"grad_norm": 1.5029371976852417, "learning_rate": 9.899105263157895e-05, "loss": 0.5011, "step": 23845 }, { "epoch": 1.3353119050285587, "grad_norm": 1.25845468044281, "learning_rate": 9.899078947368422e-05, "loss": 0.4785, "step": 23846 }, { "epoch": 1.3353679023406877, "grad_norm": 1.39874267578125, "learning_rate": 9.899052631578948e-05, "loss": 0.4808, "step": 23847 }, { "epoch": 1.3354238996528167, "grad_norm": 1.8373452425003052, "learning_rate": 9.899026315789475e-05, "loss": 0.5285, "step": 23848 }, { "epoch": 1.3354798969649457, "grad_norm": 1.3263756036758423, "learning_rate": 9.899e-05, "loss": 0.5416, "step": 23849 }, { "epoch": 1.3355358942770748, "grad_norm": 1.2071919441223145, "learning_rate": 9.898973684210526e-05, "loss": 0.4166, "step": 23850 }, { "epoch": 1.3355918915892038, "grad_norm": 1.3608685731887817, "learning_rate": 9.898947368421053e-05, "loss": 0.4098, "step": 23851 }, { "epoch": 1.3356478889013328, "grad_norm": 1.263219952583313, "learning_rate": 9.898921052631579e-05, "loss": 0.4067, "step": 23852 }, { "epoch": 1.3357038862134618, "grad_norm": 1.5391923189163208, "learning_rate": 9.898894736842107e-05, "loss": 0.51, "step": 23853 }, { "epoch": 1.3357598835255908, "grad_norm": 1.4992666244506836, "learning_rate": 9.898868421052631e-05, "loss": 0.5237, "step": 23854 }, { "epoch": 1.3358158808377198, "grad_norm": 1.4221733808517456, "learning_rate": 9.898842105263158e-05, "loss": 0.473, "step": 23855 }, { "epoch": 1.3358718781498489, "grad_norm": 1.4912809133529663, "learning_rate": 9.898815789473684e-05, "loss": 0.4861, "step": 23856 }, { "epoch": 1.3359278754619779, "grad_norm": 1.3533376455307007, "learning_rate": 9.898789473684212e-05, "loss": 0.5411, "step": 23857 }, { "epoch": 1.335983872774107, "grad_norm": 1.2698400020599365, "learning_rate": 9.898763157894738e-05, "loss": 0.4958, "step": 23858 }, { "epoch": 1.336039870086236, "grad_norm": 1.6540385484695435, "learning_rate": 9.898736842105264e-05, "loss": 0.5944, "step": 23859 
}, { "epoch": 1.336095867398365, "grad_norm": 1.4382905960083008, "learning_rate": 9.89871052631579e-05, "loss": 0.4067, "step": 23860 }, { "epoch": 1.336151864710494, "grad_norm": 1.4954227209091187, "learning_rate": 9.898684210526317e-05, "loss": 0.5319, "step": 23861 }, { "epoch": 1.336207862022623, "grad_norm": 1.2915523052215576, "learning_rate": 9.898657894736843e-05, "loss": 0.4074, "step": 23862 }, { "epoch": 1.336263859334752, "grad_norm": 1.2406599521636963, "learning_rate": 9.898631578947369e-05, "loss": 0.4717, "step": 23863 }, { "epoch": 1.336319856646881, "grad_norm": 1.2790749073028564, "learning_rate": 9.898605263157895e-05, "loss": 0.5137, "step": 23864 }, { "epoch": 1.33637585395901, "grad_norm": 1.512863039970398, "learning_rate": 9.898578947368422e-05, "loss": 0.4512, "step": 23865 }, { "epoch": 1.336431851271139, "grad_norm": 1.4381901025772095, "learning_rate": 9.898552631578948e-05, "loss": 0.5711, "step": 23866 }, { "epoch": 1.336487848583268, "grad_norm": 1.1895484924316406, "learning_rate": 9.898526315789474e-05, "loss": 0.4086, "step": 23867 }, { "epoch": 1.336543845895397, "grad_norm": 1.587947964668274, "learning_rate": 9.8985e-05, "loss": 0.46, "step": 23868 }, { "epoch": 1.3365998432075261, "grad_norm": 1.3747241497039795, "learning_rate": 9.898473684210526e-05, "loss": 0.549, "step": 23869 }, { "epoch": 1.3366558405196551, "grad_norm": 1.4006222486495972, "learning_rate": 9.898447368421053e-05, "loss": 0.4107, "step": 23870 }, { "epoch": 1.3367118378317842, "grad_norm": 1.5141626596450806, "learning_rate": 9.89842105263158e-05, "loss": 0.5305, "step": 23871 }, { "epoch": 1.3367678351439132, "grad_norm": 1.4598748683929443, "learning_rate": 9.898394736842105e-05, "loss": 0.4646, "step": 23872 }, { "epoch": 1.3368238324560422, "grad_norm": 1.4114829301834106, "learning_rate": 9.898368421052631e-05, "loss": 0.4784, "step": 23873 }, { "epoch": 1.3368798297681712, "grad_norm": 1.3672314882278442, "learning_rate": 9.898342105263159e-05, 
"loss": 0.4479, "step": 23874 }, { "epoch": 1.3369358270803002, "grad_norm": 1.234108567237854, "learning_rate": 9.898315789473685e-05, "loss": 0.4793, "step": 23875 }, { "epoch": 1.3369918243924293, "grad_norm": 1.3132939338684082, "learning_rate": 9.89828947368421e-05, "loss": 0.4638, "step": 23876 }, { "epoch": 1.3370478217045583, "grad_norm": 1.4558367729187012, "learning_rate": 9.898263157894737e-05, "loss": 0.566, "step": 23877 }, { "epoch": 1.3371038190166873, "grad_norm": 1.4092020988464355, "learning_rate": 9.898236842105264e-05, "loss": 0.3604, "step": 23878 }, { "epoch": 1.3371598163288163, "grad_norm": 1.476643443107605, "learning_rate": 9.89821052631579e-05, "loss": 0.5447, "step": 23879 }, { "epoch": 1.3372158136409453, "grad_norm": 1.3778104782104492, "learning_rate": 9.898184210526317e-05, "loss": 0.4364, "step": 23880 }, { "epoch": 1.3372718109530743, "grad_norm": 1.4713430404663086, "learning_rate": 9.898157894736842e-05, "loss": 0.516, "step": 23881 }, { "epoch": 1.3373278082652034, "grad_norm": 1.4517395496368408, "learning_rate": 9.898131578947369e-05, "loss": 0.4876, "step": 23882 }, { "epoch": 1.3373838055773324, "grad_norm": 1.3174575567245483, "learning_rate": 9.898105263157895e-05, "loss": 0.4194, "step": 23883 }, { "epoch": 1.3374398028894614, "grad_norm": 1.451480507850647, "learning_rate": 9.898078947368421e-05, "loss": 0.5662, "step": 23884 }, { "epoch": 1.3374958002015904, "grad_norm": 1.321994662284851, "learning_rate": 9.898052631578948e-05, "loss": 0.3988, "step": 23885 }, { "epoch": 1.3375517975137194, "grad_norm": 1.323826789855957, "learning_rate": 9.898026315789473e-05, "loss": 0.4865, "step": 23886 }, { "epoch": 1.3376077948258485, "grad_norm": 1.6025311946868896, "learning_rate": 9.898e-05, "loss": 0.5381, "step": 23887 }, { "epoch": 1.3376637921379775, "grad_norm": 1.4213122129440308, "learning_rate": 9.897973684210526e-05, "loss": 0.5454, "step": 23888 }, { "epoch": 1.3377197894501065, "grad_norm": 1.103776216506958, 
"learning_rate": 9.897947368421054e-05, "loss": 0.4856, "step": 23889 }, { "epoch": 1.3377757867622355, "grad_norm": 1.3291431665420532, "learning_rate": 9.89792105263158e-05, "loss": 0.4835, "step": 23890 }, { "epoch": 1.3378317840743645, "grad_norm": 1.5485930442810059, "learning_rate": 9.897894736842106e-05, "loss": 0.6487, "step": 23891 }, { "epoch": 1.3378877813864936, "grad_norm": 1.4208502769470215, "learning_rate": 9.897868421052632e-05, "loss": 0.4842, "step": 23892 }, { "epoch": 1.3379437786986226, "grad_norm": 1.3566259145736694, "learning_rate": 9.897842105263159e-05, "loss": 0.3162, "step": 23893 }, { "epoch": 1.3379997760107516, "grad_norm": 1.3701115846633911, "learning_rate": 9.897815789473685e-05, "loss": 0.4999, "step": 23894 }, { "epoch": 1.3380557733228806, "grad_norm": 1.1461424827575684, "learning_rate": 9.897789473684211e-05, "loss": 0.4746, "step": 23895 }, { "epoch": 1.3381117706350096, "grad_norm": 1.3429561853408813, "learning_rate": 9.897763157894737e-05, "loss": 0.4234, "step": 23896 }, { "epoch": 1.3381677679471387, "grad_norm": 1.2542699575424194, "learning_rate": 9.897736842105264e-05, "loss": 0.4548, "step": 23897 }, { "epoch": 1.3382237652592677, "grad_norm": 1.7728514671325684, "learning_rate": 9.89771052631579e-05, "loss": 0.5006, "step": 23898 }, { "epoch": 1.3382797625713967, "grad_norm": 2.911759376525879, "learning_rate": 9.897684210526316e-05, "loss": 0.4549, "step": 23899 }, { "epoch": 1.3383357598835257, "grad_norm": 2.126786708831787, "learning_rate": 9.897657894736842e-05, "loss": 0.5309, "step": 23900 }, { "epoch": 1.3383917571956547, "grad_norm": 1.303514003753662, "learning_rate": 9.897631578947368e-05, "loss": 0.4588, "step": 23901 }, { "epoch": 1.3384477545077837, "grad_norm": 1.3816440105438232, "learning_rate": 9.897605263157895e-05, "loss": 0.4875, "step": 23902 }, { "epoch": 1.3385037518199128, "grad_norm": 1.1241973638534546, "learning_rate": 9.897578947368421e-05, "loss": 0.3535, "step": 23903 }, { "epoch": 
1.3385597491320418, "grad_norm": 1.3934160470962524, "learning_rate": 9.897552631578947e-05, "loss": 0.4369, "step": 23904 }, { "epoch": 1.3386157464441708, "grad_norm": 1.5238735675811768, "learning_rate": 9.897526315789473e-05, "loss": 0.4324, "step": 23905 }, { "epoch": 1.3386717437562998, "grad_norm": 1.5582380294799805, "learning_rate": 9.897500000000001e-05, "loss": 0.5663, "step": 23906 }, { "epoch": 1.3387277410684288, "grad_norm": 1.082220435142517, "learning_rate": 9.897473684210527e-05, "loss": 0.4133, "step": 23907 }, { "epoch": 1.3387837383805579, "grad_norm": 1.3235070705413818, "learning_rate": 9.897447368421054e-05, "loss": 0.4129, "step": 23908 }, { "epoch": 1.3388397356926869, "grad_norm": 1.3532748222351074, "learning_rate": 9.897421052631579e-05, "loss": 0.5135, "step": 23909 }, { "epoch": 1.338895733004816, "grad_norm": 1.4414713382720947, "learning_rate": 9.897394736842106e-05, "loss": 0.3811, "step": 23910 }, { "epoch": 1.338951730316945, "grad_norm": 1.3468427658081055, "learning_rate": 9.897368421052632e-05, "loss": 0.4218, "step": 23911 }, { "epoch": 1.339007727629074, "grad_norm": 1.3484735488891602, "learning_rate": 9.897342105263159e-05, "loss": 0.4637, "step": 23912 }, { "epoch": 1.339063724941203, "grad_norm": 2.199549436569214, "learning_rate": 9.897315789473685e-05, "loss": 0.5371, "step": 23913 }, { "epoch": 1.339119722253332, "grad_norm": 1.2956798076629639, "learning_rate": 9.897289473684211e-05, "loss": 0.4634, "step": 23914 }, { "epoch": 1.3391757195654608, "grad_norm": 1.543900728225708, "learning_rate": 9.897263157894737e-05, "loss": 0.5257, "step": 23915 }, { "epoch": 1.3392317168775898, "grad_norm": 1.3237605094909668, "learning_rate": 9.897236842105264e-05, "loss": 0.4308, "step": 23916 }, { "epoch": 1.3392877141897188, "grad_norm": 1.7733278274536133, "learning_rate": 9.89721052631579e-05, "loss": 0.4651, "step": 23917 }, { "epoch": 1.3393437115018478, "grad_norm": 1.2714046239852905, "learning_rate": 
9.897184210526315e-05, "loss": 0.4149, "step": 23918 }, { "epoch": 1.3393997088139769, "grad_norm": 1.4521942138671875, "learning_rate": 9.897157894736842e-05, "loss": 0.3297, "step": 23919 }, { "epoch": 1.3394557061261059, "grad_norm": 1.4575254917144775, "learning_rate": 9.897131578947368e-05, "loss": 0.6913, "step": 23920 }, { "epoch": 1.339511703438235, "grad_norm": 1.6212458610534668, "learning_rate": 9.897105263157896e-05, "loss": 0.5082, "step": 23921 }, { "epoch": 1.339567700750364, "grad_norm": 1.4157074689865112, "learning_rate": 9.897078947368422e-05, "loss": 0.4816, "step": 23922 }, { "epoch": 1.339623698062493, "grad_norm": 1.3685389757156372, "learning_rate": 9.897052631578948e-05, "loss": 0.4676, "step": 23923 }, { "epoch": 1.339679695374622, "grad_norm": 1.3349485397338867, "learning_rate": 9.897026315789474e-05, "loss": 0.7631, "step": 23924 }, { "epoch": 1.339735692686751, "grad_norm": 1.2802698612213135, "learning_rate": 9.897000000000001e-05, "loss": 0.5143, "step": 23925 }, { "epoch": 1.33979168999888, "grad_norm": 1.3084275722503662, "learning_rate": 9.896973684210527e-05, "loss": 0.4478, "step": 23926 }, { "epoch": 1.339847687311009, "grad_norm": 1.560441255569458, "learning_rate": 9.896947368421053e-05, "loss": 0.5547, "step": 23927 }, { "epoch": 1.339903684623138, "grad_norm": 1.327688217163086, "learning_rate": 9.896921052631579e-05, "loss": 0.6401, "step": 23928 }, { "epoch": 1.339959681935267, "grad_norm": 1.4617291688919067, "learning_rate": 9.896894736842106e-05, "loss": 0.4964, "step": 23929 }, { "epoch": 1.340015679247396, "grad_norm": 1.5066403150558472, "learning_rate": 9.896868421052632e-05, "loss": 0.4309, "step": 23930 }, { "epoch": 1.340071676559525, "grad_norm": 1.3324167728424072, "learning_rate": 9.896842105263158e-05, "loss": 0.4384, "step": 23931 }, { "epoch": 1.340127673871654, "grad_norm": 1.2845227718353271, "learning_rate": 9.896815789473684e-05, "loss": 0.4418, "step": 23932 }, { "epoch": 1.3401836711837831, 
"grad_norm": 1.507017731666565, "learning_rate": 9.896789473684211e-05, "loss": 0.5666, "step": 23933 }, { "epoch": 1.3402396684959121, "grad_norm": 1.5677680969238281, "learning_rate": 9.896763157894737e-05, "loss": 0.5223, "step": 23934 }, { "epoch": 1.3402956658080412, "grad_norm": 1.4154253005981445, "learning_rate": 9.896736842105263e-05, "loss": 0.5921, "step": 23935 }, { "epoch": 1.3403516631201702, "grad_norm": 1.358176589012146, "learning_rate": 9.89671052631579e-05, "loss": 0.4748, "step": 23936 }, { "epoch": 1.3404076604322992, "grad_norm": 1.3491337299346924, "learning_rate": 9.896684210526315e-05, "loss": 0.4248, "step": 23937 }, { "epoch": 1.3404636577444282, "grad_norm": 1.1043469905853271, "learning_rate": 9.896657894736843e-05, "loss": 0.4779, "step": 23938 }, { "epoch": 1.3405196550565572, "grad_norm": 1.4574761390686035, "learning_rate": 9.896631578947369e-05, "loss": 0.6477, "step": 23939 }, { "epoch": 1.3405756523686863, "grad_norm": 1.283100962638855, "learning_rate": 9.896605263157896e-05, "loss": 0.4059, "step": 23940 }, { "epoch": 1.3406316496808153, "grad_norm": 1.1675559282302856, "learning_rate": 9.89657894736842e-05, "loss": 0.3313, "step": 23941 }, { "epoch": 1.3406876469929443, "grad_norm": 1.175600290298462, "learning_rate": 9.896552631578948e-05, "loss": 0.3677, "step": 23942 }, { "epoch": 1.3407436443050733, "grad_norm": 1.2264043092727661, "learning_rate": 9.896526315789474e-05, "loss": 0.4248, "step": 23943 }, { "epoch": 1.3407996416172023, "grad_norm": 1.5873838663101196, "learning_rate": 9.896500000000001e-05, "loss": 0.487, "step": 23944 }, { "epoch": 1.3408556389293314, "grad_norm": 1.2819849252700806, "learning_rate": 9.896473684210527e-05, "loss": 0.4605, "step": 23945 }, { "epoch": 1.3409116362414604, "grad_norm": 1.4461268186569214, "learning_rate": 9.896447368421053e-05, "loss": 0.525, "step": 23946 }, { "epoch": 1.3409676335535894, "grad_norm": 1.2202770709991455, "learning_rate": 9.896421052631579e-05, "loss": 0.4893, 
"step": 23947 }, { "epoch": 1.3410236308657184, "grad_norm": 1.4147801399230957, "learning_rate": 9.896394736842106e-05, "loss": 0.4573, "step": 23948 }, { "epoch": 1.3410796281778474, "grad_norm": 1.9122450351715088, "learning_rate": 9.896368421052632e-05, "loss": 0.4832, "step": 23949 }, { "epoch": 1.3411356254899764, "grad_norm": 1.5094122886657715, "learning_rate": 9.896342105263158e-05, "loss": 0.4196, "step": 23950 }, { "epoch": 1.3411916228021055, "grad_norm": 1.4098691940307617, "learning_rate": 9.896315789473684e-05, "loss": 0.478, "step": 23951 }, { "epoch": 1.3412476201142345, "grad_norm": 1.3345513343811035, "learning_rate": 9.896289473684212e-05, "loss": 0.3645, "step": 23952 }, { "epoch": 1.3413036174263635, "grad_norm": 1.1226009130477905, "learning_rate": 9.896263157894738e-05, "loss": 0.3738, "step": 23953 }, { "epoch": 1.3413596147384925, "grad_norm": 1.3263577222824097, "learning_rate": 9.896236842105264e-05, "loss": 0.4815, "step": 23954 }, { "epoch": 1.3414156120506215, "grad_norm": 1.2852894067764282, "learning_rate": 9.89621052631579e-05, "loss": 0.4569, "step": 23955 }, { "epoch": 1.3414716093627506, "grad_norm": 1.4982022047042847, "learning_rate": 9.896184210526316e-05, "loss": 0.6568, "step": 23956 }, { "epoch": 1.3415276066748796, "grad_norm": 1.3840714693069458, "learning_rate": 9.896157894736843e-05, "loss": 0.4405, "step": 23957 }, { "epoch": 1.3415836039870086, "grad_norm": 1.18630051612854, "learning_rate": 9.896131578947369e-05, "loss": 0.5221, "step": 23958 }, { "epoch": 1.3416396012991376, "grad_norm": 1.3169230222702026, "learning_rate": 9.896105263157895e-05, "loss": 0.5608, "step": 23959 }, { "epoch": 1.3416955986112666, "grad_norm": 1.7112146615982056, "learning_rate": 9.896078947368421e-05, "loss": 0.5546, "step": 23960 }, { "epoch": 1.3417515959233957, "grad_norm": 1.5063049793243408, "learning_rate": 9.896052631578948e-05, "loss": 0.6101, "step": 23961 }, { "epoch": 1.3418075932355247, "grad_norm": 1.383370280265808, 
"learning_rate": 9.896026315789474e-05, "loss": 0.4475, "step": 23962 }, { "epoch": 1.3418635905476537, "grad_norm": 1.4847168922424316, "learning_rate": 9.896000000000001e-05, "loss": 0.5212, "step": 23963 }, { "epoch": 1.3419195878597827, "grad_norm": 1.172202229499817, "learning_rate": 9.895973684210526e-05, "loss": 0.4173, "step": 23964 }, { "epoch": 1.3419755851719117, "grad_norm": 1.6316022872924805, "learning_rate": 9.895947368421053e-05, "loss": 0.5424, "step": 23965 }, { "epoch": 1.3420315824840408, "grad_norm": 1.3216683864593506, "learning_rate": 9.89592105263158e-05, "loss": 0.4224, "step": 23966 }, { "epoch": 1.3420875797961698, "grad_norm": 1.0985703468322754, "learning_rate": 9.895894736842107e-05, "loss": 0.3667, "step": 23967 }, { "epoch": 1.3421435771082988, "grad_norm": 1.6286671161651611, "learning_rate": 9.895868421052631e-05, "loss": 0.5541, "step": 23968 }, { "epoch": 1.3421995744204278, "grad_norm": 1.5791877508163452, "learning_rate": 9.895842105263159e-05, "loss": 0.5442, "step": 23969 }, { "epoch": 1.3422555717325568, "grad_norm": 1.5616247653961182, "learning_rate": 9.895815789473685e-05, "loss": 0.6061, "step": 23970 }, { "epoch": 1.3423115690446858, "grad_norm": 1.5898517370224, "learning_rate": 9.89578947368421e-05, "loss": 0.4698, "step": 23971 }, { "epoch": 1.3423675663568149, "grad_norm": 1.1105499267578125, "learning_rate": 9.895763157894738e-05, "loss": 0.4383, "step": 23972 }, { "epoch": 1.3424235636689439, "grad_norm": 1.3896452188491821, "learning_rate": 9.895736842105263e-05, "loss": 0.4147, "step": 23973 }, { "epoch": 1.342479560981073, "grad_norm": 1.7090681791305542, "learning_rate": 9.89571052631579e-05, "loss": 0.4694, "step": 23974 }, { "epoch": 1.342535558293202, "grad_norm": 2.046135187149048, "learning_rate": 9.895684210526316e-05, "loss": 0.7754, "step": 23975 }, { "epoch": 1.342591555605331, "grad_norm": 1.567522644996643, "learning_rate": 9.895657894736843e-05, "loss": 0.5032, "step": 23976 }, { "epoch": 
1.34264755291746, "grad_norm": 1.248820424079895, "learning_rate": 9.895631578947369e-05, "loss": 0.4866, "step": 23977 }, { "epoch": 1.342703550229589, "grad_norm": 1.32659113407135, "learning_rate": 9.895605263157895e-05, "loss": 0.5262, "step": 23978 }, { "epoch": 1.342759547541718, "grad_norm": 1.36756432056427, "learning_rate": 9.895578947368421e-05, "loss": 0.4342, "step": 23979 }, { "epoch": 1.342815544853847, "grad_norm": 1.3160245418548584, "learning_rate": 9.895552631578948e-05, "loss": 0.4198, "step": 23980 }, { "epoch": 1.342871542165976, "grad_norm": 1.4190582036972046, "learning_rate": 9.895526315789474e-05, "loss": 0.4506, "step": 23981 }, { "epoch": 1.342927539478105, "grad_norm": 1.7953743934631348, "learning_rate": 9.8955e-05, "loss": 0.4899, "step": 23982 }, { "epoch": 1.342983536790234, "grad_norm": 1.3152244091033936, "learning_rate": 9.895473684210526e-05, "loss": 0.4237, "step": 23983 }, { "epoch": 1.343039534102363, "grad_norm": 1.426226019859314, "learning_rate": 9.895447368421054e-05, "loss": 0.5225, "step": 23984 }, { "epoch": 1.3430955314144921, "grad_norm": 1.4641419649124146, "learning_rate": 9.89542105263158e-05, "loss": 0.476, "step": 23985 }, { "epoch": 1.3431515287266211, "grad_norm": 1.3623632192611694, "learning_rate": 9.895394736842106e-05, "loss": 0.5086, "step": 23986 }, { "epoch": 1.3432075260387502, "grad_norm": 1.0939357280731201, "learning_rate": 9.895368421052632e-05, "loss": 0.5236, "step": 23987 }, { "epoch": 1.3432635233508792, "grad_norm": 1.5585516691207886, "learning_rate": 9.895342105263158e-05, "loss": 0.594, "step": 23988 }, { "epoch": 1.3433195206630082, "grad_norm": 19.078744888305664, "learning_rate": 9.895315789473685e-05, "loss": 0.4286, "step": 23989 }, { "epoch": 1.3433755179751372, "grad_norm": 1.1451103687286377, "learning_rate": 9.895289473684211e-05, "loss": 0.4013, "step": 23990 }, { "epoch": 1.3434315152872662, "grad_norm": 1.6688843965530396, "learning_rate": 9.895263157894737e-05, "loss": 0.6271, 
"step": 23991 }, { "epoch": 1.3434875125993953, "grad_norm": 1.4637559652328491, "learning_rate": 9.895236842105263e-05, "loss": 0.6666, "step": 23992 }, { "epoch": 1.3435435099115243, "grad_norm": 1.3607884645462036, "learning_rate": 9.89521052631579e-05, "loss": 0.5131, "step": 23993 }, { "epoch": 1.3435995072236533, "grad_norm": 1.3291383981704712, "learning_rate": 9.895184210526316e-05, "loss": 0.4581, "step": 23994 }, { "epoch": 1.3436555045357823, "grad_norm": 1.298227071762085, "learning_rate": 9.895157894736843e-05, "loss": 0.5034, "step": 23995 }, { "epoch": 1.3437115018479113, "grad_norm": 1.3066940307617188, "learning_rate": 9.895131578947368e-05, "loss": 0.4796, "step": 23996 }, { "epoch": 1.3437674991600403, "grad_norm": 1.0873332023620605, "learning_rate": 9.895105263157895e-05, "loss": 0.3945, "step": 23997 }, { "epoch": 1.3438234964721694, "grad_norm": 1.6476949453353882, "learning_rate": 9.895078947368421e-05, "loss": 0.5423, "step": 23998 }, { "epoch": 1.3438794937842984, "grad_norm": 1.4215620756149292, "learning_rate": 9.895052631578949e-05, "loss": 0.4441, "step": 23999 }, { "epoch": 1.3439354910964274, "grad_norm": 1.2197273969650269, "learning_rate": 9.895026315789475e-05, "loss": 0.383, "step": 24000 }, { "epoch": 1.3439914884085564, "grad_norm": 1.3874455690383911, "learning_rate": 9.895e-05, "loss": 0.5279, "step": 24001 }, { "epoch": 1.3440474857206854, "grad_norm": 1.2576227188110352, "learning_rate": 9.894973684210527e-05, "loss": 0.4465, "step": 24002 }, { "epoch": 1.3441034830328145, "grad_norm": 1.453696846961975, "learning_rate": 9.894947368421054e-05, "loss": 0.4785, "step": 24003 }, { "epoch": 1.3441594803449435, "grad_norm": 1.446753978729248, "learning_rate": 9.89492105263158e-05, "loss": 0.5079, "step": 24004 }, { "epoch": 1.3442154776570725, "grad_norm": 1.7142091989517212, "learning_rate": 9.894894736842106e-05, "loss": 0.6639, "step": 24005 }, { "epoch": 1.3442714749692015, "grad_norm": 1.5219104290008545, "learning_rate": 
9.894868421052632e-05, "loss": 0.4946, "step": 24006 }, { "epoch": 1.3443274722813305, "grad_norm": 1.135257601737976, "learning_rate": 9.894842105263158e-05, "loss": 0.3277, "step": 24007 }, { "epoch": 1.3443834695934596, "grad_norm": 1.2626960277557373, "learning_rate": 9.894815789473685e-05, "loss": 0.4215, "step": 24008 }, { "epoch": 1.3444394669055886, "grad_norm": 1.1269794702529907, "learning_rate": 9.894789473684211e-05, "loss": 0.4061, "step": 24009 }, { "epoch": 1.3444954642177176, "grad_norm": 1.2398040294647217, "learning_rate": 9.894763157894737e-05, "loss": 0.5051, "step": 24010 }, { "epoch": 1.3445514615298466, "grad_norm": 1.2162896394729614, "learning_rate": 9.894736842105263e-05, "loss": 0.3527, "step": 24011 }, { "epoch": 1.3446074588419756, "grad_norm": 1.1273400783538818, "learning_rate": 9.89471052631579e-05, "loss": 0.3632, "step": 24012 }, { "epoch": 1.3446634561541047, "grad_norm": 1.2960906028747559, "learning_rate": 9.894684210526316e-05, "loss": 0.4308, "step": 24013 }, { "epoch": 1.3447194534662337, "grad_norm": 1.1805799007415771, "learning_rate": 9.894657894736842e-05, "loss": 0.3707, "step": 24014 }, { "epoch": 1.3447754507783627, "grad_norm": 1.424858570098877, "learning_rate": 9.894631578947368e-05, "loss": 0.4553, "step": 24015 }, { "epoch": 1.3448314480904917, "grad_norm": 1.1969478130340576, "learning_rate": 9.894605263157896e-05, "loss": 0.5471, "step": 24016 }, { "epoch": 1.3448874454026207, "grad_norm": 1.5464682579040527, "learning_rate": 9.894578947368422e-05, "loss": 0.4241, "step": 24017 }, { "epoch": 1.3449434427147497, "grad_norm": 1.2001961469650269, "learning_rate": 9.894552631578949e-05, "loss": 0.4713, "step": 24018 }, { "epoch": 1.3449994400268788, "grad_norm": 1.5608412027359009, "learning_rate": 9.894526315789474e-05, "loss": 0.5646, "step": 24019 }, { "epoch": 1.3450554373390078, "grad_norm": 1.757699728012085, "learning_rate": 9.894500000000001e-05, "loss": 0.5386, "step": 24020 }, { "epoch": 
1.3451114346511368, "grad_norm": 1.6653386354446411, "learning_rate": 9.894473684210527e-05, "loss": 0.593, "step": 24021 }, { "epoch": 1.3451674319632658, "grad_norm": 1.305739402770996, "learning_rate": 9.894447368421053e-05, "loss": 0.4264, "step": 24022 }, { "epoch": 1.3452234292753948, "grad_norm": 1.2630366086959839, "learning_rate": 9.894421052631579e-05, "loss": 0.5489, "step": 24023 }, { "epoch": 1.3452794265875239, "grad_norm": 1.4003286361694336, "learning_rate": 9.894394736842105e-05, "loss": 0.5305, "step": 24024 }, { "epoch": 1.3453354238996529, "grad_norm": 1.5072417259216309, "learning_rate": 9.894368421052632e-05, "loss": 0.5619, "step": 24025 }, { "epoch": 1.345391421211782, "grad_norm": 1.4055123329162598, "learning_rate": 9.894342105263158e-05, "loss": 0.4955, "step": 24026 }, { "epoch": 1.345447418523911, "grad_norm": 1.1914441585540771, "learning_rate": 9.894315789473685e-05, "loss": 0.3817, "step": 24027 }, { "epoch": 1.34550341583604, "grad_norm": 1.378165364265442, "learning_rate": 9.89428947368421e-05, "loss": 0.4743, "step": 24028 }, { "epoch": 1.345559413148169, "grad_norm": 1.399539828300476, "learning_rate": 9.894263157894737e-05, "loss": 0.4816, "step": 24029 }, { "epoch": 1.345615410460298, "grad_norm": 1.3169493675231934, "learning_rate": 9.894236842105263e-05, "loss": 0.4081, "step": 24030 }, { "epoch": 1.345671407772427, "grad_norm": 1.3407050371170044, "learning_rate": 9.89421052631579e-05, "loss": 0.3549, "step": 24031 }, { "epoch": 1.345727405084556, "grad_norm": 1.0616273880004883, "learning_rate": 9.894184210526317e-05, "loss": 0.3927, "step": 24032 }, { "epoch": 1.345783402396685, "grad_norm": 1.173591136932373, "learning_rate": 9.894157894736843e-05, "loss": 0.3702, "step": 24033 }, { "epoch": 1.345839399708814, "grad_norm": 1.3523924350738525, "learning_rate": 9.894131578947369e-05, "loss": 0.4446, "step": 24034 }, { "epoch": 1.345895397020943, "grad_norm": 1.2364277839660645, "learning_rate": 9.894105263157896e-05, 
"loss": 0.4579, "step": 24035 }, { "epoch": 1.345951394333072, "grad_norm": 1.3914684057235718, "learning_rate": 9.894078947368422e-05, "loss": 0.4652, "step": 24036 }, { "epoch": 1.3460073916452011, "grad_norm": 1.5705797672271729, "learning_rate": 9.894052631578948e-05, "loss": 0.4735, "step": 24037 }, { "epoch": 1.3460633889573301, "grad_norm": 1.4909099340438843, "learning_rate": 9.894026315789474e-05, "loss": 0.4696, "step": 24038 }, { "epoch": 1.346119386269459, "grad_norm": 1.4098501205444336, "learning_rate": 9.894e-05, "loss": 0.5234, "step": 24039 }, { "epoch": 1.346175383581588, "grad_norm": 1.753283143043518, "learning_rate": 9.893973684210527e-05, "loss": 0.3517, "step": 24040 }, { "epoch": 1.346231380893717, "grad_norm": 1.263336420059204, "learning_rate": 9.893947368421053e-05, "loss": 0.4078, "step": 24041 }, { "epoch": 1.346287378205846, "grad_norm": 1.461779236793518, "learning_rate": 9.893921052631579e-05, "loss": 0.4651, "step": 24042 }, { "epoch": 1.346343375517975, "grad_norm": 1.3641862869262695, "learning_rate": 9.893894736842105e-05, "loss": 0.5486, "step": 24043 }, { "epoch": 1.346399372830104, "grad_norm": 1.3653966188430786, "learning_rate": 9.893868421052632e-05, "loss": 0.4846, "step": 24044 }, { "epoch": 1.346455370142233, "grad_norm": 1.9145190715789795, "learning_rate": 9.893842105263158e-05, "loss": 0.3525, "step": 24045 }, { "epoch": 1.346511367454362, "grad_norm": 1.817022442817688, "learning_rate": 9.893815789473684e-05, "loss": 0.4692, "step": 24046 }, { "epoch": 1.346567364766491, "grad_norm": 1.4517470598220825, "learning_rate": 9.89378947368421e-05, "loss": 0.4655, "step": 24047 }, { "epoch": 1.34662336207862, "grad_norm": 1.4838329553604126, "learning_rate": 9.893763157894738e-05, "loss": 0.5176, "step": 24048 }, { "epoch": 1.3466793593907491, "grad_norm": 1.3544859886169434, "learning_rate": 9.893736842105264e-05, "loss": 0.435, "step": 24049 }, { "epoch": 1.3467353567028781, "grad_norm": 1.4888906478881836, 
"learning_rate": 9.893710526315791e-05, "loss": 0.4983, "step": 24050 }, { "epoch": 1.3467913540150072, "grad_norm": 1.4776861667633057, "learning_rate": 9.893684210526316e-05, "loss": 0.3908, "step": 24051 }, { "epoch": 1.3468473513271362, "grad_norm": 1.268038034439087, "learning_rate": 9.893657894736843e-05, "loss": 0.5198, "step": 24052 }, { "epoch": 1.3469033486392652, "grad_norm": 1.2676458358764648, "learning_rate": 9.893631578947369e-05, "loss": 0.4383, "step": 24053 }, { "epoch": 1.3469593459513942, "grad_norm": 1.3287932872772217, "learning_rate": 9.893605263157896e-05, "loss": 0.6178, "step": 24054 }, { "epoch": 1.3470153432635232, "grad_norm": 1.209586501121521, "learning_rate": 9.893578947368422e-05, "loss": 0.3452, "step": 24055 }, { "epoch": 1.3470713405756523, "grad_norm": 1.239275336265564, "learning_rate": 9.893552631578947e-05, "loss": 0.4227, "step": 24056 }, { "epoch": 1.3471273378877813, "grad_norm": 1.1617977619171143, "learning_rate": 9.893526315789474e-05, "loss": 0.5356, "step": 24057 }, { "epoch": 1.3471833351999103, "grad_norm": 1.5607088804244995, "learning_rate": 9.8935e-05, "loss": 0.4493, "step": 24058 }, { "epoch": 1.3472393325120393, "grad_norm": 1.4072494506835938, "learning_rate": 9.893473684210527e-05, "loss": 0.4303, "step": 24059 }, { "epoch": 1.3472953298241683, "grad_norm": 1.407301902770996, "learning_rate": 9.893447368421053e-05, "loss": 0.4385, "step": 24060 }, { "epoch": 1.3473513271362973, "grad_norm": 2.6843373775482178, "learning_rate": 9.89342105263158e-05, "loss": 0.5358, "step": 24061 }, { "epoch": 1.3474073244484264, "grad_norm": 1.351423978805542, "learning_rate": 9.893394736842105e-05, "loss": 0.4161, "step": 24062 }, { "epoch": 1.3474633217605554, "grad_norm": 1.351823329925537, "learning_rate": 9.893368421052633e-05, "loss": 0.4508, "step": 24063 }, { "epoch": 1.3475193190726844, "grad_norm": 1.767401933670044, "learning_rate": 9.893342105263159e-05, "loss": 0.7152, "step": 24064 }, { "epoch": 
1.3475753163848134, "grad_norm": 6.961129188537598, "learning_rate": 9.893315789473685e-05, "loss": 0.5912, "step": 24065 }, { "epoch": 1.3476313136969424, "grad_norm": 1.4069809913635254, "learning_rate": 9.89328947368421e-05, "loss": 0.3986, "step": 24066 }, { "epoch": 1.3476873110090715, "grad_norm": 1.523929476737976, "learning_rate": 9.893263157894738e-05, "loss": 0.5491, "step": 24067 }, { "epoch": 1.3477433083212005, "grad_norm": 1.4899320602416992, "learning_rate": 9.893236842105264e-05, "loss": 0.474, "step": 24068 }, { "epoch": 1.3477993056333295, "grad_norm": 1.280630350112915, "learning_rate": 9.89321052631579e-05, "loss": 0.4654, "step": 24069 }, { "epoch": 1.3478553029454585, "grad_norm": 1.3929500579833984, "learning_rate": 9.893184210526316e-05, "loss": 0.3879, "step": 24070 }, { "epoch": 1.3479113002575875, "grad_norm": 1.3972034454345703, "learning_rate": 9.893157894736843e-05, "loss": 0.4501, "step": 24071 }, { "epoch": 1.3479672975697166, "grad_norm": 1.2639726400375366, "learning_rate": 9.893131578947369e-05, "loss": 0.4856, "step": 24072 }, { "epoch": 1.3480232948818456, "grad_norm": 1.5184447765350342, "learning_rate": 9.893105263157896e-05, "loss": 0.4357, "step": 24073 }, { "epoch": 1.3480792921939746, "grad_norm": 1.315146565437317, "learning_rate": 9.893078947368421e-05, "loss": 0.4788, "step": 24074 }, { "epoch": 1.3481352895061036, "grad_norm": 1.240248680114746, "learning_rate": 9.893052631578947e-05, "loss": 0.4533, "step": 24075 }, { "epoch": 1.3481912868182326, "grad_norm": 1.172282338142395, "learning_rate": 9.893026315789474e-05, "loss": 0.4746, "step": 24076 }, { "epoch": 1.3482472841303617, "grad_norm": 1.311255931854248, "learning_rate": 9.893e-05, "loss": 0.3963, "step": 24077 }, { "epoch": 1.3483032814424907, "grad_norm": 1.3184565305709839, "learning_rate": 9.892973684210526e-05, "loss": 0.539, "step": 24078 }, { "epoch": 1.3483592787546197, "grad_norm": 1.647714376449585, "learning_rate": 9.892947368421052e-05, "loss": 
0.4814, "step": 24079 }, { "epoch": 1.3484152760667487, "grad_norm": 1.3690624237060547, "learning_rate": 9.89292105263158e-05, "loss": 0.5514, "step": 24080 }, { "epoch": 1.3484712733788777, "grad_norm": 1.2252135276794434, "learning_rate": 9.892894736842106e-05, "loss": 0.3129, "step": 24081 }, { "epoch": 1.3485272706910068, "grad_norm": 1.3502732515335083, "learning_rate": 9.892868421052633e-05, "loss": 0.5709, "step": 24082 }, { "epoch": 1.3485832680031358, "grad_norm": 1.294970989227295, "learning_rate": 9.892842105263158e-05, "loss": 0.4358, "step": 24083 }, { "epoch": 1.3486392653152648, "grad_norm": 1.469613790512085, "learning_rate": 9.892815789473685e-05, "loss": 0.4896, "step": 24084 }, { "epoch": 1.3486952626273938, "grad_norm": 1.8542873859405518, "learning_rate": 9.892789473684211e-05, "loss": 0.5623, "step": 24085 }, { "epoch": 1.3487512599395228, "grad_norm": 1.658245325088501, "learning_rate": 9.892763157894738e-05, "loss": 0.6528, "step": 24086 }, { "epoch": 1.3488072572516518, "grad_norm": 1.2902683019638062, "learning_rate": 9.892736842105264e-05, "loss": 0.5618, "step": 24087 }, { "epoch": 1.3488632545637809, "grad_norm": 1.1421093940734863, "learning_rate": 9.89271052631579e-05, "loss": 0.4831, "step": 24088 }, { "epoch": 1.3489192518759099, "grad_norm": 1.7525116205215454, "learning_rate": 9.892684210526316e-05, "loss": 0.5039, "step": 24089 }, { "epoch": 1.348975249188039, "grad_norm": 1.113273024559021, "learning_rate": 9.892657894736843e-05, "loss": 0.3923, "step": 24090 }, { "epoch": 1.349031246500168, "grad_norm": 1.418257713317871, "learning_rate": 9.89263157894737e-05, "loss": 0.5238, "step": 24091 }, { "epoch": 1.349087243812297, "grad_norm": 1.840466856956482, "learning_rate": 9.892605263157895e-05, "loss": 0.542, "step": 24092 }, { "epoch": 1.349143241124426, "grad_norm": 1.3966180086135864, "learning_rate": 9.892578947368421e-05, "loss": 0.4315, "step": 24093 }, { "epoch": 1.349199238436555, "grad_norm": 1.2987778186798096, 
"learning_rate": 9.892552631578947e-05, "loss": 0.5709, "step": 24094 }, { "epoch": 1.349255235748684, "grad_norm": 1.2208820581436157, "learning_rate": 9.892526315789475e-05, "loss": 0.45, "step": 24095 }, { "epoch": 1.349311233060813, "grad_norm": 2.0124058723449707, "learning_rate": 9.8925e-05, "loss": 0.544, "step": 24096 }, { "epoch": 1.349367230372942, "grad_norm": 1.3852250576019287, "learning_rate": 9.892473684210527e-05, "loss": 0.4652, "step": 24097 }, { "epoch": 1.349423227685071, "grad_norm": 1.2076325416564941, "learning_rate": 9.892447368421053e-05, "loss": 0.3717, "step": 24098 }, { "epoch": 1.3494792249972, "grad_norm": 1.1834232807159424, "learning_rate": 9.89242105263158e-05, "loss": 0.4067, "step": 24099 }, { "epoch": 1.349535222309329, "grad_norm": 1.4517765045166016, "learning_rate": 9.892394736842106e-05, "loss": 0.5416, "step": 24100 }, { "epoch": 1.3495912196214581, "grad_norm": 1.5725260972976685, "learning_rate": 9.892368421052632e-05, "loss": 0.607, "step": 24101 }, { "epoch": 1.3496472169335871, "grad_norm": 1.9018875360488892, "learning_rate": 9.892342105263158e-05, "loss": 0.6389, "step": 24102 }, { "epoch": 1.3497032142457162, "grad_norm": 1.2932662963867188, "learning_rate": 9.892315789473685e-05, "loss": 0.432, "step": 24103 }, { "epoch": 1.3497592115578452, "grad_norm": 1.3422033786773682, "learning_rate": 9.892289473684211e-05, "loss": 0.4565, "step": 24104 }, { "epoch": 1.3498152088699742, "grad_norm": 1.4826048612594604, "learning_rate": 9.892263157894738e-05, "loss": 0.5029, "step": 24105 }, { "epoch": 1.3498712061821032, "grad_norm": 1.4096723794937134, "learning_rate": 9.892236842105263e-05, "loss": 0.5243, "step": 24106 }, { "epoch": 1.3499272034942322, "grad_norm": 1.2899080514907837, "learning_rate": 9.89221052631579e-05, "loss": 0.4729, "step": 24107 }, { "epoch": 1.3499832008063612, "grad_norm": 3.5839273929595947, "learning_rate": 9.892184210526316e-05, "loss": 0.4041, "step": 24108 }, { "epoch": 1.3500391981184903, 
"grad_norm": 1.2935422658920288, "learning_rate": 9.892157894736842e-05, "loss": 0.5591, "step": 24109 }, { "epoch": 1.3500951954306193, "grad_norm": 1.571065068244934, "learning_rate": 9.89213157894737e-05, "loss": 0.4823, "step": 24110 }, { "epoch": 1.3501511927427483, "grad_norm": 1.3770251274108887, "learning_rate": 9.892105263157894e-05, "loss": 0.5661, "step": 24111 }, { "epoch": 1.3502071900548773, "grad_norm": 1.1110776662826538, "learning_rate": 9.892078947368422e-05, "loss": 0.2878, "step": 24112 }, { "epoch": 1.3502631873670063, "grad_norm": 1.5924330949783325, "learning_rate": 9.892052631578948e-05, "loss": 0.6316, "step": 24113 }, { "epoch": 1.3503191846791354, "grad_norm": 1.457471489906311, "learning_rate": 9.892026315789475e-05, "loss": 0.4762, "step": 24114 }, { "epoch": 1.3503751819912644, "grad_norm": 1.3572285175323486, "learning_rate": 9.892e-05, "loss": 0.6146, "step": 24115 }, { "epoch": 1.3504311793033934, "grad_norm": 1.2651491165161133, "learning_rate": 9.891973684210527e-05, "loss": 0.5161, "step": 24116 }, { "epoch": 1.3504871766155224, "grad_norm": 1.3892878293991089, "learning_rate": 9.891947368421053e-05, "loss": 0.4709, "step": 24117 }, { "epoch": 1.3505431739276514, "grad_norm": 1.5117753744125366, "learning_rate": 9.89192105263158e-05, "loss": 0.5472, "step": 24118 }, { "epoch": 1.3505991712397805, "grad_norm": 1.2897844314575195, "learning_rate": 9.891894736842106e-05, "loss": 0.4222, "step": 24119 }, { "epoch": 1.3506551685519095, "grad_norm": 1.2057937383651733, "learning_rate": 9.891868421052632e-05, "loss": 0.4167, "step": 24120 }, { "epoch": 1.3507111658640385, "grad_norm": 1.2438232898712158, "learning_rate": 9.891842105263158e-05, "loss": 0.3894, "step": 24121 }, { "epoch": 1.3507671631761675, "grad_norm": 1.4046286344528198, "learning_rate": 9.891815789473685e-05, "loss": 0.5186, "step": 24122 }, { "epoch": 1.3508231604882965, "grad_norm": 1.3658525943756104, "learning_rate": 9.891789473684211e-05, "loss": 0.4163, "step": 
24123 }, { "epoch": 1.3508791578004256, "grad_norm": 1.5054686069488525, "learning_rate": 9.891763157894737e-05, "loss": 0.5208, "step": 24124 }, { "epoch": 1.3509351551125546, "grad_norm": 1.4575799703598022, "learning_rate": 9.891736842105263e-05, "loss": 0.439, "step": 24125 }, { "epoch": 1.3509911524246836, "grad_norm": 1.4247299432754517, "learning_rate": 9.891710526315789e-05, "loss": 0.4459, "step": 24126 }, { "epoch": 1.3510471497368126, "grad_norm": 1.237309455871582, "learning_rate": 9.891684210526317e-05, "loss": 0.5082, "step": 24127 }, { "epoch": 1.3511031470489416, "grad_norm": 1.267808198928833, "learning_rate": 9.891657894736843e-05, "loss": 0.3936, "step": 24128 }, { "epoch": 1.3511591443610707, "grad_norm": 1.9146208763122559, "learning_rate": 9.891631578947369e-05, "loss": 0.5779, "step": 24129 }, { "epoch": 1.3512151416731997, "grad_norm": 1.3047025203704834, "learning_rate": 9.891605263157894e-05, "loss": 0.5201, "step": 24130 }, { "epoch": 1.3512711389853287, "grad_norm": 1.3481351137161255, "learning_rate": 9.891578947368422e-05, "loss": 0.5738, "step": 24131 }, { "epoch": 1.3513271362974577, "grad_norm": 1.3294408321380615, "learning_rate": 9.891552631578948e-05, "loss": 0.4455, "step": 24132 }, { "epoch": 1.3513831336095867, "grad_norm": 1.4808603525161743, "learning_rate": 9.891526315789474e-05, "loss": 0.454, "step": 24133 }, { "epoch": 1.3514391309217157, "grad_norm": 1.704590082168579, "learning_rate": 9.8915e-05, "loss": 0.4827, "step": 24134 }, { "epoch": 1.3514951282338448, "grad_norm": 1.5826306343078613, "learning_rate": 9.891473684210527e-05, "loss": 0.3863, "step": 24135 }, { "epoch": 1.3515511255459738, "grad_norm": 1.5127642154693604, "learning_rate": 9.891447368421053e-05, "loss": 0.4407, "step": 24136 }, { "epoch": 1.3516071228581028, "grad_norm": 1.5637285709381104, "learning_rate": 9.89142105263158e-05, "loss": 0.5563, "step": 24137 }, { "epoch": 1.3516631201702318, "grad_norm": 1.279770851135254, "learning_rate": 
9.891394736842105e-05, "loss": 0.4167, "step": 24138 }, { "epoch": 1.3517191174823608, "grad_norm": 1.5844379663467407, "learning_rate": 9.891368421052632e-05, "loss": 0.4667, "step": 24139 }, { "epoch": 1.3517751147944899, "grad_norm": 1.5427618026733398, "learning_rate": 9.891342105263158e-05, "loss": 0.546, "step": 24140 }, { "epoch": 1.3518311121066189, "grad_norm": 1.4364628791809082, "learning_rate": 9.891315789473686e-05, "loss": 0.5371, "step": 24141 }, { "epoch": 1.351887109418748, "grad_norm": 1.5178382396697998, "learning_rate": 9.891289473684212e-05, "loss": 0.5804, "step": 24142 }, { "epoch": 1.351943106730877, "grad_norm": 1.4384104013442993, "learning_rate": 9.891263157894736e-05, "loss": 0.3964, "step": 24143 }, { "epoch": 1.351999104043006, "grad_norm": 1.4983294010162354, "learning_rate": 9.891236842105264e-05, "loss": 0.4637, "step": 24144 }, { "epoch": 1.352055101355135, "grad_norm": 1.268157720565796, "learning_rate": 9.89121052631579e-05, "loss": 0.4143, "step": 24145 }, { "epoch": 1.352111098667264, "grad_norm": 2.0224101543426514, "learning_rate": 9.891184210526317e-05, "loss": 0.5155, "step": 24146 }, { "epoch": 1.352167095979393, "grad_norm": 1.149941086769104, "learning_rate": 9.891157894736843e-05, "loss": 0.4014, "step": 24147 }, { "epoch": 1.352223093291522, "grad_norm": 1.277801752090454, "learning_rate": 9.891131578947369e-05, "loss": 0.3095, "step": 24148 }, { "epoch": 1.352279090603651, "grad_norm": 1.5732860565185547, "learning_rate": 9.891105263157895e-05, "loss": 0.5345, "step": 24149 }, { "epoch": 1.35233508791578, "grad_norm": 1.5471495389938354, "learning_rate": 9.891078947368422e-05, "loss": 0.4654, "step": 24150 }, { "epoch": 1.352391085227909, "grad_norm": 1.4263899326324463, "learning_rate": 9.891052631578948e-05, "loss": 0.5575, "step": 24151 }, { "epoch": 1.352447082540038, "grad_norm": 1.419783115386963, "learning_rate": 9.891026315789474e-05, "loss": 0.5694, "step": 24152 }, { "epoch": 1.3525030798521671, "grad_norm": 
1.1714369058609009, "learning_rate": 9.891e-05, "loss": 0.3831, "step": 24153 }, { "epoch": 1.3525590771642961, "grad_norm": 1.4590812921524048, "learning_rate": 9.890973684210527e-05, "loss": 0.4227, "step": 24154 }, { "epoch": 1.3526150744764251, "grad_norm": 1.3871046304702759, "learning_rate": 9.890947368421053e-05, "loss": 0.4155, "step": 24155 }, { "epoch": 1.3526710717885542, "grad_norm": 1.2001183032989502, "learning_rate": 9.890921052631579e-05, "loss": 0.506, "step": 24156 }, { "epoch": 1.3527270691006832, "grad_norm": 1.2502472400665283, "learning_rate": 9.890894736842105e-05, "loss": 0.4634, "step": 24157 }, { "epoch": 1.3527830664128122, "grad_norm": 1.3538122177124023, "learning_rate": 9.890868421052633e-05, "loss": 0.5772, "step": 24158 }, { "epoch": 1.3528390637249412, "grad_norm": 1.198918104171753, "learning_rate": 9.890842105263159e-05, "loss": 0.4198, "step": 24159 }, { "epoch": 1.3528950610370702, "grad_norm": 1.3008321523666382, "learning_rate": 9.890815789473685e-05, "loss": 0.4081, "step": 24160 }, { "epoch": 1.3529510583491993, "grad_norm": 1.3779500722885132, "learning_rate": 9.89078947368421e-05, "loss": 0.3471, "step": 24161 }, { "epoch": 1.3530070556613283, "grad_norm": 1.5330551862716675, "learning_rate": 9.890763157894736e-05, "loss": 0.5198, "step": 24162 }, { "epoch": 1.3530630529734573, "grad_norm": 1.5421743392944336, "learning_rate": 9.890736842105264e-05, "loss": 0.5889, "step": 24163 }, { "epoch": 1.3531190502855863, "grad_norm": 1.4606456756591797, "learning_rate": 9.89071052631579e-05, "loss": 0.5037, "step": 24164 }, { "epoch": 1.3531750475977153, "grad_norm": 1.3997637033462524, "learning_rate": 9.890684210526317e-05, "loss": 0.4339, "step": 24165 }, { "epoch": 1.3532310449098444, "grad_norm": 2.146996259689331, "learning_rate": 9.890657894736842e-05, "loss": 0.4112, "step": 24166 }, { "epoch": 1.3532870422219734, "grad_norm": 1.3905378580093384, "learning_rate": 9.890631578947369e-05, "loss": 0.4845, "step": 24167 }, { 
"epoch": 1.3533430395341024, "grad_norm": 1.1500251293182373, "learning_rate": 9.890605263157895e-05, "loss": 0.3876, "step": 24168 }, { "epoch": 1.3533990368462314, "grad_norm": 1.3702603578567505, "learning_rate": 9.890578947368422e-05, "loss": 0.5397, "step": 24169 }, { "epoch": 1.3534550341583604, "grad_norm": 1.3898779153823853, "learning_rate": 9.890552631578947e-05, "loss": 0.61, "step": 24170 }, { "epoch": 1.3535110314704895, "grad_norm": 1.6539660692214966, "learning_rate": 9.890526315789474e-05, "loss": 0.5015, "step": 24171 }, { "epoch": 1.3535670287826185, "grad_norm": 1.4206243753433228, "learning_rate": 9.8905e-05, "loss": 0.5337, "step": 24172 }, { "epoch": 1.3536230260947475, "grad_norm": 1.1216557025909424, "learning_rate": 9.890473684210528e-05, "loss": 0.4585, "step": 24173 }, { "epoch": 1.3536790234068765, "grad_norm": 1.3988527059555054, "learning_rate": 9.890447368421054e-05, "loss": 0.4261, "step": 24174 }, { "epoch": 1.3537350207190055, "grad_norm": 1.198021411895752, "learning_rate": 9.89042105263158e-05, "loss": 0.4139, "step": 24175 }, { "epoch": 1.3537910180311346, "grad_norm": 1.432989478111267, "learning_rate": 9.890394736842106e-05, "loss": 0.5061, "step": 24176 }, { "epoch": 1.3538470153432636, "grad_norm": 1.4509515762329102, "learning_rate": 9.890368421052631e-05, "loss": 0.4368, "step": 24177 }, { "epoch": 1.3539030126553926, "grad_norm": 1.2205407619476318, "learning_rate": 9.890342105263159e-05, "loss": 0.4162, "step": 24178 }, { "epoch": 1.3539590099675216, "grad_norm": 1.3447471857070923, "learning_rate": 9.890315789473685e-05, "loss": 0.4203, "step": 24179 }, { "epoch": 1.3540150072796506, "grad_norm": 1.5836881399154663, "learning_rate": 9.890289473684211e-05, "loss": 0.4985, "step": 24180 }, { "epoch": 1.3540710045917796, "grad_norm": 1.6705793142318726, "learning_rate": 9.890263157894737e-05, "loss": 0.4286, "step": 24181 }, { "epoch": 1.3541270019039087, "grad_norm": 1.2332497835159302, "learning_rate": 
9.890236842105264e-05, "loss": 0.3292, "step": 24182 }, { "epoch": 1.3541829992160377, "grad_norm": 3.5343668460845947, "learning_rate": 9.89021052631579e-05, "loss": 0.5716, "step": 24183 }, { "epoch": 1.3542389965281667, "grad_norm": 1.3043307065963745, "learning_rate": 9.890184210526316e-05, "loss": 0.4799, "step": 24184 }, { "epoch": 1.3542949938402957, "grad_norm": 1.2603365182876587, "learning_rate": 9.890157894736842e-05, "loss": 0.4495, "step": 24185 }, { "epoch": 1.3543509911524247, "grad_norm": 1.324040412902832, "learning_rate": 9.890131578947369e-05, "loss": 0.3862, "step": 24186 }, { "epoch": 1.3544069884645538, "grad_norm": 1.3032180070877075, "learning_rate": 9.890105263157895e-05, "loss": 0.5301, "step": 24187 }, { "epoch": 1.3544629857766828, "grad_norm": 1.455611228942871, "learning_rate": 9.890078947368421e-05, "loss": 0.4353, "step": 24188 }, { "epoch": 1.3545189830888118, "grad_norm": 1.2120689153671265, "learning_rate": 9.890052631578947e-05, "loss": 0.429, "step": 24189 }, { "epoch": 1.3545749804009408, "grad_norm": 1.3751366138458252, "learning_rate": 9.890026315789475e-05, "loss": 0.4037, "step": 24190 }, { "epoch": 1.3546309777130698, "grad_norm": 1.38014817237854, "learning_rate": 9.89e-05, "loss": 0.5004, "step": 24191 }, { "epoch": 1.3546869750251989, "grad_norm": 1.1251559257507324, "learning_rate": 9.889973684210528e-05, "loss": 0.4815, "step": 24192 }, { "epoch": 1.3547429723373279, "grad_norm": 1.3140547275543213, "learning_rate": 9.889947368421052e-05, "loss": 0.5415, "step": 24193 }, { "epoch": 1.354798969649457, "grad_norm": 1.2560465335845947, "learning_rate": 9.88992105263158e-05, "loss": 0.4763, "step": 24194 }, { "epoch": 1.354854966961586, "grad_norm": 1.240114450454712, "learning_rate": 9.889894736842106e-05, "loss": 0.4366, "step": 24195 }, { "epoch": 1.354910964273715, "grad_norm": 1.2777663469314575, "learning_rate": 9.889868421052632e-05, "loss": 0.3919, "step": 24196 }, { "epoch": 1.354966961585844, "grad_norm": 
1.5230880975723267, "learning_rate": 9.889842105263159e-05, "loss": 0.5446, "step": 24197 }, { "epoch": 1.355022958897973, "grad_norm": 1.1917310953140259, "learning_rate": 9.889815789473684e-05, "loss": 0.5723, "step": 24198 }, { "epoch": 1.355078956210102, "grad_norm": 1.3890283107757568, "learning_rate": 9.889789473684211e-05, "loss": 0.6764, "step": 24199 }, { "epoch": 1.355134953522231, "grad_norm": 1.5664749145507812, "learning_rate": 9.889763157894737e-05, "loss": 0.5056, "step": 24200 }, { "epoch": 1.35519095083436, "grad_norm": 1.706183671951294, "learning_rate": 9.889736842105264e-05, "loss": 0.4927, "step": 24201 }, { "epoch": 1.355246948146489, "grad_norm": 1.0862797498703003, "learning_rate": 9.88971052631579e-05, "loss": 0.3748, "step": 24202 }, { "epoch": 1.355302945458618, "grad_norm": 1.3727495670318604, "learning_rate": 9.889684210526316e-05, "loss": 0.4898, "step": 24203 }, { "epoch": 1.355358942770747, "grad_norm": 1.635277271270752, "learning_rate": 9.889657894736842e-05, "loss": 0.4993, "step": 24204 }, { "epoch": 1.355414940082876, "grad_norm": 1.2087209224700928, "learning_rate": 9.88963157894737e-05, "loss": 0.3636, "step": 24205 }, { "epoch": 1.3554709373950051, "grad_norm": 1.111750602722168, "learning_rate": 9.889605263157896e-05, "loss": 0.4543, "step": 24206 }, { "epoch": 1.3555269347071341, "grad_norm": 1.292447805404663, "learning_rate": 9.889578947368422e-05, "loss": 0.4484, "step": 24207 }, { "epoch": 1.3555829320192632, "grad_norm": 1.3316152095794678, "learning_rate": 9.889552631578947e-05, "loss": 0.3604, "step": 24208 }, { "epoch": 1.3556389293313922, "grad_norm": 1.314098834991455, "learning_rate": 9.889526315789475e-05, "loss": 0.3969, "step": 24209 }, { "epoch": 1.3556949266435212, "grad_norm": 1.5935890674591064, "learning_rate": 9.889500000000001e-05, "loss": 0.4482, "step": 24210 }, { "epoch": 1.3557509239556502, "grad_norm": 1.6971734762191772, "learning_rate": 9.889473684210527e-05, "loss": 0.4667, "step": 24211 }, { 
"epoch": 1.3558069212677792, "grad_norm": 1.528926134109497, "learning_rate": 9.889447368421053e-05, "loss": 0.4533, "step": 24212 }, { "epoch": 1.3558629185799083, "grad_norm": 1.1695584058761597, "learning_rate": 9.889421052631579e-05, "loss": 0.3628, "step": 24213 }, { "epoch": 1.3559189158920373, "grad_norm": 1.1592373847961426, "learning_rate": 9.889394736842106e-05, "loss": 0.4212, "step": 24214 }, { "epoch": 1.3559749132041663, "grad_norm": 1.202193021774292, "learning_rate": 9.889368421052632e-05, "loss": 0.4012, "step": 24215 }, { "epoch": 1.3560309105162953, "grad_norm": 1.3646345138549805, "learning_rate": 9.889342105263158e-05, "loss": 0.3992, "step": 24216 }, { "epoch": 1.3560869078284243, "grad_norm": 1.2469518184661865, "learning_rate": 9.889315789473684e-05, "loss": 0.5593, "step": 24217 }, { "epoch": 1.3561429051405534, "grad_norm": 1.3526692390441895, "learning_rate": 9.889289473684211e-05, "loss": 0.434, "step": 24218 }, { "epoch": 1.3561989024526824, "grad_norm": 1.6600333452224731, "learning_rate": 9.889263157894737e-05, "loss": 0.456, "step": 24219 }, { "epoch": 1.3562548997648114, "grad_norm": 1.4503874778747559, "learning_rate": 9.889236842105265e-05, "loss": 0.4817, "step": 24220 }, { "epoch": 1.3563108970769404, "grad_norm": 1.2051103115081787, "learning_rate": 9.889210526315789e-05, "loss": 0.48, "step": 24221 }, { "epoch": 1.3563668943890694, "grad_norm": 1.310248613357544, "learning_rate": 9.889184210526317e-05, "loss": 0.4754, "step": 24222 }, { "epoch": 1.3564228917011985, "grad_norm": 1.2252604961395264, "learning_rate": 9.889157894736842e-05, "loss": 0.4161, "step": 24223 }, { "epoch": 1.3564788890133275, "grad_norm": 1.4256223440170288, "learning_rate": 9.88913157894737e-05, "loss": 0.6532, "step": 24224 }, { "epoch": 1.3565348863254565, "grad_norm": 1.3690801858901978, "learning_rate": 9.889105263157894e-05, "loss": 0.5856, "step": 24225 }, { "epoch": 1.3565908836375855, "grad_norm": 1.3578503131866455, "learning_rate": 
9.889078947368422e-05, "loss": 0.4091, "step": 24226 }, { "epoch": 1.3566468809497145, "grad_norm": 1.3648658990859985, "learning_rate": 9.889052631578948e-05, "loss": 0.5009, "step": 24227 }, { "epoch": 1.3567028782618435, "grad_norm": 1.122082233428955, "learning_rate": 9.889026315789475e-05, "loss": 0.4603, "step": 24228 }, { "epoch": 1.3567588755739726, "grad_norm": 1.250191330909729, "learning_rate": 9.889000000000001e-05, "loss": 0.4719, "step": 24229 }, { "epoch": 1.3568148728861016, "grad_norm": 1.492835283279419, "learning_rate": 9.888973684210526e-05, "loss": 0.3728, "step": 24230 }, { "epoch": 1.3568708701982306, "grad_norm": 1.8283497095108032, "learning_rate": 9.888947368421053e-05, "loss": 0.5613, "step": 24231 }, { "epoch": 1.3569268675103596, "grad_norm": 1.3256876468658447, "learning_rate": 9.888921052631579e-05, "loss": 0.458, "step": 24232 }, { "epoch": 1.3569828648224886, "grad_norm": 1.3450658321380615, "learning_rate": 9.888894736842106e-05, "loss": 0.5543, "step": 24233 }, { "epoch": 1.3570388621346177, "grad_norm": 1.1495815515518188, "learning_rate": 9.888868421052632e-05, "loss": 0.3296, "step": 24234 }, { "epoch": 1.3570948594467467, "grad_norm": 1.409485936164856, "learning_rate": 9.888842105263158e-05, "loss": 0.5735, "step": 24235 }, { "epoch": 1.3571508567588757, "grad_norm": 1.1729849576950073, "learning_rate": 9.888815789473684e-05, "loss": 0.4933, "step": 24236 }, { "epoch": 1.3572068540710047, "grad_norm": 1.3659942150115967, "learning_rate": 9.888789473684212e-05, "loss": 0.5493, "step": 24237 }, { "epoch": 1.3572628513831337, "grad_norm": 1.3520770072937012, "learning_rate": 9.888763157894737e-05, "loss": 0.5127, "step": 24238 }, { "epoch": 1.3573188486952628, "grad_norm": 1.4395102262496948, "learning_rate": 9.888736842105263e-05, "loss": 0.3862, "step": 24239 }, { "epoch": 1.3573748460073918, "grad_norm": 1.3926860094070435, "learning_rate": 9.88871052631579e-05, "loss": 0.476, "step": 24240 }, { "epoch": 1.3574308433195208, 
"grad_norm": 1.5400936603546143, "learning_rate": 9.888684210526317e-05, "loss": 0.415, "step": 24241 }, { "epoch": 1.3574868406316498, "grad_norm": 1.6989082098007202, "learning_rate": 9.888657894736843e-05, "loss": 0.4737, "step": 24242 }, { "epoch": 1.3575428379437788, "grad_norm": 1.441806674003601, "learning_rate": 9.888631578947369e-05, "loss": 0.3868, "step": 24243 }, { "epoch": 1.3575988352559079, "grad_norm": 1.4132057428359985, "learning_rate": 9.888605263157895e-05, "loss": 0.536, "step": 24244 }, { "epoch": 1.3576548325680367, "grad_norm": 1.4496694803237915, "learning_rate": 9.888578947368422e-05, "loss": 0.4312, "step": 24245 }, { "epoch": 1.3577108298801657, "grad_norm": 1.1595571041107178, "learning_rate": 9.888552631578948e-05, "loss": 0.3974, "step": 24246 }, { "epoch": 1.3577668271922947, "grad_norm": 1.4537824392318726, "learning_rate": 9.888526315789474e-05, "loss": 0.5266, "step": 24247 }, { "epoch": 1.3578228245044237, "grad_norm": 1.4500279426574707, "learning_rate": 9.8885e-05, "loss": 0.5608, "step": 24248 }, { "epoch": 1.3578788218165527, "grad_norm": 1.2889269590377808, "learning_rate": 9.888473684210526e-05, "loss": 0.4108, "step": 24249 }, { "epoch": 1.3579348191286817, "grad_norm": 1.3143959045410156, "learning_rate": 9.888447368421053e-05, "loss": 0.4612, "step": 24250 }, { "epoch": 1.3579908164408108, "grad_norm": 1.5159499645233154, "learning_rate": 9.888421052631579e-05, "loss": 0.5644, "step": 24251 }, { "epoch": 1.3580468137529398, "grad_norm": 1.3953338861465454, "learning_rate": 9.888394736842107e-05, "loss": 0.4103, "step": 24252 }, { "epoch": 1.3581028110650688, "grad_norm": 1.1751587390899658, "learning_rate": 9.888368421052631e-05, "loss": 0.4934, "step": 24253 }, { "epoch": 1.3581588083771978, "grad_norm": 1.4721421003341675, "learning_rate": 9.888342105263158e-05, "loss": 0.5593, "step": 24254 }, { "epoch": 1.3582148056893268, "grad_norm": 1.2335082292556763, "learning_rate": 9.888315789473684e-05, "loss": 0.5028, 
"step": 24255 }, { "epoch": 1.3582708030014559, "grad_norm": 1.506395697593689, "learning_rate": 9.888289473684212e-05, "loss": 0.4373, "step": 24256 }, { "epoch": 1.3583268003135849, "grad_norm": 1.208866000175476, "learning_rate": 9.888263157894738e-05, "loss": 0.4574, "step": 24257 }, { "epoch": 1.358382797625714, "grad_norm": 1.3434886932373047, "learning_rate": 9.888236842105264e-05, "loss": 0.5081, "step": 24258 }, { "epoch": 1.358438794937843, "grad_norm": 1.523879051208496, "learning_rate": 9.88821052631579e-05, "loss": 0.4654, "step": 24259 }, { "epoch": 1.358494792249972, "grad_norm": 1.5192832946777344, "learning_rate": 9.888184210526317e-05, "loss": 0.3732, "step": 24260 }, { "epoch": 1.358550789562101, "grad_norm": 1.497955560684204, "learning_rate": 9.888157894736843e-05, "loss": 0.4321, "step": 24261 }, { "epoch": 1.35860678687423, "grad_norm": 1.3706817626953125, "learning_rate": 9.888131578947369e-05, "loss": 0.5732, "step": 24262 }, { "epoch": 1.358662784186359, "grad_norm": 1.3490325212478638, "learning_rate": 9.888105263157895e-05, "loss": 0.4374, "step": 24263 }, { "epoch": 1.358718781498488, "grad_norm": 1.118749737739563, "learning_rate": 9.888078947368421e-05, "loss": 0.3573, "step": 24264 }, { "epoch": 1.358774778810617, "grad_norm": 1.126299262046814, "learning_rate": 9.888052631578948e-05, "loss": 0.2995, "step": 24265 }, { "epoch": 1.358830776122746, "grad_norm": 1.1920883655548096, "learning_rate": 9.888026315789474e-05, "loss": 0.4185, "step": 24266 }, { "epoch": 1.358886773434875, "grad_norm": 1.254758358001709, "learning_rate": 9.888e-05, "loss": 0.4563, "step": 24267 }, { "epoch": 1.358942770747004, "grad_norm": 1.0732206106185913, "learning_rate": 9.887973684210526e-05, "loss": 0.3767, "step": 24268 }, { "epoch": 1.358998768059133, "grad_norm": 1.278638243675232, "learning_rate": 9.887947368421053e-05, "loss": 0.4268, "step": 24269 }, { "epoch": 1.3590547653712621, "grad_norm": 1.693253517150879, "learning_rate": 
9.88792105263158e-05, "loss": 0.4124, "step": 24270 }, { "epoch": 1.3591107626833911, "grad_norm": 1.4540988206863403, "learning_rate": 9.887894736842105e-05, "loss": 0.4139, "step": 24271 }, { "epoch": 1.3591667599955202, "grad_norm": 1.2501556873321533, "learning_rate": 9.887868421052631e-05, "loss": 0.467, "step": 24272 }, { "epoch": 1.3592227573076492, "grad_norm": 1.3515464067459106, "learning_rate": 9.887842105263159e-05, "loss": 0.5135, "step": 24273 }, { "epoch": 1.3592787546197782, "grad_norm": 1.2665053606033325, "learning_rate": 9.887815789473685e-05, "loss": 0.3851, "step": 24274 }, { "epoch": 1.3593347519319072, "grad_norm": 1.5510942935943604, "learning_rate": 9.887789473684212e-05, "loss": 0.5612, "step": 24275 }, { "epoch": 1.3593907492440362, "grad_norm": 1.9409633874893188, "learning_rate": 9.887763157894737e-05, "loss": 0.6694, "step": 24276 }, { "epoch": 1.3594467465561653, "grad_norm": 1.3130944967269897, "learning_rate": 9.887736842105264e-05, "loss": 0.4047, "step": 24277 }, { "epoch": 1.3595027438682943, "grad_norm": 1.4870585203170776, "learning_rate": 9.88771052631579e-05, "loss": 0.4797, "step": 24278 }, { "epoch": 1.3595587411804233, "grad_norm": 1.2149924039840698, "learning_rate": 9.887684210526317e-05, "loss": 0.3805, "step": 24279 }, { "epoch": 1.3596147384925523, "grad_norm": 1.4774595499038696, "learning_rate": 9.887657894736842e-05, "loss": 0.5419, "step": 24280 }, { "epoch": 1.3596707358046813, "grad_norm": 1.419573187828064, "learning_rate": 9.887631578947368e-05, "loss": 0.5009, "step": 24281 }, { "epoch": 1.3597267331168104, "grad_norm": 1.3416260480880737, "learning_rate": 9.887605263157895e-05, "loss": 0.4692, "step": 24282 }, { "epoch": 1.3597827304289394, "grad_norm": 1.3263635635375977, "learning_rate": 9.887578947368421e-05, "loss": 0.4656, "step": 24283 }, { "epoch": 1.3598387277410684, "grad_norm": 1.96281099319458, "learning_rate": 9.887552631578949e-05, "loss": 0.4804, "step": 24284 }, { "epoch": 1.3598947250531974, 
"grad_norm": 1.3477357625961304, "learning_rate": 9.887526315789473e-05, "loss": 0.4723, "step": 24285 }, { "epoch": 1.3599507223653264, "grad_norm": 1.3089745044708252, "learning_rate": 9.8875e-05, "loss": 0.5166, "step": 24286 }, { "epoch": 1.3600067196774555, "grad_norm": 1.3642199039459229, "learning_rate": 9.887473684210526e-05, "loss": 0.5577, "step": 24287 }, { "epoch": 1.3600627169895845, "grad_norm": 1.3563685417175293, "learning_rate": 9.887447368421054e-05, "loss": 0.6061, "step": 24288 }, { "epoch": 1.3601187143017135, "grad_norm": 1.2317285537719727, "learning_rate": 9.88742105263158e-05, "loss": 0.3844, "step": 24289 }, { "epoch": 1.3601747116138425, "grad_norm": 1.2608413696289062, "learning_rate": 9.887394736842106e-05, "loss": 0.5228, "step": 24290 }, { "epoch": 1.3602307089259715, "grad_norm": 1.5067768096923828, "learning_rate": 9.887368421052632e-05, "loss": 0.4704, "step": 24291 }, { "epoch": 1.3602867062381006, "grad_norm": 1.5356553792953491, "learning_rate": 9.887342105263159e-05, "loss": 0.7033, "step": 24292 }, { "epoch": 1.3603427035502296, "grad_norm": 1.1460360288619995, "learning_rate": 9.887315789473685e-05, "loss": 0.399, "step": 24293 }, { "epoch": 1.3603987008623586, "grad_norm": 1.5921638011932373, "learning_rate": 9.887289473684211e-05, "loss": 0.5124, "step": 24294 }, { "epoch": 1.3604546981744876, "grad_norm": 1.4019817113876343, "learning_rate": 9.887263157894737e-05, "loss": 0.4948, "step": 24295 }, { "epoch": 1.3605106954866166, "grad_norm": 1.5591027736663818, "learning_rate": 9.887236842105264e-05, "loss": 0.4862, "step": 24296 }, { "epoch": 1.3605666927987456, "grad_norm": 1.374293565750122, "learning_rate": 9.88721052631579e-05, "loss": 0.5368, "step": 24297 }, { "epoch": 1.3606226901108747, "grad_norm": 1.49769926071167, "learning_rate": 9.887184210526316e-05, "loss": 0.55, "step": 24298 }, { "epoch": 1.3606786874230037, "grad_norm": 1.1991252899169922, "learning_rate": 9.887157894736842e-05, "loss": 0.4687, "step": 
24299 }, { "epoch": 1.3607346847351327, "grad_norm": 1.3198788166046143, "learning_rate": 9.887131578947368e-05, "loss": 0.4489, "step": 24300 }, { "epoch": 1.3607906820472617, "grad_norm": 1.2617061138153076, "learning_rate": 9.887105263157895e-05, "loss": 0.395, "step": 24301 }, { "epoch": 1.3608466793593907, "grad_norm": 1.3370567560195923, "learning_rate": 9.887078947368421e-05, "loss": 0.3999, "step": 24302 }, { "epoch": 1.3609026766715198, "grad_norm": 1.3850817680358887, "learning_rate": 9.887052631578947e-05, "loss": 0.5725, "step": 24303 }, { "epoch": 1.3609586739836488, "grad_norm": 1.5199081897735596, "learning_rate": 9.887026315789473e-05, "loss": 0.4016, "step": 24304 }, { "epoch": 1.3610146712957778, "grad_norm": 1.3331574201583862, "learning_rate": 9.887000000000001e-05, "loss": 0.4695, "step": 24305 }, { "epoch": 1.3610706686079068, "grad_norm": 1.2357096672058105, "learning_rate": 9.886973684210527e-05, "loss": 0.4227, "step": 24306 }, { "epoch": 1.3611266659200358, "grad_norm": 1.3251912593841553, "learning_rate": 9.886947368421054e-05, "loss": 0.4537, "step": 24307 }, { "epoch": 1.3611826632321649, "grad_norm": 1.1950063705444336, "learning_rate": 9.886921052631579e-05, "loss": 0.4806, "step": 24308 }, { "epoch": 1.3612386605442939, "grad_norm": 1.2474392652511597, "learning_rate": 9.886894736842106e-05, "loss": 0.4358, "step": 24309 }, { "epoch": 1.361294657856423, "grad_norm": 1.0946778059005737, "learning_rate": 9.886868421052632e-05, "loss": 0.3485, "step": 24310 }, { "epoch": 1.361350655168552, "grad_norm": 1.3305790424346924, "learning_rate": 9.886842105263159e-05, "loss": 0.3922, "step": 24311 }, { "epoch": 1.361406652480681, "grad_norm": 1.1710745096206665, "learning_rate": 9.886815789473685e-05, "loss": 0.3931, "step": 24312 }, { "epoch": 1.36146264979281, "grad_norm": 1.5039604902267456, "learning_rate": 9.886789473684211e-05, "loss": 0.4597, "step": 24313 }, { "epoch": 1.361518647104939, "grad_norm": 1.3262319564819336, 
"learning_rate": 9.886763157894737e-05, "loss": 0.5, "step": 24314 }, { "epoch": 1.361574644417068, "grad_norm": 1.3019124269485474, "learning_rate": 9.886736842105265e-05, "loss": 0.5419, "step": 24315 }, { "epoch": 1.361630641729197, "grad_norm": 1.5411773920059204, "learning_rate": 9.88671052631579e-05, "loss": 0.4171, "step": 24316 }, { "epoch": 1.361686639041326, "grad_norm": 1.4485447406768799, "learning_rate": 9.886684210526315e-05, "loss": 0.4519, "step": 24317 }, { "epoch": 1.361742636353455, "grad_norm": 1.3841382265090942, "learning_rate": 9.886657894736842e-05, "loss": 0.4242, "step": 24318 }, { "epoch": 1.361798633665584, "grad_norm": 1.1673097610473633, "learning_rate": 9.886631578947368e-05, "loss": 0.4415, "step": 24319 }, { "epoch": 1.361854630977713, "grad_norm": 1.6136304140090942, "learning_rate": 9.886605263157896e-05, "loss": 0.546, "step": 24320 }, { "epoch": 1.361910628289842, "grad_norm": 1.567229151725769, "learning_rate": 9.886578947368422e-05, "loss": 0.4276, "step": 24321 }, { "epoch": 1.3619666256019711, "grad_norm": 1.3506273031234741, "learning_rate": 9.886552631578948e-05, "loss": 0.4994, "step": 24322 }, { "epoch": 1.3620226229141001, "grad_norm": 1.3055624961853027, "learning_rate": 9.886526315789474e-05, "loss": 0.347, "step": 24323 }, { "epoch": 1.3620786202262292, "grad_norm": 1.38615882396698, "learning_rate": 9.886500000000001e-05, "loss": 0.4291, "step": 24324 }, { "epoch": 1.3621346175383582, "grad_norm": 1.3798707723617554, "learning_rate": 9.886473684210527e-05, "loss": 0.4276, "step": 24325 }, { "epoch": 1.3621906148504872, "grad_norm": 1.1843464374542236, "learning_rate": 9.886447368421053e-05, "loss": 0.4777, "step": 24326 }, { "epoch": 1.3622466121626162, "grad_norm": 1.2293568849563599, "learning_rate": 9.886421052631579e-05, "loss": 0.448, "step": 24327 }, { "epoch": 1.3623026094747452, "grad_norm": 1.2945222854614258, "learning_rate": 9.886394736842106e-05, "loss": 0.4568, "step": 24328 }, { "epoch": 
1.3623586067868743, "grad_norm": 1.490443229675293, "learning_rate": 9.886368421052632e-05, "loss": 0.6576, "step": 24329 }, { "epoch": 1.3624146040990033, "grad_norm": 1.3081691265106201, "learning_rate": 9.88634210526316e-05, "loss": 0.516, "step": 24330 }, { "epoch": 1.3624706014111323, "grad_norm": 2.057906150817871, "learning_rate": 9.886315789473684e-05, "loss": 0.4726, "step": 24331 }, { "epoch": 1.3625265987232613, "grad_norm": 1.1586756706237793, "learning_rate": 9.886289473684211e-05, "loss": 0.364, "step": 24332 }, { "epoch": 1.3625825960353903, "grad_norm": 1.5228568315505981, "learning_rate": 9.886263157894737e-05, "loss": 0.5054, "step": 24333 }, { "epoch": 1.3626385933475194, "grad_norm": 1.2080132961273193, "learning_rate": 9.886236842105263e-05, "loss": 0.5405, "step": 24334 }, { "epoch": 1.3626945906596484, "grad_norm": 1.8775131702423096, "learning_rate": 9.88621052631579e-05, "loss": 0.5727, "step": 24335 }, { "epoch": 1.3627505879717774, "grad_norm": 1.4475866556167603, "learning_rate": 9.886184210526315e-05, "loss": 0.4302, "step": 24336 }, { "epoch": 1.3628065852839064, "grad_norm": 1.6076793670654297, "learning_rate": 9.886157894736843e-05, "loss": 0.5248, "step": 24337 }, { "epoch": 1.3628625825960354, "grad_norm": 1.269997000694275, "learning_rate": 9.886131578947369e-05, "loss": 0.3485, "step": 24338 }, { "epoch": 1.3629185799081645, "grad_norm": 1.2178103923797607, "learning_rate": 9.886105263157896e-05, "loss": 0.4373, "step": 24339 }, { "epoch": 1.3629745772202935, "grad_norm": 1.101955771446228, "learning_rate": 9.88607894736842e-05, "loss": 0.3823, "step": 24340 }, { "epoch": 1.3630305745324225, "grad_norm": 1.411412239074707, "learning_rate": 9.886052631578948e-05, "loss": 0.5211, "step": 24341 }, { "epoch": 1.3630865718445515, "grad_norm": 1.3933584690093994, "learning_rate": 9.886026315789474e-05, "loss": 0.5002, "step": 24342 }, { "epoch": 1.3631425691566805, "grad_norm": 1.3537825345993042, "learning_rate": 
9.886000000000001e-05, "loss": 0.5249, "step": 24343 }, { "epoch": 1.3631985664688095, "grad_norm": 1.3923225402832031, "learning_rate": 9.885973684210527e-05, "loss": 0.4003, "step": 24344 }, { "epoch": 1.3632545637809386, "grad_norm": 1.1837642192840576, "learning_rate": 9.885947368421053e-05, "loss": 0.4657, "step": 24345 }, { "epoch": 1.3633105610930676, "grad_norm": 1.1448791027069092, "learning_rate": 9.885921052631579e-05, "loss": 0.4039, "step": 24346 }, { "epoch": 1.3633665584051966, "grad_norm": 1.3756505250930786, "learning_rate": 9.885894736842106e-05, "loss": 0.4797, "step": 24347 }, { "epoch": 1.3634225557173256, "grad_norm": 1.3280857801437378, "learning_rate": 9.885868421052632e-05, "loss": 0.4247, "step": 24348 }, { "epoch": 1.3634785530294546, "grad_norm": 1.4541010856628418, "learning_rate": 9.885842105263158e-05, "loss": 0.4844, "step": 24349 }, { "epoch": 1.3635345503415837, "grad_norm": 1.3154367208480835, "learning_rate": 9.885815789473684e-05, "loss": 0.5767, "step": 24350 }, { "epoch": 1.3635905476537127, "grad_norm": 1.5184102058410645, "learning_rate": 9.88578947368421e-05, "loss": 0.5135, "step": 24351 }, { "epoch": 1.3636465449658417, "grad_norm": 1.2757196426391602, "learning_rate": 9.885763157894738e-05, "loss": 0.4336, "step": 24352 }, { "epoch": 1.3637025422779707, "grad_norm": 1.3980156183242798, "learning_rate": 9.885736842105264e-05, "loss": 0.4931, "step": 24353 }, { "epoch": 1.3637585395900997, "grad_norm": 1.666050672531128, "learning_rate": 9.88571052631579e-05, "loss": 0.3957, "step": 24354 }, { "epoch": 1.3638145369022288, "grad_norm": 1.250495195388794, "learning_rate": 9.885684210526316e-05, "loss": 0.4734, "step": 24355 }, { "epoch": 1.3638705342143578, "grad_norm": 1.1907325983047485, "learning_rate": 9.885657894736843e-05, "loss": 0.4164, "step": 24356 }, { "epoch": 1.3639265315264868, "grad_norm": 1.5041922330856323, "learning_rate": 9.885631578947369e-05, "loss": 0.5274, "step": 24357 }, { "epoch": 
1.3639825288386158, "grad_norm": 1.2572085857391357, "learning_rate": 9.885605263157895e-05, "loss": 0.4697, "step": 24358 }, { "epoch": 1.3640385261507448, "grad_norm": 1.5445501804351807, "learning_rate": 9.885578947368421e-05, "loss": 0.5151, "step": 24359 }, { "epoch": 1.3640945234628739, "grad_norm": 1.4259023666381836, "learning_rate": 9.885552631578948e-05, "loss": 0.5375, "step": 24360 }, { "epoch": 1.3641505207750029, "grad_norm": 1.1657230854034424, "learning_rate": 9.885526315789474e-05, "loss": 0.4456, "step": 24361 }, { "epoch": 1.364206518087132, "grad_norm": 1.5526230335235596, "learning_rate": 9.885500000000001e-05, "loss": 0.4723, "step": 24362 }, { "epoch": 1.364262515399261, "grad_norm": 1.306484580039978, "learning_rate": 9.885473684210526e-05, "loss": 0.4282, "step": 24363 }, { "epoch": 1.36431851271139, "grad_norm": 1.1856343746185303, "learning_rate": 9.885447368421053e-05, "loss": 0.4246, "step": 24364 }, { "epoch": 1.364374510023519, "grad_norm": 1.329923391342163, "learning_rate": 9.88542105263158e-05, "loss": 0.502, "step": 24365 }, { "epoch": 1.364430507335648, "grad_norm": 1.2879289388656616, "learning_rate": 9.885394736842107e-05, "loss": 0.4894, "step": 24366 }, { "epoch": 1.364486504647777, "grad_norm": 1.3006231784820557, "learning_rate": 9.885368421052633e-05, "loss": 0.462, "step": 24367 }, { "epoch": 1.364542501959906, "grad_norm": 1.650084376335144, "learning_rate": 9.885342105263157e-05, "loss": 0.387, "step": 24368 }, { "epoch": 1.3645984992720348, "grad_norm": 1.4683609008789062, "learning_rate": 9.885315789473685e-05, "loss": 0.3361, "step": 24369 }, { "epoch": 1.3646544965841638, "grad_norm": 1.5280910730361938, "learning_rate": 9.88528947368421e-05, "loss": 0.4098, "step": 24370 }, { "epoch": 1.3647104938962928, "grad_norm": 1.3542401790618896, "learning_rate": 9.885263157894738e-05, "loss": 0.4193, "step": 24371 }, { "epoch": 1.3647664912084219, "grad_norm": 1.2905116081237793, "learning_rate": 9.885236842105263e-05, 
"loss": 0.4167, "step": 24372 }, { "epoch": 1.3648224885205509, "grad_norm": 1.6238502264022827, "learning_rate": 9.88521052631579e-05, "loss": 0.495, "step": 24373 }, { "epoch": 1.36487848583268, "grad_norm": 1.3055897951126099, "learning_rate": 9.885184210526316e-05, "loss": 0.4439, "step": 24374 }, { "epoch": 1.364934483144809, "grad_norm": 1.3446067571640015, "learning_rate": 9.885157894736843e-05, "loss": 0.3404, "step": 24375 }, { "epoch": 1.364990480456938, "grad_norm": 1.355931043624878, "learning_rate": 9.885131578947369e-05, "loss": 0.4637, "step": 24376 }, { "epoch": 1.365046477769067, "grad_norm": 1.3010797500610352, "learning_rate": 9.885105263157895e-05, "loss": 0.457, "step": 24377 }, { "epoch": 1.365102475081196, "grad_norm": 1.3977206945419312, "learning_rate": 9.885078947368421e-05, "loss": 0.5038, "step": 24378 }, { "epoch": 1.365158472393325, "grad_norm": 1.1111963987350464, "learning_rate": 9.885052631578948e-05, "loss": 0.3715, "step": 24379 }, { "epoch": 1.365214469705454, "grad_norm": 1.1828562021255493, "learning_rate": 9.885026315789474e-05, "loss": 0.4013, "step": 24380 }, { "epoch": 1.365270467017583, "grad_norm": 1.5111533403396606, "learning_rate": 9.885e-05, "loss": 0.4277, "step": 24381 }, { "epoch": 1.365326464329712, "grad_norm": 24.50279426574707, "learning_rate": 9.884973684210526e-05, "loss": 0.5584, "step": 24382 }, { "epoch": 1.365382461641841, "grad_norm": 1.3131940364837646, "learning_rate": 9.884947368421054e-05, "loss": 0.3771, "step": 24383 }, { "epoch": 1.36543845895397, "grad_norm": 1.5126020908355713, "learning_rate": 9.88492105263158e-05, "loss": 0.5313, "step": 24384 }, { "epoch": 1.365494456266099, "grad_norm": 1.2867248058319092, "learning_rate": 9.884894736842106e-05, "loss": 0.4346, "step": 24385 }, { "epoch": 1.3655504535782281, "grad_norm": 1.7722824811935425, "learning_rate": 9.884868421052632e-05, "loss": 0.4468, "step": 24386 }, { "epoch": 1.3656064508903571, "grad_norm": 1.28387451171875, "learning_rate": 
9.884842105263158e-05, "loss": 0.4015, "step": 24387 }, { "epoch": 1.3656624482024862, "grad_norm": 1.6635466814041138, "learning_rate": 9.884815789473685e-05, "loss": 0.4431, "step": 24388 }, { "epoch": 1.3657184455146152, "grad_norm": 1.243938684463501, "learning_rate": 9.884789473684211e-05, "loss": 0.47, "step": 24389 }, { "epoch": 1.3657744428267442, "grad_norm": 1.4119457006454468, "learning_rate": 9.884763157894737e-05, "loss": 0.5061, "step": 24390 }, { "epoch": 1.3658304401388732, "grad_norm": 1.2198487520217896, "learning_rate": 9.884736842105263e-05, "loss": 0.3484, "step": 24391 }, { "epoch": 1.3658864374510022, "grad_norm": 1.2817981243133545, "learning_rate": 9.88471052631579e-05, "loss": 0.5075, "step": 24392 }, { "epoch": 1.3659424347631313, "grad_norm": 1.5079666376113892, "learning_rate": 9.884684210526316e-05, "loss": 0.5416, "step": 24393 }, { "epoch": 1.3659984320752603, "grad_norm": 1.1374844312667847, "learning_rate": 9.884657894736843e-05, "loss": 0.3744, "step": 24394 }, { "epoch": 1.3660544293873893, "grad_norm": 1.438244342803955, "learning_rate": 9.884631578947368e-05, "loss": 0.4617, "step": 24395 }, { "epoch": 1.3661104266995183, "grad_norm": 1.3679901361465454, "learning_rate": 9.884605263157895e-05, "loss": 0.4839, "step": 24396 }, { "epoch": 1.3661664240116473, "grad_norm": 1.2486059665679932, "learning_rate": 9.884578947368421e-05, "loss": 0.4152, "step": 24397 }, { "epoch": 1.3662224213237764, "grad_norm": 1.423294186592102, "learning_rate": 9.884552631578949e-05, "loss": 0.4701, "step": 24398 }, { "epoch": 1.3662784186359054, "grad_norm": 1.3104181289672852, "learning_rate": 9.884526315789475e-05, "loss": 0.5171, "step": 24399 }, { "epoch": 1.3663344159480344, "grad_norm": 1.3045990467071533, "learning_rate": 9.8845e-05, "loss": 0.4298, "step": 24400 }, { "epoch": 1.3663904132601634, "grad_norm": 1.4200360774993896, "learning_rate": 9.884473684210527e-05, "loss": 0.5425, "step": 24401 }, { "epoch": 1.3664464105722924, 
"grad_norm": 1.6482840776443481, "learning_rate": 9.884447368421053e-05, "loss": 0.644, "step": 24402 }, { "epoch": 1.3665024078844215, "grad_norm": 1.2006323337554932, "learning_rate": 9.88442105263158e-05, "loss": 0.4565, "step": 24403 }, { "epoch": 1.3665584051965505, "grad_norm": 1.4098336696624756, "learning_rate": 9.884394736842106e-05, "loss": 0.4197, "step": 24404 }, { "epoch": 1.3666144025086795, "grad_norm": 1.4508843421936035, "learning_rate": 9.884368421052632e-05, "loss": 0.5249, "step": 24405 }, { "epoch": 1.3666703998208085, "grad_norm": 1.574019432067871, "learning_rate": 9.884342105263158e-05, "loss": 0.6243, "step": 24406 }, { "epoch": 1.3667263971329375, "grad_norm": 1.6158201694488525, "learning_rate": 9.884315789473685e-05, "loss": 0.5481, "step": 24407 }, { "epoch": 1.3667823944450666, "grad_norm": 1.360234260559082, "learning_rate": 9.884289473684211e-05, "loss": 0.5952, "step": 24408 }, { "epoch": 1.3668383917571956, "grad_norm": 1.18491530418396, "learning_rate": 9.884263157894737e-05, "loss": 0.2932, "step": 24409 }, { "epoch": 1.3668943890693246, "grad_norm": 1.4678871631622314, "learning_rate": 9.884236842105263e-05, "loss": 0.4682, "step": 24410 }, { "epoch": 1.3669503863814536, "grad_norm": 1.540473222732544, "learning_rate": 9.88421052631579e-05, "loss": 0.4994, "step": 24411 }, { "epoch": 1.3670063836935826, "grad_norm": 1.4704207181930542, "learning_rate": 9.884184210526316e-05, "loss": 0.4684, "step": 24412 }, { "epoch": 1.3670623810057116, "grad_norm": 1.3927764892578125, "learning_rate": 9.884157894736842e-05, "loss": 0.4538, "step": 24413 }, { "epoch": 1.3671183783178407, "grad_norm": 1.0648525953292847, "learning_rate": 9.884131578947368e-05, "loss": 0.3678, "step": 24414 }, { "epoch": 1.3671743756299697, "grad_norm": 1.3720592260360718, "learning_rate": 9.884105263157896e-05, "loss": 0.4999, "step": 24415 }, { "epoch": 1.3672303729420987, "grad_norm": 1.8324408531188965, "learning_rate": 9.884078947368422e-05, "loss": 0.5443, 
"step": 24416 }, { "epoch": 1.3672863702542277, "grad_norm": 1.3010083436965942, "learning_rate": 9.884052631578949e-05, "loss": 0.3828, "step": 24417 }, { "epoch": 1.3673423675663567, "grad_norm": 1.4595146179199219, "learning_rate": 9.884026315789474e-05, "loss": 0.6761, "step": 24418 }, { "epoch": 1.3673983648784858, "grad_norm": 1.4448872804641724, "learning_rate": 9.884e-05, "loss": 0.4775, "step": 24419 }, { "epoch": 1.3674543621906148, "grad_norm": 2.1411805152893066, "learning_rate": 9.883973684210527e-05, "loss": 0.7041, "step": 24420 }, { "epoch": 1.3675103595027438, "grad_norm": 1.5478935241699219, "learning_rate": 9.883947368421053e-05, "loss": 0.503, "step": 24421 }, { "epoch": 1.3675663568148728, "grad_norm": 1.2933239936828613, "learning_rate": 9.88392105263158e-05, "loss": 0.3763, "step": 24422 }, { "epoch": 1.3676223541270018, "grad_norm": 1.6128108501434326, "learning_rate": 9.883894736842105e-05, "loss": 0.5932, "step": 24423 }, { "epoch": 1.3676783514391309, "grad_norm": 1.8574882745742798, "learning_rate": 9.883868421052632e-05, "loss": 0.4911, "step": 24424 }, { "epoch": 1.3677343487512599, "grad_norm": 1.3461345434188843, "learning_rate": 9.883842105263158e-05, "loss": 0.4497, "step": 24425 }, { "epoch": 1.367790346063389, "grad_norm": 1.1728650331497192, "learning_rate": 9.883815789473685e-05, "loss": 0.3946, "step": 24426 }, { "epoch": 1.367846343375518, "grad_norm": 1.2892390489578247, "learning_rate": 9.88378947368421e-05, "loss": 0.4278, "step": 24427 }, { "epoch": 1.367902340687647, "grad_norm": 1.397377848625183, "learning_rate": 9.883763157894737e-05, "loss": 0.3746, "step": 24428 }, { "epoch": 1.367958337999776, "grad_norm": 1.3317534923553467, "learning_rate": 9.883736842105263e-05, "loss": 0.449, "step": 24429 }, { "epoch": 1.368014335311905, "grad_norm": 1.370968222618103, "learning_rate": 9.883710526315791e-05, "loss": 0.5822, "step": 24430 }, { "epoch": 1.368070332624034, "grad_norm": 1.4074839353561401, "learning_rate": 
9.883684210526317e-05, "loss": 0.4826, "step": 24431 }, { "epoch": 1.368126329936163, "grad_norm": 1.3753305673599243, "learning_rate": 9.883657894736843e-05, "loss": 0.3796, "step": 24432 }, { "epoch": 1.368182327248292, "grad_norm": 1.620547890663147, "learning_rate": 9.883631578947369e-05, "loss": 0.5574, "step": 24433 }, { "epoch": 1.368238324560421, "grad_norm": 1.3297902345657349, "learning_rate": 9.883605263157896e-05, "loss": 0.4992, "step": 24434 }, { "epoch": 1.36829432187255, "grad_norm": 1.27519953250885, "learning_rate": 9.883578947368422e-05, "loss": 0.4256, "step": 24435 }, { "epoch": 1.368350319184679, "grad_norm": 1.8830041885375977, "learning_rate": 9.883552631578948e-05, "loss": 0.5545, "step": 24436 }, { "epoch": 1.368406316496808, "grad_norm": 1.4125512838363647, "learning_rate": 9.883526315789474e-05, "loss": 0.4259, "step": 24437 }, { "epoch": 1.3684623138089371, "grad_norm": 1.3391048908233643, "learning_rate": 9.8835e-05, "loss": 0.4256, "step": 24438 }, { "epoch": 1.3685183111210661, "grad_norm": 1.4954277276992798, "learning_rate": 9.883473684210527e-05, "loss": 0.5805, "step": 24439 }, { "epoch": 1.3685743084331952, "grad_norm": 1.4033195972442627, "learning_rate": 9.883447368421053e-05, "loss": 0.4628, "step": 24440 }, { "epoch": 1.3686303057453242, "grad_norm": 1.6419473886489868, "learning_rate": 9.883421052631579e-05, "loss": 0.4202, "step": 24441 }, { "epoch": 1.3686863030574532, "grad_norm": 1.9852774143218994, "learning_rate": 9.883394736842105e-05, "loss": 0.5977, "step": 24442 }, { "epoch": 1.3687423003695822, "grad_norm": 1.5373849868774414, "learning_rate": 9.883368421052632e-05, "loss": 0.7519, "step": 24443 }, { "epoch": 1.3687982976817112, "grad_norm": 1.4142190217971802, "learning_rate": 9.883342105263158e-05, "loss": 0.5456, "step": 24444 }, { "epoch": 1.3688542949938403, "grad_norm": 1.35452139377594, "learning_rate": 9.883315789473684e-05, "loss": 0.5445, "step": 24445 }, { "epoch": 1.3689102923059693, "grad_norm": 
1.2676734924316406, "learning_rate": 9.88328947368421e-05, "loss": 0.3916, "step": 24446 }, { "epoch": 1.3689662896180983, "grad_norm": 1.472924828529358, "learning_rate": 9.883263157894738e-05, "loss": 0.5168, "step": 24447 }, { "epoch": 1.3690222869302273, "grad_norm": 1.5486114025115967, "learning_rate": 9.883236842105264e-05, "loss": 0.5228, "step": 24448 }, { "epoch": 1.3690782842423563, "grad_norm": 1.091369390487671, "learning_rate": 9.883210526315791e-05, "loss": 0.3869, "step": 24449 }, { "epoch": 1.3691342815544854, "grad_norm": 1.6277631521224976, "learning_rate": 9.883184210526316e-05, "loss": 0.4969, "step": 24450 }, { "epoch": 1.3691902788666144, "grad_norm": 1.6347336769104004, "learning_rate": 9.883157894736843e-05, "loss": 0.5751, "step": 24451 }, { "epoch": 1.3692462761787434, "grad_norm": 1.3268325328826904, "learning_rate": 9.883131578947369e-05, "loss": 0.3474, "step": 24452 }, { "epoch": 1.3693022734908724, "grad_norm": 1.3420542478561401, "learning_rate": 9.883105263157896e-05, "loss": 0.4211, "step": 24453 }, { "epoch": 1.3693582708030014, "grad_norm": 1.2873342037200928, "learning_rate": 9.883078947368422e-05, "loss": 0.5331, "step": 24454 }, { "epoch": 1.3694142681151305, "grad_norm": 1.4083830118179321, "learning_rate": 9.883052631578947e-05, "loss": 0.4748, "step": 24455 }, { "epoch": 1.3694702654272595, "grad_norm": 2.098632335662842, "learning_rate": 9.883026315789474e-05, "loss": 0.5454, "step": 24456 }, { "epoch": 1.3695262627393885, "grad_norm": 1.594950795173645, "learning_rate": 9.883e-05, "loss": 0.5077, "step": 24457 }, { "epoch": 1.3695822600515175, "grad_norm": 1.342111349105835, "learning_rate": 9.882973684210527e-05, "loss": 0.5786, "step": 24458 }, { "epoch": 1.3696382573636465, "grad_norm": 1.401429295539856, "learning_rate": 9.882947368421053e-05, "loss": 0.4681, "step": 24459 }, { "epoch": 1.3696942546757755, "grad_norm": 2.9357070922851562, "learning_rate": 9.88292105263158e-05, "loss": 0.6699, "step": 24460 }, { 
"epoch": 1.3697502519879046, "grad_norm": 1.313418984413147, "learning_rate": 9.882894736842105e-05, "loss": 0.4755, "step": 24461 }, { "epoch": 1.3698062493000336, "grad_norm": 1.44545316696167, "learning_rate": 9.882868421052633e-05, "loss": 0.4548, "step": 24462 }, { "epoch": 1.3698622466121626, "grad_norm": 1.3986786603927612, "learning_rate": 9.882842105263159e-05, "loss": 0.4006, "step": 24463 }, { "epoch": 1.3699182439242916, "grad_norm": 1.4873632192611694, "learning_rate": 9.882815789473685e-05, "loss": 0.5852, "step": 24464 }, { "epoch": 1.3699742412364206, "grad_norm": 1.25263512134552, "learning_rate": 9.88278947368421e-05, "loss": 0.3935, "step": 24465 }, { "epoch": 1.3700302385485497, "grad_norm": 1.2605167627334595, "learning_rate": 9.882763157894738e-05, "loss": 0.5369, "step": 24466 }, { "epoch": 1.3700862358606787, "grad_norm": 1.7462189197540283, "learning_rate": 9.882736842105264e-05, "loss": 0.5562, "step": 24467 }, { "epoch": 1.3701422331728077, "grad_norm": 1.29815673828125, "learning_rate": 9.88271052631579e-05, "loss": 0.4363, "step": 24468 }, { "epoch": 1.3701982304849367, "grad_norm": 1.4782339334487915, "learning_rate": 9.882684210526316e-05, "loss": 0.4121, "step": 24469 }, { "epoch": 1.3702542277970657, "grad_norm": 1.279172420501709, "learning_rate": 9.882657894736843e-05, "loss": 0.4359, "step": 24470 }, { "epoch": 1.3703102251091948, "grad_norm": 1.3147929906845093, "learning_rate": 9.882631578947369e-05, "loss": 0.4926, "step": 24471 }, { "epoch": 1.3703662224213238, "grad_norm": 1.593000888824463, "learning_rate": 9.882605263157895e-05, "loss": 0.43, "step": 24472 }, { "epoch": 1.3704222197334528, "grad_norm": 1.2197630405426025, "learning_rate": 9.882578947368421e-05, "loss": 0.424, "step": 24473 }, { "epoch": 1.3704782170455818, "grad_norm": 1.2020893096923828, "learning_rate": 9.882552631578947e-05, "loss": 0.4941, "step": 24474 }, { "epoch": 1.3705342143577108, "grad_norm": 1.323865532875061, "learning_rate": 
9.882526315789474e-05, "loss": 0.6674, "step": 24475 }, { "epoch": 1.3705902116698399, "grad_norm": 1.4042918682098389, "learning_rate": 9.8825e-05, "loss": 0.3982, "step": 24476 }, { "epoch": 1.3706462089819689, "grad_norm": 1.2864902019500732, "learning_rate": 9.882473684210528e-05, "loss": 0.4238, "step": 24477 }, { "epoch": 1.370702206294098, "grad_norm": 1.6968425512313843, "learning_rate": 9.882447368421052e-05, "loss": 0.4565, "step": 24478 }, { "epoch": 1.370758203606227, "grad_norm": 1.2619678974151611, "learning_rate": 9.88242105263158e-05, "loss": 0.4263, "step": 24479 }, { "epoch": 1.370814200918356, "grad_norm": 1.5142074823379517, "learning_rate": 9.882394736842106e-05, "loss": 0.4625, "step": 24480 }, { "epoch": 1.370870198230485, "grad_norm": 1.8756927251815796, "learning_rate": 9.882368421052633e-05, "loss": 0.4626, "step": 24481 }, { "epoch": 1.370926195542614, "grad_norm": 1.309545874595642, "learning_rate": 9.882342105263158e-05, "loss": 0.4355, "step": 24482 }, { "epoch": 1.370982192854743, "grad_norm": 1.2559274435043335, "learning_rate": 9.882315789473685e-05, "loss": 0.4347, "step": 24483 }, { "epoch": 1.371038190166872, "grad_norm": 1.3520760536193848, "learning_rate": 9.882289473684211e-05, "loss": 0.4774, "step": 24484 }, { "epoch": 1.371094187479001, "grad_norm": 1.803807258605957, "learning_rate": 9.882263157894738e-05, "loss": 0.5191, "step": 24485 }, { "epoch": 1.37115018479113, "grad_norm": 1.264050006866455, "learning_rate": 9.882236842105264e-05, "loss": 0.42, "step": 24486 }, { "epoch": 1.371206182103259, "grad_norm": 1.4715521335601807, "learning_rate": 9.88221052631579e-05, "loss": 0.453, "step": 24487 }, { "epoch": 1.371262179415388, "grad_norm": 1.6300288438796997, "learning_rate": 9.882184210526316e-05, "loss": 0.4245, "step": 24488 }, { "epoch": 1.371318176727517, "grad_norm": 1.3610649108886719, "learning_rate": 9.882157894736842e-05, "loss": 0.5405, "step": 24489 }, { "epoch": 1.3713741740396461, "grad_norm": 
1.3018198013305664, "learning_rate": 9.88213157894737e-05, "loss": 0.392, "step": 24490 }, { "epoch": 1.3714301713517751, "grad_norm": 1.224164366722107, "learning_rate": 9.882105263157895e-05, "loss": 0.478, "step": 24491 }, { "epoch": 1.3714861686639042, "grad_norm": 1.346011757850647, "learning_rate": 9.882078947368421e-05, "loss": 0.5077, "step": 24492 }, { "epoch": 1.3715421659760332, "grad_norm": 1.1635698080062866, "learning_rate": 9.882052631578947e-05, "loss": 0.4368, "step": 24493 }, { "epoch": 1.3715981632881622, "grad_norm": 0.9612644910812378, "learning_rate": 9.882026315789475e-05, "loss": 0.3163, "step": 24494 }, { "epoch": 1.3716541606002912, "grad_norm": 1.6849596500396729, "learning_rate": 9.882e-05, "loss": 0.4784, "step": 24495 }, { "epoch": 1.3717101579124202, "grad_norm": 1.464515209197998, "learning_rate": 9.881973684210527e-05, "loss": 0.5673, "step": 24496 }, { "epoch": 1.3717661552245493, "grad_norm": 1.300506830215454, "learning_rate": 9.881947368421053e-05, "loss": 0.4394, "step": 24497 }, { "epoch": 1.3718221525366783, "grad_norm": 1.3180797100067139, "learning_rate": 9.88192105263158e-05, "loss": 0.5123, "step": 24498 }, { "epoch": 1.3718781498488073, "grad_norm": 1.2333238124847412, "learning_rate": 9.881894736842106e-05, "loss": 0.4677, "step": 24499 }, { "epoch": 1.3719341471609363, "grad_norm": 1.2898578643798828, "learning_rate": 9.881868421052632e-05, "loss": 0.4185, "step": 24500 }, { "epoch": 1.3719901444730653, "grad_norm": 1.4654231071472168, "learning_rate": 9.881842105263158e-05, "loss": 0.5147, "step": 24501 }, { "epoch": 1.3720461417851944, "grad_norm": 1.352738618850708, "learning_rate": 9.881815789473685e-05, "loss": 0.5083, "step": 24502 }, { "epoch": 1.3721021390973234, "grad_norm": 1.1011550426483154, "learning_rate": 9.881789473684211e-05, "loss": 0.385, "step": 24503 }, { "epoch": 1.3721581364094524, "grad_norm": 3.091196060180664, "learning_rate": 9.881763157894738e-05, "loss": 0.4052, "step": 24504 }, { "epoch": 
1.3722141337215814, "grad_norm": 1.392540454864502, "learning_rate": 9.881736842105263e-05, "loss": 0.4687, "step": 24505 }, { "epoch": 1.3722701310337104, "grad_norm": 1.606001853942871, "learning_rate": 9.881710526315789e-05, "loss": 0.4993, "step": 24506 }, { "epoch": 1.3723261283458394, "grad_norm": 1.2238637208938599, "learning_rate": 9.881684210526316e-05, "loss": 0.3676, "step": 24507 }, { "epoch": 1.3723821256579685, "grad_norm": 1.2948689460754395, "learning_rate": 9.881657894736842e-05, "loss": 0.5334, "step": 24508 }, { "epoch": 1.3724381229700975, "grad_norm": 1.380072832107544, "learning_rate": 9.88163157894737e-05, "loss": 0.4704, "step": 24509 }, { "epoch": 1.3724941202822265, "grad_norm": 1.105183482170105, "learning_rate": 9.881605263157894e-05, "loss": 0.4189, "step": 24510 }, { "epoch": 1.3725501175943555, "grad_norm": 1.1319661140441895, "learning_rate": 9.881578947368422e-05, "loss": 0.4072, "step": 24511 }, { "epoch": 1.3726061149064845, "grad_norm": 2.206179141998291, "learning_rate": 9.881552631578948e-05, "loss": 0.6171, "step": 24512 }, { "epoch": 1.3726621122186136, "grad_norm": 1.2761162519454956, "learning_rate": 9.881526315789475e-05, "loss": 0.3865, "step": 24513 }, { "epoch": 1.3727181095307426, "grad_norm": 1.4989609718322754, "learning_rate": 9.881500000000001e-05, "loss": 0.5733, "step": 24514 }, { "epoch": 1.3727741068428716, "grad_norm": 1.320183277130127, "learning_rate": 9.881473684210527e-05, "loss": 0.475, "step": 24515 }, { "epoch": 1.3728301041550006, "grad_norm": 1.1859266757965088, "learning_rate": 9.881447368421053e-05, "loss": 0.4993, "step": 24516 }, { "epoch": 1.3728861014671296, "grad_norm": 1.1573539972305298, "learning_rate": 9.88142105263158e-05, "loss": 0.3842, "step": 24517 }, { "epoch": 1.3729420987792587, "grad_norm": 1.4029028415679932, "learning_rate": 9.881394736842106e-05, "loss": 0.5271, "step": 24518 }, { "epoch": 1.3729980960913877, "grad_norm": 1.2326328754425049, "learning_rate": 
9.881368421052632e-05, "loss": 0.4335, "step": 24519 }, { "epoch": 1.3730540934035167, "grad_norm": 3.316587209701538, "learning_rate": 9.881342105263158e-05, "loss": 0.5095, "step": 24520 }, { "epoch": 1.3731100907156457, "grad_norm": 1.5112648010253906, "learning_rate": 9.881315789473685e-05, "loss": 0.4672, "step": 24521 }, { "epoch": 1.3731660880277747, "grad_norm": 1.3780750036239624, "learning_rate": 9.881289473684211e-05, "loss": 0.446, "step": 24522 }, { "epoch": 1.3732220853399038, "grad_norm": 1.4295034408569336, "learning_rate": 9.881263157894737e-05, "loss": 0.3854, "step": 24523 }, { "epoch": 1.3732780826520328, "grad_norm": 1.6485295295715332, "learning_rate": 9.881236842105263e-05, "loss": 0.5406, "step": 24524 }, { "epoch": 1.3733340799641618, "grad_norm": 1.5437777042388916, "learning_rate": 9.881210526315789e-05, "loss": 0.6657, "step": 24525 }, { "epoch": 1.3733900772762908, "grad_norm": 1.3259806632995605, "learning_rate": 9.881184210526317e-05, "loss": 0.4393, "step": 24526 }, { "epoch": 1.3734460745884198, "grad_norm": 1.273875117301941, "learning_rate": 9.881157894736843e-05, "loss": 0.5589, "step": 24527 }, { "epoch": 1.3735020719005488, "grad_norm": 5.459080696105957, "learning_rate": 9.881131578947369e-05, "loss": 0.3946, "step": 24528 }, { "epoch": 1.3735580692126779, "grad_norm": 1.2246806621551514, "learning_rate": 9.881105263157895e-05, "loss": 0.4394, "step": 24529 }, { "epoch": 1.3736140665248069, "grad_norm": 1.2419131994247437, "learning_rate": 9.881078947368422e-05, "loss": 0.5603, "step": 24530 }, { "epoch": 1.373670063836936, "grad_norm": 1.3171749114990234, "learning_rate": 9.881052631578948e-05, "loss": 0.4098, "step": 24531 }, { "epoch": 1.373726061149065, "grad_norm": 1.3591086864471436, "learning_rate": 9.881026315789475e-05, "loss": 0.3953, "step": 24532 }, { "epoch": 1.373782058461194, "grad_norm": 1.4800583124160767, "learning_rate": 9.881e-05, "loss": 0.3976, "step": 24533 }, { "epoch": 1.373838055773323, "grad_norm": 
1.4704041481018066, "learning_rate": 9.880973684210527e-05, "loss": 0.4332, "step": 24534 }, { "epoch": 1.373894053085452, "grad_norm": 1.4346450567245483, "learning_rate": 9.880947368421053e-05, "loss": 0.6352, "step": 24535 }, { "epoch": 1.373950050397581, "grad_norm": 1.7150028944015503, "learning_rate": 9.88092105263158e-05, "loss": 0.5394, "step": 24536 }, { "epoch": 1.37400604770971, "grad_norm": 1.4494582414627075, "learning_rate": 9.880894736842105e-05, "loss": 0.4575, "step": 24537 }, { "epoch": 1.374062045021839, "grad_norm": 1.6348971128463745, "learning_rate": 9.880868421052632e-05, "loss": 0.5308, "step": 24538 }, { "epoch": 1.374118042333968, "grad_norm": 1.4942302703857422, "learning_rate": 9.880842105263158e-05, "loss": 0.4096, "step": 24539 }, { "epoch": 1.374174039646097, "grad_norm": 1.2634156942367554, "learning_rate": 9.880815789473684e-05, "loss": 0.465, "step": 24540 }, { "epoch": 1.374230036958226, "grad_norm": 54.6109504699707, "learning_rate": 9.880789473684212e-05, "loss": 0.4818, "step": 24541 }, { "epoch": 1.3742860342703551, "grad_norm": 1.678976058959961, "learning_rate": 9.880763157894736e-05, "loss": 0.4527, "step": 24542 }, { "epoch": 1.3743420315824841, "grad_norm": 1.4438743591308594, "learning_rate": 9.880736842105264e-05, "loss": 0.4749, "step": 24543 }, { "epoch": 1.3743980288946132, "grad_norm": 1.649899959564209, "learning_rate": 9.88071052631579e-05, "loss": 0.3697, "step": 24544 }, { "epoch": 1.3744540262067422, "grad_norm": 1.311410903930664, "learning_rate": 9.880684210526317e-05, "loss": 0.3974, "step": 24545 }, { "epoch": 1.3745100235188712, "grad_norm": 1.3957568407058716, "learning_rate": 9.880657894736843e-05, "loss": 0.5027, "step": 24546 }, { "epoch": 1.3745660208310002, "grad_norm": 3.369692802429199, "learning_rate": 9.880631578947369e-05, "loss": 0.5027, "step": 24547 }, { "epoch": 1.3746220181431292, "grad_norm": 1.3832881450653076, "learning_rate": 9.880605263157895e-05, "loss": 0.4475, "step": 24548 }, { 
"epoch": 1.3746780154552583, "grad_norm": 1.0688807964324951, "learning_rate": 9.880578947368422e-05, "loss": 0.3709, "step": 24549 }, { "epoch": 1.3747340127673873, "grad_norm": 1.4251041412353516, "learning_rate": 9.880552631578948e-05, "loss": 0.5787, "step": 24550 }, { "epoch": 1.3747900100795163, "grad_norm": 3.9179627895355225, "learning_rate": 9.880526315789474e-05, "loss": 0.5285, "step": 24551 }, { "epoch": 1.3748460073916453, "grad_norm": 1.3410621881484985, "learning_rate": 9.8805e-05, "loss": 0.3814, "step": 24552 }, { "epoch": 1.3749020047037743, "grad_norm": 1.1759706735610962, "learning_rate": 9.880473684210527e-05, "loss": 0.4728, "step": 24553 }, { "epoch": 1.3749580020159033, "grad_norm": 1.222511649131775, "learning_rate": 9.880447368421053e-05, "loss": 0.3362, "step": 24554 }, { "epoch": 1.3750139993280324, "grad_norm": 1.8133656978607178, "learning_rate": 9.880421052631579e-05, "loss": 0.6572, "step": 24555 }, { "epoch": 1.3750699966401614, "grad_norm": 1.2357032299041748, "learning_rate": 9.880394736842105e-05, "loss": 0.4965, "step": 24556 }, { "epoch": 1.3751259939522904, "grad_norm": 1.3727672100067139, "learning_rate": 9.880368421052633e-05, "loss": 0.3667, "step": 24557 }, { "epoch": 1.3751819912644194, "grad_norm": 1.2672778367996216, "learning_rate": 9.880342105263159e-05, "loss": 0.3819, "step": 24558 }, { "epoch": 1.3752379885765484, "grad_norm": 1.537623643875122, "learning_rate": 9.880315789473685e-05, "loss": 0.4472, "step": 24559 }, { "epoch": 1.3752939858886775, "grad_norm": 1.3242238759994507, "learning_rate": 9.88028947368421e-05, "loss": 0.3838, "step": 24560 }, { "epoch": 1.3753499832008065, "grad_norm": 1.2333530187606812, "learning_rate": 9.880263157894736e-05, "loss": 0.4078, "step": 24561 }, { "epoch": 1.3754059805129355, "grad_norm": 1.4914171695709229, "learning_rate": 9.880236842105264e-05, "loss": 0.4885, "step": 24562 }, { "epoch": 1.3754619778250645, "grad_norm": 1.398218035697937, "learning_rate": 
9.88021052631579e-05, "loss": 0.404, "step": 24563 }, { "epoch": 1.3755179751371935, "grad_norm": 1.4480798244476318, "learning_rate": 9.880184210526317e-05, "loss": 0.4943, "step": 24564 }, { "epoch": 1.3755739724493226, "grad_norm": 1.5293318033218384, "learning_rate": 9.880157894736842e-05, "loss": 0.5123, "step": 24565 }, { "epoch": 1.3756299697614516, "grad_norm": 2.8959763050079346, "learning_rate": 9.880131578947369e-05, "loss": 0.5938, "step": 24566 }, { "epoch": 1.3756859670735806, "grad_norm": 1.165149211883545, "learning_rate": 9.880105263157895e-05, "loss": 0.43, "step": 24567 }, { "epoch": 1.3757419643857096, "grad_norm": 1.2796895503997803, "learning_rate": 9.880078947368422e-05, "loss": 0.3938, "step": 24568 }, { "epoch": 1.3757979616978386, "grad_norm": 1.3704499006271362, "learning_rate": 9.880052631578948e-05, "loss": 0.4379, "step": 24569 }, { "epoch": 1.3758539590099677, "grad_norm": 1.426423192024231, "learning_rate": 9.880026315789474e-05, "loss": 0.4486, "step": 24570 }, { "epoch": 1.3759099563220967, "grad_norm": 1.323846459388733, "learning_rate": 9.88e-05, "loss": 0.3953, "step": 24571 }, { "epoch": 1.3759659536342257, "grad_norm": 1.355147361755371, "learning_rate": 9.879973684210528e-05, "loss": 0.4667, "step": 24572 }, { "epoch": 1.3760219509463547, "grad_norm": 1.044049620628357, "learning_rate": 9.879947368421054e-05, "loss": 0.374, "step": 24573 }, { "epoch": 1.3760779482584837, "grad_norm": 1.219199538230896, "learning_rate": 9.87992105263158e-05, "loss": 0.4622, "step": 24574 }, { "epoch": 1.3761339455706127, "grad_norm": 1.918542504310608, "learning_rate": 9.879894736842106e-05, "loss": 0.4706, "step": 24575 }, { "epoch": 1.3761899428827415, "grad_norm": 1.292521595954895, "learning_rate": 9.879868421052631e-05, "loss": 0.3774, "step": 24576 }, { "epoch": 1.3762459401948706, "grad_norm": 1.4552044868469238, "learning_rate": 9.879842105263159e-05, "loss": 0.5177, "step": 24577 }, { "epoch": 1.3763019375069996, "grad_norm": 
1.1717822551727295, "learning_rate": 9.879815789473685e-05, "loss": 0.5617, "step": 24578 }, { "epoch": 1.3763579348191286, "grad_norm": 1.4983909130096436, "learning_rate": 9.879789473684211e-05, "loss": 0.4669, "step": 24579 }, { "epoch": 1.3764139321312576, "grad_norm": 1.1356526613235474, "learning_rate": 9.879763157894737e-05, "loss": 0.4193, "step": 24580 }, { "epoch": 1.3764699294433866, "grad_norm": 1.1955333948135376, "learning_rate": 9.879736842105264e-05, "loss": 0.3941, "step": 24581 }, { "epoch": 1.3765259267555157, "grad_norm": 1.644252896308899, "learning_rate": 9.87971052631579e-05, "loss": 0.5471, "step": 24582 }, { "epoch": 1.3765819240676447, "grad_norm": 1.354682445526123, "learning_rate": 9.879684210526316e-05, "loss": 0.4126, "step": 24583 }, { "epoch": 1.3766379213797737, "grad_norm": 1.320406436920166, "learning_rate": 9.879657894736842e-05, "loss": 0.4518, "step": 24584 }, { "epoch": 1.3766939186919027, "grad_norm": 1.5978227853775024, "learning_rate": 9.879631578947369e-05, "loss": 0.5316, "step": 24585 }, { "epoch": 1.3767499160040317, "grad_norm": 1.4413739442825317, "learning_rate": 9.879605263157895e-05, "loss": 0.5114, "step": 24586 }, { "epoch": 1.3768059133161608, "grad_norm": 1.6403529644012451, "learning_rate": 9.879578947368421e-05, "loss": 0.5069, "step": 24587 }, { "epoch": 1.3768619106282898, "grad_norm": 2.017141103744507, "learning_rate": 9.879552631578947e-05, "loss": 0.4421, "step": 24588 }, { "epoch": 1.3769179079404188, "grad_norm": 1.1616919040679932, "learning_rate": 9.879526315789475e-05, "loss": 0.3874, "step": 24589 }, { "epoch": 1.3769739052525478, "grad_norm": 1.409917950630188, "learning_rate": 9.8795e-05, "loss": 0.4899, "step": 24590 }, { "epoch": 1.3770299025646768, "grad_norm": 1.1836915016174316, "learning_rate": 9.879473684210528e-05, "loss": 0.4048, "step": 24591 }, { "epoch": 1.3770858998768059, "grad_norm": 1.5157136917114258, "learning_rate": 9.879447368421052e-05, "loss": 0.6242, "step": 24592 }, { 
"epoch": 1.3771418971889349, "grad_norm": 1.304404616355896, "learning_rate": 9.879421052631578e-05, "loss": 0.4373, "step": 24593 }, { "epoch": 1.377197894501064, "grad_norm": 1.431369423866272, "learning_rate": 9.879394736842106e-05, "loss": 0.4589, "step": 24594 }, { "epoch": 1.377253891813193, "grad_norm": 1.170019507408142, "learning_rate": 9.879368421052632e-05, "loss": 0.456, "step": 24595 }, { "epoch": 1.377309889125322, "grad_norm": 1.325610876083374, "learning_rate": 9.879342105263159e-05, "loss": 0.5171, "step": 24596 }, { "epoch": 1.377365886437451, "grad_norm": 1.6425721645355225, "learning_rate": 9.879315789473684e-05, "loss": 0.5185, "step": 24597 }, { "epoch": 1.37742188374958, "grad_norm": 1.7873594760894775, "learning_rate": 9.879289473684211e-05, "loss": 0.5937, "step": 24598 }, { "epoch": 1.377477881061709, "grad_norm": 1.5284775495529175, "learning_rate": 9.879263157894737e-05, "loss": 0.411, "step": 24599 }, { "epoch": 1.377533878373838, "grad_norm": 1.3774112462997437, "learning_rate": 9.879236842105264e-05, "loss": 0.4618, "step": 24600 }, { "epoch": 1.377589875685967, "grad_norm": 1.327060580253601, "learning_rate": 9.87921052631579e-05, "loss": 0.443, "step": 24601 }, { "epoch": 1.377645872998096, "grad_norm": 1.7170281410217285, "learning_rate": 9.879184210526316e-05, "loss": 0.5056, "step": 24602 }, { "epoch": 1.377701870310225, "grad_norm": 1.513201355934143, "learning_rate": 9.879157894736842e-05, "loss": 0.5068, "step": 24603 }, { "epoch": 1.377757867622354, "grad_norm": 1.3682736158370972, "learning_rate": 9.87913157894737e-05, "loss": 0.566, "step": 24604 }, { "epoch": 1.377813864934483, "grad_norm": 1.3634577989578247, "learning_rate": 9.879105263157896e-05, "loss": 0.4635, "step": 24605 }, { "epoch": 1.3778698622466121, "grad_norm": 1.2694344520568848, "learning_rate": 9.879078947368422e-05, "loss": 0.5066, "step": 24606 }, { "epoch": 1.3779258595587411, "grad_norm": 1.6849581003189087, "learning_rate": 9.879052631578947e-05, 
"loss": 0.5615, "step": 24607 }, { "epoch": 1.3779818568708702, "grad_norm": 1.1305550336837769, "learning_rate": 9.879026315789475e-05, "loss": 0.5246, "step": 24608 }, { "epoch": 1.3780378541829992, "grad_norm": 1.4769785404205322, "learning_rate": 9.879000000000001e-05, "loss": 0.4707, "step": 24609 }, { "epoch": 1.3780938514951282, "grad_norm": 1.4925611019134521, "learning_rate": 9.878973684210527e-05, "loss": 0.8428, "step": 24610 }, { "epoch": 1.3781498488072572, "grad_norm": 1.2915689945220947, "learning_rate": 9.878947368421053e-05, "loss": 0.4596, "step": 24611 }, { "epoch": 1.3782058461193862, "grad_norm": 1.2616046667099, "learning_rate": 9.878921052631579e-05, "loss": 0.4247, "step": 24612 }, { "epoch": 1.3782618434315153, "grad_norm": 1.432315707206726, "learning_rate": 9.878894736842106e-05, "loss": 0.4922, "step": 24613 }, { "epoch": 1.3783178407436443, "grad_norm": 1.3737187385559082, "learning_rate": 9.878868421052632e-05, "loss": 0.5139, "step": 24614 }, { "epoch": 1.3783738380557733, "grad_norm": 1.4290072917938232, "learning_rate": 9.878842105263158e-05, "loss": 0.3874, "step": 24615 }, { "epoch": 1.3784298353679023, "grad_norm": 1.3011133670806885, "learning_rate": 9.878815789473684e-05, "loss": 0.4629, "step": 24616 }, { "epoch": 1.3784858326800313, "grad_norm": 1.2998813390731812, "learning_rate": 9.878789473684211e-05, "loss": 0.4393, "step": 24617 }, { "epoch": 1.3785418299921604, "grad_norm": 1.4572488069534302, "learning_rate": 9.878763157894737e-05, "loss": 0.4486, "step": 24618 }, { "epoch": 1.3785978273042894, "grad_norm": 1.1944403648376465, "learning_rate": 9.878736842105265e-05, "loss": 0.3721, "step": 24619 }, { "epoch": 1.3786538246164184, "grad_norm": 1.4873815774917603, "learning_rate": 9.878710526315789e-05, "loss": 0.4823, "step": 24620 }, { "epoch": 1.3787098219285474, "grad_norm": 1.6921597719192505, "learning_rate": 9.878684210526317e-05, "loss": 0.5813, "step": 24621 }, { "epoch": 1.3787658192406764, "grad_norm": 
1.540161371231079, "learning_rate": 9.878657894736843e-05, "loss": 0.5494, "step": 24622 }, { "epoch": 1.3788218165528054, "grad_norm": 1.5844582319259644, "learning_rate": 9.87863157894737e-05, "loss": 0.5868, "step": 24623 }, { "epoch": 1.3788778138649345, "grad_norm": 1.5227375030517578, "learning_rate": 9.878605263157896e-05, "loss": 0.4472, "step": 24624 }, { "epoch": 1.3789338111770635, "grad_norm": 1.295911431312561, "learning_rate": 9.878578947368422e-05, "loss": 0.434, "step": 24625 }, { "epoch": 1.3789898084891925, "grad_norm": 1.7489311695098877, "learning_rate": 9.878552631578948e-05, "loss": 0.4214, "step": 24626 }, { "epoch": 1.3790458058013215, "grad_norm": 1.0726250410079956, "learning_rate": 9.878526315789474e-05, "loss": 0.3382, "step": 24627 }, { "epoch": 1.3791018031134505, "grad_norm": 1.4631832838058472, "learning_rate": 9.878500000000001e-05, "loss": 0.4638, "step": 24628 }, { "epoch": 1.3791578004255796, "grad_norm": 1.142659068107605, "learning_rate": 9.878473684210526e-05, "loss": 0.351, "step": 24629 }, { "epoch": 1.3792137977377086, "grad_norm": 1.292729377746582, "learning_rate": 9.878447368421053e-05, "loss": 0.3391, "step": 24630 }, { "epoch": 1.3792697950498376, "grad_norm": 1.1582690477371216, "learning_rate": 9.878421052631579e-05, "loss": 0.4104, "step": 24631 }, { "epoch": 1.3793257923619666, "grad_norm": 1.4168444871902466, "learning_rate": 9.878394736842106e-05, "loss": 0.3806, "step": 24632 }, { "epoch": 1.3793817896740956, "grad_norm": 1.4440557956695557, "learning_rate": 9.878368421052632e-05, "loss": 0.5473, "step": 24633 }, { "epoch": 1.3794377869862247, "grad_norm": 1.5434112548828125, "learning_rate": 9.878342105263158e-05, "loss": 0.4289, "step": 24634 }, { "epoch": 1.3794937842983537, "grad_norm": 1.4226053953170776, "learning_rate": 9.878315789473684e-05, "loss": 0.4301, "step": 24635 }, { "epoch": 1.3795497816104827, "grad_norm": 1.3752599954605103, "learning_rate": 9.878289473684212e-05, "loss": 0.583, "step": 24636 
}, { "epoch": 1.3796057789226117, "grad_norm": 1.5356531143188477, "learning_rate": 9.878263157894738e-05, "loss": 0.4519, "step": 24637 }, { "epoch": 1.3796617762347407, "grad_norm": 1.1383914947509766, "learning_rate": 9.878236842105263e-05, "loss": 0.4648, "step": 24638 }, { "epoch": 1.3797177735468698, "grad_norm": 1.3420947790145874, "learning_rate": 9.87821052631579e-05, "loss": 0.4335, "step": 24639 }, { "epoch": 1.3797737708589988, "grad_norm": 1.1258677244186401, "learning_rate": 9.878184210526317e-05, "loss": 0.4219, "step": 24640 }, { "epoch": 1.3798297681711278, "grad_norm": 1.1578919887542725, "learning_rate": 9.878157894736843e-05, "loss": 0.4617, "step": 24641 }, { "epoch": 1.3798857654832568, "grad_norm": 1.464247226715088, "learning_rate": 9.878131578947369e-05, "loss": 0.5192, "step": 24642 }, { "epoch": 1.3799417627953858, "grad_norm": 1.4965718984603882, "learning_rate": 9.878105263157895e-05, "loss": 0.4869, "step": 24643 }, { "epoch": 1.3799977601075148, "grad_norm": 2.279975652694702, "learning_rate": 9.878078947368421e-05, "loss": 0.4597, "step": 24644 }, { "epoch": 1.3800537574196439, "grad_norm": 1.3425018787384033, "learning_rate": 9.878052631578948e-05, "loss": 0.3994, "step": 24645 }, { "epoch": 1.3801097547317729, "grad_norm": 1.2083648443222046, "learning_rate": 9.878026315789474e-05, "loss": 0.3594, "step": 24646 }, { "epoch": 1.380165752043902, "grad_norm": 1.3597885370254517, "learning_rate": 9.878e-05, "loss": 0.4676, "step": 24647 }, { "epoch": 1.380221749356031, "grad_norm": 1.4435182809829712, "learning_rate": 9.877973684210526e-05, "loss": 0.4462, "step": 24648 }, { "epoch": 1.38027774666816, "grad_norm": 1.4416115283966064, "learning_rate": 9.877947368421053e-05, "loss": 0.5033, "step": 24649 }, { "epoch": 1.380333743980289, "grad_norm": 1.3227752447128296, "learning_rate": 9.877921052631579e-05, "loss": 0.4932, "step": 24650 }, { "epoch": 1.380389741292418, "grad_norm": 1.5899603366851807, "learning_rate": 
9.877894736842107e-05, "loss": 0.6036, "step": 24651 }, { "epoch": 1.380445738604547, "grad_norm": 1.2183899879455566, "learning_rate": 9.877868421052631e-05, "loss": 0.4057, "step": 24652 }, { "epoch": 1.380501735916676, "grad_norm": 1.0418287515640259, "learning_rate": 9.877842105263159e-05, "loss": 0.3262, "step": 24653 }, { "epoch": 1.380557733228805, "grad_norm": 1.2916969060897827, "learning_rate": 9.877815789473684e-05, "loss": 0.4956, "step": 24654 }, { "epoch": 1.380613730540934, "grad_norm": 1.5164707899093628, "learning_rate": 9.877789473684212e-05, "loss": 0.46, "step": 24655 }, { "epoch": 1.380669727853063, "grad_norm": 1.257965326309204, "learning_rate": 9.877763157894738e-05, "loss": 0.5249, "step": 24656 }, { "epoch": 1.380725725165192, "grad_norm": 1.462067723274231, "learning_rate": 9.877736842105264e-05, "loss": 0.4827, "step": 24657 }, { "epoch": 1.3807817224773211, "grad_norm": 1.2253189086914062, "learning_rate": 9.87771052631579e-05, "loss": 0.379, "step": 24658 }, { "epoch": 1.3808377197894501, "grad_norm": 1.1978322267532349, "learning_rate": 9.877684210526317e-05, "loss": 0.3814, "step": 24659 }, { "epoch": 1.3808937171015792, "grad_norm": 1.9540668725967407, "learning_rate": 9.877657894736843e-05, "loss": 0.4833, "step": 24660 }, { "epoch": 1.3809497144137082, "grad_norm": 1.3607112169265747, "learning_rate": 9.877631578947369e-05, "loss": 0.5521, "step": 24661 }, { "epoch": 1.3810057117258372, "grad_norm": 1.5534237623214722, "learning_rate": 9.877605263157895e-05, "loss": 0.385, "step": 24662 }, { "epoch": 1.3810617090379662, "grad_norm": 1.330488681793213, "learning_rate": 9.877578947368421e-05, "loss": 0.5468, "step": 24663 }, { "epoch": 1.3811177063500952, "grad_norm": 1.4170063734054565, "learning_rate": 9.877552631578948e-05, "loss": 0.5088, "step": 24664 }, { "epoch": 1.3811737036622243, "grad_norm": 1.3981940746307373, "learning_rate": 9.877526315789474e-05, "loss": 0.5177, "step": 24665 }, { "epoch": 1.3812297009743533, 
"grad_norm": 1.315423846244812, "learning_rate": 9.8775e-05, "loss": 0.4008, "step": 24666 }, { "epoch": 1.3812856982864823, "grad_norm": 1.325331687927246, "learning_rate": 9.877473684210526e-05, "loss": 0.6356, "step": 24667 }, { "epoch": 1.3813416955986113, "grad_norm": 1.5587759017944336, "learning_rate": 9.877447368421054e-05, "loss": 0.5645, "step": 24668 }, { "epoch": 1.3813976929107403, "grad_norm": 1.4653170108795166, "learning_rate": 9.87742105263158e-05, "loss": 0.5262, "step": 24669 }, { "epoch": 1.3814536902228693, "grad_norm": 1.3540924787521362, "learning_rate": 9.877394736842105e-05, "loss": 0.3624, "step": 24670 }, { "epoch": 1.3815096875349984, "grad_norm": 1.3128352165222168, "learning_rate": 9.877368421052631e-05, "loss": 0.4842, "step": 24671 }, { "epoch": 1.3815656848471274, "grad_norm": 1.0599039793014526, "learning_rate": 9.877342105263159e-05, "loss": 0.3384, "step": 24672 }, { "epoch": 1.3816216821592564, "grad_norm": 1.2542729377746582, "learning_rate": 9.877315789473685e-05, "loss": 0.5512, "step": 24673 }, { "epoch": 1.3816776794713854, "grad_norm": 1.223557710647583, "learning_rate": 9.877289473684212e-05, "loss": 0.4321, "step": 24674 }, { "epoch": 1.3817336767835144, "grad_norm": 1.2568310499191284, "learning_rate": 9.877263157894737e-05, "loss": 0.5891, "step": 24675 }, { "epoch": 1.3817896740956435, "grad_norm": 1.4729034900665283, "learning_rate": 9.877236842105264e-05, "loss": 0.4209, "step": 24676 }, { "epoch": 1.3818456714077725, "grad_norm": 1.715546727180481, "learning_rate": 9.87721052631579e-05, "loss": 0.5552, "step": 24677 }, { "epoch": 1.3819016687199015, "grad_norm": 1.522024154663086, "learning_rate": 9.877184210526317e-05, "loss": 0.4391, "step": 24678 }, { "epoch": 1.3819576660320305, "grad_norm": 1.5103111267089844, "learning_rate": 9.877157894736843e-05, "loss": 0.6572, "step": 24679 }, { "epoch": 1.3820136633441595, "grad_norm": 1.2847040891647339, "learning_rate": 9.877131578947368e-05, "loss": 0.3829, "step": 
24680 }, { "epoch": 1.3820696606562886, "grad_norm": 1.3065253496170044, "learning_rate": 9.877105263157895e-05, "loss": 0.364, "step": 24681 }, { "epoch": 1.3821256579684176, "grad_norm": 1.60619056224823, "learning_rate": 9.877078947368421e-05, "loss": 0.4733, "step": 24682 }, { "epoch": 1.3821816552805466, "grad_norm": 1.3917906284332275, "learning_rate": 9.877052631578949e-05, "loss": 0.5068, "step": 24683 }, { "epoch": 1.3822376525926756, "grad_norm": 1.093767523765564, "learning_rate": 9.877026315789473e-05, "loss": 0.3872, "step": 24684 }, { "epoch": 1.3822936499048046, "grad_norm": 1.4227591753005981, "learning_rate": 9.877e-05, "loss": 0.565, "step": 24685 }, { "epoch": 1.3823496472169337, "grad_norm": 1.251604437828064, "learning_rate": 9.876973684210526e-05, "loss": 0.5833, "step": 24686 }, { "epoch": 1.3824056445290627, "grad_norm": 1.1730982065200806, "learning_rate": 9.876947368421054e-05, "loss": 0.4711, "step": 24687 }, { "epoch": 1.3824616418411917, "grad_norm": 1.510393500328064, "learning_rate": 9.87692105263158e-05, "loss": 0.486, "step": 24688 }, { "epoch": 1.3825176391533207, "grad_norm": 1.3401352167129517, "learning_rate": 9.876894736842106e-05, "loss": 0.4406, "step": 24689 }, { "epoch": 1.3825736364654497, "grad_norm": 1.283668041229248, "learning_rate": 9.876868421052632e-05, "loss": 0.5077, "step": 24690 }, { "epoch": 1.3826296337775787, "grad_norm": 1.4496607780456543, "learning_rate": 9.876842105263159e-05, "loss": 0.5748, "step": 24691 }, { "epoch": 1.3826856310897078, "grad_norm": 1.473398208618164, "learning_rate": 9.876815789473685e-05, "loss": 0.4832, "step": 24692 }, { "epoch": 1.3827416284018368, "grad_norm": 1.3518096208572388, "learning_rate": 9.876789473684211e-05, "loss": 0.3901, "step": 24693 }, { "epoch": 1.3827976257139658, "grad_norm": 1.4092508554458618, "learning_rate": 9.876763157894737e-05, "loss": 0.4113, "step": 24694 }, { "epoch": 1.3828536230260948, "grad_norm": 1.2037180662155151, "learning_rate": 
9.876736842105264e-05, "loss": 0.435, "step": 24695 }, { "epoch": 1.3829096203382238, "grad_norm": 1.2900842428207397, "learning_rate": 9.87671052631579e-05, "loss": 0.4582, "step": 24696 }, { "epoch": 1.3829656176503529, "grad_norm": 1.3820942640304565, "learning_rate": 9.876684210526316e-05, "loss": 0.5405, "step": 24697 }, { "epoch": 1.3830216149624819, "grad_norm": 1.4084657430648804, "learning_rate": 9.876657894736842e-05, "loss": 0.5001, "step": 24698 }, { "epoch": 1.383077612274611, "grad_norm": null, "learning_rate": 9.876657894736842e-05, "loss": 0.5658, "step": 24699 }, { "epoch": 1.3831336095867397, "grad_norm": 1.2814196348190308, "learning_rate": 9.876631578947368e-05, "loss": 0.4785, "step": 24700 }, { "epoch": 1.3831896068988687, "grad_norm": 1.270787239074707, "learning_rate": 9.876605263157895e-05, "loss": 0.4129, "step": 24701 }, { "epoch": 1.3832456042109977, "grad_norm": 1.2655234336853027, "learning_rate": 9.876578947368421e-05, "loss": 0.5402, "step": 24702 }, { "epoch": 1.3833016015231268, "grad_norm": 1.4280146360397339, "learning_rate": 9.876552631578947e-05, "loss": 0.5095, "step": 24703 }, { "epoch": 1.3833575988352558, "grad_norm": 1.4920822381973267, "learning_rate": 9.876526315789473e-05, "loss": 0.6155, "step": 24704 }, { "epoch": 1.3834135961473848, "grad_norm": 1.2366554737091064, "learning_rate": 9.876500000000001e-05, "loss": 0.5197, "step": 24705 }, { "epoch": 1.3834695934595138, "grad_norm": 1.5644608736038208, "learning_rate": 9.876473684210527e-05, "loss": 0.5479, "step": 24706 }, { "epoch": 1.3835255907716428, "grad_norm": 1.2335882186889648, "learning_rate": 9.876447368421054e-05, "loss": 0.3712, "step": 24707 }, { "epoch": 1.3835815880837719, "grad_norm": 1.6657646894454956, "learning_rate": 9.876421052631579e-05, "loss": 0.4121, "step": 24708 }, { "epoch": 1.3836375853959009, "grad_norm": 1.9175894260406494, "learning_rate": 9.876394736842106e-05, "loss": 0.4764, "step": 24709 }, { "epoch": 1.38369358270803, "grad_norm": 
1.399888515472412, "learning_rate": 9.876368421052632e-05, "loss": 0.5589, "step": 24710 }, { "epoch": 1.383749580020159, "grad_norm": 1.2780975103378296, "learning_rate": 9.876342105263159e-05, "loss": 0.3981, "step": 24711 }, { "epoch": 1.383805577332288, "grad_norm": 1.5270181894302368, "learning_rate": 9.876315789473685e-05, "loss": 0.3951, "step": 24712 }, { "epoch": 1.383861574644417, "grad_norm": 1.413232684135437, "learning_rate": 9.876289473684211e-05, "loss": 0.5846, "step": 24713 }, { "epoch": 1.383917571956546, "grad_norm": 1.5790107250213623, "learning_rate": 9.876263157894737e-05, "loss": 0.5064, "step": 24714 }, { "epoch": 1.383973569268675, "grad_norm": 1.382285714149475, "learning_rate": 9.876236842105263e-05, "loss": 0.5088, "step": 24715 }, { "epoch": 1.384029566580804, "grad_norm": 1.5090510845184326, "learning_rate": 9.87621052631579e-05, "loss": 0.5117, "step": 24716 }, { "epoch": 1.384085563892933, "grad_norm": 1.3253145217895508, "learning_rate": 9.876184210526316e-05, "loss": 0.4963, "step": 24717 }, { "epoch": 1.384141561205062, "grad_norm": 1.4267219305038452, "learning_rate": 9.876157894736842e-05, "loss": 0.5101, "step": 24718 }, { "epoch": 1.384197558517191, "grad_norm": 1.1575123071670532, "learning_rate": 9.876131578947368e-05, "loss": 0.3897, "step": 24719 }, { "epoch": 1.38425355582932, "grad_norm": 1.4060261249542236, "learning_rate": 9.876105263157896e-05, "loss": 0.3516, "step": 24720 }, { "epoch": 1.384309553141449, "grad_norm": 1.3479305505752563, "learning_rate": 9.876078947368422e-05, "loss": 0.3674, "step": 24721 }, { "epoch": 1.3843655504535781, "grad_norm": 1.3310964107513428, "learning_rate": 9.876052631578948e-05, "loss": 0.435, "step": 24722 }, { "epoch": 1.3844215477657071, "grad_norm": 1.4235334396362305, "learning_rate": 9.876026315789474e-05, "loss": 0.4469, "step": 24723 }, { "epoch": 1.3844775450778362, "grad_norm": 1.3577880859375, "learning_rate": 9.876000000000001e-05, "loss": 0.4412, "step": 24724 }, { 
"epoch": 1.3845335423899652, "grad_norm": 1.4240859746932983, "learning_rate": 9.875973684210527e-05, "loss": 0.5463, "step": 24725 }, { "epoch": 1.3845895397020942, "grad_norm": 1.590160608291626, "learning_rate": 9.875947368421053e-05, "loss": 0.5334, "step": 24726 }, { "epoch": 1.3846455370142232, "grad_norm": 1.2518236637115479, "learning_rate": 9.875921052631579e-05, "loss": 0.4658, "step": 24727 }, { "epoch": 1.3847015343263522, "grad_norm": 1.2080436944961548, "learning_rate": 9.875894736842106e-05, "loss": 0.4117, "step": 24728 }, { "epoch": 1.3847575316384813, "grad_norm": 1.5010343790054321, "learning_rate": 9.875868421052632e-05, "loss": 0.4611, "step": 24729 }, { "epoch": 1.3848135289506103, "grad_norm": 1.06449294090271, "learning_rate": 9.87584210526316e-05, "loss": 0.3236, "step": 24730 }, { "epoch": 1.3848695262627393, "grad_norm": 1.2299138307571411, "learning_rate": 9.875815789473684e-05, "loss": 0.4756, "step": 24731 }, { "epoch": 1.3849255235748683, "grad_norm": 1.4848288297653198, "learning_rate": 9.87578947368421e-05, "loss": 0.4313, "step": 24732 }, { "epoch": 1.3849815208869973, "grad_norm": 1.2216533422470093, "learning_rate": 9.875763157894737e-05, "loss": 0.5013, "step": 24733 }, { "epoch": 1.3850375181991263, "grad_norm": 1.5379114151000977, "learning_rate": 9.875736842105263e-05, "loss": 0.5764, "step": 24734 }, { "epoch": 1.3850935155112554, "grad_norm": 1.2929130792617798, "learning_rate": 9.875710526315791e-05, "loss": 0.4394, "step": 24735 }, { "epoch": 1.3851495128233844, "grad_norm": 1.3166282176971436, "learning_rate": 9.875684210526315e-05, "loss": 0.3692, "step": 24736 }, { "epoch": 1.3852055101355134, "grad_norm": 1.3195898532867432, "learning_rate": 9.875657894736843e-05, "loss": 0.3983, "step": 24737 }, { "epoch": 1.3852615074476424, "grad_norm": 1.0964648723602295, "learning_rate": 9.875631578947369e-05, "loss": 0.3791, "step": 24738 }, { "epoch": 1.3853175047597714, "grad_norm": 1.157021164894104, "learning_rate": 
9.875605263157896e-05, "loss": 0.4123, "step": 24739 }, { "epoch": 1.3853735020719005, "grad_norm": 1.494493842124939, "learning_rate": 9.87557894736842e-05, "loss": 0.5978, "step": 24740 }, { "epoch": 1.3854294993840295, "grad_norm": 1.2963590621948242, "learning_rate": 9.875552631578948e-05, "loss": 0.4444, "step": 24741 }, { "epoch": 1.3854854966961585, "grad_norm": 1.553191900253296, "learning_rate": 9.875526315789474e-05, "loss": 0.5829, "step": 24742 }, { "epoch": 1.3855414940082875, "grad_norm": 1.1810563802719116, "learning_rate": 9.875500000000001e-05, "loss": 0.3162, "step": 24743 }, { "epoch": 1.3855974913204165, "grad_norm": 1.3519984483718872, "learning_rate": 9.875473684210527e-05, "loss": 0.5755, "step": 24744 }, { "epoch": 1.3856534886325456, "grad_norm": 1.3369777202606201, "learning_rate": 9.875447368421053e-05, "loss": 0.5488, "step": 24745 }, { "epoch": 1.3857094859446746, "grad_norm": 1.6594231128692627, "learning_rate": 9.875421052631579e-05, "loss": 0.7104, "step": 24746 }, { "epoch": 1.3857654832568036, "grad_norm": 1.256836175918579, "learning_rate": 9.875394736842106e-05, "loss": 0.403, "step": 24747 }, { "epoch": 1.3858214805689326, "grad_norm": 1.3793790340423584, "learning_rate": 9.875368421052632e-05, "loss": 0.5349, "step": 24748 }, { "epoch": 1.3858774778810616, "grad_norm": 1.877210259437561, "learning_rate": 9.875342105263158e-05, "loss": 0.5041, "step": 24749 }, { "epoch": 1.3859334751931907, "grad_norm": 1.3058441877365112, "learning_rate": 9.875315789473684e-05, "loss": 0.5957, "step": 24750 }, { "epoch": 1.3859894725053197, "grad_norm": 1.1621700525283813, "learning_rate": 9.87528947368421e-05, "loss": 0.3936, "step": 24751 }, { "epoch": 1.3860454698174487, "grad_norm": 1.2263314723968506, "learning_rate": 9.875263157894738e-05, "loss": 0.4816, "step": 24752 }, { "epoch": 1.3861014671295777, "grad_norm": 1.1986078023910522, "learning_rate": 9.875236842105264e-05, "loss": 0.4512, "step": 24753 }, { "epoch": 1.3861574644417067, 
"grad_norm": 1.5553449392318726, "learning_rate": 9.87521052631579e-05, "loss": 0.4484, "step": 24754 }, { "epoch": 1.3862134617538358, "grad_norm": 1.3770147562026978, "learning_rate": 9.875184210526316e-05, "loss": 0.5147, "step": 24755 }, { "epoch": 1.3862694590659648, "grad_norm": 1.026626706123352, "learning_rate": 9.875157894736843e-05, "loss": 0.323, "step": 24756 }, { "epoch": 1.3863254563780938, "grad_norm": 1.424196481704712, "learning_rate": 9.875131578947369e-05, "loss": 0.5097, "step": 24757 }, { "epoch": 1.3863814536902228, "grad_norm": 1.755611777305603, "learning_rate": 9.875105263157895e-05, "loss": 0.52, "step": 24758 }, { "epoch": 1.3864374510023518, "grad_norm": 1.2150194644927979, "learning_rate": 9.875078947368421e-05, "loss": 0.4028, "step": 24759 }, { "epoch": 1.3864934483144808, "grad_norm": 1.298742413520813, "learning_rate": 9.875052631578948e-05, "loss": 0.4336, "step": 24760 }, { "epoch": 1.3865494456266099, "grad_norm": 1.5085780620574951, "learning_rate": 9.875026315789474e-05, "loss": 0.3935, "step": 24761 }, { "epoch": 1.3866054429387389, "grad_norm": 1.3242181539535522, "learning_rate": 9.875000000000002e-05, "loss": 0.51, "step": 24762 }, { "epoch": 1.386661440250868, "grad_norm": 1.444621205329895, "learning_rate": 9.874973684210526e-05, "loss": 0.4794, "step": 24763 }, { "epoch": 1.386717437562997, "grad_norm": 1.2220877408981323, "learning_rate": 9.874947368421053e-05, "loss": 0.4074, "step": 24764 }, { "epoch": 1.386773434875126, "grad_norm": 1.3091826438903809, "learning_rate": 9.87492105263158e-05, "loss": 0.4074, "step": 24765 }, { "epoch": 1.386829432187255, "grad_norm": 1.3461570739746094, "learning_rate": 9.874894736842105e-05, "loss": 0.4497, "step": 24766 }, { "epoch": 1.386885429499384, "grad_norm": 1.4616492986679077, "learning_rate": 9.874868421052633e-05, "loss": 0.4912, "step": 24767 }, { "epoch": 1.386941426811513, "grad_norm": 1.343125343322754, "learning_rate": 9.874842105263157e-05, "loss": 0.5187, "step": 
24768 }, { "epoch": 1.386997424123642, "grad_norm": 1.45669424533844, "learning_rate": 9.874815789473685e-05, "loss": 0.5832, "step": 24769 }, { "epoch": 1.387053421435771, "grad_norm": 1.4133449792861938, "learning_rate": 9.87478947368421e-05, "loss": 0.4095, "step": 24770 }, { "epoch": 1.3871094187479, "grad_norm": 1.8313589096069336, "learning_rate": 9.874763157894738e-05, "loss": 0.6705, "step": 24771 }, { "epoch": 1.387165416060029, "grad_norm": 1.3852347135543823, "learning_rate": 9.874736842105264e-05, "loss": 0.5292, "step": 24772 }, { "epoch": 1.387221413372158, "grad_norm": 1.3958368301391602, "learning_rate": 9.87471052631579e-05, "loss": 0.3874, "step": 24773 }, { "epoch": 1.3872774106842871, "grad_norm": 1.496346354484558, "learning_rate": 9.874684210526316e-05, "loss": 0.5661, "step": 24774 }, { "epoch": 1.3873334079964161, "grad_norm": 1.2413619756698608, "learning_rate": 9.874657894736843e-05, "loss": 0.5856, "step": 24775 }, { "epoch": 1.3873894053085452, "grad_norm": 1.4150477647781372, "learning_rate": 9.874631578947369e-05, "loss": 0.4548, "step": 24776 }, { "epoch": 1.3874454026206742, "grad_norm": 1.2681676149368286, "learning_rate": 9.874605263157895e-05, "loss": 0.5001, "step": 24777 }, { "epoch": 1.3875013999328032, "grad_norm": 1.6500073671340942, "learning_rate": 9.874578947368421e-05, "loss": 0.5182, "step": 24778 }, { "epoch": 1.3875573972449322, "grad_norm": 1.3954724073410034, "learning_rate": 9.874552631578948e-05, "loss": 0.3805, "step": 24779 }, { "epoch": 1.3876133945570612, "grad_norm": 1.540405035018921, "learning_rate": 9.874526315789474e-05, "loss": 0.6096, "step": 24780 }, { "epoch": 1.3876693918691902, "grad_norm": 1.313897728919983, "learning_rate": 9.8745e-05, "loss": 0.4262, "step": 24781 }, { "epoch": 1.3877253891813193, "grad_norm": 1.4382654428482056, "learning_rate": 9.874473684210526e-05, "loss": 0.4548, "step": 24782 }, { "epoch": 1.3877813864934483, "grad_norm": 1.3728103637695312, "learning_rate": 
9.874447368421052e-05, "loss": 0.4775, "step": 24783 }, { "epoch": 1.3878373838055773, "grad_norm": 1.2177050113677979, "learning_rate": 9.87442105263158e-05, "loss": 0.4215, "step": 24784 }, { "epoch": 1.3878933811177063, "grad_norm": 1.2666003704071045, "learning_rate": 9.874394736842106e-05, "loss": 0.5811, "step": 24785 }, { "epoch": 1.3879493784298353, "grad_norm": 1.3011101484298706, "learning_rate": 9.874368421052632e-05, "loss": 0.4707, "step": 24786 }, { "epoch": 1.3880053757419644, "grad_norm": 1.4403101205825806, "learning_rate": 9.874342105263158e-05, "loss": 0.5231, "step": 24787 }, { "epoch": 1.3880613730540934, "grad_norm": 1.5996503829956055, "learning_rate": 9.874315789473685e-05, "loss": 0.5047, "step": 24788 }, { "epoch": 1.3881173703662224, "grad_norm": 1.383208155632019, "learning_rate": 9.874289473684211e-05, "loss": 0.4274, "step": 24789 }, { "epoch": 1.3881733676783514, "grad_norm": 1.2859172821044922, "learning_rate": 9.874263157894737e-05, "loss": 0.4462, "step": 24790 }, { "epoch": 1.3882293649904804, "grad_norm": 1.318159818649292, "learning_rate": 9.874236842105263e-05, "loss": 0.4422, "step": 24791 }, { "epoch": 1.3882853623026095, "grad_norm": 1.6016712188720703, "learning_rate": 9.87421052631579e-05, "loss": 0.4696, "step": 24792 }, { "epoch": 1.3883413596147385, "grad_norm": 1.4052642583847046, "learning_rate": 9.874184210526316e-05, "loss": 0.534, "step": 24793 }, { "epoch": 1.3883973569268675, "grad_norm": 1.2752387523651123, "learning_rate": 9.874157894736843e-05, "loss": 0.4008, "step": 24794 }, { "epoch": 1.3884533542389965, "grad_norm": 1.1638542413711548, "learning_rate": 9.874131578947368e-05, "loss": 0.494, "step": 24795 }, { "epoch": 1.3885093515511255, "grad_norm": 1.2391552925109863, "learning_rate": 9.874105263157895e-05, "loss": 0.5531, "step": 24796 }, { "epoch": 1.3885653488632546, "grad_norm": 1.5980744361877441, "learning_rate": 9.874078947368421e-05, "loss": 0.3775, "step": 24797 }, { "epoch": 1.3886213461753836, 
"grad_norm": 1.2987377643585205, "learning_rate": 9.874052631578949e-05, "loss": 0.448, "step": 24798 }, { "epoch": 1.3886773434875126, "grad_norm": 1.1683251857757568, "learning_rate": 9.874026315789475e-05, "loss": 0.3695, "step": 24799 }, { "epoch": 1.3887333407996416, "grad_norm": 1.540535807609558, "learning_rate": 9.874e-05, "loss": 0.3995, "step": 24800 }, { "epoch": 1.3887893381117706, "grad_norm": 1.7004040479660034, "learning_rate": 9.873973684210527e-05, "loss": 0.6755, "step": 24801 }, { "epoch": 1.3888453354238997, "grad_norm": 1.4696658849716187, "learning_rate": 9.873947368421053e-05, "loss": 0.5824, "step": 24802 }, { "epoch": 1.3889013327360287, "grad_norm": 1.2471377849578857, "learning_rate": 9.87392105263158e-05, "loss": 0.4334, "step": 24803 }, { "epoch": 1.3889573300481577, "grad_norm": 1.641444444656372, "learning_rate": 9.873894736842106e-05, "loss": 0.4461, "step": 24804 }, { "epoch": 1.3890133273602867, "grad_norm": 1.4771136045455933, "learning_rate": 9.873868421052632e-05, "loss": 0.4074, "step": 24805 }, { "epoch": 1.3890693246724157, "grad_norm": 1.2724125385284424, "learning_rate": 9.873842105263158e-05, "loss": 0.4007, "step": 24806 }, { "epoch": 1.3891253219845447, "grad_norm": 1.336830973625183, "learning_rate": 9.873815789473685e-05, "loss": 0.4776, "step": 24807 }, { "epoch": 1.3891813192966738, "grad_norm": 1.6851872205734253, "learning_rate": 9.873789473684211e-05, "loss": 0.5717, "step": 24808 }, { "epoch": 1.3892373166088028, "grad_norm": 1.4466724395751953, "learning_rate": 9.873763157894737e-05, "loss": 0.5386, "step": 24809 }, { "epoch": 1.3892933139209318, "grad_norm": 1.3866513967514038, "learning_rate": 9.873736842105263e-05, "loss": 0.4549, "step": 24810 }, { "epoch": 1.3893493112330608, "grad_norm": 1.4376996755599976, "learning_rate": 9.87371052631579e-05, "loss": 0.5588, "step": 24811 }, { "epoch": 1.3894053085451898, "grad_norm": 1.3783830404281616, "learning_rate": 9.873684210526316e-05, "loss": 0.3829, "step": 
24812 }, { "epoch": 1.3894613058573189, "grad_norm": 1.3257275819778442, "learning_rate": 9.873657894736842e-05, "loss": 0.4604, "step": 24813 }, { "epoch": 1.3895173031694479, "grad_norm": 1.4820529222488403, "learning_rate": 9.873631578947368e-05, "loss": 0.4265, "step": 24814 }, { "epoch": 1.389573300481577, "grad_norm": 1.401774287223816, "learning_rate": 9.873605263157896e-05, "loss": 0.4687, "step": 24815 }, { "epoch": 1.389629297793706, "grad_norm": 1.2722257375717163, "learning_rate": 9.873578947368422e-05, "loss": 0.4431, "step": 24816 }, { "epoch": 1.389685295105835, "grad_norm": 1.3507345914840698, "learning_rate": 9.873552631578949e-05, "loss": 0.499, "step": 24817 }, { "epoch": 1.389741292417964, "grad_norm": 1.3066329956054688, "learning_rate": 9.873526315789474e-05, "loss": 0.3926, "step": 24818 }, { "epoch": 1.389797289730093, "grad_norm": 1.2284746170043945, "learning_rate": 9.8735e-05, "loss": 0.4999, "step": 24819 }, { "epoch": 1.389853287042222, "grad_norm": 1.3337175846099854, "learning_rate": 9.873473684210527e-05, "loss": 0.4306, "step": 24820 }, { "epoch": 1.389909284354351, "grad_norm": 1.391411542892456, "learning_rate": 9.873447368421053e-05, "loss": 0.5146, "step": 24821 }, { "epoch": 1.38996528166648, "grad_norm": 1.3370726108551025, "learning_rate": 9.87342105263158e-05, "loss": 0.5088, "step": 24822 }, { "epoch": 1.390021278978609, "grad_norm": 1.313614010810852, "learning_rate": 9.873394736842105e-05, "loss": 0.545, "step": 24823 }, { "epoch": 1.390077276290738, "grad_norm": 1.2633293867111206, "learning_rate": 9.873368421052632e-05, "loss": 0.5053, "step": 24824 }, { "epoch": 1.390133273602867, "grad_norm": 1.53170907497406, "learning_rate": 9.873342105263158e-05, "loss": 0.541, "step": 24825 }, { "epoch": 1.3901892709149961, "grad_norm": 1.2162044048309326, "learning_rate": 9.873315789473685e-05, "loss": 0.4113, "step": 24826 }, { "epoch": 1.3902452682271251, "grad_norm": 1.4812928438186646, "learning_rate": 9.873289473684211e-05, 
"loss": 0.416, "step": 24827 }, { "epoch": 1.3903012655392541, "grad_norm": 1.2285056114196777, "learning_rate": 9.873263157894737e-05, "loss": 0.304, "step": 24828 }, { "epoch": 1.3903572628513832, "grad_norm": 1.3497986793518066, "learning_rate": 9.873236842105263e-05, "loss": 0.5413, "step": 24829 }, { "epoch": 1.3904132601635122, "grad_norm": 1.410062551498413, "learning_rate": 9.873210526315791e-05, "loss": 0.6767, "step": 24830 }, { "epoch": 1.3904692574756412, "grad_norm": 1.5519541501998901, "learning_rate": 9.873184210526317e-05, "loss": 0.5966, "step": 24831 }, { "epoch": 1.3905252547877702, "grad_norm": 1.116697907447815, "learning_rate": 9.873157894736843e-05, "loss": 0.474, "step": 24832 }, { "epoch": 1.3905812520998992, "grad_norm": 1.367313265800476, "learning_rate": 9.873131578947369e-05, "loss": 0.4658, "step": 24833 }, { "epoch": 1.3906372494120283, "grad_norm": 1.3255162239074707, "learning_rate": 9.873105263157896e-05, "loss": 0.5173, "step": 24834 }, { "epoch": 1.3906932467241573, "grad_norm": 1.3296301364898682, "learning_rate": 9.873078947368422e-05, "loss": 0.5345, "step": 24835 }, { "epoch": 1.3907492440362863, "grad_norm": 1.52505624294281, "learning_rate": 9.873052631578948e-05, "loss": 0.4608, "step": 24836 }, { "epoch": 1.3908052413484153, "grad_norm": 1.3161721229553223, "learning_rate": 9.873026315789474e-05, "loss": 0.5049, "step": 24837 }, { "epoch": 1.3908612386605443, "grad_norm": 1.3209775686264038, "learning_rate": 9.873e-05, "loss": 0.397, "step": 24838 }, { "epoch": 1.3909172359726734, "grad_norm": 1.4917718172073364, "learning_rate": 9.872973684210527e-05, "loss": 0.694, "step": 24839 }, { "epoch": 1.3909732332848024, "grad_norm": 1.4647622108459473, "learning_rate": 9.872947368421053e-05, "loss": 0.6038, "step": 24840 }, { "epoch": 1.3910292305969314, "grad_norm": 1.9479498863220215, "learning_rate": 9.872921052631579e-05, "loss": 0.5725, "step": 24841 }, { "epoch": 1.3910852279090604, "grad_norm": 1.154721736907959, 
"learning_rate": 9.872894736842105e-05, "loss": 0.3818, "step": 24842 }, { "epoch": 1.3911412252211894, "grad_norm": 1.3792271614074707, "learning_rate": 9.872868421052632e-05, "loss": 0.5345, "step": 24843 }, { "epoch": 1.3911972225333185, "grad_norm": 1.2258814573287964, "learning_rate": 9.872842105263158e-05, "loss": 0.4447, "step": 24844 }, { "epoch": 1.3912532198454475, "grad_norm": 1.2293710708618164, "learning_rate": 9.872815789473684e-05, "loss": 0.4907, "step": 24845 }, { "epoch": 1.3913092171575765, "grad_norm": 1.2616662979125977, "learning_rate": 9.87278947368421e-05, "loss": 0.5773, "step": 24846 }, { "epoch": 1.3913652144697055, "grad_norm": 1.1698933839797974, "learning_rate": 9.872763157894738e-05, "loss": 0.3963, "step": 24847 }, { "epoch": 1.3914212117818345, "grad_norm": 1.3462740182876587, "learning_rate": 9.872736842105264e-05, "loss": 0.4663, "step": 24848 }, { "epoch": 1.3914772090939636, "grad_norm": 1.2720924615859985, "learning_rate": 9.872710526315791e-05, "loss": 0.456, "step": 24849 }, { "epoch": 1.3915332064060926, "grad_norm": 1.2319821119308472, "learning_rate": 9.872684210526316e-05, "loss": 0.4582, "step": 24850 }, { "epoch": 1.3915892037182216, "grad_norm": 1.3817527294158936, "learning_rate": 9.872657894736843e-05, "loss": 0.558, "step": 24851 }, { "epoch": 1.3916452010303506, "grad_norm": 1.1048712730407715, "learning_rate": 9.872631578947369e-05, "loss": 0.405, "step": 24852 }, { "epoch": 1.3917011983424796, "grad_norm": 1.3013241291046143, "learning_rate": 9.872605263157895e-05, "loss": 0.4592, "step": 24853 }, { "epoch": 1.3917571956546086, "grad_norm": 1.7271647453308105, "learning_rate": 9.872578947368422e-05, "loss": 0.5207, "step": 24854 }, { "epoch": 1.3918131929667377, "grad_norm": 1.4949439764022827, "learning_rate": 9.872552631578947e-05, "loss": 0.4057, "step": 24855 }, { "epoch": 1.3918691902788667, "grad_norm": 1.1679201126098633, "learning_rate": 9.872526315789474e-05, "loss": 0.4945, "step": 24856 }, { "epoch": 
1.3919251875909957, "grad_norm": 1.4353044033050537, "learning_rate": 9.8725e-05, "loss": 0.4496, "step": 24857 }, { "epoch": 1.3919811849031247, "grad_norm": 1.3244861364364624, "learning_rate": 9.872473684210527e-05, "loss": 0.5183, "step": 24858 }, { "epoch": 1.3920371822152537, "grad_norm": 1.666304588317871, "learning_rate": 9.872447368421053e-05, "loss": 0.4756, "step": 24859 }, { "epoch": 1.3920931795273828, "grad_norm": 1.4096368551254272, "learning_rate": 9.87242105263158e-05, "loss": 0.5145, "step": 24860 }, { "epoch": 1.3921491768395118, "grad_norm": 1.434039831161499, "learning_rate": 9.872394736842105e-05, "loss": 0.3844, "step": 24861 }, { "epoch": 1.3922051741516408, "grad_norm": 1.3839205503463745, "learning_rate": 9.872368421052633e-05, "loss": 0.4218, "step": 24862 }, { "epoch": 1.3922611714637698, "grad_norm": 1.1076642274856567, "learning_rate": 9.872342105263159e-05, "loss": 0.3004, "step": 24863 }, { "epoch": 1.3923171687758988, "grad_norm": 1.3232903480529785, "learning_rate": 9.872315789473685e-05, "loss": 0.4578, "step": 24864 }, { "epoch": 1.3923731660880279, "grad_norm": 1.181853175163269, "learning_rate": 9.87228947368421e-05, "loss": 0.4971, "step": 24865 }, { "epoch": 1.3924291634001569, "grad_norm": 1.3774648904800415, "learning_rate": 9.872263157894738e-05, "loss": 0.4604, "step": 24866 }, { "epoch": 1.392485160712286, "grad_norm": 1.4425255060195923, "learning_rate": 9.872236842105264e-05, "loss": 0.4997, "step": 24867 }, { "epoch": 1.392541158024415, "grad_norm": 1.4591766595840454, "learning_rate": 9.87221052631579e-05, "loss": 0.5154, "step": 24868 }, { "epoch": 1.392597155336544, "grad_norm": 1.1732569932937622, "learning_rate": 9.872184210526316e-05, "loss": 0.3848, "step": 24869 }, { "epoch": 1.392653152648673, "grad_norm": 1.3114980459213257, "learning_rate": 9.872157894736842e-05, "loss": 0.4703, "step": 24870 }, { "epoch": 1.392709149960802, "grad_norm": 1.6508270502090454, "learning_rate": 9.872131578947369e-05, "loss": 
0.4463, "step": 24871 }, { "epoch": 1.392765147272931, "grad_norm": 1.3888405561447144, "learning_rate": 9.872105263157895e-05, "loss": 0.6197, "step": 24872 }, { "epoch": 1.39282114458506, "grad_norm": 1.3268065452575684, "learning_rate": 9.872078947368421e-05, "loss": 0.5795, "step": 24873 }, { "epoch": 1.392877141897189, "grad_norm": 1.2224912643432617, "learning_rate": 9.872052631578947e-05, "loss": 0.3589, "step": 24874 }, { "epoch": 1.392933139209318, "grad_norm": 1.2793575525283813, "learning_rate": 9.872026315789474e-05, "loss": 0.401, "step": 24875 }, { "epoch": 1.392989136521447, "grad_norm": 1.3169201612472534, "learning_rate": 9.872e-05, "loss": 0.5786, "step": 24876 }, { "epoch": 1.393045133833576, "grad_norm": 1.3333497047424316, "learning_rate": 9.871973684210528e-05, "loss": 0.5736, "step": 24877 }, { "epoch": 1.393101131145705, "grad_norm": 1.2146955728530884, "learning_rate": 9.871947368421052e-05, "loss": 0.4339, "step": 24878 }, { "epoch": 1.3931571284578341, "grad_norm": 1.5016287565231323, "learning_rate": 9.87192105263158e-05, "loss": 0.4464, "step": 24879 }, { "epoch": 1.3932131257699631, "grad_norm": 1.2557700872421265, "learning_rate": 9.871894736842106e-05, "loss": 0.437, "step": 24880 }, { "epoch": 1.3932691230820922, "grad_norm": 1.499138355255127, "learning_rate": 9.871868421052633e-05, "loss": 0.585, "step": 24881 }, { "epoch": 1.3933251203942212, "grad_norm": 1.5727580785751343, "learning_rate": 9.871842105263159e-05, "loss": 0.4809, "step": 24882 }, { "epoch": 1.3933811177063502, "grad_norm": 2.0146894454956055, "learning_rate": 9.871815789473685e-05, "loss": 0.6236, "step": 24883 }, { "epoch": 1.3934371150184792, "grad_norm": 1.3595463037490845, "learning_rate": 9.871789473684211e-05, "loss": 0.498, "step": 24884 }, { "epoch": 1.3934931123306082, "grad_norm": 1.3299007415771484, "learning_rate": 9.871763157894738e-05, "loss": 0.3855, "step": 24885 }, { "epoch": 1.3935491096427373, "grad_norm": 1.214621663093567, "learning_rate": 
9.871736842105264e-05, "loss": 0.4616, "step": 24886 }, { "epoch": 1.3936051069548663, "grad_norm": 1.3083232641220093, "learning_rate": 9.871710526315789e-05, "loss": 0.4835, "step": 24887 }, { "epoch": 1.3936611042669953, "grad_norm": 1.3820178508758545, "learning_rate": 9.871684210526316e-05, "loss": 0.4902, "step": 24888 }, { "epoch": 1.3937171015791243, "grad_norm": 1.3684327602386475, "learning_rate": 9.871657894736842e-05, "loss": 0.5788, "step": 24889 }, { "epoch": 1.3937730988912533, "grad_norm": 1.7087435722351074, "learning_rate": 9.87163157894737e-05, "loss": 0.6771, "step": 24890 }, { "epoch": 1.3938290962033824, "grad_norm": 1.7902201414108276, "learning_rate": 9.871605263157895e-05, "loss": 0.7825, "step": 24891 }, { "epoch": 1.3938850935155114, "grad_norm": 1.5060359239578247, "learning_rate": 9.871578947368421e-05, "loss": 0.4653, "step": 24892 }, { "epoch": 1.3939410908276404, "grad_norm": 1.374214768409729, "learning_rate": 9.871552631578947e-05, "loss": 0.4788, "step": 24893 }, { "epoch": 1.3939970881397694, "grad_norm": 1.6973198652267456, "learning_rate": 9.871526315789475e-05, "loss": 0.5671, "step": 24894 }, { "epoch": 1.3940530854518984, "grad_norm": 1.3596285581588745, "learning_rate": 9.8715e-05, "loss": 0.4583, "step": 24895 }, { "epoch": 1.3941090827640275, "grad_norm": 1.5764806270599365, "learning_rate": 9.871473684210527e-05, "loss": 0.4715, "step": 24896 }, { "epoch": 1.3941650800761565, "grad_norm": 2.0739612579345703, "learning_rate": 9.871447368421053e-05, "loss": 0.5395, "step": 24897 }, { "epoch": 1.3942210773882855, "grad_norm": 1.426306128501892, "learning_rate": 9.87142105263158e-05, "loss": 0.4057, "step": 24898 }, { "epoch": 1.3942770747004145, "grad_norm": 1.3731775283813477, "learning_rate": 9.871394736842106e-05, "loss": 0.4563, "step": 24899 }, { "epoch": 1.3943330720125435, "grad_norm": 1.3683106899261475, "learning_rate": 9.871368421052632e-05, "loss": 0.4421, "step": 24900 }, { "epoch": 1.3943890693246725, 
"grad_norm": 1.569212555885315, "learning_rate": 9.871342105263158e-05, "loss": 0.5724, "step": 24901 }, { "epoch": 1.3944450666368016, "grad_norm": 1.333946704864502, "learning_rate": 9.871315789473685e-05, "loss": 0.4599, "step": 24902 }, { "epoch": 1.3945010639489306, "grad_norm": 1.504699945449829, "learning_rate": 9.871289473684211e-05, "loss": 0.4934, "step": 24903 }, { "epoch": 1.3945570612610596, "grad_norm": 1.2900937795639038, "learning_rate": 9.871263157894737e-05, "loss": 0.4915, "step": 24904 }, { "epoch": 1.3946130585731886, "grad_norm": 1.4753167629241943, "learning_rate": 9.871236842105263e-05, "loss": 0.4803, "step": 24905 }, { "epoch": 1.3946690558853176, "grad_norm": 2.1940629482269287, "learning_rate": 9.871210526315789e-05, "loss": 0.6225, "step": 24906 }, { "epoch": 1.3947250531974464, "grad_norm": 1.283613920211792, "learning_rate": 9.871184210526316e-05, "loss": 0.4688, "step": 24907 }, { "epoch": 1.3947810505095755, "grad_norm": 1.2940969467163086, "learning_rate": 9.871157894736842e-05, "loss": 0.5717, "step": 24908 }, { "epoch": 1.3948370478217045, "grad_norm": 1.5835634469985962, "learning_rate": 9.87113157894737e-05, "loss": 0.4983, "step": 24909 }, { "epoch": 1.3948930451338335, "grad_norm": 1.1694972515106201, "learning_rate": 9.871105263157894e-05, "loss": 0.3836, "step": 24910 }, { "epoch": 1.3949490424459625, "grad_norm": 1.377196192741394, "learning_rate": 9.871078947368422e-05, "loss": 0.4218, "step": 24911 }, { "epoch": 1.3950050397580915, "grad_norm": 1.129834771156311, "learning_rate": 9.871052631578948e-05, "loss": 0.4985, "step": 24912 }, { "epoch": 1.3950610370702206, "grad_norm": 1.351381540298462, "learning_rate": 9.871026315789475e-05, "loss": 0.5257, "step": 24913 }, { "epoch": 1.3951170343823496, "grad_norm": 1.200209379196167, "learning_rate": 9.871000000000001e-05, "loss": 0.4166, "step": 24914 }, { "epoch": 1.3951730316944786, "grad_norm": 1.3657712936401367, "learning_rate": 9.870973684210527e-05, "loss": 0.5301, 
"step": 24915 }, { "epoch": 1.3952290290066076, "grad_norm": 1.5016694068908691, "learning_rate": 9.870947368421053e-05, "loss": 0.4532, "step": 24916 }, { "epoch": 1.3952850263187366, "grad_norm": 1.2941175699234009, "learning_rate": 9.87092105263158e-05, "loss": 0.4956, "step": 24917 }, { "epoch": 1.3953410236308657, "grad_norm": 1.1918156147003174, "learning_rate": 9.870894736842106e-05, "loss": 0.427, "step": 24918 }, { "epoch": 1.3953970209429947, "grad_norm": 1.270900011062622, "learning_rate": 9.870868421052632e-05, "loss": 0.4563, "step": 24919 }, { "epoch": 1.3954530182551237, "grad_norm": 1.9775011539459229, "learning_rate": 9.870842105263158e-05, "loss": 0.5685, "step": 24920 }, { "epoch": 1.3955090155672527, "grad_norm": 1.4194166660308838, "learning_rate": 9.870815789473685e-05, "loss": 0.5493, "step": 24921 }, { "epoch": 1.3955650128793817, "grad_norm": 1.7846382856369019, "learning_rate": 9.870789473684211e-05, "loss": 0.4364, "step": 24922 }, { "epoch": 1.3956210101915107, "grad_norm": 1.2885242700576782, "learning_rate": 9.870763157894737e-05, "loss": 0.4624, "step": 24923 }, { "epoch": 1.3956770075036398, "grad_norm": 1.304019570350647, "learning_rate": 9.870736842105263e-05, "loss": 0.3475, "step": 24924 }, { "epoch": 1.3957330048157688, "grad_norm": 1.1312437057495117, "learning_rate": 9.870710526315789e-05, "loss": 0.4062, "step": 24925 }, { "epoch": 1.3957890021278978, "grad_norm": 1.23923921585083, "learning_rate": 9.870684210526317e-05, "loss": 0.3765, "step": 24926 }, { "epoch": 1.3958449994400268, "grad_norm": 1.3791269063949585, "learning_rate": 9.870657894736843e-05, "loss": 0.4871, "step": 24927 }, { "epoch": 1.3959009967521558, "grad_norm": 1.3312296867370605, "learning_rate": 9.870631578947369e-05, "loss": 0.3912, "step": 24928 }, { "epoch": 1.3959569940642849, "grad_norm": 1.7769653797149658, "learning_rate": 9.870605263157895e-05, "loss": 0.475, "step": 24929 }, { "epoch": 1.3960129913764139, "grad_norm": 1.4600893259048462, 
"learning_rate": 9.870578947368422e-05, "loss": 0.5491, "step": 24930 }, { "epoch": 1.396068988688543, "grad_norm": 1.404732346534729, "learning_rate": 9.870552631578948e-05, "loss": 0.4643, "step": 24931 }, { "epoch": 1.396124986000672, "grad_norm": 1.6797943115234375, "learning_rate": 9.870526315789475e-05, "loss": 0.7382, "step": 24932 }, { "epoch": 1.396180983312801, "grad_norm": 1.4181331396102905, "learning_rate": 9.8705e-05, "loss": 0.5598, "step": 24933 }, { "epoch": 1.39623698062493, "grad_norm": 1.5253980159759521, "learning_rate": 9.870473684210527e-05, "loss": 0.4718, "step": 24934 }, { "epoch": 1.396292977937059, "grad_norm": 1.158437728881836, "learning_rate": 9.870447368421053e-05, "loss": 0.3587, "step": 24935 }, { "epoch": 1.396348975249188, "grad_norm": 1.0369495153427124, "learning_rate": 9.87042105263158e-05, "loss": 0.2864, "step": 24936 }, { "epoch": 1.396404972561317, "grad_norm": 1.5150394439697266, "learning_rate": 9.870394736842105e-05, "loss": 0.4394, "step": 24937 }, { "epoch": 1.396460969873446, "grad_norm": 1.1184009313583374, "learning_rate": 9.870368421052632e-05, "loss": 0.3565, "step": 24938 }, { "epoch": 1.396516967185575, "grad_norm": 1.5306912660598755, "learning_rate": 9.870342105263158e-05, "loss": 0.5793, "step": 24939 }, { "epoch": 1.396572964497704, "grad_norm": 1.180704951286316, "learning_rate": 9.870315789473684e-05, "loss": 0.4901, "step": 24940 }, { "epoch": 1.396628961809833, "grad_norm": 1.214342474937439, "learning_rate": 9.870289473684212e-05, "loss": 0.527, "step": 24941 }, { "epoch": 1.396684959121962, "grad_norm": 1.1204359531402588, "learning_rate": 9.870263157894736e-05, "loss": 0.6086, "step": 24942 }, { "epoch": 1.3967409564340911, "grad_norm": 1.6315031051635742, "learning_rate": 9.870236842105264e-05, "loss": 0.6003, "step": 24943 }, { "epoch": 1.3967969537462201, "grad_norm": 1.6887123584747314, "learning_rate": 9.87021052631579e-05, "loss": 0.6169, "step": 24944 }, { "epoch": 1.3968529510583492, 
"grad_norm": 1.3773730993270874, "learning_rate": 9.870184210526317e-05, "loss": 0.4426, "step": 24945 }, { "epoch": 1.3969089483704782, "grad_norm": 1.2887552976608276, "learning_rate": 9.870157894736843e-05, "loss": 0.5258, "step": 24946 }, { "epoch": 1.3969649456826072, "grad_norm": 1.3308465480804443, "learning_rate": 9.870131578947369e-05, "loss": 0.5726, "step": 24947 }, { "epoch": 1.3970209429947362, "grad_norm": 1.4514648914337158, "learning_rate": 9.870105263157895e-05, "loss": 0.4428, "step": 24948 }, { "epoch": 1.3970769403068652, "grad_norm": 1.5108258724212646, "learning_rate": 9.870078947368422e-05, "loss": 0.4245, "step": 24949 }, { "epoch": 1.3971329376189943, "grad_norm": 1.440600872039795, "learning_rate": 9.870052631578948e-05, "loss": 0.4508, "step": 24950 }, { "epoch": 1.3971889349311233, "grad_norm": 1.2561041116714478, "learning_rate": 9.870026315789474e-05, "loss": 0.4105, "step": 24951 }, { "epoch": 1.3972449322432523, "grad_norm": 1.3195409774780273, "learning_rate": 9.87e-05, "loss": 0.5741, "step": 24952 }, { "epoch": 1.3973009295553813, "grad_norm": 1.2621251344680786, "learning_rate": 9.869973684210527e-05, "loss": 0.42, "step": 24953 }, { "epoch": 1.3973569268675103, "grad_norm": 1.4393939971923828, "learning_rate": 9.869947368421053e-05, "loss": 0.4588, "step": 24954 }, { "epoch": 1.3974129241796394, "grad_norm": 1.3184726238250732, "learning_rate": 9.869921052631579e-05, "loss": 0.3996, "step": 24955 }, { "epoch": 1.3974689214917684, "grad_norm": 1.3019930124282837, "learning_rate": 9.869894736842105e-05, "loss": 0.35, "step": 24956 }, { "epoch": 1.3975249188038974, "grad_norm": 1.455165982246399, "learning_rate": 9.869868421052631e-05, "loss": 0.471, "step": 24957 }, { "epoch": 1.3975809161160264, "grad_norm": 1.0577287673950195, "learning_rate": 9.869842105263159e-05, "loss": 0.3745, "step": 24958 }, { "epoch": 1.3976369134281554, "grad_norm": 1.6369513273239136, "learning_rate": 9.869815789473685e-05, "loss": 0.5191, "step": 
24959 }, { "epoch": 1.3976929107402845, "grad_norm": 1.1356538534164429, "learning_rate": 9.86978947368421e-05, "loss": 0.4098, "step": 24960 }, { "epoch": 1.3977489080524135, "grad_norm": 1.5345463752746582, "learning_rate": 9.869763157894736e-05, "loss": 0.5791, "step": 24961 }, { "epoch": 1.3978049053645425, "grad_norm": 1.3994766473770142, "learning_rate": 9.869736842105264e-05, "loss": 0.4134, "step": 24962 }, { "epoch": 1.3978609026766715, "grad_norm": 1.4844574928283691, "learning_rate": 9.86971052631579e-05, "loss": 0.7353, "step": 24963 }, { "epoch": 1.3979168999888005, "grad_norm": 1.3428765535354614, "learning_rate": 9.869684210526317e-05, "loss": 0.4492, "step": 24964 }, { "epoch": 1.3979728973009296, "grad_norm": 1.1608526706695557, "learning_rate": 9.869657894736842e-05, "loss": 0.3797, "step": 24965 }, { "epoch": 1.3980288946130586, "grad_norm": 1.1632158756256104, "learning_rate": 9.869631578947369e-05, "loss": 0.3693, "step": 24966 }, { "epoch": 1.3980848919251876, "grad_norm": 1.0819597244262695, "learning_rate": 9.869605263157895e-05, "loss": 0.3702, "step": 24967 }, { "epoch": 1.3981408892373166, "grad_norm": 1.1602689027786255, "learning_rate": 9.869578947368422e-05, "loss": 0.4737, "step": 24968 }, { "epoch": 1.3981968865494456, "grad_norm": 1.334148645401001, "learning_rate": 9.869552631578948e-05, "loss": 0.4082, "step": 24969 }, { "epoch": 1.3982528838615746, "grad_norm": 1.1840577125549316, "learning_rate": 9.869526315789474e-05, "loss": 0.4302, "step": 24970 }, { "epoch": 1.3983088811737037, "grad_norm": 1.401780366897583, "learning_rate": 9.8695e-05, "loss": 0.4981, "step": 24971 }, { "epoch": 1.3983648784858327, "grad_norm": 1.2232701778411865, "learning_rate": 9.869473684210528e-05, "loss": 0.3785, "step": 24972 }, { "epoch": 1.3984208757979617, "grad_norm": 1.3001359701156616, "learning_rate": 9.869447368421054e-05, "loss": 0.6799, "step": 24973 }, { "epoch": 1.3984768731100907, "grad_norm": 1.3686200380325317, "learning_rate": 
9.86942105263158e-05, "loss": 0.4898, "step": 24974 }, { "epoch": 1.3985328704222197, "grad_norm": 1.2565200328826904, "learning_rate": 9.869394736842106e-05, "loss": 0.5439, "step": 24975 }, { "epoch": 1.3985888677343488, "grad_norm": 1.2554477453231812, "learning_rate": 9.869368421052632e-05, "loss": 0.3778, "step": 24976 }, { "epoch": 1.3986448650464778, "grad_norm": 1.2617875337600708, "learning_rate": 9.869342105263159e-05, "loss": 0.5034, "step": 24977 }, { "epoch": 1.3987008623586068, "grad_norm": 1.0165064334869385, "learning_rate": 9.869315789473685e-05, "loss": 0.3976, "step": 24978 }, { "epoch": 1.3987568596707358, "grad_norm": 1.1564257144927979, "learning_rate": 9.869289473684211e-05, "loss": 0.4893, "step": 24979 }, { "epoch": 1.3988128569828648, "grad_norm": 1.5127946138381958, "learning_rate": 9.869263157894737e-05, "loss": 0.6844, "step": 24980 }, { "epoch": 1.3988688542949939, "grad_norm": 1.4281575679779053, "learning_rate": 9.869236842105264e-05, "loss": 0.481, "step": 24981 }, { "epoch": 1.3989248516071229, "grad_norm": 1.6895654201507568, "learning_rate": 9.86921052631579e-05, "loss": 0.5923, "step": 24982 }, { "epoch": 1.398980848919252, "grad_norm": 1.4220174551010132, "learning_rate": 9.869184210526316e-05, "loss": 0.4944, "step": 24983 }, { "epoch": 1.399036846231381, "grad_norm": 1.6604970693588257, "learning_rate": 9.869157894736842e-05, "loss": 0.5302, "step": 24984 }, { "epoch": 1.39909284354351, "grad_norm": 1.1861413717269897, "learning_rate": 9.86913157894737e-05, "loss": 0.4556, "step": 24985 }, { "epoch": 1.399148840855639, "grad_norm": 1.10104238986969, "learning_rate": 9.869105263157895e-05, "loss": 0.3691, "step": 24986 }, { "epoch": 1.399204838167768, "grad_norm": 1.4999996423721313, "learning_rate": 9.869078947368423e-05, "loss": 0.488, "step": 24987 }, { "epoch": 1.399260835479897, "grad_norm": 1.4314191341400146, "learning_rate": 9.869052631578947e-05, "loss": 0.5801, "step": 24988 }, { "epoch": 1.399316832792026, 
"grad_norm": 1.188765287399292, "learning_rate": 9.869026315789475e-05, "loss": 0.4722, "step": 24989 }, { "epoch": 1.399372830104155, "grad_norm": 1.447745442390442, "learning_rate": 9.869e-05, "loss": 0.4674, "step": 24990 }, { "epoch": 1.399428827416284, "grad_norm": 1.2270760536193848, "learning_rate": 9.868973684210527e-05, "loss": 0.4121, "step": 24991 }, { "epoch": 1.399484824728413, "grad_norm": 1.1734915971755981, "learning_rate": 9.868947368421052e-05, "loss": 0.385, "step": 24992 }, { "epoch": 1.399540822040542, "grad_norm": 1.1888699531555176, "learning_rate": 9.868921052631578e-05, "loss": 0.3492, "step": 24993 }, { "epoch": 1.399596819352671, "grad_norm": 1.3761533498764038, "learning_rate": 9.868894736842106e-05, "loss": 0.5162, "step": 24994 }, { "epoch": 1.3996528166648001, "grad_norm": 1.2712526321411133, "learning_rate": 9.868868421052632e-05, "loss": 0.5014, "step": 24995 }, { "epoch": 1.3997088139769291, "grad_norm": 1.1590772867202759, "learning_rate": 9.868842105263159e-05, "loss": 0.3405, "step": 24996 }, { "epoch": 1.3997648112890582, "grad_norm": 1.3925637006759644, "learning_rate": 9.868815789473684e-05, "loss": 0.4855, "step": 24997 }, { "epoch": 1.3998208086011872, "grad_norm": 1.4906693696975708, "learning_rate": 9.868789473684211e-05, "loss": 0.4965, "step": 24998 }, { "epoch": 1.3998768059133162, "grad_norm": 1.0911304950714111, "learning_rate": 9.868763157894737e-05, "loss": 0.5819, "step": 24999 }, { "epoch": 1.3999328032254452, "grad_norm": 1.2345770597457886, "learning_rate": 9.868736842105264e-05, "loss": 0.3439, "step": 25000 }, { "epoch": 1.3999888005375742, "grad_norm": 1.4910013675689697, "learning_rate": 9.86871052631579e-05, "loss": 0.4444, "step": 25001 }, { "epoch": 1.4000447978497033, "grad_norm": 1.181877613067627, "learning_rate": 9.868684210526316e-05, "loss": 0.4475, "step": 25002 }, { "epoch": 1.4001007951618323, "grad_norm": 1.181572675704956, "learning_rate": 9.868657894736842e-05, "loss": 0.4454, "step": 25003 
}, { "epoch": 1.4001567924739613, "grad_norm": 1.3123735189437866, "learning_rate": 9.86863157894737e-05, "loss": 0.4238, "step": 25004 }, { "epoch": 1.4002127897860903, "grad_norm": 1.2522852420806885, "learning_rate": 9.868605263157896e-05, "loss": 0.3972, "step": 25005 }, { "epoch": 1.4002687870982193, "grad_norm": 1.5129053592681885, "learning_rate": 9.868578947368422e-05, "loss": 0.4779, "step": 25006 }, { "epoch": 1.4003247844103484, "grad_norm": 1.5927486419677734, "learning_rate": 9.868552631578948e-05, "loss": 0.3409, "step": 25007 }, { "epoch": 1.4003807817224774, "grad_norm": 1.570186734199524, "learning_rate": 9.868526315789473e-05, "loss": 0.4116, "step": 25008 }, { "epoch": 1.4004367790346064, "grad_norm": 1.2253022193908691, "learning_rate": 9.868500000000001e-05, "loss": 0.4927, "step": 25009 }, { "epoch": 1.4004927763467354, "grad_norm": 1.3859150409698486, "learning_rate": 9.868473684210527e-05, "loss": 0.4001, "step": 25010 }, { "epoch": 1.4005487736588644, "grad_norm": 1.146719217300415, "learning_rate": 9.868447368421053e-05, "loss": 0.4135, "step": 25011 }, { "epoch": 1.4006047709709935, "grad_norm": 1.5594269037246704, "learning_rate": 9.868421052631579e-05, "loss": 0.4072, "step": 25012 }, { "epoch": 1.4006607682831225, "grad_norm": 1.3745996952056885, "learning_rate": 9.868394736842106e-05, "loss": 0.4967, "step": 25013 }, { "epoch": 1.4007167655952515, "grad_norm": 1.658186912536621, "learning_rate": 9.868368421052632e-05, "loss": 0.5801, "step": 25014 }, { "epoch": 1.4007727629073805, "grad_norm": 1.9151132106781006, "learning_rate": 9.868342105263158e-05, "loss": 0.5393, "step": 25015 }, { "epoch": 1.4008287602195095, "grad_norm": 1.4492130279541016, "learning_rate": 9.868315789473684e-05, "loss": 0.4651, "step": 25016 }, { "epoch": 1.4008847575316385, "grad_norm": 1.218334436416626, "learning_rate": 9.868289473684211e-05, "loss": 0.4098, "step": 25017 }, { "epoch": 1.4009407548437676, "grad_norm": 1.4384442567825317, "learning_rate": 
9.868263157894737e-05, "loss": 0.558, "step": 25018 }, { "epoch": 1.4009967521558966, "grad_norm": 1.6621073484420776, "learning_rate": 9.868236842105265e-05, "loss": 0.438, "step": 25019 }, { "epoch": 1.4010527494680256, "grad_norm": 1.2724928855895996, "learning_rate": 9.868210526315789e-05, "loss": 0.416, "step": 25020 }, { "epoch": 1.4011087467801546, "grad_norm": 1.4699560403823853, "learning_rate": 9.868184210526317e-05, "loss": 0.4839, "step": 25021 }, { "epoch": 1.4011647440922836, "grad_norm": 1.3664757013320923, "learning_rate": 9.868157894736843e-05, "loss": 0.5102, "step": 25022 }, { "epoch": 1.4012207414044127, "grad_norm": 1.3542282581329346, "learning_rate": 9.86813157894737e-05, "loss": 0.5138, "step": 25023 }, { "epoch": 1.4012767387165417, "grad_norm": 1.2294542789459229, "learning_rate": 9.868105263157896e-05, "loss": 0.4108, "step": 25024 }, { "epoch": 1.4013327360286707, "grad_norm": 1.1522819995880127, "learning_rate": 9.86807894736842e-05, "loss": 0.4243, "step": 25025 }, { "epoch": 1.4013887333407997, "grad_norm": 1.4976519346237183, "learning_rate": 9.868052631578948e-05, "loss": 0.6372, "step": 25026 }, { "epoch": 1.4014447306529287, "grad_norm": 1.2452642917633057, "learning_rate": 9.868026315789474e-05, "loss": 0.5069, "step": 25027 }, { "epoch": 1.4015007279650578, "grad_norm": 1.0968621969223022, "learning_rate": 9.868000000000001e-05, "loss": 0.4141, "step": 25028 }, { "epoch": 1.4015567252771868, "grad_norm": 1.0446137189865112, "learning_rate": 9.867973684210527e-05, "loss": 0.3676, "step": 25029 }, { "epoch": 1.4016127225893158, "grad_norm": 1.1803224086761475, "learning_rate": 9.867947368421053e-05, "loss": 0.5365, "step": 25030 }, { "epoch": 1.4016687199014446, "grad_norm": 1.3734524250030518, "learning_rate": 9.867921052631579e-05, "loss": 0.542, "step": 25031 }, { "epoch": 1.4017247172135736, "grad_norm": 1.3984856605529785, "learning_rate": 9.867894736842106e-05, "loss": 0.4396, "step": 25032 }, { "epoch": 1.4017807145257026, 
"grad_norm": 1.6063979864120483, "learning_rate": 9.867868421052632e-05, "loss": 0.5612, "step": 25033 }, { "epoch": 1.4018367118378317, "grad_norm": 1.452295184135437, "learning_rate": 9.867842105263158e-05, "loss": 0.5969, "step": 25034 }, { "epoch": 1.4018927091499607, "grad_norm": 1.2554432153701782, "learning_rate": 9.867815789473684e-05, "loss": 0.4216, "step": 25035 }, { "epoch": 1.4019487064620897, "grad_norm": 1.2694385051727295, "learning_rate": 9.867789473684212e-05, "loss": 0.4662, "step": 25036 }, { "epoch": 1.4020047037742187, "grad_norm": 1.1114654541015625, "learning_rate": 9.867763157894738e-05, "loss": 0.3756, "step": 25037 }, { "epoch": 1.4020607010863477, "grad_norm": 3.559643268585205, "learning_rate": 9.867736842105264e-05, "loss": 0.4125, "step": 25038 }, { "epoch": 1.4021166983984767, "grad_norm": 1.172831416130066, "learning_rate": 9.86771052631579e-05, "loss": 0.357, "step": 25039 }, { "epoch": 1.4021726957106058, "grad_norm": 1.3581202030181885, "learning_rate": 9.867684210526317e-05, "loss": 0.5453, "step": 25040 }, { "epoch": 1.4022286930227348, "grad_norm": 1.535194993019104, "learning_rate": 9.867657894736843e-05, "loss": 0.4958, "step": 25041 }, { "epoch": 1.4022846903348638, "grad_norm": 1.432216763496399, "learning_rate": 9.86763157894737e-05, "loss": 0.4126, "step": 25042 }, { "epoch": 1.4023406876469928, "grad_norm": 1.2078896760940552, "learning_rate": 9.867605263157895e-05, "loss": 0.4448, "step": 25043 }, { "epoch": 1.4023966849591218, "grad_norm": 1.6733994483947754, "learning_rate": 9.867578947368421e-05, "loss": 0.506, "step": 25044 }, { "epoch": 1.4024526822712509, "grad_norm": 1.4144052267074585, "learning_rate": 9.867552631578948e-05, "loss": 0.4617, "step": 25045 }, { "epoch": 1.4025086795833799, "grad_norm": 1.3199208974838257, "learning_rate": 9.867526315789474e-05, "loss": 0.3546, "step": 25046 }, { "epoch": 1.402564676895509, "grad_norm": 1.1983124017715454, "learning_rate": 9.8675e-05, "loss": 0.4099, "step": 25047 
}, { "epoch": 1.402620674207638, "grad_norm": 1.9280728101730347, "learning_rate": 9.867473684210526e-05, "loss": 0.4807, "step": 25048 }, { "epoch": 1.402676671519767, "grad_norm": 1.4171066284179688, "learning_rate": 9.867447368421053e-05, "loss": 0.4783, "step": 25049 }, { "epoch": 1.402732668831896, "grad_norm": 1.6370997428894043, "learning_rate": 9.867421052631579e-05, "loss": 0.4487, "step": 25050 }, { "epoch": 1.402788666144025, "grad_norm": 1.3617645502090454, "learning_rate": 9.867394736842107e-05, "loss": 0.3976, "step": 25051 }, { "epoch": 1.402844663456154, "grad_norm": 1.1175732612609863, "learning_rate": 9.867368421052631e-05, "loss": 0.3508, "step": 25052 }, { "epoch": 1.402900660768283, "grad_norm": 5.507682800292969, "learning_rate": 9.867342105263159e-05, "loss": 0.3978, "step": 25053 }, { "epoch": 1.402956658080412, "grad_norm": 1.6868797540664673, "learning_rate": 9.867315789473684e-05, "loss": 0.5949, "step": 25054 }, { "epoch": 1.403012655392541, "grad_norm": 1.537165641784668, "learning_rate": 9.867289473684212e-05, "loss": 0.4219, "step": 25055 }, { "epoch": 1.40306865270467, "grad_norm": 1.173524022102356, "learning_rate": 9.867263157894738e-05, "loss": 0.4213, "step": 25056 }, { "epoch": 1.403124650016799, "grad_norm": 1.24052095413208, "learning_rate": 9.867236842105264e-05, "loss": 0.4199, "step": 25057 }, { "epoch": 1.403180647328928, "grad_norm": 1.5992597341537476, "learning_rate": 9.86721052631579e-05, "loss": 0.5079, "step": 25058 }, { "epoch": 1.4032366446410571, "grad_norm": 1.393551230430603, "learning_rate": 9.867184210526317e-05, "loss": 0.4988, "step": 25059 }, { "epoch": 1.4032926419531861, "grad_norm": 1.587053894996643, "learning_rate": 9.867157894736843e-05, "loss": 0.5091, "step": 25060 }, { "epoch": 1.4033486392653152, "grad_norm": 1.4986960887908936, "learning_rate": 9.867131578947369e-05, "loss": 0.4199, "step": 25061 }, { "epoch": 1.4034046365774442, "grad_norm": 1.3810064792633057, "learning_rate": 
9.867105263157895e-05, "loss": 0.3867, "step": 25062 }, { "epoch": 1.4034606338895732, "grad_norm": 1.9216339588165283, "learning_rate": 9.867078947368421e-05, "loss": 0.4258, "step": 25063 }, { "epoch": 1.4035166312017022, "grad_norm": 1.329543113708496, "learning_rate": 9.867052631578948e-05, "loss": 0.437, "step": 25064 }, { "epoch": 1.4035726285138312, "grad_norm": 1.2471803426742554, "learning_rate": 9.867026315789474e-05, "loss": 0.4124, "step": 25065 }, { "epoch": 1.4036286258259603, "grad_norm": 1.6240177154541016, "learning_rate": 9.867e-05, "loss": 0.5176, "step": 25066 }, { "epoch": 1.4036846231380893, "grad_norm": 1.3207478523254395, "learning_rate": 9.866973684210526e-05, "loss": 0.4693, "step": 25067 }, { "epoch": 1.4037406204502183, "grad_norm": 1.6427310705184937, "learning_rate": 9.866947368421054e-05, "loss": 0.6914, "step": 25068 }, { "epoch": 1.4037966177623473, "grad_norm": 1.5911667346954346, "learning_rate": 9.86692105263158e-05, "loss": 0.5381, "step": 25069 }, { "epoch": 1.4038526150744763, "grad_norm": 8.670450210571289, "learning_rate": 9.866894736842105e-05, "loss": 0.5282, "step": 25070 }, { "epoch": 1.4039086123866054, "grad_norm": 1.9461487531661987, "learning_rate": 9.866868421052631e-05, "loss": 0.4911, "step": 25071 }, { "epoch": 1.4039646096987344, "grad_norm": 1.3057644367218018, "learning_rate": 9.866842105263159e-05, "loss": 0.5616, "step": 25072 }, { "epoch": 1.4040206070108634, "grad_norm": 1.534982442855835, "learning_rate": 9.866815789473685e-05, "loss": 0.4504, "step": 25073 }, { "epoch": 1.4040766043229924, "grad_norm": 1.2518433332443237, "learning_rate": 9.866789473684212e-05, "loss": 0.5359, "step": 25074 }, { "epoch": 1.4041326016351214, "grad_norm": 6.048405170440674, "learning_rate": 9.866763157894737e-05, "loss": 0.4873, "step": 25075 }, { "epoch": 1.4041885989472505, "grad_norm": 1.257227897644043, "learning_rate": 9.866736842105264e-05, "loss": 0.4599, "step": 25076 }, { "epoch": 1.4042445962593795, "grad_norm": 
1.189857006072998, "learning_rate": 9.86671052631579e-05, "loss": 0.4897, "step": 25077 }, { "epoch": 1.4043005935715085, "grad_norm": 1.6713621616363525, "learning_rate": 9.866684210526316e-05, "loss": 0.5393, "step": 25078 }, { "epoch": 1.4043565908836375, "grad_norm": 1.223427176475525, "learning_rate": 9.866657894736843e-05, "loss": 0.4036, "step": 25079 }, { "epoch": 1.4044125881957665, "grad_norm": 1.4842644929885864, "learning_rate": 9.866631578947368e-05, "loss": 0.4918, "step": 25080 }, { "epoch": 1.4044685855078956, "grad_norm": 1.2758557796478271, "learning_rate": 9.866605263157895e-05, "loss": 0.417, "step": 25081 }, { "epoch": 1.4045245828200246, "grad_norm": 1.3707480430603027, "learning_rate": 9.866578947368421e-05, "loss": 0.4824, "step": 25082 }, { "epoch": 1.4045805801321536, "grad_norm": 1.6210976839065552, "learning_rate": 9.866552631578949e-05, "loss": 0.4343, "step": 25083 }, { "epoch": 1.4046365774442826, "grad_norm": 1.2979321479797363, "learning_rate": 9.866526315789475e-05, "loss": 0.3599, "step": 25084 }, { "epoch": 1.4046925747564116, "grad_norm": 1.4203746318817139, "learning_rate": 9.8665e-05, "loss": 0.4211, "step": 25085 }, { "epoch": 1.4047485720685406, "grad_norm": 1.2466440200805664, "learning_rate": 9.866473684210526e-05, "loss": 0.3942, "step": 25086 }, { "epoch": 1.4048045693806697, "grad_norm": 1.479690432548523, "learning_rate": 9.866447368421054e-05, "loss": 0.4823, "step": 25087 }, { "epoch": 1.4048605666927987, "grad_norm": 1.480630874633789, "learning_rate": 9.86642105263158e-05, "loss": 0.4906, "step": 25088 }, { "epoch": 1.4049165640049277, "grad_norm": 1.1486139297485352, "learning_rate": 9.866394736842106e-05, "loss": 0.4376, "step": 25089 }, { "epoch": 1.4049725613170567, "grad_norm": 1.2108925580978394, "learning_rate": 9.866368421052632e-05, "loss": 0.4693, "step": 25090 }, { "epoch": 1.4050285586291857, "grad_norm": 1.401533603668213, "learning_rate": 9.866342105263159e-05, "loss": 0.4328, "step": 25091 }, { 
"epoch": 1.4050845559413148, "grad_norm": 1.4225749969482422, "learning_rate": 9.866315789473685e-05, "loss": 0.5919, "step": 25092 }, { "epoch": 1.4051405532534438, "grad_norm": 1.5397193431854248, "learning_rate": 9.866289473684211e-05, "loss": 0.5583, "step": 25093 }, { "epoch": 1.4051965505655728, "grad_norm": 1.6258461475372314, "learning_rate": 9.866263157894737e-05, "loss": 0.5521, "step": 25094 }, { "epoch": 1.4052525478777018, "grad_norm": 1.1896156072616577, "learning_rate": 9.866236842105263e-05, "loss": 0.3568, "step": 25095 }, { "epoch": 1.4053085451898308, "grad_norm": 1.330722689628601, "learning_rate": 9.86621052631579e-05, "loss": 0.418, "step": 25096 }, { "epoch": 1.4053645425019599, "grad_norm": 1.491829514503479, "learning_rate": 9.866184210526316e-05, "loss": 0.4913, "step": 25097 }, { "epoch": 1.4054205398140889, "grad_norm": 1.9061870574951172, "learning_rate": 9.866157894736842e-05, "loss": 0.6653, "step": 25098 }, { "epoch": 1.405476537126218, "grad_norm": 1.0821285247802734, "learning_rate": 9.866131578947368e-05, "loss": 0.3845, "step": 25099 }, { "epoch": 1.405532534438347, "grad_norm": 1.4515200853347778, "learning_rate": 9.866105263157895e-05, "loss": 0.4841, "step": 25100 }, { "epoch": 1.405588531750476, "grad_norm": 1.3448429107666016, "learning_rate": 9.866078947368421e-05, "loss": 0.3733, "step": 25101 }, { "epoch": 1.405644529062605, "grad_norm": 1.5497682094573975, "learning_rate": 9.866052631578947e-05, "loss": 0.7214, "step": 25102 }, { "epoch": 1.405700526374734, "grad_norm": 1.6154425144195557, "learning_rate": 9.866026315789473e-05, "loss": 0.4119, "step": 25103 }, { "epoch": 1.405756523686863, "grad_norm": 1.3904379606246948, "learning_rate": 9.866000000000001e-05, "loss": 0.3991, "step": 25104 }, { "epoch": 1.405812520998992, "grad_norm": 1.2952815294265747, "learning_rate": 9.865973684210527e-05, "loss": 0.4578, "step": 25105 }, { "epoch": 1.405868518311121, "grad_norm": 1.5114688873291016, "learning_rate": 
9.865947368421054e-05, "loss": 0.4603, "step": 25106 }, { "epoch": 1.40592451562325, "grad_norm": 1.2152762413024902, "learning_rate": 9.865921052631579e-05, "loss": 0.505, "step": 25107 }, { "epoch": 1.405980512935379, "grad_norm": 1.5251792669296265, "learning_rate": 9.865894736842106e-05, "loss": 0.4445, "step": 25108 }, { "epoch": 1.406036510247508, "grad_norm": 1.2245151996612549, "learning_rate": 9.865868421052632e-05, "loss": 0.4804, "step": 25109 }, { "epoch": 1.406092507559637, "grad_norm": 1.192751169204712, "learning_rate": 9.865842105263159e-05, "loss": 0.4295, "step": 25110 }, { "epoch": 1.4061485048717661, "grad_norm": 1.083000659942627, "learning_rate": 9.865815789473685e-05, "loss": 0.4174, "step": 25111 }, { "epoch": 1.4062045021838951, "grad_norm": 1.5058971643447876, "learning_rate": 9.86578947368421e-05, "loss": 0.4594, "step": 25112 }, { "epoch": 1.4062604994960242, "grad_norm": 1.3602256774902344, "learning_rate": 9.865763157894737e-05, "loss": 0.5681, "step": 25113 }, { "epoch": 1.4063164968081532, "grad_norm": 1.3668222427368164, "learning_rate": 9.865736842105263e-05, "loss": 0.4684, "step": 25114 }, { "epoch": 1.4063724941202822, "grad_norm": 1.3513565063476562, "learning_rate": 9.86571052631579e-05, "loss": 0.4067, "step": 25115 }, { "epoch": 1.4064284914324112, "grad_norm": 1.42878258228302, "learning_rate": 9.865684210526316e-05, "loss": 0.5119, "step": 25116 }, { "epoch": 1.4064844887445402, "grad_norm": 1.2328746318817139, "learning_rate": 9.865657894736842e-05, "loss": 0.49, "step": 25117 }, { "epoch": 1.4065404860566693, "grad_norm": 1.290832281112671, "learning_rate": 9.865631578947368e-05, "loss": 0.4261, "step": 25118 }, { "epoch": 1.4065964833687983, "grad_norm": 1.6286808252334595, "learning_rate": 9.865605263157896e-05, "loss": 0.548, "step": 25119 }, { "epoch": 1.4066524806809273, "grad_norm": 1.1603643894195557, "learning_rate": 9.865578947368422e-05, "loss": 0.4343, "step": 25120 }, { "epoch": 1.4067084779930563, 
"grad_norm": 1.461963415145874, "learning_rate": 9.865552631578948e-05, "loss": 0.5455, "step": 25121 }, { "epoch": 1.4067644753051853, "grad_norm": 1.9852609634399414, "learning_rate": 9.865526315789474e-05, "loss": 0.4646, "step": 25122 }, { "epoch": 1.4068204726173144, "grad_norm": 1.3366806507110596, "learning_rate": 9.865500000000001e-05, "loss": 0.4645, "step": 25123 }, { "epoch": 1.4068764699294434, "grad_norm": 1.3001078367233276, "learning_rate": 9.865473684210527e-05, "loss": 0.466, "step": 25124 }, { "epoch": 1.4069324672415724, "grad_norm": 1.2729926109313965, "learning_rate": 9.865447368421053e-05, "loss": 0.4699, "step": 25125 }, { "epoch": 1.4069884645537014, "grad_norm": 1.61940336227417, "learning_rate": 9.865421052631579e-05, "loss": 0.6571, "step": 25126 }, { "epoch": 1.4070444618658304, "grad_norm": 1.4772520065307617, "learning_rate": 9.865394736842106e-05, "loss": 0.5843, "step": 25127 }, { "epoch": 1.4071004591779595, "grad_norm": 1.5451966524124146, "learning_rate": 9.865368421052632e-05, "loss": 0.5362, "step": 25128 }, { "epoch": 1.4071564564900885, "grad_norm": 1.266040325164795, "learning_rate": 9.865342105263158e-05, "loss": 0.4418, "step": 25129 }, { "epoch": 1.4072124538022175, "grad_norm": 1.3463425636291504, "learning_rate": 9.865315789473684e-05, "loss": 0.4314, "step": 25130 }, { "epoch": 1.4072684511143465, "grad_norm": 1.5727450847625732, "learning_rate": 9.86528947368421e-05, "loss": 0.5435, "step": 25131 }, { "epoch": 1.4073244484264755, "grad_norm": 1.3411790132522583, "learning_rate": 9.865263157894737e-05, "loss": 0.4142, "step": 25132 }, { "epoch": 1.4073804457386045, "grad_norm": 1.4852484464645386, "learning_rate": 9.865236842105263e-05, "loss": 0.4494, "step": 25133 }, { "epoch": 1.4074364430507336, "grad_norm": 1.394225001335144, "learning_rate": 9.865210526315791e-05, "loss": 0.5541, "step": 25134 }, { "epoch": 1.4074924403628626, "grad_norm": 1.3585059642791748, "learning_rate": 9.865184210526315e-05, "loss": 0.3974, 
"step": 25135 }, { "epoch": 1.4075484376749916, "grad_norm": 1.5817770957946777, "learning_rate": 9.865157894736843e-05, "loss": 0.5899, "step": 25136 }, { "epoch": 1.4076044349871206, "grad_norm": 1.5890610218048096, "learning_rate": 9.865131578947369e-05, "loss": 0.5236, "step": 25137 }, { "epoch": 1.4076604322992496, "grad_norm": 1.1551625728607178, "learning_rate": 9.865105263157896e-05, "loss": 0.4185, "step": 25138 }, { "epoch": 1.4077164296113787, "grad_norm": 1.1804637908935547, "learning_rate": 9.86507894736842e-05, "loss": 0.4683, "step": 25139 }, { "epoch": 1.4077724269235077, "grad_norm": 1.4861499071121216, "learning_rate": 9.865052631578948e-05, "loss": 0.4231, "step": 25140 }, { "epoch": 1.4078284242356367, "grad_norm": 1.6176575422286987, "learning_rate": 9.865026315789474e-05, "loss": 0.4366, "step": 25141 }, { "epoch": 1.4078844215477657, "grad_norm": 1.3628233671188354, "learning_rate": 9.865000000000001e-05, "loss": 0.453, "step": 25142 }, { "epoch": 1.4079404188598947, "grad_norm": 1.26882004737854, "learning_rate": 9.864973684210527e-05, "loss": 0.4369, "step": 25143 }, { "epoch": 1.4079964161720238, "grad_norm": 1.4862089157104492, "learning_rate": 9.864947368421053e-05, "loss": 0.3508, "step": 25144 }, { "epoch": 1.4080524134841528, "grad_norm": 1.1362391710281372, "learning_rate": 9.864921052631579e-05, "loss": 0.3829, "step": 25145 }, { "epoch": 1.4081084107962818, "grad_norm": 1.2971348762512207, "learning_rate": 9.864894736842105e-05, "loss": 0.4346, "step": 25146 }, { "epoch": 1.4081644081084108, "grad_norm": 1.207412600517273, "learning_rate": 9.864868421052632e-05, "loss": 0.4799, "step": 25147 }, { "epoch": 1.4082204054205398, "grad_norm": 1.363423466682434, "learning_rate": 9.864842105263158e-05, "loss": 0.4393, "step": 25148 }, { "epoch": 1.4082764027326689, "grad_norm": 1.479589581489563, "learning_rate": 9.864815789473684e-05, "loss": 0.3937, "step": 25149 }, { "epoch": 1.4083324000447979, "grad_norm": 1.668657660484314, 
"learning_rate": 9.86478947368421e-05, "loss": 0.5922, "step": 25150 }, { "epoch": 1.408388397356927, "grad_norm": 1.1311315298080444, "learning_rate": 9.864763157894738e-05, "loss": 0.392, "step": 25151 }, { "epoch": 1.408444394669056, "grad_norm": 2.721068859100342, "learning_rate": 9.864736842105264e-05, "loss": 0.6064, "step": 25152 }, { "epoch": 1.408500391981185, "grad_norm": 1.382392168045044, "learning_rate": 9.86471052631579e-05, "loss": 0.4802, "step": 25153 }, { "epoch": 1.408556389293314, "grad_norm": 1.329283356666565, "learning_rate": 9.864684210526316e-05, "loss": 0.3837, "step": 25154 }, { "epoch": 1.408612386605443, "grad_norm": 1.3018931150436401, "learning_rate": 9.864657894736843e-05, "loss": 0.3788, "step": 25155 }, { "epoch": 1.408668383917572, "grad_norm": 1.6913831233978271, "learning_rate": 9.864631578947369e-05, "loss": 0.4156, "step": 25156 }, { "epoch": 1.408724381229701, "grad_norm": 1.326263427734375, "learning_rate": 9.864605263157895e-05, "loss": 0.4448, "step": 25157 }, { "epoch": 1.40878037854183, "grad_norm": 1.895554780960083, "learning_rate": 9.864578947368421e-05, "loss": 0.4293, "step": 25158 }, { "epoch": 1.408836375853959, "grad_norm": 1.3707953691482544, "learning_rate": 9.864552631578948e-05, "loss": 0.4818, "step": 25159 }, { "epoch": 1.408892373166088, "grad_norm": 1.4448717832565308, "learning_rate": 9.864526315789474e-05, "loss": 0.55, "step": 25160 }, { "epoch": 1.408948370478217, "grad_norm": 1.3688149452209473, "learning_rate": 9.864500000000002e-05, "loss": 0.6002, "step": 25161 }, { "epoch": 1.409004367790346, "grad_norm": 1.3279937505722046, "learning_rate": 9.864473684210526e-05, "loss": 0.5392, "step": 25162 }, { "epoch": 1.4090603651024751, "grad_norm": 1.3392213582992554, "learning_rate": 9.864447368421053e-05, "loss": 0.3843, "step": 25163 }, { "epoch": 1.4091163624146041, "grad_norm": 1.4783343076705933, "learning_rate": 9.86442105263158e-05, "loss": 0.4332, "step": 25164 }, { "epoch": 1.4091723597267332, 
"grad_norm": 1.4156527519226074, "learning_rate": 9.864394736842105e-05, "loss": 0.421, "step": 25165 }, { "epoch": 1.4092283570388622, "grad_norm": 1.6359426975250244, "learning_rate": 9.864368421052633e-05, "loss": 0.5679, "step": 25166 }, { "epoch": 1.4092843543509912, "grad_norm": 1.2974162101745605, "learning_rate": 9.864342105263157e-05, "loss": 0.3891, "step": 25167 }, { "epoch": 1.4093403516631202, "grad_norm": 1.4267315864562988, "learning_rate": 9.864315789473685e-05, "loss": 0.4329, "step": 25168 }, { "epoch": 1.4093963489752492, "grad_norm": 1.202324390411377, "learning_rate": 9.86428947368421e-05, "loss": 0.3673, "step": 25169 }, { "epoch": 1.4094523462873783, "grad_norm": 1.3575880527496338, "learning_rate": 9.864263157894738e-05, "loss": 0.4658, "step": 25170 }, { "epoch": 1.4095083435995073, "grad_norm": 1.1915929317474365, "learning_rate": 9.864236842105264e-05, "loss": 0.346, "step": 25171 }, { "epoch": 1.4095643409116363, "grad_norm": 1.1185379028320312, "learning_rate": 9.86421052631579e-05, "loss": 0.3682, "step": 25172 }, { "epoch": 1.4096203382237653, "grad_norm": 3.813021183013916, "learning_rate": 9.864184210526316e-05, "loss": 0.6769, "step": 25173 }, { "epoch": 1.4096763355358943, "grad_norm": 1.7576009035110474, "learning_rate": 9.864157894736843e-05, "loss": 0.5629, "step": 25174 }, { "epoch": 1.4097323328480234, "grad_norm": 1.261063575744629, "learning_rate": 9.864131578947369e-05, "loss": 0.4364, "step": 25175 }, { "epoch": 1.4097883301601524, "grad_norm": 1.2424596548080444, "learning_rate": 9.864105263157895e-05, "loss": 0.4822, "step": 25176 }, { "epoch": 1.4098443274722814, "grad_norm": 1.6005308628082275, "learning_rate": 9.864078947368421e-05, "loss": 0.4941, "step": 25177 }, { "epoch": 1.4099003247844104, "grad_norm": 1.1510900259017944, "learning_rate": 9.864052631578948e-05, "loss": 0.477, "step": 25178 }, { "epoch": 1.4099563220965394, "grad_norm": 1.8903448581695557, "learning_rate": 9.864026315789474e-05, "loss": 0.4593, 
"step": 25179 }, { "epoch": 1.4100123194086684, "grad_norm": 2.102847099304199, "learning_rate": 9.864e-05, "loss": 0.4552, "step": 25180 }, { "epoch": 1.4100683167207975, "grad_norm": 1.7312819957733154, "learning_rate": 9.863973684210526e-05, "loss": 0.4896, "step": 25181 }, { "epoch": 1.4101243140329265, "grad_norm": 1.3706915378570557, "learning_rate": 9.863947368421052e-05, "loss": 0.3614, "step": 25182 }, { "epoch": 1.4101803113450555, "grad_norm": 1.6786187887191772, "learning_rate": 9.86392105263158e-05, "loss": 0.5758, "step": 25183 }, { "epoch": 1.4102363086571845, "grad_norm": 1.2910107374191284, "learning_rate": 9.863894736842106e-05, "loss": 0.4112, "step": 25184 }, { "epoch": 1.4102923059693135, "grad_norm": 1.2806596755981445, "learning_rate": 9.863868421052632e-05, "loss": 0.4152, "step": 25185 }, { "epoch": 1.4103483032814426, "grad_norm": 1.6166380643844604, "learning_rate": 9.863842105263158e-05, "loss": 0.4378, "step": 25186 }, { "epoch": 1.4104043005935716, "grad_norm": 1.2381101846694946, "learning_rate": 9.863815789473685e-05, "loss": 0.3784, "step": 25187 }, { "epoch": 1.4104602979057006, "grad_norm": 1.633828043937683, "learning_rate": 9.863789473684211e-05, "loss": 0.4597, "step": 25188 }, { "epoch": 1.4105162952178296, "grad_norm": 1.0757710933685303, "learning_rate": 9.863763157894738e-05, "loss": 0.4191, "step": 25189 }, { "epoch": 1.4105722925299586, "grad_norm": 1.5068286657333374, "learning_rate": 9.863736842105263e-05, "loss": 0.452, "step": 25190 }, { "epoch": 1.4106282898420877, "grad_norm": 1.1757184267044067, "learning_rate": 9.86371052631579e-05, "loss": 0.3778, "step": 25191 }, { "epoch": 1.4106842871542167, "grad_norm": 1.402175784111023, "learning_rate": 9.863684210526316e-05, "loss": 0.7289, "step": 25192 }, { "epoch": 1.4107402844663457, "grad_norm": 1.231673240661621, "learning_rate": 9.863657894736843e-05, "loss": 0.4304, "step": 25193 }, { "epoch": 1.4107962817784747, "grad_norm": 1.4890073537826538, "learning_rate": 
9.863631578947368e-05, "loss": 0.4979, "step": 25194 }, { "epoch": 1.4108522790906037, "grad_norm": 1.2157163619995117, "learning_rate": 9.863605263157895e-05, "loss": 0.4838, "step": 25195 }, { "epoch": 1.4109082764027328, "grad_norm": 1.2294938564300537, "learning_rate": 9.863578947368421e-05, "loss": 0.5075, "step": 25196 }, { "epoch": 1.4109642737148618, "grad_norm": 2.9420714378356934, "learning_rate": 9.863552631578949e-05, "loss": 0.4397, "step": 25197 }, { "epoch": 1.4110202710269908, "grad_norm": 1.3563071489334106, "learning_rate": 9.863526315789475e-05, "loss": 0.5007, "step": 25198 }, { "epoch": 1.4110762683391198, "grad_norm": 1.259868860244751, "learning_rate": 9.8635e-05, "loss": 0.6522, "step": 25199 }, { "epoch": 1.4111322656512488, "grad_norm": 1.3337322473526, "learning_rate": 9.863473684210527e-05, "loss": 0.4302, "step": 25200 }, { "epoch": 1.4111882629633778, "grad_norm": 1.2552626132965088, "learning_rate": 9.863447368421053e-05, "loss": 0.4105, "step": 25201 }, { "epoch": 1.4112442602755069, "grad_norm": 1.2617647647857666, "learning_rate": 9.86342105263158e-05, "loss": 0.3968, "step": 25202 }, { "epoch": 1.4113002575876359, "grad_norm": 1.4309154748916626, "learning_rate": 9.863394736842106e-05, "loss": 0.4875, "step": 25203 }, { "epoch": 1.411356254899765, "grad_norm": 1.6625126600265503, "learning_rate": 9.863368421052632e-05, "loss": 0.6625, "step": 25204 }, { "epoch": 1.411412252211894, "grad_norm": 1.1364010572433472, "learning_rate": 9.863342105263158e-05, "loss": 0.3535, "step": 25205 }, { "epoch": 1.411468249524023, "grad_norm": 1.4304853677749634, "learning_rate": 9.863315789473685e-05, "loss": 0.5349, "step": 25206 }, { "epoch": 1.411524246836152, "grad_norm": 1.3051401376724243, "learning_rate": 9.863289473684211e-05, "loss": 0.4308, "step": 25207 }, { "epoch": 1.411580244148281, "grad_norm": 1.8065649271011353, "learning_rate": 9.863263157894737e-05, "loss": 0.567, "step": 25208 }, { "epoch": 1.41163624146041, "grad_norm": 
1.2617363929748535, "learning_rate": 9.863236842105263e-05, "loss": 0.5085, "step": 25209 }, { "epoch": 1.411692238772539, "grad_norm": 1.63270103931427, "learning_rate": 9.86321052631579e-05, "loss": 0.5207, "step": 25210 }, { "epoch": 1.411748236084668, "grad_norm": 1.464239478111267, "learning_rate": 9.863184210526316e-05, "loss": 0.4098, "step": 25211 }, { "epoch": 1.411804233396797, "grad_norm": 1.193290114402771, "learning_rate": 9.863157894736842e-05, "loss": 0.3687, "step": 25212 }, { "epoch": 1.411860230708926, "grad_norm": 1.1258714199066162, "learning_rate": 9.863131578947368e-05, "loss": 0.4189, "step": 25213 }, { "epoch": 1.411916228021055, "grad_norm": 1.2496358156204224, "learning_rate": 9.863105263157896e-05, "loss": 0.5213, "step": 25214 }, { "epoch": 1.4119722253331841, "grad_norm": 1.1736644506454468, "learning_rate": 9.863078947368422e-05, "loss": 0.5512, "step": 25215 }, { "epoch": 1.4120282226453131, "grad_norm": 1.31013023853302, "learning_rate": 9.863052631578948e-05, "loss": 0.4178, "step": 25216 }, { "epoch": 1.4120842199574422, "grad_norm": 1.6401429176330566, "learning_rate": 9.863026315789474e-05, "loss": 0.4042, "step": 25217 }, { "epoch": 1.4121402172695712, "grad_norm": 1.447288990020752, "learning_rate": 9.863e-05, "loss": 0.3885, "step": 25218 }, { "epoch": 1.4121962145817002, "grad_norm": 1.3267236948013306, "learning_rate": 9.862973684210527e-05, "loss": 0.6762, "step": 25219 }, { "epoch": 1.4122522118938292, "grad_norm": 1.245842456817627, "learning_rate": 9.862947368421053e-05, "loss": 0.6838, "step": 25220 }, { "epoch": 1.4123082092059582, "grad_norm": 1.2799817323684692, "learning_rate": 9.86292105263158e-05, "loss": 0.3342, "step": 25221 }, { "epoch": 1.4123642065180873, "grad_norm": 1.138277292251587, "learning_rate": 9.862894736842105e-05, "loss": 0.4473, "step": 25222 }, { "epoch": 1.4124202038302163, "grad_norm": 1.3019230365753174, "learning_rate": 9.862868421052632e-05, "loss": 0.4225, "step": 25223 }, { "epoch": 
1.4124762011423453, "grad_norm": 1.2484922409057617, "learning_rate": 9.862842105263158e-05, "loss": 0.5595, "step": 25224 }, { "epoch": 1.4125321984544743, "grad_norm": 1.2911787033081055, "learning_rate": 9.862815789473685e-05, "loss": 0.5246, "step": 25225 }, { "epoch": 1.4125881957666033, "grad_norm": 1.1501758098602295, "learning_rate": 9.862789473684211e-05, "loss": 0.3478, "step": 25226 }, { "epoch": 1.4126441930787323, "grad_norm": 1.0365339517593384, "learning_rate": 9.862763157894737e-05, "loss": 0.3588, "step": 25227 }, { "epoch": 1.4127001903908614, "grad_norm": 1.8831053972244263, "learning_rate": 9.862736842105263e-05, "loss": 0.4659, "step": 25228 }, { "epoch": 1.4127561877029904, "grad_norm": 1.3875123262405396, "learning_rate": 9.862710526315791e-05, "loss": 0.4413, "step": 25229 }, { "epoch": 1.4128121850151194, "grad_norm": 1.4171642065048218, "learning_rate": 9.862684210526317e-05, "loss": 0.5081, "step": 25230 }, { "epoch": 1.4128681823272484, "grad_norm": 1.3800628185272217, "learning_rate": 9.862657894736843e-05, "loss": 0.3952, "step": 25231 }, { "epoch": 1.4129241796393774, "grad_norm": 1.1604880094528198, "learning_rate": 9.862631578947369e-05, "loss": 0.3431, "step": 25232 }, { "epoch": 1.4129801769515065, "grad_norm": 1.3972384929656982, "learning_rate": 9.862605263157895e-05, "loss": 0.5271, "step": 25233 }, { "epoch": 1.4130361742636355, "grad_norm": 1.6486868858337402, "learning_rate": 9.862578947368422e-05, "loss": 0.5212, "step": 25234 }, { "epoch": 1.4130921715757645, "grad_norm": 1.3751742839813232, "learning_rate": 9.862552631578948e-05, "loss": 0.3862, "step": 25235 }, { "epoch": 1.4131481688878935, "grad_norm": 1.791991949081421, "learning_rate": 9.862526315789474e-05, "loss": 0.5196, "step": 25236 }, { "epoch": 1.4132041662000223, "grad_norm": 1.3346039056777954, "learning_rate": 9.8625e-05, "loss": 0.5303, "step": 25237 }, { "epoch": 1.4132601635121513, "grad_norm": 1.5640456676483154, "learning_rate": 9.862473684210527e-05, 
"loss": 0.4898, "step": 25238 }, { "epoch": 1.4133161608242804, "grad_norm": 1.4504514932632446, "learning_rate": 9.862447368421053e-05, "loss": 0.4962, "step": 25239 }, { "epoch": 1.4133721581364094, "grad_norm": 1.5084350109100342, "learning_rate": 9.862421052631579e-05, "loss": 0.6098, "step": 25240 }, { "epoch": 1.4134281554485384, "grad_norm": 1.341499924659729, "learning_rate": 9.862394736842105e-05, "loss": 0.6482, "step": 25241 }, { "epoch": 1.4134841527606674, "grad_norm": 1.2496647834777832, "learning_rate": 9.862368421052632e-05, "loss": 0.3923, "step": 25242 }, { "epoch": 1.4135401500727964, "grad_norm": 1.2085115909576416, "learning_rate": 9.862342105263158e-05, "loss": 0.4895, "step": 25243 }, { "epoch": 1.4135961473849254, "grad_norm": 1.3878856897354126, "learning_rate": 9.862315789473686e-05, "loss": 0.4754, "step": 25244 }, { "epoch": 1.4136521446970545, "grad_norm": 1.2352678775787354, "learning_rate": 9.86228947368421e-05, "loss": 0.4851, "step": 25245 }, { "epoch": 1.4137081420091835, "grad_norm": 1.5618391036987305, "learning_rate": 9.862263157894738e-05, "loss": 0.5905, "step": 25246 }, { "epoch": 1.4137641393213125, "grad_norm": 1.4296767711639404, "learning_rate": 9.862236842105264e-05, "loss": 0.4608, "step": 25247 }, { "epoch": 1.4138201366334415, "grad_norm": 1.3693063259124756, "learning_rate": 9.862210526315791e-05, "loss": 0.3787, "step": 25248 }, { "epoch": 1.4138761339455705, "grad_norm": 1.2497797012329102, "learning_rate": 9.862184210526316e-05, "loss": 0.4279, "step": 25249 }, { "epoch": 1.4139321312576996, "grad_norm": 1.4723483324050903, "learning_rate": 9.862157894736842e-05, "loss": 0.5253, "step": 25250 }, { "epoch": 1.4139881285698286, "grad_norm": 1.3741096258163452, "learning_rate": 9.862131578947369e-05, "loss": 0.5041, "step": 25251 }, { "epoch": 1.4140441258819576, "grad_norm": 1.483561396598816, "learning_rate": 9.862105263157895e-05, "loss": 0.4966, "step": 25252 }, { "epoch": 1.4141001231940866, "grad_norm": 
1.5171470642089844, "learning_rate": 9.862078947368422e-05, "loss": 0.4932, "step": 25253 }, { "epoch": 1.4141561205062156, "grad_norm": 1.2929694652557373, "learning_rate": 9.862052631578947e-05, "loss": 0.5337, "step": 25254 }, { "epoch": 1.4142121178183447, "grad_norm": 1.4575672149658203, "learning_rate": 9.862026315789474e-05, "loss": 0.4108, "step": 25255 }, { "epoch": 1.4142681151304737, "grad_norm": 1.2953664064407349, "learning_rate": 9.862e-05, "loss": 0.4451, "step": 25256 }, { "epoch": 1.4143241124426027, "grad_norm": 1.1995396614074707, "learning_rate": 9.861973684210527e-05, "loss": 0.5422, "step": 25257 }, { "epoch": 1.4143801097547317, "grad_norm": 1.1539461612701416, "learning_rate": 9.861947368421053e-05, "loss": 0.401, "step": 25258 }, { "epoch": 1.4144361070668607, "grad_norm": 1.0791876316070557, "learning_rate": 9.86192105263158e-05, "loss": 0.4076, "step": 25259 }, { "epoch": 1.4144921043789898, "grad_norm": 1.157410979270935, "learning_rate": 9.861894736842105e-05, "loss": 0.4216, "step": 25260 }, { "epoch": 1.4145481016911188, "grad_norm": 1.2240326404571533, "learning_rate": 9.861868421052633e-05, "loss": 0.3884, "step": 25261 }, { "epoch": 1.4146040990032478, "grad_norm": 1.393512487411499, "learning_rate": 9.861842105263159e-05, "loss": 0.4577, "step": 25262 }, { "epoch": 1.4146600963153768, "grad_norm": 1.1321724653244019, "learning_rate": 9.861815789473685e-05, "loss": 0.3056, "step": 25263 }, { "epoch": 1.4147160936275058, "grad_norm": 1.144692063331604, "learning_rate": 9.86178947368421e-05, "loss": 0.4127, "step": 25264 }, { "epoch": 1.4147720909396349, "grad_norm": 1.49162757396698, "learning_rate": 9.861763157894738e-05, "loss": 0.5832, "step": 25265 }, { "epoch": 1.4148280882517639, "grad_norm": 1.2007313966751099, "learning_rate": 9.861736842105264e-05, "loss": 0.4345, "step": 25266 }, { "epoch": 1.414884085563893, "grad_norm": 1.4854063987731934, "learning_rate": 9.86171052631579e-05, "loss": 0.5534, "step": 25267 }, { "epoch": 
1.414940082876022, "grad_norm": 2.267275094985962, "learning_rate": 9.861684210526316e-05, "loss": 0.5159, "step": 25268 }, { "epoch": 1.414996080188151, "grad_norm": 1.5217339992523193, "learning_rate": 9.861657894736842e-05, "loss": 0.4637, "step": 25269 }, { "epoch": 1.41505207750028, "grad_norm": 1.3068915605545044, "learning_rate": 9.861631578947369e-05, "loss": 0.4396, "step": 25270 }, { "epoch": 1.415108074812409, "grad_norm": 1.3373641967773438, "learning_rate": 9.861605263157895e-05, "loss": 0.3759, "step": 25271 }, { "epoch": 1.415164072124538, "grad_norm": 1.6124898195266724, "learning_rate": 9.861578947368421e-05, "loss": 0.4239, "step": 25272 }, { "epoch": 1.415220069436667, "grad_norm": 1.4194447994232178, "learning_rate": 9.861552631578947e-05, "loss": 0.4973, "step": 25273 }, { "epoch": 1.415276066748796, "grad_norm": 1.24207603931427, "learning_rate": 9.861526315789474e-05, "loss": 0.4253, "step": 25274 }, { "epoch": 1.415332064060925, "grad_norm": 1.3166199922561646, "learning_rate": 9.8615e-05, "loss": 0.4236, "step": 25275 }, { "epoch": 1.415388061373054, "grad_norm": 1.2715953588485718, "learning_rate": 9.861473684210528e-05, "loss": 0.486, "step": 25276 }, { "epoch": 1.415444058685183, "grad_norm": 1.384478211402893, "learning_rate": 9.861447368421052e-05, "loss": 0.3994, "step": 25277 }, { "epoch": 1.415500055997312, "grad_norm": 1.9101049900054932, "learning_rate": 9.86142105263158e-05, "loss": 0.4845, "step": 25278 }, { "epoch": 1.4155560533094411, "grad_norm": 1.2240445613861084, "learning_rate": 9.861394736842106e-05, "loss": 0.4528, "step": 25279 }, { "epoch": 1.4156120506215701, "grad_norm": 1.328916072845459, "learning_rate": 9.861368421052633e-05, "loss": 0.3869, "step": 25280 }, { "epoch": 1.4156680479336992, "grad_norm": 1.400500774383545, "learning_rate": 9.861342105263159e-05, "loss": 0.5352, "step": 25281 }, { "epoch": 1.4157240452458282, "grad_norm": 1.5027445554733276, "learning_rate": 9.861315789473685e-05, "loss": 0.5391, 
"step": 25282 }, { "epoch": 1.4157800425579572, "grad_norm": 1.1852748394012451, "learning_rate": 9.861289473684211e-05, "loss": 0.4876, "step": 25283 }, { "epoch": 1.4158360398700862, "grad_norm": 1.3775509595870972, "learning_rate": 9.861263157894738e-05, "loss": 0.4517, "step": 25284 }, { "epoch": 1.4158920371822152, "grad_norm": 1.608871340751648, "learning_rate": 9.861236842105264e-05, "loss": 0.5324, "step": 25285 }, { "epoch": 1.4159480344943443, "grad_norm": 1.3725733757019043, "learning_rate": 9.86121052631579e-05, "loss": 0.4773, "step": 25286 }, { "epoch": 1.4160040318064733, "grad_norm": 1.2922005653381348, "learning_rate": 9.861184210526316e-05, "loss": 0.4044, "step": 25287 }, { "epoch": 1.4160600291186023, "grad_norm": 1.2618980407714844, "learning_rate": 9.861157894736842e-05, "loss": 0.4802, "step": 25288 }, { "epoch": 1.4161160264307313, "grad_norm": 1.3802038431167603, "learning_rate": 9.86113157894737e-05, "loss": 0.4104, "step": 25289 }, { "epoch": 1.4161720237428603, "grad_norm": 1.4444077014923096, "learning_rate": 9.861105263157895e-05, "loss": 0.4617, "step": 25290 }, { "epoch": 1.4162280210549893, "grad_norm": 3.483999013900757, "learning_rate": 9.861078947368421e-05, "loss": 0.5907, "step": 25291 }, { "epoch": 1.4162840183671184, "grad_norm": 1.1073352098464966, "learning_rate": 9.861052631578947e-05, "loss": 0.4187, "step": 25292 }, { "epoch": 1.4163400156792474, "grad_norm": 1.1422905921936035, "learning_rate": 9.861026315789475e-05, "loss": 0.3392, "step": 25293 }, { "epoch": 1.4163960129913764, "grad_norm": 1.6512439250946045, "learning_rate": 9.861e-05, "loss": 0.4894, "step": 25294 }, { "epoch": 1.4164520103035054, "grad_norm": 1.3519871234893799, "learning_rate": 9.860973684210527e-05, "loss": 0.5066, "step": 25295 }, { "epoch": 1.4165080076156344, "grad_norm": 1.090065360069275, "learning_rate": 9.860947368421053e-05, "loss": 0.3512, "step": 25296 }, { "epoch": 1.4165640049277635, "grad_norm": 1.3428910970687866, "learning_rate": 
9.86092105263158e-05, "loss": 0.4256, "step": 25297 }, { "epoch": 1.4166200022398925, "grad_norm": 1.3911185264587402, "learning_rate": 9.860894736842106e-05, "loss": 0.4326, "step": 25298 }, { "epoch": 1.4166759995520215, "grad_norm": 1.390636920928955, "learning_rate": 9.860868421052633e-05, "loss": 0.4339, "step": 25299 }, { "epoch": 1.4167319968641505, "grad_norm": 1.4221104383468628, "learning_rate": 9.860842105263158e-05, "loss": 0.3952, "step": 25300 }, { "epoch": 1.4167879941762795, "grad_norm": 1.3638477325439453, "learning_rate": 9.860815789473685e-05, "loss": 0.4597, "step": 25301 }, { "epoch": 1.4168439914884086, "grad_norm": 1.2395983934402466, "learning_rate": 9.860789473684211e-05, "loss": 0.3628, "step": 25302 }, { "epoch": 1.4168999888005376, "grad_norm": 1.2597976922988892, "learning_rate": 9.860763157894737e-05, "loss": 0.422, "step": 25303 }, { "epoch": 1.4169559861126666, "grad_norm": 1.3745061159133911, "learning_rate": 9.860736842105263e-05, "loss": 0.5168, "step": 25304 }, { "epoch": 1.4170119834247956, "grad_norm": 1.2840275764465332, "learning_rate": 9.860710526315789e-05, "loss": 0.4069, "step": 25305 }, { "epoch": 1.4170679807369246, "grad_norm": 1.353759765625, "learning_rate": 9.860684210526316e-05, "loss": 0.4599, "step": 25306 }, { "epoch": 1.4171239780490537, "grad_norm": 1.3688899278640747, "learning_rate": 9.860657894736842e-05, "loss": 0.4981, "step": 25307 }, { "epoch": 1.4171799753611827, "grad_norm": 1.1914405822753906, "learning_rate": 9.86063157894737e-05, "loss": 0.3851, "step": 25308 }, { "epoch": 1.4172359726733117, "grad_norm": 1.4963513612747192, "learning_rate": 9.860605263157894e-05, "loss": 0.4801, "step": 25309 }, { "epoch": 1.4172919699854407, "grad_norm": 1.4012556076049805, "learning_rate": 9.860578947368422e-05, "loss": 0.4715, "step": 25310 }, { "epoch": 1.4173479672975697, "grad_norm": 1.372742772102356, "learning_rate": 9.860552631578948e-05, "loss": 0.4207, "step": 25311 }, { "epoch": 1.4174039646096988, 
"grad_norm": 1.3505504131317139, "learning_rate": 9.860526315789475e-05, "loss": 0.5519, "step": 25312 }, { "epoch": 1.4174599619218278, "grad_norm": 1.4344871044158936, "learning_rate": 9.860500000000001e-05, "loss": 0.4624, "step": 25313 }, { "epoch": 1.4175159592339568, "grad_norm": 1.3725773096084595, "learning_rate": 9.860473684210527e-05, "loss": 0.4659, "step": 25314 }, { "epoch": 1.4175719565460858, "grad_norm": 1.265283226966858, "learning_rate": 9.860447368421053e-05, "loss": 0.4751, "step": 25315 }, { "epoch": 1.4176279538582148, "grad_norm": 1.3512071371078491, "learning_rate": 9.86042105263158e-05, "loss": 0.5503, "step": 25316 }, { "epoch": 1.4176839511703438, "grad_norm": 1.2943168878555298, "learning_rate": 9.860394736842106e-05, "loss": 0.3472, "step": 25317 }, { "epoch": 1.4177399484824729, "grad_norm": 1.1657301187515259, "learning_rate": 9.860368421052632e-05, "loss": 0.4913, "step": 25318 }, { "epoch": 1.4177959457946019, "grad_norm": 1.530411958694458, "learning_rate": 9.860342105263158e-05, "loss": 0.4243, "step": 25319 }, { "epoch": 1.417851943106731, "grad_norm": 1.179490089416504, "learning_rate": 9.860315789473684e-05, "loss": 0.4927, "step": 25320 }, { "epoch": 1.41790794041886, "grad_norm": 1.3139373064041138, "learning_rate": 9.860289473684211e-05, "loss": 0.4186, "step": 25321 }, { "epoch": 1.417963937730989, "grad_norm": 1.1053187847137451, "learning_rate": 9.860263157894737e-05, "loss": 0.4252, "step": 25322 }, { "epoch": 1.418019935043118, "grad_norm": 1.361271858215332, "learning_rate": 9.860236842105263e-05, "loss": 0.5176, "step": 25323 }, { "epoch": 1.418075932355247, "grad_norm": 1.6484142541885376, "learning_rate": 9.860210526315789e-05, "loss": 0.5088, "step": 25324 }, { "epoch": 1.418131929667376, "grad_norm": 1.2425090074539185, "learning_rate": 9.860184210526317e-05, "loss": 0.4417, "step": 25325 }, { "epoch": 1.418187926979505, "grad_norm": 1.8077564239501953, "learning_rate": 9.860157894736843e-05, "loss": 0.5891, 
"step": 25326 }, { "epoch": 1.418243924291634, "grad_norm": 1.4657273292541504, "learning_rate": 9.860131578947369e-05, "loss": 0.5193, "step": 25327 }, { "epoch": 1.418299921603763, "grad_norm": 1.2672172784805298, "learning_rate": 9.860105263157895e-05, "loss": 0.398, "step": 25328 }, { "epoch": 1.418355918915892, "grad_norm": 1.22288978099823, "learning_rate": 9.860078947368422e-05, "loss": 0.5111, "step": 25329 }, { "epoch": 1.418411916228021, "grad_norm": 1.6638717651367188, "learning_rate": 9.860052631578948e-05, "loss": 0.5352, "step": 25330 }, { "epoch": 1.4184679135401501, "grad_norm": 1.5996172428131104, "learning_rate": 9.860026315789475e-05, "loss": 0.5242, "step": 25331 }, { "epoch": 1.4185239108522791, "grad_norm": 1.2739406824111938, "learning_rate": 9.86e-05, "loss": 0.4868, "step": 25332 }, { "epoch": 1.4185799081644082, "grad_norm": 1.4929602146148682, "learning_rate": 9.859973684210527e-05, "loss": 0.4906, "step": 25333 }, { "epoch": 1.4186359054765372, "grad_norm": 1.3288265466690063, "learning_rate": 9.859947368421053e-05, "loss": 0.4633, "step": 25334 }, { "epoch": 1.4186919027886662, "grad_norm": 1.4196847677230835, "learning_rate": 9.85992105263158e-05, "loss": 0.5721, "step": 25335 }, { "epoch": 1.4187479001007952, "grad_norm": 1.4367882013320923, "learning_rate": 9.859894736842106e-05, "loss": 0.5164, "step": 25336 }, { "epoch": 1.4188038974129242, "grad_norm": 1.402292013168335, "learning_rate": 9.859868421052631e-05, "loss": 0.5308, "step": 25337 }, { "epoch": 1.4188598947250533, "grad_norm": 1.1375635862350464, "learning_rate": 9.859842105263158e-05, "loss": 0.3699, "step": 25338 }, { "epoch": 1.4189158920371823, "grad_norm": 1.2534152269363403, "learning_rate": 9.859815789473684e-05, "loss": 0.5699, "step": 25339 }, { "epoch": 1.4189718893493113, "grad_norm": 1.2568928003311157, "learning_rate": 9.859789473684212e-05, "loss": 0.3924, "step": 25340 }, { "epoch": 1.4190278866614403, "grad_norm": 1.2098966836929321, "learning_rate": 
9.859763157894736e-05, "loss": 0.5723, "step": 25341 }, { "epoch": 1.4190838839735693, "grad_norm": 1.161672592163086, "learning_rate": 9.859736842105264e-05, "loss": 0.4008, "step": 25342 }, { "epoch": 1.4191398812856983, "grad_norm": 1.3864883184432983, "learning_rate": 9.85971052631579e-05, "loss": 0.4221, "step": 25343 }, { "epoch": 1.4191958785978274, "grad_norm": 1.7194762229919434, "learning_rate": 9.859684210526317e-05, "loss": 0.571, "step": 25344 }, { "epoch": 1.4192518759099564, "grad_norm": 1.3612377643585205, "learning_rate": 9.859657894736843e-05, "loss": 0.3921, "step": 25345 }, { "epoch": 1.4193078732220854, "grad_norm": 1.2218958139419556, "learning_rate": 9.859631578947369e-05, "loss": 0.3954, "step": 25346 }, { "epoch": 1.4193638705342144, "grad_norm": 1.7550920248031616, "learning_rate": 9.859605263157895e-05, "loss": 0.529, "step": 25347 }, { "epoch": 1.4194198678463434, "grad_norm": 1.7339459657669067, "learning_rate": 9.859578947368422e-05, "loss": 0.5703, "step": 25348 }, { "epoch": 1.4194758651584725, "grad_norm": 1.1789673566818237, "learning_rate": 9.859552631578948e-05, "loss": 0.3967, "step": 25349 }, { "epoch": 1.4195318624706015, "grad_norm": 1.3035659790039062, "learning_rate": 9.859526315789474e-05, "loss": 0.416, "step": 25350 }, { "epoch": 1.4195878597827305, "grad_norm": 1.4353876113891602, "learning_rate": 9.8595e-05, "loss": 0.4244, "step": 25351 }, { "epoch": 1.4196438570948595, "grad_norm": 1.650483250617981, "learning_rate": 9.859473684210527e-05, "loss": 0.662, "step": 25352 }, { "epoch": 1.4196998544069885, "grad_norm": 1.2579351663589478, "learning_rate": 9.859447368421053e-05, "loss": 0.3497, "step": 25353 }, { "epoch": 1.4197558517191176, "grad_norm": 1.3151016235351562, "learning_rate": 9.859421052631579e-05, "loss": 0.4805, "step": 25354 }, { "epoch": 1.4198118490312466, "grad_norm": 2.101506471633911, "learning_rate": 9.859394736842105e-05, "loss": 0.5463, "step": 25355 }, { "epoch": 1.4198678463433756, "grad_norm": 
1.4948760271072388, "learning_rate": 9.859368421052631e-05, "loss": 0.6012, "step": 25356 }, { "epoch": 1.4199238436555046, "grad_norm": 1.2718428373336792, "learning_rate": 9.859342105263159e-05, "loss": 0.5115, "step": 25357 }, { "epoch": 1.4199798409676336, "grad_norm": 1.3629120588302612, "learning_rate": 9.859315789473685e-05, "loss": 0.4204, "step": 25358 }, { "epoch": 1.4200358382797627, "grad_norm": 1.874666452407837, "learning_rate": 9.85928947368421e-05, "loss": 0.5246, "step": 25359 }, { "epoch": 1.4200918355918917, "grad_norm": 1.4612150192260742, "learning_rate": 9.859263157894737e-05, "loss": 0.4192, "step": 25360 }, { "epoch": 1.4201478329040205, "grad_norm": 1.6113919019699097, "learning_rate": 9.859236842105264e-05, "loss": 0.6654, "step": 25361 }, { "epoch": 1.4202038302161495, "grad_norm": 1.3224183320999146, "learning_rate": 9.85921052631579e-05, "loss": 0.5592, "step": 25362 }, { "epoch": 1.4202598275282785, "grad_norm": 1.343196153640747, "learning_rate": 9.859184210526317e-05, "loss": 0.4718, "step": 25363 }, { "epoch": 1.4203158248404075, "grad_norm": 1.2355575561523438, "learning_rate": 9.859157894736842e-05, "loss": 0.4856, "step": 25364 }, { "epoch": 1.4203718221525365, "grad_norm": 1.216900110244751, "learning_rate": 9.859131578947369e-05, "loss": 0.4172, "step": 25365 }, { "epoch": 1.4204278194646656, "grad_norm": 1.3724902868270874, "learning_rate": 9.859105263157895e-05, "loss": 0.4328, "step": 25366 }, { "epoch": 1.4204838167767946, "grad_norm": 1.2023346424102783, "learning_rate": 9.859078947368422e-05, "loss": 0.493, "step": 25367 }, { "epoch": 1.4205398140889236, "grad_norm": 1.1472097635269165, "learning_rate": 9.859052631578948e-05, "loss": 0.3719, "step": 25368 }, { "epoch": 1.4205958114010526, "grad_norm": 1.615952730178833, "learning_rate": 9.859026315789474e-05, "loss": 0.595, "step": 25369 }, { "epoch": 1.4206518087131816, "grad_norm": 1.4684040546417236, "learning_rate": 9.859e-05, "loss": 0.4789, "step": 25370 }, { 
"epoch": 1.4207078060253107, "grad_norm": 1.437103271484375, "learning_rate": 9.858973684210526e-05, "loss": 0.6406, "step": 25371 }, { "epoch": 1.4207638033374397, "grad_norm": 1.5414632558822632, "learning_rate": 9.858947368421054e-05, "loss": 0.3936, "step": 25372 }, { "epoch": 1.4208198006495687, "grad_norm": 1.5145574808120728, "learning_rate": 9.85892105263158e-05, "loss": 0.4852, "step": 25373 }, { "epoch": 1.4208757979616977, "grad_norm": 1.5883476734161377, "learning_rate": 9.858894736842106e-05, "loss": 0.5683, "step": 25374 }, { "epoch": 1.4209317952738267, "grad_norm": 1.665426254272461, "learning_rate": 9.858868421052632e-05, "loss": 0.4271, "step": 25375 }, { "epoch": 1.4209877925859558, "grad_norm": 1.6864166259765625, "learning_rate": 9.858842105263159e-05, "loss": 0.6829, "step": 25376 }, { "epoch": 1.4210437898980848, "grad_norm": 1.3218377828598022, "learning_rate": 9.858815789473685e-05, "loss": 0.4846, "step": 25377 }, { "epoch": 1.4210997872102138, "grad_norm": 1.1267671585083008, "learning_rate": 9.858789473684211e-05, "loss": 0.4406, "step": 25378 }, { "epoch": 1.4211557845223428, "grad_norm": 1.2661861181259155, "learning_rate": 9.858763157894737e-05, "loss": 0.4269, "step": 25379 }, { "epoch": 1.4212117818344718, "grad_norm": 1.487857460975647, "learning_rate": 9.858736842105264e-05, "loss": 0.5297, "step": 25380 }, { "epoch": 1.4212677791466009, "grad_norm": 1.3049241304397583, "learning_rate": 9.85871052631579e-05, "loss": 0.4583, "step": 25381 }, { "epoch": 1.4213237764587299, "grad_norm": 1.3917500972747803, "learning_rate": 9.858684210526316e-05, "loss": 0.444, "step": 25382 }, { "epoch": 1.421379773770859, "grad_norm": 1.7665787935256958, "learning_rate": 9.858657894736842e-05, "loss": 0.4787, "step": 25383 }, { "epoch": 1.421435771082988, "grad_norm": 2.91767954826355, "learning_rate": 9.85863157894737e-05, "loss": 0.471, "step": 25384 }, { "epoch": 1.421491768395117, "grad_norm": 1.3150043487548828, "learning_rate": 
9.858605263157895e-05, "loss": 0.4157, "step": 25385 }, { "epoch": 1.421547765707246, "grad_norm": 1.415709137916565, "learning_rate": 9.858578947368423e-05, "loss": 0.4664, "step": 25386 }, { "epoch": 1.421603763019375, "grad_norm": 1.6887582540512085, "learning_rate": 9.858552631578947e-05, "loss": 0.5149, "step": 25387 }, { "epoch": 1.421659760331504, "grad_norm": 1.4312492609024048, "learning_rate": 9.858526315789473e-05, "loss": 0.5076, "step": 25388 }, { "epoch": 1.421715757643633, "grad_norm": 1.4411532878875732, "learning_rate": 9.8585e-05, "loss": 0.5419, "step": 25389 }, { "epoch": 1.421771754955762, "grad_norm": 1.3503779172897339, "learning_rate": 9.858473684210527e-05, "loss": 0.4765, "step": 25390 }, { "epoch": 1.421827752267891, "grad_norm": 1.2609636783599854, "learning_rate": 9.858447368421054e-05, "loss": 0.4416, "step": 25391 }, { "epoch": 1.42188374958002, "grad_norm": 1.4397571086883545, "learning_rate": 9.858421052631578e-05, "loss": 0.4568, "step": 25392 }, { "epoch": 1.421939746892149, "grad_norm": 1.258315086364746, "learning_rate": 9.858394736842106e-05, "loss": 0.4831, "step": 25393 }, { "epoch": 1.421995744204278, "grad_norm": 1.210988998413086, "learning_rate": 9.858368421052632e-05, "loss": 0.5015, "step": 25394 }, { "epoch": 1.4220517415164071, "grad_norm": 1.2439945936203003, "learning_rate": 9.858342105263159e-05, "loss": 0.3687, "step": 25395 }, { "epoch": 1.4221077388285361, "grad_norm": 1.1381407976150513, "learning_rate": 9.858315789473684e-05, "loss": 0.4425, "step": 25396 }, { "epoch": 1.4221637361406652, "grad_norm": 1.4158564805984497, "learning_rate": 9.858289473684211e-05, "loss": 0.4328, "step": 25397 }, { "epoch": 1.4222197334527942, "grad_norm": 1.231938123703003, "learning_rate": 9.858263157894737e-05, "loss": 0.3726, "step": 25398 }, { "epoch": 1.4222757307649232, "grad_norm": 1.071649193763733, "learning_rate": 9.858236842105264e-05, "loss": 0.3096, "step": 25399 }, { "epoch": 1.4223317280770522, "grad_norm": 
3.990752696990967, "learning_rate": 9.85821052631579e-05, "loss": 0.3287, "step": 25400 }, { "epoch": 1.4223877253891812, "grad_norm": 1.2647135257720947, "learning_rate": 9.858184210526316e-05, "loss": 0.409, "step": 25401 }, { "epoch": 1.4224437227013103, "grad_norm": 1.5738040208816528, "learning_rate": 9.858157894736842e-05, "loss": 0.5038, "step": 25402 }, { "epoch": 1.4224997200134393, "grad_norm": 1.283447265625, "learning_rate": 9.85813157894737e-05, "loss": 0.4931, "step": 25403 }, { "epoch": 1.4225557173255683, "grad_norm": 1.3840880393981934, "learning_rate": 9.858105263157896e-05, "loss": 0.5407, "step": 25404 }, { "epoch": 1.4226117146376973, "grad_norm": 1.3599412441253662, "learning_rate": 9.858078947368422e-05, "loss": 0.5234, "step": 25405 }, { "epoch": 1.4226677119498263, "grad_norm": 1.2359181642532349, "learning_rate": 9.858052631578948e-05, "loss": 0.4504, "step": 25406 }, { "epoch": 1.4227237092619553, "grad_norm": 1.4266167879104614, "learning_rate": 9.858026315789473e-05, "loss": 0.5036, "step": 25407 }, { "epoch": 1.4227797065740844, "grad_norm": 1.2063062191009521, "learning_rate": 9.858000000000001e-05, "loss": 0.4117, "step": 25408 }, { "epoch": 1.4228357038862134, "grad_norm": 1.31067955493927, "learning_rate": 9.857973684210527e-05, "loss": 0.3727, "step": 25409 }, { "epoch": 1.4228917011983424, "grad_norm": 2.1875691413879395, "learning_rate": 9.857947368421053e-05, "loss": 0.4012, "step": 25410 }, { "epoch": 1.4229476985104714, "grad_norm": 1.1316533088684082, "learning_rate": 9.857921052631579e-05, "loss": 0.4388, "step": 25411 }, { "epoch": 1.4230036958226004, "grad_norm": 1.3008742332458496, "learning_rate": 9.857894736842106e-05, "loss": 0.3254, "step": 25412 }, { "epoch": 1.4230596931347295, "grad_norm": 1.3735431432724, "learning_rate": 9.857868421052632e-05, "loss": 0.4582, "step": 25413 }, { "epoch": 1.4231156904468585, "grad_norm": 1.4176539182662964, "learning_rate": 9.857842105263158e-05, "loss": 0.4259, "step": 25414 }, { 
"epoch": 1.4231716877589875, "grad_norm": 1.55917489528656, "learning_rate": 9.857815789473684e-05, "loss": 0.5155, "step": 25415 }, { "epoch": 1.4232276850711165, "grad_norm": 3.2967946529388428, "learning_rate": 9.857789473684211e-05, "loss": 0.4553, "step": 25416 }, { "epoch": 1.4232836823832455, "grad_norm": 2.0240895748138428, "learning_rate": 9.857763157894737e-05, "loss": 0.5449, "step": 25417 }, { "epoch": 1.4233396796953746, "grad_norm": 1.7926132678985596, "learning_rate": 9.857736842105265e-05, "loss": 0.4438, "step": 25418 }, { "epoch": 1.4233956770075036, "grad_norm": 1.3341935873031616, "learning_rate": 9.857710526315789e-05, "loss": 0.4905, "step": 25419 }, { "epoch": 1.4234516743196326, "grad_norm": 1.5490853786468506, "learning_rate": 9.857684210526317e-05, "loss": 0.5582, "step": 25420 }, { "epoch": 1.4235076716317616, "grad_norm": 1.5838794708251953, "learning_rate": 9.857657894736843e-05, "loss": 0.5194, "step": 25421 }, { "epoch": 1.4235636689438906, "grad_norm": 1.493714690208435, "learning_rate": 9.85763157894737e-05, "loss": 0.7161, "step": 25422 }, { "epoch": 1.4236196662560197, "grad_norm": 1.3017715215682983, "learning_rate": 9.857605263157896e-05, "loss": 0.4129, "step": 25423 }, { "epoch": 1.4236756635681487, "grad_norm": 2.0874288082122803, "learning_rate": 9.85757894736842e-05, "loss": 0.5038, "step": 25424 }, { "epoch": 1.4237316608802777, "grad_norm": 1.455629587173462, "learning_rate": 9.857552631578948e-05, "loss": 0.6834, "step": 25425 }, { "epoch": 1.4237876581924067, "grad_norm": 1.1289703845977783, "learning_rate": 9.857526315789474e-05, "loss": 0.3529, "step": 25426 }, { "epoch": 1.4238436555045357, "grad_norm": 1.2799420356750488, "learning_rate": 9.857500000000001e-05, "loss": 0.3936, "step": 25427 }, { "epoch": 1.4238996528166648, "grad_norm": 1.4523367881774902, "learning_rate": 9.857473684210527e-05, "loss": 0.4842, "step": 25428 }, { "epoch": 1.4239556501287938, "grad_norm": 1.5793325901031494, "learning_rate": 
9.857447368421053e-05, "loss": 0.3518, "step": 25429 }, { "epoch": 1.4240116474409228, "grad_norm": 1.5430808067321777, "learning_rate": 9.857421052631579e-05, "loss": 0.5515, "step": 25430 }, { "epoch": 1.4240676447530518, "grad_norm": 1.567837119102478, "learning_rate": 9.857394736842106e-05, "loss": 0.4913, "step": 25431 }, { "epoch": 1.4241236420651808, "grad_norm": 1.3282862901687622, "learning_rate": 9.857368421052632e-05, "loss": 0.5311, "step": 25432 }, { "epoch": 1.4241796393773098, "grad_norm": 1.3393051624298096, "learning_rate": 9.857342105263158e-05, "loss": 0.4615, "step": 25433 }, { "epoch": 1.4242356366894389, "grad_norm": 1.48483145236969, "learning_rate": 9.857315789473684e-05, "loss": 0.6161, "step": 25434 }, { "epoch": 1.4242916340015679, "grad_norm": 1.5160679817199707, "learning_rate": 9.857289473684212e-05, "loss": 0.4744, "step": 25435 }, { "epoch": 1.424347631313697, "grad_norm": 1.438240647315979, "learning_rate": 9.857263157894738e-05, "loss": 0.5894, "step": 25436 }, { "epoch": 1.424403628625826, "grad_norm": 1.6397444009780884, "learning_rate": 9.857236842105264e-05, "loss": 0.6918, "step": 25437 }, { "epoch": 1.424459625937955, "grad_norm": 1.548452615737915, "learning_rate": 9.85721052631579e-05, "loss": 0.4115, "step": 25438 }, { "epoch": 1.424515623250084, "grad_norm": 1.4626857042312622, "learning_rate": 9.857184210526317e-05, "loss": 0.6428, "step": 25439 }, { "epoch": 1.424571620562213, "grad_norm": 1.2954314947128296, "learning_rate": 9.857157894736843e-05, "loss": 0.4795, "step": 25440 }, { "epoch": 1.424627617874342, "grad_norm": 1.250012755393982, "learning_rate": 9.857131578947369e-05, "loss": 0.3832, "step": 25441 }, { "epoch": 1.424683615186471, "grad_norm": 1.386883020401001, "learning_rate": 9.857105263157895e-05, "loss": 0.5218, "step": 25442 }, { "epoch": 1.4247396124986, "grad_norm": 1.4339525699615479, "learning_rate": 9.857078947368421e-05, "loss": 0.3912, "step": 25443 }, { "epoch": 1.424795609810729, "grad_norm": 
1.4767746925354004, "learning_rate": 9.857052631578948e-05, "loss": 0.5203, "step": 25444 }, { "epoch": 1.424851607122858, "grad_norm": 1.441213607788086, "learning_rate": 9.857026315789474e-05, "loss": 0.4481, "step": 25445 }, { "epoch": 1.424907604434987, "grad_norm": 1.6515579223632812, "learning_rate": 9.857000000000001e-05, "loss": 0.4584, "step": 25446 }, { "epoch": 1.4249636017471161, "grad_norm": 1.3358708620071411, "learning_rate": 9.856973684210526e-05, "loss": 0.6889, "step": 25447 }, { "epoch": 1.4250195990592451, "grad_norm": 1.3359014987945557, "learning_rate": 9.856947368421053e-05, "loss": 0.4941, "step": 25448 }, { "epoch": 1.4250755963713742, "grad_norm": 1.5393649339675903, "learning_rate": 9.856921052631579e-05, "loss": 0.477, "step": 25449 }, { "epoch": 1.4251315936835032, "grad_norm": 1.1691579818725586, "learning_rate": 9.856894736842107e-05, "loss": 0.3377, "step": 25450 }, { "epoch": 1.4251875909956322, "grad_norm": 1.5221678018569946, "learning_rate": 9.856868421052631e-05, "loss": 0.6133, "step": 25451 }, { "epoch": 1.4252435883077612, "grad_norm": 1.448843002319336, "learning_rate": 9.856842105263159e-05, "loss": 0.4993, "step": 25452 }, { "epoch": 1.4252995856198902, "grad_norm": 1.2847239971160889, "learning_rate": 9.856815789473685e-05, "loss": 0.5198, "step": 25453 }, { "epoch": 1.4253555829320192, "grad_norm": 1.0127615928649902, "learning_rate": 9.856789473684212e-05, "loss": 0.3715, "step": 25454 }, { "epoch": 1.4254115802441483, "grad_norm": 1.3241877555847168, "learning_rate": 9.856763157894738e-05, "loss": 0.3097, "step": 25455 }, { "epoch": 1.4254675775562773, "grad_norm": 1.3852566480636597, "learning_rate": 9.856736842105264e-05, "loss": 0.4012, "step": 25456 }, { "epoch": 1.4255235748684063, "grad_norm": 1.2788549661636353, "learning_rate": 9.85671052631579e-05, "loss": 0.5921, "step": 25457 }, { "epoch": 1.4255795721805353, "grad_norm": 1.4329038858413696, "learning_rate": 9.856684210526316e-05, "loss": 0.5277, "step": 
25458 }, { "epoch": 1.4256355694926643, "grad_norm": 1.3823659420013428, "learning_rate": 9.856657894736843e-05, "loss": 0.6759, "step": 25459 }, { "epoch": 1.4256915668047934, "grad_norm": 1.3341295719146729, "learning_rate": 9.856631578947369e-05, "loss": 0.4195, "step": 25460 }, { "epoch": 1.4257475641169224, "grad_norm": 1.3343552350997925, "learning_rate": 9.856605263157895e-05, "loss": 0.5346, "step": 25461 }, { "epoch": 1.4258035614290514, "grad_norm": 1.2055914402008057, "learning_rate": 9.856578947368421e-05, "loss": 0.4703, "step": 25462 }, { "epoch": 1.4258595587411804, "grad_norm": 2.759641170501709, "learning_rate": 9.856552631578948e-05, "loss": 0.4597, "step": 25463 }, { "epoch": 1.4259155560533094, "grad_norm": 1.228317141532898, "learning_rate": 9.856526315789474e-05, "loss": 0.427, "step": 25464 }, { "epoch": 1.4259715533654385, "grad_norm": 1.2668286561965942, "learning_rate": 9.8565e-05, "loss": 0.6717, "step": 25465 }, { "epoch": 1.4260275506775675, "grad_norm": 1.519857406616211, "learning_rate": 9.856473684210526e-05, "loss": 0.5231, "step": 25466 }, { "epoch": 1.4260835479896965, "grad_norm": 2.9786980152130127, "learning_rate": 9.856447368421054e-05, "loss": 0.5596, "step": 25467 }, { "epoch": 1.4261395453018255, "grad_norm": 1.5299054384231567, "learning_rate": 9.85642105263158e-05, "loss": 0.4107, "step": 25468 }, { "epoch": 1.4261955426139545, "grad_norm": 1.3316971063613892, "learning_rate": 9.856394736842105e-05, "loss": 0.4201, "step": 25469 }, { "epoch": 1.4262515399260836, "grad_norm": 1.4687387943267822, "learning_rate": 9.856368421052631e-05, "loss": 0.5388, "step": 25470 }, { "epoch": 1.4263075372382126, "grad_norm": 1.4971003532409668, "learning_rate": 9.856342105263159e-05, "loss": 0.4766, "step": 25471 }, { "epoch": 1.4263635345503416, "grad_norm": 1.244888186454773, "learning_rate": 9.856315789473685e-05, "loss": 0.4872, "step": 25472 }, { "epoch": 1.4264195318624706, "grad_norm": 1.1782761812210083, "learning_rate": 
9.856289473684212e-05, "loss": 0.4772, "step": 25473 }, { "epoch": 1.4264755291745996, "grad_norm": 1.4044368267059326, "learning_rate": 9.856263157894737e-05, "loss": 0.4442, "step": 25474 }, { "epoch": 1.4265315264867287, "grad_norm": 1.432761311531067, "learning_rate": 9.856236842105263e-05, "loss": 0.4935, "step": 25475 }, { "epoch": 1.4265875237988577, "grad_norm": 1.283448338508606, "learning_rate": 9.85621052631579e-05, "loss": 0.38, "step": 25476 }, { "epoch": 1.4266435211109867, "grad_norm": 1.4212082624435425, "learning_rate": 9.856184210526316e-05, "loss": 0.539, "step": 25477 }, { "epoch": 1.4266995184231157, "grad_norm": 1.3751952648162842, "learning_rate": 9.856157894736843e-05, "loss": 0.399, "step": 25478 }, { "epoch": 1.4267555157352447, "grad_norm": 1.0430189371109009, "learning_rate": 9.856131578947368e-05, "loss": 0.3344, "step": 25479 }, { "epoch": 1.4268115130473737, "grad_norm": 1.3726401329040527, "learning_rate": 9.856105263157895e-05, "loss": 0.5293, "step": 25480 }, { "epoch": 1.4268675103595028, "grad_norm": 1.6846280097961426, "learning_rate": 9.856078947368421e-05, "loss": 0.5713, "step": 25481 }, { "epoch": 1.4269235076716318, "grad_norm": 1.4059313535690308, "learning_rate": 9.856052631578949e-05, "loss": 0.4264, "step": 25482 }, { "epoch": 1.4269795049837608, "grad_norm": 1.0575387477874756, "learning_rate": 9.856026315789475e-05, "loss": 0.3784, "step": 25483 }, { "epoch": 1.4270355022958898, "grad_norm": 1.3428961038589478, "learning_rate": 9.856e-05, "loss": 0.5346, "step": 25484 }, { "epoch": 1.4270914996080188, "grad_norm": 1.1390739679336548, "learning_rate": 9.855973684210526e-05, "loss": 0.3727, "step": 25485 }, { "epoch": 1.4271474969201479, "grad_norm": 1.6633925437927246, "learning_rate": 9.855947368421054e-05, "loss": 0.551, "step": 25486 }, { "epoch": 1.4272034942322769, "grad_norm": 1.5276228189468384, "learning_rate": 9.85592105263158e-05, "loss": 0.4956, "step": 25487 }, { "epoch": 1.427259491544406, "grad_norm": 
1.3396916389465332, "learning_rate": 9.855894736842106e-05, "loss": 0.484, "step": 25488 }, { "epoch": 1.427315488856535, "grad_norm": 1.318367838859558, "learning_rate": 9.855868421052632e-05, "loss": 0.5128, "step": 25489 }, { "epoch": 1.427371486168664, "grad_norm": 1.217202067375183, "learning_rate": 9.855842105263159e-05, "loss": 0.4146, "step": 25490 }, { "epoch": 1.427427483480793, "grad_norm": 1.3000566959381104, "learning_rate": 9.855815789473685e-05, "loss": 0.5356, "step": 25491 }, { "epoch": 1.427483480792922, "grad_norm": 1.2982467412948608, "learning_rate": 9.855789473684211e-05, "loss": 0.4702, "step": 25492 }, { "epoch": 1.427539478105051, "grad_norm": 1.4222043752670288, "learning_rate": 9.855763157894737e-05, "loss": 0.4602, "step": 25493 }, { "epoch": 1.42759547541718, "grad_norm": 1.2356319427490234, "learning_rate": 9.855736842105263e-05, "loss": 0.4027, "step": 25494 }, { "epoch": 1.427651472729309, "grad_norm": 1.4177634716033936, "learning_rate": 9.85571052631579e-05, "loss": 0.3962, "step": 25495 }, { "epoch": 1.427707470041438, "grad_norm": 1.6410006284713745, "learning_rate": 9.855684210526316e-05, "loss": 0.4403, "step": 25496 }, { "epoch": 1.427763467353567, "grad_norm": 1.3921312093734741, "learning_rate": 9.855657894736842e-05, "loss": 0.4249, "step": 25497 }, { "epoch": 1.427819464665696, "grad_norm": 1.0755549669265747, "learning_rate": 9.855631578947368e-05, "loss": 0.3739, "step": 25498 }, { "epoch": 1.4278754619778251, "grad_norm": 1.423352599143982, "learning_rate": 9.855605263157896e-05, "loss": 0.514, "step": 25499 }, { "epoch": 1.4279314592899541, "grad_norm": 1.5200079679489136, "learning_rate": 9.855578947368421e-05, "loss": 0.5753, "step": 25500 }, { "epoch": 1.4279874566020831, "grad_norm": 1.4804867506027222, "learning_rate": 9.855552631578949e-05, "loss": 0.5534, "step": 25501 }, { "epoch": 1.4280434539142122, "grad_norm": 1.2663437128067017, "learning_rate": 9.855526315789473e-05, "loss": 0.4266, "step": 25502 }, { 
"epoch": 1.4280994512263412, "grad_norm": 1.291031002998352, "learning_rate": 9.855500000000001e-05, "loss": 0.4723, "step": 25503 }, { "epoch": 1.4281554485384702, "grad_norm": 1.6855638027191162, "learning_rate": 9.855473684210527e-05, "loss": 0.4717, "step": 25504 }, { "epoch": 1.4282114458505992, "grad_norm": 1.5187420845031738, "learning_rate": 9.855447368421054e-05, "loss": 0.4999, "step": 25505 }, { "epoch": 1.4282674431627282, "grad_norm": 1.2003096342086792, "learning_rate": 9.855421052631579e-05, "loss": 0.4537, "step": 25506 }, { "epoch": 1.4283234404748573, "grad_norm": 1.055364966392517, "learning_rate": 9.855394736842106e-05, "loss": 0.4704, "step": 25507 }, { "epoch": 1.4283794377869863, "grad_norm": 1.474247932434082, "learning_rate": 9.855368421052632e-05, "loss": 0.577, "step": 25508 }, { "epoch": 1.4284354350991153, "grad_norm": 1.2746540307998657, "learning_rate": 9.855342105263158e-05, "loss": 0.436, "step": 25509 }, { "epoch": 1.4284914324112443, "grad_norm": 1.1947314739227295, "learning_rate": 9.855315789473685e-05, "loss": 0.4319, "step": 25510 }, { "epoch": 1.4285474297233733, "grad_norm": 1.2716975212097168, "learning_rate": 9.85528947368421e-05, "loss": 0.4281, "step": 25511 }, { "epoch": 1.4286034270355024, "grad_norm": 1.1532578468322754, "learning_rate": 9.855263157894737e-05, "loss": 0.5804, "step": 25512 }, { "epoch": 1.4286594243476314, "grad_norm": 1.9912092685699463, "learning_rate": 9.855236842105263e-05, "loss": 0.5872, "step": 25513 }, { "epoch": 1.4287154216597604, "grad_norm": 1.3635388612747192, "learning_rate": 9.85521052631579e-05, "loss": 0.4668, "step": 25514 }, { "epoch": 1.4287714189718894, "grad_norm": 1.347943902015686, "learning_rate": 9.855184210526317e-05, "loss": 0.4532, "step": 25515 }, { "epoch": 1.4288274162840184, "grad_norm": 1.3383828401565552, "learning_rate": 9.855157894736842e-05, "loss": 0.4915, "step": 25516 }, { "epoch": 1.4288834135961475, "grad_norm": 1.4385755062103271, "learning_rate": 
9.855131578947368e-05, "loss": 0.4985, "step": 25517 }, { "epoch": 1.4289394109082765, "grad_norm": 1.3665130138397217, "learning_rate": 9.855105263157896e-05, "loss": 0.6255, "step": 25518 }, { "epoch": 1.4289954082204055, "grad_norm": 1.445753574371338, "learning_rate": 9.855078947368422e-05, "loss": 0.3899, "step": 25519 }, { "epoch": 1.4290514055325345, "grad_norm": 1.3229690790176392, "learning_rate": 9.855052631578948e-05, "loss": 0.3873, "step": 25520 }, { "epoch": 1.4291074028446635, "grad_norm": 1.4007450342178345, "learning_rate": 9.855026315789474e-05, "loss": 0.4926, "step": 25521 }, { "epoch": 1.4291634001567926, "grad_norm": 1.5794925689697266, "learning_rate": 9.855000000000001e-05, "loss": 0.486, "step": 25522 }, { "epoch": 1.4292193974689216, "grad_norm": 2.4457929134368896, "learning_rate": 9.854973684210527e-05, "loss": 0.4961, "step": 25523 }, { "epoch": 1.4292753947810506, "grad_norm": 1.2906749248504639, "learning_rate": 9.854947368421053e-05, "loss": 0.4901, "step": 25524 }, { "epoch": 1.4293313920931796, "grad_norm": 1.2493500709533691, "learning_rate": 9.854921052631579e-05, "loss": 0.4229, "step": 25525 }, { "epoch": 1.4293873894053086, "grad_norm": 1.722590684890747, "learning_rate": 9.854894736842106e-05, "loss": 0.8219, "step": 25526 }, { "epoch": 1.4294433867174376, "grad_norm": 1.228349208831787, "learning_rate": 9.854868421052632e-05, "loss": 0.4558, "step": 25527 }, { "epoch": 1.4294993840295667, "grad_norm": 1.3449047803878784, "learning_rate": 9.854842105263158e-05, "loss": 0.5169, "step": 25528 }, { "epoch": 1.4295553813416957, "grad_norm": 1.2607173919677734, "learning_rate": 9.854815789473684e-05, "loss": 0.4035, "step": 25529 }, { "epoch": 1.4296113786538247, "grad_norm": 1.19171941280365, "learning_rate": 9.85478947368421e-05, "loss": 0.3917, "step": 25530 }, { "epoch": 1.4296673759659537, "grad_norm": 1.2546151876449585, "learning_rate": 9.854763157894737e-05, "loss": 0.4492, "step": 25531 }, { "epoch": 1.4297233732780827, 
"grad_norm": 1.5277622938156128, "learning_rate": 9.854736842105263e-05, "loss": 0.5667, "step": 25532 }, { "epoch": 1.4297793705902118, "grad_norm": 1.401011347770691, "learning_rate": 9.854710526315791e-05, "loss": 0.4184, "step": 25533 }, { "epoch": 1.4298353679023408, "grad_norm": 1.8958184719085693, "learning_rate": 9.854684210526315e-05, "loss": 0.497, "step": 25534 }, { "epoch": 1.4298913652144698, "grad_norm": 1.5363988876342773, "learning_rate": 9.854657894736843e-05, "loss": 0.4567, "step": 25535 }, { "epoch": 1.4299473625265988, "grad_norm": 1.2325515747070312, "learning_rate": 9.854631578947369e-05, "loss": 0.4315, "step": 25536 }, { "epoch": 1.4300033598387278, "grad_norm": 1.4724162817001343, "learning_rate": 9.854605263157896e-05, "loss": 0.3764, "step": 25537 }, { "epoch": 1.4300593571508569, "grad_norm": 1.2884479761123657, "learning_rate": 9.854578947368422e-05, "loss": 0.37, "step": 25538 }, { "epoch": 1.4301153544629859, "grad_norm": 1.3686155080795288, "learning_rate": 9.854552631578948e-05, "loss": 0.5295, "step": 25539 }, { "epoch": 1.430171351775115, "grad_norm": 1.3763350248336792, "learning_rate": 9.854526315789474e-05, "loss": 0.5457, "step": 25540 }, { "epoch": 1.430227349087244, "grad_norm": 2.0514578819274902, "learning_rate": 9.854500000000001e-05, "loss": 0.4575, "step": 25541 }, { "epoch": 1.430283346399373, "grad_norm": 1.3097364902496338, "learning_rate": 9.854473684210527e-05, "loss": 0.4334, "step": 25542 }, { "epoch": 1.430339343711502, "grad_norm": 1.1507169008255005, "learning_rate": 9.854447368421053e-05, "loss": 0.5702, "step": 25543 }, { "epoch": 1.430395341023631, "grad_norm": 1.8972383737564087, "learning_rate": 9.854421052631579e-05, "loss": 0.4563, "step": 25544 }, { "epoch": 1.43045133833576, "grad_norm": 1.3155434131622314, "learning_rate": 9.854394736842105e-05, "loss": 0.5349, "step": 25545 }, { "epoch": 1.430507335647889, "grad_norm": 5.150670051574707, "learning_rate": 9.854368421052632e-05, "loss": 0.5007, 
"step": 25546 }, { "epoch": 1.430563332960018, "grad_norm": 1.504660725593567, "learning_rate": 9.854342105263158e-05, "loss": 0.4913, "step": 25547 }, { "epoch": 1.430619330272147, "grad_norm": 1.3363568782806396, "learning_rate": 9.854315789473684e-05, "loss": 0.405, "step": 25548 }, { "epoch": 1.430675327584276, "grad_norm": 1.1625854969024658, "learning_rate": 9.85428947368421e-05, "loss": 0.4709, "step": 25549 }, { "epoch": 1.430731324896405, "grad_norm": 1.4491811990737915, "learning_rate": 9.854263157894738e-05, "loss": 0.3984, "step": 25550 }, { "epoch": 1.430787322208534, "grad_norm": 1.5567790269851685, "learning_rate": 9.854236842105264e-05, "loss": 0.4872, "step": 25551 }, { "epoch": 1.4308433195206631, "grad_norm": 1.5600478649139404, "learning_rate": 9.85421052631579e-05, "loss": 0.4427, "step": 25552 }, { "epoch": 1.4308993168327921, "grad_norm": 1.2961442470550537, "learning_rate": 9.854184210526316e-05, "loss": 0.3906, "step": 25553 }, { "epoch": 1.4309553141449212, "grad_norm": 1.2450376749038696, "learning_rate": 9.854157894736843e-05, "loss": 0.5177, "step": 25554 }, { "epoch": 1.4310113114570502, "grad_norm": 1.8768779039382935, "learning_rate": 9.854131578947369e-05, "loss": 0.4998, "step": 25555 }, { "epoch": 1.4310673087691792, "grad_norm": 1.3666603565216064, "learning_rate": 9.854105263157896e-05, "loss": 0.4617, "step": 25556 }, { "epoch": 1.4311233060813082, "grad_norm": 1.6488827466964722, "learning_rate": 9.854078947368421e-05, "loss": 0.6511, "step": 25557 }, { "epoch": 1.4311793033934372, "grad_norm": 1.5491153001785278, "learning_rate": 9.854052631578948e-05, "loss": 0.4367, "step": 25558 }, { "epoch": 1.4312353007055663, "grad_norm": 1.3581222295761108, "learning_rate": 9.854026315789474e-05, "loss": 0.5209, "step": 25559 }, { "epoch": 1.4312912980176953, "grad_norm": 1.5273687839508057, "learning_rate": 9.854000000000002e-05, "loss": 0.4487, "step": 25560 }, { "epoch": 1.4313472953298243, "grad_norm": 1.622373342514038, 
"learning_rate": 9.853973684210526e-05, "loss": 0.4901, "step": 25561 }, { "epoch": 1.4314032926419533, "grad_norm": 1.276328682899475, "learning_rate": 9.853947368421052e-05, "loss": 0.559, "step": 25562 }, { "epoch": 1.4314592899540823, "grad_norm": 1.4135698080062866, "learning_rate": 9.85392105263158e-05, "loss": 0.4358, "step": 25563 }, { "epoch": 1.4315152872662114, "grad_norm": 1.2505730390548706, "learning_rate": 9.853894736842105e-05, "loss": 0.494, "step": 25564 }, { "epoch": 1.4315712845783404, "grad_norm": 1.347671389579773, "learning_rate": 9.853868421052633e-05, "loss": 0.4177, "step": 25565 }, { "epoch": 1.4316272818904694, "grad_norm": 1.7066562175750732, "learning_rate": 9.853842105263157e-05, "loss": 0.9907, "step": 25566 }, { "epoch": 1.4316832792025984, "grad_norm": 1.3701480627059937, "learning_rate": 9.853815789473685e-05, "loss": 0.4696, "step": 25567 }, { "epoch": 1.4317392765147272, "grad_norm": 1.3462611436843872, "learning_rate": 9.85378947368421e-05, "loss": 0.4758, "step": 25568 }, { "epoch": 1.4317952738268562, "grad_norm": 1.2861237525939941, "learning_rate": 9.853763157894738e-05, "loss": 0.3977, "step": 25569 }, { "epoch": 1.4318512711389852, "grad_norm": 1.3685340881347656, "learning_rate": 9.853736842105264e-05, "loss": 0.4813, "step": 25570 }, { "epoch": 1.4319072684511143, "grad_norm": 1.4137378931045532, "learning_rate": 9.85371052631579e-05, "loss": 0.5633, "step": 25571 }, { "epoch": 1.4319632657632433, "grad_norm": 1.2090106010437012, "learning_rate": 9.853684210526316e-05, "loss": 0.4796, "step": 25572 }, { "epoch": 1.4320192630753723, "grad_norm": 1.4746901988983154, "learning_rate": 9.853657894736843e-05, "loss": 0.4088, "step": 25573 }, { "epoch": 1.4320752603875013, "grad_norm": 1.1521053314208984, "learning_rate": 9.853631578947369e-05, "loss": 0.3851, "step": 25574 }, { "epoch": 1.4321312576996303, "grad_norm": 1.3855780363082886, "learning_rate": 9.853605263157895e-05, "loss": 0.4598, "step": 25575 }, { "epoch": 
1.4321872550117594, "grad_norm": 1.3446911573410034, "learning_rate": 9.853578947368421e-05, "loss": 0.4721, "step": 25576 }, { "epoch": 1.4322432523238884, "grad_norm": 1.3428047895431519, "learning_rate": 9.853552631578948e-05, "loss": 0.5576, "step": 25577 }, { "epoch": 1.4322992496360174, "grad_norm": 1.3166146278381348, "learning_rate": 9.853526315789474e-05, "loss": 0.4094, "step": 25578 }, { "epoch": 1.4323552469481464, "grad_norm": 1.4172027111053467, "learning_rate": 9.8535e-05, "loss": 0.4247, "step": 25579 }, { "epoch": 1.4324112442602754, "grad_norm": 1.9372472763061523, "learning_rate": 9.853473684210526e-05, "loss": 0.4461, "step": 25580 }, { "epoch": 1.4324672415724045, "grad_norm": 1.5230001211166382, "learning_rate": 9.853447368421052e-05, "loss": 0.4819, "step": 25581 }, { "epoch": 1.4325232388845335, "grad_norm": 1.5996944904327393, "learning_rate": 9.85342105263158e-05, "loss": 0.5124, "step": 25582 }, { "epoch": 1.4325792361966625, "grad_norm": 1.3001734018325806, "learning_rate": 9.853394736842106e-05, "loss": 0.4613, "step": 25583 }, { "epoch": 1.4326352335087915, "grad_norm": 1.233330249786377, "learning_rate": 9.853368421052632e-05, "loss": 0.3952, "step": 25584 }, { "epoch": 1.4326912308209205, "grad_norm": 1.220557451248169, "learning_rate": 9.853342105263158e-05, "loss": 0.4315, "step": 25585 }, { "epoch": 1.4327472281330496, "grad_norm": 1.838141679763794, "learning_rate": 9.853315789473685e-05, "loss": 0.5661, "step": 25586 }, { "epoch": 1.4328032254451786, "grad_norm": 1.4783620834350586, "learning_rate": 9.853289473684211e-05, "loss": 0.5511, "step": 25587 }, { "epoch": 1.4328592227573076, "grad_norm": 1.6853631734848022, "learning_rate": 9.853263157894738e-05, "loss": 0.666, "step": 25588 }, { "epoch": 1.4329152200694366, "grad_norm": 1.6526273488998413, "learning_rate": 9.853236842105263e-05, "loss": 0.4457, "step": 25589 }, { "epoch": 1.4329712173815656, "grad_norm": 1.7893047332763672, "learning_rate": 9.85321052631579e-05, 
"loss": 0.6248, "step": 25590 }, { "epoch": 1.4330272146936947, "grad_norm": 1.2511688470840454, "learning_rate": 9.853184210526316e-05, "loss": 0.4484, "step": 25591 }, { "epoch": 1.4330832120058237, "grad_norm": 2.7740895748138428, "learning_rate": 9.853157894736844e-05, "loss": 0.3603, "step": 25592 }, { "epoch": 1.4331392093179527, "grad_norm": 1.4321290254592896, "learning_rate": 9.85313157894737e-05, "loss": 0.4985, "step": 25593 }, { "epoch": 1.4331952066300817, "grad_norm": 1.5247721672058105, "learning_rate": 9.853105263157895e-05, "loss": 0.6196, "step": 25594 }, { "epoch": 1.4332512039422107, "grad_norm": 1.2495554685592651, "learning_rate": 9.853078947368421e-05, "loss": 0.4114, "step": 25595 }, { "epoch": 1.4333072012543397, "grad_norm": 1.3315740823745728, "learning_rate": 9.853052631578947e-05, "loss": 0.4687, "step": 25596 }, { "epoch": 1.4333631985664688, "grad_norm": 1.3829666376113892, "learning_rate": 9.853026315789475e-05, "loss": 0.4416, "step": 25597 }, { "epoch": 1.4334191958785978, "grad_norm": 1.4854471683502197, "learning_rate": 9.853e-05, "loss": 0.5246, "step": 25598 }, { "epoch": 1.4334751931907268, "grad_norm": 1.2463632822036743, "learning_rate": 9.852973684210527e-05, "loss": 0.438, "step": 25599 }, { "epoch": 1.4335311905028558, "grad_norm": 1.2956230640411377, "learning_rate": 9.852947368421053e-05, "loss": 0.524, "step": 25600 }, { "epoch": 1.4335871878149848, "grad_norm": 1.356801152229309, "learning_rate": 9.85292105263158e-05, "loss": 0.4658, "step": 25601 }, { "epoch": 1.4336431851271139, "grad_norm": 1.358520746231079, "learning_rate": 9.852894736842106e-05, "loss": 0.4039, "step": 25602 }, { "epoch": 1.4336991824392429, "grad_norm": 1.3137705326080322, "learning_rate": 9.852868421052632e-05, "loss": 0.4378, "step": 25603 }, { "epoch": 1.433755179751372, "grad_norm": 1.3565123081207275, "learning_rate": 9.852842105263158e-05, "loss": 0.4547, "step": 25604 }, { "epoch": 1.433811177063501, "grad_norm": 3.7248382568359375, 
"learning_rate": 9.852815789473685e-05, "loss": 0.6575, "step": 25605 }, { "epoch": 1.43386717437563, "grad_norm": 1.2829116582870483, "learning_rate": 9.852789473684211e-05, "loss": 0.405, "step": 25606 }, { "epoch": 1.433923171687759, "grad_norm": 2.3231306076049805, "learning_rate": 9.852763157894737e-05, "loss": 0.6263, "step": 25607 }, { "epoch": 1.433979168999888, "grad_norm": 1.3743995428085327, "learning_rate": 9.852736842105263e-05, "loss": 0.4791, "step": 25608 }, { "epoch": 1.434035166312017, "grad_norm": 1.517982840538025, "learning_rate": 9.85271052631579e-05, "loss": 0.5326, "step": 25609 }, { "epoch": 1.434091163624146, "grad_norm": 1.328944206237793, "learning_rate": 9.852684210526316e-05, "loss": 0.4551, "step": 25610 }, { "epoch": 1.434147160936275, "grad_norm": 1.4466986656188965, "learning_rate": 9.852657894736842e-05, "loss": 0.3798, "step": 25611 }, { "epoch": 1.434203158248404, "grad_norm": 1.442425012588501, "learning_rate": 9.852631578947368e-05, "loss": 0.478, "step": 25612 }, { "epoch": 1.434259155560533, "grad_norm": 1.1874006986618042, "learning_rate": 9.852605263157894e-05, "loss": 0.3621, "step": 25613 }, { "epoch": 1.434315152872662, "grad_norm": 1.4170418977737427, "learning_rate": 9.852578947368422e-05, "loss": 0.4213, "step": 25614 }, { "epoch": 1.434371150184791, "grad_norm": 1.4486767053604126, "learning_rate": 9.852552631578948e-05, "loss": 0.4419, "step": 25615 }, { "epoch": 1.4344271474969201, "grad_norm": 1.645767092704773, "learning_rate": 9.852526315789474e-05, "loss": 0.4913, "step": 25616 }, { "epoch": 1.4344831448090491, "grad_norm": 1.2231667041778564, "learning_rate": 9.8525e-05, "loss": 0.4702, "step": 25617 }, { "epoch": 1.4345391421211782, "grad_norm": 1.3708308935165405, "learning_rate": 9.852473684210527e-05, "loss": 0.4443, "step": 25618 }, { "epoch": 1.4345951394333072, "grad_norm": 1.3079771995544434, "learning_rate": 9.852447368421053e-05, "loss": 0.4473, "step": 25619 }, { "epoch": 1.4346511367454362, 
"grad_norm": 1.2420459985733032, "learning_rate": 9.85242105263158e-05, "loss": 0.4299, "step": 25620 }, { "epoch": 1.4347071340575652, "grad_norm": 1.3548718690872192, "learning_rate": 9.852394736842105e-05, "loss": 0.3791, "step": 25621 }, { "epoch": 1.4347631313696942, "grad_norm": 1.2706632614135742, "learning_rate": 9.852368421052632e-05, "loss": 0.4779, "step": 25622 }, { "epoch": 1.4348191286818233, "grad_norm": 1.0939626693725586, "learning_rate": 9.852342105263158e-05, "loss": 0.4237, "step": 25623 }, { "epoch": 1.4348751259939523, "grad_norm": 1.3191313743591309, "learning_rate": 9.852315789473685e-05, "loss": 0.4263, "step": 25624 }, { "epoch": 1.4349311233060813, "grad_norm": 1.2442973852157593, "learning_rate": 9.852289473684211e-05, "loss": 0.3207, "step": 25625 }, { "epoch": 1.4349871206182103, "grad_norm": 1.3126877546310425, "learning_rate": 9.852263157894737e-05, "loss": 0.4555, "step": 25626 }, { "epoch": 1.4350431179303393, "grad_norm": 1.4616296291351318, "learning_rate": 9.852236842105263e-05, "loss": 0.3951, "step": 25627 }, { "epoch": 1.4350991152424684, "grad_norm": 1.2831814289093018, "learning_rate": 9.852210526315791e-05, "loss": 0.423, "step": 25628 }, { "epoch": 1.4351551125545974, "grad_norm": 1.1951026916503906, "learning_rate": 9.852184210526317e-05, "loss": 0.3939, "step": 25629 }, { "epoch": 1.4352111098667264, "grad_norm": 1.3095088005065918, "learning_rate": 9.852157894736843e-05, "loss": 0.5734, "step": 25630 }, { "epoch": 1.4352671071788554, "grad_norm": 1.3465334177017212, "learning_rate": 9.852131578947369e-05, "loss": 0.4808, "step": 25631 }, { "epoch": 1.4353231044909844, "grad_norm": 1.1294889450073242, "learning_rate": 9.852105263157895e-05, "loss": 0.5764, "step": 25632 }, { "epoch": 1.4353791018031135, "grad_norm": 1.4509919881820679, "learning_rate": 9.852078947368422e-05, "loss": 0.543, "step": 25633 }, { "epoch": 1.4354350991152425, "grad_norm": 1.4717315435409546, "learning_rate": 9.852052631578948e-05, "loss": 
0.5028, "step": 25634 }, { "epoch": 1.4354910964273715, "grad_norm": 1.5489089488983154, "learning_rate": 9.852026315789474e-05, "loss": 0.6137, "step": 25635 }, { "epoch": 1.4355470937395005, "grad_norm": 1.3831948041915894, "learning_rate": 9.852e-05, "loss": 0.3883, "step": 25636 }, { "epoch": 1.4356030910516295, "grad_norm": 1.2413886785507202, "learning_rate": 9.851973684210527e-05, "loss": 0.5447, "step": 25637 }, { "epoch": 1.4356590883637586, "grad_norm": 1.3069621324539185, "learning_rate": 9.851947368421053e-05, "loss": 0.469, "step": 25638 }, { "epoch": 1.4357150856758876, "grad_norm": 1.530605435371399, "learning_rate": 9.851921052631579e-05, "loss": 0.5449, "step": 25639 }, { "epoch": 1.4357710829880166, "grad_norm": 1.1206198930740356, "learning_rate": 9.851894736842105e-05, "loss": 0.41, "step": 25640 }, { "epoch": 1.4358270803001456, "grad_norm": 1.5072031021118164, "learning_rate": 9.851868421052632e-05, "loss": 0.4225, "step": 25641 }, { "epoch": 1.4358830776122746, "grad_norm": 1.2919793128967285, "learning_rate": 9.851842105263158e-05, "loss": 0.4009, "step": 25642 }, { "epoch": 1.4359390749244036, "grad_norm": 1.3727933168411255, "learning_rate": 9.851815789473686e-05, "loss": 0.4062, "step": 25643 }, { "epoch": 1.4359950722365327, "grad_norm": 1.3184592723846436, "learning_rate": 9.85178947368421e-05, "loss": 0.5137, "step": 25644 }, { "epoch": 1.4360510695486617, "grad_norm": 1.1559818983078003, "learning_rate": 9.851763157894738e-05, "loss": 0.4556, "step": 25645 }, { "epoch": 1.4361070668607907, "grad_norm": 1.5235875844955444, "learning_rate": 9.851736842105264e-05, "loss": 0.459, "step": 25646 }, { "epoch": 1.4361630641729197, "grad_norm": 1.1688374280929565, "learning_rate": 9.851710526315791e-05, "loss": 0.3956, "step": 25647 }, { "epoch": 1.4362190614850487, "grad_norm": 1.375508427619934, "learning_rate": 9.851684210526317e-05, "loss": 0.43, "step": 25648 }, { "epoch": 1.4362750587971778, "grad_norm": 1.9250904321670532, 
"learning_rate": 9.851657894736842e-05, "loss": 0.4892, "step": 25649 }, { "epoch": 1.4363310561093068, "grad_norm": 1.5735348463058472, "learning_rate": 9.851631578947369e-05, "loss": 0.6569, "step": 25650 }, { "epoch": 1.4363870534214358, "grad_norm": 1.421795129776001, "learning_rate": 9.851605263157895e-05, "loss": 0.4173, "step": 25651 }, { "epoch": 1.4364430507335648, "grad_norm": 1.58512282371521, "learning_rate": 9.851578947368422e-05, "loss": 0.393, "step": 25652 }, { "epoch": 1.4364990480456938, "grad_norm": 1.1600414514541626, "learning_rate": 9.851552631578947e-05, "loss": 0.4621, "step": 25653 }, { "epoch": 1.4365550453578229, "grad_norm": 1.1983907222747803, "learning_rate": 9.851526315789474e-05, "loss": 0.4011, "step": 25654 }, { "epoch": 1.4366110426699519, "grad_norm": 1.5610764026641846, "learning_rate": 9.8515e-05, "loss": 0.6253, "step": 25655 }, { "epoch": 1.436667039982081, "grad_norm": 1.3131215572357178, "learning_rate": 9.851473684210527e-05, "loss": 0.4615, "step": 25656 }, { "epoch": 1.43672303729421, "grad_norm": 1.3105984926223755, "learning_rate": 9.851447368421053e-05, "loss": 0.4597, "step": 25657 }, { "epoch": 1.436779034606339, "grad_norm": 1.5818188190460205, "learning_rate": 9.85142105263158e-05, "loss": 0.4973, "step": 25658 }, { "epoch": 1.436835031918468, "grad_norm": 1.244718313217163, "learning_rate": 9.851394736842105e-05, "loss": 0.4456, "step": 25659 }, { "epoch": 1.436891029230597, "grad_norm": 1.3325386047363281, "learning_rate": 9.851368421052633e-05, "loss": 0.4782, "step": 25660 }, { "epoch": 1.436947026542726, "grad_norm": 1.2712047100067139, "learning_rate": 9.851342105263159e-05, "loss": 0.4101, "step": 25661 }, { "epoch": 1.437003023854855, "grad_norm": 1.068215012550354, "learning_rate": 9.851315789473685e-05, "loss": 0.343, "step": 25662 }, { "epoch": 1.437059021166984, "grad_norm": 1.4893957376480103, "learning_rate": 9.85128947368421e-05, "loss": 0.5978, "step": 25663 }, { "epoch": 1.437115018479113, 
"grad_norm": 1.3513212203979492, "learning_rate": 9.851263157894738e-05, "loss": 0.4153, "step": 25664 }, { "epoch": 1.437171015791242, "grad_norm": 1.544857382774353, "learning_rate": 9.851236842105264e-05, "loss": 0.5701, "step": 25665 }, { "epoch": 1.437227013103371, "grad_norm": 1.2158294916152954, "learning_rate": 9.85121052631579e-05, "loss": 0.3734, "step": 25666 }, { "epoch": 1.4372830104155, "grad_norm": 2.1777915954589844, "learning_rate": 9.851184210526316e-05, "loss": 0.7021, "step": 25667 }, { "epoch": 1.4373390077276291, "grad_norm": 4.20610237121582, "learning_rate": 9.851157894736842e-05, "loss": 0.5588, "step": 25668 }, { "epoch": 1.4373950050397581, "grad_norm": 1.689322829246521, "learning_rate": 9.851131578947369e-05, "loss": 0.4976, "step": 25669 }, { "epoch": 1.4374510023518872, "grad_norm": 1.2596203088760376, "learning_rate": 9.851105263157895e-05, "loss": 0.4639, "step": 25670 }, { "epoch": 1.4375069996640162, "grad_norm": 1.1975135803222656, "learning_rate": 9.851078947368421e-05, "loss": 0.4729, "step": 25671 }, { "epoch": 1.4375629969761452, "grad_norm": 1.586650013923645, "learning_rate": 9.851052631578947e-05, "loss": 0.6526, "step": 25672 }, { "epoch": 1.4376189942882742, "grad_norm": 1.5360716581344604, "learning_rate": 9.851026315789474e-05, "loss": 0.4654, "step": 25673 }, { "epoch": 1.4376749916004032, "grad_norm": 1.3076252937316895, "learning_rate": 9.851e-05, "loss": 0.4586, "step": 25674 }, { "epoch": 1.4377309889125323, "grad_norm": 1.2574946880340576, "learning_rate": 9.850973684210528e-05, "loss": 0.394, "step": 25675 }, { "epoch": 1.4377869862246613, "grad_norm": 1.2691985368728638, "learning_rate": 9.850947368421052e-05, "loss": 0.4306, "step": 25676 }, { "epoch": 1.4378429835367903, "grad_norm": 1.185869812965393, "learning_rate": 9.85092105263158e-05, "loss": 0.3659, "step": 25677 }, { "epoch": 1.4378989808489193, "grad_norm": 1.2608500719070435, "learning_rate": 9.850894736842106e-05, "loss": 0.3736, "step": 25678 }, { 
"epoch": 1.4379549781610483, "grad_norm": 1.475943922996521, "learning_rate": 9.850868421052633e-05, "loss": 0.5287, "step": 25679 }, { "epoch": 1.4380109754731774, "grad_norm": 1.4908572435379028, "learning_rate": 9.850842105263159e-05, "loss": 0.5296, "step": 25680 }, { "epoch": 1.4380669727853064, "grad_norm": 1.2638475894927979, "learning_rate": 9.850815789473685e-05, "loss": 0.4952, "step": 25681 }, { "epoch": 1.4381229700974354, "grad_norm": 1.2012163400650024, "learning_rate": 9.850789473684211e-05, "loss": 0.5255, "step": 25682 }, { "epoch": 1.4381789674095644, "grad_norm": 2.8469507694244385, "learning_rate": 9.850763157894737e-05, "loss": 0.523, "step": 25683 }, { "epoch": 1.4382349647216934, "grad_norm": 1.1035187244415283, "learning_rate": 9.850736842105264e-05, "loss": 0.3706, "step": 25684 }, { "epoch": 1.4382909620338225, "grad_norm": 1.0298693180084229, "learning_rate": 9.85071052631579e-05, "loss": 0.3495, "step": 25685 }, { "epoch": 1.4383469593459515, "grad_norm": 1.5727527141571045, "learning_rate": 9.850684210526316e-05, "loss": 0.6739, "step": 25686 }, { "epoch": 1.4384029566580805, "grad_norm": 1.750372052192688, "learning_rate": 9.850657894736842e-05, "loss": 0.6113, "step": 25687 }, { "epoch": 1.4384589539702095, "grad_norm": 1.5725183486938477, "learning_rate": 9.85063157894737e-05, "loss": 0.5526, "step": 25688 }, { "epoch": 1.4385149512823385, "grad_norm": 1.3974330425262451, "learning_rate": 9.850605263157895e-05, "loss": 0.4032, "step": 25689 }, { "epoch": 1.4385709485944675, "grad_norm": 1.3261353969573975, "learning_rate": 9.850578947368421e-05, "loss": 0.4921, "step": 25690 }, { "epoch": 1.4386269459065966, "grad_norm": 1.3843958377838135, "learning_rate": 9.850552631578947e-05, "loss": 0.4347, "step": 25691 }, { "epoch": 1.4386829432187254, "grad_norm": 1.2524871826171875, "learning_rate": 9.850526315789475e-05, "loss": 0.4132, "step": 25692 }, { "epoch": 1.4387389405308544, "grad_norm": 1.102995753288269, "learning_rate": 
9.8505e-05, "loss": 0.3919, "step": 25693 }, { "epoch": 1.4387949378429834, "grad_norm": 1.183918833732605, "learning_rate": 9.850473684210527e-05, "loss": 0.5171, "step": 25694 }, { "epoch": 1.4388509351551124, "grad_norm": 1.4520643949508667, "learning_rate": 9.850447368421053e-05, "loss": 0.4224, "step": 25695 }, { "epoch": 1.4389069324672414, "grad_norm": 1.2692177295684814, "learning_rate": 9.85042105263158e-05, "loss": 0.4775, "step": 25696 }, { "epoch": 1.4389629297793705, "grad_norm": 1.3434689044952393, "learning_rate": 9.850394736842106e-05, "loss": 0.4722, "step": 25697 }, { "epoch": 1.4390189270914995, "grad_norm": 1.5888018608093262, "learning_rate": 9.850368421052633e-05, "loss": 0.6064, "step": 25698 }, { "epoch": 1.4390749244036285, "grad_norm": 1.286224603652954, "learning_rate": 9.850342105263158e-05, "loss": 0.4078, "step": 25699 }, { "epoch": 1.4391309217157575, "grad_norm": 1.6147180795669556, "learning_rate": 9.850315789473684e-05, "loss": 0.6666, "step": 25700 }, { "epoch": 1.4391869190278865, "grad_norm": 1.2030236721038818, "learning_rate": 9.850289473684211e-05, "loss": 0.421, "step": 25701 }, { "epoch": 1.4392429163400156, "grad_norm": 1.427893042564392, "learning_rate": 9.850263157894737e-05, "loss": 0.4825, "step": 25702 }, { "epoch": 1.4392989136521446, "grad_norm": 1.3892394304275513, "learning_rate": 9.850236842105264e-05, "loss": 0.6434, "step": 25703 }, { "epoch": 1.4393549109642736, "grad_norm": 1.3688900470733643, "learning_rate": 9.850210526315789e-05, "loss": 0.5591, "step": 25704 }, { "epoch": 1.4394109082764026, "grad_norm": 1.286647081375122, "learning_rate": 9.850184210526316e-05, "loss": 0.5014, "step": 25705 }, { "epoch": 1.4394669055885316, "grad_norm": 1.2540347576141357, "learning_rate": 9.850157894736842e-05, "loss": 0.4826, "step": 25706 }, { "epoch": 1.4395229029006607, "grad_norm": 1.377650260925293, "learning_rate": 9.85013157894737e-05, "loss": 0.4497, "step": 25707 }, { "epoch": 1.4395789002127897, "grad_norm": 
1.3599501848220825, "learning_rate": 9.850105263157894e-05, "loss": 0.4194, "step": 25708 }, { "epoch": 1.4396348975249187, "grad_norm": 1.356694221496582, "learning_rate": 9.850078947368422e-05, "loss": 0.3896, "step": 25709 }, { "epoch": 1.4396908948370477, "grad_norm": 1.2018773555755615, "learning_rate": 9.850052631578948e-05, "loss": 0.4454, "step": 25710 }, { "epoch": 1.4397468921491767, "grad_norm": 1.5941123962402344, "learning_rate": 9.850026315789475e-05, "loss": 0.5182, "step": 25711 }, { "epoch": 1.4398028894613057, "grad_norm": 1.4879815578460693, "learning_rate": 9.850000000000001e-05, "loss": 0.4818, "step": 25712 }, { "epoch": 1.4398588867734348, "grad_norm": 1.2622997760772705, "learning_rate": 9.849973684210527e-05, "loss": 0.4667, "step": 25713 }, { "epoch": 1.4399148840855638, "grad_norm": 1.4573948383331299, "learning_rate": 9.849947368421053e-05, "loss": 0.4054, "step": 25714 }, { "epoch": 1.4399708813976928, "grad_norm": 1.443192720413208, "learning_rate": 9.84992105263158e-05, "loss": 0.5356, "step": 25715 }, { "epoch": 1.4400268787098218, "grad_norm": 1.1048190593719482, "learning_rate": 9.849894736842106e-05, "loss": 0.3802, "step": 25716 }, { "epoch": 1.4400828760219508, "grad_norm": 1.1638503074645996, "learning_rate": 9.849868421052632e-05, "loss": 0.4394, "step": 25717 }, { "epoch": 1.4401388733340799, "grad_norm": 1.3394076824188232, "learning_rate": 9.849842105263158e-05, "loss": 0.5312, "step": 25718 }, { "epoch": 1.4401948706462089, "grad_norm": 1.2208266258239746, "learning_rate": 9.849815789473684e-05, "loss": 0.3839, "step": 25719 }, { "epoch": 1.440250867958338, "grad_norm": 1.4026274681091309, "learning_rate": 9.849789473684211e-05, "loss": 0.4958, "step": 25720 }, { "epoch": 1.440306865270467, "grad_norm": 1.4235761165618896, "learning_rate": 9.849763157894737e-05, "loss": 0.5339, "step": 25721 }, { "epoch": 1.440362862582596, "grad_norm": 1.5458778142929077, "learning_rate": 9.849736842105263e-05, "loss": 0.5653, "step": 
25722 }, { "epoch": 1.440418859894725, "grad_norm": 7.980746746063232, "learning_rate": 9.849710526315789e-05, "loss": 0.4956, "step": 25723 }, { "epoch": 1.440474857206854, "grad_norm": 1.320092797279358, "learning_rate": 9.849684210526317e-05, "loss": 0.5172, "step": 25724 }, { "epoch": 1.440530854518983, "grad_norm": 1.689001441001892, "learning_rate": 9.849657894736843e-05, "loss": 0.5753, "step": 25725 }, { "epoch": 1.440586851831112, "grad_norm": 1.5204671621322632, "learning_rate": 9.849631578947369e-05, "loss": 0.5524, "step": 25726 }, { "epoch": 1.440642849143241, "grad_norm": 1.219130039215088, "learning_rate": 9.849605263157895e-05, "loss": 0.4574, "step": 25727 }, { "epoch": 1.44069884645537, "grad_norm": 1.1669539213180542, "learning_rate": 9.849578947368422e-05, "loss": 0.5025, "step": 25728 }, { "epoch": 1.440754843767499, "grad_norm": 1.4158577919006348, "learning_rate": 9.849552631578948e-05, "loss": 0.4848, "step": 25729 }, { "epoch": 1.440810841079628, "grad_norm": 1.3593709468841553, "learning_rate": 9.849526315789475e-05, "loss": 0.4785, "step": 25730 }, { "epoch": 1.440866838391757, "grad_norm": 1.3200407028198242, "learning_rate": 9.8495e-05, "loss": 0.3671, "step": 25731 }, { "epoch": 1.4409228357038861, "grad_norm": 2.0137245655059814, "learning_rate": 9.849473684210527e-05, "loss": 0.5586, "step": 25732 }, { "epoch": 1.4409788330160151, "grad_norm": 1.4319556951522827, "learning_rate": 9.849447368421053e-05, "loss": 0.4586, "step": 25733 }, { "epoch": 1.4410348303281442, "grad_norm": 1.4195220470428467, "learning_rate": 9.849421052631579e-05, "loss": 0.5305, "step": 25734 }, { "epoch": 1.4410908276402732, "grad_norm": 1.761801838874817, "learning_rate": 9.849394736842106e-05, "loss": 0.6981, "step": 25735 }, { "epoch": 1.4411468249524022, "grad_norm": 1.3044332265853882, "learning_rate": 9.849368421052631e-05, "loss": 0.4531, "step": 25736 }, { "epoch": 1.4412028222645312, "grad_norm": 1.449478030204773, "learning_rate": 
9.849342105263158e-05, "loss": 0.5119, "step": 25737 }, { "epoch": 1.4412588195766602, "grad_norm": 1.167271375656128, "learning_rate": 9.849315789473684e-05, "loss": 0.4048, "step": 25738 }, { "epoch": 1.4413148168887893, "grad_norm": 1.1470009088516235, "learning_rate": 9.849289473684212e-05, "loss": 0.4402, "step": 25739 }, { "epoch": 1.4413708142009183, "grad_norm": 1.1541756391525269, "learning_rate": 9.849263157894738e-05, "loss": 0.4063, "step": 25740 }, { "epoch": 1.4414268115130473, "grad_norm": 1.43264901638031, "learning_rate": 9.849236842105264e-05, "loss": 0.4164, "step": 25741 }, { "epoch": 1.4414828088251763, "grad_norm": 1.517122745513916, "learning_rate": 9.84921052631579e-05, "loss": 0.6524, "step": 25742 }, { "epoch": 1.4415388061373053, "grad_norm": 1.531346082687378, "learning_rate": 9.849184210526317e-05, "loss": 0.4592, "step": 25743 }, { "epoch": 1.4415948034494344, "grad_norm": 1.2700045108795166, "learning_rate": 9.849157894736843e-05, "loss": 0.4308, "step": 25744 }, { "epoch": 1.4416508007615634, "grad_norm": 1.3897801637649536, "learning_rate": 9.849131578947369e-05, "loss": 0.5314, "step": 25745 }, { "epoch": 1.4417067980736924, "grad_norm": 1.5570229291915894, "learning_rate": 9.849105263157895e-05, "loss": 0.4738, "step": 25746 }, { "epoch": 1.4417627953858214, "grad_norm": 1.3779877424240112, "learning_rate": 9.849078947368422e-05, "loss": 0.6156, "step": 25747 }, { "epoch": 1.4418187926979504, "grad_norm": 1.4035085439682007, "learning_rate": 9.849052631578948e-05, "loss": 0.4309, "step": 25748 }, { "epoch": 1.4418747900100795, "grad_norm": 1.1910206079483032, "learning_rate": 9.849026315789474e-05, "loss": 0.4711, "step": 25749 }, { "epoch": 1.4419307873222085, "grad_norm": 1.4764854907989502, "learning_rate": 9.849e-05, "loss": 0.4317, "step": 25750 }, { "epoch": 1.4419867846343375, "grad_norm": 1.3121896982192993, "learning_rate": 9.848973684210526e-05, "loss": 0.4307, "step": 25751 }, { "epoch": 1.4420427819464665, "grad_norm": 
1.616694450378418, "learning_rate": 9.848947368421053e-05, "loss": 0.7213, "step": 25752 }, { "epoch": 1.4420987792585955, "grad_norm": 1.1158812046051025, "learning_rate": 9.84892105263158e-05, "loss": 0.3822, "step": 25753 }, { "epoch": 1.4421547765707246, "grad_norm": 1.32943594455719, "learning_rate": 9.848894736842105e-05, "loss": 0.4025, "step": 25754 }, { "epoch": 1.4422107738828536, "grad_norm": 1.6812101602554321, "learning_rate": 9.848868421052631e-05, "loss": 0.557, "step": 25755 }, { "epoch": 1.4422667711949826, "grad_norm": 1.5972145795822144, "learning_rate": 9.848842105263159e-05, "loss": 0.4719, "step": 25756 }, { "epoch": 1.4423227685071116, "grad_norm": 1.3968019485473633, "learning_rate": 9.848815789473685e-05, "loss": 0.4173, "step": 25757 }, { "epoch": 1.4423787658192406, "grad_norm": 1.6362229585647583, "learning_rate": 9.848789473684212e-05, "loss": 0.5076, "step": 25758 }, { "epoch": 1.4424347631313696, "grad_norm": 1.2163110971450806, "learning_rate": 9.848763157894737e-05, "loss": 0.4992, "step": 25759 }, { "epoch": 1.4424907604434987, "grad_norm": 1.2565122842788696, "learning_rate": 9.848736842105264e-05, "loss": 0.4421, "step": 25760 }, { "epoch": 1.4425467577556277, "grad_norm": 1.3888131380081177, "learning_rate": 9.84871052631579e-05, "loss": 0.5243, "step": 25761 }, { "epoch": 1.4426027550677567, "grad_norm": 1.3142640590667725, "learning_rate": 9.848684210526317e-05, "loss": 0.3618, "step": 25762 }, { "epoch": 1.4426587523798857, "grad_norm": 1.2778666019439697, "learning_rate": 9.848657894736842e-05, "loss": 0.4543, "step": 25763 }, { "epoch": 1.4427147496920147, "grad_norm": 1.3430832624435425, "learning_rate": 9.848631578947369e-05, "loss": 0.5383, "step": 25764 }, { "epoch": 1.4427707470041438, "grad_norm": 1.625623106956482, "learning_rate": 9.848605263157895e-05, "loss": 0.5379, "step": 25765 }, { "epoch": 1.4428267443162728, "grad_norm": 1.27606999874115, "learning_rate": 9.848578947368422e-05, "loss": 0.3628, "step": 25766 
}, { "epoch": 1.4428827416284018, "grad_norm": 1.6553044319152832, "learning_rate": 9.848552631578948e-05, "loss": 0.7632, "step": 25767 }, { "epoch": 1.4429387389405308, "grad_norm": 1.6357741355895996, "learning_rate": 9.848526315789474e-05, "loss": 0.5964, "step": 25768 }, { "epoch": 1.4429947362526598, "grad_norm": 1.7604966163635254, "learning_rate": 9.8485e-05, "loss": 0.7096, "step": 25769 }, { "epoch": 1.4430507335647889, "grad_norm": 1.5702502727508545, "learning_rate": 9.848473684210526e-05, "loss": 0.4116, "step": 25770 }, { "epoch": 1.4431067308769179, "grad_norm": 1.343562364578247, "learning_rate": 9.848447368421054e-05, "loss": 0.427, "step": 25771 }, { "epoch": 1.443162728189047, "grad_norm": 1.2983551025390625, "learning_rate": 9.84842105263158e-05, "loss": 0.38, "step": 25772 }, { "epoch": 1.443218725501176, "grad_norm": 1.2825380563735962, "learning_rate": 9.848394736842106e-05, "loss": 0.4135, "step": 25773 }, { "epoch": 1.443274722813305, "grad_norm": 1.3777891397476196, "learning_rate": 9.848368421052632e-05, "loss": 0.3667, "step": 25774 }, { "epoch": 1.443330720125434, "grad_norm": 1.2664874792099, "learning_rate": 9.848342105263159e-05, "loss": 0.4057, "step": 25775 }, { "epoch": 1.443386717437563, "grad_norm": 1.5738770961761475, "learning_rate": 9.848315789473685e-05, "loss": 0.511, "step": 25776 }, { "epoch": 1.443442714749692, "grad_norm": 1.3449558019638062, "learning_rate": 9.848289473684211e-05, "loss": 0.5309, "step": 25777 }, { "epoch": 1.443498712061821, "grad_norm": 1.1180827617645264, "learning_rate": 9.848263157894737e-05, "loss": 0.4378, "step": 25778 }, { "epoch": 1.44355470937395, "grad_norm": 1.8353078365325928, "learning_rate": 9.848236842105264e-05, "loss": 0.3912, "step": 25779 }, { "epoch": 1.443610706686079, "grad_norm": 2.383636951446533, "learning_rate": 9.84821052631579e-05, "loss": 0.6155, "step": 25780 }, { "epoch": 1.443666703998208, "grad_norm": 1.5396950244903564, "learning_rate": 9.848184210526316e-05, "loss": 
0.4052, "step": 25781 }, { "epoch": 1.443722701310337, "grad_norm": 34.623844146728516, "learning_rate": 9.848157894736842e-05, "loss": 0.4543, "step": 25782 }, { "epoch": 1.443778698622466, "grad_norm": 1.4119480848312378, "learning_rate": 9.84813157894737e-05, "loss": 0.7073, "step": 25783 }, { "epoch": 1.4438346959345951, "grad_norm": 1.2975821495056152, "learning_rate": 9.848105263157895e-05, "loss": 0.4418, "step": 25784 }, { "epoch": 1.4438906932467241, "grad_norm": 1.4934360980987549, "learning_rate": 9.848078947368423e-05, "loss": 0.4745, "step": 25785 }, { "epoch": 1.4439466905588532, "grad_norm": 1.6379145383834839, "learning_rate": 9.848052631578947e-05, "loss": 0.4778, "step": 25786 }, { "epoch": 1.4440026878709822, "grad_norm": 1.3537036180496216, "learning_rate": 9.848026315789473e-05, "loss": 0.4766, "step": 25787 }, { "epoch": 1.4440586851831112, "grad_norm": 1.351130723953247, "learning_rate": 9.848e-05, "loss": 0.4779, "step": 25788 }, { "epoch": 1.4441146824952402, "grad_norm": 1.3500784635543823, "learning_rate": 9.847973684210527e-05, "loss": 0.4242, "step": 25789 }, { "epoch": 1.4441706798073692, "grad_norm": 1.1697924137115479, "learning_rate": 9.847947368421054e-05, "loss": 0.4517, "step": 25790 }, { "epoch": 1.4442266771194983, "grad_norm": 1.5843197107315063, "learning_rate": 9.847921052631578e-05, "loss": 0.3941, "step": 25791 }, { "epoch": 1.4442826744316273, "grad_norm": 1.3092375993728638, "learning_rate": 9.847894736842106e-05, "loss": 0.5552, "step": 25792 }, { "epoch": 1.4443386717437563, "grad_norm": 1.3153843879699707, "learning_rate": 9.847868421052632e-05, "loss": 0.3795, "step": 25793 }, { "epoch": 1.4443946690558853, "grad_norm": 1.3666855096817017, "learning_rate": 9.847842105263159e-05, "loss": 0.5007, "step": 25794 }, { "epoch": 1.4444506663680143, "grad_norm": 1.3877649307250977, "learning_rate": 9.847815789473685e-05, "loss": 0.5185, "step": 25795 }, { "epoch": 1.4445066636801434, "grad_norm": 3.332524299621582, 
"learning_rate": 9.847789473684211e-05, "loss": 0.4432, "step": 25796 }, { "epoch": 1.4445626609922724, "grad_norm": 1.4763113260269165, "learning_rate": 9.847763157894737e-05, "loss": 0.6981, "step": 25797 }, { "epoch": 1.4446186583044014, "grad_norm": 1.2583116292953491, "learning_rate": 9.847736842105264e-05, "loss": 0.4298, "step": 25798 }, { "epoch": 1.4446746556165304, "grad_norm": 1.458219289779663, "learning_rate": 9.84771052631579e-05, "loss": 0.5209, "step": 25799 }, { "epoch": 1.4447306529286594, "grad_norm": 1.372975468635559, "learning_rate": 9.847684210526316e-05, "loss": 0.5491, "step": 25800 }, { "epoch": 1.4447866502407885, "grad_norm": 1.1941468715667725, "learning_rate": 9.847657894736842e-05, "loss": 0.3895, "step": 25801 }, { "epoch": 1.4448426475529175, "grad_norm": 1.404276728630066, "learning_rate": 9.84763157894737e-05, "loss": 0.4141, "step": 25802 }, { "epoch": 1.4448986448650465, "grad_norm": 1.2524129152297974, "learning_rate": 9.847605263157896e-05, "loss": 0.5221, "step": 25803 }, { "epoch": 1.4449546421771755, "grad_norm": 1.4511653184890747, "learning_rate": 9.847578947368422e-05, "loss": 0.5579, "step": 25804 }, { "epoch": 1.4450106394893045, "grad_norm": 1.099122405052185, "learning_rate": 9.847552631578948e-05, "loss": 0.3619, "step": 25805 }, { "epoch": 1.4450666368014335, "grad_norm": 1.1699548959732056, "learning_rate": 9.847526315789474e-05, "loss": 0.3935, "step": 25806 }, { "epoch": 1.4451226341135626, "grad_norm": 1.9812945127487183, "learning_rate": 9.847500000000001e-05, "loss": 0.4948, "step": 25807 }, { "epoch": 1.4451786314256916, "grad_norm": 1.3612101078033447, "learning_rate": 9.847473684210527e-05, "loss": 0.5559, "step": 25808 }, { "epoch": 1.4452346287378206, "grad_norm": 1.381105899810791, "learning_rate": 9.847447368421053e-05, "loss": 0.323, "step": 25809 }, { "epoch": 1.4452906260499496, "grad_norm": 1.4454028606414795, "learning_rate": 9.847421052631579e-05, "loss": 0.5805, "step": 25810 }, { "epoch": 
1.4453466233620786, "grad_norm": 2.1735076904296875, "learning_rate": 9.847394736842106e-05, "loss": 0.5439, "step": 25811 }, { "epoch": 1.4454026206742077, "grad_norm": 2.0774319171905518, "learning_rate": 9.847368421052632e-05, "loss": 0.559, "step": 25812 }, { "epoch": 1.4454586179863367, "grad_norm": 1.3191627264022827, "learning_rate": 9.847342105263158e-05, "loss": 0.4167, "step": 25813 }, { "epoch": 1.4455146152984657, "grad_norm": 1.2631181478500366, "learning_rate": 9.847315789473684e-05, "loss": 0.4546, "step": 25814 }, { "epoch": 1.4455706126105947, "grad_norm": 1.380804181098938, "learning_rate": 9.847289473684211e-05, "loss": 0.3802, "step": 25815 }, { "epoch": 1.4456266099227237, "grad_norm": 1.2480416297912598, "learning_rate": 9.847263157894737e-05, "loss": 0.5143, "step": 25816 }, { "epoch": 1.4456826072348528, "grad_norm": 1.4806478023529053, "learning_rate": 9.847236842105265e-05, "loss": 0.5376, "step": 25817 }, { "epoch": 1.4457386045469818, "grad_norm": 1.348549246788025, "learning_rate": 9.847210526315789e-05, "loss": 0.3942, "step": 25818 }, { "epoch": 1.4457946018591108, "grad_norm": 1.1411925554275513, "learning_rate": 9.847184210526317e-05, "loss": 0.3391, "step": 25819 }, { "epoch": 1.4458505991712398, "grad_norm": 1.7207562923431396, "learning_rate": 9.847157894736843e-05, "loss": 0.5623, "step": 25820 }, { "epoch": 1.4459065964833688, "grad_norm": 1.4202302694320679, "learning_rate": 9.847131578947369e-05, "loss": 0.5446, "step": 25821 }, { "epoch": 1.4459625937954979, "grad_norm": 1.5925122499465942, "learning_rate": 9.847105263157896e-05, "loss": 0.4418, "step": 25822 }, { "epoch": 1.4460185911076269, "grad_norm": 1.2614142894744873, "learning_rate": 9.84707894736842e-05, "loss": 0.4257, "step": 25823 }, { "epoch": 1.446074588419756, "grad_norm": 1.0207983255386353, "learning_rate": 9.847052631578948e-05, "loss": 0.3638, "step": 25824 }, { "epoch": 1.446130585731885, "grad_norm": 1.6025762557983398, "learning_rate": 
9.847026315789474e-05, "loss": 0.5265, "step": 25825 }, { "epoch": 1.446186583044014, "grad_norm": 1.2102876901626587, "learning_rate": 9.847000000000001e-05, "loss": 0.4531, "step": 25826 }, { "epoch": 1.446242580356143, "grad_norm": 1.18339204788208, "learning_rate": 9.846973684210527e-05, "loss": 0.3625, "step": 25827 }, { "epoch": 1.446298577668272, "grad_norm": 1.980252981185913, "learning_rate": 9.846947368421053e-05, "loss": 0.5182, "step": 25828 }, { "epoch": 1.446354574980401, "grad_norm": 1.51128351688385, "learning_rate": 9.846921052631579e-05, "loss": 0.4307, "step": 25829 }, { "epoch": 1.44641057229253, "grad_norm": 1.2699722051620483, "learning_rate": 9.846894736842106e-05, "loss": 0.4264, "step": 25830 }, { "epoch": 1.446466569604659, "grad_norm": 1.3692348003387451, "learning_rate": 9.846868421052632e-05, "loss": 0.4324, "step": 25831 }, { "epoch": 1.446522566916788, "grad_norm": 1.2989691495895386, "learning_rate": 9.846842105263158e-05, "loss": 0.5484, "step": 25832 }, { "epoch": 1.446578564228917, "grad_norm": 1.4866071939468384, "learning_rate": 9.846815789473684e-05, "loss": 0.5034, "step": 25833 }, { "epoch": 1.446634561541046, "grad_norm": 1.1898126602172852, "learning_rate": 9.846789473684212e-05, "loss": 0.4293, "step": 25834 }, { "epoch": 1.446690558853175, "grad_norm": 1.5289809703826904, "learning_rate": 9.846763157894738e-05, "loss": 0.6081, "step": 25835 }, { "epoch": 1.4467465561653041, "grad_norm": 1.984678030014038, "learning_rate": 9.846736842105264e-05, "loss": 0.54, "step": 25836 }, { "epoch": 1.4468025534774331, "grad_norm": 1.222996711730957, "learning_rate": 9.84671052631579e-05, "loss": 0.3786, "step": 25837 }, { "epoch": 1.4468585507895622, "grad_norm": 1.2271722555160522, "learning_rate": 9.846684210526315e-05, "loss": 0.4339, "step": 25838 }, { "epoch": 1.4469145481016912, "grad_norm": 1.6566424369812012, "learning_rate": 9.846657894736843e-05, "loss": 0.3967, "step": 25839 }, { "epoch": 1.4469705454138202, "grad_norm": 
1.3022581338882446, "learning_rate": 9.846631578947369e-05, "loss": 0.5282, "step": 25840 }, { "epoch": 1.4470265427259492, "grad_norm": 1.193652868270874, "learning_rate": 9.846605263157895e-05, "loss": 0.3789, "step": 25841 }, { "epoch": 1.4470825400380782, "grad_norm": 1.084812045097351, "learning_rate": 9.846578947368421e-05, "loss": 0.4475, "step": 25842 }, { "epoch": 1.4471385373502073, "grad_norm": 1.7399256229400635, "learning_rate": 9.846552631578948e-05, "loss": 0.4837, "step": 25843 }, { "epoch": 1.4471945346623363, "grad_norm": 4.695403099060059, "learning_rate": 9.846526315789474e-05, "loss": 0.4705, "step": 25844 }, { "epoch": 1.4472505319744653, "grad_norm": 7.097339630126953, "learning_rate": 9.846500000000001e-05, "loss": 0.4211, "step": 25845 }, { "epoch": 1.4473065292865943, "grad_norm": 1.3863807916641235, "learning_rate": 9.846473684210526e-05, "loss": 0.5693, "step": 25846 }, { "epoch": 1.4473625265987233, "grad_norm": 1.4710978269577026, "learning_rate": 9.846447368421053e-05, "loss": 0.4579, "step": 25847 }, { "epoch": 1.4474185239108524, "grad_norm": 1.4749094247817993, "learning_rate": 9.846421052631579e-05, "loss": 0.5118, "step": 25848 }, { "epoch": 1.4474745212229814, "grad_norm": 1.664351463317871, "learning_rate": 9.846394736842107e-05, "loss": 0.4544, "step": 25849 }, { "epoch": 1.4475305185351104, "grad_norm": 1.4030771255493164, "learning_rate": 9.846368421052633e-05, "loss": 0.4978, "step": 25850 }, { "epoch": 1.4475865158472394, "grad_norm": 1.3160074949264526, "learning_rate": 9.846342105263159e-05, "loss": 0.5448, "step": 25851 }, { "epoch": 1.4476425131593684, "grad_norm": 1.2401901483535767, "learning_rate": 9.846315789473685e-05, "loss": 0.4574, "step": 25852 }, { "epoch": 1.4476985104714974, "grad_norm": 1.3144906759262085, "learning_rate": 9.846289473684212e-05, "loss": 0.4755, "step": 25853 }, { "epoch": 1.4477545077836265, "grad_norm": 1.186714768409729, "learning_rate": 9.846263157894738e-05, "loss": 0.4043, "step": 
25854 }, { "epoch": 1.4478105050957555, "grad_norm": 1.2592949867248535, "learning_rate": 9.846236842105262e-05, "loss": 0.377, "step": 25855 }, { "epoch": 1.4478665024078845, "grad_norm": 1.4551217555999756, "learning_rate": 9.84621052631579e-05, "loss": 0.5906, "step": 25856 }, { "epoch": 1.4479224997200135, "grad_norm": 1.4207539558410645, "learning_rate": 9.846184210526316e-05, "loss": 0.5401, "step": 25857 }, { "epoch": 1.4479784970321425, "grad_norm": 1.1180894374847412, "learning_rate": 9.846157894736843e-05, "loss": 0.3977, "step": 25858 }, { "epoch": 1.4480344943442716, "grad_norm": 1.841389775276184, "learning_rate": 9.846131578947369e-05, "loss": 0.5804, "step": 25859 }, { "epoch": 1.4480904916564006, "grad_norm": 1.4709042310714722, "learning_rate": 9.846105263157895e-05, "loss": 0.479, "step": 25860 }, { "epoch": 1.4481464889685296, "grad_norm": 1.5189865827560425, "learning_rate": 9.846078947368421e-05, "loss": 0.4432, "step": 25861 }, { "epoch": 1.4482024862806586, "grad_norm": 1.2005620002746582, "learning_rate": 9.846052631578948e-05, "loss": 0.3775, "step": 25862 }, { "epoch": 1.4482584835927876, "grad_norm": 1.397200584411621, "learning_rate": 9.846026315789474e-05, "loss": 0.6105, "step": 25863 }, { "epoch": 1.4483144809049167, "grad_norm": 1.3551275730133057, "learning_rate": 9.846e-05, "loss": 0.4215, "step": 25864 }, { "epoch": 1.4483704782170457, "grad_norm": 1.2143815755844116, "learning_rate": 9.845973684210526e-05, "loss": 0.4264, "step": 25865 }, { "epoch": 1.4484264755291747, "grad_norm": 1.278725266456604, "learning_rate": 9.845947368421054e-05, "loss": 0.6367, "step": 25866 }, { "epoch": 1.4484824728413037, "grad_norm": 1.7548737525939941, "learning_rate": 9.84592105263158e-05, "loss": 0.5817, "step": 25867 }, { "epoch": 1.4485384701534327, "grad_norm": 1.250089168548584, "learning_rate": 9.845894736842106e-05, "loss": 0.4424, "step": 25868 }, { "epoch": 1.4485944674655618, "grad_norm": 1.41702401638031, "learning_rate": 
9.845868421052631e-05, "loss": 0.5236, "step": 25869 }, { "epoch": 1.4486504647776908, "grad_norm": 1.1638234853744507, "learning_rate": 9.845842105263159e-05, "loss": 0.4341, "step": 25870 }, { "epoch": 1.4487064620898198, "grad_norm": 1.0353578329086304, "learning_rate": 9.845815789473685e-05, "loss": 0.3386, "step": 25871 }, { "epoch": 1.4487624594019488, "grad_norm": 1.5625780820846558, "learning_rate": 9.845789473684211e-05, "loss": 0.395, "step": 25872 }, { "epoch": 1.4488184567140778, "grad_norm": 7.201679229736328, "learning_rate": 9.845763157894737e-05, "loss": 0.5571, "step": 25873 }, { "epoch": 1.4488744540262068, "grad_norm": 1.3730390071868896, "learning_rate": 9.845736842105263e-05, "loss": 0.4704, "step": 25874 }, { "epoch": 1.4489304513383359, "grad_norm": 1.3140690326690674, "learning_rate": 9.84571052631579e-05, "loss": 0.4497, "step": 25875 }, { "epoch": 1.4489864486504649, "grad_norm": 1.3097856044769287, "learning_rate": 9.845684210526316e-05, "loss": 0.452, "step": 25876 }, { "epoch": 1.449042445962594, "grad_norm": 1.4853609800338745, "learning_rate": 9.845657894736843e-05, "loss": 0.5977, "step": 25877 }, { "epoch": 1.449098443274723, "grad_norm": 1.4176387786865234, "learning_rate": 9.845631578947368e-05, "loss": 0.4417, "step": 25878 }, { "epoch": 1.449154440586852, "grad_norm": 1.1323189735412598, "learning_rate": 9.845605263157895e-05, "loss": 0.5364, "step": 25879 }, { "epoch": 1.449210437898981, "grad_norm": 1.172784686088562, "learning_rate": 9.845578947368421e-05, "loss": 0.4925, "step": 25880 }, { "epoch": 1.44926643521111, "grad_norm": 1.1581997871398926, "learning_rate": 9.845552631578949e-05, "loss": 0.3504, "step": 25881 }, { "epoch": 1.449322432523239, "grad_norm": 1.2002604007720947, "learning_rate": 9.845526315789475e-05, "loss": 0.4836, "step": 25882 }, { "epoch": 1.449378429835368, "grad_norm": 1.1245604753494263, "learning_rate": 9.8455e-05, "loss": 0.3703, "step": 25883 }, { "epoch": 1.449434427147497, "grad_norm": 
1.520147442817688, "learning_rate": 9.845473684210526e-05, "loss": 0.5046, "step": 25884 }, { "epoch": 1.449490424459626, "grad_norm": 1.5617785453796387, "learning_rate": 9.845447368421054e-05, "loss": 0.4901, "step": 25885 }, { "epoch": 1.449546421771755, "grad_norm": 1.4862608909606934, "learning_rate": 9.84542105263158e-05, "loss": 0.4195, "step": 25886 }, { "epoch": 1.449602419083884, "grad_norm": 1.2302963733673096, "learning_rate": 9.845394736842106e-05, "loss": 0.4553, "step": 25887 }, { "epoch": 1.4496584163960131, "grad_norm": 1.2800395488739014, "learning_rate": 9.845368421052632e-05, "loss": 0.3787, "step": 25888 }, { "epoch": 1.4497144137081421, "grad_norm": 1.2763700485229492, "learning_rate": 9.845342105263159e-05, "loss": 0.3521, "step": 25889 }, { "epoch": 1.4497704110202712, "grad_norm": 1.4803738594055176, "learning_rate": 9.845315789473685e-05, "loss": 0.5863, "step": 25890 }, { "epoch": 1.4498264083324002, "grad_norm": 1.222286343574524, "learning_rate": 9.845289473684211e-05, "loss": 0.4811, "step": 25891 }, { "epoch": 1.4498824056445292, "grad_norm": 1.0914252996444702, "learning_rate": 9.845263157894737e-05, "loss": 0.3386, "step": 25892 }, { "epoch": 1.4499384029566582, "grad_norm": 1.4120428562164307, "learning_rate": 9.845236842105263e-05, "loss": 0.5278, "step": 25893 }, { "epoch": 1.4499944002687872, "grad_norm": 1.41368567943573, "learning_rate": 9.84521052631579e-05, "loss": 0.4883, "step": 25894 }, { "epoch": 1.4500503975809163, "grad_norm": 1.2134027481079102, "learning_rate": 9.845184210526316e-05, "loss": 0.4061, "step": 25895 }, { "epoch": 1.4501063948930453, "grad_norm": 1.4855021238327026, "learning_rate": 9.845157894736842e-05, "loss": 0.4442, "step": 25896 }, { "epoch": 1.4501623922051743, "grad_norm": 1.4125449657440186, "learning_rate": 9.845131578947368e-05, "loss": 0.4051, "step": 25897 }, { "epoch": 1.4502183895173033, "grad_norm": 1.275683879852295, "learning_rate": 9.845105263157896e-05, "loss": 0.4895, "step": 25898 
}, { "epoch": 1.450274386829432, "grad_norm": 1.4906284809112549, "learning_rate": 9.845078947368422e-05, "loss": 0.4012, "step": 25899 }, { "epoch": 1.4503303841415611, "grad_norm": 1.4855666160583496, "learning_rate": 9.845052631578949e-05, "loss": 0.5737, "step": 25900 }, { "epoch": 1.4503863814536901, "grad_norm": 1.6058350801467896, "learning_rate": 9.845026315789473e-05, "loss": 0.5658, "step": 25901 }, { "epoch": 1.4504423787658192, "grad_norm": 1.2203283309936523, "learning_rate": 9.845000000000001e-05, "loss": 0.3587, "step": 25902 }, { "epoch": 1.4504983760779482, "grad_norm": 1.2490440607070923, "learning_rate": 9.844973684210527e-05, "loss": 0.3595, "step": 25903 }, { "epoch": 1.4505543733900772, "grad_norm": 1.5844202041625977, "learning_rate": 9.844947368421054e-05, "loss": 0.4712, "step": 25904 }, { "epoch": 1.4506103707022062, "grad_norm": 1.4154523611068726, "learning_rate": 9.84492105263158e-05, "loss": 0.4897, "step": 25905 }, { "epoch": 1.4506663680143352, "grad_norm": 1.3495726585388184, "learning_rate": 9.844894736842106e-05, "loss": 0.585, "step": 25906 }, { "epoch": 1.4507223653264643, "grad_norm": 1.4050761461257935, "learning_rate": 9.844868421052632e-05, "loss": 0.5269, "step": 25907 }, { "epoch": 1.4507783626385933, "grad_norm": 1.2094513177871704, "learning_rate": 9.844842105263158e-05, "loss": 0.4117, "step": 25908 }, { "epoch": 1.4508343599507223, "grad_norm": 1.6439192295074463, "learning_rate": 9.844815789473685e-05, "loss": 0.546, "step": 25909 }, { "epoch": 1.4508903572628513, "grad_norm": 1.701016902923584, "learning_rate": 9.84478947368421e-05, "loss": 0.4767, "step": 25910 }, { "epoch": 1.4509463545749803, "grad_norm": 1.4660980701446533, "learning_rate": 9.844763157894737e-05, "loss": 0.399, "step": 25911 }, { "epoch": 1.4510023518871094, "grad_norm": 1.4048539400100708, "learning_rate": 9.844736842105263e-05, "loss": 0.5477, "step": 25912 }, { "epoch": 1.4510583491992384, "grad_norm": 1.4888503551483154, "learning_rate": 
9.84471052631579e-05, "loss": 0.6689, "step": 25913 }, { "epoch": 1.4511143465113674, "grad_norm": 1.1039063930511475, "learning_rate": 9.844684210526317e-05, "loss": 0.4155, "step": 25914 }, { "epoch": 1.4511703438234964, "grad_norm": 1.7937867641448975, "learning_rate": 9.844657894736842e-05, "loss": 0.4529, "step": 25915 }, { "epoch": 1.4512263411356254, "grad_norm": 1.480340600013733, "learning_rate": 9.844631578947368e-05, "loss": 0.4487, "step": 25916 }, { "epoch": 1.4512823384477544, "grad_norm": 1.8020457029342651, "learning_rate": 9.844605263157896e-05, "loss": 0.5597, "step": 25917 }, { "epoch": 1.4513383357598835, "grad_norm": 3.0687625408172607, "learning_rate": 9.844578947368422e-05, "loss": 0.5293, "step": 25918 }, { "epoch": 1.4513943330720125, "grad_norm": 1.4975465536117554, "learning_rate": 9.844552631578948e-05, "loss": 0.4141, "step": 25919 }, { "epoch": 1.4514503303841415, "grad_norm": 1.5277751684188843, "learning_rate": 9.844526315789474e-05, "loss": 0.6326, "step": 25920 }, { "epoch": 1.4515063276962705, "grad_norm": 1.370906949043274, "learning_rate": 9.844500000000001e-05, "loss": 0.468, "step": 25921 }, { "epoch": 1.4515623250083995, "grad_norm": 1.2885569334030151, "learning_rate": 9.844473684210527e-05, "loss": 0.5034, "step": 25922 }, { "epoch": 1.4516183223205286, "grad_norm": 1.2657023668289185, "learning_rate": 9.844447368421053e-05, "loss": 0.4651, "step": 25923 }, { "epoch": 1.4516743196326576, "grad_norm": 1.3978196382522583, "learning_rate": 9.844421052631579e-05, "loss": 0.4269, "step": 25924 }, { "epoch": 1.4517303169447866, "grad_norm": 1.3470312356948853, "learning_rate": 9.844394736842105e-05, "loss": 0.4954, "step": 25925 }, { "epoch": 1.4517863142569156, "grad_norm": 1.8148443698883057, "learning_rate": 9.844368421052632e-05, "loss": 0.6209, "step": 25926 }, { "epoch": 1.4518423115690446, "grad_norm": 5.657511234283447, "learning_rate": 9.844342105263158e-05, "loss": 0.4336, "step": 25927 }, { "epoch": 1.4518983088811737, 
"grad_norm": 1.5702624320983887, "learning_rate": 9.844315789473684e-05, "loss": 0.5375, "step": 25928 }, { "epoch": 1.4519543061933027, "grad_norm": 1.4084192514419556, "learning_rate": 9.84428947368421e-05, "loss": 0.5047, "step": 25929 }, { "epoch": 1.4520103035054317, "grad_norm": 1.1949462890625, "learning_rate": 9.844263157894738e-05, "loss": 0.3619, "step": 25930 }, { "epoch": 1.4520663008175607, "grad_norm": 1.2923543453216553, "learning_rate": 9.844236842105263e-05, "loss": 0.4469, "step": 25931 }, { "epoch": 1.4521222981296897, "grad_norm": 1.2760673761367798, "learning_rate": 9.844210526315791e-05, "loss": 0.467, "step": 25932 }, { "epoch": 1.4521782954418188, "grad_norm": 1.2177743911743164, "learning_rate": 9.844184210526315e-05, "loss": 0.406, "step": 25933 }, { "epoch": 1.4522342927539478, "grad_norm": 1.1399402618408203, "learning_rate": 9.844157894736843e-05, "loss": 0.3858, "step": 25934 }, { "epoch": 1.4522902900660768, "grad_norm": 1.6178439855575562, "learning_rate": 9.844131578947369e-05, "loss": 0.5365, "step": 25935 }, { "epoch": 1.4523462873782058, "grad_norm": 1.4049549102783203, "learning_rate": 9.844105263157896e-05, "loss": 0.5515, "step": 25936 }, { "epoch": 1.4524022846903348, "grad_norm": 1.2331537008285522, "learning_rate": 9.844078947368422e-05, "loss": 0.4917, "step": 25937 }, { "epoch": 1.4524582820024639, "grad_norm": 1.2904316186904907, "learning_rate": 9.844052631578948e-05, "loss": 0.4256, "step": 25938 }, { "epoch": 1.4525142793145929, "grad_norm": 1.261634349822998, "learning_rate": 9.844026315789474e-05, "loss": 0.458, "step": 25939 }, { "epoch": 1.452570276626722, "grad_norm": 1.430680513381958, "learning_rate": 9.844000000000001e-05, "loss": 0.4403, "step": 25940 }, { "epoch": 1.452626273938851, "grad_norm": 1.102910041809082, "learning_rate": 9.843973684210527e-05, "loss": 0.3499, "step": 25941 }, { "epoch": 1.45268227125098, "grad_norm": 1.4364904165267944, "learning_rate": 9.843947368421053e-05, "loss": 0.5855, 
"step": 25942 }, { "epoch": 1.452738268563109, "grad_norm": 1.155799388885498, "learning_rate": 9.843921052631579e-05, "loss": 0.3681, "step": 25943 }, { "epoch": 1.452794265875238, "grad_norm": 1.272122859954834, "learning_rate": 9.843894736842105e-05, "loss": 0.4879, "step": 25944 }, { "epoch": 1.452850263187367, "grad_norm": 1.3616063594818115, "learning_rate": 9.843868421052633e-05, "loss": 0.5572, "step": 25945 }, { "epoch": 1.452906260499496, "grad_norm": 1.530600905418396, "learning_rate": 9.843842105263158e-05, "loss": 0.4332, "step": 25946 }, { "epoch": 1.452962257811625, "grad_norm": 1.2354836463928223, "learning_rate": 9.843815789473684e-05, "loss": 0.4672, "step": 25947 }, { "epoch": 1.453018255123754, "grad_norm": 1.435318112373352, "learning_rate": 9.84378947368421e-05, "loss": 0.4326, "step": 25948 }, { "epoch": 1.453074252435883, "grad_norm": 1.4048984050750732, "learning_rate": 9.843763157894738e-05, "loss": 0.3772, "step": 25949 }, { "epoch": 1.453130249748012, "grad_norm": 2.3893043994903564, "learning_rate": 9.843736842105264e-05, "loss": 0.5347, "step": 25950 }, { "epoch": 1.453186247060141, "grad_norm": 1.403817057609558, "learning_rate": 9.84371052631579e-05, "loss": 0.5213, "step": 25951 }, { "epoch": 1.4532422443722701, "grad_norm": 1.2591506242752075, "learning_rate": 9.843684210526316e-05, "loss": 0.4081, "step": 25952 }, { "epoch": 1.4532982416843991, "grad_norm": 1.2045007944107056, "learning_rate": 9.843657894736843e-05, "loss": 0.5792, "step": 25953 }, { "epoch": 1.4533542389965282, "grad_norm": 1.2742711305618286, "learning_rate": 9.843631578947369e-05, "loss": 0.4952, "step": 25954 }, { "epoch": 1.4534102363086572, "grad_norm": 1.6678144931793213, "learning_rate": 9.843605263157896e-05, "loss": 0.5661, "step": 25955 }, { "epoch": 1.4534662336207862, "grad_norm": 1.4130090475082397, "learning_rate": 9.843578947368421e-05, "loss": 0.6704, "step": 25956 }, { "epoch": 1.4535222309329152, "grad_norm": 1.37088942527771, "learning_rate": 
9.843552631578948e-05, "loss": 0.4189, "step": 25957 }, { "epoch": 1.4535782282450442, "grad_norm": 1.8538419008255005, "learning_rate": 9.843526315789474e-05, "loss": 0.4482, "step": 25958 }, { "epoch": 1.4536342255571733, "grad_norm": 1.3719291687011719, "learning_rate": 9.8435e-05, "loss": 0.3062, "step": 25959 }, { "epoch": 1.4536902228693023, "grad_norm": 1.3360399007797241, "learning_rate": 9.843473684210526e-05, "loss": 0.3929, "step": 25960 }, { "epoch": 1.4537462201814313, "grad_norm": 1.3749924898147583, "learning_rate": 9.843447368421052e-05, "loss": 0.4151, "step": 25961 }, { "epoch": 1.4538022174935603, "grad_norm": 1.381787657737732, "learning_rate": 9.84342105263158e-05, "loss": 0.4326, "step": 25962 }, { "epoch": 1.4538582148056893, "grad_norm": 1.9576159715652466, "learning_rate": 9.843394736842105e-05, "loss": 0.4619, "step": 25963 }, { "epoch": 1.4539142121178183, "grad_norm": 1.386579155921936, "learning_rate": 9.843368421052633e-05, "loss": 0.4705, "step": 25964 }, { "epoch": 1.4539702094299474, "grad_norm": 1.1891536712646484, "learning_rate": 9.843342105263157e-05, "loss": 0.4118, "step": 25965 }, { "epoch": 1.4540262067420764, "grad_norm": 1.2311022281646729, "learning_rate": 9.843315789473685e-05, "loss": 0.4656, "step": 25966 }, { "epoch": 1.4540822040542054, "grad_norm": 1.3781304359436035, "learning_rate": 9.843289473684211e-05, "loss": 0.5193, "step": 25967 }, { "epoch": 1.4541382013663344, "grad_norm": 1.1454792022705078, "learning_rate": 9.843263157894738e-05, "loss": 0.4095, "step": 25968 }, { "epoch": 1.4541941986784634, "grad_norm": 1.28517484664917, "learning_rate": 9.843236842105264e-05, "loss": 0.3822, "step": 25969 }, { "epoch": 1.4542501959905925, "grad_norm": 1.3925752639770508, "learning_rate": 9.84321052631579e-05, "loss": 0.4105, "step": 25970 }, { "epoch": 1.4543061933027215, "grad_norm": 3.689382553100586, "learning_rate": 9.843184210526316e-05, "loss": 0.603, "step": 25971 }, { "epoch": 1.4543621906148505, "grad_norm": 
1.3937817811965942, "learning_rate": 9.843157894736843e-05, "loss": 0.4476, "step": 25972 }, { "epoch": 1.4544181879269795, "grad_norm": 77.36202239990234, "learning_rate": 9.843131578947369e-05, "loss": 0.3675, "step": 25973 }, { "epoch": 1.4544741852391085, "grad_norm": 1.22329580783844, "learning_rate": 9.843105263157895e-05, "loss": 0.3984, "step": 25974 }, { "epoch": 1.4545301825512376, "grad_norm": 1.4108954668045044, "learning_rate": 9.843078947368421e-05, "loss": 0.4543, "step": 25975 }, { "epoch": 1.4545861798633666, "grad_norm": 1.2376995086669922, "learning_rate": 9.843052631578947e-05, "loss": 0.4174, "step": 25976 }, { "epoch": 1.4546421771754956, "grad_norm": 1.5809969902038574, "learning_rate": 9.843026315789474e-05, "loss": 0.7597, "step": 25977 }, { "epoch": 1.4546981744876246, "grad_norm": 1.929602861404419, "learning_rate": 9.843e-05, "loss": 0.6487, "step": 25978 }, { "epoch": 1.4547541717997536, "grad_norm": 1.5038347244262695, "learning_rate": 9.842973684210526e-05, "loss": 0.6088, "step": 25979 }, { "epoch": 1.4548101691118827, "grad_norm": 1.659993290901184, "learning_rate": 9.842947368421052e-05, "loss": 0.4552, "step": 25980 }, { "epoch": 1.4548661664240117, "grad_norm": 1.1503784656524658, "learning_rate": 9.84292105263158e-05, "loss": 0.4481, "step": 25981 }, { "epoch": 1.4549221637361407, "grad_norm": 1.1233444213867188, "learning_rate": 9.842894736842106e-05, "loss": 0.3566, "step": 25982 }, { "epoch": 1.4549781610482697, "grad_norm": 1.5418497323989868, "learning_rate": 9.842868421052632e-05, "loss": 0.4217, "step": 25983 }, { "epoch": 1.4550341583603987, "grad_norm": 1.42060387134552, "learning_rate": 9.842842105263158e-05, "loss": 0.5069, "step": 25984 }, { "epoch": 1.4550901556725278, "grad_norm": 1.5929495096206665, "learning_rate": 9.842815789473685e-05, "loss": 0.4035, "step": 25985 }, { "epoch": 1.4551461529846568, "grad_norm": 1.476660966873169, "learning_rate": 9.842789473684211e-05, "loss": 0.5383, "step": 25986 }, { 
"epoch": 1.4552021502967858, "grad_norm": 1.4194258451461792, "learning_rate": 9.842763157894738e-05, "loss": 0.5295, "step": 25987 }, { "epoch": 1.4552581476089148, "grad_norm": 1.1278876066207886, "learning_rate": 9.842736842105263e-05, "loss": 0.4407, "step": 25988 }, { "epoch": 1.4553141449210438, "grad_norm": 1.4255927801132202, "learning_rate": 9.84271052631579e-05, "loss": 0.6305, "step": 25989 }, { "epoch": 1.4553701422331728, "grad_norm": 1.2105761766433716, "learning_rate": 9.842684210526316e-05, "loss": 0.3823, "step": 25990 }, { "epoch": 1.4554261395453019, "grad_norm": 1.2208470106124878, "learning_rate": 9.842657894736844e-05, "loss": 0.4616, "step": 25991 }, { "epoch": 1.4554821368574309, "grad_norm": 1.30439293384552, "learning_rate": 9.84263157894737e-05, "loss": 0.3807, "step": 25992 }, { "epoch": 1.45553813416956, "grad_norm": 1.7102527618408203, "learning_rate": 9.842605263157894e-05, "loss": 0.4421, "step": 25993 }, { "epoch": 1.455594131481689, "grad_norm": 1.3844481706619263, "learning_rate": 9.842578947368421e-05, "loss": 0.5318, "step": 25994 }, { "epoch": 1.455650128793818, "grad_norm": 1.1847615242004395, "learning_rate": 9.842552631578947e-05, "loss": 0.3416, "step": 25995 }, { "epoch": 1.455706126105947, "grad_norm": 1.346411943435669, "learning_rate": 9.842526315789475e-05, "loss": 0.5071, "step": 25996 }, { "epoch": 1.455762123418076, "grad_norm": 1.2523080110549927, "learning_rate": 9.842500000000001e-05, "loss": 0.5055, "step": 25997 }, { "epoch": 1.455818120730205, "grad_norm": 1.2922927141189575, "learning_rate": 9.842473684210527e-05, "loss": 0.4191, "step": 25998 }, { "epoch": 1.455874118042334, "grad_norm": 2.072600841522217, "learning_rate": 9.842447368421053e-05, "loss": 0.6591, "step": 25999 }, { "epoch": 1.455930115354463, "grad_norm": 1.4252487421035767, "learning_rate": 9.84242105263158e-05, "loss": 0.4047, "step": 26000 }, { "epoch": 1.455986112666592, "grad_norm": 1.4820553064346313, "learning_rate": 
9.842394736842106e-05, "loss": 0.4762, "step": 26001 }, { "epoch": 1.456042109978721, "grad_norm": 1.4761065244674683, "learning_rate": 9.842368421052632e-05, "loss": 0.417, "step": 26002 }, { "epoch": 1.45609810729085, "grad_norm": 1.1952581405639648, "learning_rate": 9.842342105263158e-05, "loss": 0.5099, "step": 26003 }, { "epoch": 1.4561541046029791, "grad_norm": 1.2118871212005615, "learning_rate": 9.842315789473685e-05, "loss": 0.4029, "step": 26004 }, { "epoch": 1.4562101019151081, "grad_norm": 2.3767006397247314, "learning_rate": 9.842289473684211e-05, "loss": 0.4342, "step": 26005 }, { "epoch": 1.4562660992272372, "grad_norm": 1.329052448272705, "learning_rate": 9.842263157894737e-05, "loss": 0.4584, "step": 26006 }, { "epoch": 1.4563220965393662, "grad_norm": 1.3413516283035278, "learning_rate": 9.842236842105263e-05, "loss": 0.4386, "step": 26007 }, { "epoch": 1.4563780938514952, "grad_norm": 1.215601921081543, "learning_rate": 9.84221052631579e-05, "loss": 0.4626, "step": 26008 }, { "epoch": 1.4564340911636242, "grad_norm": 2.029501438140869, "learning_rate": 9.842184210526316e-05, "loss": 0.5052, "step": 26009 }, { "epoch": 1.4564900884757532, "grad_norm": 1.856543779373169, "learning_rate": 9.842157894736842e-05, "loss": 0.4854, "step": 26010 }, { "epoch": 1.4565460857878822, "grad_norm": 1.4724719524383545, "learning_rate": 9.842131578947368e-05, "loss": 0.5947, "step": 26011 }, { "epoch": 1.4566020831000113, "grad_norm": 1.2454043626785278, "learning_rate": 9.842105263157894e-05, "loss": 0.4048, "step": 26012 }, { "epoch": 1.4566580804121403, "grad_norm": 1.3093109130859375, "learning_rate": 9.842078947368422e-05, "loss": 0.4102, "step": 26013 }, { "epoch": 1.4567140777242693, "grad_norm": 1.3444128036499023, "learning_rate": 9.842052631578948e-05, "loss": 0.4793, "step": 26014 }, { "epoch": 1.4567700750363983, "grad_norm": 1.5629818439483643, "learning_rate": 9.842026315789474e-05, "loss": 0.4227, "step": 26015 }, { "epoch": 1.4568260723485273, 
"grad_norm": 1.532607078552246, "learning_rate": 9.842e-05, "loss": 0.6029, "step": 26016 }, { "epoch": 1.4568820696606564, "grad_norm": 1.3916041851043701, "learning_rate": 9.841973684210527e-05, "loss": 0.5419, "step": 26017 }, { "epoch": 1.4569380669727854, "grad_norm": 1.4412180185317993, "learning_rate": 9.841947368421053e-05, "loss": 0.4692, "step": 26018 }, { "epoch": 1.4569940642849144, "grad_norm": 1.4216548204421997, "learning_rate": 9.84192105263158e-05, "loss": 0.5973, "step": 26019 }, { "epoch": 1.4570500615970434, "grad_norm": 1.2250051498413086, "learning_rate": 9.841894736842105e-05, "loss": 0.5463, "step": 26020 }, { "epoch": 1.4571060589091724, "grad_norm": 1.220533847808838, "learning_rate": 9.841868421052632e-05, "loss": 0.4516, "step": 26021 }, { "epoch": 1.4571620562213015, "grad_norm": 1.1817001104354858, "learning_rate": 9.841842105263158e-05, "loss": 0.5348, "step": 26022 }, { "epoch": 1.4572180535334303, "grad_norm": 1.211058497428894, "learning_rate": 9.841815789473685e-05, "loss": 0.4168, "step": 26023 }, { "epoch": 1.4572740508455593, "grad_norm": 1.8724970817565918, "learning_rate": 9.841789473684211e-05, "loss": 0.4228, "step": 26024 }, { "epoch": 1.4573300481576883, "grad_norm": 1.1929121017456055, "learning_rate": 9.841763157894737e-05, "loss": 0.4154, "step": 26025 }, { "epoch": 1.4573860454698173, "grad_norm": 1.1333788633346558, "learning_rate": 9.841736842105263e-05, "loss": 0.3917, "step": 26026 }, { "epoch": 1.4574420427819463, "grad_norm": 1.3728362321853638, "learning_rate": 9.841710526315791e-05, "loss": 0.6962, "step": 26027 }, { "epoch": 1.4574980400940754, "grad_norm": 1.2028093338012695, "learning_rate": 9.841684210526317e-05, "loss": 0.3844, "step": 26028 }, { "epoch": 1.4575540374062044, "grad_norm": 1.2827948331832886, "learning_rate": 9.841657894736843e-05, "loss": 0.6314, "step": 26029 }, { "epoch": 1.4576100347183334, "grad_norm": 1.3257192373275757, "learning_rate": 9.841631578947369e-05, "loss": 0.5371, "step": 
26030 }, { "epoch": 1.4576660320304624, "grad_norm": 1.401133418083191, "learning_rate": 9.841605263157895e-05, "loss": 0.4718, "step": 26031 }, { "epoch": 1.4577220293425914, "grad_norm": 1.27435302734375, "learning_rate": 9.841578947368422e-05, "loss": 0.4775, "step": 26032 }, { "epoch": 1.4577780266547204, "grad_norm": 1.7541958093643188, "learning_rate": 9.841552631578948e-05, "loss": 0.4958, "step": 26033 }, { "epoch": 1.4578340239668495, "grad_norm": 1.232792615890503, "learning_rate": 9.841526315789474e-05, "loss": 0.4311, "step": 26034 }, { "epoch": 1.4578900212789785, "grad_norm": 1.480499029159546, "learning_rate": 9.8415e-05, "loss": 0.5128, "step": 26035 }, { "epoch": 1.4579460185911075, "grad_norm": 1.412124514579773, "learning_rate": 9.841473684210527e-05, "loss": 0.4031, "step": 26036 }, { "epoch": 1.4580020159032365, "grad_norm": 1.420940637588501, "learning_rate": 9.841447368421053e-05, "loss": 0.4616, "step": 26037 }, { "epoch": 1.4580580132153655, "grad_norm": 1.3835965394973755, "learning_rate": 9.841421052631579e-05, "loss": 0.4683, "step": 26038 }, { "epoch": 1.4581140105274946, "grad_norm": 1.6787493228912354, "learning_rate": 9.841394736842105e-05, "loss": 0.5847, "step": 26039 }, { "epoch": 1.4581700078396236, "grad_norm": 1.1006306409835815, "learning_rate": 9.841368421052632e-05, "loss": 0.3483, "step": 26040 }, { "epoch": 1.4582260051517526, "grad_norm": 1.5119205713272095, "learning_rate": 9.841342105263158e-05, "loss": 0.5986, "step": 26041 }, { "epoch": 1.4582820024638816, "grad_norm": 1.6595722436904907, "learning_rate": 9.841315789473686e-05, "loss": 0.4327, "step": 26042 }, { "epoch": 1.4583379997760106, "grad_norm": 1.4033514261245728, "learning_rate": 9.84128947368421e-05, "loss": 0.499, "step": 26043 }, { "epoch": 1.4583939970881397, "grad_norm": 1.226958155632019, "learning_rate": 9.841263157894738e-05, "loss": 0.3799, "step": 26044 }, { "epoch": 1.4584499944002687, "grad_norm": 1.127150058746338, "learning_rate": 
9.841236842105264e-05, "loss": 0.2782, "step": 26045 }, { "epoch": 1.4585059917123977, "grad_norm": 1.2230535745620728, "learning_rate": 9.84121052631579e-05, "loss": 0.4618, "step": 26046 }, { "epoch": 1.4585619890245267, "grad_norm": 1.1991167068481445, "learning_rate": 9.841184210526317e-05, "loss": 0.3825, "step": 26047 }, { "epoch": 1.4586179863366557, "grad_norm": 1.8252909183502197, "learning_rate": 9.841157894736842e-05, "loss": 0.5257, "step": 26048 }, { "epoch": 1.4586739836487848, "grad_norm": 1.4885025024414062, "learning_rate": 9.841131578947369e-05, "loss": 0.5272, "step": 26049 }, { "epoch": 1.4587299809609138, "grad_norm": 1.4274439811706543, "learning_rate": 9.841105263157895e-05, "loss": 0.488, "step": 26050 }, { "epoch": 1.4587859782730428, "grad_norm": 1.3002328872680664, "learning_rate": 9.841078947368422e-05, "loss": 0.4252, "step": 26051 }, { "epoch": 1.4588419755851718, "grad_norm": 1.3177926540374756, "learning_rate": 9.841052631578948e-05, "loss": 0.4527, "step": 26052 }, { "epoch": 1.4588979728973008, "grad_norm": 1.2957621812820435, "learning_rate": 9.841026315789474e-05, "loss": 0.4261, "step": 26053 }, { "epoch": 1.4589539702094299, "grad_norm": 1.5503376722335815, "learning_rate": 9.841e-05, "loss": 0.4861, "step": 26054 }, { "epoch": 1.4590099675215589, "grad_norm": 1.515969157218933, "learning_rate": 9.840973684210527e-05, "loss": 0.6048, "step": 26055 }, { "epoch": 1.459065964833688, "grad_norm": 1.3552058935165405, "learning_rate": 9.840947368421053e-05, "loss": 0.4295, "step": 26056 }, { "epoch": 1.459121962145817, "grad_norm": 1.1816718578338623, "learning_rate": 9.84092105263158e-05, "loss": 0.4525, "step": 26057 }, { "epoch": 1.459177959457946, "grad_norm": 1.6081931591033936, "learning_rate": 9.840894736842105e-05, "loss": 0.5457, "step": 26058 }, { "epoch": 1.459233956770075, "grad_norm": 1.6812665462493896, "learning_rate": 9.840868421052633e-05, "loss": 0.4257, "step": 26059 }, { "epoch": 1.459289954082204, "grad_norm": 
1.229638695716858, "learning_rate": 9.840842105263159e-05, "loss": 0.4609, "step": 26060 }, { "epoch": 1.459345951394333, "grad_norm": 1.4026283025741577, "learning_rate": 9.840815789473685e-05, "loss": 0.5036, "step": 26061 }, { "epoch": 1.459401948706462, "grad_norm": 1.263132095336914, "learning_rate": 9.84078947368421e-05, "loss": 0.4865, "step": 26062 }, { "epoch": 1.459457946018591, "grad_norm": 1.5632518529891968, "learning_rate": 9.840763157894737e-05, "loss": 0.6191, "step": 26063 }, { "epoch": 1.45951394333072, "grad_norm": 1.5699266195297241, "learning_rate": 9.840736842105264e-05, "loss": 0.5325, "step": 26064 }, { "epoch": 1.459569940642849, "grad_norm": 1.2045263051986694, "learning_rate": 9.84071052631579e-05, "loss": 0.3987, "step": 26065 }, { "epoch": 1.459625937954978, "grad_norm": 5.441231727600098, "learning_rate": 9.840684210526316e-05, "loss": 0.5344, "step": 26066 }, { "epoch": 1.459681935267107, "grad_norm": 1.2703382968902588, "learning_rate": 9.840657894736842e-05, "loss": 0.5315, "step": 26067 }, { "epoch": 1.4597379325792361, "grad_norm": 1.1619566679000854, "learning_rate": 9.840631578947369e-05, "loss": 0.3904, "step": 26068 }, { "epoch": 1.4597939298913651, "grad_norm": 1.2699205875396729, "learning_rate": 9.840605263157895e-05, "loss": 0.4152, "step": 26069 }, { "epoch": 1.4598499272034942, "grad_norm": 1.5068904161453247, "learning_rate": 9.840578947368421e-05, "loss": 0.393, "step": 26070 }, { "epoch": 1.4599059245156232, "grad_norm": 1.146650791168213, "learning_rate": 9.840552631578947e-05, "loss": 0.3644, "step": 26071 }, { "epoch": 1.4599619218277522, "grad_norm": 1.2180830240249634, "learning_rate": 9.840526315789474e-05, "loss": 0.3651, "step": 26072 }, { "epoch": 1.4600179191398812, "grad_norm": 1.6138577461242676, "learning_rate": 9.8405e-05, "loss": 0.4821, "step": 26073 }, { "epoch": 1.4600739164520102, "grad_norm": 1.2507485151290894, "learning_rate": 9.840473684210528e-05, "loss": 0.6241, "step": 26074 }, { "epoch": 
1.4601299137641393, "grad_norm": 1.2357007265090942, "learning_rate": 9.840447368421052e-05, "loss": 0.3541, "step": 26075 }, { "epoch": 1.4601859110762683, "grad_norm": 1.3801939487457275, "learning_rate": 9.84042105263158e-05, "loss": 0.7115, "step": 26076 }, { "epoch": 1.4602419083883973, "grad_norm": 1.485823631286621, "learning_rate": 9.840394736842106e-05, "loss": 0.4691, "step": 26077 }, { "epoch": 1.4602979057005263, "grad_norm": 8.340812683105469, "learning_rate": 9.840368421052633e-05, "loss": 0.3016, "step": 26078 }, { "epoch": 1.4603539030126553, "grad_norm": 1.2271965742111206, "learning_rate": 9.840342105263159e-05, "loss": 0.4139, "step": 26079 }, { "epoch": 1.4604099003247843, "grad_norm": 1.0837594270706177, "learning_rate": 9.840315789473684e-05, "loss": 0.4571, "step": 26080 }, { "epoch": 1.4604658976369134, "grad_norm": 1.594258427619934, "learning_rate": 9.840289473684211e-05, "loss": 0.4861, "step": 26081 }, { "epoch": 1.4605218949490424, "grad_norm": 1.315037727355957, "learning_rate": 9.840263157894737e-05, "loss": 0.4359, "step": 26082 }, { "epoch": 1.4605778922611714, "grad_norm": 1.315635085105896, "learning_rate": 9.840236842105264e-05, "loss": 0.5626, "step": 26083 }, { "epoch": 1.4606338895733004, "grad_norm": 1.4542587995529175, "learning_rate": 9.84021052631579e-05, "loss": 0.4359, "step": 26084 }, { "epoch": 1.4606898868854294, "grad_norm": 1.3752509355545044, "learning_rate": 9.840184210526316e-05, "loss": 0.4102, "step": 26085 }, { "epoch": 1.4607458841975585, "grad_norm": 1.7064623832702637, "learning_rate": 9.840157894736842e-05, "loss": 0.4918, "step": 26086 }, { "epoch": 1.4608018815096875, "grad_norm": 1.3330295085906982, "learning_rate": 9.84013157894737e-05, "loss": 0.4255, "step": 26087 }, { "epoch": 1.4608578788218165, "grad_norm": 1.7486516237258911, "learning_rate": 9.840105263157895e-05, "loss": 0.5374, "step": 26088 }, { "epoch": 1.4609138761339455, "grad_norm": 1.2772011756896973, "learning_rate": 
9.840078947368421e-05, "loss": 0.4463, "step": 26089 }, { "epoch": 1.4609698734460745, "grad_norm": 1.404482126235962, "learning_rate": 9.840052631578947e-05, "loss": 0.3757, "step": 26090 }, { "epoch": 1.4610258707582036, "grad_norm": 1.397269368171692, "learning_rate": 9.840026315789475e-05, "loss": 0.4529, "step": 26091 }, { "epoch": 1.4610818680703326, "grad_norm": 1.510548710823059, "learning_rate": 9.84e-05, "loss": 0.5331, "step": 26092 }, { "epoch": 1.4611378653824616, "grad_norm": 1.2802950143814087, "learning_rate": 9.839973684210527e-05, "loss": 0.4482, "step": 26093 }, { "epoch": 1.4611938626945906, "grad_norm": 1.235029935836792, "learning_rate": 9.839947368421053e-05, "loss": 0.5423, "step": 26094 }, { "epoch": 1.4612498600067196, "grad_norm": 1.1043256521224976, "learning_rate": 9.83992105263158e-05, "loss": 0.4166, "step": 26095 }, { "epoch": 1.4613058573188487, "grad_norm": 1.2232075929641724, "learning_rate": 9.839894736842106e-05, "loss": 0.3702, "step": 26096 }, { "epoch": 1.4613618546309777, "grad_norm": 1.1902234554290771, "learning_rate": 9.839868421052632e-05, "loss": 0.4973, "step": 26097 }, { "epoch": 1.4614178519431067, "grad_norm": 1.5543293952941895, "learning_rate": 9.839842105263158e-05, "loss": 0.6131, "step": 26098 }, { "epoch": 1.4614738492552357, "grad_norm": 1.4217603206634521, "learning_rate": 9.839815789473684e-05, "loss": 0.4294, "step": 26099 }, { "epoch": 1.4615298465673647, "grad_norm": 1.4103598594665527, "learning_rate": 9.839789473684211e-05, "loss": 0.422, "step": 26100 }, { "epoch": 1.4615858438794938, "grad_norm": 1.322417974472046, "learning_rate": 9.839763157894737e-05, "loss": 0.6959, "step": 26101 }, { "epoch": 1.4616418411916228, "grad_norm": 1.3699936866760254, "learning_rate": 9.839736842105264e-05, "loss": 0.6449, "step": 26102 }, { "epoch": 1.4616978385037518, "grad_norm": 1.6707324981689453, "learning_rate": 9.839710526315789e-05, "loss": 0.4287, "step": 26103 }, { "epoch": 1.4617538358158808, "grad_norm": 
1.2176315784454346, "learning_rate": 9.839684210526316e-05, "loss": 0.3065, "step": 26104 }, { "epoch": 1.4618098331280098, "grad_norm": 1.8233367204666138, "learning_rate": 9.839657894736842e-05, "loss": 0.531, "step": 26105 }, { "epoch": 1.4618658304401388, "grad_norm": 1.1831194162368774, "learning_rate": 9.83963157894737e-05, "loss": 0.3419, "step": 26106 }, { "epoch": 1.4619218277522679, "grad_norm": 1.393722414970398, "learning_rate": 9.839605263157896e-05, "loss": 0.522, "step": 26107 }, { "epoch": 1.4619778250643969, "grad_norm": 1.2852668762207031, "learning_rate": 9.839578947368422e-05, "loss": 0.5382, "step": 26108 }, { "epoch": 1.462033822376526, "grad_norm": 1.2282413244247437, "learning_rate": 9.839552631578948e-05, "loss": 0.4142, "step": 26109 }, { "epoch": 1.462089819688655, "grad_norm": 1.3288555145263672, "learning_rate": 9.839526315789475e-05, "loss": 0.3723, "step": 26110 }, { "epoch": 1.462145817000784, "grad_norm": 1.4714993238449097, "learning_rate": 9.839500000000001e-05, "loss": 0.4981, "step": 26111 }, { "epoch": 1.462201814312913, "grad_norm": 1.2625560760498047, "learning_rate": 9.839473684210527e-05, "loss": 0.5205, "step": 26112 }, { "epoch": 1.462257811625042, "grad_norm": 3.8642263412475586, "learning_rate": 9.839447368421053e-05, "loss": 0.5482, "step": 26113 }, { "epoch": 1.462313808937171, "grad_norm": 1.502594232559204, "learning_rate": 9.839421052631579e-05, "loss": 0.5093, "step": 26114 }, { "epoch": 1.4623698062493, "grad_norm": 1.234735131263733, "learning_rate": 9.839394736842106e-05, "loss": 0.434, "step": 26115 }, { "epoch": 1.462425803561429, "grad_norm": 1.3437854051589966, "learning_rate": 9.839368421052632e-05, "loss": 0.365, "step": 26116 }, { "epoch": 1.462481800873558, "grad_norm": 1.4200648069381714, "learning_rate": 9.839342105263158e-05, "loss": 0.4288, "step": 26117 }, { "epoch": 1.462537798185687, "grad_norm": 1.7690536975860596, "learning_rate": 9.839315789473684e-05, "loss": 0.5416, "step": 26118 }, { 
"epoch": 1.462593795497816, "grad_norm": 1.2500172853469849, "learning_rate": 9.839289473684211e-05, "loss": 0.374, "step": 26119 }, { "epoch": 1.4626497928099451, "grad_norm": 1.230628252029419, "learning_rate": 9.839263157894737e-05, "loss": 0.5791, "step": 26120 }, { "epoch": 1.4627057901220741, "grad_norm": 1.2279032468795776, "learning_rate": 9.839236842105263e-05, "loss": 0.4471, "step": 26121 }, { "epoch": 1.4627617874342032, "grad_norm": 1.8044170141220093, "learning_rate": 9.83921052631579e-05, "loss": 0.8309, "step": 26122 }, { "epoch": 1.4628177847463322, "grad_norm": 1.2737913131713867, "learning_rate": 9.839184210526317e-05, "loss": 0.5838, "step": 26123 }, { "epoch": 1.4628737820584612, "grad_norm": 1.3217850923538208, "learning_rate": 9.839157894736843e-05, "loss": 0.4428, "step": 26124 }, { "epoch": 1.4629297793705902, "grad_norm": 1.421263337135315, "learning_rate": 9.839131578947369e-05, "loss": 0.457, "step": 26125 }, { "epoch": 1.4629857766827192, "grad_norm": 1.8002113103866577, "learning_rate": 9.839105263157895e-05, "loss": 0.5559, "step": 26126 }, { "epoch": 1.4630417739948482, "grad_norm": 1.7728595733642578, "learning_rate": 9.839078947368422e-05, "loss": 0.6489, "step": 26127 }, { "epoch": 1.4630977713069773, "grad_norm": 1.3049659729003906, "learning_rate": 9.839052631578948e-05, "loss": 0.4105, "step": 26128 }, { "epoch": 1.4631537686191063, "grad_norm": 1.3368240594863892, "learning_rate": 9.839026315789475e-05, "loss": 0.4986, "step": 26129 }, { "epoch": 1.4632097659312353, "grad_norm": 1.1114985942840576, "learning_rate": 9.839e-05, "loss": 0.4899, "step": 26130 }, { "epoch": 1.4632657632433643, "grad_norm": 1.4967657327651978, "learning_rate": 9.838973684210526e-05, "loss": 0.419, "step": 26131 }, { "epoch": 1.4633217605554933, "grad_norm": 1.5984803438186646, "learning_rate": 9.838947368421053e-05, "loss": 0.4878, "step": 26132 }, { "epoch": 1.4633777578676224, "grad_norm": 1.439347505569458, "learning_rate": 9.838921052631579e-05, 
"loss": 0.4475, "step": 26133 }, { "epoch": 1.4634337551797514, "grad_norm": 1.2261884212493896, "learning_rate": 9.838894736842106e-05, "loss": 0.434, "step": 26134 }, { "epoch": 1.4634897524918804, "grad_norm": 1.3100343942642212, "learning_rate": 9.838868421052631e-05, "loss": 0.5021, "step": 26135 }, { "epoch": 1.4635457498040094, "grad_norm": 1.1510303020477295, "learning_rate": 9.838842105263158e-05, "loss": 0.4006, "step": 26136 }, { "epoch": 1.4636017471161384, "grad_norm": 1.1804864406585693, "learning_rate": 9.838815789473684e-05, "loss": 0.4323, "step": 26137 }, { "epoch": 1.4636577444282675, "grad_norm": 1.1206936836242676, "learning_rate": 9.838789473684212e-05, "loss": 0.4375, "step": 26138 }, { "epoch": 1.4637137417403965, "grad_norm": 1.62826669216156, "learning_rate": 9.838763157894738e-05, "loss": 0.5586, "step": 26139 }, { "epoch": 1.4637697390525255, "grad_norm": 1.252765417098999, "learning_rate": 9.838736842105264e-05, "loss": 0.4176, "step": 26140 }, { "epoch": 1.4638257363646545, "grad_norm": 1.4438307285308838, "learning_rate": 9.83871052631579e-05, "loss": 0.5453, "step": 26141 }, { "epoch": 1.4638817336767835, "grad_norm": 1.2927666902542114, "learning_rate": 9.838684210526317e-05, "loss": 0.4684, "step": 26142 }, { "epoch": 1.4639377309889126, "grad_norm": 1.4094184637069702, "learning_rate": 9.838657894736843e-05, "loss": 0.4477, "step": 26143 }, { "epoch": 1.4639937283010416, "grad_norm": 1.346919059753418, "learning_rate": 9.838631578947369e-05, "loss": 0.4416, "step": 26144 }, { "epoch": 1.4640497256131706, "grad_norm": 1.358778953552246, "learning_rate": 9.838605263157895e-05, "loss": 0.4921, "step": 26145 }, { "epoch": 1.4641057229252996, "grad_norm": 1.0951234102249146, "learning_rate": 9.838578947368422e-05, "loss": 0.4282, "step": 26146 }, { "epoch": 1.4641617202374286, "grad_norm": 1.1184499263763428, "learning_rate": 9.838552631578948e-05, "loss": 0.4633, "step": 26147 }, { "epoch": 1.4642177175495577, "grad_norm": 
2.312195062637329, "learning_rate": 9.838526315789474e-05, "loss": 0.4097, "step": 26148 }, { "epoch": 1.4642737148616867, "grad_norm": 1.454043984413147, "learning_rate": 9.8385e-05, "loss": 0.4175, "step": 26149 }, { "epoch": 1.4643297121738157, "grad_norm": 1.3082998991012573, "learning_rate": 9.838473684210526e-05, "loss": 0.5793, "step": 26150 }, { "epoch": 1.4643857094859447, "grad_norm": 1.2758334875106812, "learning_rate": 9.838447368421053e-05, "loss": 0.4464, "step": 26151 }, { "epoch": 1.4644417067980737, "grad_norm": 1.262650728225708, "learning_rate": 9.83842105263158e-05, "loss": 0.4955, "step": 26152 }, { "epoch": 1.4644977041102027, "grad_norm": 1.4613919258117676, "learning_rate": 9.838394736842105e-05, "loss": 0.4926, "step": 26153 }, { "epoch": 1.4645537014223318, "grad_norm": 1.3332149982452393, "learning_rate": 9.838368421052631e-05, "loss": 0.3976, "step": 26154 }, { "epoch": 1.4646096987344608, "grad_norm": 2.993288278579712, "learning_rate": 9.838342105263159e-05, "loss": 0.5283, "step": 26155 }, { "epoch": 1.4646656960465898, "grad_norm": 1.2900582551956177, "learning_rate": 9.838315789473685e-05, "loss": 0.4609, "step": 26156 }, { "epoch": 1.4647216933587188, "grad_norm": 1.1441998481750488, "learning_rate": 9.838289473684212e-05, "loss": 0.4822, "step": 26157 }, { "epoch": 1.4647776906708478, "grad_norm": 2.2140674591064453, "learning_rate": 9.838263157894737e-05, "loss": 0.3641, "step": 26158 }, { "epoch": 1.4648336879829769, "grad_norm": 1.6417720317840576, "learning_rate": 9.838236842105264e-05, "loss": 0.474, "step": 26159 }, { "epoch": 1.4648896852951059, "grad_norm": 1.185815691947937, "learning_rate": 9.83821052631579e-05, "loss": 0.3657, "step": 26160 }, { "epoch": 1.464945682607235, "grad_norm": 1.2073918581008911, "learning_rate": 9.838184210526317e-05, "loss": 0.4163, "step": 26161 }, { "epoch": 1.465001679919364, "grad_norm": 1.1560301780700684, "learning_rate": 9.838157894736842e-05, "loss": 0.4289, "step": 26162 }, { 
"epoch": 1.465057677231493, "grad_norm": 1.4747960567474365, "learning_rate": 9.838131578947369e-05, "loss": 0.5165, "step": 26163 }, { "epoch": 1.465113674543622, "grad_norm": 1.760401725769043, "learning_rate": 9.838105263157895e-05, "loss": 0.5872, "step": 26164 }, { "epoch": 1.465169671855751, "grad_norm": 1.168134093284607, "learning_rate": 9.838078947368422e-05, "loss": 0.3327, "step": 26165 }, { "epoch": 1.46522566916788, "grad_norm": 1.3292665481567383, "learning_rate": 9.838052631578948e-05, "loss": 0.5577, "step": 26166 }, { "epoch": 1.465281666480009, "grad_norm": 1.2612038850784302, "learning_rate": 9.838026315789473e-05, "loss": 0.4686, "step": 26167 }, { "epoch": 1.465337663792138, "grad_norm": 1.4005955457687378, "learning_rate": 9.838e-05, "loss": 0.4274, "step": 26168 }, { "epoch": 1.465393661104267, "grad_norm": 1.398913025856018, "learning_rate": 9.837973684210526e-05, "loss": 0.4616, "step": 26169 }, { "epoch": 1.465449658416396, "grad_norm": 3.524045944213867, "learning_rate": 9.837947368421054e-05, "loss": 0.4242, "step": 26170 }, { "epoch": 1.465505655728525, "grad_norm": 1.2521146535873413, "learning_rate": 9.83792105263158e-05, "loss": 0.3885, "step": 26171 }, { "epoch": 1.465561653040654, "grad_norm": 1.2548400163650513, "learning_rate": 9.837894736842106e-05, "loss": 0.4356, "step": 26172 }, { "epoch": 1.4656176503527831, "grad_norm": 1.3966028690338135, "learning_rate": 9.837868421052632e-05, "loss": 0.5344, "step": 26173 }, { "epoch": 1.4656736476649121, "grad_norm": 1.1401585340499878, "learning_rate": 9.837842105263159e-05, "loss": 0.4329, "step": 26174 }, { "epoch": 1.4657296449770412, "grad_norm": 1.516364336013794, "learning_rate": 9.837815789473685e-05, "loss": 0.4669, "step": 26175 }, { "epoch": 1.4657856422891702, "grad_norm": 1.3908380270004272, "learning_rate": 9.837789473684211e-05, "loss": 0.5796, "step": 26176 }, { "epoch": 1.4658416396012992, "grad_norm": 1.1535483598709106, "learning_rate": 9.837763157894737e-05, "loss": 
0.3513, "step": 26177 }, { "epoch": 1.4658976369134282, "grad_norm": 1.364871621131897, "learning_rate": 9.837736842105264e-05, "loss": 0.6552, "step": 26178 }, { "epoch": 1.4659536342255572, "grad_norm": 1.3948826789855957, "learning_rate": 9.83771052631579e-05, "loss": 0.5461, "step": 26179 }, { "epoch": 1.4660096315376863, "grad_norm": 1.451411485671997, "learning_rate": 9.837684210526316e-05, "loss": 0.5771, "step": 26180 }, { "epoch": 1.4660656288498153, "grad_norm": 1.616327166557312, "learning_rate": 9.837657894736842e-05, "loss": 0.4818, "step": 26181 }, { "epoch": 1.4661216261619443, "grad_norm": 1.4954359531402588, "learning_rate": 9.83763157894737e-05, "loss": 0.3666, "step": 26182 }, { "epoch": 1.4661776234740733, "grad_norm": 2.1145246028900146, "learning_rate": 9.837605263157895e-05, "loss": 0.5932, "step": 26183 }, { "epoch": 1.4662336207862023, "grad_norm": 1.4395397901535034, "learning_rate": 9.837578947368421e-05, "loss": 0.3956, "step": 26184 }, { "epoch": 1.4662896180983314, "grad_norm": 1.270071268081665, "learning_rate": 9.837552631578947e-05, "loss": 0.4866, "step": 26185 }, { "epoch": 1.4663456154104604, "grad_norm": 1.183920979499817, "learning_rate": 9.837526315789473e-05, "loss": 0.4086, "step": 26186 }, { "epoch": 1.4664016127225894, "grad_norm": 2.1491172313690186, "learning_rate": 9.8375e-05, "loss": 0.619, "step": 26187 }, { "epoch": 1.4664576100347184, "grad_norm": 1.3373596668243408, "learning_rate": 9.837473684210527e-05, "loss": 0.425, "step": 26188 }, { "epoch": 1.4665136073468474, "grad_norm": 1.3180291652679443, "learning_rate": 9.837447368421054e-05, "loss": 0.4633, "step": 26189 }, { "epoch": 1.4665696046589765, "grad_norm": 1.2733041048049927, "learning_rate": 9.837421052631579e-05, "loss": 0.5698, "step": 26190 }, { "epoch": 1.4666256019711055, "grad_norm": 1.4104869365692139, "learning_rate": 9.837394736842106e-05, "loss": 0.6728, "step": 26191 }, { "epoch": 1.4666815992832345, "grad_norm": 1.3825445175170898, 
"learning_rate": 9.837368421052632e-05, "loss": 0.4538, "step": 26192 }, { "epoch": 1.4667375965953635, "grad_norm": 1.294661283493042, "learning_rate": 9.837342105263159e-05, "loss": 0.4541, "step": 26193 }, { "epoch": 1.4667935939074925, "grad_norm": 1.3568086624145508, "learning_rate": 9.837315789473685e-05, "loss": 0.5706, "step": 26194 }, { "epoch": 1.4668495912196216, "grad_norm": 2.1935811042785645, "learning_rate": 9.837289473684211e-05, "loss": 0.5933, "step": 26195 }, { "epoch": 1.4669055885317506, "grad_norm": 1.3590644598007202, "learning_rate": 9.837263157894737e-05, "loss": 0.5384, "step": 26196 }, { "epoch": 1.4669615858438796, "grad_norm": 1.1283819675445557, "learning_rate": 9.837236842105264e-05, "loss": 0.4398, "step": 26197 }, { "epoch": 1.4670175831560086, "grad_norm": 1.3206721544265747, "learning_rate": 9.83721052631579e-05, "loss": 0.4794, "step": 26198 }, { "epoch": 1.4670735804681376, "grad_norm": 1.4902489185333252, "learning_rate": 9.837184210526316e-05, "loss": 0.5192, "step": 26199 }, { "epoch": 1.4671295777802666, "grad_norm": 1.4239227771759033, "learning_rate": 9.837157894736842e-05, "loss": 0.4689, "step": 26200 }, { "epoch": 1.4671855750923957, "grad_norm": 1.2795462608337402, "learning_rate": 9.837131578947368e-05, "loss": 0.428, "step": 26201 }, { "epoch": 1.4672415724045247, "grad_norm": 1.591731309890747, "learning_rate": 9.837105263157896e-05, "loss": 0.4965, "step": 26202 }, { "epoch": 1.4672975697166537, "grad_norm": 1.4575308561325073, "learning_rate": 9.837078947368422e-05, "loss": 0.4823, "step": 26203 }, { "epoch": 1.4673535670287827, "grad_norm": 1.3647435903549194, "learning_rate": 9.837052631578948e-05, "loss": 0.3855, "step": 26204 }, { "epoch": 1.4674095643409117, "grad_norm": 1.602904200553894, "learning_rate": 9.837026315789474e-05, "loss": 0.5449, "step": 26205 }, { "epoch": 1.4674655616530408, "grad_norm": 1.2654188871383667, "learning_rate": 9.837000000000001e-05, "loss": 0.4398, "step": 26206 }, { "epoch": 
1.4675215589651698, "grad_norm": 1.1874247789382935, "learning_rate": 9.836973684210527e-05, "loss": 0.3172, "step": 26207 }, { "epoch": 1.4675775562772988, "grad_norm": 1.2212897539138794, "learning_rate": 9.836947368421053e-05, "loss": 0.4092, "step": 26208 }, { "epoch": 1.4676335535894278, "grad_norm": 1.4444659948349, "learning_rate": 9.836921052631579e-05, "loss": 0.4105, "step": 26209 }, { "epoch": 1.4676895509015568, "grad_norm": 1.3629571199417114, "learning_rate": 9.836894736842106e-05, "loss": 0.6394, "step": 26210 }, { "epoch": 1.4677455482136859, "grad_norm": 1.2981512546539307, "learning_rate": 9.836868421052632e-05, "loss": 0.5366, "step": 26211 }, { "epoch": 1.4678015455258149, "grad_norm": 1.6782872676849365, "learning_rate": 9.83684210526316e-05, "loss": 0.5427, "step": 26212 }, { "epoch": 1.467857542837944, "grad_norm": 1.6365340948104858, "learning_rate": 9.836815789473684e-05, "loss": 0.494, "step": 26213 }, { "epoch": 1.467913540150073, "grad_norm": 1.3263881206512451, "learning_rate": 9.836789473684211e-05, "loss": 0.4234, "step": 26214 }, { "epoch": 1.467969537462202, "grad_norm": 1.6355187892913818, "learning_rate": 9.836763157894737e-05, "loss": 0.6058, "step": 26215 }, { "epoch": 1.468025534774331, "grad_norm": 1.500312328338623, "learning_rate": 9.836736842105265e-05, "loss": 0.4829, "step": 26216 }, { "epoch": 1.46808153208646, "grad_norm": 1.261370301246643, "learning_rate": 9.836710526315789e-05, "loss": 0.5173, "step": 26217 }, { "epoch": 1.468137529398589, "grad_norm": 1.103760004043579, "learning_rate": 9.836684210526315e-05, "loss": 0.4576, "step": 26218 }, { "epoch": 1.468193526710718, "grad_norm": 1.3855925798416138, "learning_rate": 9.836657894736843e-05, "loss": 0.4705, "step": 26219 }, { "epoch": 1.468249524022847, "grad_norm": 1.7272679805755615, "learning_rate": 9.836631578947369e-05, "loss": 0.4376, "step": 26220 }, { "epoch": 1.468305521334976, "grad_norm": 1.2779830694198608, "learning_rate": 9.836605263157896e-05, 
"loss": 0.558, "step": 26221 }, { "epoch": 1.468361518647105, "grad_norm": 1.3490245342254639, "learning_rate": 9.83657894736842e-05, "loss": 0.4422, "step": 26222 }, { "epoch": 1.468417515959234, "grad_norm": 1.7253708839416504, "learning_rate": 9.836552631578948e-05, "loss": 0.5349, "step": 26223 }, { "epoch": 1.468473513271363, "grad_norm": 1.2533903121948242, "learning_rate": 9.836526315789474e-05, "loss": 0.489, "step": 26224 }, { "epoch": 1.4685295105834921, "grad_norm": 1.0124671459197998, "learning_rate": 9.836500000000001e-05, "loss": 0.3777, "step": 26225 }, { "epoch": 1.4685855078956211, "grad_norm": 1.3425922393798828, "learning_rate": 9.836473684210527e-05, "loss": 0.4844, "step": 26226 }, { "epoch": 1.4686415052077502, "grad_norm": 1.3471434116363525, "learning_rate": 9.836447368421053e-05, "loss": 0.4589, "step": 26227 }, { "epoch": 1.4686975025198792, "grad_norm": 1.4200447797775269, "learning_rate": 9.836421052631579e-05, "loss": 0.4118, "step": 26228 }, { "epoch": 1.468753499832008, "grad_norm": 1.3445628881454468, "learning_rate": 9.836394736842106e-05, "loss": 0.4383, "step": 26229 }, { "epoch": 1.468809497144137, "grad_norm": 1.3946771621704102, "learning_rate": 9.836368421052632e-05, "loss": 0.4094, "step": 26230 }, { "epoch": 1.468865494456266, "grad_norm": 1.3378466367721558, "learning_rate": 9.836342105263158e-05, "loss": 0.4681, "step": 26231 }, { "epoch": 1.468921491768395, "grad_norm": 1.1821906566619873, "learning_rate": 9.836315789473684e-05, "loss": 0.3714, "step": 26232 }, { "epoch": 1.468977489080524, "grad_norm": 1.356821894645691, "learning_rate": 9.836289473684212e-05, "loss": 0.5454, "step": 26233 }, { "epoch": 1.469033486392653, "grad_norm": 1.5115985870361328, "learning_rate": 9.836263157894738e-05, "loss": 0.5112, "step": 26234 }, { "epoch": 1.469089483704782, "grad_norm": 1.245773196220398, "learning_rate": 9.836236842105264e-05, "loss": 0.4485, "step": 26235 }, { "epoch": 1.4691454810169111, "grad_norm": 1.9896646738052368, 
"learning_rate": 9.83621052631579e-05, "loss": 0.6119, "step": 26236 }, { "epoch": 1.4692014783290401, "grad_norm": 1.217750072479248, "learning_rate": 9.836184210526315e-05, "loss": 0.4095, "step": 26237 }, { "epoch": 1.4692574756411692, "grad_norm": 1.9007140398025513, "learning_rate": 9.836157894736843e-05, "loss": 0.4894, "step": 26238 }, { "epoch": 1.4693134729532982, "grad_norm": 1.4336644411087036, "learning_rate": 9.836131578947369e-05, "loss": 0.4557, "step": 26239 }, { "epoch": 1.4693694702654272, "grad_norm": 1.2235615253448486, "learning_rate": 9.836105263157895e-05, "loss": 0.4204, "step": 26240 }, { "epoch": 1.4694254675775562, "grad_norm": 1.5203443765640259, "learning_rate": 9.836078947368421e-05, "loss": 0.6011, "step": 26241 }, { "epoch": 1.4694814648896852, "grad_norm": 1.2648966312408447, "learning_rate": 9.836052631578948e-05, "loss": 0.5083, "step": 26242 }, { "epoch": 1.4695374622018142, "grad_norm": 1.2400827407836914, "learning_rate": 9.836026315789474e-05, "loss": 0.5313, "step": 26243 }, { "epoch": 1.4695934595139433, "grad_norm": 1.233296513557434, "learning_rate": 9.836000000000001e-05, "loss": 0.4499, "step": 26244 }, { "epoch": 1.4696494568260723, "grad_norm": 1.275831937789917, "learning_rate": 9.835973684210526e-05, "loss": 0.539, "step": 26245 }, { "epoch": 1.4697054541382013, "grad_norm": 1.2991141080856323, "learning_rate": 9.835947368421053e-05, "loss": 0.502, "step": 26246 }, { "epoch": 1.4697614514503303, "grad_norm": 1.430525541305542, "learning_rate": 9.835921052631579e-05, "loss": 0.489, "step": 26247 }, { "epoch": 1.4698174487624593, "grad_norm": 1.268449068069458, "learning_rate": 9.835894736842107e-05, "loss": 0.3601, "step": 26248 }, { "epoch": 1.4698734460745884, "grad_norm": 1.5674430131912231, "learning_rate": 9.835868421052633e-05, "loss": 0.6267, "step": 26249 }, { "epoch": 1.4699294433867174, "grad_norm": 1.63973069190979, "learning_rate": 9.835842105263159e-05, "loss": 0.5781, "step": 26250 }, { "epoch": 
1.4699854406988464, "grad_norm": 1.3429597616195679, "learning_rate": 9.835815789473685e-05, "loss": 0.3855, "step": 26251 }, { "epoch": 1.4700414380109754, "grad_norm": 1.366430640220642, "learning_rate": 9.83578947368421e-05, "loss": 0.416, "step": 26252 }, { "epoch": 1.4700974353231044, "grad_norm": 1.4533041715621948, "learning_rate": 9.835763157894738e-05, "loss": 0.45, "step": 26253 }, { "epoch": 1.4701534326352335, "grad_norm": 1.2154712677001953, "learning_rate": 9.835736842105264e-05, "loss": 0.4355, "step": 26254 }, { "epoch": 1.4702094299473625, "grad_norm": 1.096551537513733, "learning_rate": 9.83571052631579e-05, "loss": 0.4278, "step": 26255 }, { "epoch": 1.4702654272594915, "grad_norm": 1.5150096416473389, "learning_rate": 9.835684210526316e-05, "loss": 0.4331, "step": 26256 }, { "epoch": 1.4703214245716205, "grad_norm": 1.4409756660461426, "learning_rate": 9.835657894736843e-05, "loss": 0.4369, "step": 26257 }, { "epoch": 1.4703774218837495, "grad_norm": 1.529038667678833, "learning_rate": 9.835631578947369e-05, "loss": 0.5506, "step": 26258 }, { "epoch": 1.4704334191958786, "grad_norm": 1.58342444896698, "learning_rate": 9.835605263157895e-05, "loss": 0.4856, "step": 26259 }, { "epoch": 1.4704894165080076, "grad_norm": 1.3333591222763062, "learning_rate": 9.835578947368421e-05, "loss": 0.4603, "step": 26260 }, { "epoch": 1.4705454138201366, "grad_norm": 1.2099251747131348, "learning_rate": 9.835552631578948e-05, "loss": 0.402, "step": 26261 }, { "epoch": 1.4706014111322656, "grad_norm": 1.2804086208343506, "learning_rate": 9.835526315789474e-05, "loss": 0.3806, "step": 26262 }, { "epoch": 1.4706574084443946, "grad_norm": 1.2557204961776733, "learning_rate": 9.8355e-05, "loss": 0.383, "step": 26263 }, { "epoch": 1.4707134057565237, "grad_norm": 1.328728199005127, "learning_rate": 9.835473684210526e-05, "loss": 0.6845, "step": 26264 }, { "epoch": 1.4707694030686527, "grad_norm": 1.572045922279358, "learning_rate": 9.835447368421054e-05, "loss": 
0.4997, "step": 26265 }, { "epoch": 1.4708254003807817, "grad_norm": 1.8102408647537231, "learning_rate": 9.83542105263158e-05, "loss": 0.5642, "step": 26266 }, { "epoch": 1.4708813976929107, "grad_norm": 1.502687692642212, "learning_rate": 9.835394736842107e-05, "loss": 0.5193, "step": 26267 }, { "epoch": 1.4709373950050397, "grad_norm": 1.3405128717422485, "learning_rate": 9.835368421052631e-05, "loss": 0.4186, "step": 26268 }, { "epoch": 1.4709933923171687, "grad_norm": 1.430985450744629, "learning_rate": 9.835342105263159e-05, "loss": 0.7133, "step": 26269 }, { "epoch": 1.4710493896292978, "grad_norm": 1.7925950288772583, "learning_rate": 9.835315789473685e-05, "loss": 0.4835, "step": 26270 }, { "epoch": 1.4711053869414268, "grad_norm": 1.115112543106079, "learning_rate": 9.835289473684211e-05, "loss": 0.4697, "step": 26271 }, { "epoch": 1.4711613842535558, "grad_norm": 1.8055635690689087, "learning_rate": 9.835263157894737e-05, "loss": 0.6008, "step": 26272 }, { "epoch": 1.4712173815656848, "grad_norm": 1.398441195487976, "learning_rate": 9.835236842105263e-05, "loss": 0.4506, "step": 26273 }, { "epoch": 1.4712733788778138, "grad_norm": 1.3960886001586914, "learning_rate": 9.83521052631579e-05, "loss": 0.5928, "step": 26274 }, { "epoch": 1.4713293761899429, "grad_norm": 1.3009015321731567, "learning_rate": 9.835184210526316e-05, "loss": 0.4612, "step": 26275 }, { "epoch": 1.4713853735020719, "grad_norm": 1.52046537399292, "learning_rate": 9.835157894736843e-05, "loss": 0.5243, "step": 26276 }, { "epoch": 1.471441370814201, "grad_norm": 1.382170557975769, "learning_rate": 9.835131578947368e-05, "loss": 0.5536, "step": 26277 }, { "epoch": 1.47149736812633, "grad_norm": 1.4639800786972046, "learning_rate": 9.835105263157895e-05, "loss": 0.4572, "step": 26278 }, { "epoch": 1.471553365438459, "grad_norm": 1.494409203529358, "learning_rate": 9.835078947368421e-05, "loss": 0.4796, "step": 26279 }, { "epoch": 1.471609362750588, "grad_norm": 1.181498646736145, 
"learning_rate": 9.835052631578949e-05, "loss": 0.4657, "step": 26280 }, { "epoch": 1.471665360062717, "grad_norm": 1.3346881866455078, "learning_rate": 9.835026315789475e-05, "loss": 0.5247, "step": 26281 }, { "epoch": 1.471721357374846, "grad_norm": 1.2633123397827148, "learning_rate": 9.835e-05, "loss": 0.3755, "step": 26282 }, { "epoch": 1.471777354686975, "grad_norm": 1.2065980434417725, "learning_rate": 9.834973684210527e-05, "loss": 0.5416, "step": 26283 }, { "epoch": 1.471833351999104, "grad_norm": 1.511853814125061, "learning_rate": 9.834947368421054e-05, "loss": 0.5346, "step": 26284 }, { "epoch": 1.471889349311233, "grad_norm": 1.6110639572143555, "learning_rate": 9.83492105263158e-05, "loss": 0.5677, "step": 26285 }, { "epoch": 1.471945346623362, "grad_norm": 1.2345631122589111, "learning_rate": 9.834894736842106e-05, "loss": 0.6015, "step": 26286 }, { "epoch": 1.472001343935491, "grad_norm": 1.659584879875183, "learning_rate": 9.834868421052632e-05, "loss": 0.4492, "step": 26287 }, { "epoch": 1.47205734124762, "grad_norm": 1.4842394590377808, "learning_rate": 9.834842105263158e-05, "loss": 0.5161, "step": 26288 }, { "epoch": 1.4721133385597491, "grad_norm": 1.5364598035812378, "learning_rate": 9.834815789473685e-05, "loss": 0.5089, "step": 26289 }, { "epoch": 1.4721693358718781, "grad_norm": 1.1319185495376587, "learning_rate": 9.834789473684211e-05, "loss": 0.3774, "step": 26290 }, { "epoch": 1.4722253331840072, "grad_norm": 1.2564584016799927, "learning_rate": 9.834763157894737e-05, "loss": 0.4365, "step": 26291 }, { "epoch": 1.4722813304961362, "grad_norm": 1.386336088180542, "learning_rate": 9.834736842105263e-05, "loss": 0.4626, "step": 26292 }, { "epoch": 1.4723373278082652, "grad_norm": 1.2889857292175293, "learning_rate": 9.83471052631579e-05, "loss": 0.469, "step": 26293 }, { "epoch": 1.4723933251203942, "grad_norm": 2.604438304901123, "learning_rate": 9.834684210526316e-05, "loss": 0.4053, "step": 26294 }, { "epoch": 1.4724493224325232, 
"grad_norm": 1.454038381576538, "learning_rate": 9.834657894736842e-05, "loss": 0.6295, "step": 26295 }, { "epoch": 1.4725053197446523, "grad_norm": 1.471243143081665, "learning_rate": 9.834631578947368e-05, "loss": 0.4956, "step": 26296 }, { "epoch": 1.4725613170567813, "grad_norm": 1.1576184034347534, "learning_rate": 9.834605263157896e-05, "loss": 0.4578, "step": 26297 }, { "epoch": 1.4726173143689103, "grad_norm": 1.247467279434204, "learning_rate": 9.834578947368422e-05, "loss": 0.4447, "step": 26298 }, { "epoch": 1.4726733116810393, "grad_norm": 1.2596161365509033, "learning_rate": 9.834552631578949e-05, "loss": 0.4248, "step": 26299 }, { "epoch": 1.4727293089931683, "grad_norm": 1.3921743631362915, "learning_rate": 9.834526315789473e-05, "loss": 0.4215, "step": 26300 }, { "epoch": 1.4727853063052974, "grad_norm": 1.458272099494934, "learning_rate": 9.834500000000001e-05, "loss": 0.4621, "step": 26301 }, { "epoch": 1.4728413036174264, "grad_norm": 1.153385043144226, "learning_rate": 9.834473684210527e-05, "loss": 0.4612, "step": 26302 }, { "epoch": 1.4728973009295554, "grad_norm": 1.3927161693572998, "learning_rate": 9.834447368421054e-05, "loss": 0.4447, "step": 26303 }, { "epoch": 1.4729532982416844, "grad_norm": 1.2911595106124878, "learning_rate": 9.83442105263158e-05, "loss": 0.4224, "step": 26304 }, { "epoch": 1.4730092955538134, "grad_norm": 1.290352702140808, "learning_rate": 9.834394736842105e-05, "loss": 0.3922, "step": 26305 }, { "epoch": 1.4730652928659425, "grad_norm": 1.5228631496429443, "learning_rate": 9.834368421052632e-05, "loss": 0.4679, "step": 26306 }, { "epoch": 1.4731212901780715, "grad_norm": 1.3852399587631226, "learning_rate": 9.834342105263158e-05, "loss": 0.3971, "step": 26307 }, { "epoch": 1.4731772874902005, "grad_norm": 1.2399355173110962, "learning_rate": 9.834315789473685e-05, "loss": 0.4984, "step": 26308 }, { "epoch": 1.4732332848023295, "grad_norm": 1.2135710716247559, "learning_rate": 9.83428947368421e-05, "loss": 0.4384, 
"step": 26309 }, { "epoch": 1.4732892821144585, "grad_norm": 1.644197940826416, "learning_rate": 9.834263157894737e-05, "loss": 0.5053, "step": 26310 }, { "epoch": 1.4733452794265876, "grad_norm": 1.3441396951675415, "learning_rate": 9.834236842105263e-05, "loss": 0.3923, "step": 26311 }, { "epoch": 1.4734012767387166, "grad_norm": 1.3883205652236938, "learning_rate": 9.83421052631579e-05, "loss": 0.6124, "step": 26312 }, { "epoch": 1.4734572740508456, "grad_norm": 1.3514333963394165, "learning_rate": 9.834184210526317e-05, "loss": 0.5058, "step": 26313 }, { "epoch": 1.4735132713629746, "grad_norm": 1.5079749822616577, "learning_rate": 9.834157894736843e-05, "loss": 0.5274, "step": 26314 }, { "epoch": 1.4735692686751036, "grad_norm": 1.2684351205825806, "learning_rate": 9.834131578947368e-05, "loss": 0.4921, "step": 26315 }, { "epoch": 1.4736252659872326, "grad_norm": 1.300086498260498, "learning_rate": 9.834105263157896e-05, "loss": 0.4784, "step": 26316 }, { "epoch": 1.4736812632993617, "grad_norm": 1.300095796585083, "learning_rate": 9.834078947368422e-05, "loss": 0.4917, "step": 26317 }, { "epoch": 1.4737372606114907, "grad_norm": 1.8674501180648804, "learning_rate": 9.834052631578948e-05, "loss": 0.5852, "step": 26318 }, { "epoch": 1.4737932579236197, "grad_norm": 1.654843807220459, "learning_rate": 9.834026315789474e-05, "loss": 0.5459, "step": 26319 }, { "epoch": 1.4738492552357487, "grad_norm": 1.3569914102554321, "learning_rate": 9.834000000000001e-05, "loss": 0.4422, "step": 26320 }, { "epoch": 1.4739052525478777, "grad_norm": 1.52809739112854, "learning_rate": 9.833973684210527e-05, "loss": 0.2912, "step": 26321 }, { "epoch": 1.4739612498600068, "grad_norm": 1.3407853841781616, "learning_rate": 9.833947368421053e-05, "loss": 0.4849, "step": 26322 }, { "epoch": 1.4740172471721358, "grad_norm": 1.2122225761413574, "learning_rate": 9.833921052631579e-05, "loss": 0.3636, "step": 26323 }, { "epoch": 1.4740732444842648, "grad_norm": 1.421341061592102, 
"learning_rate": 9.833894736842105e-05, "loss": 0.4648, "step": 26324 }, { "epoch": 1.4741292417963938, "grad_norm": 1.3635461330413818, "learning_rate": 9.833868421052632e-05, "loss": 0.3847, "step": 26325 }, { "epoch": 1.4741852391085228, "grad_norm": 1.5558712482452393, "learning_rate": 9.833842105263158e-05, "loss": 0.7002, "step": 26326 }, { "epoch": 1.4742412364206519, "grad_norm": 1.379497766494751, "learning_rate": 9.833815789473684e-05, "loss": 0.5231, "step": 26327 }, { "epoch": 1.4742972337327809, "grad_norm": 1.337708830833435, "learning_rate": 9.83378947368421e-05, "loss": 0.3953, "step": 26328 }, { "epoch": 1.47435323104491, "grad_norm": 1.4808558225631714, "learning_rate": 9.833763157894738e-05, "loss": 0.4949, "step": 26329 }, { "epoch": 1.474409228357039, "grad_norm": 1.286215901374817, "learning_rate": 9.833736842105263e-05, "loss": 0.352, "step": 26330 }, { "epoch": 1.474465225669168, "grad_norm": 1.3624666929244995, "learning_rate": 9.833710526315791e-05, "loss": 0.5038, "step": 26331 }, { "epoch": 1.474521222981297, "grad_norm": 1.7593283653259277, "learning_rate": 9.833684210526315e-05, "loss": 0.5098, "step": 26332 }, { "epoch": 1.474577220293426, "grad_norm": 1.313141107559204, "learning_rate": 9.833657894736843e-05, "loss": 0.4492, "step": 26333 }, { "epoch": 1.474633217605555, "grad_norm": 1.3633440732955933, "learning_rate": 9.833631578947369e-05, "loss": 0.3685, "step": 26334 }, { "epoch": 1.474689214917684, "grad_norm": 1.2808122634887695, "learning_rate": 9.833605263157896e-05, "loss": 0.4479, "step": 26335 }, { "epoch": 1.474745212229813, "grad_norm": 1.3971952199935913, "learning_rate": 9.833578947368422e-05, "loss": 0.4375, "step": 26336 }, { "epoch": 1.474801209541942, "grad_norm": 1.2194265127182007, "learning_rate": 9.833552631578948e-05, "loss": 0.4314, "step": 26337 }, { "epoch": 1.474857206854071, "grad_norm": 1.1891316175460815, "learning_rate": 9.833526315789474e-05, "loss": 0.3928, "step": 26338 }, { "epoch": 
1.4749132041662, "grad_norm": 1.1536589860916138, "learning_rate": 9.8335e-05, "loss": 0.4065, "step": 26339 }, { "epoch": 1.474969201478329, "grad_norm": 1.2504948377609253, "learning_rate": 9.833473684210527e-05, "loss": 0.4255, "step": 26340 }, { "epoch": 1.4750251987904581, "grad_norm": 1.4145692586898804, "learning_rate": 9.833447368421053e-05, "loss": 0.4366, "step": 26341 }, { "epoch": 1.4750811961025871, "grad_norm": 1.2258678674697876, "learning_rate": 9.833421052631579e-05, "loss": 0.5979, "step": 26342 }, { "epoch": 1.4751371934147162, "grad_norm": 1.4275203943252563, "learning_rate": 9.833394736842105e-05, "loss": 0.4396, "step": 26343 }, { "epoch": 1.4751931907268452, "grad_norm": 1.2004852294921875, "learning_rate": 9.833368421052633e-05, "loss": 0.3964, "step": 26344 }, { "epoch": 1.4752491880389742, "grad_norm": 1.274658441543579, "learning_rate": 9.833342105263159e-05, "loss": 0.4803, "step": 26345 }, { "epoch": 1.4753051853511032, "grad_norm": 1.2332416772842407, "learning_rate": 9.833315789473684e-05, "loss": 0.4954, "step": 26346 }, { "epoch": 1.4753611826632322, "grad_norm": 1.2086886167526245, "learning_rate": 9.83328947368421e-05, "loss": 0.4858, "step": 26347 }, { "epoch": 1.4754171799753613, "grad_norm": 1.24374520778656, "learning_rate": 9.833263157894738e-05, "loss": 0.4812, "step": 26348 }, { "epoch": 1.4754731772874903, "grad_norm": 1.462717890739441, "learning_rate": 9.833236842105264e-05, "loss": 0.4936, "step": 26349 }, { "epoch": 1.4755291745996193, "grad_norm": 1.3813000917434692, "learning_rate": 9.83321052631579e-05, "loss": 0.4134, "step": 26350 }, { "epoch": 1.4755851719117483, "grad_norm": 1.4574353694915771, "learning_rate": 9.833184210526316e-05, "loss": 0.5203, "step": 26351 }, { "epoch": 1.4756411692238773, "grad_norm": 11.261170387268066, "learning_rate": 9.833157894736843e-05, "loss": 0.5446, "step": 26352 }, { "epoch": 1.4756971665360061, "grad_norm": 1.5375767946243286, "learning_rate": 9.833131578947369e-05, "loss": 
0.4074, "step": 26353 }, { "epoch": 1.4757531638481352, "grad_norm": 1.4761403799057007, "learning_rate": 9.833105263157896e-05, "loss": 0.4767, "step": 26354 }, { "epoch": 1.4758091611602642, "grad_norm": 1.3679126501083374, "learning_rate": 9.833078947368421e-05, "loss": 0.4278, "step": 26355 }, { "epoch": 1.4758651584723932, "grad_norm": 1.2875906229019165, "learning_rate": 9.833052631578947e-05, "loss": 0.4687, "step": 26356 }, { "epoch": 1.4759211557845222, "grad_norm": 1.0745713710784912, "learning_rate": 9.833026315789474e-05, "loss": 0.3685, "step": 26357 }, { "epoch": 1.4759771530966512, "grad_norm": 1.2158112525939941, "learning_rate": 9.833e-05, "loss": 0.4848, "step": 26358 }, { "epoch": 1.4760331504087802, "grad_norm": 1.1623754501342773, "learning_rate": 9.832973684210528e-05, "loss": 0.5242, "step": 26359 }, { "epoch": 1.4760891477209093, "grad_norm": 1.2012572288513184, "learning_rate": 9.832947368421052e-05, "loss": 0.4513, "step": 26360 }, { "epoch": 1.4761451450330383, "grad_norm": 1.6301683187484741, "learning_rate": 9.83292105263158e-05, "loss": 0.4982, "step": 26361 }, { "epoch": 1.4762011423451673, "grad_norm": 1.2660621404647827, "learning_rate": 9.832894736842105e-05, "loss": 0.5017, "step": 26362 }, { "epoch": 1.4762571396572963, "grad_norm": 1.179836630821228, "learning_rate": 9.832868421052633e-05, "loss": 0.4138, "step": 26363 }, { "epoch": 1.4763131369694253, "grad_norm": 1.2577470541000366, "learning_rate": 9.832842105263157e-05, "loss": 0.4766, "step": 26364 }, { "epoch": 1.4763691342815544, "grad_norm": 1.3299623727798462, "learning_rate": 9.832815789473685e-05, "loss": 0.5242, "step": 26365 }, { "epoch": 1.4764251315936834, "grad_norm": 1.5384547710418701, "learning_rate": 9.832789473684211e-05, "loss": 0.453, "step": 26366 }, { "epoch": 1.4764811289058124, "grad_norm": 1.502936840057373, "learning_rate": 9.832763157894738e-05, "loss": 0.4914, "step": 26367 }, { "epoch": 1.4765371262179414, "grad_norm": 1.1734130382537842, 
"learning_rate": 9.832736842105264e-05, "loss": 0.3136, "step": 26368 }, { "epoch": 1.4765931235300704, "grad_norm": 1.788120150566101, "learning_rate": 9.83271052631579e-05, "loss": 0.4253, "step": 26369 }, { "epoch": 1.4766491208421995, "grad_norm": 1.5197820663452148, "learning_rate": 9.832684210526316e-05, "loss": 0.5413, "step": 26370 }, { "epoch": 1.4767051181543285, "grad_norm": 2.077280282974243, "learning_rate": 9.832657894736843e-05, "loss": 0.5113, "step": 26371 }, { "epoch": 1.4767611154664575, "grad_norm": 1.2023744583129883, "learning_rate": 9.832631578947369e-05, "loss": 0.5016, "step": 26372 }, { "epoch": 1.4768171127785865, "grad_norm": 1.2389616966247559, "learning_rate": 9.832605263157895e-05, "loss": 0.4866, "step": 26373 }, { "epoch": 1.4768731100907155, "grad_norm": 1.177794337272644, "learning_rate": 9.832578947368421e-05, "loss": 0.4466, "step": 26374 }, { "epoch": 1.4769291074028446, "grad_norm": 1.428767442703247, "learning_rate": 9.832552631578947e-05, "loss": 0.4747, "step": 26375 }, { "epoch": 1.4769851047149736, "grad_norm": 1.4767810106277466, "learning_rate": 9.832526315789475e-05, "loss": 0.4311, "step": 26376 }, { "epoch": 1.4770411020271026, "grad_norm": 1.1158396005630493, "learning_rate": 9.8325e-05, "loss": 0.373, "step": 26377 }, { "epoch": 1.4770970993392316, "grad_norm": 1.7182050943374634, "learning_rate": 9.832473684210526e-05, "loss": 0.4972, "step": 26378 }, { "epoch": 1.4771530966513606, "grad_norm": 1.5233622789382935, "learning_rate": 9.832447368421052e-05, "loss": 0.4996, "step": 26379 }, { "epoch": 1.4772090939634897, "grad_norm": 1.4264438152313232, "learning_rate": 9.83242105263158e-05, "loss": 0.5008, "step": 26380 }, { "epoch": 1.4772650912756187, "grad_norm": 1.1224011182785034, "learning_rate": 9.832394736842106e-05, "loss": 0.3306, "step": 26381 }, { "epoch": 1.4773210885877477, "grad_norm": 1.9449200630187988, "learning_rate": 9.832368421052632e-05, "loss": 0.53, "step": 26382 }, { "epoch": 
1.4773770858998767, "grad_norm": 1.3462685346603394, "learning_rate": 9.832342105263158e-05, "loss": 0.464, "step": 26383 }, { "epoch": 1.4774330832120057, "grad_norm": 1.302512526512146, "learning_rate": 9.832315789473685e-05, "loss": 0.3426, "step": 26384 }, { "epoch": 1.4774890805241347, "grad_norm": 1.703502893447876, "learning_rate": 9.832289473684211e-05, "loss": 0.5233, "step": 26385 }, { "epoch": 1.4775450778362638, "grad_norm": 1.4388452768325806, "learning_rate": 9.832263157894738e-05, "loss": 0.569, "step": 26386 }, { "epoch": 1.4776010751483928, "grad_norm": 1.9214638471603394, "learning_rate": 9.832236842105263e-05, "loss": 0.495, "step": 26387 }, { "epoch": 1.4776570724605218, "grad_norm": 1.1944797039031982, "learning_rate": 9.83221052631579e-05, "loss": 0.4455, "step": 26388 }, { "epoch": 1.4777130697726508, "grad_norm": 1.778193712234497, "learning_rate": 9.832184210526316e-05, "loss": 0.5706, "step": 26389 }, { "epoch": 1.4777690670847798, "grad_norm": 1.3621065616607666, "learning_rate": 9.832157894736844e-05, "loss": 0.5761, "step": 26390 }, { "epoch": 1.4778250643969089, "grad_norm": 1.1708863973617554, "learning_rate": 9.83213157894737e-05, "loss": 0.3399, "step": 26391 }, { "epoch": 1.4778810617090379, "grad_norm": 1.17164146900177, "learning_rate": 9.832105263157894e-05, "loss": 0.326, "step": 26392 }, { "epoch": 1.477937059021167, "grad_norm": 1.2393851280212402, "learning_rate": 9.832078947368421e-05, "loss": 0.4197, "step": 26393 }, { "epoch": 1.477993056333296, "grad_norm": 1.4932615756988525, "learning_rate": 9.832052631578947e-05, "loss": 0.5323, "step": 26394 }, { "epoch": 1.478049053645425, "grad_norm": 1.2480852603912354, "learning_rate": 9.832026315789475e-05, "loss": 0.461, "step": 26395 }, { "epoch": 1.478105050957554, "grad_norm": 1.423901081085205, "learning_rate": 9.832000000000001e-05, "loss": 0.3795, "step": 26396 }, { "epoch": 1.478161048269683, "grad_norm": 1.1684621572494507, "learning_rate": 9.831973684210527e-05, 
"loss": 0.4044, "step": 26397 }, { "epoch": 1.478217045581812, "grad_norm": 1.1378426551818848, "learning_rate": 9.831947368421053e-05, "loss": 0.4358, "step": 26398 }, { "epoch": 1.478273042893941, "grad_norm": 1.8066250085830688, "learning_rate": 9.83192105263158e-05, "loss": 0.6716, "step": 26399 }, { "epoch": 1.47832904020607, "grad_norm": 1.414628505706787, "learning_rate": 9.831894736842106e-05, "loss": 0.4991, "step": 26400 }, { "epoch": 1.478385037518199, "grad_norm": 1.4492090940475464, "learning_rate": 9.831868421052632e-05, "loss": 0.4438, "step": 26401 }, { "epoch": 1.478441034830328, "grad_norm": 1.5771111249923706, "learning_rate": 9.831842105263158e-05, "loss": 0.4732, "step": 26402 }, { "epoch": 1.478497032142457, "grad_norm": 1.8152694702148438, "learning_rate": 9.831815789473685e-05, "loss": 0.4741, "step": 26403 }, { "epoch": 1.478553029454586, "grad_norm": 1.3209187984466553, "learning_rate": 9.831789473684211e-05, "loss": 0.3829, "step": 26404 }, { "epoch": 1.4786090267667151, "grad_norm": 1.21585214138031, "learning_rate": 9.831763157894737e-05, "loss": 0.4164, "step": 26405 }, { "epoch": 1.4786650240788441, "grad_norm": 1.0698697566986084, "learning_rate": 9.831736842105263e-05, "loss": 0.349, "step": 26406 }, { "epoch": 1.4787210213909732, "grad_norm": 1.439136266708374, "learning_rate": 9.83171052631579e-05, "loss": 0.3543, "step": 26407 }, { "epoch": 1.4787770187031022, "grad_norm": 1.2398730516433716, "learning_rate": 9.831684210526316e-05, "loss": 0.5895, "step": 26408 }, { "epoch": 1.4788330160152312, "grad_norm": 1.3039520978927612, "learning_rate": 9.831657894736842e-05, "loss": 0.5787, "step": 26409 }, { "epoch": 1.4788890133273602, "grad_norm": 1.1072537899017334, "learning_rate": 9.831631578947368e-05, "loss": 0.4565, "step": 26410 }, { "epoch": 1.4789450106394892, "grad_norm": 1.2011222839355469, "learning_rate": 9.831605263157894e-05, "loss": 0.4562, "step": 26411 }, { "epoch": 1.4790010079516183, "grad_norm": 1.3780157566070557, 
"learning_rate": 9.831578947368422e-05, "loss": 0.4488, "step": 26412 }, { "epoch": 1.4790570052637473, "grad_norm": 1.3226121664047241, "learning_rate": 9.831552631578948e-05, "loss": 0.5458, "step": 26413 }, { "epoch": 1.4791130025758763, "grad_norm": 1.4769147634506226, "learning_rate": 9.831526315789475e-05, "loss": 0.5004, "step": 26414 }, { "epoch": 1.4791689998880053, "grad_norm": 1.467332363128662, "learning_rate": 9.8315e-05, "loss": 0.6603, "step": 26415 }, { "epoch": 1.4792249972001343, "grad_norm": 1.6566319465637207, "learning_rate": 9.831473684210527e-05, "loss": 0.5555, "step": 26416 }, { "epoch": 1.4792809945122634, "grad_norm": 1.2249643802642822, "learning_rate": 9.831447368421053e-05, "loss": 0.4686, "step": 26417 }, { "epoch": 1.4793369918243924, "grad_norm": 1.3104848861694336, "learning_rate": 9.83142105263158e-05, "loss": 0.4912, "step": 26418 }, { "epoch": 1.4793929891365214, "grad_norm": 1.394104242324829, "learning_rate": 9.831394736842105e-05, "loss": 0.5911, "step": 26419 }, { "epoch": 1.4794489864486504, "grad_norm": 1.2208060026168823, "learning_rate": 9.831368421052632e-05, "loss": 0.3924, "step": 26420 }, { "epoch": 1.4795049837607794, "grad_norm": 1.481011986732483, "learning_rate": 9.831342105263158e-05, "loss": 0.4138, "step": 26421 }, { "epoch": 1.4795609810729085, "grad_norm": 1.4936903715133667, "learning_rate": 9.831315789473686e-05, "loss": 0.6404, "step": 26422 }, { "epoch": 1.4796169783850375, "grad_norm": 1.253900408744812, "learning_rate": 9.831289473684211e-05, "loss": 0.4427, "step": 26423 }, { "epoch": 1.4796729756971665, "grad_norm": 1.721214771270752, "learning_rate": 9.831263157894737e-05, "loss": 0.4268, "step": 26424 }, { "epoch": 1.4797289730092955, "grad_norm": 1.1525635719299316, "learning_rate": 9.831236842105263e-05, "loss": 0.4156, "step": 26425 }, { "epoch": 1.4797849703214245, "grad_norm": 1.419958233833313, "learning_rate": 9.83121052631579e-05, "loss": 0.5837, "step": 26426 }, { "epoch": 
1.4798409676335536, "grad_norm": 1.310727596282959, "learning_rate": 9.831184210526317e-05, "loss": 0.4129, "step": 26427 }, { "epoch": 1.4798969649456826, "grad_norm": 1.3370429277420044, "learning_rate": 9.831157894736843e-05, "loss": 0.3879, "step": 26428 }, { "epoch": 1.4799529622578116, "grad_norm": 1.1163039207458496, "learning_rate": 9.831131578947369e-05, "loss": 0.377, "step": 26429 }, { "epoch": 1.4800089595699406, "grad_norm": 1.5141627788543701, "learning_rate": 9.831105263157895e-05, "loss": 0.7203, "step": 26430 }, { "epoch": 1.4800649568820696, "grad_norm": 1.1618309020996094, "learning_rate": 9.831078947368422e-05, "loss": 0.4064, "step": 26431 }, { "epoch": 1.4801209541941986, "grad_norm": 1.1684426069259644, "learning_rate": 9.831052631578948e-05, "loss": 0.4171, "step": 26432 }, { "epoch": 1.4801769515063277, "grad_norm": 1.3729358911514282, "learning_rate": 9.831026315789474e-05, "loss": 0.4878, "step": 26433 }, { "epoch": 1.4802329488184567, "grad_norm": 1.247262716293335, "learning_rate": 9.831e-05, "loss": 0.4128, "step": 26434 }, { "epoch": 1.4802889461305857, "grad_norm": 1.4308249950408936, "learning_rate": 9.830973684210527e-05, "loss": 0.5528, "step": 26435 }, { "epoch": 1.4803449434427147, "grad_norm": 1.364248275756836, "learning_rate": 9.830947368421053e-05, "loss": 0.4859, "step": 26436 }, { "epoch": 1.4804009407548437, "grad_norm": 1.3801158666610718, "learning_rate": 9.830921052631579e-05, "loss": 0.4584, "step": 26437 }, { "epoch": 1.4804569380669728, "grad_norm": 1.2886649370193481, "learning_rate": 9.830894736842105e-05, "loss": 0.5631, "step": 26438 }, { "epoch": 1.4805129353791018, "grad_norm": 1.2724788188934326, "learning_rate": 9.830868421052632e-05, "loss": 0.449, "step": 26439 }, { "epoch": 1.4805689326912308, "grad_norm": 1.3946900367736816, "learning_rate": 9.830842105263158e-05, "loss": 0.3947, "step": 26440 }, { "epoch": 1.4806249300033598, "grad_norm": 1.218000888824463, "learning_rate": 9.830815789473686e-05, 
"loss": 0.3982, "step": 26441 }, { "epoch": 1.4806809273154888, "grad_norm": 1.2052074670791626, "learning_rate": 9.83078947368421e-05, "loss": 0.4831, "step": 26442 }, { "epoch": 1.4807369246276179, "grad_norm": 1.4074491262435913, "learning_rate": 9.830763157894736e-05, "loss": 0.4657, "step": 26443 }, { "epoch": 1.4807929219397469, "grad_norm": 1.4324088096618652, "learning_rate": 9.830736842105264e-05, "loss": 0.4509, "step": 26444 }, { "epoch": 1.480848919251876, "grad_norm": 1.564112901687622, "learning_rate": 9.83071052631579e-05, "loss": 0.5721, "step": 26445 }, { "epoch": 1.480904916564005, "grad_norm": 1.3879790306091309, "learning_rate": 9.830684210526317e-05, "loss": 0.4773, "step": 26446 }, { "epoch": 1.480960913876134, "grad_norm": 1.097662329673767, "learning_rate": 9.830657894736842e-05, "loss": 0.5217, "step": 26447 }, { "epoch": 1.481016911188263, "grad_norm": 1.2054959535598755, "learning_rate": 9.830631578947369e-05, "loss": 0.4029, "step": 26448 }, { "epoch": 1.481072908500392, "grad_norm": 1.091911792755127, "learning_rate": 9.830605263157895e-05, "loss": 0.4091, "step": 26449 }, { "epoch": 1.481128905812521, "grad_norm": 1.366909146308899, "learning_rate": 9.830578947368422e-05, "loss": 0.5705, "step": 26450 }, { "epoch": 1.48118490312465, "grad_norm": 1.342989206314087, "learning_rate": 9.830552631578948e-05, "loss": 0.5374, "step": 26451 }, { "epoch": 1.481240900436779, "grad_norm": 1.3994578123092651, "learning_rate": 9.830526315789474e-05, "loss": 0.3981, "step": 26452 }, { "epoch": 1.481296897748908, "grad_norm": 1.430092215538025, "learning_rate": 9.8305e-05, "loss": 0.4674, "step": 26453 }, { "epoch": 1.481352895061037, "grad_norm": 1.160904884338379, "learning_rate": 9.830473684210527e-05, "loss": 0.3719, "step": 26454 }, { "epoch": 1.481408892373166, "grad_norm": 1.5587517023086548, "learning_rate": 9.830447368421053e-05, "loss": 0.5502, "step": 26455 }, { "epoch": 1.481464889685295, "grad_norm": 1.2606472969055176, "learning_rate": 
9.83042105263158e-05, "loss": 0.4605, "step": 26456 }, { "epoch": 1.4815208869974241, "grad_norm": 1.5438977479934692, "learning_rate": 9.830394736842105e-05, "loss": 0.8613, "step": 26457 }, { "epoch": 1.4815768843095531, "grad_norm": 1.6708910465240479, "learning_rate": 9.830368421052633e-05, "loss": 0.5333, "step": 26458 }, { "epoch": 1.4816328816216822, "grad_norm": 1.3151296377182007, "learning_rate": 9.830342105263159e-05, "loss": 0.3697, "step": 26459 }, { "epoch": 1.4816888789338112, "grad_norm": 1.283218502998352, "learning_rate": 9.830315789473685e-05, "loss": 0.4876, "step": 26460 }, { "epoch": 1.4817448762459402, "grad_norm": 1.3207824230194092, "learning_rate": 9.83028947368421e-05, "loss": 0.4758, "step": 26461 }, { "epoch": 1.4818008735580692, "grad_norm": 1.426101803779602, "learning_rate": 9.830263157894737e-05, "loss": 0.5537, "step": 26462 }, { "epoch": 1.4818568708701982, "grad_norm": 1.3909064531326294, "learning_rate": 9.830236842105264e-05, "loss": 0.3975, "step": 26463 }, { "epoch": 1.4819128681823273, "grad_norm": 1.3793268203735352, "learning_rate": 9.83021052631579e-05, "loss": 0.5087, "step": 26464 }, { "epoch": 1.4819688654944563, "grad_norm": 1.632776141166687, "learning_rate": 9.830184210526316e-05, "loss": 0.4604, "step": 26465 }, { "epoch": 1.4820248628065853, "grad_norm": 1.5219885110855103, "learning_rate": 9.830157894736842e-05, "loss": 0.6344, "step": 26466 }, { "epoch": 1.4820808601187143, "grad_norm": 1.3433325290679932, "learning_rate": 9.830131578947369e-05, "loss": 0.4594, "step": 26467 }, { "epoch": 1.4821368574308433, "grad_norm": 1.540515661239624, "learning_rate": 9.830105263157895e-05, "loss": 0.3933, "step": 26468 }, { "epoch": 1.4821928547429724, "grad_norm": 1.087449312210083, "learning_rate": 9.830078947368422e-05, "loss": 0.4609, "step": 26469 }, { "epoch": 1.4822488520551014, "grad_norm": 1.4942513704299927, "learning_rate": 9.830052631578947e-05, "loss": 0.5734, "step": 26470 }, { "epoch": 1.4823048493672304, 
"grad_norm": 1.2333168983459473, "learning_rate": 9.830026315789474e-05, "loss": 0.3683, "step": 26471 }, { "epoch": 1.4823608466793594, "grad_norm": 1.3990916013717651, "learning_rate": 9.83e-05, "loss": 0.4677, "step": 26472 }, { "epoch": 1.4824168439914884, "grad_norm": 1.342435359954834, "learning_rate": 9.829973684210528e-05, "loss": 0.4344, "step": 26473 }, { "epoch": 1.4824728413036175, "grad_norm": 10.466675758361816, "learning_rate": 9.829947368421052e-05, "loss": 0.5972, "step": 26474 }, { "epoch": 1.4825288386157465, "grad_norm": 1.4302014112472534, "learning_rate": 9.82992105263158e-05, "loss": 0.5685, "step": 26475 }, { "epoch": 1.4825848359278755, "grad_norm": 1.4066733121871948, "learning_rate": 9.829894736842106e-05, "loss": 0.5278, "step": 26476 }, { "epoch": 1.4826408332400045, "grad_norm": 2.721774101257324, "learning_rate": 9.829868421052632e-05, "loss": 0.4604, "step": 26477 }, { "epoch": 1.4826968305521335, "grad_norm": 1.548677921295166, "learning_rate": 9.829842105263159e-05, "loss": 0.4053, "step": 26478 }, { "epoch": 1.4827528278642625, "grad_norm": 1.1114532947540283, "learning_rate": 9.829815789473684e-05, "loss": 0.5194, "step": 26479 }, { "epoch": 1.4828088251763916, "grad_norm": 1.2839246988296509, "learning_rate": 9.829789473684211e-05, "loss": 0.4681, "step": 26480 }, { "epoch": 1.4828648224885206, "grad_norm": 1.4902235269546509, "learning_rate": 9.829763157894737e-05, "loss": 0.4127, "step": 26481 }, { "epoch": 1.4829208198006496, "grad_norm": 1.416769027709961, "learning_rate": 9.829736842105264e-05, "loss": 0.5673, "step": 26482 }, { "epoch": 1.4829768171127786, "grad_norm": 1.4382866621017456, "learning_rate": 9.82971052631579e-05, "loss": 0.4693, "step": 26483 }, { "epoch": 1.4830328144249076, "grad_norm": 1.2511255741119385, "learning_rate": 9.829684210526316e-05, "loss": 0.439, "step": 26484 }, { "epoch": 1.4830888117370367, "grad_norm": 1.4404335021972656, "learning_rate": 9.829657894736842e-05, "loss": 0.5361, "step": 
26485 }, { "epoch": 1.4831448090491657, "grad_norm": 1.4500209093093872, "learning_rate": 9.82963157894737e-05, "loss": 0.4741, "step": 26486 }, { "epoch": 1.4832008063612947, "grad_norm": 1.334502100944519, "learning_rate": 9.829605263157895e-05, "loss": 0.5394, "step": 26487 }, { "epoch": 1.4832568036734237, "grad_norm": 1.18729567527771, "learning_rate": 9.829578947368421e-05, "loss": 0.3905, "step": 26488 }, { "epoch": 1.4833128009855527, "grad_norm": 1.1093251705169678, "learning_rate": 9.829552631578947e-05, "loss": 0.3999, "step": 26489 }, { "epoch": 1.4833687982976818, "grad_norm": 1.262854814529419, "learning_rate": 9.829526315789475e-05, "loss": 0.4139, "step": 26490 }, { "epoch": 1.4834247956098108, "grad_norm": 1.491692066192627, "learning_rate": 9.8295e-05, "loss": 0.5313, "step": 26491 }, { "epoch": 1.4834807929219398, "grad_norm": 1.256239652633667, "learning_rate": 9.829473684210527e-05, "loss": 0.42, "step": 26492 }, { "epoch": 1.4835367902340688, "grad_norm": 1.0862276554107666, "learning_rate": 9.829447368421053e-05, "loss": 0.3264, "step": 26493 }, { "epoch": 1.4835927875461978, "grad_norm": 1.4838993549346924, "learning_rate": 9.829421052631579e-05, "loss": 0.395, "step": 26494 }, { "epoch": 1.4836487848583269, "grad_norm": 1.4637272357940674, "learning_rate": 9.829394736842106e-05, "loss": 0.4645, "step": 26495 }, { "epoch": 1.4837047821704559, "grad_norm": 1.218091368675232, "learning_rate": 9.829368421052632e-05, "loss": 0.5034, "step": 26496 }, { "epoch": 1.483760779482585, "grad_norm": 1.3952760696411133, "learning_rate": 9.829342105263158e-05, "loss": 0.3725, "step": 26497 }, { "epoch": 1.483816776794714, "grad_norm": 1.4923816919326782, "learning_rate": 9.829315789473684e-05, "loss": 0.4811, "step": 26498 }, { "epoch": 1.483872774106843, "grad_norm": 1.4141688346862793, "learning_rate": 9.829289473684211e-05, "loss": 0.6126, "step": 26499 }, { "epoch": 1.483928771418972, "grad_norm": 1.403441071510315, "learning_rate": 
9.829263157894737e-05, "loss": 0.4566, "step": 26500 }, { "epoch": 1.483984768731101, "grad_norm": 1.1639097929000854, "learning_rate": 9.829236842105264e-05, "loss": 0.4863, "step": 26501 }, { "epoch": 1.48404076604323, "grad_norm": 1.2145073413848877, "learning_rate": 9.829210526315789e-05, "loss": 0.4488, "step": 26502 }, { "epoch": 1.484096763355359, "grad_norm": 1.349830150604248, "learning_rate": 9.829184210526316e-05, "loss": 0.4137, "step": 26503 }, { "epoch": 1.484152760667488, "grad_norm": 1.2369256019592285, "learning_rate": 9.829157894736842e-05, "loss": 0.3126, "step": 26504 }, { "epoch": 1.484208757979617, "grad_norm": 1.1879065036773682, "learning_rate": 9.82913157894737e-05, "loss": 0.4301, "step": 26505 }, { "epoch": 1.484264755291746, "grad_norm": 1.506127119064331, "learning_rate": 9.829105263157896e-05, "loss": 0.7175, "step": 26506 }, { "epoch": 1.484320752603875, "grad_norm": 1.4014273881912231, "learning_rate": 9.829078947368422e-05, "loss": 0.4257, "step": 26507 }, { "epoch": 1.484376749916004, "grad_norm": 1.3636915683746338, "learning_rate": 9.829052631578948e-05, "loss": 0.4488, "step": 26508 }, { "epoch": 1.4844327472281331, "grad_norm": 1.363595962524414, "learning_rate": 9.829026315789475e-05, "loss": 0.3705, "step": 26509 }, { "epoch": 1.4844887445402621, "grad_norm": 1.5012990236282349, "learning_rate": 9.829000000000001e-05, "loss": 0.412, "step": 26510 }, { "epoch": 1.4845447418523912, "grad_norm": 1.3049137592315674, "learning_rate": 9.828973684210527e-05, "loss": 0.4308, "step": 26511 }, { "epoch": 1.4846007391645202, "grad_norm": 1.3145856857299805, "learning_rate": 9.828947368421053e-05, "loss": 0.3557, "step": 26512 }, { "epoch": 1.4846567364766492, "grad_norm": 1.1711186170578003, "learning_rate": 9.828921052631579e-05, "loss": 0.4307, "step": 26513 }, { "epoch": 1.4847127337887782, "grad_norm": 1.9552361965179443, "learning_rate": 9.828894736842106e-05, "loss": 0.3842, "step": 26514 }, { "epoch": 1.4847687311009072, 
"grad_norm": 1.2192083597183228, "learning_rate": 9.828868421052632e-05, "loss": 0.4244, "step": 26515 }, { "epoch": 1.4848247284130363, "grad_norm": 1.537446141242981, "learning_rate": 9.828842105263158e-05, "loss": 0.4177, "step": 26516 }, { "epoch": 1.4848807257251653, "grad_norm": 1.4063435792922974, "learning_rate": 9.828815789473684e-05, "loss": 0.537, "step": 26517 }, { "epoch": 1.4849367230372943, "grad_norm": 1.3753093481063843, "learning_rate": 9.828789473684211e-05, "loss": 0.4318, "step": 26518 }, { "epoch": 1.4849927203494233, "grad_norm": 1.205532193183899, "learning_rate": 9.828763157894737e-05, "loss": 0.4066, "step": 26519 }, { "epoch": 1.4850487176615523, "grad_norm": 1.2799038887023926, "learning_rate": 9.828736842105263e-05, "loss": 0.4152, "step": 26520 }, { "epoch": 1.4851047149736814, "grad_norm": 1.3891645669937134, "learning_rate": 9.82871052631579e-05, "loss": 0.4474, "step": 26521 }, { "epoch": 1.4851607122858104, "grad_norm": 1.5356215238571167, "learning_rate": 9.828684210526317e-05, "loss": 0.5309, "step": 26522 }, { "epoch": 1.4852167095979394, "grad_norm": 1.4802745580673218, "learning_rate": 9.828657894736843e-05, "loss": 0.5008, "step": 26523 }, { "epoch": 1.4852727069100684, "grad_norm": 1.4387753009796143, "learning_rate": 9.82863157894737e-05, "loss": 0.5275, "step": 26524 }, { "epoch": 1.4853287042221974, "grad_norm": 1.2407060861587524, "learning_rate": 9.828605263157895e-05, "loss": 0.4245, "step": 26525 }, { "epoch": 1.4853847015343264, "grad_norm": 1.364608883857727, "learning_rate": 9.828578947368422e-05, "loss": 0.4472, "step": 26526 }, { "epoch": 1.4854406988464555, "grad_norm": 1.448456883430481, "learning_rate": 9.828552631578948e-05, "loss": 0.5022, "step": 26527 }, { "epoch": 1.4854966961585845, "grad_norm": 1.405077576637268, "learning_rate": 9.828526315789475e-05, "loss": 0.401, "step": 26528 }, { "epoch": 1.4855526934707135, "grad_norm": 1.231691598892212, "learning_rate": 9.8285e-05, "loss": 0.3612, "step": 26529 
}, { "epoch": 1.4856086907828425, "grad_norm": 1.1891672611236572, "learning_rate": 9.828473684210526e-05, "loss": 0.5706, "step": 26530 }, { "epoch": 1.4856646880949715, "grad_norm": 1.8921364545822144, "learning_rate": 9.828447368421053e-05, "loss": 0.5376, "step": 26531 }, { "epoch": 1.4857206854071006, "grad_norm": 1.308678388595581, "learning_rate": 9.828421052631579e-05, "loss": 0.4693, "step": 26532 }, { "epoch": 1.4857766827192296, "grad_norm": 1.3780572414398193, "learning_rate": 9.828394736842106e-05, "loss": 0.4731, "step": 26533 }, { "epoch": 1.4858326800313586, "grad_norm": 1.3991550207138062, "learning_rate": 9.828368421052631e-05, "loss": 0.4826, "step": 26534 }, { "epoch": 1.4858886773434876, "grad_norm": 6.05579948425293, "learning_rate": 9.828342105263158e-05, "loss": 0.5507, "step": 26535 }, { "epoch": 1.4859446746556166, "grad_norm": 1.2682628631591797, "learning_rate": 9.828315789473684e-05, "loss": 0.4841, "step": 26536 }, { "epoch": 1.4860006719677457, "grad_norm": 1.2944921255111694, "learning_rate": 9.828289473684212e-05, "loss": 0.4768, "step": 26537 }, { "epoch": 1.4860566692798747, "grad_norm": 1.390641689300537, "learning_rate": 9.828263157894738e-05, "loss": 0.532, "step": 26538 }, { "epoch": 1.4861126665920037, "grad_norm": 1.2902518510818481, "learning_rate": 9.828236842105264e-05, "loss": 0.4296, "step": 26539 }, { "epoch": 1.4861686639041327, "grad_norm": 1.514411211013794, "learning_rate": 9.82821052631579e-05, "loss": 0.4976, "step": 26540 }, { "epoch": 1.4862246612162617, "grad_norm": 1.4082057476043701, "learning_rate": 9.828184210526317e-05, "loss": 0.4531, "step": 26541 }, { "epoch": 1.4862806585283908, "grad_norm": 1.2194361686706543, "learning_rate": 9.828157894736843e-05, "loss": 0.4623, "step": 26542 }, { "epoch": 1.4863366558405198, "grad_norm": 1.5673096179962158, "learning_rate": 9.828131578947369e-05, "loss": 0.4011, "step": 26543 }, { "epoch": 1.4863926531526488, "grad_norm": 1.132565975189209, "learning_rate": 
9.828105263157895e-05, "loss": 0.3721, "step": 26544 }, { "epoch": 1.4864486504647778, "grad_norm": 1.6840846538543701, "learning_rate": 9.828078947368422e-05, "loss": 0.5935, "step": 26545 }, { "epoch": 1.4865046477769068, "grad_norm": 1.255689263343811, "learning_rate": 9.828052631578948e-05, "loss": 0.4315, "step": 26546 }, { "epoch": 1.4865606450890358, "grad_norm": 1.2471442222595215, "learning_rate": 9.828026315789474e-05, "loss": 0.4162, "step": 26547 }, { "epoch": 1.4866166424011649, "grad_norm": 1.4482321739196777, "learning_rate": 9.828e-05, "loss": 0.4751, "step": 26548 }, { "epoch": 1.4866726397132939, "grad_norm": 1.298810601234436, "learning_rate": 9.827973684210526e-05, "loss": 0.5159, "step": 26549 }, { "epoch": 1.486728637025423, "grad_norm": 1.200095295906067, "learning_rate": 9.827947368421053e-05, "loss": 0.4506, "step": 26550 }, { "epoch": 1.486784634337552, "grad_norm": 1.4362943172454834, "learning_rate": 9.82792105263158e-05, "loss": 0.475, "step": 26551 }, { "epoch": 1.486840631649681, "grad_norm": 1.1563717126846313, "learning_rate": 9.827894736842105e-05, "loss": 0.3387, "step": 26552 }, { "epoch": 1.48689662896181, "grad_norm": 1.1349667310714722, "learning_rate": 9.827868421052631e-05, "loss": 0.3534, "step": 26553 }, { "epoch": 1.486952626273939, "grad_norm": 1.1395677328109741, "learning_rate": 9.827842105263159e-05, "loss": 0.4657, "step": 26554 }, { "epoch": 1.487008623586068, "grad_norm": 1.8083597421646118, "learning_rate": 9.827815789473685e-05, "loss": 0.6034, "step": 26555 }, { "epoch": 1.487064620898197, "grad_norm": 1.188975214958191, "learning_rate": 9.827789473684212e-05, "loss": 0.4013, "step": 26556 }, { "epoch": 1.487120618210326, "grad_norm": 1.316445231437683, "learning_rate": 9.827763157894737e-05, "loss": 0.5189, "step": 26557 }, { "epoch": 1.487176615522455, "grad_norm": 1.5387048721313477, "learning_rate": 9.827736842105264e-05, "loss": 0.5348, "step": 26558 }, { "epoch": 1.487232612834584, "grad_norm": 
1.3718448877334595, "learning_rate": 9.82771052631579e-05, "loss": 0.5667, "step": 26559 }, { "epoch": 1.4872886101467129, "grad_norm": 5.825066566467285, "learning_rate": 9.827684210526317e-05, "loss": 0.3749, "step": 26560 }, { "epoch": 1.487344607458842, "grad_norm": 1.566150426864624, "learning_rate": 9.827657894736843e-05, "loss": 0.403, "step": 26561 }, { "epoch": 1.487400604770971, "grad_norm": 1.1900097131729126, "learning_rate": 9.827631578947369e-05, "loss": 0.4285, "step": 26562 }, { "epoch": 1.4874566020831, "grad_norm": 1.266889214515686, "learning_rate": 9.827605263157895e-05, "loss": 0.4519, "step": 26563 }, { "epoch": 1.487512599395229, "grad_norm": 1.1044726371765137, "learning_rate": 9.827578947368421e-05, "loss": 0.4364, "step": 26564 }, { "epoch": 1.487568596707358, "grad_norm": 1.3785444498062134, "learning_rate": 9.827552631578948e-05, "loss": 0.4805, "step": 26565 }, { "epoch": 1.487624594019487, "grad_norm": 1.5386810302734375, "learning_rate": 9.827526315789473e-05, "loss": 0.5183, "step": 26566 }, { "epoch": 1.487680591331616, "grad_norm": 1.3156059980392456, "learning_rate": 9.8275e-05, "loss": 0.4641, "step": 26567 }, { "epoch": 1.487736588643745, "grad_norm": 1.3793963193893433, "learning_rate": 9.827473684210526e-05, "loss": 0.5021, "step": 26568 }, { "epoch": 1.487792585955874, "grad_norm": 1.1304057836532593, "learning_rate": 9.827447368421054e-05, "loss": 0.4386, "step": 26569 }, { "epoch": 1.487848583268003, "grad_norm": 1.4514892101287842, "learning_rate": 9.82742105263158e-05, "loss": 0.5732, "step": 26570 }, { "epoch": 1.487904580580132, "grad_norm": 1.2929202318191528, "learning_rate": 9.827394736842106e-05, "loss": 0.3627, "step": 26571 }, { "epoch": 1.487960577892261, "grad_norm": 1.4279794692993164, "learning_rate": 9.827368421052632e-05, "loss": 0.4568, "step": 26572 }, { "epoch": 1.4880165752043901, "grad_norm": 1.3467376232147217, "learning_rate": 9.827342105263159e-05, "loss": 0.4781, "step": 26573 }, { "epoch": 
1.4880725725165191, "grad_norm": 1.162685751914978, "learning_rate": 9.827315789473685e-05, "loss": 0.373, "step": 26574 }, { "epoch": 1.4881285698286482, "grad_norm": 1.4191398620605469, "learning_rate": 9.827289473684211e-05, "loss": 0.6028, "step": 26575 }, { "epoch": 1.4881845671407772, "grad_norm": 1.132226586341858, "learning_rate": 9.827263157894737e-05, "loss": 0.3635, "step": 26576 }, { "epoch": 1.4882405644529062, "grad_norm": 1.084893822669983, "learning_rate": 9.827236842105264e-05, "loss": 0.463, "step": 26577 }, { "epoch": 1.4882965617650352, "grad_norm": 1.3243541717529297, "learning_rate": 9.82721052631579e-05, "loss": 0.4241, "step": 26578 }, { "epoch": 1.4883525590771642, "grad_norm": 1.4677218198776245, "learning_rate": 9.827184210526317e-05, "loss": 0.5732, "step": 26579 }, { "epoch": 1.4884085563892933, "grad_norm": 1.4174957275390625, "learning_rate": 9.827157894736842e-05, "loss": 0.4608, "step": 26580 }, { "epoch": 1.4884645537014223, "grad_norm": 1.155500888824463, "learning_rate": 9.827131578947368e-05, "loss": 0.358, "step": 26581 }, { "epoch": 1.4885205510135513, "grad_norm": 1.275414228439331, "learning_rate": 9.827105263157895e-05, "loss": 0.4646, "step": 26582 }, { "epoch": 1.4885765483256803, "grad_norm": 1.2806388139724731, "learning_rate": 9.827078947368421e-05, "loss": 0.4829, "step": 26583 }, { "epoch": 1.4886325456378093, "grad_norm": 1.2613698244094849, "learning_rate": 9.827052631578947e-05, "loss": 0.4348, "step": 26584 }, { "epoch": 1.4886885429499384, "grad_norm": 1.1614763736724854, "learning_rate": 9.827026315789473e-05, "loss": 0.4095, "step": 26585 }, { "epoch": 1.4887445402620674, "grad_norm": 1.1521501541137695, "learning_rate": 9.827e-05, "loss": 0.4157, "step": 26586 }, { "epoch": 1.4888005375741964, "grad_norm": 1.5595088005065918, "learning_rate": 9.826973684210527e-05, "loss": 0.4135, "step": 26587 }, { "epoch": 1.4888565348863254, "grad_norm": 1.1848524808883667, "learning_rate": 9.826947368421054e-05, "loss": 
0.432, "step": 26588 }, { "epoch": 1.4889125321984544, "grad_norm": 2.0190978050231934, "learning_rate": 9.826921052631579e-05, "loss": 0.4802, "step": 26589 }, { "epoch": 1.4889685295105834, "grad_norm": 1.8962711095809937, "learning_rate": 9.826894736842106e-05, "loss": 0.6623, "step": 26590 }, { "epoch": 1.4890245268227125, "grad_norm": 1.5094536542892456, "learning_rate": 9.826868421052632e-05, "loss": 0.5381, "step": 26591 }, { "epoch": 1.4890805241348415, "grad_norm": 1.4123467206954956, "learning_rate": 9.826842105263159e-05, "loss": 0.4597, "step": 26592 }, { "epoch": 1.4891365214469705, "grad_norm": 1.146776795387268, "learning_rate": 9.826815789473685e-05, "loss": 0.4171, "step": 26593 }, { "epoch": 1.4891925187590995, "grad_norm": 1.3933452367782593, "learning_rate": 9.826789473684211e-05, "loss": 0.5097, "step": 26594 }, { "epoch": 1.4892485160712285, "grad_norm": 1.508174180984497, "learning_rate": 9.826763157894737e-05, "loss": 0.5648, "step": 26595 }, { "epoch": 1.4893045133833576, "grad_norm": 1.2280765771865845, "learning_rate": 9.826736842105264e-05, "loss": 0.4365, "step": 26596 }, { "epoch": 1.4893605106954866, "grad_norm": 1.496867299079895, "learning_rate": 9.82671052631579e-05, "loss": 0.4005, "step": 26597 }, { "epoch": 1.4894165080076156, "grad_norm": 1.4241769313812256, "learning_rate": 9.826684210526316e-05, "loss": 0.5276, "step": 26598 }, { "epoch": 1.4894725053197446, "grad_norm": 1.3316019773483276, "learning_rate": 9.826657894736842e-05, "loss": 0.4393, "step": 26599 }, { "epoch": 1.4895285026318736, "grad_norm": 1.1914223432540894, "learning_rate": 9.826631578947368e-05, "loss": 0.3833, "step": 26600 }, { "epoch": 1.4895844999440027, "grad_norm": 1.281269907951355, "learning_rate": 9.826605263157896e-05, "loss": 0.5015, "step": 26601 }, { "epoch": 1.4896404972561317, "grad_norm": 1.5901553630828857, "learning_rate": 9.826578947368422e-05, "loss": 0.4524, "step": 26602 }, { "epoch": 1.4896964945682607, "grad_norm": 
1.4138697385787964, "learning_rate": 9.826552631578948e-05, "loss": 0.508, "step": 26603 }, { "epoch": 1.4897524918803897, "grad_norm": 1.2021832466125488, "learning_rate": 9.826526315789474e-05, "loss": 0.5529, "step": 26604 }, { "epoch": 1.4898084891925187, "grad_norm": 1.3018269538879395, "learning_rate": 9.826500000000001e-05, "loss": 0.4325, "step": 26605 }, { "epoch": 1.4898644865046478, "grad_norm": 1.1973603963851929, "learning_rate": 9.826473684210527e-05, "loss": 0.3586, "step": 26606 }, { "epoch": 1.4899204838167768, "grad_norm": 1.347508192062378, "learning_rate": 9.826447368421053e-05, "loss": 0.4202, "step": 26607 }, { "epoch": 1.4899764811289058, "grad_norm": 1.521946907043457, "learning_rate": 9.826421052631579e-05, "loss": 0.4604, "step": 26608 }, { "epoch": 1.4900324784410348, "grad_norm": 1.5154759883880615, "learning_rate": 9.826394736842106e-05, "loss": 0.6821, "step": 26609 }, { "epoch": 1.4900884757531638, "grad_norm": 1.2492790222167969, "learning_rate": 9.826368421052632e-05, "loss": 0.3793, "step": 26610 }, { "epoch": 1.4901444730652929, "grad_norm": 1.5363818407058716, "learning_rate": 9.82634210526316e-05, "loss": 0.4483, "step": 26611 }, { "epoch": 1.4902004703774219, "grad_norm": 1.3126407861709595, "learning_rate": 9.826315789473684e-05, "loss": 0.5083, "step": 26612 }, { "epoch": 1.490256467689551, "grad_norm": 1.3964883089065552, "learning_rate": 9.826289473684211e-05, "loss": 0.4706, "step": 26613 }, { "epoch": 1.49031246500168, "grad_norm": 1.463017463684082, "learning_rate": 9.826263157894737e-05, "loss": 0.4911, "step": 26614 }, { "epoch": 1.490368462313809, "grad_norm": 1.0788241624832153, "learning_rate": 9.826236842105263e-05, "loss": 0.3901, "step": 26615 }, { "epoch": 1.490424459625938, "grad_norm": 1.3994767665863037, "learning_rate": 9.82621052631579e-05, "loss": 0.5225, "step": 26616 }, { "epoch": 1.490480456938067, "grad_norm": 1.4026886224746704, "learning_rate": 9.826184210526315e-05, "loss": 0.5064, "step": 26617 }, 
{ "epoch": 1.490536454250196, "grad_norm": 1.3137027025222778, "learning_rate": 9.826157894736843e-05, "loss": 0.5917, "step": 26618 }, { "epoch": 1.490592451562325, "grad_norm": 1.2471706867218018, "learning_rate": 9.826131578947369e-05, "loss": 0.5021, "step": 26619 }, { "epoch": 1.490648448874454, "grad_norm": 1.3848005533218384, "learning_rate": 9.826105263157896e-05, "loss": 0.6732, "step": 26620 }, { "epoch": 1.490704446186583, "grad_norm": 1.459981083869934, "learning_rate": 9.82607894736842e-05, "loss": 0.4569, "step": 26621 }, { "epoch": 1.490760443498712, "grad_norm": 1.3855366706848145, "learning_rate": 9.826052631578948e-05, "loss": 0.5632, "step": 26622 }, { "epoch": 1.490816440810841, "grad_norm": 1.3110512495040894, "learning_rate": 9.826026315789474e-05, "loss": 0.3802, "step": 26623 }, { "epoch": 1.49087243812297, "grad_norm": 1.2619868516921997, "learning_rate": 9.826000000000001e-05, "loss": 0.4903, "step": 26624 }, { "epoch": 1.4909284354350991, "grad_norm": 1.2366193532943726, "learning_rate": 9.825973684210527e-05, "loss": 0.5048, "step": 26625 }, { "epoch": 1.4909844327472281, "grad_norm": 1.1477752923965454, "learning_rate": 9.825947368421053e-05, "loss": 0.5035, "step": 26626 }, { "epoch": 1.4910404300593572, "grad_norm": 1.4129172563552856, "learning_rate": 9.825921052631579e-05, "loss": 0.5237, "step": 26627 }, { "epoch": 1.4910964273714862, "grad_norm": 1.224112868309021, "learning_rate": 9.825894736842106e-05, "loss": 0.3905, "step": 26628 }, { "epoch": 1.4911524246836152, "grad_norm": 1.2950512170791626, "learning_rate": 9.825868421052632e-05, "loss": 0.4303, "step": 26629 }, { "epoch": 1.4912084219957442, "grad_norm": 2.1667160987854004, "learning_rate": 9.825842105263158e-05, "loss": 0.4029, "step": 26630 }, { "epoch": 1.4912644193078732, "grad_norm": 1.4129606485366821, "learning_rate": 9.825815789473684e-05, "loss": 0.4251, "step": 26631 }, { "epoch": 1.4913204166200023, "grad_norm": 1.4479670524597168, "learning_rate": 
9.825789473684212e-05, "loss": 0.4992, "step": 26632 }, { "epoch": 1.4913764139321313, "grad_norm": 1.3471579551696777, "learning_rate": 9.825763157894738e-05, "loss": 0.4921, "step": 26633 }, { "epoch": 1.4914324112442603, "grad_norm": 1.6021217107772827, "learning_rate": 9.825736842105264e-05, "loss": 0.4532, "step": 26634 }, { "epoch": 1.4914884085563893, "grad_norm": 1.3492587804794312, "learning_rate": 9.82571052631579e-05, "loss": 0.482, "step": 26635 }, { "epoch": 1.4915444058685183, "grad_norm": 1.9318296909332275, "learning_rate": 9.825684210526316e-05, "loss": 0.4424, "step": 26636 }, { "epoch": 1.4916004031806473, "grad_norm": 1.05913245677948, "learning_rate": 9.825657894736843e-05, "loss": 0.4489, "step": 26637 }, { "epoch": 1.4916564004927764, "grad_norm": 1.2017278671264648, "learning_rate": 9.825631578947369e-05, "loss": 0.4875, "step": 26638 }, { "epoch": 1.4917123978049054, "grad_norm": 1.4743341207504272, "learning_rate": 9.825605263157895e-05, "loss": 0.5475, "step": 26639 }, { "epoch": 1.4917683951170344, "grad_norm": 1.2858116626739502, "learning_rate": 9.825578947368421e-05, "loss": 0.4494, "step": 26640 }, { "epoch": 1.4918243924291634, "grad_norm": 1.4351807832717896, "learning_rate": 9.825552631578948e-05, "loss": 0.3805, "step": 26641 }, { "epoch": 1.4918803897412924, "grad_norm": 1.4434479475021362, "learning_rate": 9.825526315789474e-05, "loss": 0.5806, "step": 26642 }, { "epoch": 1.4919363870534215, "grad_norm": 1.21992027759552, "learning_rate": 9.825500000000001e-05, "loss": 0.413, "step": 26643 }, { "epoch": 1.4919923843655505, "grad_norm": 1.1825666427612305, "learning_rate": 9.825473684210526e-05, "loss": 0.4294, "step": 26644 }, { "epoch": 1.4920483816776795, "grad_norm": 1.4497736692428589, "learning_rate": 9.825447368421053e-05, "loss": 0.5618, "step": 26645 }, { "epoch": 1.4921043789898085, "grad_norm": 1.3041061162948608, "learning_rate": 9.825421052631579e-05, "loss": 0.5717, "step": 26646 }, { "epoch": 1.4921603763019375, 
"grad_norm": 1.6015552282333374, "learning_rate": 9.825394736842107e-05, "loss": 0.4817, "step": 26647 }, { "epoch": 1.4922163736140666, "grad_norm": 1.276550054550171, "learning_rate": 9.825368421052633e-05, "loss": 0.4927, "step": 26648 }, { "epoch": 1.4922723709261956, "grad_norm": 1.363005518913269, "learning_rate": 9.825342105263159e-05, "loss": 0.4021, "step": 26649 }, { "epoch": 1.4923283682383246, "grad_norm": 1.5803815126419067, "learning_rate": 9.825315789473685e-05, "loss": 0.6846, "step": 26650 }, { "epoch": 1.4923843655504536, "grad_norm": 1.558101773262024, "learning_rate": 9.82528947368421e-05, "loss": 0.4083, "step": 26651 }, { "epoch": 1.4924403628625826, "grad_norm": 1.2533341646194458, "learning_rate": 9.825263157894738e-05, "loss": 0.4739, "step": 26652 }, { "epoch": 1.4924963601747117, "grad_norm": 1.1889357566833496, "learning_rate": 9.825236842105264e-05, "loss": 0.3874, "step": 26653 }, { "epoch": 1.4925523574868407, "grad_norm": 1.213054895401001, "learning_rate": 9.82521052631579e-05, "loss": 0.424, "step": 26654 }, { "epoch": 1.4926083547989697, "grad_norm": 1.3365778923034668, "learning_rate": 9.825184210526316e-05, "loss": 0.3959, "step": 26655 }, { "epoch": 1.4926643521110987, "grad_norm": 1.2400928735733032, "learning_rate": 9.825157894736843e-05, "loss": 0.4036, "step": 26656 }, { "epoch": 1.4927203494232277, "grad_norm": 1.3384134769439697, "learning_rate": 9.825131578947369e-05, "loss": 0.4183, "step": 26657 }, { "epoch": 1.4927763467353568, "grad_norm": 1.3476084470748901, "learning_rate": 9.825105263157895e-05, "loss": 0.5117, "step": 26658 }, { "epoch": 1.4928323440474858, "grad_norm": 1.2372089624404907, "learning_rate": 9.825078947368421e-05, "loss": 0.4327, "step": 26659 }, { "epoch": 1.4928883413596148, "grad_norm": 1.314595341682434, "learning_rate": 9.825052631578948e-05, "loss": 0.522, "step": 26660 }, { "epoch": 1.4929443386717438, "grad_norm": 1.4243106842041016, "learning_rate": 9.825026315789474e-05, "loss": 0.5655, 
"step": 26661 }, { "epoch": 1.4930003359838728, "grad_norm": 1.6291049718856812, "learning_rate": 9.825e-05, "loss": 0.5341, "step": 26662 }, { "epoch": 1.4930563332960018, "grad_norm": 1.1582446098327637, "learning_rate": 9.824973684210526e-05, "loss": 0.3955, "step": 26663 }, { "epoch": 1.4931123306081309, "grad_norm": 1.3123834133148193, "learning_rate": 9.824947368421054e-05, "loss": 0.4636, "step": 26664 }, { "epoch": 1.4931683279202599, "grad_norm": 46.05010223388672, "learning_rate": 9.82492105263158e-05, "loss": 0.5557, "step": 26665 }, { "epoch": 1.493224325232389, "grad_norm": 1.4509391784667969, "learning_rate": 9.824894736842107e-05, "loss": 0.5353, "step": 26666 }, { "epoch": 1.493280322544518, "grad_norm": 1.1491566896438599, "learning_rate": 9.824868421052632e-05, "loss": 0.386, "step": 26667 }, { "epoch": 1.493336319856647, "grad_norm": 1.6083931922912598, "learning_rate": 9.824842105263157e-05, "loss": 0.581, "step": 26668 }, { "epoch": 1.493392317168776, "grad_norm": 1.2309685945510864, "learning_rate": 9.824815789473685e-05, "loss": 0.366, "step": 26669 }, { "epoch": 1.493448314480905, "grad_norm": 1.7661563158035278, "learning_rate": 9.824789473684211e-05, "loss": 0.6173, "step": 26670 }, { "epoch": 1.493504311793034, "grad_norm": 1.210553526878357, "learning_rate": 9.824763157894738e-05, "loss": 0.4531, "step": 26671 }, { "epoch": 1.493560309105163, "grad_norm": 1.2885973453521729, "learning_rate": 9.824736842105263e-05, "loss": 0.4018, "step": 26672 }, { "epoch": 1.493616306417292, "grad_norm": 1.4575316905975342, "learning_rate": 9.82471052631579e-05, "loss": 0.4969, "step": 26673 }, { "epoch": 1.493672303729421, "grad_norm": 1.6433037519454956, "learning_rate": 9.824684210526316e-05, "loss": 0.5275, "step": 26674 }, { "epoch": 1.49372830104155, "grad_norm": 1.4665077924728394, "learning_rate": 9.824657894736843e-05, "loss": 0.5303, "step": 26675 }, { "epoch": 1.493784298353679, "grad_norm": 1.18905770778656, "learning_rate": 
9.824631578947368e-05, "loss": 0.4734, "step": 26676 }, { "epoch": 1.4938402956658081, "grad_norm": 1.1709564924240112, "learning_rate": 9.824605263157895e-05, "loss": 0.389, "step": 26677 }, { "epoch": 1.4938962929779371, "grad_norm": 1.2126034498214722, "learning_rate": 9.824578947368421e-05, "loss": 0.3979, "step": 26678 }, { "epoch": 1.4939522902900662, "grad_norm": 1.1228069067001343, "learning_rate": 9.824552631578949e-05, "loss": 0.3886, "step": 26679 }, { "epoch": 1.4940082876021952, "grad_norm": 1.4189866781234741, "learning_rate": 9.824526315789475e-05, "loss": 0.4907, "step": 26680 }, { "epoch": 1.4940642849143242, "grad_norm": 1.4428449869155884, "learning_rate": 9.8245e-05, "loss": 0.4597, "step": 26681 }, { "epoch": 1.4941202822264532, "grad_norm": 1.293521523475647, "learning_rate": 9.824473684210527e-05, "loss": 0.466, "step": 26682 }, { "epoch": 1.4941762795385822, "grad_norm": 1.3009419441223145, "learning_rate": 9.824447368421054e-05, "loss": 0.4997, "step": 26683 }, { "epoch": 1.494232276850711, "grad_norm": 1.138881802558899, "learning_rate": 9.82442105263158e-05, "loss": 0.3961, "step": 26684 }, { "epoch": 1.49428827416284, "grad_norm": 1.5244901180267334, "learning_rate": 9.824394736842106e-05, "loss": 0.4786, "step": 26685 }, { "epoch": 1.494344271474969, "grad_norm": 1.3683289289474487, "learning_rate": 9.824368421052632e-05, "loss": 0.5121, "step": 26686 }, { "epoch": 1.494400268787098, "grad_norm": 1.4406224489212036, "learning_rate": 9.824342105263158e-05, "loss": 0.4976, "step": 26687 }, { "epoch": 1.494456266099227, "grad_norm": 1.1860580444335938, "learning_rate": 9.824315789473685e-05, "loss": 0.3892, "step": 26688 }, { "epoch": 1.4945122634113561, "grad_norm": 1.1105409860610962, "learning_rate": 9.824289473684211e-05, "loss": 0.3432, "step": 26689 }, { "epoch": 1.4945682607234851, "grad_norm": 1.489783763885498, "learning_rate": 9.824263157894737e-05, "loss": 0.6195, "step": 26690 }, { "epoch": 1.4946242580356142, "grad_norm": 
1.3480902910232544, "learning_rate": 9.824236842105263e-05, "loss": 0.4823, "step": 26691 }, { "epoch": 1.4946802553477432, "grad_norm": 1.6353440284729004, "learning_rate": 9.82421052631579e-05, "loss": 0.6926, "step": 26692 }, { "epoch": 1.4947362526598722, "grad_norm": 1.5540894269943237, "learning_rate": 9.824184210526316e-05, "loss": 0.4531, "step": 26693 }, { "epoch": 1.4947922499720012, "grad_norm": 1.8608779907226562, "learning_rate": 9.824157894736842e-05, "loss": 0.5218, "step": 26694 }, { "epoch": 1.4948482472841302, "grad_norm": 1.2090544700622559, "learning_rate": 9.824131578947368e-05, "loss": 0.4046, "step": 26695 }, { "epoch": 1.4949042445962593, "grad_norm": 1.2432013750076294, "learning_rate": 9.824105263157896e-05, "loss": 0.4473, "step": 26696 }, { "epoch": 1.4949602419083883, "grad_norm": 1.1945785284042358, "learning_rate": 9.824078947368422e-05, "loss": 0.3855, "step": 26697 }, { "epoch": 1.4950162392205173, "grad_norm": 1.077295184135437, "learning_rate": 9.824052631578949e-05, "loss": 0.3811, "step": 26698 }, { "epoch": 1.4950722365326463, "grad_norm": 1.4812551736831665, "learning_rate": 9.824026315789473e-05, "loss": 0.6106, "step": 26699 }, { "epoch": 1.4951282338447753, "grad_norm": 1.4711402654647827, "learning_rate": 9.824000000000001e-05, "loss": 0.3819, "step": 26700 }, { "epoch": 1.4951842311569044, "grad_norm": 1.2971477508544922, "learning_rate": 9.823973684210527e-05, "loss": 0.4607, "step": 26701 }, { "epoch": 1.4952402284690334, "grad_norm": 1.1944701671600342, "learning_rate": 9.823947368421053e-05, "loss": 0.4241, "step": 26702 }, { "epoch": 1.4952962257811624, "grad_norm": 1.2420014142990112, "learning_rate": 9.82392105263158e-05, "loss": 0.3327, "step": 26703 }, { "epoch": 1.4953522230932914, "grad_norm": 1.3738360404968262, "learning_rate": 9.823894736842105e-05, "loss": 0.4774, "step": 26704 }, { "epoch": 1.4954082204054204, "grad_norm": 1.2295901775360107, "learning_rate": 9.823868421052632e-05, "loss": 0.4331, "step": 
26705 }, { "epoch": 1.4954642177175494, "grad_norm": 1.4630894660949707, "learning_rate": 9.823842105263158e-05, "loss": 0.6747, "step": 26706 }, { "epoch": 1.4955202150296785, "grad_norm": 1.5961787700653076, "learning_rate": 9.823815789473685e-05, "loss": 0.5482, "step": 26707 }, { "epoch": 1.4955762123418075, "grad_norm": 1.469846248626709, "learning_rate": 9.823789473684211e-05, "loss": 0.62, "step": 26708 }, { "epoch": 1.4956322096539365, "grad_norm": 1.65489661693573, "learning_rate": 9.823763157894737e-05, "loss": 0.5532, "step": 26709 }, { "epoch": 1.4956882069660655, "grad_norm": 1.1136822700500488, "learning_rate": 9.823736842105263e-05, "loss": 0.4125, "step": 26710 }, { "epoch": 1.4957442042781945, "grad_norm": 1.4769656658172607, "learning_rate": 9.82371052631579e-05, "loss": 0.4585, "step": 26711 }, { "epoch": 1.4958002015903236, "grad_norm": 1.5494664907455444, "learning_rate": 9.823684210526317e-05, "loss": 0.514, "step": 26712 }, { "epoch": 1.4958561989024526, "grad_norm": 1.2355320453643799, "learning_rate": 9.823657894736843e-05, "loss": 0.4382, "step": 26713 }, { "epoch": 1.4959121962145816, "grad_norm": 1.2471064329147339, "learning_rate": 9.823631578947368e-05, "loss": 0.4819, "step": 26714 }, { "epoch": 1.4959681935267106, "grad_norm": 1.4682189226150513, "learning_rate": 9.823605263157896e-05, "loss": 0.5551, "step": 26715 }, { "epoch": 1.4960241908388396, "grad_norm": 1.6817207336425781, "learning_rate": 9.823578947368422e-05, "loss": 0.6358, "step": 26716 }, { "epoch": 1.4960801881509687, "grad_norm": 1.5784755945205688, "learning_rate": 9.823552631578948e-05, "loss": 0.4629, "step": 26717 }, { "epoch": 1.4961361854630977, "grad_norm": 1.316011667251587, "learning_rate": 9.823526315789474e-05, "loss": 0.481, "step": 26718 }, { "epoch": 1.4961921827752267, "grad_norm": 1.3917642831802368, "learning_rate": 9.8235e-05, "loss": 0.4046, "step": 26719 }, { "epoch": 1.4962481800873557, "grad_norm": 1.3899905681610107, "learning_rate": 
9.823473684210527e-05, "loss": 0.4627, "step": 26720 }, { "epoch": 1.4963041773994847, "grad_norm": 1.5084383487701416, "learning_rate": 9.823447368421053e-05, "loss": 0.3227, "step": 26721 }, { "epoch": 1.4963601747116138, "grad_norm": 1.2321195602416992, "learning_rate": 9.823421052631579e-05, "loss": 0.4581, "step": 26722 }, { "epoch": 1.4964161720237428, "grad_norm": 1.4483340978622437, "learning_rate": 9.823394736842105e-05, "loss": 0.6091, "step": 26723 }, { "epoch": 1.4964721693358718, "grad_norm": 1.0192410945892334, "learning_rate": 9.823368421052632e-05, "loss": 0.3762, "step": 26724 }, { "epoch": 1.4965281666480008, "grad_norm": 1.1380773782730103, "learning_rate": 9.823342105263158e-05, "loss": 0.3941, "step": 26725 }, { "epoch": 1.4965841639601298, "grad_norm": 1.5388333797454834, "learning_rate": 9.823315789473686e-05, "loss": 0.4075, "step": 26726 }, { "epoch": 1.4966401612722589, "grad_norm": 1.4268181324005127, "learning_rate": 9.82328947368421e-05, "loss": 0.4967, "step": 26727 }, { "epoch": 1.4966961585843879, "grad_norm": 1.0744291543960571, "learning_rate": 9.823263157894738e-05, "loss": 0.4012, "step": 26728 }, { "epoch": 1.496752155896517, "grad_norm": 1.3298625946044922, "learning_rate": 9.823236842105264e-05, "loss": 0.4907, "step": 26729 }, { "epoch": 1.496808153208646, "grad_norm": 1.501968264579773, "learning_rate": 9.823210526315791e-05, "loss": 0.6027, "step": 26730 }, { "epoch": 1.496864150520775, "grad_norm": 1.2917025089263916, "learning_rate": 9.823184210526315e-05, "loss": 0.4129, "step": 26731 }, { "epoch": 1.496920147832904, "grad_norm": 1.2158446311950684, "learning_rate": 9.823157894736843e-05, "loss": 0.3479, "step": 26732 }, { "epoch": 1.496976145145033, "grad_norm": 1.1381868124008179, "learning_rate": 9.823131578947369e-05, "loss": 0.4021, "step": 26733 }, { "epoch": 1.497032142457162, "grad_norm": 1.2020201683044434, "learning_rate": 9.823105263157896e-05, "loss": 0.3444, "step": 26734 }, { "epoch": 1.497088139769291, 
"grad_norm": 1.4698524475097656, "learning_rate": 9.823078947368422e-05, "loss": 0.4059, "step": 26735 }, { "epoch": 1.49714413708142, "grad_norm": 1.5952394008636475, "learning_rate": 9.823052631578947e-05, "loss": 0.4962, "step": 26736 }, { "epoch": 1.497200134393549, "grad_norm": 1.15773606300354, "learning_rate": 9.823026315789474e-05, "loss": 0.4159, "step": 26737 }, { "epoch": 1.497256131705678, "grad_norm": 1.5245182514190674, "learning_rate": 9.823e-05, "loss": 0.577, "step": 26738 }, { "epoch": 1.497312129017807, "grad_norm": 1.2623963356018066, "learning_rate": 9.822973684210527e-05, "loss": 0.3999, "step": 26739 }, { "epoch": 1.497368126329936, "grad_norm": 1.3014445304870605, "learning_rate": 9.822947368421053e-05, "loss": 0.395, "step": 26740 }, { "epoch": 1.4974241236420651, "grad_norm": 1.1897354125976562, "learning_rate": 9.822921052631579e-05, "loss": 0.4322, "step": 26741 }, { "epoch": 1.4974801209541941, "grad_norm": 1.2164031267166138, "learning_rate": 9.822894736842105e-05, "loss": 0.4825, "step": 26742 }, { "epoch": 1.4975361182663232, "grad_norm": 1.5846658945083618, "learning_rate": 9.822868421052633e-05, "loss": 0.6695, "step": 26743 }, { "epoch": 1.4975921155784522, "grad_norm": 1.182814598083496, "learning_rate": 9.822842105263159e-05, "loss": 0.421, "step": 26744 }, { "epoch": 1.4976481128905812, "grad_norm": 1.4049961566925049, "learning_rate": 9.822815789473684e-05, "loss": 0.3883, "step": 26745 }, { "epoch": 1.4977041102027102, "grad_norm": 1.538678765296936, "learning_rate": 9.82278947368421e-05, "loss": 0.5299, "step": 26746 }, { "epoch": 1.4977601075148392, "grad_norm": 1.246418833732605, "learning_rate": 9.822763157894738e-05, "loss": 0.3696, "step": 26747 }, { "epoch": 1.4978161048269683, "grad_norm": 1.547446846961975, "learning_rate": 9.822736842105264e-05, "loss": 0.5213, "step": 26748 }, { "epoch": 1.4978721021390973, "grad_norm": 1.4471827745437622, "learning_rate": 9.82271052631579e-05, "loss": 0.3878, "step": 26749 }, { 
"epoch": 1.4979280994512263, "grad_norm": 1.4013447761535645, "learning_rate": 9.822684210526316e-05, "loss": 0.5327, "step": 26750 }, { "epoch": 1.4979840967633553, "grad_norm": 1.5289849042892456, "learning_rate": 9.822657894736843e-05, "loss": 0.488, "step": 26751 }, { "epoch": 1.4980400940754843, "grad_norm": 1.0098973512649536, "learning_rate": 9.822631578947369e-05, "loss": 0.3345, "step": 26752 }, { "epoch": 1.4980960913876133, "grad_norm": 1.0826987028121948, "learning_rate": 9.822605263157896e-05, "loss": 0.5749, "step": 26753 }, { "epoch": 1.4981520886997424, "grad_norm": 1.5209635496139526, "learning_rate": 9.822578947368421e-05, "loss": 0.4544, "step": 26754 }, { "epoch": 1.4982080860118714, "grad_norm": 2.081101417541504, "learning_rate": 9.822552631578947e-05, "loss": 0.8189, "step": 26755 }, { "epoch": 1.4982640833240004, "grad_norm": 1.442839503288269, "learning_rate": 9.822526315789474e-05, "loss": 0.4608, "step": 26756 }, { "epoch": 1.4983200806361294, "grad_norm": 1.606909990310669, "learning_rate": 9.8225e-05, "loss": 0.4206, "step": 26757 }, { "epoch": 1.4983760779482584, "grad_norm": 1.4205514192581177, "learning_rate": 9.822473684210528e-05, "loss": 0.4728, "step": 26758 }, { "epoch": 1.4984320752603875, "grad_norm": 1.2935292720794678, "learning_rate": 9.822447368421052e-05, "loss": 0.4086, "step": 26759 }, { "epoch": 1.4984880725725165, "grad_norm": 1.299617052078247, "learning_rate": 9.82242105263158e-05, "loss": 0.483, "step": 26760 }, { "epoch": 1.4985440698846455, "grad_norm": 1.3414956331253052, "learning_rate": 9.822394736842105e-05, "loss": 0.3733, "step": 26761 }, { "epoch": 1.4986000671967745, "grad_norm": 1.2331242561340332, "learning_rate": 9.822368421052633e-05, "loss": 0.4835, "step": 26762 }, { "epoch": 1.4986560645089035, "grad_norm": 1.2244077920913696, "learning_rate": 9.822342105263159e-05, "loss": 0.3698, "step": 26763 }, { "epoch": 1.4987120618210326, "grad_norm": 1.3659173250198364, "learning_rate": 
9.822315789473685e-05, "loss": 0.3211, "step": 26764 }, { "epoch": 1.4987680591331616, "grad_norm": 1.341306447982788, "learning_rate": 9.822289473684211e-05, "loss": 0.4194, "step": 26765 }, { "epoch": 1.4988240564452906, "grad_norm": 1.5292794704437256, "learning_rate": 9.822263157894738e-05, "loss": 0.464, "step": 26766 }, { "epoch": 1.4988800537574196, "grad_norm": 1.1992926597595215, "learning_rate": 9.822236842105264e-05, "loss": 0.4708, "step": 26767 }, { "epoch": 1.4989360510695486, "grad_norm": 1.3847891092300415, "learning_rate": 9.82221052631579e-05, "loss": 0.4973, "step": 26768 }, { "epoch": 1.4989920483816777, "grad_norm": 1.101475715637207, "learning_rate": 9.822184210526316e-05, "loss": 0.4568, "step": 26769 }, { "epoch": 1.4990480456938067, "grad_norm": 1.3191195726394653, "learning_rate": 9.822157894736843e-05, "loss": 0.3405, "step": 26770 }, { "epoch": 1.4991040430059357, "grad_norm": 1.291374921798706, "learning_rate": 9.822131578947369e-05, "loss": 0.4529, "step": 26771 }, { "epoch": 1.4991600403180647, "grad_norm": 1.3544799089431763, "learning_rate": 9.822105263157895e-05, "loss": 0.4364, "step": 26772 }, { "epoch": 1.4992160376301937, "grad_norm": 1.2974934577941895, "learning_rate": 9.822078947368421e-05, "loss": 0.407, "step": 26773 }, { "epoch": 1.4992720349423228, "grad_norm": 1.4829468727111816, "learning_rate": 9.822052631578947e-05, "loss": 0.4433, "step": 26774 }, { "epoch": 1.4993280322544518, "grad_norm": 2.106544017791748, "learning_rate": 9.822026315789475e-05, "loss": 0.4644, "step": 26775 }, { "epoch": 1.4993840295665808, "grad_norm": 1.395676612854004, "learning_rate": 9.822e-05, "loss": 0.4148, "step": 26776 }, { "epoch": 1.4994400268787098, "grad_norm": 1.3502802848815918, "learning_rate": 9.821973684210526e-05, "loss": 0.6398, "step": 26777 }, { "epoch": 1.4994960241908388, "grad_norm": 1.4034233093261719, "learning_rate": 9.821947368421052e-05, "loss": 0.6107, "step": 26778 }, { "epoch": 1.4995520215029678, "grad_norm": 
1.351454257965088, "learning_rate": 9.82192105263158e-05, "loss": 0.4908, "step": 26779 }, { "epoch": 1.4996080188150969, "grad_norm": 1.7202394008636475, "learning_rate": 9.821894736842106e-05, "loss": 0.5555, "step": 26780 }, { "epoch": 1.4996640161272259, "grad_norm": 1.1028008460998535, "learning_rate": 9.821868421052632e-05, "loss": 0.3848, "step": 26781 }, { "epoch": 1.499720013439355, "grad_norm": 1.5374046564102173, "learning_rate": 9.821842105263158e-05, "loss": 0.4258, "step": 26782 }, { "epoch": 1.499776010751484, "grad_norm": 1.2351723909378052, "learning_rate": 9.821815789473685e-05, "loss": 0.4447, "step": 26783 }, { "epoch": 1.499832008063613, "grad_norm": 1.3446274995803833, "learning_rate": 9.821789473684211e-05, "loss": 0.4953, "step": 26784 }, { "epoch": 1.499888005375742, "grad_norm": 1.412572979927063, "learning_rate": 9.821763157894738e-05, "loss": 0.4591, "step": 26785 }, { "epoch": 1.499944002687871, "grad_norm": 1.1691511869430542, "learning_rate": 9.821736842105263e-05, "loss": 0.4391, "step": 26786 }, { "epoch": 1.5, "grad_norm": 1.2498557567596436, "learning_rate": 9.82171052631579e-05, "loss": 0.4797, "step": 26787 }, { "epoch": 1.500055997312129, "grad_norm": 1.5805020332336426, "learning_rate": 9.821684210526316e-05, "loss": 0.474, "step": 26788 }, { "epoch": 1.500111994624258, "grad_norm": 1.2805150747299194, "learning_rate": 9.821657894736842e-05, "loss": 0.3752, "step": 26789 }, { "epoch": 1.500167991936387, "grad_norm": 1.155444622039795, "learning_rate": 9.82163157894737e-05, "loss": 0.382, "step": 26790 }, { "epoch": 1.500223989248516, "grad_norm": 1.1691333055496216, "learning_rate": 9.821605263157894e-05, "loss": 0.3829, "step": 26791 }, { "epoch": 1.500279986560645, "grad_norm": 1.4243505001068115, "learning_rate": 9.821578947368421e-05, "loss": 0.5008, "step": 26792 }, { "epoch": 1.5003359838727741, "grad_norm": 1.4051177501678467, "learning_rate": 9.821552631578947e-05, "loss": 0.622, "step": 26793 }, { "epoch": 
1.5003919811849031, "grad_norm": 1.3862048387527466, "learning_rate": 9.821526315789475e-05, "loss": 0.4147, "step": 26794 }, { "epoch": 1.5004479784970322, "grad_norm": 1.3812639713287354, "learning_rate": 9.821500000000001e-05, "loss": 0.5005, "step": 26795 }, { "epoch": 1.5005039758091612, "grad_norm": 1.407889485359192, "learning_rate": 9.821473684210527e-05, "loss": 0.5874, "step": 26796 }, { "epoch": 1.5005599731212902, "grad_norm": 1.387760043144226, "learning_rate": 9.821447368421053e-05, "loss": 0.4919, "step": 26797 }, { "epoch": 1.5006159704334192, "grad_norm": 1.3240712881088257, "learning_rate": 9.82142105263158e-05, "loss": 0.4559, "step": 26798 }, { "epoch": 1.5006719677455482, "grad_norm": 2.1726765632629395, "learning_rate": 9.821394736842106e-05, "loss": 0.5946, "step": 26799 }, { "epoch": 1.5007279650576772, "grad_norm": 1.0934633016586304, "learning_rate": 9.821368421052632e-05, "loss": 0.3353, "step": 26800 }, { "epoch": 1.5007839623698063, "grad_norm": 1.3092851638793945, "learning_rate": 9.821342105263158e-05, "loss": 0.5126, "step": 26801 }, { "epoch": 1.5008399596819353, "grad_norm": 1.3932744264602661, "learning_rate": 9.821315789473685e-05, "loss": 0.4586, "step": 26802 }, { "epoch": 1.5008959569940643, "grad_norm": 1.3432908058166504, "learning_rate": 9.821289473684211e-05, "loss": 0.4227, "step": 26803 }, { "epoch": 1.5009519543061933, "grad_norm": 1.2111737728118896, "learning_rate": 9.821263157894737e-05, "loss": 0.4225, "step": 26804 }, { "epoch": 1.5010079516183223, "grad_norm": 1.2058987617492676, "learning_rate": 9.821236842105263e-05, "loss": 0.3126, "step": 26805 }, { "epoch": 1.5010639489304514, "grad_norm": 1.4938188791275024, "learning_rate": 9.821210526315789e-05, "loss": 0.5548, "step": 26806 }, { "epoch": 1.5011199462425804, "grad_norm": 1.2865266799926758, "learning_rate": 9.821184210526316e-05, "loss": 0.4605, "step": 26807 }, { "epoch": 1.5011759435547094, "grad_norm": 1.329338550567627, "learning_rate": 
9.821157894736842e-05, "loss": 0.4606, "step": 26808 }, { "epoch": 1.5012319408668384, "grad_norm": 1.2825850248336792, "learning_rate": 9.821131578947368e-05, "loss": 0.4284, "step": 26809 }, { "epoch": 1.5012879381789674, "grad_norm": 1.5459418296813965, "learning_rate": 9.821105263157894e-05, "loss": 0.6218, "step": 26810 }, { "epoch": 1.5013439354910965, "grad_norm": 1.3349864482879639, "learning_rate": 9.821078947368422e-05, "loss": 0.3773, "step": 26811 }, { "epoch": 1.5013999328032255, "grad_norm": 1.8708873987197876, "learning_rate": 9.821052631578948e-05, "loss": 0.4553, "step": 26812 }, { "epoch": 1.5014559301153545, "grad_norm": 1.0955673456192017, "learning_rate": 9.821026315789475e-05, "loss": 0.3634, "step": 26813 }, { "epoch": 1.5015119274274835, "grad_norm": 1.1382811069488525, "learning_rate": 9.821e-05, "loss": 0.4818, "step": 26814 }, { "epoch": 1.5015679247396125, "grad_norm": 1.322993516921997, "learning_rate": 9.820973684210527e-05, "loss": 0.4008, "step": 26815 }, { "epoch": 1.5016239220517416, "grad_norm": 1.3150700330734253, "learning_rate": 9.820947368421053e-05, "loss": 0.535, "step": 26816 }, { "epoch": 1.5016799193638706, "grad_norm": 1.4436053037643433, "learning_rate": 9.82092105263158e-05, "loss": 0.4855, "step": 26817 }, { "epoch": 1.5017359166759996, "grad_norm": 1.3064913749694824, "learning_rate": 9.820894736842106e-05, "loss": 0.4284, "step": 26818 }, { "epoch": 1.5017919139881286, "grad_norm": 1.1609876155853271, "learning_rate": 9.820868421052632e-05, "loss": 0.3699, "step": 26819 }, { "epoch": 1.5018479113002576, "grad_norm": 1.4147918224334717, "learning_rate": 9.820842105263158e-05, "loss": 0.5225, "step": 26820 }, { "epoch": 1.5019039086123867, "grad_norm": 1.2616310119628906, "learning_rate": 9.820815789473686e-05, "loss": 0.3796, "step": 26821 }, { "epoch": 1.5019599059245157, "grad_norm": 2.5908286571502686, "learning_rate": 9.820789473684211e-05, "loss": 0.5921, "step": 26822 }, { "epoch": 1.5020159032366447, 
"grad_norm": 1.4717801809310913, "learning_rate": 9.820763157894736e-05, "loss": 0.4755, "step": 26823 }, { "epoch": 1.5020719005487737, "grad_norm": 1.6030758619308472, "learning_rate": 9.820736842105263e-05, "loss": 0.5514, "step": 26824 }, { "epoch": 1.5021278978609027, "grad_norm": 1.283334493637085, "learning_rate": 9.82071052631579e-05, "loss": 0.5847, "step": 26825 }, { "epoch": 1.5021838951730317, "grad_norm": 1.3323065042495728, "learning_rate": 9.820684210526317e-05, "loss": 0.4857, "step": 26826 }, { "epoch": 1.5022398924851608, "grad_norm": 1.284218192100525, "learning_rate": 9.820657894736843e-05, "loss": 0.4279, "step": 26827 }, { "epoch": 1.5022958897972898, "grad_norm": 1.5113115310668945, "learning_rate": 9.820631578947369e-05, "loss": 0.4652, "step": 26828 }, { "epoch": 1.5023518871094188, "grad_norm": 1.7711883783340454, "learning_rate": 9.820605263157895e-05, "loss": 0.5026, "step": 26829 }, { "epoch": 1.5024078844215478, "grad_norm": 1.3830803632736206, "learning_rate": 9.820578947368422e-05, "loss": 0.4605, "step": 26830 }, { "epoch": 1.5024638817336768, "grad_norm": 1.2874218225479126, "learning_rate": 9.820552631578948e-05, "loss": 0.4895, "step": 26831 }, { "epoch": 1.5025198790458059, "grad_norm": 1.2400010824203491, "learning_rate": 9.820526315789474e-05, "loss": 0.3865, "step": 26832 }, { "epoch": 1.5025758763579349, "grad_norm": 1.394715666770935, "learning_rate": 9.8205e-05, "loss": 0.5973, "step": 26833 }, { "epoch": 1.502631873670064, "grad_norm": 1.2617863416671753, "learning_rate": 9.820473684210527e-05, "loss": 0.4719, "step": 26834 }, { "epoch": 1.502687870982193, "grad_norm": null, "learning_rate": 9.820473684210527e-05, "loss": 0.4233, "step": 26835 }, { "epoch": 1.502743868294322, "grad_norm": 1.1229867935180664, "learning_rate": 9.820447368421053e-05, "loss": 0.4105, "step": 26836 }, { "epoch": 1.502799865606451, "grad_norm": 1.460927963256836, "learning_rate": 9.820421052631579e-05, "loss": 0.5992, "step": 26837 }, { "epoch": 
1.50285586291858, "grad_norm": 1.4900665283203125, "learning_rate": 9.820394736842105e-05, "loss": 0.5054, "step": 26838 }, { "epoch": 1.502911860230709, "grad_norm": 1.3573169708251953, "learning_rate": 9.820368421052632e-05, "loss": 0.5846, "step": 26839 }, { "epoch": 1.502967857542838, "grad_norm": 1.4125213623046875, "learning_rate": 9.820342105263158e-05, "loss": 0.4359, "step": 26840 }, { "epoch": 1.503023854854967, "grad_norm": 1.2597665786743164, "learning_rate": 9.820315789473684e-05, "loss": 0.4514, "step": 26841 }, { "epoch": 1.503079852167096, "grad_norm": 1.8424570560455322, "learning_rate": 9.82028947368421e-05, "loss": 0.6751, "step": 26842 }, { "epoch": 1.503135849479225, "grad_norm": 1.2865324020385742, "learning_rate": 9.820263157894736e-05, "loss": 0.4469, "step": 26843 }, { "epoch": 1.503191846791354, "grad_norm": 1.4428001642227173, "learning_rate": 9.820236842105264e-05, "loss": 0.5853, "step": 26844 }, { "epoch": 1.503247844103483, "grad_norm": 1.3592240810394287, "learning_rate": 9.82021052631579e-05, "loss": 0.4209, "step": 26845 }, { "epoch": 1.5033038414156121, "grad_norm": 1.2329879999160767, "learning_rate": 9.820184210526317e-05, "loss": 0.564, "step": 26846 }, { "epoch": 1.5033598387277411, "grad_norm": 1.3990342617034912, "learning_rate": 9.820157894736842e-05, "loss": 0.5239, "step": 26847 }, { "epoch": 1.5034158360398702, "grad_norm": 1.3297662734985352, "learning_rate": 9.820131578947369e-05, "loss": 0.4364, "step": 26848 }, { "epoch": 1.5034718333519992, "grad_norm": 1.827245831489563, "learning_rate": 9.820105263157895e-05, "loss": 0.5855, "step": 26849 }, { "epoch": 1.5035278306641282, "grad_norm": 1.5831985473632812, "learning_rate": 9.820078947368422e-05, "loss": 0.5982, "step": 26850 }, { "epoch": 1.5035838279762572, "grad_norm": 1.1328448057174683, "learning_rate": 9.820052631578948e-05, "loss": 0.4677, "step": 26851 }, { "epoch": 1.5036398252883862, "grad_norm": 1.227616310119629, "learning_rate": 9.820026315789474e-05, 
"loss": 0.5185, "step": 26852 }, { "epoch": 1.5036958226005153, "grad_norm": 1.471461296081543, "learning_rate": 9.82e-05, "loss": 0.4526, "step": 26853 }, { "epoch": 1.5037518199126443, "grad_norm": 1.071116328239441, "learning_rate": 9.819973684210527e-05, "loss": 0.3713, "step": 26854 }, { "epoch": 1.5038078172247733, "grad_norm": 1.463884949684143, "learning_rate": 9.819947368421053e-05, "loss": 0.5586, "step": 26855 }, { "epoch": 1.5038638145369023, "grad_norm": 1.183410406112671, "learning_rate": 9.81992105263158e-05, "loss": 0.4291, "step": 26856 }, { "epoch": 1.5039198118490313, "grad_norm": 1.2246249914169312, "learning_rate": 9.819894736842105e-05, "loss": 0.4141, "step": 26857 }, { "epoch": 1.5039758091611604, "grad_norm": 1.3548663854599, "learning_rate": 9.819868421052631e-05, "loss": 0.6363, "step": 26858 }, { "epoch": 1.5040318064732894, "grad_norm": 1.112684726715088, "learning_rate": 9.819842105263159e-05, "loss": 0.4017, "step": 26859 }, { "epoch": 1.5040878037854184, "grad_norm": 1.1287680864334106, "learning_rate": 9.819815789473685e-05, "loss": 0.3189, "step": 26860 }, { "epoch": 1.5041438010975474, "grad_norm": 1.3464311361312866, "learning_rate": 9.81978947368421e-05, "loss": 0.4012, "step": 26861 }, { "epoch": 1.5041997984096764, "grad_norm": 1.1868360042572021, "learning_rate": 9.819763157894737e-05, "loss": 0.3788, "step": 26862 }, { "epoch": 1.5042557957218055, "grad_norm": 1.2991905212402344, "learning_rate": 9.819736842105264e-05, "loss": 0.4808, "step": 26863 }, { "epoch": 1.5043117930339345, "grad_norm": 1.4344667196273804, "learning_rate": 9.81971052631579e-05, "loss": 0.5092, "step": 26864 }, { "epoch": 1.5043677903460635, "grad_norm": 1.2147564888000488, "learning_rate": 9.819684210526316e-05, "loss": 0.4904, "step": 26865 }, { "epoch": 1.5044237876581925, "grad_norm": 1.4568026065826416, "learning_rate": 9.819657894736842e-05, "loss": 0.5145, "step": 26866 }, { "epoch": 1.5044797849703215, "grad_norm": 1.2571946382522583, 
"learning_rate": 9.819631578947369e-05, "loss": 0.4085, "step": 26867 }, { "epoch": 1.5045357822824506, "grad_norm": 1.4784843921661377, "learning_rate": 9.819605263157895e-05, "loss": 0.5063, "step": 26868 }, { "epoch": 1.5045917795945796, "grad_norm": 1.479533314704895, "learning_rate": 9.819578947368423e-05, "loss": 0.4835, "step": 26869 }, { "epoch": 1.5046477769067086, "grad_norm": 1.1925956010818481, "learning_rate": 9.819552631578947e-05, "loss": 0.4318, "step": 26870 }, { "epoch": 1.5047037742188376, "grad_norm": 1.4827125072479248, "learning_rate": 9.819526315789474e-05, "loss": 0.64, "step": 26871 }, { "epoch": 1.5047597715309666, "grad_norm": 1.2384952306747437, "learning_rate": 9.8195e-05, "loss": 0.4034, "step": 26872 }, { "epoch": 1.5048157688430956, "grad_norm": 1.070129156112671, "learning_rate": 9.819473684210528e-05, "loss": 0.3833, "step": 26873 }, { "epoch": 1.5048717661552247, "grad_norm": 1.3075287342071533, "learning_rate": 9.819447368421054e-05, "loss": 0.4432, "step": 26874 }, { "epoch": 1.5049277634673537, "grad_norm": 1.3838526010513306, "learning_rate": 9.81942105263158e-05, "loss": 0.4524, "step": 26875 }, { "epoch": 1.5049837607794827, "grad_norm": 1.6466572284698486, "learning_rate": 9.819394736842106e-05, "loss": 0.5443, "step": 26876 }, { "epoch": 1.5050397580916117, "grad_norm": 1.976815104484558, "learning_rate": 9.819368421052632e-05, "loss": 0.5765, "step": 26877 }, { "epoch": 1.5050957554037407, "grad_norm": 1.3899425268173218, "learning_rate": 9.819342105263159e-05, "loss": 0.4691, "step": 26878 }, { "epoch": 1.5051517527158698, "grad_norm": 1.2887729406356812, "learning_rate": 9.819315789473684e-05, "loss": 0.5758, "step": 26879 }, { "epoch": 1.5052077500279988, "grad_norm": 1.201016902923584, "learning_rate": 9.819289473684211e-05, "loss": 0.3569, "step": 26880 }, { "epoch": 1.5052637473401278, "grad_norm": 1.3866976499557495, "learning_rate": 9.819263157894737e-05, "loss": 0.4409, "step": 26881 }, { "epoch": 
1.5053197446522568, "grad_norm": 1.2868051528930664, "learning_rate": 9.819236842105264e-05, "loss": 0.4047, "step": 26882 }, { "epoch": 1.5053757419643858, "grad_norm": 2.1382479667663574, "learning_rate": 9.81921052631579e-05, "loss": 0.4798, "step": 26883 }, { "epoch": 1.5054317392765149, "grad_norm": 1.374638319015503, "learning_rate": 9.819184210526316e-05, "loss": 0.439, "step": 26884 }, { "epoch": 1.5054877365886439, "grad_norm": 1.6410070657730103, "learning_rate": 9.819157894736842e-05, "loss": 0.5119, "step": 26885 }, { "epoch": 1.505543733900773, "grad_norm": 1.246012568473816, "learning_rate": 9.81913157894737e-05, "loss": 0.4355, "step": 26886 }, { "epoch": 1.505599731212902, "grad_norm": 1.2205312252044678, "learning_rate": 9.819105263157895e-05, "loss": 0.4992, "step": 26887 }, { "epoch": 1.505655728525031, "grad_norm": 1.5243549346923828, "learning_rate": 9.819078947368421e-05, "loss": 0.5899, "step": 26888 }, { "epoch": 1.50571172583716, "grad_norm": 1.6237629652023315, "learning_rate": 9.819052631578947e-05, "loss": 0.4594, "step": 26889 }, { "epoch": 1.505767723149289, "grad_norm": 1.3966513872146606, "learning_rate": 9.819026315789475e-05, "loss": 0.4631, "step": 26890 }, { "epoch": 1.505823720461418, "grad_norm": 1.6498286724090576, "learning_rate": 9.819000000000001e-05, "loss": 0.4875, "step": 26891 }, { "epoch": 1.505879717773547, "grad_norm": 1.6096594333648682, "learning_rate": 9.818973684210527e-05, "loss": 0.3638, "step": 26892 }, { "epoch": 1.505935715085676, "grad_norm": 1.3800125122070312, "learning_rate": 9.818947368421053e-05, "loss": 0.4304, "step": 26893 }, { "epoch": 1.505991712397805, "grad_norm": 1.5419855117797852, "learning_rate": 9.818921052631579e-05, "loss": 0.4609, "step": 26894 }, { "epoch": 1.506047709709934, "grad_norm": 1.3288782835006714, "learning_rate": 9.818894736842106e-05, "loss": 0.4868, "step": 26895 }, { "epoch": 1.506103707022063, "grad_norm": 1.3769283294677734, "learning_rate": 9.818868421052632e-05, 
"loss": 0.4556, "step": 26896 }, { "epoch": 1.506159704334192, "grad_norm": 1.2915641069412231, "learning_rate": 9.818842105263158e-05, "loss": 0.513, "step": 26897 }, { "epoch": 1.5062157016463211, "grad_norm": 1.331145167350769, "learning_rate": 9.818815789473684e-05, "loss": 0.5951, "step": 26898 }, { "epoch": 1.5062716989584501, "grad_norm": 1.5310689210891724, "learning_rate": 9.818789473684211e-05, "loss": 0.5239, "step": 26899 }, { "epoch": 1.5063276962705792, "grad_norm": 1.2608551979064941, "learning_rate": 9.818763157894737e-05, "loss": 0.4522, "step": 26900 }, { "epoch": 1.5063836935827082, "grad_norm": 1.5724152326583862, "learning_rate": 9.818736842105264e-05, "loss": 0.5246, "step": 26901 }, { "epoch": 1.5064396908948372, "grad_norm": 1.3005868196487427, "learning_rate": 9.818710526315789e-05, "loss": 0.379, "step": 26902 }, { "epoch": 1.5064956882069662, "grad_norm": 1.192333459854126, "learning_rate": 9.818684210526316e-05, "loss": 0.3715, "step": 26903 }, { "epoch": 1.5065516855190952, "grad_norm": 1.3528265953063965, "learning_rate": 9.818657894736842e-05, "loss": 0.3949, "step": 26904 }, { "epoch": 1.5066076828312243, "grad_norm": 1.7287955284118652, "learning_rate": 9.81863157894737e-05, "loss": 0.3794, "step": 26905 }, { "epoch": 1.5066636801433533, "grad_norm": 2.0118093490600586, "learning_rate": 9.818605263157896e-05, "loss": 0.569, "step": 26906 }, { "epoch": 1.5067196774554823, "grad_norm": 1.28496515750885, "learning_rate": 9.818578947368422e-05, "loss": 0.4252, "step": 26907 }, { "epoch": 1.5067756747676113, "grad_norm": 1.237634301185608, "learning_rate": 9.818552631578948e-05, "loss": 0.4065, "step": 26908 }, { "epoch": 1.5068316720797403, "grad_norm": 1.317826509475708, "learning_rate": 9.818526315789475e-05, "loss": 0.4356, "step": 26909 }, { "epoch": 1.5068876693918694, "grad_norm": 1.404542326927185, "learning_rate": 9.818500000000001e-05, "loss": 0.5111, "step": 26910 }, { "epoch": 1.5069436667039984, "grad_norm": 
1.5804247856140137, "learning_rate": 9.818473684210527e-05, "loss": 0.4187, "step": 26911 }, { "epoch": 1.5069996640161272, "grad_norm": 1.4498772621154785, "learning_rate": 9.818447368421053e-05, "loss": 0.5616, "step": 26912 }, { "epoch": 1.5070556613282562, "grad_norm": 1.2389779090881348, "learning_rate": 9.818421052631579e-05, "loss": 0.4663, "step": 26913 }, { "epoch": 1.5071116586403852, "grad_norm": 1.4195177555084229, "learning_rate": 9.818394736842106e-05, "loss": 0.5183, "step": 26914 }, { "epoch": 1.5071676559525142, "grad_norm": 1.356787919998169, "learning_rate": 9.818368421052632e-05, "loss": 0.3849, "step": 26915 }, { "epoch": 1.5072236532646432, "grad_norm": 1.1957062482833862, "learning_rate": 9.818342105263158e-05, "loss": 0.402, "step": 26916 }, { "epoch": 1.5072796505767723, "grad_norm": 1.26849365234375, "learning_rate": 9.818315789473684e-05, "loss": 0.538, "step": 26917 }, { "epoch": 1.5073356478889013, "grad_norm": 1.2174674272537231, "learning_rate": 9.818289473684211e-05, "loss": 0.4348, "step": 26918 }, { "epoch": 1.5073916452010303, "grad_norm": 1.4033805131912231, "learning_rate": 9.818263157894737e-05, "loss": 0.3808, "step": 26919 }, { "epoch": 1.5074476425131593, "grad_norm": 1.1879520416259766, "learning_rate": 9.818236842105263e-05, "loss": 0.5138, "step": 26920 }, { "epoch": 1.5075036398252883, "grad_norm": 1.3620471954345703, "learning_rate": 9.81821052631579e-05, "loss": 0.4985, "step": 26921 }, { "epoch": 1.5075596371374174, "grad_norm": 1.1453810930252075, "learning_rate": 9.818184210526317e-05, "loss": 0.4237, "step": 26922 }, { "epoch": 1.5076156344495464, "grad_norm": 18.394256591796875, "learning_rate": 9.818157894736843e-05, "loss": 0.5131, "step": 26923 }, { "epoch": 1.5076716317616754, "grad_norm": 1.3331096172332764, "learning_rate": 9.81813157894737e-05, "loss": 0.4732, "step": 26924 }, { "epoch": 1.5077276290738044, "grad_norm": 1.2102887630462646, "learning_rate": 9.818105263157895e-05, "loss": 0.4682, "step": 
26925 }, { "epoch": 1.5077836263859334, "grad_norm": 1.5214927196502686, "learning_rate": 9.818078947368422e-05, "loss": 0.4753, "step": 26926 }, { "epoch": 1.5078396236980625, "grad_norm": 1.2080925703048706, "learning_rate": 9.818052631578948e-05, "loss": 0.4148, "step": 26927 }, { "epoch": 1.5078956210101915, "grad_norm": 7.116842269897461, "learning_rate": 9.818026315789474e-05, "loss": 0.4787, "step": 26928 }, { "epoch": 1.5079516183223205, "grad_norm": 1.38553786277771, "learning_rate": 9.818000000000001e-05, "loss": 0.4541, "step": 26929 }, { "epoch": 1.5080076156344495, "grad_norm": 3.073695659637451, "learning_rate": 9.817973684210526e-05, "loss": 0.4254, "step": 26930 }, { "epoch": 1.5080636129465785, "grad_norm": 1.1337478160858154, "learning_rate": 9.817947368421053e-05, "loss": 0.3835, "step": 26931 }, { "epoch": 1.5081196102587076, "grad_norm": 1.1407872438430786, "learning_rate": 9.817921052631579e-05, "loss": 0.3578, "step": 26932 }, { "epoch": 1.5081756075708366, "grad_norm": 1.1704356670379639, "learning_rate": 9.817894736842106e-05, "loss": 0.3821, "step": 26933 }, { "epoch": 1.5082316048829656, "grad_norm": 1.244095802307129, "learning_rate": 9.817868421052631e-05, "loss": 0.4444, "step": 26934 }, { "epoch": 1.5082876021950946, "grad_norm": 1.176902413368225, "learning_rate": 9.817842105263158e-05, "loss": 0.389, "step": 26935 }, { "epoch": 1.5083435995072236, "grad_norm": 1.5219768285751343, "learning_rate": 9.817815789473684e-05, "loss": 0.4255, "step": 26936 }, { "epoch": 1.5083995968193527, "grad_norm": 1.2690356969833374, "learning_rate": 9.817789473684212e-05, "loss": 0.4926, "step": 26937 }, { "epoch": 1.5084555941314817, "grad_norm": 1.209762454032898, "learning_rate": 9.817763157894738e-05, "loss": 0.4703, "step": 26938 }, { "epoch": 1.5085115914436107, "grad_norm": 1.5581369400024414, "learning_rate": 9.817736842105264e-05, "loss": 0.4605, "step": 26939 }, { "epoch": 1.5085675887557397, "grad_norm": 1.1573799848556519, "learning_rate": 
9.81771052631579e-05, "loss": 0.4106, "step": 26940 }, { "epoch": 1.5086235860678687, "grad_norm": 1.1588919162750244, "learning_rate": 9.817684210526317e-05, "loss": 0.5242, "step": 26941 }, { "epoch": 1.5086795833799977, "grad_norm": 1.4145991802215576, "learning_rate": 9.817657894736843e-05, "loss": 0.4363, "step": 26942 }, { "epoch": 1.5087355806921268, "grad_norm": 1.0535063743591309, "learning_rate": 9.817631578947369e-05, "loss": 0.3595, "step": 26943 }, { "epoch": 1.5087915780042558, "grad_norm": 1.281191349029541, "learning_rate": 9.817605263157895e-05, "loss": 0.4164, "step": 26944 }, { "epoch": 1.5088475753163848, "grad_norm": 1.3760112524032593, "learning_rate": 9.817578947368421e-05, "loss": 0.459, "step": 26945 }, { "epoch": 1.5089035726285138, "grad_norm": 1.3487932682037354, "learning_rate": 9.817552631578948e-05, "loss": 0.4675, "step": 26946 }, { "epoch": 1.5089595699406428, "grad_norm": 1.274167776107788, "learning_rate": 9.817526315789474e-05, "loss": 0.4668, "step": 26947 }, { "epoch": 1.5090155672527719, "grad_norm": 1.2175053358078003, "learning_rate": 9.8175e-05, "loss": 0.6446, "step": 26948 }, { "epoch": 1.5090715645649009, "grad_norm": 1.1962106227874756, "learning_rate": 9.817473684210526e-05, "loss": 0.4663, "step": 26949 }, { "epoch": 1.50912756187703, "grad_norm": 7.010622024536133, "learning_rate": 9.817447368421053e-05, "loss": 0.2958, "step": 26950 }, { "epoch": 1.509183559189159, "grad_norm": 1.4108219146728516, "learning_rate": 9.81742105263158e-05, "loss": 0.4266, "step": 26951 }, { "epoch": 1.509239556501288, "grad_norm": 1.3778231143951416, "learning_rate": 9.817394736842105e-05, "loss": 0.5832, "step": 26952 }, { "epoch": 1.509295553813417, "grad_norm": 1.2143008708953857, "learning_rate": 9.817368421052631e-05, "loss": 0.49, "step": 26953 }, { "epoch": 1.509351551125546, "grad_norm": 1.4713767766952515, "learning_rate": 9.817342105263159e-05, "loss": 0.4437, "step": 26954 }, { "epoch": 1.509407548437675, "grad_norm": 
1.3192007541656494, "learning_rate": 9.817315789473685e-05, "loss": 0.4068, "step": 26955 }, { "epoch": 1.509463545749804, "grad_norm": 1.194394826889038, "learning_rate": 9.817289473684212e-05, "loss": 0.4051, "step": 26956 }, { "epoch": 1.509519543061933, "grad_norm": 1.1465498208999634, "learning_rate": 9.817263157894737e-05, "loss": 0.4345, "step": 26957 }, { "epoch": 1.509575540374062, "grad_norm": 1.186548113822937, "learning_rate": 9.817236842105264e-05, "loss": 0.4653, "step": 26958 }, { "epoch": 1.509631537686191, "grad_norm": 1.673143744468689, "learning_rate": 9.81721052631579e-05, "loss": 0.8719, "step": 26959 }, { "epoch": 1.50968753499832, "grad_norm": 1.4337735176086426, "learning_rate": 9.817184210526317e-05, "loss": 0.4798, "step": 26960 }, { "epoch": 1.509743532310449, "grad_norm": 1.67737877368927, "learning_rate": 9.817157894736843e-05, "loss": 0.5259, "step": 26961 }, { "epoch": 1.5097995296225781, "grad_norm": 1.0275202989578247, "learning_rate": 9.817131578947368e-05, "loss": 0.4168, "step": 26962 }, { "epoch": 1.5098555269347071, "grad_norm": 1.2809327840805054, "learning_rate": 9.817105263157895e-05, "loss": 0.3889, "step": 26963 }, { "epoch": 1.5099115242468362, "grad_norm": 1.1116302013397217, "learning_rate": 9.817078947368421e-05, "loss": 0.4027, "step": 26964 }, { "epoch": 1.5099675215589652, "grad_norm": 1.626907229423523, "learning_rate": 9.817052631578948e-05, "loss": 0.5272, "step": 26965 }, { "epoch": 1.5100235188710942, "grad_norm": 1.2781729698181152, "learning_rate": 9.817026315789474e-05, "loss": 0.5919, "step": 26966 }, { "epoch": 1.5100795161832232, "grad_norm": 1.2536100149154663, "learning_rate": 9.817e-05, "loss": 0.3703, "step": 26967 }, { "epoch": 1.5101355134953522, "grad_norm": 1.2065856456756592, "learning_rate": 9.816973684210526e-05, "loss": 0.4474, "step": 26968 }, { "epoch": 1.5101915108074813, "grad_norm": 1.3996970653533936, "learning_rate": 9.816947368421054e-05, "loss": 0.4775, "step": 26969 }, { "epoch": 
1.5102475081196103, "grad_norm": 1.4324966669082642, "learning_rate": 9.81692105263158e-05, "loss": 0.4612, "step": 26970 }, { "epoch": 1.5103035054317393, "grad_norm": 1.1891260147094727, "learning_rate": 9.816894736842106e-05, "loss": 0.5276, "step": 26971 }, { "epoch": 1.5103595027438683, "grad_norm": 1.230858564376831, "learning_rate": 9.816868421052632e-05, "loss": 0.592, "step": 26972 }, { "epoch": 1.5104155000559973, "grad_norm": 1.4232933521270752, "learning_rate": 9.816842105263159e-05, "loss": 0.499, "step": 26973 }, { "epoch": 1.5104714973681264, "grad_norm": 1.3536614179611206, "learning_rate": 9.816815789473685e-05, "loss": 0.3909, "step": 26974 }, { "epoch": 1.5105274946802554, "grad_norm": 2.4492759704589844, "learning_rate": 9.816789473684211e-05, "loss": 0.5383, "step": 26975 }, { "epoch": 1.5105834919923844, "grad_norm": 1.141347885131836, "learning_rate": 9.816763157894737e-05, "loss": 0.4357, "step": 26976 }, { "epoch": 1.5106394893045134, "grad_norm": 1.2195390462875366, "learning_rate": 9.816736842105264e-05, "loss": 0.5351, "step": 26977 }, { "epoch": 1.5106954866166424, "grad_norm": 1.432437539100647, "learning_rate": 9.81671052631579e-05, "loss": 0.6622, "step": 26978 }, { "epoch": 1.5107514839287715, "grad_norm": 1.1447179317474365, "learning_rate": 9.816684210526316e-05, "loss": 0.3834, "step": 26979 }, { "epoch": 1.5108074812409005, "grad_norm": 1.3678417205810547, "learning_rate": 9.816657894736842e-05, "loss": 0.5131, "step": 26980 }, { "epoch": 1.5108634785530295, "grad_norm": 1.1029322147369385, "learning_rate": 9.816631578947368e-05, "loss": 0.4077, "step": 26981 }, { "epoch": 1.5109194758651585, "grad_norm": 1.260974645614624, "learning_rate": 9.816605263157895e-05, "loss": 0.5159, "step": 26982 }, { "epoch": 1.5109754731772875, "grad_norm": 1.2160358428955078, "learning_rate": 9.816578947368421e-05, "loss": 0.4207, "step": 26983 }, { "epoch": 1.5110314704894166, "grad_norm": 1.2865164279937744, "learning_rate": 
9.816552631578947e-05, "loss": 0.4125, "step": 26984 }, { "epoch": 1.5110874678015456, "grad_norm": 1.6537671089172363, "learning_rate": 9.816526315789473e-05, "loss": 0.6076, "step": 26985 }, { "epoch": 1.5111434651136746, "grad_norm": 1.1508857011795044, "learning_rate": 9.8165e-05, "loss": 0.4455, "step": 26986 }, { "epoch": 1.5111994624258036, "grad_norm": 1.6039947271347046, "learning_rate": 9.816473684210527e-05, "loss": 0.6007, "step": 26987 }, { "epoch": 1.5112554597379326, "grad_norm": 1.6592415571212769, "learning_rate": 9.816447368421054e-05, "loss": 0.5268, "step": 26988 }, { "epoch": 1.5113114570500616, "grad_norm": 1.210771083831787, "learning_rate": 9.816421052631579e-05, "loss": 0.4452, "step": 26989 }, { "epoch": 1.5113674543621907, "grad_norm": 1.40608549118042, "learning_rate": 9.816394736842106e-05, "loss": 0.4988, "step": 26990 }, { "epoch": 1.5114234516743197, "grad_norm": 2.0569067001342773, "learning_rate": 9.816368421052632e-05, "loss": 0.5539, "step": 26991 }, { "epoch": 1.5114794489864487, "grad_norm": 2.3576831817626953, "learning_rate": 9.816342105263159e-05, "loss": 0.6229, "step": 26992 }, { "epoch": 1.5115354462985777, "grad_norm": 1.2888567447662354, "learning_rate": 9.816315789473685e-05, "loss": 0.4938, "step": 26993 }, { "epoch": 1.5115914436107065, "grad_norm": 1.8300138711929321, "learning_rate": 9.816289473684211e-05, "loss": 0.6033, "step": 26994 }, { "epoch": 1.5116474409228355, "grad_norm": 1.290547251701355, "learning_rate": 9.816263157894737e-05, "loss": 0.4041, "step": 26995 }, { "epoch": 1.5117034382349646, "grad_norm": 1.5086348056793213, "learning_rate": 9.816236842105264e-05, "loss": 0.5601, "step": 26996 }, { "epoch": 1.5117594355470936, "grad_norm": 1.2090970277786255, "learning_rate": 9.81621052631579e-05, "loss": 0.4098, "step": 26997 }, { "epoch": 1.5118154328592226, "grad_norm": 1.125792384147644, "learning_rate": 9.816184210526316e-05, "loss": 0.3752, "step": 26998 }, { "epoch": 1.5118714301713516, 
"grad_norm": 1.3590054512023926, "learning_rate": 9.816157894736842e-05, "loss": 0.4998, "step": 26999 }, { "epoch": 1.5119274274834806, "grad_norm": 1.4590128660202026, "learning_rate": 9.816131578947368e-05, "loss": 0.4472, "step": 27000 }, { "epoch": 1.5119834247956097, "grad_norm": 1.3467227220535278, "learning_rate": 9.816105263157896e-05, "loss": 0.5055, "step": 27001 }, { "epoch": 1.5120394221077387, "grad_norm": 1.2745952606201172, "learning_rate": 9.816078947368422e-05, "loss": 0.5036, "step": 27002 }, { "epoch": 1.5120954194198677, "grad_norm": 1.2922078371047974, "learning_rate": 9.816052631578948e-05, "loss": 0.5639, "step": 27003 }, { "epoch": 1.5121514167319967, "grad_norm": 1.372616171836853, "learning_rate": 9.816026315789474e-05, "loss": 0.5246, "step": 27004 }, { "epoch": 1.5122074140441257, "grad_norm": 1.3429547548294067, "learning_rate": 9.816000000000001e-05, "loss": 0.419, "step": 27005 }, { "epoch": 1.5122634113562547, "grad_norm": 1.0865153074264526, "learning_rate": 9.815973684210527e-05, "loss": 0.3538, "step": 27006 }, { "epoch": 1.5123194086683838, "grad_norm": 1.3962829113006592, "learning_rate": 9.815947368421053e-05, "loss": 0.454, "step": 27007 }, { "epoch": 1.5123754059805128, "grad_norm": 1.2971525192260742, "learning_rate": 9.815921052631579e-05, "loss": 0.4568, "step": 27008 }, { "epoch": 1.5124314032926418, "grad_norm": 3.0007100105285645, "learning_rate": 9.815894736842106e-05, "loss": 0.3183, "step": 27009 }, { "epoch": 1.5124874006047708, "grad_norm": 1.372809648513794, "learning_rate": 9.815868421052632e-05, "loss": 0.5886, "step": 27010 }, { "epoch": 1.5125433979168998, "grad_norm": 1.3756669759750366, "learning_rate": 9.81584210526316e-05, "loss": 0.4924, "step": 27011 }, { "epoch": 1.5125993952290289, "grad_norm": 1.1371204853057861, "learning_rate": 9.815815789473684e-05, "loss": 0.3745, "step": 27012 }, { "epoch": 1.5126553925411579, "grad_norm": 1.4544603824615479, "learning_rate": 9.815789473684211e-05, "loss": 
0.4664, "step": 27013 }, { "epoch": 1.512711389853287, "grad_norm": 1.2304860353469849, "learning_rate": 9.815763157894737e-05, "loss": 0.4754, "step": 27014 }, { "epoch": 1.512767387165416, "grad_norm": 1.2915074825286865, "learning_rate": 9.815736842105263e-05, "loss": 0.4477, "step": 27015 }, { "epoch": 1.512823384477545, "grad_norm": 2.008554697036743, "learning_rate": 9.81571052631579e-05, "loss": 0.7653, "step": 27016 }, { "epoch": 1.512879381789674, "grad_norm": 1.2905584573745728, "learning_rate": 9.815684210526315e-05, "loss": 0.4139, "step": 27017 }, { "epoch": 1.512935379101803, "grad_norm": 1.3069546222686768, "learning_rate": 9.815657894736843e-05, "loss": 0.5049, "step": 27018 }, { "epoch": 1.512991376413932, "grad_norm": 1.5047800540924072, "learning_rate": 9.815631578947369e-05, "loss": 0.4656, "step": 27019 }, { "epoch": 1.513047373726061, "grad_norm": 1.1808758974075317, "learning_rate": 9.815605263157896e-05, "loss": 0.3925, "step": 27020 }, { "epoch": 1.51310337103819, "grad_norm": 1.5783368349075317, "learning_rate": 9.815578947368422e-05, "loss": 0.4801, "step": 27021 }, { "epoch": 1.513159368350319, "grad_norm": 1.9102554321289062, "learning_rate": 9.815552631578948e-05, "loss": 0.5058, "step": 27022 }, { "epoch": 1.513215365662448, "grad_norm": 1.4686520099639893, "learning_rate": 9.815526315789474e-05, "loss": 0.5082, "step": 27023 }, { "epoch": 1.513271362974577, "grad_norm": 1.7134006023406982, "learning_rate": 9.815500000000001e-05, "loss": 0.5512, "step": 27024 }, { "epoch": 1.5133273602867061, "grad_norm": 1.4265936613082886, "learning_rate": 9.815473684210527e-05, "loss": 0.5363, "step": 27025 }, { "epoch": 1.5133833575988351, "grad_norm": 2.0115835666656494, "learning_rate": 9.815447368421053e-05, "loss": 0.5573, "step": 27026 }, { "epoch": 1.5134393549109642, "grad_norm": 1.341893196105957, "learning_rate": 9.815421052631579e-05, "loss": 0.5759, "step": 27027 }, { "epoch": 1.5134953522230932, "grad_norm": 4.912156581878662, 
"learning_rate": 9.815394736842106e-05, "loss": 0.5019, "step": 27028 }, { "epoch": 1.5135513495352222, "grad_norm": 1.7848292589187622, "learning_rate": 9.815368421052632e-05, "loss": 0.5088, "step": 27029 }, { "epoch": 1.5136073468473512, "grad_norm": 2.5112762451171875, "learning_rate": 9.815342105263158e-05, "loss": 0.5038, "step": 27030 }, { "epoch": 1.5136633441594802, "grad_norm": 1.1688530445098877, "learning_rate": 9.815315789473684e-05, "loss": 0.437, "step": 27031 }, { "epoch": 1.5137193414716092, "grad_norm": 1.1913703680038452, "learning_rate": 9.81528947368421e-05, "loss": 0.5217, "step": 27032 }, { "epoch": 1.5137753387837383, "grad_norm": 1.3893768787384033, "learning_rate": 9.815263157894738e-05, "loss": 0.4392, "step": 27033 }, { "epoch": 1.5138313360958673, "grad_norm": 1.5970072746276855, "learning_rate": 9.815236842105264e-05, "loss": 0.5538, "step": 27034 }, { "epoch": 1.5138873334079963, "grad_norm": 1.2803162336349487, "learning_rate": 9.81521052631579e-05, "loss": 0.4452, "step": 27035 }, { "epoch": 1.5139433307201253, "grad_norm": 1.2993170022964478, "learning_rate": 9.815184210526316e-05, "loss": 0.5912, "step": 27036 }, { "epoch": 1.5139993280322543, "grad_norm": 1.235471487045288, "learning_rate": 9.815157894736843e-05, "loss": 0.5795, "step": 27037 }, { "epoch": 1.5140553253443834, "grad_norm": 1.541257619857788, "learning_rate": 9.815131578947369e-05, "loss": 0.5014, "step": 27038 }, { "epoch": 1.5141113226565124, "grad_norm": 1.2752493619918823, "learning_rate": 9.815105263157895e-05, "loss": 0.4493, "step": 27039 }, { "epoch": 1.5141673199686414, "grad_norm": 1.2818028926849365, "learning_rate": 9.815078947368421e-05, "loss": 0.5289, "step": 27040 }, { "epoch": 1.5142233172807704, "grad_norm": 1.458085060119629, "learning_rate": 9.815052631578948e-05, "loss": 0.5055, "step": 27041 }, { "epoch": 1.5142793145928994, "grad_norm": 1.3541269302368164, "learning_rate": 9.815026315789474e-05, "loss": 0.447, "step": 27042 }, { "epoch": 
1.5143353119050285, "grad_norm": 1.884742021560669, "learning_rate": 9.815000000000001e-05, "loss": 0.4487, "step": 27043 }, { "epoch": 1.5143913092171575, "grad_norm": 1.2877849340438843, "learning_rate": 9.814973684210526e-05, "loss": 0.5738, "step": 27044 }, { "epoch": 1.5144473065292865, "grad_norm": 1.541459560394287, "learning_rate": 9.814947368421053e-05, "loss": 0.5417, "step": 27045 }, { "epoch": 1.5145033038414155, "grad_norm": 1.1358022689819336, "learning_rate": 9.814921052631579e-05, "loss": 0.5055, "step": 27046 }, { "epoch": 1.5145593011535445, "grad_norm": 1.6166036128997803, "learning_rate": 9.814894736842107e-05, "loss": 0.3977, "step": 27047 }, { "epoch": 1.5146152984656736, "grad_norm": 1.3530287742614746, "learning_rate": 9.814868421052633e-05, "loss": 0.5103, "step": 27048 }, { "epoch": 1.5146712957778026, "grad_norm": 1.3396238088607788, "learning_rate": 9.814842105263157e-05, "loss": 0.3918, "step": 27049 }, { "epoch": 1.5147272930899316, "grad_norm": 1.286135196685791, "learning_rate": 9.814815789473685e-05, "loss": 0.3937, "step": 27050 }, { "epoch": 1.5147832904020606, "grad_norm": 1.4488623142242432, "learning_rate": 9.81478947368421e-05, "loss": 0.4194, "step": 27051 }, { "epoch": 1.5148392877141896, "grad_norm": 1.2895300388336182, "learning_rate": 9.814763157894738e-05, "loss": 0.4557, "step": 27052 }, { "epoch": 1.5148952850263186, "grad_norm": 1.3724356889724731, "learning_rate": 9.814736842105264e-05, "loss": 0.4399, "step": 27053 }, { "epoch": 1.5149512823384477, "grad_norm": 1.2225584983825684, "learning_rate": 9.81471052631579e-05, "loss": 0.4237, "step": 27054 }, { "epoch": 1.5150072796505767, "grad_norm": 1.3517637252807617, "learning_rate": 9.814684210526316e-05, "loss": 0.5057, "step": 27055 }, { "epoch": 1.5150632769627057, "grad_norm": 1.3071893453598022, "learning_rate": 9.814657894736843e-05, "loss": 0.4281, "step": 27056 }, { "epoch": 1.5151192742748347, "grad_norm": 1.7707455158233643, "learning_rate": 
9.814631578947369e-05, "loss": 0.5988, "step": 27057 }, { "epoch": 1.5151752715869637, "grad_norm": 1.6087961196899414, "learning_rate": 9.814605263157895e-05, "loss": 0.6235, "step": 27058 }, { "epoch": 1.5152312688990928, "grad_norm": 1.6496331691741943, "learning_rate": 9.814578947368421e-05, "loss": 0.4991, "step": 27059 }, { "epoch": 1.5152872662112218, "grad_norm": 1.4258060455322266, "learning_rate": 9.814552631578948e-05, "loss": 0.4091, "step": 27060 }, { "epoch": 1.5153432635233508, "grad_norm": 1.502301812171936, "learning_rate": 9.814526315789474e-05, "loss": 0.4726, "step": 27061 }, { "epoch": 1.5153992608354798, "grad_norm": 1.0531821250915527, "learning_rate": 9.8145e-05, "loss": 0.4544, "step": 27062 }, { "epoch": 1.5154552581476088, "grad_norm": 1.3646159172058105, "learning_rate": 9.814473684210526e-05, "loss": 0.4808, "step": 27063 }, { "epoch": 1.5155112554597379, "grad_norm": 1.2903409004211426, "learning_rate": 9.814447368421054e-05, "loss": 0.455, "step": 27064 }, { "epoch": 1.5155672527718669, "grad_norm": 1.236262559890747, "learning_rate": 9.81442105263158e-05, "loss": 0.4925, "step": 27065 }, { "epoch": 1.515623250083996, "grad_norm": 1.5894114971160889, "learning_rate": 9.814394736842106e-05, "loss": 0.4423, "step": 27066 }, { "epoch": 1.515679247396125, "grad_norm": 1.3579305410385132, "learning_rate": 9.814368421052632e-05, "loss": 0.3986, "step": 27067 }, { "epoch": 1.515735244708254, "grad_norm": 1.482695460319519, "learning_rate": 9.814342105263157e-05, "loss": 0.4451, "step": 27068 }, { "epoch": 1.515791242020383, "grad_norm": 1.119757056236267, "learning_rate": 9.814315789473685e-05, "loss": 0.4088, "step": 27069 }, { "epoch": 1.515847239332512, "grad_norm": 1.4656729698181152, "learning_rate": 9.814289473684211e-05, "loss": 0.5124, "step": 27070 }, { "epoch": 1.515903236644641, "grad_norm": 1.4599612951278687, "learning_rate": 9.814263157894738e-05, "loss": 0.5764, "step": 27071 }, { "epoch": 1.51595923395677, "grad_norm": 
1.2479785680770874, "learning_rate": 9.814236842105263e-05, "loss": 0.4453, "step": 27072 }, { "epoch": 1.516015231268899, "grad_norm": 1.2437714338302612, "learning_rate": 9.81421052631579e-05, "loss": 0.3318, "step": 27073 }, { "epoch": 1.516071228581028, "grad_norm": 1.187427043914795, "learning_rate": 9.814184210526316e-05, "loss": 0.408, "step": 27074 }, { "epoch": 1.516127225893157, "grad_norm": 2.3595211505889893, "learning_rate": 9.814157894736843e-05, "loss": 0.4369, "step": 27075 }, { "epoch": 1.516183223205286, "grad_norm": 1.1709308624267578, "learning_rate": 9.81413157894737e-05, "loss": 0.3735, "step": 27076 }, { "epoch": 1.516239220517415, "grad_norm": 1.4980545043945312, "learning_rate": 9.814105263157895e-05, "loss": 0.558, "step": 27077 }, { "epoch": 1.5162952178295441, "grad_norm": 1.3856656551361084, "learning_rate": 9.814078947368421e-05, "loss": 0.543, "step": 27078 }, { "epoch": 1.5163512151416731, "grad_norm": 1.2116948366165161, "learning_rate": 9.814052631578949e-05, "loss": 0.4328, "step": 27079 }, { "epoch": 1.5164072124538022, "grad_norm": 1.1827560663223267, "learning_rate": 9.814026315789475e-05, "loss": 0.4754, "step": 27080 }, { "epoch": 1.5164632097659312, "grad_norm": 1.2897484302520752, "learning_rate": 9.814e-05, "loss": 0.3885, "step": 27081 }, { "epoch": 1.5165192070780602, "grad_norm": 1.2172999382019043, "learning_rate": 9.813973684210527e-05, "loss": 0.4431, "step": 27082 }, { "epoch": 1.5165752043901892, "grad_norm": 1.2611318826675415, "learning_rate": 9.813947368421053e-05, "loss": 0.4159, "step": 27083 }, { "epoch": 1.5166312017023182, "grad_norm": 1.3078234195709229, "learning_rate": 9.81392105263158e-05, "loss": 0.4671, "step": 27084 }, { "epoch": 1.5166871990144473, "grad_norm": 1.3110862970352173, "learning_rate": 9.813894736842106e-05, "loss": 0.4017, "step": 27085 }, { "epoch": 1.5167431963265763, "grad_norm": 1.41542649269104, "learning_rate": 9.813868421052632e-05, "loss": 0.4007, "step": 27086 }, { "epoch": 
1.5167991936387053, "grad_norm": 1.227420449256897, "learning_rate": 9.813842105263158e-05, "loss": 0.5873, "step": 27087 }, { "epoch": 1.5168551909508343, "grad_norm": 1.150922417640686, "learning_rate": 9.813815789473685e-05, "loss": 0.4562, "step": 27088 }, { "epoch": 1.5169111882629633, "grad_norm": 1.364452600479126, "learning_rate": 9.813789473684211e-05, "loss": 0.5006, "step": 27089 }, { "epoch": 1.5169671855750924, "grad_norm": 1.3885174989700317, "learning_rate": 9.813763157894737e-05, "loss": 0.4819, "step": 27090 }, { "epoch": 1.5170231828872214, "grad_norm": 1.5120718479156494, "learning_rate": 9.813736842105263e-05, "loss": 0.4843, "step": 27091 }, { "epoch": 1.5170791801993504, "grad_norm": 1.4497497081756592, "learning_rate": 9.81371052631579e-05, "loss": 0.413, "step": 27092 }, { "epoch": 1.5171351775114794, "grad_norm": 1.415647029876709, "learning_rate": 9.813684210526316e-05, "loss": 0.4852, "step": 27093 }, { "epoch": 1.5171911748236084, "grad_norm": 1.0697673559188843, "learning_rate": 9.813657894736842e-05, "loss": 0.3982, "step": 27094 }, { "epoch": 1.5172471721357375, "grad_norm": 1.2788331508636475, "learning_rate": 9.813631578947368e-05, "loss": 0.4982, "step": 27095 }, { "epoch": 1.5173031694478665, "grad_norm": 1.8143675327301025, "learning_rate": 9.813605263157896e-05, "loss": 0.4638, "step": 27096 }, { "epoch": 1.5173591667599955, "grad_norm": 1.411655306816101, "learning_rate": 9.813578947368422e-05, "loss": 0.52, "step": 27097 }, { "epoch": 1.5174151640721245, "grad_norm": 2.727475166320801, "learning_rate": 9.813552631578949e-05, "loss": 0.4718, "step": 27098 }, { "epoch": 1.5174711613842535, "grad_norm": 1.9955580234527588, "learning_rate": 9.813526315789473e-05, "loss": 0.6964, "step": 27099 }, { "epoch": 1.5175271586963825, "grad_norm": 1.6153862476348877, "learning_rate": 9.8135e-05, "loss": 0.4741, "step": 27100 }, { "epoch": 1.5175831560085116, "grad_norm": 1.4560762643814087, "learning_rate": 9.813473684210527e-05, "loss": 
0.5971, "step": 27101 }, { "epoch": 1.5176391533206406, "grad_norm": 1.269814133644104, "learning_rate": 9.813447368421053e-05, "loss": 0.5312, "step": 27102 }, { "epoch": 1.5176951506327696, "grad_norm": 1.0865706205368042, "learning_rate": 9.81342105263158e-05, "loss": 0.3884, "step": 27103 }, { "epoch": 1.5177511479448986, "grad_norm": 1.2786933183670044, "learning_rate": 9.813394736842105e-05, "loss": 0.3779, "step": 27104 }, { "epoch": 1.5178071452570276, "grad_norm": 1.280717134475708, "learning_rate": 9.813368421052632e-05, "loss": 0.3973, "step": 27105 }, { "epoch": 1.5178631425691567, "grad_norm": 1.2063264846801758, "learning_rate": 9.813342105263158e-05, "loss": 0.4159, "step": 27106 }, { "epoch": 1.5179191398812857, "grad_norm": 1.1776610612869263, "learning_rate": 9.813315789473685e-05, "loss": 0.4355, "step": 27107 }, { "epoch": 1.5179751371934147, "grad_norm": 1.3531817197799683, "learning_rate": 9.813289473684211e-05, "loss": 0.4998, "step": 27108 }, { "epoch": 1.5180311345055437, "grad_norm": 1.4713177680969238, "learning_rate": 9.813263157894737e-05, "loss": 0.5086, "step": 27109 }, { "epoch": 1.5180871318176727, "grad_norm": 1.3408626317977905, "learning_rate": 9.813236842105263e-05, "loss": 0.3993, "step": 27110 }, { "epoch": 1.5181431291298018, "grad_norm": 1.6550896167755127, "learning_rate": 9.81321052631579e-05, "loss": 0.5474, "step": 27111 }, { "epoch": 1.5181991264419308, "grad_norm": 1.925497055053711, "learning_rate": 9.813184210526317e-05, "loss": 0.4303, "step": 27112 }, { "epoch": 1.5182551237540598, "grad_norm": 1.4134142398834229, "learning_rate": 9.813157894736843e-05, "loss": 0.502, "step": 27113 }, { "epoch": 1.5183111210661888, "grad_norm": 1.3809471130371094, "learning_rate": 9.813131578947369e-05, "loss": 0.5189, "step": 27114 }, { "epoch": 1.5183671183783178, "grad_norm": 1.0960626602172852, "learning_rate": 9.813105263157896e-05, "loss": 0.422, "step": 27115 }, { "epoch": 1.5184231156904469, "grad_norm": 1.3033056259155273, 
"learning_rate": 9.813078947368422e-05, "loss": 0.4158, "step": 27116 }, { "epoch": 1.5184791130025759, "grad_norm": 1.3114839792251587, "learning_rate": 9.813052631578948e-05, "loss": 0.4797, "step": 27117 }, { "epoch": 1.518535110314705, "grad_norm": 1.7351293563842773, "learning_rate": 9.813026315789474e-05, "loss": 0.4195, "step": 27118 }, { "epoch": 1.518591107626834, "grad_norm": 1.5991266965866089, "learning_rate": 9.813e-05, "loss": 0.3638, "step": 27119 }, { "epoch": 1.518647104938963, "grad_norm": 1.3073616027832031, "learning_rate": 9.812973684210527e-05, "loss": 0.3962, "step": 27120 }, { "epoch": 1.518703102251092, "grad_norm": 1.023710012435913, "learning_rate": 9.812947368421053e-05, "loss": 0.461, "step": 27121 }, { "epoch": 1.518759099563221, "grad_norm": 1.4045839309692383, "learning_rate": 9.812921052631579e-05, "loss": 0.4891, "step": 27122 }, { "epoch": 1.51881509687535, "grad_norm": 1.2008012533187866, "learning_rate": 9.812894736842105e-05, "loss": 0.3799, "step": 27123 }, { "epoch": 1.518871094187479, "grad_norm": 1.2304240465164185, "learning_rate": 9.812868421052632e-05, "loss": 0.4843, "step": 27124 }, { "epoch": 1.518927091499608, "grad_norm": 1.461506724357605, "learning_rate": 9.812842105263158e-05, "loss": 0.4795, "step": 27125 }, { "epoch": 1.518983088811737, "grad_norm": 1.3854840993881226, "learning_rate": 9.812815789473686e-05, "loss": 0.4384, "step": 27126 }, { "epoch": 1.519039086123866, "grad_norm": 1.8799121379852295, "learning_rate": 9.81278947368421e-05, "loss": 0.6213, "step": 27127 }, { "epoch": 1.519095083435995, "grad_norm": 1.2859798669815063, "learning_rate": 9.812763157894738e-05, "loss": 0.4172, "step": 27128 }, { "epoch": 1.519151080748124, "grad_norm": 1.3912931680679321, "learning_rate": 9.812736842105264e-05, "loss": 0.4523, "step": 27129 }, { "epoch": 1.5192070780602531, "grad_norm": 1.075352430343628, "learning_rate": 9.812710526315791e-05, "loss": 0.37, "step": 27130 }, { "epoch": 1.5192630753723821, 
"grad_norm": 1.4183050394058228, "learning_rate": 9.812684210526317e-05, "loss": 0.408, "step": 27131 }, { "epoch": 1.5193190726845112, "grad_norm": 1.3998409509658813, "learning_rate": 9.812657894736843e-05, "loss": 0.6337, "step": 27132 }, { "epoch": 1.5193750699966402, "grad_norm": 1.930840015411377, "learning_rate": 9.812631578947369e-05, "loss": 0.5001, "step": 27133 }, { "epoch": 1.5194310673087692, "grad_norm": 1.0706380605697632, "learning_rate": 9.812605263157896e-05, "loss": 0.3335, "step": 27134 }, { "epoch": 1.5194870646208982, "grad_norm": 2.313913106918335, "learning_rate": 9.812578947368422e-05, "loss": 0.6187, "step": 27135 }, { "epoch": 1.5195430619330272, "grad_norm": 1.260740041732788, "learning_rate": 9.812552631578947e-05, "loss": 0.4128, "step": 27136 }, { "epoch": 1.5195990592451563, "grad_norm": 3.1872777938842773, "learning_rate": 9.812526315789474e-05, "loss": 0.4337, "step": 27137 }, { "epoch": 1.5196550565572853, "grad_norm": 1.3023390769958496, "learning_rate": 9.8125e-05, "loss": 0.3654, "step": 27138 }, { "epoch": 1.5197110538694143, "grad_norm": 1.215165376663208, "learning_rate": 9.812473684210527e-05, "loss": 0.3756, "step": 27139 }, { "epoch": 1.5197670511815433, "grad_norm": 1.2889282703399658, "learning_rate": 9.812447368421053e-05, "loss": 0.4967, "step": 27140 }, { "epoch": 1.5198230484936723, "grad_norm": 1.33611261844635, "learning_rate": 9.812421052631579e-05, "loss": 0.3644, "step": 27141 }, { "epoch": 1.5198790458058014, "grad_norm": 1.5400924682617188, "learning_rate": 9.812394736842105e-05, "loss": 0.6058, "step": 27142 }, { "epoch": 1.5199350431179304, "grad_norm": 1.2969951629638672, "learning_rate": 9.812368421052633e-05, "loss": 0.707, "step": 27143 }, { "epoch": 1.5199910404300594, "grad_norm": 1.2953400611877441, "learning_rate": 9.812342105263159e-05, "loss": 0.5529, "step": 27144 }, { "epoch": 1.5200470377421884, "grad_norm": 1.1095150709152222, "learning_rate": 9.812315789473685e-05, "loss": 0.3975, "step": 
27145 }, { "epoch": 1.5201030350543174, "grad_norm": 1.2372747659683228, "learning_rate": 9.81228947368421e-05, "loss": 0.5075, "step": 27146 }, { "epoch": 1.5201590323664464, "grad_norm": 1.6907185316085815, "learning_rate": 9.812263157894738e-05, "loss": 0.6013, "step": 27147 }, { "epoch": 1.5202150296785755, "grad_norm": 1.5320686101913452, "learning_rate": 9.812236842105264e-05, "loss": 0.5221, "step": 27148 }, { "epoch": 1.5202710269907045, "grad_norm": 1.3951154947280884, "learning_rate": 9.81221052631579e-05, "loss": 0.6272, "step": 27149 }, { "epoch": 1.5203270243028335, "grad_norm": 1.3134037256240845, "learning_rate": 9.812184210526316e-05, "loss": 0.4784, "step": 27150 }, { "epoch": 1.5203830216149625, "grad_norm": 1.2420580387115479, "learning_rate": 9.812157894736843e-05, "loss": 0.4082, "step": 27151 }, { "epoch": 1.5204390189270915, "grad_norm": 1.3023356199264526, "learning_rate": 9.812131578947369e-05, "loss": 0.5054, "step": 27152 }, { "epoch": 1.5204950162392206, "grad_norm": 4.242269515991211, "learning_rate": 9.812105263157895e-05, "loss": 0.3689, "step": 27153 }, { "epoch": 1.5205510135513496, "grad_norm": 1.17594575881958, "learning_rate": 9.812078947368421e-05, "loss": 0.391, "step": 27154 }, { "epoch": 1.5206070108634786, "grad_norm": 1.258170247077942, "learning_rate": 9.812052631578947e-05, "loss": 0.4083, "step": 27155 }, { "epoch": 1.5206630081756076, "grad_norm": 1.5187753438949585, "learning_rate": 9.812026315789474e-05, "loss": 0.6817, "step": 27156 }, { "epoch": 1.5207190054877366, "grad_norm": 1.2015737295150757, "learning_rate": 9.812e-05, "loss": 0.4681, "step": 27157 }, { "epoch": 1.5207750027998657, "grad_norm": 1.2637263536453247, "learning_rate": 9.811973684210528e-05, "loss": 0.3941, "step": 27158 }, { "epoch": 1.5208310001119947, "grad_norm": 1.280678153038025, "learning_rate": 9.811947368421052e-05, "loss": 0.4573, "step": 27159 }, { "epoch": 1.5208869974241237, "grad_norm": 1.2184869050979614, "learning_rate": 
9.81192105263158e-05, "loss": 0.4618, "step": 27160 }, { "epoch": 1.5209429947362527, "grad_norm": 1.2928537130355835, "learning_rate": 9.811894736842105e-05, "loss": 0.3909, "step": 27161 }, { "epoch": 1.5209989920483817, "grad_norm": 1.3666313886642456, "learning_rate": 9.811868421052633e-05, "loss": 0.4929, "step": 27162 }, { "epoch": 1.5210549893605108, "grad_norm": 1.306142807006836, "learning_rate": 9.811842105263159e-05, "loss": 0.4229, "step": 27163 }, { "epoch": 1.5211109866726398, "grad_norm": 2.1070852279663086, "learning_rate": 9.811815789473685e-05, "loss": 0.4423, "step": 27164 }, { "epoch": 1.5211669839847688, "grad_norm": 1.2931374311447144, "learning_rate": 9.811789473684211e-05, "loss": 0.4635, "step": 27165 }, { "epoch": 1.5212229812968978, "grad_norm": 1.4965428113937378, "learning_rate": 9.811763157894738e-05, "loss": 0.5256, "step": 27166 }, { "epoch": 1.5212789786090268, "grad_norm": 1.2619374990463257, "learning_rate": 9.811736842105264e-05, "loss": 0.4944, "step": 27167 }, { "epoch": 1.5213349759211559, "grad_norm": 1.268546462059021, "learning_rate": 9.81171052631579e-05, "loss": 0.4044, "step": 27168 }, { "epoch": 1.5213909732332849, "grad_norm": 1.2187644243240356, "learning_rate": 9.811684210526316e-05, "loss": 0.3879, "step": 27169 }, { "epoch": 1.521446970545414, "grad_norm": 1.40371835231781, "learning_rate": 9.811657894736842e-05, "loss": 0.4912, "step": 27170 }, { "epoch": 1.521502967857543, "grad_norm": 2.225950002670288, "learning_rate": 9.811631578947369e-05, "loss": 0.512, "step": 27171 }, { "epoch": 1.521558965169672, "grad_norm": 2.1730637550354004, "learning_rate": 9.811605263157895e-05, "loss": 0.4606, "step": 27172 }, { "epoch": 1.521614962481801, "grad_norm": 1.4855023622512817, "learning_rate": 9.811578947368421e-05, "loss": 0.4031, "step": 27173 }, { "epoch": 1.52167095979393, "grad_norm": 1.6859471797943115, "learning_rate": 9.811552631578947e-05, "loss": 0.5472, "step": 27174 }, { "epoch": 1.521726957106059, 
"grad_norm": 1.3349955081939697, "learning_rate": 9.811526315789475e-05, "loss": 0.6914, "step": 27175 }, { "epoch": 1.521782954418188, "grad_norm": 1.1871305704116821, "learning_rate": 9.8115e-05, "loss": 0.3831, "step": 27176 }, { "epoch": 1.521838951730317, "grad_norm": 1.4136004447937012, "learning_rate": 9.811473684210526e-05, "loss": 0.4222, "step": 27177 }, { "epoch": 1.521894949042446, "grad_norm": 1.245827317237854, "learning_rate": 9.811447368421052e-05, "loss": 0.4735, "step": 27178 }, { "epoch": 1.521950946354575, "grad_norm": 1.1601252555847168, "learning_rate": 9.81142105263158e-05, "loss": 0.3037, "step": 27179 }, { "epoch": 1.522006943666704, "grad_norm": 1.2845308780670166, "learning_rate": 9.811394736842106e-05, "loss": 0.4207, "step": 27180 }, { "epoch": 1.522062940978833, "grad_norm": 1.2875018119812012, "learning_rate": 9.811368421052633e-05, "loss": 0.5284, "step": 27181 }, { "epoch": 1.5221189382909621, "grad_norm": 1.3341776132583618, "learning_rate": 9.811342105263158e-05, "loss": 0.4887, "step": 27182 }, { "epoch": 1.5221749356030911, "grad_norm": 1.4460558891296387, "learning_rate": 9.811315789473685e-05, "loss": 0.4795, "step": 27183 }, { "epoch": 1.5222309329152202, "grad_norm": 1.4187291860580444, "learning_rate": 9.811289473684211e-05, "loss": 0.6419, "step": 27184 }, { "epoch": 1.5222869302273492, "grad_norm": 1.1622968912124634, "learning_rate": 9.811263157894738e-05, "loss": 0.3809, "step": 27185 }, { "epoch": 1.5223429275394782, "grad_norm": 1.2642873525619507, "learning_rate": 9.811236842105263e-05, "loss": 0.4248, "step": 27186 }, { "epoch": 1.5223989248516072, "grad_norm": 1.4572975635528564, "learning_rate": 9.811210526315789e-05, "loss": 0.7081, "step": 27187 }, { "epoch": 1.5224549221637362, "grad_norm": 1.3987679481506348, "learning_rate": 9.811184210526316e-05, "loss": 0.606, "step": 27188 }, { "epoch": 1.5225109194758653, "grad_norm": 1.2974300384521484, "learning_rate": 9.811157894736842e-05, "loss": 0.4245, "step": 
27189 }, { "epoch": 1.5225669167879943, "grad_norm": 1.381773829460144, "learning_rate": 9.81113157894737e-05, "loss": 0.465, "step": 27190 }, { "epoch": 1.5226229141001233, "grad_norm": 1.5028408765792847, "learning_rate": 9.811105263157894e-05, "loss": 0.4995, "step": 27191 }, { "epoch": 1.5226789114122523, "grad_norm": 1.1616239547729492, "learning_rate": 9.811078947368421e-05, "loss": 0.4463, "step": 27192 }, { "epoch": 1.5227349087243813, "grad_norm": 1.256756067276001, "learning_rate": 9.811052631578947e-05, "loss": 0.396, "step": 27193 }, { "epoch": 1.5227909060365103, "grad_norm": 1.2919772863388062, "learning_rate": 9.811026315789475e-05, "loss": 0.5616, "step": 27194 }, { "epoch": 1.5228469033486394, "grad_norm": 1.1837810277938843, "learning_rate": 9.811000000000001e-05, "loss": 0.4797, "step": 27195 }, { "epoch": 1.5229029006607684, "grad_norm": 2.169356346130371, "learning_rate": 9.810973684210527e-05, "loss": 0.4727, "step": 27196 }, { "epoch": 1.5229588979728974, "grad_norm": 1.384002923965454, "learning_rate": 9.810947368421053e-05, "loss": 0.4697, "step": 27197 }, { "epoch": 1.5230148952850264, "grad_norm": 1.2022258043289185, "learning_rate": 9.81092105263158e-05, "loss": 0.4269, "step": 27198 }, { "epoch": 1.5230708925971554, "grad_norm": 1.2444813251495361, "learning_rate": 9.810894736842106e-05, "loss": 0.5477, "step": 27199 }, { "epoch": 1.5231268899092845, "grad_norm": 1.3995620012283325, "learning_rate": 9.810868421052632e-05, "loss": 0.5408, "step": 27200 }, { "epoch": 1.5231828872214135, "grad_norm": 1.2903931140899658, "learning_rate": 9.810842105263158e-05, "loss": 0.3646, "step": 27201 }, { "epoch": 1.5232388845335425, "grad_norm": 1.0688526630401611, "learning_rate": 9.810815789473685e-05, "loss": 0.3268, "step": 27202 }, { "epoch": 1.5232948818456715, "grad_norm": 1.2227773666381836, "learning_rate": 9.810789473684211e-05, "loss": 0.4748, "step": 27203 }, { "epoch": 1.5233508791578005, "grad_norm": 1.2780729532241821, "learning_rate": 
9.810763157894737e-05, "loss": 0.3487, "step": 27204 }, { "epoch": 1.5234068764699296, "grad_norm": 1.2940983772277832, "learning_rate": 9.810736842105263e-05, "loss": 0.6563, "step": 27205 }, { "epoch": 1.5234628737820586, "grad_norm": 1.4650967121124268, "learning_rate": 9.810710526315789e-05, "loss": 0.413, "step": 27206 }, { "epoch": 1.5235188710941876, "grad_norm": 1.5916510820388794, "learning_rate": 9.810684210526317e-05, "loss": 0.4701, "step": 27207 }, { "epoch": 1.5235748684063166, "grad_norm": 1.412753939628601, "learning_rate": 9.810657894736842e-05, "loss": 0.5446, "step": 27208 }, { "epoch": 1.5236308657184456, "grad_norm": 1.569606900215149, "learning_rate": 9.810631578947368e-05, "loss": 0.3985, "step": 27209 }, { "epoch": 1.5236868630305747, "grad_norm": 1.2182259559631348, "learning_rate": 9.810605263157894e-05, "loss": 0.3772, "step": 27210 }, { "epoch": 1.5237428603427037, "grad_norm": 1.3352302312850952, "learning_rate": 9.810578947368422e-05, "loss": 0.4234, "step": 27211 }, { "epoch": 1.5237988576548327, "grad_norm": 1.3450623750686646, "learning_rate": 9.810552631578948e-05, "loss": 0.5302, "step": 27212 }, { "epoch": 1.5238548549669617, "grad_norm": 1.124396800994873, "learning_rate": 9.810526315789475e-05, "loss": 0.4326, "step": 27213 }, { "epoch": 1.5239108522790907, "grad_norm": 1.2622078657150269, "learning_rate": 9.8105e-05, "loss": 0.4639, "step": 27214 }, { "epoch": 1.5239668495912198, "grad_norm": 1.2417024374008179, "learning_rate": 9.810473684210527e-05, "loss": 0.4054, "step": 27215 }, { "epoch": 1.5240228469033488, "grad_norm": 1.3757787942886353, "learning_rate": 9.810447368421053e-05, "loss": 0.656, "step": 27216 }, { "epoch": 1.5240788442154778, "grad_norm": 3.607560873031616, "learning_rate": 9.81042105263158e-05, "loss": 0.5444, "step": 27217 }, { "epoch": 1.5241348415276068, "grad_norm": 1.2746714353561401, "learning_rate": 9.810394736842106e-05, "loss": 0.4427, "step": 27218 }, { "epoch": 1.5241908388397358, "grad_norm": 
1.492066740989685, "learning_rate": 9.810368421052632e-05, "loss": 0.5981, "step": 27219 }, { "epoch": 1.5242468361518648, "grad_norm": 1.1095901727676392, "learning_rate": 9.810342105263158e-05, "loss": 0.3708, "step": 27220 }, { "epoch": 1.5243028334639939, "grad_norm": 1.349563479423523, "learning_rate": 9.810315789473684e-05, "loss": 0.4297, "step": 27221 }, { "epoch": 1.5243588307761229, "grad_norm": 1.4883623123168945, "learning_rate": 9.810289473684212e-05, "loss": 0.3863, "step": 27222 }, { "epoch": 1.524414828088252, "grad_norm": 1.3130929470062256, "learning_rate": 9.810263157894737e-05, "loss": 0.406, "step": 27223 }, { "epoch": 1.524470825400381, "grad_norm": 1.2860057353973389, "learning_rate": 9.810236842105263e-05, "loss": 0.4408, "step": 27224 }, { "epoch": 1.52452682271251, "grad_norm": 1.1853996515274048, "learning_rate": 9.81021052631579e-05, "loss": 0.3751, "step": 27225 }, { "epoch": 1.524582820024639, "grad_norm": 1.4100369215011597, "learning_rate": 9.810184210526317e-05, "loss": 0.4522, "step": 27226 }, { "epoch": 1.524638817336768, "grad_norm": 1.4437059164047241, "learning_rate": 9.810157894736843e-05, "loss": 0.5082, "step": 27227 }, { "epoch": 1.524694814648897, "grad_norm": 0.9887460470199585, "learning_rate": 9.810131578947369e-05, "loss": 0.4007, "step": 27228 }, { "epoch": 1.524750811961026, "grad_norm": 1.3373279571533203, "learning_rate": 9.810105263157895e-05, "loss": 0.4514, "step": 27229 }, { "epoch": 1.524806809273155, "grad_norm": 1.519224762916565, "learning_rate": 9.810078947368422e-05, "loss": 0.4418, "step": 27230 }, { "epoch": 1.524862806585284, "grad_norm": 1.5326309204101562, "learning_rate": 9.810052631578948e-05, "loss": 0.6816, "step": 27231 }, { "epoch": 1.524918803897413, "grad_norm": 1.2395530939102173, "learning_rate": 9.810026315789474e-05, "loss": 0.4167, "step": 27232 }, { "epoch": 1.524974801209542, "grad_norm": 1.1414129734039307, "learning_rate": 9.81e-05, "loss": 0.4036, "step": 27233 }, { "epoch": 
1.5250307985216711, "grad_norm": 1.3156803846359253, "learning_rate": 9.809973684210527e-05, "loss": 0.4922, "step": 27234 }, { "epoch": 1.5250867958338001, "grad_norm": 1.4350786209106445, "learning_rate": 9.809947368421053e-05, "loss": 0.5052, "step": 27235 }, { "epoch": 1.5251427931459292, "grad_norm": 1.3264518976211548, "learning_rate": 9.80992105263158e-05, "loss": 0.3631, "step": 27236 }, { "epoch": 1.5251987904580582, "grad_norm": 1.1088902950286865, "learning_rate": 9.809894736842105e-05, "loss": 0.3764, "step": 27237 }, { "epoch": 1.5252547877701872, "grad_norm": 1.1665536165237427, "learning_rate": 9.809868421052633e-05, "loss": 0.5014, "step": 27238 }, { "epoch": 1.5253107850823162, "grad_norm": 1.2686662673950195, "learning_rate": 9.809842105263158e-05, "loss": 0.4081, "step": 27239 }, { "epoch": 1.5253667823944452, "grad_norm": 2.089110851287842, "learning_rate": 9.809815789473684e-05, "loss": 0.4076, "step": 27240 }, { "epoch": 1.5254227797065743, "grad_norm": 1.2193576097488403, "learning_rate": 9.80978947368421e-05, "loss": 0.4736, "step": 27241 }, { "epoch": 1.525478777018703, "grad_norm": 1.1142656803131104, "learning_rate": 9.809763157894736e-05, "loss": 0.3628, "step": 27242 }, { "epoch": 1.525534774330832, "grad_norm": 2.5167746543884277, "learning_rate": 9.809736842105264e-05, "loss": 0.4503, "step": 27243 }, { "epoch": 1.525590771642961, "grad_norm": 1.2938402891159058, "learning_rate": 9.80971052631579e-05, "loss": 0.4976, "step": 27244 }, { "epoch": 1.52564676895509, "grad_norm": 1.2518161535263062, "learning_rate": 9.809684210526317e-05, "loss": 0.457, "step": 27245 }, { "epoch": 1.5257027662672191, "grad_norm": 1.3567074537277222, "learning_rate": 9.809657894736842e-05, "loss": 0.4738, "step": 27246 }, { "epoch": 1.5257587635793481, "grad_norm": 1.4210426807403564, "learning_rate": 9.809631578947369e-05, "loss": 0.4596, "step": 27247 }, { "epoch": 1.5258147608914772, "grad_norm": 1.4643398523330688, "learning_rate": 
9.809605263157895e-05, "loss": 0.5329, "step": 27248 }, { "epoch": 1.5258707582036062, "grad_norm": 1.2194404602050781, "learning_rate": 9.809578947368422e-05, "loss": 0.4716, "step": 27249 }, { "epoch": 1.5259267555157352, "grad_norm": 1.2832481861114502, "learning_rate": 9.809552631578948e-05, "loss": 0.3971, "step": 27250 }, { "epoch": 1.5259827528278642, "grad_norm": 1.1515140533447266, "learning_rate": 9.809526315789474e-05, "loss": 0.4872, "step": 27251 }, { "epoch": 1.5260387501399932, "grad_norm": 1.1932960748672485, "learning_rate": 9.8095e-05, "loss": 0.3725, "step": 27252 }, { "epoch": 1.5260947474521223, "grad_norm": 1.0814008712768555, "learning_rate": 9.809473684210528e-05, "loss": 0.2995, "step": 27253 }, { "epoch": 1.5261507447642513, "grad_norm": 1.4640215635299683, "learning_rate": 9.809447368421053e-05, "loss": 0.4988, "step": 27254 }, { "epoch": 1.5262067420763803, "grad_norm": 1.5298998355865479, "learning_rate": 9.80942105263158e-05, "loss": 0.491, "step": 27255 }, { "epoch": 1.5262627393885093, "grad_norm": 1.3268486261367798, "learning_rate": 9.809394736842105e-05, "loss": 0.5103, "step": 27256 }, { "epoch": 1.5263187367006383, "grad_norm": 1.8547991514205933, "learning_rate": 9.809368421052631e-05, "loss": 0.4281, "step": 27257 }, { "epoch": 1.5263747340127674, "grad_norm": 1.2123308181762695, "learning_rate": 9.809342105263159e-05, "loss": 0.3843, "step": 27258 }, { "epoch": 1.5264307313248964, "grad_norm": 1.3726972341537476, "learning_rate": 9.809315789473685e-05, "loss": 0.5126, "step": 27259 }, { "epoch": 1.5264867286370254, "grad_norm": 1.2992104291915894, "learning_rate": 9.809289473684211e-05, "loss": 0.4645, "step": 27260 }, { "epoch": 1.5265427259491544, "grad_norm": 1.420084834098816, "learning_rate": 9.809263157894737e-05, "loss": 0.5628, "step": 27261 }, { "epoch": 1.5265987232612834, "grad_norm": 1.237008810043335, "learning_rate": 9.809236842105264e-05, "loss": 0.6578, "step": 27262 }, { "epoch": 1.5266547205734124, 
"grad_norm": 1.4518870115280151, "learning_rate": 9.80921052631579e-05, "loss": 0.4862, "step": 27263 }, { "epoch": 1.5267107178855415, "grad_norm": 1.1755800247192383, "learning_rate": 9.809184210526316e-05, "loss": 0.4391, "step": 27264 }, { "epoch": 1.5267667151976705, "grad_norm": 1.2907551527023315, "learning_rate": 9.809157894736842e-05, "loss": 0.5257, "step": 27265 }, { "epoch": 1.5268227125097995, "grad_norm": 1.1906359195709229, "learning_rate": 9.809131578947369e-05, "loss": 0.3457, "step": 27266 }, { "epoch": 1.5268787098219285, "grad_norm": 1.5713539123535156, "learning_rate": 9.809105263157895e-05, "loss": 0.467, "step": 27267 }, { "epoch": 1.5269347071340575, "grad_norm": 1.5531158447265625, "learning_rate": 9.809078947368423e-05, "loss": 0.5502, "step": 27268 }, { "epoch": 1.5269907044461866, "grad_norm": 1.4606424570083618, "learning_rate": 9.809052631578947e-05, "loss": 0.2967, "step": 27269 }, { "epoch": 1.5270467017583156, "grad_norm": 1.352168083190918, "learning_rate": 9.809026315789474e-05, "loss": 0.4274, "step": 27270 }, { "epoch": 1.5271026990704446, "grad_norm": 1.477839469909668, "learning_rate": 9.809e-05, "loss": 0.3866, "step": 27271 }, { "epoch": 1.5271586963825736, "grad_norm": 1.2686796188354492, "learning_rate": 9.808973684210528e-05, "loss": 0.4013, "step": 27272 }, { "epoch": 1.5272146936947026, "grad_norm": 1.5855923891067505, "learning_rate": 9.808947368421054e-05, "loss": 0.5514, "step": 27273 }, { "epoch": 1.5272706910068317, "grad_norm": 1.2596975564956665, "learning_rate": 9.808921052631578e-05, "loss": 0.4397, "step": 27274 }, { "epoch": 1.5273266883189607, "grad_norm": 1.4806995391845703, "learning_rate": 9.808894736842106e-05, "loss": 0.5705, "step": 27275 }, { "epoch": 1.5273826856310897, "grad_norm": 1.3230366706848145, "learning_rate": 9.808868421052632e-05, "loss": 0.6061, "step": 27276 }, { "epoch": 1.5274386829432187, "grad_norm": 1.2527214288711548, "learning_rate": 9.808842105263159e-05, "loss": 0.464, "step": 
27277 }, { "epoch": 1.5274946802553477, "grad_norm": 1.3091689348220825, "learning_rate": 9.808815789473685e-05, "loss": 0.3889, "step": 27278 }, { "epoch": 1.5275506775674768, "grad_norm": 1.3826245069503784, "learning_rate": 9.808789473684211e-05, "loss": 0.4983, "step": 27279 }, { "epoch": 1.5276066748796058, "grad_norm": 1.2268179655075073, "learning_rate": 9.808763157894737e-05, "loss": 0.3406, "step": 27280 }, { "epoch": 1.5276626721917348, "grad_norm": 1.3147023916244507, "learning_rate": 9.808736842105264e-05, "loss": 0.6199, "step": 27281 }, { "epoch": 1.5277186695038638, "grad_norm": 1.1666747331619263, "learning_rate": 9.80871052631579e-05, "loss": 0.4304, "step": 27282 }, { "epoch": 1.5277746668159928, "grad_norm": 1.3330376148223877, "learning_rate": 9.808684210526316e-05, "loss": 0.4283, "step": 27283 }, { "epoch": 1.5278306641281219, "grad_norm": 1.4851858615875244, "learning_rate": 9.808657894736842e-05, "loss": 0.3865, "step": 27284 }, { "epoch": 1.5278866614402509, "grad_norm": 1.4621042013168335, "learning_rate": 9.80863157894737e-05, "loss": 0.5033, "step": 27285 }, { "epoch": 1.52794265875238, "grad_norm": 1.2530263662338257, "learning_rate": 9.808605263157895e-05, "loss": 0.4445, "step": 27286 }, { "epoch": 1.527998656064509, "grad_norm": 1.4377599954605103, "learning_rate": 9.808578947368421e-05, "loss": 0.4676, "step": 27287 }, { "epoch": 1.528054653376638, "grad_norm": 1.7416679859161377, "learning_rate": 9.808552631578947e-05, "loss": 0.5612, "step": 27288 }, { "epoch": 1.528110650688767, "grad_norm": 1.4890049695968628, "learning_rate": 9.808526315789475e-05, "loss": 0.4705, "step": 27289 }, { "epoch": 1.528166648000896, "grad_norm": 1.6244866847991943, "learning_rate": 9.808500000000001e-05, "loss": 0.5147, "step": 27290 }, { "epoch": 1.528222645313025, "grad_norm": 1.2908213138580322, "learning_rate": 9.808473684210527e-05, "loss": 0.3702, "step": 27291 }, { "epoch": 1.528278642625154, "grad_norm": 1.3829395771026611, "learning_rate": 
9.808447368421053e-05, "loss": 0.4444, "step": 27292 }, { "epoch": 1.528334639937283, "grad_norm": 1.6316728591918945, "learning_rate": 9.808421052631579e-05, "loss": 0.4936, "step": 27293 }, { "epoch": 1.528390637249412, "grad_norm": 1.4428198337554932, "learning_rate": 9.808394736842106e-05, "loss": 0.5268, "step": 27294 }, { "epoch": 1.528446634561541, "grad_norm": 1.3690496683120728, "learning_rate": 9.808368421052632e-05, "loss": 0.4147, "step": 27295 }, { "epoch": 1.52850263187367, "grad_norm": 1.5757402181625366, "learning_rate": 9.808342105263158e-05, "loss": 0.6325, "step": 27296 }, { "epoch": 1.528558629185799, "grad_norm": 1.3949118852615356, "learning_rate": 9.808315789473684e-05, "loss": 0.4446, "step": 27297 }, { "epoch": 1.5286146264979281, "grad_norm": 1.326460838317871, "learning_rate": 9.808289473684211e-05, "loss": 0.5211, "step": 27298 }, { "epoch": 1.5286706238100571, "grad_norm": 1.2361222505569458, "learning_rate": 9.808263157894737e-05, "loss": 0.4878, "step": 27299 }, { "epoch": 1.5287266211221862, "grad_norm": 1.2712743282318115, "learning_rate": 9.808236842105264e-05, "loss": 0.3534, "step": 27300 }, { "epoch": 1.5287826184343152, "grad_norm": 1.9682635068893433, "learning_rate": 9.808210526315789e-05, "loss": 0.4473, "step": 27301 }, { "epoch": 1.5288386157464442, "grad_norm": 1.5164655447006226, "learning_rate": 9.808184210526316e-05, "loss": 0.5343, "step": 27302 }, { "epoch": 1.5288946130585732, "grad_norm": 1.496638298034668, "learning_rate": 9.808157894736842e-05, "loss": 0.5247, "step": 27303 }, { "epoch": 1.5289506103707022, "grad_norm": 1.265756607055664, "learning_rate": 9.80813157894737e-05, "loss": 0.3993, "step": 27304 }, { "epoch": 1.5290066076828313, "grad_norm": 1.3341575860977173, "learning_rate": 9.808105263157896e-05, "loss": 0.4568, "step": 27305 }, { "epoch": 1.5290626049949603, "grad_norm": 1.7430514097213745, "learning_rate": 9.808078947368422e-05, "loss": 0.4912, "step": 27306 }, { "epoch": 1.5291186023070893, 
"grad_norm": 1.522204041481018, "learning_rate": 9.808052631578948e-05, "loss": 0.4835, "step": 27307 }, { "epoch": 1.5291745996192183, "grad_norm": 8.616300582885742, "learning_rate": 9.808026315789474e-05, "loss": 0.6381, "step": 27308 }, { "epoch": 1.5292305969313473, "grad_norm": 1.8359129428863525, "learning_rate": 9.808000000000001e-05, "loss": 0.6387, "step": 27309 }, { "epoch": 1.5292865942434763, "grad_norm": 1.3288633823394775, "learning_rate": 9.807973684210527e-05, "loss": 0.4425, "step": 27310 }, { "epoch": 1.5293425915556054, "grad_norm": 1.3993644714355469, "learning_rate": 9.807947368421053e-05, "loss": 0.4144, "step": 27311 }, { "epoch": 1.5293985888677344, "grad_norm": 1.2721813917160034, "learning_rate": 9.807921052631579e-05, "loss": 0.3192, "step": 27312 }, { "epoch": 1.5294545861798634, "grad_norm": 1.2544965744018555, "learning_rate": 9.807894736842106e-05, "loss": 0.5001, "step": 27313 }, { "epoch": 1.5295105834919924, "grad_norm": 1.2644429206848145, "learning_rate": 9.807868421052632e-05, "loss": 0.4599, "step": 27314 }, { "epoch": 1.5295665808041214, "grad_norm": 1.3764448165893555, "learning_rate": 9.807842105263158e-05, "loss": 0.518, "step": 27315 }, { "epoch": 1.5296225781162505, "grad_norm": 1.488978624343872, "learning_rate": 9.807815789473684e-05, "loss": 0.5179, "step": 27316 }, { "epoch": 1.5296785754283795, "grad_norm": 1.5431971549987793, "learning_rate": 9.807789473684211e-05, "loss": 0.5134, "step": 27317 }, { "epoch": 1.5297345727405085, "grad_norm": 1.3441526889801025, "learning_rate": 9.807763157894737e-05, "loss": 0.3493, "step": 27318 }, { "epoch": 1.5297905700526375, "grad_norm": 1.3060107231140137, "learning_rate": 9.807736842105263e-05, "loss": 0.4902, "step": 27319 }, { "epoch": 1.5298465673647665, "grad_norm": 1.2472405433654785, "learning_rate": 9.80771052631579e-05, "loss": 0.3954, "step": 27320 }, { "epoch": 1.5299025646768956, "grad_norm": 1.2510164976119995, "learning_rate": 9.807684210526317e-05, "loss": 
0.4524, "step": 27321 }, { "epoch": 1.5299585619890246, "grad_norm": 1.3180891275405884, "learning_rate": 9.807657894736843e-05, "loss": 0.3708, "step": 27322 }, { "epoch": 1.5300145593011536, "grad_norm": 1.217759370803833, "learning_rate": 9.80763157894737e-05, "loss": 0.4818, "step": 27323 }, { "epoch": 1.5300705566132826, "grad_norm": 1.3709975481033325, "learning_rate": 9.807605263157895e-05, "loss": 0.46, "step": 27324 }, { "epoch": 1.5301265539254114, "grad_norm": 1.1192548274993896, "learning_rate": 9.80757894736842e-05, "loss": 0.3857, "step": 27325 }, { "epoch": 1.5301825512375404, "grad_norm": 1.4618414640426636, "learning_rate": 9.807552631578948e-05, "loss": 0.3888, "step": 27326 }, { "epoch": 1.5302385485496695, "grad_norm": 1.6253644227981567, "learning_rate": 9.807526315789474e-05, "loss": 0.6086, "step": 27327 }, { "epoch": 1.5302945458617985, "grad_norm": 1.1323260068893433, "learning_rate": 9.807500000000001e-05, "loss": 0.4307, "step": 27328 }, { "epoch": 1.5303505431739275, "grad_norm": 1.3198050260543823, "learning_rate": 9.807473684210526e-05, "loss": 0.4646, "step": 27329 }, { "epoch": 1.5304065404860565, "grad_norm": 1.1652281284332275, "learning_rate": 9.807447368421053e-05, "loss": 0.4897, "step": 27330 }, { "epoch": 1.5304625377981855, "grad_norm": 1.1871758699417114, "learning_rate": 9.807421052631579e-05, "loss": 0.5071, "step": 27331 }, { "epoch": 1.5305185351103145, "grad_norm": 1.3732500076293945, "learning_rate": 9.807394736842106e-05, "loss": 0.3617, "step": 27332 }, { "epoch": 1.5305745324224436, "grad_norm": 1.1966280937194824, "learning_rate": 9.807368421052631e-05, "loss": 0.4051, "step": 27333 }, { "epoch": 1.5306305297345726, "grad_norm": 1.1944531202316284, "learning_rate": 9.807342105263158e-05, "loss": 0.387, "step": 27334 }, { "epoch": 1.5306865270467016, "grad_norm": 1.246779203414917, "learning_rate": 9.807315789473684e-05, "loss": 0.3806, "step": 27335 }, { "epoch": 1.5307425243588306, "grad_norm": 1.5883923768997192, 
"learning_rate": 9.807289473684212e-05, "loss": 0.5501, "step": 27336 }, { "epoch": 1.5307985216709596, "grad_norm": 1.4065109491348267, "learning_rate": 9.807263157894738e-05, "loss": 0.461, "step": 27337 }, { "epoch": 1.5308545189830887, "grad_norm": 1.3632166385650635, "learning_rate": 9.807236842105264e-05, "loss": 0.4515, "step": 27338 }, { "epoch": 1.5309105162952177, "grad_norm": 1.1589895486831665, "learning_rate": 9.80721052631579e-05, "loss": 0.506, "step": 27339 }, { "epoch": 1.5309665136073467, "grad_norm": 1.538962960243225, "learning_rate": 9.807184210526317e-05, "loss": 0.3962, "step": 27340 }, { "epoch": 1.5310225109194757, "grad_norm": 1.7433265447616577, "learning_rate": 9.807157894736843e-05, "loss": 0.5894, "step": 27341 }, { "epoch": 1.5310785082316047, "grad_norm": 1.180971384048462, "learning_rate": 9.807131578947369e-05, "loss": 0.2923, "step": 27342 }, { "epoch": 1.5311345055437338, "grad_norm": 1.596555471420288, "learning_rate": 9.807105263157895e-05, "loss": 0.4668, "step": 27343 }, { "epoch": 1.5311905028558628, "grad_norm": 1.2896409034729004, "learning_rate": 9.807078947368421e-05, "loss": 0.5266, "step": 27344 }, { "epoch": 1.5312465001679918, "grad_norm": 1.253602385520935, "learning_rate": 9.807052631578948e-05, "loss": 0.5117, "step": 27345 }, { "epoch": 1.5313024974801208, "grad_norm": 1.3735167980194092, "learning_rate": 9.807026315789474e-05, "loss": 0.4223, "step": 27346 }, { "epoch": 1.5313584947922498, "grad_norm": 1.3457038402557373, "learning_rate": 9.807e-05, "loss": 0.4288, "step": 27347 }, { "epoch": 1.5314144921043789, "grad_norm": 1.4801067113876343, "learning_rate": 9.806973684210526e-05, "loss": 0.4664, "step": 27348 }, { "epoch": 1.5314704894165079, "grad_norm": 1.544136643409729, "learning_rate": 9.806947368421053e-05, "loss": 0.449, "step": 27349 }, { "epoch": 1.531526486728637, "grad_norm": 1.1419053077697754, "learning_rate": 9.80692105263158e-05, "loss": 0.4573, "step": 27350 }, { "epoch": 1.531582484040766, 
"grad_norm": 1.8215211629867554, "learning_rate": 9.806894736842105e-05, "loss": 0.495, "step": 27351 }, { "epoch": 1.531638481352895, "grad_norm": 1.4061273336410522, "learning_rate": 9.806868421052631e-05, "loss": 0.5048, "step": 27352 }, { "epoch": 1.531694478665024, "grad_norm": 1.3769176006317139, "learning_rate": 9.806842105263159e-05, "loss": 0.5012, "step": 27353 }, { "epoch": 1.531750475977153, "grad_norm": 1.22798490524292, "learning_rate": 9.806815789473685e-05, "loss": 0.4844, "step": 27354 }, { "epoch": 1.531806473289282, "grad_norm": 1.7953495979309082, "learning_rate": 9.806789473684212e-05, "loss": 0.5709, "step": 27355 }, { "epoch": 1.531862470601411, "grad_norm": 1.2608802318572998, "learning_rate": 9.806763157894737e-05, "loss": 0.3773, "step": 27356 }, { "epoch": 1.53191846791354, "grad_norm": 1.4404892921447754, "learning_rate": 9.806736842105264e-05, "loss": 0.6066, "step": 27357 }, { "epoch": 1.531974465225669, "grad_norm": 1.2526389360427856, "learning_rate": 9.80671052631579e-05, "loss": 0.3322, "step": 27358 }, { "epoch": 1.532030462537798, "grad_norm": 1.2167657613754272, "learning_rate": 9.806684210526317e-05, "loss": 0.3577, "step": 27359 }, { "epoch": 1.532086459849927, "grad_norm": 1.2484211921691895, "learning_rate": 9.806657894736843e-05, "loss": 0.3917, "step": 27360 }, { "epoch": 1.532142457162056, "grad_norm": 1.107147455215454, "learning_rate": 9.806631578947368e-05, "loss": 0.4293, "step": 27361 }, { "epoch": 1.5321984544741851, "grad_norm": 1.4587103128433228, "learning_rate": 9.806605263157895e-05, "loss": 0.6043, "step": 27362 }, { "epoch": 1.5322544517863141, "grad_norm": 1.3617078065872192, "learning_rate": 9.806578947368421e-05, "loss": 0.5124, "step": 27363 }, { "epoch": 1.5323104490984432, "grad_norm": 1.3550008535385132, "learning_rate": 9.806552631578948e-05, "loss": 0.4097, "step": 27364 }, { "epoch": 1.5323664464105722, "grad_norm": 1.4102470874786377, "learning_rate": 9.806526315789474e-05, "loss": 0.4618, "step": 
27365 }, { "epoch": 1.5324224437227012, "grad_norm": 1.5110503435134888, "learning_rate": 9.8065e-05, "loss": 0.5387, "step": 27366 }, { "epoch": 1.5324784410348302, "grad_norm": 1.5743473768234253, "learning_rate": 9.806473684210526e-05, "loss": 0.4776, "step": 27367 }, { "epoch": 1.5325344383469592, "grad_norm": 1.6093778610229492, "learning_rate": 9.806447368421054e-05, "loss": 0.3658, "step": 27368 }, { "epoch": 1.5325904356590883, "grad_norm": 1.5215086936950684, "learning_rate": 9.80642105263158e-05, "loss": 0.5366, "step": 27369 }, { "epoch": 1.5326464329712173, "grad_norm": 1.475719690322876, "learning_rate": 9.806394736842106e-05, "loss": 0.6122, "step": 27370 }, { "epoch": 1.5327024302833463, "grad_norm": 1.2523468732833862, "learning_rate": 9.806368421052632e-05, "loss": 0.398, "step": 27371 }, { "epoch": 1.5327584275954753, "grad_norm": 1.1293725967407227, "learning_rate": 9.806342105263159e-05, "loss": 0.3936, "step": 27372 }, { "epoch": 1.5328144249076043, "grad_norm": 1.5792362689971924, "learning_rate": 9.806315789473685e-05, "loss": 0.4651, "step": 27373 }, { "epoch": 1.5328704222197334, "grad_norm": 1.2675620317459106, "learning_rate": 9.806289473684211e-05, "loss": 0.6451, "step": 27374 }, { "epoch": 1.5329264195318624, "grad_norm": 1.1812338829040527, "learning_rate": 9.806263157894737e-05, "loss": 0.3571, "step": 27375 }, { "epoch": 1.5329824168439914, "grad_norm": 1.276490330696106, "learning_rate": 9.806236842105264e-05, "loss": 0.4349, "step": 27376 }, { "epoch": 1.5330384141561204, "grad_norm": 1.2429367303848267, "learning_rate": 9.80621052631579e-05, "loss": 0.4896, "step": 27377 }, { "epoch": 1.5330944114682494, "grad_norm": 1.464216947555542, "learning_rate": 9.806184210526316e-05, "loss": 0.4263, "step": 27378 }, { "epoch": 1.5331504087803784, "grad_norm": 1.555129051208496, "learning_rate": 9.806157894736842e-05, "loss": 0.5057, "step": 27379 }, { "epoch": 1.5332064060925075, "grad_norm": 1.5318166017532349, "learning_rate": 
9.806131578947368e-05, "loss": 0.5248, "step": 27380 }, { "epoch": 1.5332624034046365, "grad_norm": 1.4204380512237549, "learning_rate": 9.806105263157895e-05, "loss": 0.5319, "step": 27381 }, { "epoch": 1.5333184007167655, "grad_norm": 1.737895131111145, "learning_rate": 9.806078947368421e-05, "loss": 0.543, "step": 27382 }, { "epoch": 1.5333743980288945, "grad_norm": 1.4844969511032104, "learning_rate": 9.806052631578949e-05, "loss": 0.492, "step": 27383 }, { "epoch": 1.5334303953410235, "grad_norm": 1.2135682106018066, "learning_rate": 9.806026315789473e-05, "loss": 0.4453, "step": 27384 }, { "epoch": 1.5334863926531526, "grad_norm": 1.1250629425048828, "learning_rate": 9.806e-05, "loss": 0.3666, "step": 27385 }, { "epoch": 1.5335423899652816, "grad_norm": 1.3407983779907227, "learning_rate": 9.805973684210527e-05, "loss": 0.4875, "step": 27386 }, { "epoch": 1.5335983872774106, "grad_norm": 1.3464614152908325, "learning_rate": 9.805947368421054e-05, "loss": 0.3705, "step": 27387 }, { "epoch": 1.5336543845895396, "grad_norm": 1.4388389587402344, "learning_rate": 9.805921052631579e-05, "loss": 0.3942, "step": 27388 }, { "epoch": 1.5337103819016686, "grad_norm": 1.7731399536132812, "learning_rate": 9.805894736842106e-05, "loss": 0.4749, "step": 27389 }, { "epoch": 1.5337663792137977, "grad_norm": 1.9700196981430054, "learning_rate": 9.805868421052632e-05, "loss": 0.3996, "step": 27390 }, { "epoch": 1.5338223765259267, "grad_norm": 1.5039012432098389, "learning_rate": 9.805842105263159e-05, "loss": 0.5317, "step": 27391 }, { "epoch": 1.5338783738380557, "grad_norm": 1.7295633554458618, "learning_rate": 9.805815789473685e-05, "loss": 0.6416, "step": 27392 }, { "epoch": 1.5339343711501847, "grad_norm": 1.3499912023544312, "learning_rate": 9.805789473684211e-05, "loss": 0.5603, "step": 27393 }, { "epoch": 1.5339903684623137, "grad_norm": 1.2199500799179077, "learning_rate": 9.805763157894737e-05, "loss": 0.4216, "step": 27394 }, { "epoch": 1.5340463657744428, 
"grad_norm": 1.0372475385665894, "learning_rate": 9.805736842105263e-05, "loss": 0.3845, "step": 27395 }, { "epoch": 1.5341023630865718, "grad_norm": 1.6075316667556763, "learning_rate": 9.80571052631579e-05, "loss": 0.7346, "step": 27396 }, { "epoch": 1.5341583603987008, "grad_norm": 1.2499035596847534, "learning_rate": 9.805684210526316e-05, "loss": 0.5194, "step": 27397 }, { "epoch": 1.5342143577108298, "grad_norm": 1.4742541313171387, "learning_rate": 9.805657894736842e-05, "loss": 0.424, "step": 27398 }, { "epoch": 1.5342703550229588, "grad_norm": 1.3422431945800781, "learning_rate": 9.805631578947368e-05, "loss": 0.4758, "step": 27399 }, { "epoch": 1.5343263523350879, "grad_norm": 1.3489582538604736, "learning_rate": 9.805605263157896e-05, "loss": 0.4106, "step": 27400 }, { "epoch": 1.5343823496472169, "grad_norm": 1.3509870767593384, "learning_rate": 9.805578947368422e-05, "loss": 0.3976, "step": 27401 }, { "epoch": 1.534438346959346, "grad_norm": 2.1054441928863525, "learning_rate": 9.805552631578948e-05, "loss": 0.5922, "step": 27402 }, { "epoch": 1.534494344271475, "grad_norm": 1.5807559490203857, "learning_rate": 9.805526315789474e-05, "loss": 0.4145, "step": 27403 }, { "epoch": 1.534550341583604, "grad_norm": 1.226464867591858, "learning_rate": 9.805500000000001e-05, "loss": 0.3998, "step": 27404 }, { "epoch": 1.534606338895733, "grad_norm": 1.1433545351028442, "learning_rate": 9.805473684210527e-05, "loss": 0.3917, "step": 27405 }, { "epoch": 1.534662336207862, "grad_norm": 1.6557573080062866, "learning_rate": 9.805447368421053e-05, "loss": 0.4927, "step": 27406 }, { "epoch": 1.534718333519991, "grad_norm": 1.523334264755249, "learning_rate": 9.805421052631579e-05, "loss": 0.3263, "step": 27407 }, { "epoch": 1.53477433083212, "grad_norm": 1.2454636096954346, "learning_rate": 9.805394736842106e-05, "loss": 0.4938, "step": 27408 }, { "epoch": 1.534830328144249, "grad_norm": 1.1688791513442993, "learning_rate": 9.805368421052632e-05, "loss": 0.3803, 
"step": 27409 }, { "epoch": 1.534886325456378, "grad_norm": 1.3370659351348877, "learning_rate": 9.80534210526316e-05, "loss": 0.3608, "step": 27410 }, { "epoch": 1.534942322768507, "grad_norm": 2.0097856521606445, "learning_rate": 9.805315789473684e-05, "loss": 0.5683, "step": 27411 }, { "epoch": 1.534998320080636, "grad_norm": 1.3716015815734863, "learning_rate": 9.80528947368421e-05, "loss": 0.3982, "step": 27412 }, { "epoch": 1.535054317392765, "grad_norm": 1.6543197631835938, "learning_rate": 9.805263157894737e-05, "loss": 0.4842, "step": 27413 }, { "epoch": 1.5351103147048941, "grad_norm": 1.7502961158752441, "learning_rate": 9.805236842105263e-05, "loss": 0.5106, "step": 27414 }, { "epoch": 1.5351663120170231, "grad_norm": 1.0914019346237183, "learning_rate": 9.80521052631579e-05, "loss": 0.3747, "step": 27415 }, { "epoch": 1.5352223093291522, "grad_norm": 1.7459182739257812, "learning_rate": 9.805184210526315e-05, "loss": 0.4082, "step": 27416 }, { "epoch": 1.5352783066412812, "grad_norm": 1.4182765483856201, "learning_rate": 9.805157894736843e-05, "loss": 0.4289, "step": 27417 }, { "epoch": 1.5353343039534102, "grad_norm": 1.6390981674194336, "learning_rate": 9.805131578947369e-05, "loss": 0.5726, "step": 27418 }, { "epoch": 1.5353903012655392, "grad_norm": 1.5273953676223755, "learning_rate": 9.805105263157896e-05, "loss": 0.4502, "step": 27419 }, { "epoch": 1.5354462985776682, "grad_norm": 1.197325587272644, "learning_rate": 9.805078947368422e-05, "loss": 0.4224, "step": 27420 }, { "epoch": 1.5355022958897973, "grad_norm": 1.758510708808899, "learning_rate": 9.805052631578948e-05, "loss": 0.5773, "step": 27421 }, { "epoch": 1.5355582932019263, "grad_norm": 1.3416218757629395, "learning_rate": 9.805026315789474e-05, "loss": 0.4875, "step": 27422 }, { "epoch": 1.5356142905140553, "grad_norm": 1.3372999429702759, "learning_rate": 9.805000000000001e-05, "loss": 0.3808, "step": 27423 }, { "epoch": 1.5356702878261843, "grad_norm": 1.239515781402588, 
"learning_rate": 9.804973684210527e-05, "loss": 0.6036, "step": 27424 }, { "epoch": 1.5357262851383133, "grad_norm": 1.2493059635162354, "learning_rate": 9.804947368421053e-05, "loss": 0.337, "step": 27425 }, { "epoch": 1.5357822824504423, "grad_norm": 1.2406386137008667, "learning_rate": 9.804921052631579e-05, "loss": 0.417, "step": 27426 }, { "epoch": 1.5358382797625714, "grad_norm": 1.2913198471069336, "learning_rate": 9.804894736842106e-05, "loss": 0.4527, "step": 27427 }, { "epoch": 1.5358942770747004, "grad_norm": 1.506883978843689, "learning_rate": 9.804868421052632e-05, "loss": 0.4886, "step": 27428 }, { "epoch": 1.5359502743868294, "grad_norm": 1.262926697731018, "learning_rate": 9.804842105263158e-05, "loss": 0.5099, "step": 27429 }, { "epoch": 1.5360062716989584, "grad_norm": 1.320450782775879, "learning_rate": 9.804815789473684e-05, "loss": 0.6103, "step": 27430 }, { "epoch": 1.5360622690110874, "grad_norm": 1.2099217176437378, "learning_rate": 9.80478947368421e-05, "loss": 0.3378, "step": 27431 }, { "epoch": 1.5361182663232165, "grad_norm": 1.5453951358795166, "learning_rate": 9.804763157894738e-05, "loss": 0.4624, "step": 27432 }, { "epoch": 1.5361742636353455, "grad_norm": 1.3994524478912354, "learning_rate": 9.804736842105264e-05, "loss": 0.6331, "step": 27433 }, { "epoch": 1.5362302609474745, "grad_norm": 1.4725806713104248, "learning_rate": 9.80471052631579e-05, "loss": 0.52, "step": 27434 }, { "epoch": 1.5362862582596035, "grad_norm": 1.5783519744873047, "learning_rate": 9.804684210526316e-05, "loss": 0.5181, "step": 27435 }, { "epoch": 1.5363422555717325, "grad_norm": 1.4294320344924927, "learning_rate": 9.804657894736843e-05, "loss": 0.6509, "step": 27436 }, { "epoch": 1.5363982528838616, "grad_norm": 1.1189452409744263, "learning_rate": 9.804631578947369e-05, "loss": 0.367, "step": 27437 }, { "epoch": 1.5364542501959906, "grad_norm": 1.5972261428833008, "learning_rate": 9.804605263157896e-05, "loss": 0.5024, "step": 27438 }, { "epoch": 
1.5365102475081196, "grad_norm": 1.3196470737457275, "learning_rate": 9.804578947368421e-05, "loss": 0.4849, "step": 27439 }, { "epoch": 1.5365662448202486, "grad_norm": 1.0912482738494873, "learning_rate": 9.804552631578948e-05, "loss": 0.3987, "step": 27440 }, { "epoch": 1.5366222421323776, "grad_norm": 2.2294299602508545, "learning_rate": 9.804526315789474e-05, "loss": 0.4531, "step": 27441 }, { "epoch": 1.5366782394445067, "grad_norm": 1.0957279205322266, "learning_rate": 9.804500000000001e-05, "loss": 0.4376, "step": 27442 }, { "epoch": 1.5367342367566357, "grad_norm": 1.2432575225830078, "learning_rate": 9.804473684210526e-05, "loss": 0.4539, "step": 27443 }, { "epoch": 1.5367902340687647, "grad_norm": 2.164496898651123, "learning_rate": 9.804447368421053e-05, "loss": 0.31, "step": 27444 }, { "epoch": 1.5368462313808937, "grad_norm": 1.7686548233032227, "learning_rate": 9.80442105263158e-05, "loss": 0.5001, "step": 27445 }, { "epoch": 1.5369022286930227, "grad_norm": 1.1818535327911377, "learning_rate": 9.804394736842105e-05, "loss": 0.4528, "step": 27446 }, { "epoch": 1.5369582260051518, "grad_norm": 1.6264586448669434, "learning_rate": 9.804368421052633e-05, "loss": 0.4355, "step": 27447 }, { "epoch": 1.5370142233172808, "grad_norm": 1.404054045677185, "learning_rate": 9.804342105263157e-05, "loss": 0.6259, "step": 27448 }, { "epoch": 1.5370702206294098, "grad_norm": 1.3645541667938232, "learning_rate": 9.804315789473685e-05, "loss": 0.6429, "step": 27449 }, { "epoch": 1.5371262179415388, "grad_norm": 1.200858235359192, "learning_rate": 9.80428947368421e-05, "loss": 0.3692, "step": 27450 }, { "epoch": 1.5371822152536678, "grad_norm": 1.3735047578811646, "learning_rate": 9.804263157894738e-05, "loss": 0.5971, "step": 27451 }, { "epoch": 1.5372382125657968, "grad_norm": 1.3319628238677979, "learning_rate": 9.804236842105264e-05, "loss": 0.4273, "step": 27452 }, { "epoch": 1.5372942098779259, "grad_norm": 1.5300577878952026, "learning_rate": 
9.80421052631579e-05, "loss": 0.4289, "step": 27453 }, { "epoch": 1.5373502071900549, "grad_norm": 1.256272315979004, "learning_rate": 9.804184210526316e-05, "loss": 0.4217, "step": 27454 }, { "epoch": 1.537406204502184, "grad_norm": 1.1546261310577393, "learning_rate": 9.804157894736843e-05, "loss": 0.5448, "step": 27455 }, { "epoch": 1.537462201814313, "grad_norm": 1.232764482498169, "learning_rate": 9.804131578947369e-05, "loss": 0.4131, "step": 27456 }, { "epoch": 1.537518199126442, "grad_norm": 1.5305607318878174, "learning_rate": 9.804105263157895e-05, "loss": 0.4478, "step": 27457 }, { "epoch": 1.537574196438571, "grad_norm": 1.6348007917404175, "learning_rate": 9.804078947368421e-05, "loss": 0.6888, "step": 27458 }, { "epoch": 1.5376301937507, "grad_norm": 1.250065565109253, "learning_rate": 9.804052631578948e-05, "loss": 0.4321, "step": 27459 }, { "epoch": 1.537686191062829, "grad_norm": 1.3260490894317627, "learning_rate": 9.804026315789474e-05, "loss": 0.463, "step": 27460 }, { "epoch": 1.537742188374958, "grad_norm": 1.4980632066726685, "learning_rate": 9.804e-05, "loss": 0.6361, "step": 27461 }, { "epoch": 1.537798185687087, "grad_norm": 1.5763019323349, "learning_rate": 9.803973684210526e-05, "loss": 0.8689, "step": 27462 }, { "epoch": 1.537854182999216, "grad_norm": 1.4502410888671875, "learning_rate": 9.803947368421052e-05, "loss": 0.3842, "step": 27463 }, { "epoch": 1.537910180311345, "grad_norm": 1.3471466302871704, "learning_rate": 9.80392105263158e-05, "loss": 0.4361, "step": 27464 }, { "epoch": 1.537966177623474, "grad_norm": 1.247658610343933, "learning_rate": 9.803894736842106e-05, "loss": 0.531, "step": 27465 }, { "epoch": 1.5380221749356031, "grad_norm": 1.1767892837524414, "learning_rate": 9.803868421052632e-05, "loss": 0.3491, "step": 27466 }, { "epoch": 1.5380781722477321, "grad_norm": 1.3079240322113037, "learning_rate": 9.803842105263158e-05, "loss": 0.5244, "step": 27467 }, { "epoch": 1.5381341695598612, "grad_norm": 
1.3583015203475952, "learning_rate": 9.803815789473685e-05, "loss": 0.4296, "step": 27468 }, { "epoch": 1.5381901668719902, "grad_norm": 1.3113908767700195, "learning_rate": 9.803789473684211e-05, "loss": 0.4516, "step": 27469 }, { "epoch": 1.5382461641841192, "grad_norm": 1.5479438304901123, "learning_rate": 9.803763157894738e-05, "loss": 0.5803, "step": 27470 }, { "epoch": 1.5383021614962482, "grad_norm": 1.4623644351959229, "learning_rate": 9.803736842105263e-05, "loss": 0.4005, "step": 27471 }, { "epoch": 1.5383581588083772, "grad_norm": 1.529299259185791, "learning_rate": 9.80371052631579e-05, "loss": 0.4526, "step": 27472 }, { "epoch": 1.5384141561205062, "grad_norm": 1.45881986618042, "learning_rate": 9.803684210526316e-05, "loss": 0.6338, "step": 27473 }, { "epoch": 1.5384701534326353, "grad_norm": 1.1933166980743408, "learning_rate": 9.803657894736843e-05, "loss": 0.4632, "step": 27474 }, { "epoch": 1.5385261507447643, "grad_norm": 1.365078330039978, "learning_rate": 9.80363157894737e-05, "loss": 0.4295, "step": 27475 }, { "epoch": 1.5385821480568933, "grad_norm": 1.6030665636062622, "learning_rate": 9.803605263157895e-05, "loss": 0.5091, "step": 27476 }, { "epoch": 1.5386381453690223, "grad_norm": 1.1716169118881226, "learning_rate": 9.803578947368421e-05, "loss": 0.4437, "step": 27477 }, { "epoch": 1.5386941426811513, "grad_norm": 1.7651629447937012, "learning_rate": 9.803552631578949e-05, "loss": 0.5739, "step": 27478 }, { "epoch": 1.5387501399932804, "grad_norm": 1.3289557695388794, "learning_rate": 9.803526315789475e-05, "loss": 0.4221, "step": 27479 }, { "epoch": 1.5388061373054094, "grad_norm": 1.4761066436767578, "learning_rate": 9.8035e-05, "loss": 0.4091, "step": 27480 }, { "epoch": 1.5388621346175384, "grad_norm": 1.7415555715560913, "learning_rate": 9.803473684210527e-05, "loss": 0.465, "step": 27481 }, { "epoch": 1.5389181319296674, "grad_norm": 1.260891318321228, "learning_rate": 9.803447368421053e-05, "loss": 0.4759, "step": 27482 }, { 
"epoch": 1.5389741292417964, "grad_norm": 1.1866629123687744, "learning_rate": 9.80342105263158e-05, "loss": 0.4661, "step": 27483 }, { "epoch": 1.5390301265539255, "grad_norm": 1.501732587814331, "learning_rate": 9.803394736842106e-05, "loss": 0.5099, "step": 27484 }, { "epoch": 1.5390861238660545, "grad_norm": 1.0346893072128296, "learning_rate": 9.803368421052632e-05, "loss": 0.3779, "step": 27485 }, { "epoch": 1.5391421211781835, "grad_norm": 1.6514413356781006, "learning_rate": 9.803342105263158e-05, "loss": 0.4359, "step": 27486 }, { "epoch": 1.5391981184903125, "grad_norm": 1.1998839378356934, "learning_rate": 9.803315789473685e-05, "loss": 0.4138, "step": 27487 }, { "epoch": 1.5392541158024415, "grad_norm": 1.340802550315857, "learning_rate": 9.803289473684211e-05, "loss": 0.5049, "step": 27488 }, { "epoch": 1.5393101131145706, "grad_norm": 1.6594573259353638, "learning_rate": 9.803263157894737e-05, "loss": 0.5302, "step": 27489 }, { "epoch": 1.5393661104266996, "grad_norm": 1.2872958183288574, "learning_rate": 9.803236842105263e-05, "loss": 0.5179, "step": 27490 }, { "epoch": 1.5394221077388286, "grad_norm": 1.357077717781067, "learning_rate": 9.80321052631579e-05, "loss": 0.4203, "step": 27491 }, { "epoch": 1.5394781050509576, "grad_norm": 1.2538336515426636, "learning_rate": 9.803184210526316e-05, "loss": 0.4895, "step": 27492 }, { "epoch": 1.5395341023630866, "grad_norm": 1.4729819297790527, "learning_rate": 9.803157894736844e-05, "loss": 0.5603, "step": 27493 }, { "epoch": 1.5395900996752157, "grad_norm": 1.2647981643676758, "learning_rate": 9.803131578947368e-05, "loss": 0.4322, "step": 27494 }, { "epoch": 1.5396460969873447, "grad_norm": 1.2713780403137207, "learning_rate": 9.803105263157896e-05, "loss": 0.4704, "step": 27495 }, { "epoch": 1.5397020942994737, "grad_norm": 1.1072604656219482, "learning_rate": 9.803078947368422e-05, "loss": 0.4073, "step": 27496 }, { "epoch": 1.5397580916116027, "grad_norm": 1.9605051279067993, "learning_rate": 
9.803052631578949e-05, "loss": 0.4553, "step": 27497 }, { "epoch": 1.5398140889237317, "grad_norm": 1.4391065835952759, "learning_rate": 9.803026315789474e-05, "loss": 0.5179, "step": 27498 }, { "epoch": 1.5398700862358607, "grad_norm": 1.2579585313796997, "learning_rate": 9.803e-05, "loss": 0.4785, "step": 27499 }, { "epoch": 1.5399260835479898, "grad_norm": 1.3812254667282104, "learning_rate": 9.802973684210527e-05, "loss": 0.4546, "step": 27500 }, { "epoch": 1.5399820808601188, "grad_norm": 1.4660743474960327, "learning_rate": 9.802947368421053e-05, "loss": 0.4336, "step": 27501 }, { "epoch": 1.5400380781722478, "grad_norm": 1.2951329946517944, "learning_rate": 9.80292105263158e-05, "loss": 0.3834, "step": 27502 }, { "epoch": 1.5400940754843768, "grad_norm": 1.3852564096450806, "learning_rate": 9.802894736842105e-05, "loss": 0.6174, "step": 27503 }, { "epoch": 1.5401500727965058, "grad_norm": 1.3390361070632935, "learning_rate": 9.802868421052632e-05, "loss": 0.573, "step": 27504 }, { "epoch": 1.5402060701086349, "grad_norm": 1.2705798149108887, "learning_rate": 9.802842105263158e-05, "loss": 0.3637, "step": 27505 }, { "epoch": 1.5402620674207639, "grad_norm": 1.1765118837356567, "learning_rate": 9.802815789473685e-05, "loss": 0.3652, "step": 27506 }, { "epoch": 1.540318064732893, "grad_norm": 1.1236757040023804, "learning_rate": 9.802789473684211e-05, "loss": 0.4043, "step": 27507 }, { "epoch": 1.540374062045022, "grad_norm": 1.32753586769104, "learning_rate": 9.802763157894737e-05, "loss": 0.4631, "step": 27508 }, { "epoch": 1.540430059357151, "grad_norm": 1.560129165649414, "learning_rate": 9.802736842105263e-05, "loss": 0.4467, "step": 27509 }, { "epoch": 1.54048605666928, "grad_norm": 1.3294612169265747, "learning_rate": 9.80271052631579e-05, "loss": 0.4659, "step": 27510 }, { "epoch": 1.540542053981409, "grad_norm": 1.3083581924438477, "learning_rate": 9.802684210526317e-05, "loss": 0.489, "step": 27511 }, { "epoch": 1.540598051293538, "grad_norm": 
1.2318902015686035, "learning_rate": 9.802657894736843e-05, "loss": 0.4461, "step": 27512 }, { "epoch": 1.540654048605667, "grad_norm": 1.1843339204788208, "learning_rate": 9.802631578947369e-05, "loss": 0.4687, "step": 27513 }, { "epoch": 1.540710045917796, "grad_norm": 1.335110068321228, "learning_rate": 9.802605263157896e-05, "loss": 0.3646, "step": 27514 }, { "epoch": 1.540766043229925, "grad_norm": 1.146899700164795, "learning_rate": 9.802578947368422e-05, "loss": 0.4172, "step": 27515 }, { "epoch": 1.540822040542054, "grad_norm": 1.4995315074920654, "learning_rate": 9.802552631578948e-05, "loss": 0.4947, "step": 27516 }, { "epoch": 1.540878037854183, "grad_norm": 1.4839874505996704, "learning_rate": 9.802526315789474e-05, "loss": 0.4552, "step": 27517 }, { "epoch": 1.540934035166312, "grad_norm": 1.3880366086959839, "learning_rate": 9.8025e-05, "loss": 0.5506, "step": 27518 }, { "epoch": 1.5409900324784411, "grad_norm": 1.256199598312378, "learning_rate": 9.802473684210527e-05, "loss": 0.5615, "step": 27519 }, { "epoch": 1.5410460297905701, "grad_norm": 2.4414100646972656, "learning_rate": 9.802447368421053e-05, "loss": 0.4661, "step": 27520 }, { "epoch": 1.5411020271026992, "grad_norm": 1.535971999168396, "learning_rate": 9.802421052631579e-05, "loss": 0.5246, "step": 27521 }, { "epoch": 1.5411580244148282, "grad_norm": 1.4283818006515503, "learning_rate": 9.802394736842105e-05, "loss": 0.4692, "step": 27522 }, { "epoch": 1.5412140217269572, "grad_norm": 1.3877708911895752, "learning_rate": 9.802368421052632e-05, "loss": 0.5436, "step": 27523 }, { "epoch": 1.5412700190390862, "grad_norm": 1.5361599922180176, "learning_rate": 9.802342105263158e-05, "loss": 0.5783, "step": 27524 }, { "epoch": 1.5413260163512152, "grad_norm": 1.22494637966156, "learning_rate": 9.802315789473686e-05, "loss": 0.4429, "step": 27525 }, { "epoch": 1.5413820136633443, "grad_norm": 1.433502435684204, "learning_rate": 9.80228947368421e-05, "loss": 0.5215, "step": 27526 }, { "epoch": 
1.5414380109754733, "grad_norm": 1.7901016473770142, "learning_rate": 9.802263157894738e-05, "loss": 0.4571, "step": 27527 }, { "epoch": 1.5414940082876023, "grad_norm": 1.2573442459106445, "learning_rate": 9.802236842105264e-05, "loss": 0.4885, "step": 27528 }, { "epoch": 1.5415500055997313, "grad_norm": 1.3599714040756226, "learning_rate": 9.802210526315791e-05, "loss": 0.32, "step": 27529 }, { "epoch": 1.5416060029118603, "grad_norm": 1.5652059316635132, "learning_rate": 9.802184210526317e-05, "loss": 0.4739, "step": 27530 }, { "epoch": 1.5416620002239894, "grad_norm": 1.3834737539291382, "learning_rate": 9.802157894736843e-05, "loss": 0.4994, "step": 27531 }, { "epoch": 1.5417179975361184, "grad_norm": 1.1089963912963867, "learning_rate": 9.802131578947369e-05, "loss": 0.4456, "step": 27532 }, { "epoch": 1.5417739948482474, "grad_norm": 1.2896775007247925, "learning_rate": 9.802105263157895e-05, "loss": 0.4007, "step": 27533 }, { "epoch": 1.5418299921603764, "grad_norm": 1.4454684257507324, "learning_rate": 9.802078947368422e-05, "loss": 0.4368, "step": 27534 }, { "epoch": 1.5418859894725054, "grad_norm": 1.4198540449142456, "learning_rate": 9.802052631578947e-05, "loss": 0.4667, "step": 27535 }, { "epoch": 1.5419419867846345, "grad_norm": 1.478713035583496, "learning_rate": 9.802026315789474e-05, "loss": 0.6273, "step": 27536 }, { "epoch": 1.5419979840967635, "grad_norm": 1.2896050214767456, "learning_rate": 9.802e-05, "loss": 0.4453, "step": 27537 }, { "epoch": 1.5420539814088925, "grad_norm": 1.3719547986984253, "learning_rate": 9.801973684210527e-05, "loss": 0.5107, "step": 27538 }, { "epoch": 1.5421099787210215, "grad_norm": 1.1835438013076782, "learning_rate": 9.801947368421053e-05, "loss": 0.4458, "step": 27539 }, { "epoch": 1.5421659760331505, "grad_norm": 1.4666827917099, "learning_rate": 9.801921052631579e-05, "loss": 0.5352, "step": 27540 }, { "epoch": 1.5422219733452796, "grad_norm": 1.3444392681121826, "learning_rate": 9.801894736842105e-05, 
"loss": 0.4657, "step": 27541 }, { "epoch": 1.5422779706574086, "grad_norm": 1.0673942565917969, "learning_rate": 9.801868421052633e-05, "loss": 0.4011, "step": 27542 }, { "epoch": 1.5423339679695376, "grad_norm": 1.4041234254837036, "learning_rate": 9.801842105263159e-05, "loss": 0.4219, "step": 27543 }, { "epoch": 1.5423899652816666, "grad_norm": 1.3166135549545288, "learning_rate": 9.801815789473685e-05, "loss": 0.3867, "step": 27544 }, { "epoch": 1.5424459625937956, "grad_norm": 1.516274094581604, "learning_rate": 9.80178947368421e-05, "loss": 0.4241, "step": 27545 }, { "epoch": 1.5425019599059246, "grad_norm": 1.1992155313491821, "learning_rate": 9.801763157894738e-05, "loss": 0.3902, "step": 27546 }, { "epoch": 1.5425579572180537, "grad_norm": 1.3640427589416504, "learning_rate": 9.801736842105264e-05, "loss": 0.5306, "step": 27547 }, { "epoch": 1.5426139545301827, "grad_norm": 1.326397180557251, "learning_rate": 9.801710526315791e-05, "loss": 0.4747, "step": 27548 }, { "epoch": 1.5426699518423117, "grad_norm": 1.6014254093170166, "learning_rate": 9.801684210526316e-05, "loss": 0.4637, "step": 27549 }, { "epoch": 1.5427259491544407, "grad_norm": 1.4529451131820679, "learning_rate": 9.801657894736842e-05, "loss": 0.4927, "step": 27550 }, { "epoch": 1.5427819464665697, "grad_norm": 1.4845919609069824, "learning_rate": 9.801631578947369e-05, "loss": 0.4053, "step": 27551 }, { "epoch": 1.5428379437786988, "grad_norm": 1.4558218717575073, "learning_rate": 9.801605263157895e-05, "loss": 0.572, "step": 27552 }, { "epoch": 1.5428939410908278, "grad_norm": 1.2350249290466309, "learning_rate": 9.801578947368421e-05, "loss": 0.324, "step": 27553 }, { "epoch": 1.5429499384029568, "grad_norm": 1.2948768138885498, "learning_rate": 9.801552631578947e-05, "loss": 0.6895, "step": 27554 }, { "epoch": 1.5430059357150858, "grad_norm": 1.2931283712387085, "learning_rate": 9.801526315789474e-05, "loss": 0.4341, "step": 27555 }, { "epoch": 1.5430619330272148, "grad_norm": 
1.2458338737487793, "learning_rate": 9.8015e-05, "loss": 0.4292, "step": 27556 }, { "epoch": 1.5431179303393439, "grad_norm": 1.4769642353057861, "learning_rate": 9.801473684210528e-05, "loss": 0.7012, "step": 27557 }, { "epoch": 1.5431739276514729, "grad_norm": 1.3621057271957397, "learning_rate": 9.801447368421052e-05, "loss": 0.5053, "step": 27558 }, { "epoch": 1.543229924963602, "grad_norm": 1.2167919874191284, "learning_rate": 9.80142105263158e-05, "loss": 0.3819, "step": 27559 }, { "epoch": 1.543285922275731, "grad_norm": 1.462602972984314, "learning_rate": 9.801394736842106e-05, "loss": 0.474, "step": 27560 }, { "epoch": 1.54334191958786, "grad_norm": 1.011000633239746, "learning_rate": 9.801368421052633e-05, "loss": 0.3042, "step": 27561 }, { "epoch": 1.543397916899989, "grad_norm": 1.2537404298782349, "learning_rate": 9.801342105263159e-05, "loss": 0.4026, "step": 27562 }, { "epoch": 1.543453914212118, "grad_norm": 1.2451791763305664, "learning_rate": 9.801315789473685e-05, "loss": 0.4505, "step": 27563 }, { "epoch": 1.543509911524247, "grad_norm": 1.3682466745376587, "learning_rate": 9.801289473684211e-05, "loss": 0.4357, "step": 27564 }, { "epoch": 1.543565908836376, "grad_norm": 1.4733965396881104, "learning_rate": 9.801263157894738e-05, "loss": 0.4874, "step": 27565 }, { "epoch": 1.543621906148505, "grad_norm": 1.4038304090499878, "learning_rate": 9.801236842105264e-05, "loss": 0.4441, "step": 27566 }, { "epoch": 1.543677903460634, "grad_norm": 3.1235392093658447, "learning_rate": 9.80121052631579e-05, "loss": 0.4708, "step": 27567 }, { "epoch": 1.543733900772763, "grad_norm": 1.4425427913665771, "learning_rate": 9.801184210526316e-05, "loss": 0.518, "step": 27568 }, { "epoch": 1.543789898084892, "grad_norm": 1.268245816230774, "learning_rate": 9.801157894736842e-05, "loss": 0.6169, "step": 27569 }, { "epoch": 1.543845895397021, "grad_norm": 1.3493518829345703, "learning_rate": 9.801131578947369e-05, "loss": 0.4883, "step": 27570 }, { "epoch": 
1.5439018927091501, "grad_norm": 1.2586694955825806, "learning_rate": 9.801105263157895e-05, "loss": 0.5377, "step": 27571 }, { "epoch": 1.5439578900212791, "grad_norm": 1.302017092704773, "learning_rate": 9.801078947368421e-05, "loss": 0.463, "step": 27572 }, { "epoch": 1.544013887333408, "grad_norm": 34.4649658203125, "learning_rate": 9.801052631578947e-05, "loss": 0.4456, "step": 27573 }, { "epoch": 1.544069884645537, "grad_norm": 1.345123291015625, "learning_rate": 9.801026315789475e-05, "loss": 0.4977, "step": 27574 }, { "epoch": 1.544125881957666, "grad_norm": 1.382767677307129, "learning_rate": 9.801e-05, "loss": 0.4965, "step": 27575 }, { "epoch": 1.544181879269795, "grad_norm": 1.341180443763733, "learning_rate": 9.800973684210526e-05, "loss": 0.4113, "step": 27576 }, { "epoch": 1.544237876581924, "grad_norm": 1.2358497381210327, "learning_rate": 9.800947368421052e-05, "loss": 0.5991, "step": 27577 }, { "epoch": 1.544293873894053, "grad_norm": 1.458097219467163, "learning_rate": 9.80092105263158e-05, "loss": 0.5126, "step": 27578 }, { "epoch": 1.544349871206182, "grad_norm": 1.3169153928756714, "learning_rate": 9.800894736842106e-05, "loss": 0.4325, "step": 27579 }, { "epoch": 1.544405868518311, "grad_norm": 1.228348970413208, "learning_rate": 9.800868421052633e-05, "loss": 0.3367, "step": 27580 }, { "epoch": 1.54446186583044, "grad_norm": 1.3079302310943604, "learning_rate": 9.800842105263158e-05, "loss": 0.4229, "step": 27581 }, { "epoch": 1.5445178631425691, "grad_norm": 1.2360742092132568, "learning_rate": 9.800815789473685e-05, "loss": 0.4752, "step": 27582 }, { "epoch": 1.5445738604546981, "grad_norm": 1.5535396337509155, "learning_rate": 9.800789473684211e-05, "loss": 0.4473, "step": 27583 }, { "epoch": 1.5446298577668272, "grad_norm": 1.1982449293136597, "learning_rate": 9.800763157894737e-05, "loss": 0.416, "step": 27584 }, { "epoch": 1.5446858550789562, "grad_norm": 1.2752082347869873, "learning_rate": 9.800736842105264e-05, "loss": 0.6395, 
"step": 27585 }, { "epoch": 1.5447418523910852, "grad_norm": 1.3502033948898315, "learning_rate": 9.800710526315789e-05, "loss": 0.4872, "step": 27586 }, { "epoch": 1.5447978497032142, "grad_norm": 1.4935429096221924, "learning_rate": 9.800684210526316e-05, "loss": 0.441, "step": 27587 }, { "epoch": 1.5448538470153432, "grad_norm": 1.4000657796859741, "learning_rate": 9.800657894736842e-05, "loss": 0.5606, "step": 27588 }, { "epoch": 1.5449098443274722, "grad_norm": 1.6481719017028809, "learning_rate": 9.80063157894737e-05, "loss": 0.4268, "step": 27589 }, { "epoch": 1.5449658416396013, "grad_norm": 1.4190216064453125, "learning_rate": 9.800605263157894e-05, "loss": 0.4317, "step": 27590 }, { "epoch": 1.5450218389517303, "grad_norm": 1.8343473672866821, "learning_rate": 9.800578947368422e-05, "loss": 0.4894, "step": 27591 }, { "epoch": 1.5450778362638593, "grad_norm": 1.4488444328308105, "learning_rate": 9.800552631578947e-05, "loss": 0.4326, "step": 27592 }, { "epoch": 1.5451338335759883, "grad_norm": 1.3605916500091553, "learning_rate": 9.800526315789475e-05, "loss": 0.4197, "step": 27593 }, { "epoch": 1.5451898308881173, "grad_norm": 1.5493638515472412, "learning_rate": 9.800500000000001e-05, "loss": 0.4657, "step": 27594 }, { "epoch": 1.5452458282002464, "grad_norm": 2.3098866939544678, "learning_rate": 9.800473684210527e-05, "loss": 0.437, "step": 27595 }, { "epoch": 1.5453018255123754, "grad_norm": 1.4588706493377686, "learning_rate": 9.800447368421053e-05, "loss": 0.4434, "step": 27596 }, { "epoch": 1.5453578228245044, "grad_norm": 1.5190616846084595, "learning_rate": 9.80042105263158e-05, "loss": 0.3889, "step": 27597 }, { "epoch": 1.5454138201366334, "grad_norm": 1.4056833982467651, "learning_rate": 9.800394736842106e-05, "loss": 0.4166, "step": 27598 }, { "epoch": 1.5454698174487624, "grad_norm": 1.3816081285476685, "learning_rate": 9.800368421052632e-05, "loss": 0.4344, "step": 27599 }, { "epoch": 1.5455258147608915, "grad_norm": 1.176428198814392, 
"learning_rate": 9.800342105263158e-05, "loss": 0.4996, "step": 27600 }, { "epoch": 1.5455818120730205, "grad_norm": 1.3347351551055908, "learning_rate": 9.800315789473685e-05, "loss": 0.641, "step": 27601 }, { "epoch": 1.5456378093851495, "grad_norm": 1.6254616975784302, "learning_rate": 9.800289473684211e-05, "loss": 0.567, "step": 27602 }, { "epoch": 1.5456938066972785, "grad_norm": 1.2517011165618896, "learning_rate": 9.800263157894737e-05, "loss": 0.3868, "step": 27603 }, { "epoch": 1.5457498040094075, "grad_norm": 1.9341506958007812, "learning_rate": 9.800236842105263e-05, "loss": 0.479, "step": 27604 }, { "epoch": 1.5458058013215366, "grad_norm": 1.3913823366165161, "learning_rate": 9.800210526315789e-05, "loss": 0.4766, "step": 27605 }, { "epoch": 1.5458617986336656, "grad_norm": 1.4235960245132446, "learning_rate": 9.800184210526317e-05, "loss": 0.5779, "step": 27606 }, { "epoch": 1.5459177959457946, "grad_norm": 1.5019274950027466, "learning_rate": 9.800157894736842e-05, "loss": 0.3857, "step": 27607 }, { "epoch": 1.5459737932579236, "grad_norm": 1.4286653995513916, "learning_rate": 9.800131578947368e-05, "loss": 0.4655, "step": 27608 }, { "epoch": 1.5460297905700526, "grad_norm": 1.2873685359954834, "learning_rate": 9.800105263157894e-05, "loss": 0.4759, "step": 27609 }, { "epoch": 1.5460857878821817, "grad_norm": 1.1733746528625488, "learning_rate": 9.800078947368422e-05, "loss": 0.4239, "step": 27610 }, { "epoch": 1.5461417851943107, "grad_norm": 1.183221697807312, "learning_rate": 9.800052631578948e-05, "loss": 0.3502, "step": 27611 }, { "epoch": 1.5461977825064397, "grad_norm": 1.1111266613006592, "learning_rate": 9.800026315789475e-05, "loss": 0.4377, "step": 27612 }, { "epoch": 1.5462537798185687, "grad_norm": 1.2624726295471191, "learning_rate": 9.8e-05, "loss": 0.5593, "step": 27613 }, { "epoch": 1.5463097771306977, "grad_norm": 1.170574426651001, "learning_rate": 9.799973684210527e-05, "loss": 0.5029, "step": 27614 }, { "epoch": 
1.5463657744428267, "grad_norm": 1.469617247581482, "learning_rate": 9.799947368421053e-05, "loss": 0.4209, "step": 27615 }, { "epoch": 1.5464217717549558, "grad_norm": 1.3512747287750244, "learning_rate": 9.79992105263158e-05, "loss": 0.4437, "step": 27616 }, { "epoch": 1.5464777690670848, "grad_norm": 1.5204124450683594, "learning_rate": 9.799894736842106e-05, "loss": 0.5534, "step": 27617 }, { "epoch": 1.5465337663792138, "grad_norm": 1.490783929824829, "learning_rate": 9.799868421052632e-05, "loss": 0.5079, "step": 27618 }, { "epoch": 1.5465897636913428, "grad_norm": 1.3356685638427734, "learning_rate": 9.799842105263158e-05, "loss": 0.4309, "step": 27619 }, { "epoch": 1.5466457610034718, "grad_norm": 1.7717808485031128, "learning_rate": 9.799815789473684e-05, "loss": 0.4072, "step": 27620 }, { "epoch": 1.5467017583156009, "grad_norm": 1.167535662651062, "learning_rate": 9.799789473684212e-05, "loss": 0.3819, "step": 27621 }, { "epoch": 1.5467577556277299, "grad_norm": 1.2569087743759155, "learning_rate": 9.799763157894738e-05, "loss": 0.3743, "step": 27622 }, { "epoch": 1.546813752939859, "grad_norm": 1.3656669855117798, "learning_rate": 9.799736842105263e-05, "loss": 0.4982, "step": 27623 }, { "epoch": 1.546869750251988, "grad_norm": 1.3347703218460083, "learning_rate": 9.79971052631579e-05, "loss": 0.4499, "step": 27624 }, { "epoch": 1.546925747564117, "grad_norm": 1.5180779695510864, "learning_rate": 9.799684210526317e-05, "loss": 0.4667, "step": 27625 }, { "epoch": 1.546981744876246, "grad_norm": 1.380881428718567, "learning_rate": 9.799657894736843e-05, "loss": 0.3848, "step": 27626 }, { "epoch": 1.547037742188375, "grad_norm": 1.9279532432556152, "learning_rate": 9.799631578947369e-05, "loss": 0.7991, "step": 27627 }, { "epoch": 1.547093739500504, "grad_norm": 2.0329720973968506, "learning_rate": 9.799605263157895e-05, "loss": 0.4458, "step": 27628 }, { "epoch": 1.547149736812633, "grad_norm": 1.4294487237930298, "learning_rate": 9.799578947368422e-05, 
"loss": 0.4432, "step": 27629 }, { "epoch": 1.547205734124762, "grad_norm": 2.325901985168457, "learning_rate": 9.799552631578948e-05, "loss": 0.6328, "step": 27630 }, { "epoch": 1.547261731436891, "grad_norm": 1.354583978652954, "learning_rate": 9.799526315789474e-05, "loss": 0.3689, "step": 27631 }, { "epoch": 1.54731772874902, "grad_norm": 1.095689296722412, "learning_rate": 9.7995e-05, "loss": 0.3846, "step": 27632 }, { "epoch": 1.547373726061149, "grad_norm": 1.3646684885025024, "learning_rate": 9.799473684210527e-05, "loss": 0.5064, "step": 27633 }, { "epoch": 1.547429723373278, "grad_norm": 1.3029592037200928, "learning_rate": 9.799447368421053e-05, "loss": 0.5023, "step": 27634 }, { "epoch": 1.5474857206854071, "grad_norm": 1.477940320968628, "learning_rate": 9.79942105263158e-05, "loss": 0.454, "step": 27635 }, { "epoch": 1.5475417179975361, "grad_norm": 1.6404883861541748, "learning_rate": 9.799394736842105e-05, "loss": 0.4442, "step": 27636 }, { "epoch": 1.5475977153096652, "grad_norm": 1.6449779272079468, "learning_rate": 9.799368421052631e-05, "loss": 0.5236, "step": 27637 }, { "epoch": 1.5476537126217942, "grad_norm": 1.4596631526947021, "learning_rate": 9.799342105263158e-05, "loss": 0.3402, "step": 27638 }, { "epoch": 1.5477097099339232, "grad_norm": 1.3849345445632935, "learning_rate": 9.799315789473684e-05, "loss": 0.3877, "step": 27639 }, { "epoch": 1.5477657072460522, "grad_norm": 1.363826870918274, "learning_rate": 9.799289473684212e-05, "loss": 0.4869, "step": 27640 }, { "epoch": 1.5478217045581812, "grad_norm": 1.2477645874023438, "learning_rate": 9.799263157894736e-05, "loss": 0.4483, "step": 27641 }, { "epoch": 1.5478777018703103, "grad_norm": 1.1474741697311401, "learning_rate": 9.799236842105264e-05, "loss": 0.3734, "step": 27642 }, { "epoch": 1.5479336991824393, "grad_norm": 1.4541995525360107, "learning_rate": 9.79921052631579e-05, "loss": 0.4592, "step": 27643 }, { "epoch": 1.5479896964945683, "grad_norm": 1.234514832496643, 
"learning_rate": 9.799184210526317e-05, "loss": 0.5405, "step": 27644 }, { "epoch": 1.5480456938066973, "grad_norm": 1.9639506340026855, "learning_rate": 9.799157894736842e-05, "loss": 0.5765, "step": 27645 }, { "epoch": 1.5481016911188263, "grad_norm": 1.4586303234100342, "learning_rate": 9.799131578947369e-05, "loss": 0.5156, "step": 27646 }, { "epoch": 1.5481576884309554, "grad_norm": 1.235527515411377, "learning_rate": 9.799105263157895e-05, "loss": 0.4629, "step": 27647 }, { "epoch": 1.5482136857430844, "grad_norm": 1.2466105222702026, "learning_rate": 9.799078947368422e-05, "loss": 0.4021, "step": 27648 }, { "epoch": 1.5482696830552134, "grad_norm": 1.3709512948989868, "learning_rate": 9.799052631578948e-05, "loss": 0.4173, "step": 27649 }, { "epoch": 1.5483256803673424, "grad_norm": 1.4440704584121704, "learning_rate": 9.799026315789474e-05, "loss": 0.5999, "step": 27650 }, { "epoch": 1.5483816776794714, "grad_norm": 1.4406636953353882, "learning_rate": 9.799e-05, "loss": 0.4572, "step": 27651 }, { "epoch": 1.5484376749916005, "grad_norm": 1.3229143619537354, "learning_rate": 9.798973684210528e-05, "loss": 0.5996, "step": 27652 }, { "epoch": 1.5484936723037295, "grad_norm": 1.45196533203125, "learning_rate": 9.798947368421054e-05, "loss": 0.4739, "step": 27653 }, { "epoch": 1.5485496696158585, "grad_norm": 1.112792730331421, "learning_rate": 9.79892105263158e-05, "loss": 0.3605, "step": 27654 }, { "epoch": 1.5486056669279875, "grad_norm": 1.547577142715454, "learning_rate": 9.798894736842105e-05, "loss": 0.5193, "step": 27655 }, { "epoch": 1.5486616642401163, "grad_norm": 1.2999566793441772, "learning_rate": 9.798868421052631e-05, "loss": 0.404, "step": 27656 }, { "epoch": 1.5487176615522453, "grad_norm": 1.3730603456497192, "learning_rate": 9.798842105263159e-05, "loss": 0.442, "step": 27657 }, { "epoch": 1.5487736588643743, "grad_norm": 1.4913021326065063, "learning_rate": 9.798815789473685e-05, "loss": 0.4299, "step": 27658 }, { "epoch": 
1.5488296561765034, "grad_norm": 1.3966526985168457, "learning_rate": 9.798789473684211e-05, "loss": 0.4235, "step": 27659 }, { "epoch": 1.5488856534886324, "grad_norm": 1.2151272296905518, "learning_rate": 9.798763157894737e-05, "loss": 0.4361, "step": 27660 }, { "epoch": 1.5489416508007614, "grad_norm": 1.390045404434204, "learning_rate": 9.798736842105264e-05, "loss": 0.4848, "step": 27661 }, { "epoch": 1.5489976481128904, "grad_norm": 1.304226279258728, "learning_rate": 9.79871052631579e-05, "loss": 0.3626, "step": 27662 }, { "epoch": 1.5490536454250194, "grad_norm": 1.1964772939682007, "learning_rate": 9.798684210526316e-05, "loss": 0.3906, "step": 27663 }, { "epoch": 1.5491096427371485, "grad_norm": 1.7195937633514404, "learning_rate": 9.798657894736842e-05, "loss": 0.5034, "step": 27664 }, { "epoch": 1.5491656400492775, "grad_norm": 1.2917834520339966, "learning_rate": 9.798631578947369e-05, "loss": 0.419, "step": 27665 }, { "epoch": 1.5492216373614065, "grad_norm": 1.176645278930664, "learning_rate": 9.798605263157895e-05, "loss": 0.4128, "step": 27666 }, { "epoch": 1.5492776346735355, "grad_norm": 1.359494924545288, "learning_rate": 9.798578947368423e-05, "loss": 0.4341, "step": 27667 }, { "epoch": 1.5493336319856645, "grad_norm": 1.2506375312805176, "learning_rate": 9.798552631578947e-05, "loss": 0.3823, "step": 27668 }, { "epoch": 1.5493896292977936, "grad_norm": 1.3744421005249023, "learning_rate": 9.798526315789474e-05, "loss": 0.5909, "step": 27669 }, { "epoch": 1.5494456266099226, "grad_norm": 1.2375952005386353, "learning_rate": 9.7985e-05, "loss": 0.63, "step": 27670 }, { "epoch": 1.5495016239220516, "grad_norm": 1.6736398935317993, "learning_rate": 9.798473684210526e-05, "loss": 0.6227, "step": 27671 }, { "epoch": 1.5495576212341806, "grad_norm": 1.6351205110549927, "learning_rate": 9.798447368421054e-05, "loss": 0.6249, "step": 27672 }, { "epoch": 1.5496136185463096, "grad_norm": 1.5866144895553589, "learning_rate": 9.798421052631578e-05, "loss": 
0.6634, "step": 27673 }, { "epoch": 1.5496696158584387, "grad_norm": 1.447206974029541, "learning_rate": 9.798394736842106e-05, "loss": 0.4198, "step": 27674 }, { "epoch": 1.5497256131705677, "grad_norm": 1.0121902227401733, "learning_rate": 9.798368421052632e-05, "loss": 0.3474, "step": 27675 }, { "epoch": 1.5497816104826967, "grad_norm": 1.433874487876892, "learning_rate": 9.798342105263159e-05, "loss": 0.4346, "step": 27676 }, { "epoch": 1.5498376077948257, "grad_norm": 1.1715593338012695, "learning_rate": 9.798315789473685e-05, "loss": 0.4513, "step": 27677 }, { "epoch": 1.5498936051069547, "grad_norm": 1.19560968875885, "learning_rate": 9.798289473684211e-05, "loss": 0.4297, "step": 27678 }, { "epoch": 1.5499496024190837, "grad_norm": 1.3347197771072388, "learning_rate": 9.798263157894737e-05, "loss": 0.3472, "step": 27679 }, { "epoch": 1.5500055997312128, "grad_norm": 1.138706922531128, "learning_rate": 9.798236842105264e-05, "loss": 0.375, "step": 27680 }, { "epoch": 1.5500615970433418, "grad_norm": 1.112441062927246, "learning_rate": 9.79821052631579e-05, "loss": 0.4377, "step": 27681 }, { "epoch": 1.5501175943554708, "grad_norm": 1.169978380203247, "learning_rate": 9.798184210526316e-05, "loss": 0.4607, "step": 27682 }, { "epoch": 1.5501735916675998, "grad_norm": 1.4009206295013428, "learning_rate": 9.798157894736842e-05, "loss": 0.488, "step": 27683 }, { "epoch": 1.5502295889797288, "grad_norm": 1.5235466957092285, "learning_rate": 9.79813157894737e-05, "loss": 0.3927, "step": 27684 }, { "epoch": 1.5502855862918579, "grad_norm": 1.2733972072601318, "learning_rate": 9.798105263157895e-05, "loss": 0.4078, "step": 27685 }, { "epoch": 1.5503415836039869, "grad_norm": 1.9851861000061035, "learning_rate": 9.798078947368421e-05, "loss": 0.4431, "step": 27686 }, { "epoch": 1.550397580916116, "grad_norm": 1.2623870372772217, "learning_rate": 9.798052631578947e-05, "loss": 0.4077, "step": 27687 }, { "epoch": 1.550453578228245, "grad_norm": 1.3079915046691895, 
"learning_rate": 9.798026315789473e-05, "loss": 0.3512, "step": 27688 }, { "epoch": 1.550509575540374, "grad_norm": 1.4924261569976807, "learning_rate": 9.798000000000001e-05, "loss": 0.5004, "step": 27689 }, { "epoch": 1.550565572852503, "grad_norm": 1.1912822723388672, "learning_rate": 9.797973684210527e-05, "loss": 0.4008, "step": 27690 }, { "epoch": 1.550621570164632, "grad_norm": 1.5061736106872559, "learning_rate": 9.797947368421053e-05, "loss": 0.4683, "step": 27691 }, { "epoch": 1.550677567476761, "grad_norm": 1.206244707107544, "learning_rate": 9.797921052631579e-05, "loss": 0.4397, "step": 27692 }, { "epoch": 1.55073356478889, "grad_norm": 1.3236454725265503, "learning_rate": 9.797894736842106e-05, "loss": 0.416, "step": 27693 }, { "epoch": 1.550789562101019, "grad_norm": 1.4528212547302246, "learning_rate": 9.797868421052632e-05, "loss": 0.6181, "step": 27694 }, { "epoch": 1.550845559413148, "grad_norm": 1.3994133472442627, "learning_rate": 9.797842105263159e-05, "loss": 0.4998, "step": 27695 }, { "epoch": 1.550901556725277, "grad_norm": 1.1481215953826904, "learning_rate": 9.797815789473684e-05, "loss": 0.3476, "step": 27696 }, { "epoch": 1.550957554037406, "grad_norm": 1.178155779838562, "learning_rate": 9.797789473684211e-05, "loss": 0.3862, "step": 27697 }, { "epoch": 1.5510135513495351, "grad_norm": 1.1070185899734497, "learning_rate": 9.797763157894737e-05, "loss": 0.5307, "step": 27698 }, { "epoch": 1.5510695486616641, "grad_norm": 1.3176507949829102, "learning_rate": 9.797736842105265e-05, "loss": 0.3921, "step": 27699 }, { "epoch": 1.5511255459737932, "grad_norm": 1.0525776147842407, "learning_rate": 9.797710526315789e-05, "loss": 0.3283, "step": 27700 }, { "epoch": 1.5511815432859222, "grad_norm": 1.5223246812820435, "learning_rate": 9.797684210526316e-05, "loss": 0.5607, "step": 27701 }, { "epoch": 1.5512375405980512, "grad_norm": 1.5738961696624756, "learning_rate": 9.797657894736842e-05, "loss": 0.5562, "step": 27702 }, { "epoch": 
1.5512935379101802, "grad_norm": 1.657384991645813, "learning_rate": 9.79763157894737e-05, "loss": 0.4221, "step": 27703 }, { "epoch": 1.5513495352223092, "grad_norm": 1.3452095985412598, "learning_rate": 9.797605263157896e-05, "loss": 0.483, "step": 27704 }, { "epoch": 1.5514055325344382, "grad_norm": 1.2362409830093384, "learning_rate": 9.79757894736842e-05, "loss": 0.3704, "step": 27705 }, { "epoch": 1.5514615298465673, "grad_norm": 1.1338156461715698, "learning_rate": 9.797552631578948e-05, "loss": 0.4091, "step": 27706 }, { "epoch": 1.5515175271586963, "grad_norm": 1.4207375049591064, "learning_rate": 9.797526315789474e-05, "loss": 0.4474, "step": 27707 }, { "epoch": 1.5515735244708253, "grad_norm": 1.397441029548645, "learning_rate": 9.797500000000001e-05, "loss": 0.4012, "step": 27708 }, { "epoch": 1.5516295217829543, "grad_norm": 1.2236764430999756, "learning_rate": 9.797473684210527e-05, "loss": 0.3869, "step": 27709 }, { "epoch": 1.5516855190950833, "grad_norm": 1.1548060178756714, "learning_rate": 9.797447368421053e-05, "loss": 0.3724, "step": 27710 }, { "epoch": 1.5517415164072124, "grad_norm": 1.5421934127807617, "learning_rate": 9.797421052631579e-05, "loss": 0.56, "step": 27711 }, { "epoch": 1.5517975137193414, "grad_norm": 1.4410067796707153, "learning_rate": 9.797394736842106e-05, "loss": 0.4808, "step": 27712 }, { "epoch": 1.5518535110314704, "grad_norm": 1.3346655368804932, "learning_rate": 9.797368421052632e-05, "loss": 0.4176, "step": 27713 }, { "epoch": 1.5519095083435994, "grad_norm": 1.3503823280334473, "learning_rate": 9.797342105263158e-05, "loss": 0.5093, "step": 27714 }, { "epoch": 1.5519655056557284, "grad_norm": 1.340509057044983, "learning_rate": 9.797315789473684e-05, "loss": 0.4658, "step": 27715 }, { "epoch": 1.5520215029678575, "grad_norm": 1.1793127059936523, "learning_rate": 9.797289473684211e-05, "loss": 0.4391, "step": 27716 }, { "epoch": 1.5520775002799865, "grad_norm": 1.1974103450775146, "learning_rate": 
9.797263157894737e-05, "loss": 0.3996, "step": 27717 }, { "epoch": 1.5521334975921155, "grad_norm": 1.266251564025879, "learning_rate": 9.797236842105263e-05, "loss": 0.4152, "step": 27718 }, { "epoch": 1.5521894949042445, "grad_norm": 2.003234386444092, "learning_rate": 9.79721052631579e-05, "loss": 0.5629, "step": 27719 }, { "epoch": 1.5522454922163735, "grad_norm": 1.327091097831726, "learning_rate": 9.797184210526317e-05, "loss": 0.3728, "step": 27720 }, { "epoch": 1.5523014895285026, "grad_norm": 1.35039484500885, "learning_rate": 9.797157894736843e-05, "loss": 0.4403, "step": 27721 }, { "epoch": 1.5523574868406316, "grad_norm": 1.0910850763320923, "learning_rate": 9.79713157894737e-05, "loss": 0.366, "step": 27722 }, { "epoch": 1.5524134841527606, "grad_norm": 1.8816014528274536, "learning_rate": 9.797105263157895e-05, "loss": 0.6915, "step": 27723 }, { "epoch": 1.5524694814648896, "grad_norm": 1.3539272546768188, "learning_rate": 9.79707894736842e-05, "loss": 0.4375, "step": 27724 }, { "epoch": 1.5525254787770186, "grad_norm": 1.5180089473724365, "learning_rate": 9.797052631578948e-05, "loss": 0.4702, "step": 27725 }, { "epoch": 1.5525814760891476, "grad_norm": 1.1890053749084473, "learning_rate": 9.797026315789474e-05, "loss": 0.3834, "step": 27726 }, { "epoch": 1.5526374734012767, "grad_norm": 1.3108999729156494, "learning_rate": 9.797000000000001e-05, "loss": 0.4571, "step": 27727 }, { "epoch": 1.5526934707134057, "grad_norm": 1.4722704887390137, "learning_rate": 9.796973684210526e-05, "loss": 0.5284, "step": 27728 }, { "epoch": 1.5527494680255347, "grad_norm": 1.3678479194641113, "learning_rate": 9.796947368421053e-05, "loss": 0.5655, "step": 27729 }, { "epoch": 1.5528054653376637, "grad_norm": 1.1171141862869263, "learning_rate": 9.796921052631579e-05, "loss": 0.4052, "step": 27730 }, { "epoch": 1.5528614626497927, "grad_norm": 1.676128625869751, "learning_rate": 9.796894736842106e-05, "loss": 0.4494, "step": 27731 }, { "epoch": 1.5529174599619218, 
"grad_norm": 1.7069107294082642, "learning_rate": 9.796868421052632e-05, "loss": 0.6305, "step": 27732 }, { "epoch": 1.5529734572740508, "grad_norm": 1.9045878648757935, "learning_rate": 9.796842105263158e-05, "loss": 0.4019, "step": 27733 }, { "epoch": 1.5530294545861798, "grad_norm": 1.4582682847976685, "learning_rate": 9.796815789473684e-05, "loss": 0.335, "step": 27734 }, { "epoch": 1.5530854518983088, "grad_norm": 1.0639840364456177, "learning_rate": 9.796789473684212e-05, "loss": 0.4934, "step": 27735 }, { "epoch": 1.5531414492104378, "grad_norm": 1.4219121932983398, "learning_rate": 9.796763157894738e-05, "loss": 0.4423, "step": 27736 }, { "epoch": 1.5531974465225669, "grad_norm": 1.201599359512329, "learning_rate": 9.796736842105264e-05, "loss": 0.409, "step": 27737 }, { "epoch": 1.5532534438346959, "grad_norm": 1.385581612586975, "learning_rate": 9.79671052631579e-05, "loss": 0.4744, "step": 27738 }, { "epoch": 1.553309441146825, "grad_norm": 1.4093060493469238, "learning_rate": 9.796684210526317e-05, "loss": 0.3263, "step": 27739 }, { "epoch": 1.553365438458954, "grad_norm": 1.5411500930786133, "learning_rate": 9.796657894736843e-05, "loss": 0.4366, "step": 27740 }, { "epoch": 1.553421435771083, "grad_norm": 1.2060127258300781, "learning_rate": 9.796631578947369e-05, "loss": 0.4044, "step": 27741 }, { "epoch": 1.553477433083212, "grad_norm": 1.3991246223449707, "learning_rate": 9.796605263157895e-05, "loss": 0.4528, "step": 27742 }, { "epoch": 1.553533430395341, "grad_norm": 1.4705504179000854, "learning_rate": 9.796578947368421e-05, "loss": 0.549, "step": 27743 }, { "epoch": 1.55358942770747, "grad_norm": 1.3193557262420654, "learning_rate": 9.796552631578948e-05, "loss": 0.3969, "step": 27744 }, { "epoch": 1.553645425019599, "grad_norm": 1.5294543504714966, "learning_rate": 9.796526315789474e-05, "loss": 0.4579, "step": 27745 }, { "epoch": 1.553701422331728, "grad_norm": 1.4128329753875732, "learning_rate": 9.7965e-05, "loss": 0.5688, "step": 27746 }, { 
"epoch": 1.553757419643857, "grad_norm": 1.693296194076538, "learning_rate": 9.796473684210526e-05, "loss": 0.6006, "step": 27747 }, { "epoch": 1.553813416955986, "grad_norm": 1.4760183095932007, "learning_rate": 9.796447368421053e-05, "loss": 0.4203, "step": 27748 }, { "epoch": 1.553869414268115, "grad_norm": 1.2221639156341553, "learning_rate": 9.79642105263158e-05, "loss": 0.4608, "step": 27749 }, { "epoch": 1.553925411580244, "grad_norm": 1.3965953588485718, "learning_rate": 9.796394736842107e-05, "loss": 0.5166, "step": 27750 }, { "epoch": 1.5539814088923731, "grad_norm": 1.446052074432373, "learning_rate": 9.796368421052631e-05, "loss": 0.4521, "step": 27751 }, { "epoch": 1.5540374062045021, "grad_norm": 1.1243226528167725, "learning_rate": 9.796342105263159e-05, "loss": 0.2749, "step": 27752 }, { "epoch": 1.5540934035166312, "grad_norm": 1.5954065322875977, "learning_rate": 9.796315789473685e-05, "loss": 0.5175, "step": 27753 }, { "epoch": 1.5541494008287602, "grad_norm": 1.2738813161849976, "learning_rate": 9.796289473684212e-05, "loss": 0.4379, "step": 27754 }, { "epoch": 1.5542053981408892, "grad_norm": 1.3528434038162231, "learning_rate": 9.796263157894737e-05, "loss": 0.4364, "step": 27755 }, { "epoch": 1.5542613954530182, "grad_norm": 1.5497957468032837, "learning_rate": 9.796236842105264e-05, "loss": 0.5718, "step": 27756 }, { "epoch": 1.5543173927651472, "grad_norm": 1.4113351106643677, "learning_rate": 9.79621052631579e-05, "loss": 0.5735, "step": 27757 }, { "epoch": 1.5543733900772763, "grad_norm": 1.560281753540039, "learning_rate": 9.796184210526316e-05, "loss": 0.4684, "step": 27758 }, { "epoch": 1.5544293873894053, "grad_norm": 1.4054566621780396, "learning_rate": 9.796157894736843e-05, "loss": 0.3689, "step": 27759 }, { "epoch": 1.5544853847015343, "grad_norm": 1.5809017419815063, "learning_rate": 9.796131578947368e-05, "loss": 0.4974, "step": 27760 }, { "epoch": 1.5545413820136633, "grad_norm": 1.3012982606887817, "learning_rate": 
9.796105263157895e-05, "loss": 0.5023, "step": 27761 }, { "epoch": 1.5545973793257923, "grad_norm": 1.0782115459442139, "learning_rate": 9.796078947368421e-05, "loss": 0.3798, "step": 27762 }, { "epoch": 1.5546533766379214, "grad_norm": 1.3033592700958252, "learning_rate": 9.796052631578948e-05, "loss": 0.4332, "step": 27763 }, { "epoch": 1.5547093739500504, "grad_norm": 1.4053047895431519, "learning_rate": 9.796026315789474e-05, "loss": 0.4463, "step": 27764 }, { "epoch": 1.5547653712621794, "grad_norm": 1.3142244815826416, "learning_rate": 9.796e-05, "loss": 0.4889, "step": 27765 }, { "epoch": 1.5548213685743084, "grad_norm": 1.018303632736206, "learning_rate": 9.795973684210526e-05, "loss": 0.3926, "step": 27766 }, { "epoch": 1.5548773658864374, "grad_norm": 1.1563303470611572, "learning_rate": 9.795947368421054e-05, "loss": 0.3991, "step": 27767 }, { "epoch": 1.5549333631985665, "grad_norm": 1.3148008584976196, "learning_rate": 9.79592105263158e-05, "loss": 0.4882, "step": 27768 }, { "epoch": 1.5549893605106955, "grad_norm": 1.119433879852295, "learning_rate": 9.795894736842106e-05, "loss": 0.3509, "step": 27769 }, { "epoch": 1.5550453578228245, "grad_norm": 1.9987049102783203, "learning_rate": 9.795868421052632e-05, "loss": 0.4775, "step": 27770 }, { "epoch": 1.5551013551349535, "grad_norm": 1.4459271430969238, "learning_rate": 9.795842105263159e-05, "loss": 0.5327, "step": 27771 }, { "epoch": 1.5551573524470825, "grad_norm": 22.486413955688477, "learning_rate": 9.795815789473685e-05, "loss": 0.6281, "step": 27772 }, { "epoch": 1.5552133497592115, "grad_norm": 1.9849474430084229, "learning_rate": 9.795789473684211e-05, "loss": 0.6185, "step": 27773 }, { "epoch": 1.5552693470713406, "grad_norm": 1.5695642232894897, "learning_rate": 9.795763157894737e-05, "loss": 0.3902, "step": 27774 }, { "epoch": 1.5553253443834696, "grad_norm": 1.440346121788025, "learning_rate": 9.795736842105263e-05, "loss": 0.5583, "step": 27775 }, { "epoch": 1.5553813416955986, 
"grad_norm": 1.3403582572937012, "learning_rate": 9.79571052631579e-05, "loss": 0.5714, "step": 27776 }, { "epoch": 1.5554373390077276, "grad_norm": 1.3569977283477783, "learning_rate": 9.795684210526316e-05, "loss": 0.3281, "step": 27777 }, { "epoch": 1.5554933363198566, "grad_norm": 1.1667449474334717, "learning_rate": 9.795657894736842e-05, "loss": 0.5078, "step": 27778 }, { "epoch": 1.5555493336319857, "grad_norm": 1.2146210670471191, "learning_rate": 9.795631578947368e-05, "loss": 0.5043, "step": 27779 }, { "epoch": 1.5556053309441147, "grad_norm": 2.4014410972595215, "learning_rate": 9.795605263157895e-05, "loss": 0.445, "step": 27780 }, { "epoch": 1.5556613282562437, "grad_norm": 1.3136094808578491, "learning_rate": 9.795578947368421e-05, "loss": 0.6192, "step": 27781 }, { "epoch": 1.5557173255683727, "grad_norm": 1.2894420623779297, "learning_rate": 9.795552631578949e-05, "loss": 0.4372, "step": 27782 }, { "epoch": 1.5557733228805017, "grad_norm": 1.4806616306304932, "learning_rate": 9.795526315789473e-05, "loss": 0.6182, "step": 27783 }, { "epoch": 1.5558293201926308, "grad_norm": 1.118759274482727, "learning_rate": 9.7955e-05, "loss": 0.4354, "step": 27784 }, { "epoch": 1.5558853175047598, "grad_norm": 1.2672228813171387, "learning_rate": 9.795473684210527e-05, "loss": 0.3302, "step": 27785 }, { "epoch": 1.5559413148168888, "grad_norm": 1.3909382820129395, "learning_rate": 9.795447368421054e-05, "loss": 0.619, "step": 27786 }, { "epoch": 1.5559973121290178, "grad_norm": 1.2416855096817017, "learning_rate": 9.79542105263158e-05, "loss": 0.4477, "step": 27787 }, { "epoch": 1.5560533094411468, "grad_norm": 1.565833330154419, "learning_rate": 9.795394736842106e-05, "loss": 0.5442, "step": 27788 }, { "epoch": 1.5561093067532759, "grad_norm": 1.4561254978179932, "learning_rate": 9.795368421052632e-05, "loss": 0.5995, "step": 27789 }, { "epoch": 1.5561653040654049, "grad_norm": 1.3448600769042969, "learning_rate": 9.795342105263159e-05, "loss": 0.4515, "step": 
27790 }, { "epoch": 1.556221301377534, "grad_norm": 1.1499367952346802, "learning_rate": 9.795315789473685e-05, "loss": 0.445, "step": 27791 }, { "epoch": 1.556277298689663, "grad_norm": 1.1299775838851929, "learning_rate": 9.79528947368421e-05, "loss": 0.4119, "step": 27792 }, { "epoch": 1.556333296001792, "grad_norm": 1.1896737813949585, "learning_rate": 9.795263157894737e-05, "loss": 0.4403, "step": 27793 }, { "epoch": 1.556389293313921, "grad_norm": 1.3264724016189575, "learning_rate": 9.795236842105263e-05, "loss": 0.5096, "step": 27794 }, { "epoch": 1.55644529062605, "grad_norm": 1.5012086629867554, "learning_rate": 9.79521052631579e-05, "loss": 0.4116, "step": 27795 }, { "epoch": 1.556501287938179, "grad_norm": 2.2111494541168213, "learning_rate": 9.795184210526316e-05, "loss": 0.5572, "step": 27796 }, { "epoch": 1.556557285250308, "grad_norm": 1.1106387376785278, "learning_rate": 9.795157894736842e-05, "loss": 0.3883, "step": 27797 }, { "epoch": 1.556613282562437, "grad_norm": 1.485128402709961, "learning_rate": 9.795131578947368e-05, "loss": 0.5082, "step": 27798 }, { "epoch": 1.556669279874566, "grad_norm": 1.1329487562179565, "learning_rate": 9.795105263157896e-05, "loss": 0.4078, "step": 27799 }, { "epoch": 1.556725277186695, "grad_norm": 1.0622822046279907, "learning_rate": 9.795078947368422e-05, "loss": 0.473, "step": 27800 }, { "epoch": 1.556781274498824, "grad_norm": 1.2985491752624512, "learning_rate": 9.795052631578948e-05, "loss": 0.4488, "step": 27801 }, { "epoch": 1.556837271810953, "grad_norm": 1.0579886436462402, "learning_rate": 9.795026315789474e-05, "loss": 0.3947, "step": 27802 }, { "epoch": 1.5568932691230821, "grad_norm": 1.3231867551803589, "learning_rate": 9.795000000000001e-05, "loss": 0.4697, "step": 27803 }, { "epoch": 1.5569492664352111, "grad_norm": 1.3121984004974365, "learning_rate": 9.794973684210527e-05, "loss": 0.4894, "step": 27804 }, { "epoch": 1.5570052637473402, "grad_norm": 1.2421481609344482, "learning_rate": 
9.794947368421053e-05, "loss": 0.4702, "step": 27805 }, { "epoch": 1.5570612610594692, "grad_norm": 1.358551025390625, "learning_rate": 9.794921052631579e-05, "loss": 0.4754, "step": 27806 }, { "epoch": 1.5571172583715982, "grad_norm": 1.1165051460266113, "learning_rate": 9.794894736842106e-05, "loss": 0.413, "step": 27807 }, { "epoch": 1.5571732556837272, "grad_norm": 1.1838672161102295, "learning_rate": 9.794868421052632e-05, "loss": 0.4276, "step": 27808 }, { "epoch": 1.5572292529958562, "grad_norm": 1.4677737951278687, "learning_rate": 9.794842105263158e-05, "loss": 0.5689, "step": 27809 }, { "epoch": 1.5572852503079853, "grad_norm": 2.69555926322937, "learning_rate": 9.794815789473684e-05, "loss": 0.6513, "step": 27810 }, { "epoch": 1.5573412476201143, "grad_norm": 1.8028641939163208, "learning_rate": 9.79478947368421e-05, "loss": 0.4972, "step": 27811 }, { "epoch": 1.5573972449322433, "grad_norm": 1.2707033157348633, "learning_rate": 9.794763157894737e-05, "loss": 0.4286, "step": 27812 }, { "epoch": 1.5574532422443723, "grad_norm": 1.3541085720062256, "learning_rate": 9.794736842105263e-05, "loss": 0.516, "step": 27813 }, { "epoch": 1.5575092395565013, "grad_norm": 45.87267303466797, "learning_rate": 9.794710526315791e-05, "loss": 0.4842, "step": 27814 }, { "epoch": 1.5575652368686304, "grad_norm": 1.760021686553955, "learning_rate": 9.794684210526315e-05, "loss": 0.7113, "step": 27815 }, { "epoch": 1.5576212341807594, "grad_norm": 1.8014436960220337, "learning_rate": 9.794657894736843e-05, "loss": 0.8422, "step": 27816 }, { "epoch": 1.5576772314928884, "grad_norm": 1.4810210466384888, "learning_rate": 9.794631578947369e-05, "loss": 0.441, "step": 27817 }, { "epoch": 1.5577332288050174, "grad_norm": 1.642518162727356, "learning_rate": 9.794605263157896e-05, "loss": 0.386, "step": 27818 }, { "epoch": 1.5577892261171464, "grad_norm": 1.0953466892242432, "learning_rate": 9.794578947368422e-05, "loss": 0.4178, "step": 27819 }, { "epoch": 1.5578452234292754, 
"grad_norm": 1.2690304517745972, "learning_rate": 9.794552631578948e-05, "loss": 0.4771, "step": 27820 }, { "epoch": 1.5579012207414045, "grad_norm": 1.3168573379516602, "learning_rate": 9.794526315789474e-05, "loss": 0.4387, "step": 27821 }, { "epoch": 1.5579572180535335, "grad_norm": 1.2579644918441772, "learning_rate": 9.794500000000001e-05, "loss": 0.5125, "step": 27822 }, { "epoch": 1.5580132153656625, "grad_norm": 1.160528540611267, "learning_rate": 9.794473684210527e-05, "loss": 0.4034, "step": 27823 }, { "epoch": 1.5580692126777915, "grad_norm": 1.2254694700241089, "learning_rate": 9.794447368421053e-05, "loss": 0.4466, "step": 27824 }, { "epoch": 1.5581252099899205, "grad_norm": 1.1999738216400146, "learning_rate": 9.794421052631579e-05, "loss": 0.441, "step": 27825 }, { "epoch": 1.5581812073020496, "grad_norm": 1.1831997632980347, "learning_rate": 9.794394736842105e-05, "loss": 0.4673, "step": 27826 }, { "epoch": 1.5582372046141786, "grad_norm": 1.330078125, "learning_rate": 9.794368421052632e-05, "loss": 0.5133, "step": 27827 }, { "epoch": 1.5582932019263076, "grad_norm": 1.326755404472351, "learning_rate": 9.794342105263158e-05, "loss": 0.4091, "step": 27828 }, { "epoch": 1.5583491992384366, "grad_norm": 1.13228440284729, "learning_rate": 9.794315789473684e-05, "loss": 0.3985, "step": 27829 }, { "epoch": 1.5584051965505656, "grad_norm": 1.2264310121536255, "learning_rate": 9.79428947368421e-05, "loss": 0.4694, "step": 27830 }, { "epoch": 1.5584611938626947, "grad_norm": 1.1563001871109009, "learning_rate": 9.794263157894738e-05, "loss": 0.3887, "step": 27831 }, { "epoch": 1.5585171911748237, "grad_norm": 1.2988418340682983, "learning_rate": 9.794236842105264e-05, "loss": 0.5275, "step": 27832 }, { "epoch": 1.5585731884869527, "grad_norm": 1.272973895072937, "learning_rate": 9.79421052631579e-05, "loss": 0.4014, "step": 27833 }, { "epoch": 1.5586291857990817, "grad_norm": 1.4969706535339355, "learning_rate": 9.794184210526316e-05, "loss": 0.5557, "step": 
27834 }, { "epoch": 1.5586851831112107, "grad_norm": 1.4358437061309814, "learning_rate": 9.794157894736843e-05, "loss": 0.3992, "step": 27835 }, { "epoch": 1.5587411804233398, "grad_norm": 1.112850546836853, "learning_rate": 9.794131578947369e-05, "loss": 0.4536, "step": 27836 }, { "epoch": 1.5587971777354688, "grad_norm": 1.3522948026657104, "learning_rate": 9.794105263157896e-05, "loss": 0.4322, "step": 27837 }, { "epoch": 1.5588531750475978, "grad_norm": 1.5419671535491943, "learning_rate": 9.794078947368421e-05, "loss": 0.4437, "step": 27838 }, { "epoch": 1.5589091723597268, "grad_norm": 1.2560789585113525, "learning_rate": 9.794052631578948e-05, "loss": 0.414, "step": 27839 }, { "epoch": 1.5589651696718558, "grad_norm": 1.4753572940826416, "learning_rate": 9.794026315789474e-05, "loss": 0.4246, "step": 27840 }, { "epoch": 1.5590211669839849, "grad_norm": 1.5477051734924316, "learning_rate": 9.794000000000001e-05, "loss": 0.462, "step": 27841 }, { "epoch": 1.5590771642961139, "grad_norm": 1.2439004182815552, "learning_rate": 9.793973684210527e-05, "loss": 0.3825, "step": 27842 }, { "epoch": 1.559133161608243, "grad_norm": 1.1846262216567993, "learning_rate": 9.793947368421053e-05, "loss": 0.3773, "step": 27843 }, { "epoch": 1.559189158920372, "grad_norm": 1.2155591249465942, "learning_rate": 9.79392105263158e-05, "loss": 0.3896, "step": 27844 }, { "epoch": 1.559245156232501, "grad_norm": 1.6876803636550903, "learning_rate": 9.793894736842105e-05, "loss": 0.6784, "step": 27845 }, { "epoch": 1.55930115354463, "grad_norm": 1.1184388399124146, "learning_rate": 9.793868421052633e-05, "loss": 0.4407, "step": 27846 }, { "epoch": 1.559357150856759, "grad_norm": 1.3937392234802246, "learning_rate": 9.793842105263157e-05, "loss": 0.6646, "step": 27847 }, { "epoch": 1.559413148168888, "grad_norm": 1.4409432411193848, "learning_rate": 9.793815789473685e-05, "loss": 0.6223, "step": 27848 }, { "epoch": 1.559469145481017, "grad_norm": 2.5783779621124268, "learning_rate": 
9.79378947368421e-05, "loss": 0.4499, "step": 27849 }, { "epoch": 1.559525142793146, "grad_norm": 1.3124620914459229, "learning_rate": 9.793763157894738e-05, "loss": 0.4684, "step": 27850 }, { "epoch": 1.559581140105275, "grad_norm": 1.2405046224594116, "learning_rate": 9.793736842105264e-05, "loss": 0.4707, "step": 27851 }, { "epoch": 1.559637137417404, "grad_norm": 1.333479642868042, "learning_rate": 9.79371052631579e-05, "loss": 0.4788, "step": 27852 }, { "epoch": 1.559693134729533, "grad_norm": 1.2092924118041992, "learning_rate": 9.793684210526316e-05, "loss": 0.4107, "step": 27853 }, { "epoch": 1.559749132041662, "grad_norm": 1.1981545686721802, "learning_rate": 9.793657894736843e-05, "loss": 0.377, "step": 27854 }, { "epoch": 1.5598051293537911, "grad_norm": 1.3340747356414795, "learning_rate": 9.793631578947369e-05, "loss": 0.4975, "step": 27855 }, { "epoch": 1.5598611266659201, "grad_norm": 1.3088123798370361, "learning_rate": 9.793605263157895e-05, "loss": 0.5356, "step": 27856 }, { "epoch": 1.5599171239780492, "grad_norm": 1.5157161951065063, "learning_rate": 9.793578947368421e-05, "loss": 0.5309, "step": 27857 }, { "epoch": 1.5599731212901782, "grad_norm": 1.4487133026123047, "learning_rate": 9.793552631578948e-05, "loss": 0.469, "step": 27858 }, { "epoch": 1.5600291186023072, "grad_norm": 1.2588891983032227, "learning_rate": 9.793526315789474e-05, "loss": 0.399, "step": 27859 }, { "epoch": 1.5600851159144362, "grad_norm": 1.49861478805542, "learning_rate": 9.7935e-05, "loss": 0.4292, "step": 27860 }, { "epoch": 1.5601411132265652, "grad_norm": 1.1538630723953247, "learning_rate": 9.793473684210526e-05, "loss": 0.4191, "step": 27861 }, { "epoch": 1.5601971105386943, "grad_norm": 1.2881295680999756, "learning_rate": 9.793447368421052e-05, "loss": 0.413, "step": 27862 }, { "epoch": 1.5602531078508233, "grad_norm": 1.2034615278244019, "learning_rate": 9.79342105263158e-05, "loss": 0.4059, "step": 27863 }, { "epoch": 1.5603091051629523, "grad_norm": 
1.1906850337982178, "learning_rate": 9.793394736842106e-05, "loss": 0.4292, "step": 27864 }, { "epoch": 1.5603651024750813, "grad_norm": 1.0385959148406982, "learning_rate": 9.793368421052632e-05, "loss": 0.4155, "step": 27865 }, { "epoch": 1.5604210997872103, "grad_norm": 2.187077283859253, "learning_rate": 9.793342105263158e-05, "loss": 0.5637, "step": 27866 }, { "epoch": 1.5604770970993393, "grad_norm": 1.3084129095077515, "learning_rate": 9.793315789473685e-05, "loss": 0.4163, "step": 27867 }, { "epoch": 1.5605330944114684, "grad_norm": 1.4236375093460083, "learning_rate": 9.793289473684211e-05, "loss": 0.6847, "step": 27868 }, { "epoch": 1.5605890917235974, "grad_norm": 1.2820924520492554, "learning_rate": 9.793263157894738e-05, "loss": 0.4831, "step": 27869 }, { "epoch": 1.5606450890357264, "grad_norm": 1.1205179691314697, "learning_rate": 9.793236842105263e-05, "loss": 0.4814, "step": 27870 }, { "epoch": 1.5607010863478554, "grad_norm": 1.2113511562347412, "learning_rate": 9.79321052631579e-05, "loss": 0.342, "step": 27871 }, { "epoch": 1.5607570836599844, "grad_norm": 1.2579805850982666, "learning_rate": 9.793184210526316e-05, "loss": 0.3823, "step": 27872 }, { "epoch": 1.5608130809721135, "grad_norm": 1.8428640365600586, "learning_rate": 9.793157894736843e-05, "loss": 0.4281, "step": 27873 }, { "epoch": 1.5608690782842425, "grad_norm": 1.8847538232803345, "learning_rate": 9.79313157894737e-05, "loss": 0.3794, "step": 27874 }, { "epoch": 1.5609250755963715, "grad_norm": 1.2826124429702759, "learning_rate": 9.793105263157895e-05, "loss": 0.4482, "step": 27875 }, { "epoch": 1.5609810729085005, "grad_norm": 1.134809136390686, "learning_rate": 9.793078947368421e-05, "loss": 0.4436, "step": 27876 }, { "epoch": 1.5610370702206295, "grad_norm": 1.2939330339431763, "learning_rate": 9.793052631578949e-05, "loss": 0.4125, "step": 27877 }, { "epoch": 1.5610930675327586, "grad_norm": 1.9151451587677002, "learning_rate": 9.793026315789475e-05, "loss": 0.5838, "step": 
27878 }, { "epoch": 1.5611490648448876, "grad_norm": 1.1742690801620483, "learning_rate": 9.793e-05, "loss": 0.3943, "step": 27879 }, { "epoch": 1.5612050621570166, "grad_norm": 1.6123944520950317, "learning_rate": 9.792973684210527e-05, "loss": 0.5769, "step": 27880 }, { "epoch": 1.5612610594691456, "grad_norm": 1.2266395092010498, "learning_rate": 9.792947368421053e-05, "loss": 0.3835, "step": 27881 }, { "epoch": 1.5613170567812746, "grad_norm": 1.2051206827163696, "learning_rate": 9.79292105263158e-05, "loss": 0.4799, "step": 27882 }, { "epoch": 1.5613730540934037, "grad_norm": 1.504221796989441, "learning_rate": 9.792894736842106e-05, "loss": 0.6255, "step": 27883 }, { "epoch": 1.5614290514055327, "grad_norm": 1.3478666543960571, "learning_rate": 9.792868421052632e-05, "loss": 0.4994, "step": 27884 }, { "epoch": 1.5614850487176617, "grad_norm": 1.4160724878311157, "learning_rate": 9.792842105263158e-05, "loss": 0.5038, "step": 27885 }, { "epoch": 1.5615410460297907, "grad_norm": 1.9105815887451172, "learning_rate": 9.792815789473685e-05, "loss": 0.5542, "step": 27886 }, { "epoch": 1.5615970433419197, "grad_norm": 1.240626335144043, "learning_rate": 9.792789473684211e-05, "loss": 0.3307, "step": 27887 }, { "epoch": 1.5616530406540488, "grad_norm": 1.2806501388549805, "learning_rate": 9.792763157894737e-05, "loss": 0.35, "step": 27888 }, { "epoch": 1.5617090379661778, "grad_norm": 1.2732311487197876, "learning_rate": 9.792736842105263e-05, "loss": 0.4266, "step": 27889 }, { "epoch": 1.5617650352783068, "grad_norm": 1.423905372619629, "learning_rate": 9.79271052631579e-05, "loss": 0.4768, "step": 27890 }, { "epoch": 1.5618210325904358, "grad_norm": 1.2721583843231201, "learning_rate": 9.792684210526316e-05, "loss": 0.4447, "step": 27891 }, { "epoch": 1.5618770299025648, "grad_norm": 1.4054096937179565, "learning_rate": 9.792657894736844e-05, "loss": 0.5952, "step": 27892 }, { "epoch": 1.5619330272146938, "grad_norm": 1.4433014392852783, "learning_rate": 
9.792631578947368e-05, "loss": 0.4713, "step": 27893 }, { "epoch": 1.5619890245268229, "grad_norm": 1.7865170240402222, "learning_rate": 9.792605263157896e-05, "loss": 0.4889, "step": 27894 }, { "epoch": 1.5620450218389519, "grad_norm": 1.6442961692810059, "learning_rate": 9.792578947368422e-05, "loss": 0.3941, "step": 27895 }, { "epoch": 1.562101019151081, "grad_norm": 1.4772812128067017, "learning_rate": 9.792552631578948e-05, "loss": 0.4298, "step": 27896 }, { "epoch": 1.56215701646321, "grad_norm": 1.2540076971054077, "learning_rate": 9.792526315789475e-05, "loss": 0.381, "step": 27897 }, { "epoch": 1.562213013775339, "grad_norm": 1.2921191453933716, "learning_rate": 9.7925e-05, "loss": 0.4336, "step": 27898 }, { "epoch": 1.562269011087468, "grad_norm": 1.384541630744934, "learning_rate": 9.792473684210527e-05, "loss": 0.5008, "step": 27899 }, { "epoch": 1.562325008399597, "grad_norm": 1.2399932146072388, "learning_rate": 9.792447368421053e-05, "loss": 0.3815, "step": 27900 }, { "epoch": 1.562381005711726, "grad_norm": 1.7059587240219116, "learning_rate": 9.79242105263158e-05, "loss": 0.4672, "step": 27901 }, { "epoch": 1.562437003023855, "grad_norm": 1.260528564453125, "learning_rate": 9.792394736842105e-05, "loss": 0.4081, "step": 27902 }, { "epoch": 1.562493000335984, "grad_norm": 1.2083096504211426, "learning_rate": 9.792368421052632e-05, "loss": 0.4603, "step": 27903 }, { "epoch": 1.5625489976481128, "grad_norm": 1.1734106540679932, "learning_rate": 9.792342105263158e-05, "loss": 0.4077, "step": 27904 }, { "epoch": 1.5626049949602419, "grad_norm": 1.3308879137039185, "learning_rate": 9.792315789473685e-05, "loss": 0.3871, "step": 27905 }, { "epoch": 1.5626609922723709, "grad_norm": 1.4251302480697632, "learning_rate": 9.792289473684211e-05, "loss": 0.4728, "step": 27906 }, { "epoch": 1.5627169895845, "grad_norm": 1.292154312133789, "learning_rate": 9.792263157894737e-05, "loss": 0.4261, "step": 27907 }, { "epoch": 1.562772986896629, "grad_norm": 
1.6118751764297485, "learning_rate": 9.792236842105263e-05, "loss": 0.462, "step": 27908 }, { "epoch": 1.562828984208758, "grad_norm": 1.6858899593353271, "learning_rate": 9.79221052631579e-05, "loss": 0.5896, "step": 27909 }, { "epoch": 1.562884981520887, "grad_norm": 1.938799262046814, "learning_rate": 9.792184210526317e-05, "loss": 0.5382, "step": 27910 }, { "epoch": 1.562940978833016, "grad_norm": 1.291366457939148, "learning_rate": 9.792157894736843e-05, "loss": 0.4255, "step": 27911 }, { "epoch": 1.562996976145145, "grad_norm": 1.2998098134994507, "learning_rate": 9.792131578947369e-05, "loss": 0.471, "step": 27912 }, { "epoch": 1.563052973457274, "grad_norm": 1.3475925922393799, "learning_rate": 9.792105263157895e-05, "loss": 0.4834, "step": 27913 }, { "epoch": 1.563108970769403, "grad_norm": 1.3017666339874268, "learning_rate": 9.792078947368422e-05, "loss": 0.5708, "step": 27914 }, { "epoch": 1.563164968081532, "grad_norm": 1.2234461307525635, "learning_rate": 9.792052631578948e-05, "loss": 0.5419, "step": 27915 }, { "epoch": 1.563220965393661, "grad_norm": 1.4304434061050415, "learning_rate": 9.792026315789474e-05, "loss": 0.4753, "step": 27916 }, { "epoch": 1.56327696270579, "grad_norm": 1.1569468975067139, "learning_rate": 9.792e-05, "loss": 0.3657, "step": 27917 }, { "epoch": 1.563332960017919, "grad_norm": 1.090216040611267, "learning_rate": 9.791973684210527e-05, "loss": 0.3583, "step": 27918 }, { "epoch": 1.5633889573300481, "grad_norm": 1.2005150318145752, "learning_rate": 9.791947368421053e-05, "loss": 0.3683, "step": 27919 }, { "epoch": 1.5634449546421771, "grad_norm": 1.5770893096923828, "learning_rate": 9.791921052631579e-05, "loss": 0.5088, "step": 27920 }, { "epoch": 1.5635009519543062, "grad_norm": 1.634976863861084, "learning_rate": 9.791894736842105e-05, "loss": 0.5555, "step": 27921 }, { "epoch": 1.5635569492664352, "grad_norm": 1.1582673788070679, "learning_rate": 9.791868421052632e-05, "loss": 0.4653, "step": 27922 }, { "epoch": 
1.5636129465785642, "grad_norm": 1.4435118436813354, "learning_rate": 9.791842105263158e-05, "loss": 0.4989, "step": 27923 }, { "epoch": 1.5636689438906932, "grad_norm": 1.4571359157562256, "learning_rate": 9.791815789473686e-05, "loss": 0.4679, "step": 27924 }, { "epoch": 1.5637249412028222, "grad_norm": 1.1578705310821533, "learning_rate": 9.79178947368421e-05, "loss": 0.4218, "step": 27925 }, { "epoch": 1.5637809385149513, "grad_norm": 1.4565349817276, "learning_rate": 9.791763157894738e-05, "loss": 0.4999, "step": 27926 }, { "epoch": 1.5638369358270803, "grad_norm": 1.5129117965698242, "learning_rate": 9.791736842105264e-05, "loss": 0.4592, "step": 27927 }, { "epoch": 1.5638929331392093, "grad_norm": 1.1397032737731934, "learning_rate": 9.791710526315791e-05, "loss": 0.447, "step": 27928 }, { "epoch": 1.5639489304513383, "grad_norm": 1.3803645372390747, "learning_rate": 9.791684210526317e-05, "loss": 0.673, "step": 27929 }, { "epoch": 1.5640049277634673, "grad_norm": 1.7738953828811646, "learning_rate": 9.791657894736841e-05, "loss": 0.5228, "step": 27930 }, { "epoch": 1.5640609250755964, "grad_norm": 1.5509098768234253, "learning_rate": 9.791631578947369e-05, "loss": 0.4285, "step": 27931 }, { "epoch": 1.5641169223877254, "grad_norm": 1.4631469249725342, "learning_rate": 9.791605263157895e-05, "loss": 0.5835, "step": 27932 }, { "epoch": 1.5641729196998544, "grad_norm": 1.380316972732544, "learning_rate": 9.791578947368422e-05, "loss": 0.4579, "step": 27933 }, { "epoch": 1.5642289170119834, "grad_norm": 1.1702483892440796, "learning_rate": 9.791552631578948e-05, "loss": 0.5154, "step": 27934 }, { "epoch": 1.5642849143241124, "grad_norm": 1.2805495262145996, "learning_rate": 9.791526315789474e-05, "loss": 0.446, "step": 27935 }, { "epoch": 1.5643409116362414, "grad_norm": 1.351359248161316, "learning_rate": 9.7915e-05, "loss": 0.5345, "step": 27936 }, { "epoch": 1.5643969089483705, "grad_norm": 1.1455228328704834, "learning_rate": 9.791473684210527e-05, "loss": 
0.3928, "step": 27937 }, { "epoch": 1.5644529062604995, "grad_norm": 1.5278799533843994, "learning_rate": 9.791447368421053e-05, "loss": 0.5924, "step": 27938 }, { "epoch": 1.5645089035726285, "grad_norm": 1.5728360414505005, "learning_rate": 9.791421052631579e-05, "loss": 0.4548, "step": 27939 }, { "epoch": 1.5645649008847575, "grad_norm": 1.5662719011306763, "learning_rate": 9.791394736842105e-05, "loss": 0.4648, "step": 27940 }, { "epoch": 1.5646208981968865, "grad_norm": 1.1243435144424438, "learning_rate": 9.791368421052633e-05, "loss": 0.339, "step": 27941 }, { "epoch": 1.5646768955090156, "grad_norm": 1.3633006811141968, "learning_rate": 9.791342105263159e-05, "loss": 0.4912, "step": 27942 }, { "epoch": 1.5647328928211446, "grad_norm": 1.3593796491622925, "learning_rate": 9.791315789473685e-05, "loss": 0.5445, "step": 27943 }, { "epoch": 1.5647888901332736, "grad_norm": 1.676698923110962, "learning_rate": 9.79128947368421e-05, "loss": 0.4787, "step": 27944 }, { "epoch": 1.5648448874454026, "grad_norm": 1.312387228012085, "learning_rate": 9.791263157894738e-05, "loss": 0.5799, "step": 27945 }, { "epoch": 1.5649008847575316, "grad_norm": 1.405980110168457, "learning_rate": 9.791236842105264e-05, "loss": 0.6163, "step": 27946 }, { "epoch": 1.5649568820696607, "grad_norm": 1.2081812620162964, "learning_rate": 9.79121052631579e-05, "loss": 0.4004, "step": 27947 }, { "epoch": 1.5650128793817897, "grad_norm": 1.5452519655227661, "learning_rate": 9.791184210526316e-05, "loss": 0.3953, "step": 27948 }, { "epoch": 1.5650688766939187, "grad_norm": 1.3677908182144165, "learning_rate": 9.791157894736842e-05, "loss": 0.474, "step": 27949 }, { "epoch": 1.5651248740060477, "grad_norm": 1.3281514644622803, "learning_rate": 9.791131578947369e-05, "loss": 0.3966, "step": 27950 }, { "epoch": 1.5651808713181767, "grad_norm": 1.2376335859298706, "learning_rate": 9.791105263157895e-05, "loss": 0.4162, "step": 27951 }, { "epoch": 1.5652368686303058, "grad_norm": 1.2949895858764648, 
"learning_rate": 9.791078947368422e-05, "loss": 0.4646, "step": 27952 }, { "epoch": 1.5652928659424348, "grad_norm": 1.4561299085617065, "learning_rate": 9.791052631578947e-05, "loss": 0.4796, "step": 27953 }, { "epoch": 1.5653488632545638, "grad_norm": 1.5170055627822876, "learning_rate": 9.791026315789474e-05, "loss": 0.5016, "step": 27954 }, { "epoch": 1.5654048605666928, "grad_norm": 2.710172176361084, "learning_rate": 9.791e-05, "loss": 0.3683, "step": 27955 }, { "epoch": 1.5654608578788218, "grad_norm": 1.7364965677261353, "learning_rate": 9.790973684210528e-05, "loss": 0.541, "step": 27956 }, { "epoch": 1.5655168551909509, "grad_norm": 1.2130730152130127, "learning_rate": 9.790947368421052e-05, "loss": 0.4467, "step": 27957 }, { "epoch": 1.5655728525030799, "grad_norm": 1.2386354207992554, "learning_rate": 9.79092105263158e-05, "loss": 0.4314, "step": 27958 }, { "epoch": 1.565628849815209, "grad_norm": 1.3954352140426636, "learning_rate": 9.790894736842106e-05, "loss": 0.4406, "step": 27959 }, { "epoch": 1.565684847127338, "grad_norm": 1.4999171495437622, "learning_rate": 9.790868421052633e-05, "loss": 0.505, "step": 27960 }, { "epoch": 1.565740844439467, "grad_norm": 1.12770676612854, "learning_rate": 9.790842105263159e-05, "loss": 0.4304, "step": 27961 }, { "epoch": 1.565796841751596, "grad_norm": 1.4203526973724365, "learning_rate": 9.790815789473685e-05, "loss": 0.522, "step": 27962 }, { "epoch": 1.565852839063725, "grad_norm": 1.2756706476211548, "learning_rate": 9.790789473684211e-05, "loss": 0.4641, "step": 27963 }, { "epoch": 1.565908836375854, "grad_norm": 1.3431830406188965, "learning_rate": 9.790763157894738e-05, "loss": 0.375, "step": 27964 }, { "epoch": 1.565964833687983, "grad_norm": 1.577451467514038, "learning_rate": 9.790736842105264e-05, "loss": 0.439, "step": 27965 }, { "epoch": 1.566020831000112, "grad_norm": 1.377826452255249, "learning_rate": 9.79071052631579e-05, "loss": 0.5604, "step": 27966 }, { "epoch": 1.566076828312241, 
"grad_norm": 1.374342679977417, "learning_rate": 9.790684210526316e-05, "loss": 0.4538, "step": 27967 }, { "epoch": 1.56613282562437, "grad_norm": 1.2720531225204468, "learning_rate": 9.790657894736842e-05, "loss": 0.3871, "step": 27968 }, { "epoch": 1.566188822936499, "grad_norm": 1.3962714672088623, "learning_rate": 9.790631578947369e-05, "loss": 0.5448, "step": 27969 }, { "epoch": 1.566244820248628, "grad_norm": 1.2228339910507202, "learning_rate": 9.790605263157895e-05, "loss": 0.3319, "step": 27970 }, { "epoch": 1.5663008175607571, "grad_norm": 1.1670565605163574, "learning_rate": 9.790578947368421e-05, "loss": 0.5092, "step": 27971 }, { "epoch": 1.5663568148728861, "grad_norm": 1.0824604034423828, "learning_rate": 9.790552631578947e-05, "loss": 0.3979, "step": 27972 }, { "epoch": 1.5664128121850152, "grad_norm": 1.3282324075698853, "learning_rate": 9.790526315789475e-05, "loss": 0.4293, "step": 27973 }, { "epoch": 1.5664688094971442, "grad_norm": 1.1624890565872192, "learning_rate": 9.7905e-05, "loss": 0.4407, "step": 27974 }, { "epoch": 1.5665248068092732, "grad_norm": 1.3137606382369995, "learning_rate": 9.790473684210527e-05, "loss": 0.4876, "step": 27975 }, { "epoch": 1.5665808041214022, "grad_norm": 1.2371691465377808, "learning_rate": 9.790447368421052e-05, "loss": 0.4415, "step": 27976 }, { "epoch": 1.5666368014335312, "grad_norm": 1.12775719165802, "learning_rate": 9.79042105263158e-05, "loss": 0.3587, "step": 27977 }, { "epoch": 1.5666927987456603, "grad_norm": 1.0950857400894165, "learning_rate": 9.790394736842106e-05, "loss": 0.3711, "step": 27978 }, { "epoch": 1.5667487960577893, "grad_norm": 1.4122695922851562, "learning_rate": 9.790368421052633e-05, "loss": 0.6856, "step": 27979 }, { "epoch": 1.5668047933699183, "grad_norm": 1.40877103805542, "learning_rate": 9.790342105263158e-05, "loss": 0.6257, "step": 27980 }, { "epoch": 1.5668607906820473, "grad_norm": 1.2427443265914917, "learning_rate": 9.790315789473685e-05, "loss": 0.3701, "step": 27981 
}, { "epoch": 1.5669167879941763, "grad_norm": 1.1560802459716797, "learning_rate": 9.790289473684211e-05, "loss": 0.435, "step": 27982 }, { "epoch": 1.5669727853063053, "grad_norm": 1.0992628335952759, "learning_rate": 9.790263157894737e-05, "loss": 0.2943, "step": 27983 }, { "epoch": 1.5670287826184344, "grad_norm": 1.3091014623641968, "learning_rate": 9.790236842105264e-05, "loss": 0.4956, "step": 27984 }, { "epoch": 1.5670847799305634, "grad_norm": 1.0343049764633179, "learning_rate": 9.790210526315789e-05, "loss": 0.3894, "step": 27985 }, { "epoch": 1.5671407772426922, "grad_norm": 1.20504891872406, "learning_rate": 9.790184210526316e-05, "loss": 0.3722, "step": 27986 }, { "epoch": 1.5671967745548212, "grad_norm": 1.3490113019943237, "learning_rate": 9.790157894736842e-05, "loss": 0.4699, "step": 27987 }, { "epoch": 1.5672527718669502, "grad_norm": 1.3986599445343018, "learning_rate": 9.79013157894737e-05, "loss": 0.4752, "step": 27988 }, { "epoch": 1.5673087691790792, "grad_norm": 4.4811224937438965, "learning_rate": 9.790105263157896e-05, "loss": 0.4955, "step": 27989 }, { "epoch": 1.5673647664912083, "grad_norm": 1.4624607563018799, "learning_rate": 9.790078947368422e-05, "loss": 0.5015, "step": 27990 }, { "epoch": 1.5674207638033373, "grad_norm": 1.1344993114471436, "learning_rate": 9.790052631578947e-05, "loss": 0.4068, "step": 27991 }, { "epoch": 1.5674767611154663, "grad_norm": 1.3949861526489258, "learning_rate": 9.790026315789475e-05, "loss": 0.4201, "step": 27992 }, { "epoch": 1.5675327584275953, "grad_norm": 1.5104824304580688, "learning_rate": 9.790000000000001e-05, "loss": 0.3837, "step": 27993 }, { "epoch": 1.5675887557397243, "grad_norm": 1.2982854843139648, "learning_rate": 9.789973684210527e-05, "loss": 0.4254, "step": 27994 }, { "epoch": 1.5676447530518534, "grad_norm": 1.2567329406738281, "learning_rate": 9.789947368421053e-05, "loss": 0.4193, "step": 27995 }, { "epoch": 1.5677007503639824, "grad_norm": 1.7564815282821655, "learning_rate": 
9.78992105263158e-05, "loss": 0.6307, "step": 27996 }, { "epoch": 1.5677567476761114, "grad_norm": 1.3201968669891357, "learning_rate": 9.789894736842106e-05, "loss": 0.412, "step": 27997 }, { "epoch": 1.5678127449882404, "grad_norm": 1.2468838691711426, "learning_rate": 9.789868421052632e-05, "loss": 0.3827, "step": 27998 }, { "epoch": 1.5678687423003694, "grad_norm": 1.3903758525848389, "learning_rate": 9.789842105263158e-05, "loss": 0.3819, "step": 27999 }, { "epoch": 1.5679247396124985, "grad_norm": 1.143258810043335, "learning_rate": 9.789815789473684e-05, "loss": 0.2986, "step": 28000 }, { "epoch": 1.5679807369246275, "grad_norm": 1.330857515335083, "learning_rate": 9.789789473684211e-05, "loss": 0.4789, "step": 28001 }, { "epoch": 1.5680367342367565, "grad_norm": 1.2843005657196045, "learning_rate": 9.789763157894737e-05, "loss": 0.4213, "step": 28002 }, { "epoch": 1.5680927315488855, "grad_norm": 1.3903347253799438, "learning_rate": 9.789736842105263e-05, "loss": 0.5385, "step": 28003 }, { "epoch": 1.5681487288610145, "grad_norm": 1.4101808071136475, "learning_rate": 9.789710526315789e-05, "loss": 0.4919, "step": 28004 }, { "epoch": 1.5682047261731435, "grad_norm": 1.4195345640182495, "learning_rate": 9.789684210526317e-05, "loss": 0.3698, "step": 28005 }, { "epoch": 1.5682607234852726, "grad_norm": 1.22687828540802, "learning_rate": 9.789657894736843e-05, "loss": 0.3312, "step": 28006 }, { "epoch": 1.5683167207974016, "grad_norm": 1.58999502658844, "learning_rate": 9.789631578947368e-05, "loss": 0.521, "step": 28007 }, { "epoch": 1.5683727181095306, "grad_norm": 1.1324678659439087, "learning_rate": 9.789605263157894e-05, "loss": 0.421, "step": 28008 }, { "epoch": 1.5684287154216596, "grad_norm": 1.395676612854004, "learning_rate": 9.789578947368422e-05, "loss": 0.5193, "step": 28009 }, { "epoch": 1.5684847127337886, "grad_norm": 1.327857255935669, "learning_rate": 9.789552631578948e-05, "loss": 0.5392, "step": 28010 }, { "epoch": 1.5685407100459177, 
"grad_norm": 1.1735122203826904, "learning_rate": 9.789526315789475e-05, "loss": 0.4637, "step": 28011 }, { "epoch": 1.5685967073580467, "grad_norm": 1.3529436588287354, "learning_rate": 9.7895e-05, "loss": 0.4668, "step": 28012 }, { "epoch": 1.5686527046701757, "grad_norm": 1.3784637451171875, "learning_rate": 9.789473684210527e-05, "loss": 0.4643, "step": 28013 }, { "epoch": 1.5687087019823047, "grad_norm": 1.3789427280426025, "learning_rate": 9.789447368421053e-05, "loss": 0.4272, "step": 28014 }, { "epoch": 1.5687646992944337, "grad_norm": 1.415741205215454, "learning_rate": 9.78942105263158e-05, "loss": 0.4292, "step": 28015 }, { "epoch": 1.5688206966065628, "grad_norm": 1.377379298210144, "learning_rate": 9.789394736842106e-05, "loss": 0.4687, "step": 28016 }, { "epoch": 1.5688766939186918, "grad_norm": 1.4200210571289062, "learning_rate": 9.789368421052631e-05, "loss": 0.4575, "step": 28017 }, { "epoch": 1.5689326912308208, "grad_norm": 1.5323363542556763, "learning_rate": 9.789342105263158e-05, "loss": 0.4414, "step": 28018 }, { "epoch": 1.5689886885429498, "grad_norm": 1.495772123336792, "learning_rate": 9.789315789473684e-05, "loss": 0.4087, "step": 28019 }, { "epoch": 1.5690446858550788, "grad_norm": 1.220541000366211, "learning_rate": 9.789289473684212e-05, "loss": 0.4097, "step": 28020 }, { "epoch": 1.5691006831672079, "grad_norm": 1.581607460975647, "learning_rate": 9.789263157894738e-05, "loss": 0.418, "step": 28021 }, { "epoch": 1.5691566804793369, "grad_norm": 1.676621437072754, "learning_rate": 9.789236842105263e-05, "loss": 0.4839, "step": 28022 }, { "epoch": 1.569212677791466, "grad_norm": 1.236293911933899, "learning_rate": 9.78921052631579e-05, "loss": 0.3658, "step": 28023 }, { "epoch": 1.569268675103595, "grad_norm": 1.4252465963363647, "learning_rate": 9.789184210526317e-05, "loss": 0.5402, "step": 28024 }, { "epoch": 1.569324672415724, "grad_norm": 1.4142038822174072, "learning_rate": 9.789157894736843e-05, "loss": 0.3963, "step": 28025 }, 
{ "epoch": 1.569380669727853, "grad_norm": 1.4917858839035034, "learning_rate": 9.789131578947369e-05, "loss": 0.4539, "step": 28026 }, { "epoch": 1.569436667039982, "grad_norm": 1.422104835510254, "learning_rate": 9.789105263157895e-05, "loss": 0.5467, "step": 28027 }, { "epoch": 1.569492664352111, "grad_norm": 1.6085867881774902, "learning_rate": 9.789078947368422e-05, "loss": 0.6167, "step": 28028 }, { "epoch": 1.56954866166424, "grad_norm": 1.2367597818374634, "learning_rate": 9.789052631578948e-05, "loss": 0.4233, "step": 28029 }, { "epoch": 1.569604658976369, "grad_norm": 1.2459625005722046, "learning_rate": 9.789026315789474e-05, "loss": 0.4643, "step": 28030 }, { "epoch": 1.569660656288498, "grad_norm": 1.1102368831634521, "learning_rate": 9.789e-05, "loss": 0.4502, "step": 28031 }, { "epoch": 1.569716653600627, "grad_norm": 1.150658130645752, "learning_rate": 9.788973684210527e-05, "loss": 0.3848, "step": 28032 }, { "epoch": 1.569772650912756, "grad_norm": 1.266994595527649, "learning_rate": 9.788947368421053e-05, "loss": 0.3786, "step": 28033 }, { "epoch": 1.569828648224885, "grad_norm": 1.4353046417236328, "learning_rate": 9.788921052631579e-05, "loss": 0.4771, "step": 28034 }, { "epoch": 1.5698846455370141, "grad_norm": 1.238640308380127, "learning_rate": 9.788894736842105e-05, "loss": 0.519, "step": 28035 }, { "epoch": 1.5699406428491431, "grad_norm": 1.2137643098831177, "learning_rate": 9.788868421052631e-05, "loss": 0.4772, "step": 28036 }, { "epoch": 1.5699966401612722, "grad_norm": 1.268845558166504, "learning_rate": 9.788842105263159e-05, "loss": 0.424, "step": 28037 }, { "epoch": 1.5700526374734012, "grad_norm": 1.2281060218811035, "learning_rate": 9.788815789473684e-05, "loss": 0.4479, "step": 28038 }, { "epoch": 1.5701086347855302, "grad_norm": 4.048982620239258, "learning_rate": 9.788789473684212e-05, "loss": 0.4112, "step": 28039 }, { "epoch": 1.5701646320976592, "grad_norm": 1.7420209646224976, "learning_rate": 9.788763157894736e-05, "loss": 
0.7577, "step": 28040 }, { "epoch": 1.5702206294097882, "grad_norm": 1.2172189950942993, "learning_rate": 9.788736842105264e-05, "loss": 0.3898, "step": 28041 }, { "epoch": 1.5702766267219173, "grad_norm": 1.5058587789535522, "learning_rate": 9.78871052631579e-05, "loss": 0.4789, "step": 28042 }, { "epoch": 1.5703326240340463, "grad_norm": 1.1051394939422607, "learning_rate": 9.788684210526317e-05, "loss": 0.3422, "step": 28043 }, { "epoch": 1.5703886213461753, "grad_norm": 1.3431580066680908, "learning_rate": 9.788657894736843e-05, "loss": 0.5644, "step": 28044 }, { "epoch": 1.5704446186583043, "grad_norm": 1.3015185594558716, "learning_rate": 9.788631578947369e-05, "loss": 0.4857, "step": 28045 }, { "epoch": 1.5705006159704333, "grad_norm": 1.2908891439437866, "learning_rate": 9.788605263157895e-05, "loss": 0.3975, "step": 28046 }, { "epoch": 1.5705566132825624, "grad_norm": 1.5780909061431885, "learning_rate": 9.788578947368422e-05, "loss": 0.4592, "step": 28047 }, { "epoch": 1.5706126105946914, "grad_norm": 1.4913671016693115, "learning_rate": 9.788552631578948e-05, "loss": 0.6319, "step": 28048 }, { "epoch": 1.5706686079068204, "grad_norm": 1.4054170846939087, "learning_rate": 9.788526315789474e-05, "loss": 0.5016, "step": 28049 }, { "epoch": 1.5707246052189494, "grad_norm": 1.1858580112457275, "learning_rate": 9.7885e-05, "loss": 0.4828, "step": 28050 }, { "epoch": 1.5707806025310784, "grad_norm": 1.3672168254852295, "learning_rate": 9.788473684210526e-05, "loss": 0.3784, "step": 28051 }, { "epoch": 1.5708365998432074, "grad_norm": 1.2569587230682373, "learning_rate": 9.788447368421054e-05, "loss": 0.4334, "step": 28052 }, { "epoch": 1.5708925971553365, "grad_norm": 1.2256430387496948, "learning_rate": 9.78842105263158e-05, "loss": 0.4234, "step": 28053 }, { "epoch": 1.5709485944674655, "grad_norm": 1.0204970836639404, "learning_rate": 9.788394736842105e-05, "loss": 0.3367, "step": 28054 }, { "epoch": 1.5710045917795945, "grad_norm": 1.4459164142608643, 
"learning_rate": 9.788368421052631e-05, "loss": 0.4671, "step": 28055 }, { "epoch": 1.5710605890917235, "grad_norm": 1.5175567865371704, "learning_rate": 9.788342105263159e-05, "loss": 0.5397, "step": 28056 }, { "epoch": 1.5711165864038525, "grad_norm": 1.364989161491394, "learning_rate": 9.788315789473685e-05, "loss": 0.483, "step": 28057 }, { "epoch": 1.5711725837159816, "grad_norm": 1.1792104244232178, "learning_rate": 9.788289473684211e-05, "loss": 0.352, "step": 28058 }, { "epoch": 1.5712285810281106, "grad_norm": 1.5198942422866821, "learning_rate": 9.788263157894737e-05, "loss": 0.4844, "step": 28059 }, { "epoch": 1.5712845783402396, "grad_norm": 1.224273681640625, "learning_rate": 9.788236842105264e-05, "loss": 0.4047, "step": 28060 }, { "epoch": 1.5713405756523686, "grad_norm": 1.302113652229309, "learning_rate": 9.78821052631579e-05, "loss": 0.4535, "step": 28061 }, { "epoch": 1.5713965729644976, "grad_norm": 1.559851050376892, "learning_rate": 9.788184210526316e-05, "loss": 0.4636, "step": 28062 }, { "epoch": 1.5714525702766267, "grad_norm": 1.35964834690094, "learning_rate": 9.788157894736842e-05, "loss": 0.5024, "step": 28063 }, { "epoch": 1.5715085675887557, "grad_norm": 1.6735707521438599, "learning_rate": 9.788131578947369e-05, "loss": 0.4968, "step": 28064 }, { "epoch": 1.5715645649008847, "grad_norm": 1.1186984777450562, "learning_rate": 9.788105263157895e-05, "loss": 0.3354, "step": 28065 }, { "epoch": 1.5716205622130137, "grad_norm": 1.3679900169372559, "learning_rate": 9.788078947368423e-05, "loss": 0.3556, "step": 28066 }, { "epoch": 1.5716765595251427, "grad_norm": 1.2580618858337402, "learning_rate": 9.788052631578947e-05, "loss": 0.466, "step": 28067 }, { "epoch": 1.5717325568372718, "grad_norm": 1.267106056213379, "learning_rate": 9.788026315789473e-05, "loss": 0.4127, "step": 28068 }, { "epoch": 1.5717885541494008, "grad_norm": 3.595611810684204, "learning_rate": 9.788e-05, "loss": 0.5909, "step": 28069 }, { "epoch": 1.5718445514615298, 
"grad_norm": NaN, "learning_rate": 9.788e-05, "loss": 0.599, "step": 28070 }, { "epoch": 1.5719005487736588, "grad_norm": 1.5328335762023926, "learning_rate": 9.787973684210526e-05, "loss": 0.4543, "step": 28071 }, { "epoch": 1.5719565460857878, "grad_norm": 1.3158411979675293, "learning_rate": 9.787947368421054e-05, "loss": 0.488, "step": 28072 }, { "epoch": 1.5720125433979169, "grad_norm": 2.418255090713501, "learning_rate": 9.787921052631578e-05, "loss": 0.5711, "step": 28073 }, { "epoch": 1.5720685407100459, "grad_norm": 1.469887614250183, "learning_rate": 9.787894736842106e-05, "loss": 0.5755, "step": 28074 }, { "epoch": 1.5721245380221749, "grad_norm": 1.3137468099594116, "learning_rate": 9.787868421052632e-05, "loss": 0.3472, "step": 28075 }, { "epoch": 1.572180535334304, "grad_norm": 1.1552168130874634, "learning_rate": 9.787842105263159e-05, "loss": 0.5308, "step": 28076 }, { "epoch": 1.572236532646433, "grad_norm": 1.41217839717865, "learning_rate": 9.787815789473685e-05, "loss": 0.3818, "step": 28077 }, { "epoch": 1.572292529958562, "grad_norm": 1.1353222131729126, "learning_rate": 9.787789473684211e-05, "loss": 0.3699, "step": 28078 }, { "epoch": 1.572348527270691, "grad_norm": 1.1848223209381104, "learning_rate": 9.787763157894737e-05, "loss": 0.5062, "step": 28079 }, { "epoch": 1.57240452458282, "grad_norm": 1.1055452823638916, "learning_rate": 9.787736842105264e-05, "loss": 0.3496, "step": 28080 }, { "epoch": 1.572460521894949, "grad_norm": 1.4840240478515625, "learning_rate": 9.78771052631579e-05, "loss": 0.4124, "step": 28081 }, { "epoch": 1.572516519207078, "grad_norm": 1.1247225999832153, "learning_rate": 9.787684210526316e-05, "loss": 0.5187, "step": 28082 }, { "epoch": 1.572572516519207, "grad_norm": 1.0302581787109375, "learning_rate": 9.787657894736842e-05, "loss": 0.412, "step": 28083 }, { "epoch": 1.572628513831336, "grad_norm": 1.3668094873428345, "learning_rate": 9.78763157894737e-05, "loss": 0.4253, "step": 28084 }, { "epoch": 
1.572684511143465, "grad_norm": 1.3305730819702148, "learning_rate": 9.787605263157895e-05, "loss": 0.5992, "step": 28085 }, { "epoch": 1.572740508455594, "grad_norm": 1.515182375907898, "learning_rate": 9.787578947368421e-05, "loss": 0.5052, "step": 28086 }, { "epoch": 1.5727965057677231, "grad_norm": 1.3288472890853882, "learning_rate": 9.787552631578947e-05, "loss": 0.4967, "step": 28087 }, { "epoch": 1.5728525030798521, "grad_norm": 1.110041856765747, "learning_rate": 9.787526315789473e-05, "loss": 0.3674, "step": 28088 }, { "epoch": 1.5729085003919812, "grad_norm": 1.2525770664215088, "learning_rate": 9.787500000000001e-05, "loss": 0.414, "step": 28089 }, { "epoch": 1.5729644977041102, "grad_norm": 1.2663404941558838, "learning_rate": 9.787473684210527e-05, "loss": 0.3943, "step": 28090 }, { "epoch": 1.5730204950162392, "grad_norm": 1.105630874633789, "learning_rate": 9.787447368421053e-05, "loss": 0.3995, "step": 28091 }, { "epoch": 1.5730764923283682, "grad_norm": 1.1396989822387695, "learning_rate": 9.787421052631579e-05, "loss": 0.4185, "step": 28092 }, { "epoch": 1.5731324896404972, "grad_norm": 1.2848345041275024, "learning_rate": 9.787394736842106e-05, "loss": 0.4938, "step": 28093 }, { "epoch": 1.5731884869526263, "grad_norm": 1.0787601470947266, "learning_rate": 9.787368421052632e-05, "loss": 0.4144, "step": 28094 }, { "epoch": 1.5732444842647553, "grad_norm": 1.9113361835479736, "learning_rate": 9.787342105263159e-05, "loss": 0.6329, "step": 28095 }, { "epoch": 1.5733004815768843, "grad_norm": 1.2810189723968506, "learning_rate": 9.787315789473684e-05, "loss": 0.4292, "step": 28096 }, { "epoch": 1.5733564788890133, "grad_norm": 1.7841038703918457, "learning_rate": 9.787289473684211e-05, "loss": 0.448, "step": 28097 }, { "epoch": 1.5734124762011423, "grad_norm": 1.3965585231781006, "learning_rate": 9.787263157894737e-05, "loss": 0.4491, "step": 28098 }, { "epoch": 1.5734684735132713, "grad_norm": 1.3444855213165283, "learning_rate": 
9.787236842105265e-05, "loss": 0.4838, "step": 28099 }, { "epoch": 1.5735244708254004, "grad_norm": 1.3435307741165161, "learning_rate": 9.78721052631579e-05, "loss": 0.5611, "step": 28100 }, { "epoch": 1.5735804681375294, "grad_norm": 1.0784518718719482, "learning_rate": 9.787184210526316e-05, "loss": 0.3621, "step": 28101 }, { "epoch": 1.5736364654496584, "grad_norm": 1.5495529174804688, "learning_rate": 9.787157894736842e-05, "loss": 0.5277, "step": 28102 }, { "epoch": 1.5736924627617874, "grad_norm": 2.4598546028137207, "learning_rate": 9.78713157894737e-05, "loss": 0.5536, "step": 28103 }, { "epoch": 1.5737484600739164, "grad_norm": 1.3591283559799194, "learning_rate": 9.787105263157896e-05, "loss": 0.5098, "step": 28104 }, { "epoch": 1.5738044573860455, "grad_norm": 1.3528375625610352, "learning_rate": 9.78707894736842e-05, "loss": 0.3944, "step": 28105 }, { "epoch": 1.5738604546981745, "grad_norm": 1.5302882194519043, "learning_rate": 9.787052631578948e-05, "loss": 0.4248, "step": 28106 }, { "epoch": 1.5739164520103035, "grad_norm": 2.221705675125122, "learning_rate": 9.787026315789474e-05, "loss": 0.5651, "step": 28107 }, { "epoch": 1.5739724493224325, "grad_norm": 1.2829450368881226, "learning_rate": 9.787000000000001e-05, "loss": 0.4317, "step": 28108 }, { "epoch": 1.5740284466345615, "grad_norm": 1.266735315322876, "learning_rate": 9.786973684210527e-05, "loss": 0.4263, "step": 28109 }, { "epoch": 1.5740844439466906, "grad_norm": 1.3932968378067017, "learning_rate": 9.786947368421053e-05, "loss": 0.4782, "step": 28110 }, { "epoch": 1.5741404412588196, "grad_norm": 1.2708779573440552, "learning_rate": 9.786921052631579e-05, "loss": 0.4157, "step": 28111 }, { "epoch": 1.5741964385709486, "grad_norm": 1.2368707656860352, "learning_rate": 9.786894736842106e-05, "loss": 0.4547, "step": 28112 }, { "epoch": 1.5742524358830776, "grad_norm": 1.4338250160217285, "learning_rate": 9.786868421052632e-05, "loss": 0.3706, "step": 28113 }, { "epoch": 1.5743084331952066, 
"grad_norm": 1.2648639678955078, "learning_rate": 9.786842105263158e-05, "loss": 0.3937, "step": 28114 }, { "epoch": 1.5743644305073357, "grad_norm": 1.6365704536437988, "learning_rate": 9.786815789473684e-05, "loss": 0.5993, "step": 28115 }, { "epoch": 1.5744204278194647, "grad_norm": 1.3124196529388428, "learning_rate": 9.786789473684211e-05, "loss": 0.4459, "step": 28116 }, { "epoch": 1.5744764251315937, "grad_norm": 1.3489229679107666, "learning_rate": 9.786763157894737e-05, "loss": 0.4817, "step": 28117 }, { "epoch": 1.5745324224437227, "grad_norm": 1.2093262672424316, "learning_rate": 9.786736842105263e-05, "loss": 0.413, "step": 28118 }, { "epoch": 1.5745884197558517, "grad_norm": 1.512413501739502, "learning_rate": 9.78671052631579e-05, "loss": 0.5071, "step": 28119 }, { "epoch": 1.5746444170679808, "grad_norm": 1.3144752979278564, "learning_rate": 9.786684210526317e-05, "loss": 0.5384, "step": 28120 }, { "epoch": 1.5747004143801098, "grad_norm": 1.8020782470703125, "learning_rate": 9.786657894736843e-05, "loss": 0.4502, "step": 28121 }, { "epoch": 1.5747564116922388, "grad_norm": 1.2363632917404175, "learning_rate": 9.786631578947369e-05, "loss": 0.3991, "step": 28122 }, { "epoch": 1.5748124090043678, "grad_norm": 1.388232946395874, "learning_rate": 9.786605263157895e-05, "loss": 0.5205, "step": 28123 }, { "epoch": 1.5748684063164968, "grad_norm": 11.61746883392334, "learning_rate": 9.78657894736842e-05, "loss": 0.6034, "step": 28124 }, { "epoch": 1.5749244036286258, "grad_norm": 1.5589476823806763, "learning_rate": 9.786552631578948e-05, "loss": 0.7174, "step": 28125 }, { "epoch": 1.5749804009407549, "grad_norm": 1.4372823238372803, "learning_rate": 9.786526315789474e-05, "loss": 0.5682, "step": 28126 }, { "epoch": 1.5750363982528839, "grad_norm": 1.3344392776489258, "learning_rate": 9.786500000000001e-05, "loss": 0.485, "step": 28127 }, { "epoch": 1.575092395565013, "grad_norm": 1.3974255323410034, "learning_rate": 9.786473684210526e-05, "loss": 0.4426, 
"step": 28128 }, { "epoch": 1.575148392877142, "grad_norm": 1.2909753322601318, "learning_rate": 9.786447368421053e-05, "loss": 0.4372, "step": 28129 }, { "epoch": 1.575204390189271, "grad_norm": 1.6748439073562622, "learning_rate": 9.786421052631579e-05, "loss": 0.4385, "step": 28130 }, { "epoch": 1.5752603875014, "grad_norm": 1.1811232566833496, "learning_rate": 9.786394736842106e-05, "loss": 0.4178, "step": 28131 }, { "epoch": 1.575316384813529, "grad_norm": 1.3954200744628906, "learning_rate": 9.786368421052632e-05, "loss": 0.4656, "step": 28132 }, { "epoch": 1.575372382125658, "grad_norm": 1.1845266819000244, "learning_rate": 9.786342105263158e-05, "loss": 0.4493, "step": 28133 }, { "epoch": 1.575428379437787, "grad_norm": 1.4437274932861328, "learning_rate": 9.786315789473684e-05, "loss": 0.5061, "step": 28134 }, { "epoch": 1.575484376749916, "grad_norm": 1.2547589540481567, "learning_rate": 9.786289473684212e-05, "loss": 0.3944, "step": 28135 }, { "epoch": 1.575540374062045, "grad_norm": 1.269227385520935, "learning_rate": 9.786263157894738e-05, "loss": 0.3511, "step": 28136 }, { "epoch": 1.575596371374174, "grad_norm": 1.3924602270126343, "learning_rate": 9.786236842105264e-05, "loss": 0.4636, "step": 28137 }, { "epoch": 1.575652368686303, "grad_norm": 1.0934702157974243, "learning_rate": 9.78621052631579e-05, "loss": 0.3141, "step": 28138 }, { "epoch": 1.5757083659984321, "grad_norm": 1.1891965866088867, "learning_rate": 9.786184210526316e-05, "loss": 0.5611, "step": 28139 }, { "epoch": 1.5757643633105611, "grad_norm": 1.4275307655334473, "learning_rate": 9.786157894736843e-05, "loss": 0.4026, "step": 28140 }, { "epoch": 1.5758203606226902, "grad_norm": 1.2542580366134644, "learning_rate": 9.786131578947369e-05, "loss": 0.4041, "step": 28141 }, { "epoch": 1.5758763579348192, "grad_norm": 1.2023520469665527, "learning_rate": 9.786105263157895e-05, "loss": 0.5344, "step": 28142 }, { "epoch": 1.5759323552469482, "grad_norm": 1.3916306495666504, 
"learning_rate": 9.786078947368421e-05, "loss": 0.5923, "step": 28143 }, { "epoch": 1.5759883525590772, "grad_norm": 1.2810475826263428, "learning_rate": 9.786052631578948e-05, "loss": 0.4102, "step": 28144 }, { "epoch": 1.5760443498712062, "grad_norm": 1.1698429584503174, "learning_rate": 9.786026315789474e-05, "loss": 0.3531, "step": 28145 }, { "epoch": 1.5761003471833352, "grad_norm": 1.3877931833267212, "learning_rate": 9.786e-05, "loss": 0.4117, "step": 28146 }, { "epoch": 1.5761563444954643, "grad_norm": 1.289060354232788, "learning_rate": 9.785973684210526e-05, "loss": 0.4003, "step": 28147 }, { "epoch": 1.5762123418075933, "grad_norm": 1.2534656524658203, "learning_rate": 9.785947368421053e-05, "loss": 0.4815, "step": 28148 }, { "epoch": 1.5762683391197223, "grad_norm": 1.4801337718963623, "learning_rate": 9.78592105263158e-05, "loss": 0.5747, "step": 28149 }, { "epoch": 1.5763243364318513, "grad_norm": 1.0988131761550903, "learning_rate": 9.785894736842107e-05, "loss": 0.4168, "step": 28150 }, { "epoch": 1.5763803337439803, "grad_norm": 1.3799777030944824, "learning_rate": 9.785868421052631e-05, "loss": 0.6514, "step": 28151 }, { "epoch": 1.5764363310561094, "grad_norm": 1.220173716545105, "learning_rate": 9.785842105263159e-05, "loss": 0.4265, "step": 28152 }, { "epoch": 1.5764923283682384, "grad_norm": 1.2092936038970947, "learning_rate": 9.785815789473685e-05, "loss": 0.4341, "step": 28153 }, { "epoch": 1.5765483256803674, "grad_norm": 1.393599271774292, "learning_rate": 9.785789473684212e-05, "loss": 0.3641, "step": 28154 }, { "epoch": 1.5766043229924964, "grad_norm": 1.2767813205718994, "learning_rate": 9.785763157894737e-05, "loss": 0.5336, "step": 28155 }, { "epoch": 1.5766603203046254, "grad_norm": 1.2935755252838135, "learning_rate": 9.785736842105263e-05, "loss": 0.5034, "step": 28156 }, { "epoch": 1.5767163176167545, "grad_norm": 1.108004093170166, "learning_rate": 9.78571052631579e-05, "loss": 0.4588, "step": 28157 }, { "epoch": 
1.5767723149288835, "grad_norm": 1.0608549118041992, "learning_rate": 9.785684210526316e-05, "loss": 0.4156, "step": 28158 }, { "epoch": 1.5768283122410125, "grad_norm": 1.2897781133651733, "learning_rate": 9.785657894736843e-05, "loss": 0.4719, "step": 28159 }, { "epoch": 1.5768843095531415, "grad_norm": 1.090461015701294, "learning_rate": 9.785631578947368e-05, "loss": 0.3136, "step": 28160 }, { "epoch": 1.5769403068652705, "grad_norm": 1.243940830230713, "learning_rate": 9.785605263157895e-05, "loss": 0.3999, "step": 28161 }, { "epoch": 1.5769963041773996, "grad_norm": 1.2789543867111206, "learning_rate": 9.785578947368421e-05, "loss": 0.478, "step": 28162 }, { "epoch": 1.5770523014895286, "grad_norm": 1.2415505647659302, "learning_rate": 9.785552631578948e-05, "loss": 0.4176, "step": 28163 }, { "epoch": 1.5771082988016576, "grad_norm": 1.3886882066726685, "learning_rate": 9.785526315789474e-05, "loss": 0.4695, "step": 28164 }, { "epoch": 1.5771642961137866, "grad_norm": 1.1668152809143066, "learning_rate": 9.7855e-05, "loss": 0.3876, "step": 28165 }, { "epoch": 1.5772202934259156, "grad_norm": 1.349058747291565, "learning_rate": 9.785473684210526e-05, "loss": 0.5779, "step": 28166 }, { "epoch": 1.5772762907380447, "grad_norm": 1.3069305419921875, "learning_rate": 9.785447368421054e-05, "loss": 0.4907, "step": 28167 }, { "epoch": 1.5773322880501737, "grad_norm": 1.1267633438110352, "learning_rate": 9.78542105263158e-05, "loss": 0.3592, "step": 28168 }, { "epoch": 1.5773882853623027, "grad_norm": 1.205538272857666, "learning_rate": 9.785394736842106e-05, "loss": 0.4373, "step": 28169 }, { "epoch": 1.5774442826744317, "grad_norm": 1.0698776245117188, "learning_rate": 9.785368421052632e-05, "loss": 0.4397, "step": 28170 }, { "epoch": 1.5775002799865607, "grad_norm": 1.385589361190796, "learning_rate": 9.785342105263159e-05, "loss": 0.433, "step": 28171 }, { "epoch": 1.5775562772986897, "grad_norm": 1.3485287427902222, "learning_rate": 9.785315789473685e-05, "loss": 
0.5212, "step": 28172 }, { "epoch": 1.5776122746108188, "grad_norm": 1.1175944805145264, "learning_rate": 9.785289473684211e-05, "loss": 0.3932, "step": 28173 }, { "epoch": 1.5776682719229478, "grad_norm": 1.5841163396835327, "learning_rate": 9.785263157894737e-05, "loss": 0.4892, "step": 28174 }, { "epoch": 1.5777242692350768, "grad_norm": 1.2950897216796875, "learning_rate": 9.785236842105263e-05, "loss": 0.4751, "step": 28175 }, { "epoch": 1.5777802665472058, "grad_norm": 1.267866611480713, "learning_rate": 9.78521052631579e-05, "loss": 0.3934, "step": 28176 }, { "epoch": 1.5778362638593348, "grad_norm": 1.0617599487304688, "learning_rate": 9.785184210526316e-05, "loss": 0.4001, "step": 28177 }, { "epoch": 1.5778922611714639, "grad_norm": 1.4428179264068604, "learning_rate": 9.785157894736842e-05, "loss": 0.5607, "step": 28178 }, { "epoch": 1.5779482584835929, "grad_norm": 1.1948655843734741, "learning_rate": 9.785131578947368e-05, "loss": 0.4326, "step": 28179 }, { "epoch": 1.578004255795722, "grad_norm": 1.6858242750167847, "learning_rate": 9.785105263157895e-05, "loss": 0.4088, "step": 28180 }, { "epoch": 1.578060253107851, "grad_norm": 1.232558250427246, "learning_rate": 9.785078947368421e-05, "loss": 0.4657, "step": 28181 }, { "epoch": 1.57811625041998, "grad_norm": 1.5232340097427368, "learning_rate": 9.785052631578949e-05, "loss": 0.5507, "step": 28182 }, { "epoch": 1.578172247732109, "grad_norm": 1.240832805633545, "learning_rate": 9.785026315789473e-05, "loss": 0.437, "step": 28183 }, { "epoch": 1.578228245044238, "grad_norm": 1.556320071220398, "learning_rate": 9.785e-05, "loss": 0.4443, "step": 28184 }, { "epoch": 1.578284242356367, "grad_norm": 1.4046788215637207, "learning_rate": 9.784973684210527e-05, "loss": 0.4333, "step": 28185 }, { "epoch": 1.578340239668496, "grad_norm": 1.3440078496932983, "learning_rate": 9.784947368421054e-05, "loss": 0.39, "step": 28186 }, { "epoch": 1.578396236980625, "grad_norm": 1.4519109725952148, "learning_rate": 
9.78492105263158e-05, "loss": 0.5872, "step": 28187 }, { "epoch": 1.578452234292754, "grad_norm": 1.3294261693954468, "learning_rate": 9.784894736842106e-05, "loss": 0.5281, "step": 28188 }, { "epoch": 1.578508231604883, "grad_norm": 1.7994285821914673, "learning_rate": 9.784868421052632e-05, "loss": 0.4514, "step": 28189 }, { "epoch": 1.578564228917012, "grad_norm": 1.1187090873718262, "learning_rate": 9.784842105263158e-05, "loss": 0.4704, "step": 28190 }, { "epoch": 1.578620226229141, "grad_norm": 1.0622961521148682, "learning_rate": 9.784815789473685e-05, "loss": 0.3963, "step": 28191 }, { "epoch": 1.5786762235412701, "grad_norm": 1.4263650178909302, "learning_rate": 9.784789473684211e-05, "loss": 0.5271, "step": 28192 }, { "epoch": 1.5787322208533991, "grad_norm": 1.0818216800689697, "learning_rate": 9.784763157894737e-05, "loss": 0.4412, "step": 28193 }, { "epoch": 1.5787882181655282, "grad_norm": 1.3329949378967285, "learning_rate": 9.784736842105263e-05, "loss": 0.5599, "step": 28194 }, { "epoch": 1.5788442154776572, "grad_norm": 1.3518515825271606, "learning_rate": 9.78471052631579e-05, "loss": 0.3642, "step": 28195 }, { "epoch": 1.5789002127897862, "grad_norm": 1.2396793365478516, "learning_rate": 9.784684210526316e-05, "loss": 0.4187, "step": 28196 }, { "epoch": 1.5789562101019152, "grad_norm": 1.3476207256317139, "learning_rate": 9.784657894736842e-05, "loss": 0.3397, "step": 28197 }, { "epoch": 1.5790122074140442, "grad_norm": 1.346340537071228, "learning_rate": 9.784631578947368e-05, "loss": 0.4135, "step": 28198 }, { "epoch": 1.5790682047261733, "grad_norm": 1.3594069480895996, "learning_rate": 9.784605263157896e-05, "loss": 0.3856, "step": 28199 }, { "epoch": 1.5791242020383023, "grad_norm": 1.5098919868469238, "learning_rate": 9.784578947368422e-05, "loss": 0.3712, "step": 28200 }, { "epoch": 1.5791801993504313, "grad_norm": 1.3382796049118042, "learning_rate": 9.784552631578948e-05, "loss": 0.4768, "step": 28201 }, { "epoch": 1.5792361966625603, 
"grad_norm": 1.1051304340362549, "learning_rate": 9.784526315789474e-05, "loss": 0.3683, "step": 28202 }, { "epoch": 1.5792921939746893, "grad_norm": 1.24118971824646, "learning_rate": 9.784500000000001e-05, "loss": 0.4999, "step": 28203 }, { "epoch": 1.5793481912868184, "grad_norm": 1.661124348640442, "learning_rate": 9.784473684210527e-05, "loss": 0.7368, "step": 28204 }, { "epoch": 1.5794041885989474, "grad_norm": 1.280503511428833, "learning_rate": 9.784447368421054e-05, "loss": 0.5091, "step": 28205 }, { "epoch": 1.5794601859110764, "grad_norm": 1.2633830308914185, "learning_rate": 9.784421052631579e-05, "loss": 0.4254, "step": 28206 }, { "epoch": 1.5795161832232054, "grad_norm": 1.272430419921875, "learning_rate": 9.784394736842106e-05, "loss": 0.4289, "step": 28207 }, { "epoch": 1.5795721805353344, "grad_norm": 1.4057821035385132, "learning_rate": 9.784368421052632e-05, "loss": 0.3939, "step": 28208 }, { "epoch": 1.5796281778474635, "grad_norm": 1.5706238746643066, "learning_rate": 9.784342105263158e-05, "loss": 0.4638, "step": 28209 }, { "epoch": 1.5796841751595925, "grad_norm": 1.3825490474700928, "learning_rate": 9.784315789473684e-05, "loss": 0.397, "step": 28210 }, { "epoch": 1.5797401724717215, "grad_norm": 1.3392679691314697, "learning_rate": 9.78428947368421e-05, "loss": 0.4503, "step": 28211 }, { "epoch": 1.5797961697838505, "grad_norm": 1.4337555170059204, "learning_rate": 9.784263157894737e-05, "loss": 0.5699, "step": 28212 }, { "epoch": 1.5798521670959795, "grad_norm": 1.576265811920166, "learning_rate": 9.784236842105263e-05, "loss": 0.5449, "step": 28213 }, { "epoch": 1.5799081644081086, "grad_norm": 1.4735445976257324, "learning_rate": 9.784210526315791e-05, "loss": 0.4527, "step": 28214 }, { "epoch": 1.5799641617202376, "grad_norm": 1.4079476594924927, "learning_rate": 9.784184210526315e-05, "loss": 0.3421, "step": 28215 }, { "epoch": 1.5800201590323666, "grad_norm": 1.2776329517364502, "learning_rate": 9.784157894736843e-05, "loss": 0.461, 
"step": 28216 }, { "epoch": 1.5800761563444956, "grad_norm": 1.2142679691314697, "learning_rate": 9.784131578947369e-05, "loss": 0.3766, "step": 28217 }, { "epoch": 1.5801321536566246, "grad_norm": 1.3883777856826782, "learning_rate": 9.784105263157896e-05, "loss": 0.454, "step": 28218 }, { "epoch": 1.5801881509687536, "grad_norm": 1.1547354459762573, "learning_rate": 9.784078947368422e-05, "loss": 0.477, "step": 28219 }, { "epoch": 1.5802441482808827, "grad_norm": 1.2915985584259033, "learning_rate": 9.784052631578948e-05, "loss": 0.4815, "step": 28220 }, { "epoch": 1.5803001455930117, "grad_norm": 1.478183388710022, "learning_rate": 9.784026315789474e-05, "loss": 0.4279, "step": 28221 }, { "epoch": 1.5803561429051407, "grad_norm": 1.2574313879013062, "learning_rate": 9.784000000000001e-05, "loss": 0.508, "step": 28222 }, { "epoch": 1.5804121402172697, "grad_norm": 10.788265228271484, "learning_rate": 9.783973684210527e-05, "loss": 0.4428, "step": 28223 }, { "epoch": 1.5804681375293987, "grad_norm": 1.188437581062317, "learning_rate": 9.783947368421053e-05, "loss": 0.3691, "step": 28224 }, { "epoch": 1.5805241348415278, "grad_norm": 1.3362503051757812, "learning_rate": 9.783921052631579e-05, "loss": 0.4179, "step": 28225 }, { "epoch": 1.5805801321536568, "grad_norm": 1.9426575899124146, "learning_rate": 9.783894736842105e-05, "loss": 0.5131, "step": 28226 }, { "epoch": 1.5806361294657858, "grad_norm": 1.151312232017517, "learning_rate": 9.783868421052632e-05, "loss": 0.3808, "step": 28227 }, { "epoch": 1.5806921267779148, "grad_norm": 1.3491356372833252, "learning_rate": 9.783842105263158e-05, "loss": 0.4749, "step": 28228 }, { "epoch": 1.5807481240900438, "grad_norm": 1.3050971031188965, "learning_rate": 9.783815789473684e-05, "loss": 0.4173, "step": 28229 }, { "epoch": 1.5808041214021729, "grad_norm": 1.3836475610733032, "learning_rate": 9.78378947368421e-05, "loss": 0.4225, "step": 28230 }, { "epoch": 1.5808601187143019, "grad_norm": 1.2909274101257324, 
"learning_rate": 9.783763157894738e-05, "loss": 0.5205, "step": 28231 }, { "epoch": 1.580916116026431, "grad_norm": 1.6245766878128052, "learning_rate": 9.783736842105264e-05, "loss": 0.5249, "step": 28232 }, { "epoch": 1.58097211333856, "grad_norm": 1.3622593879699707, "learning_rate": 9.78371052631579e-05, "loss": 0.3795, "step": 28233 }, { "epoch": 1.5810281106506887, "grad_norm": 1.450543999671936, "learning_rate": 9.783684210526316e-05, "loss": 0.4643, "step": 28234 }, { "epoch": 1.5810841079628177, "grad_norm": 1.3439433574676514, "learning_rate": 9.783657894736843e-05, "loss": 0.6194, "step": 28235 }, { "epoch": 1.5811401052749467, "grad_norm": 1.3028042316436768, "learning_rate": 9.783631578947369e-05, "loss": 0.4211, "step": 28236 }, { "epoch": 1.5811961025870758, "grad_norm": 1.1072126626968384, "learning_rate": 9.783605263157896e-05, "loss": 0.3549, "step": 28237 }, { "epoch": 1.5812520998992048, "grad_norm": 1.2173089981079102, "learning_rate": 9.783578947368421e-05, "loss": 0.4091, "step": 28238 }, { "epoch": 1.5813080972113338, "grad_norm": 1.1502578258514404, "learning_rate": 9.783552631578948e-05, "loss": 0.4709, "step": 28239 }, { "epoch": 1.5813640945234628, "grad_norm": 1.3251762390136719, "learning_rate": 9.783526315789474e-05, "loss": 0.5133, "step": 28240 }, { "epoch": 1.5814200918355918, "grad_norm": 1.4343903064727783, "learning_rate": 9.783500000000001e-05, "loss": 0.4494, "step": 28241 }, { "epoch": 1.5814760891477209, "grad_norm": 1.1779705286026, "learning_rate": 9.783473684210527e-05, "loss": 0.55, "step": 28242 }, { "epoch": 1.5815320864598499, "grad_norm": 1.5313528776168823, "learning_rate": 9.783447368421052e-05, "loss": 0.4, "step": 28243 }, { "epoch": 1.581588083771979, "grad_norm": 1.4285385608673096, "learning_rate": 9.78342105263158e-05, "loss": 0.5774, "step": 28244 }, { "epoch": 1.581644081084108, "grad_norm": 1.9072041511535645, "learning_rate": 9.783394736842105e-05, "loss": 0.4054, "step": 28245 }, { "epoch": 
1.581700078396237, "grad_norm": 1.3752843141555786, "learning_rate": 9.783368421052633e-05, "loss": 0.423, "step": 28246 }, { "epoch": 1.581756075708366, "grad_norm": 1.5470937490463257, "learning_rate": 9.783342105263159e-05, "loss": 0.4605, "step": 28247 }, { "epoch": 1.581812073020495, "grad_norm": 1.4123104810714722, "learning_rate": 9.783315789473685e-05, "loss": 0.5767, "step": 28248 }, { "epoch": 1.581868070332624, "grad_norm": 1.4098743200302124, "learning_rate": 9.78328947368421e-05, "loss": 0.4671, "step": 28249 }, { "epoch": 1.581924067644753, "grad_norm": 1.2538591623306274, "learning_rate": 9.783263157894738e-05, "loss": 0.3655, "step": 28250 }, { "epoch": 1.581980064956882, "grad_norm": 1.244978904724121, "learning_rate": 9.783236842105264e-05, "loss": 0.3907, "step": 28251 }, { "epoch": 1.582036062269011, "grad_norm": 1.2174460887908936, "learning_rate": 9.78321052631579e-05, "loss": 0.3742, "step": 28252 }, { "epoch": 1.58209205958114, "grad_norm": 16.026084899902344, "learning_rate": 9.783184210526316e-05, "loss": 0.5541, "step": 28253 }, { "epoch": 1.582148056893269, "grad_norm": 1.2309362888336182, "learning_rate": 9.783157894736843e-05, "loss": 0.4567, "step": 28254 }, { "epoch": 1.5822040542053981, "grad_norm": 1.18257474899292, "learning_rate": 9.783131578947369e-05, "loss": 0.457, "step": 28255 }, { "epoch": 1.5822600515175271, "grad_norm": 1.2617989778518677, "learning_rate": 9.783105263157895e-05, "loss": 0.4647, "step": 28256 }, { "epoch": 1.5823160488296562, "grad_norm": 1.1995033025741577, "learning_rate": 9.783078947368421e-05, "loss": 0.5024, "step": 28257 }, { "epoch": 1.5823720461417852, "grad_norm": 1.1797460317611694, "learning_rate": 9.783052631578948e-05, "loss": 0.5191, "step": 28258 }, { "epoch": 1.5824280434539142, "grad_norm": 1.3838032484054565, "learning_rate": 9.783026315789474e-05, "loss": 0.488, "step": 28259 }, { "epoch": 1.5824840407660432, "grad_norm": 1.5013700723648071, "learning_rate": 9.783e-05, "loss": 0.4724, 
"step": 28260 }, { "epoch": 1.5825400380781722, "grad_norm": 1.1968519687652588, "learning_rate": 9.782973684210526e-05, "loss": 0.4432, "step": 28261 }, { "epoch": 1.5825960353903012, "grad_norm": 1.2762856483459473, "learning_rate": 9.782947368421052e-05, "loss": 0.4813, "step": 28262 }, { "epoch": 1.5826520327024303, "grad_norm": 1.4965077638626099, "learning_rate": 9.78292105263158e-05, "loss": 0.4058, "step": 28263 }, { "epoch": 1.5827080300145593, "grad_norm": 1.2279053926467896, "learning_rate": 9.782894736842106e-05, "loss": 0.4609, "step": 28264 }, { "epoch": 1.5827640273266883, "grad_norm": 1.1062767505645752, "learning_rate": 9.782868421052632e-05, "loss": 0.3519, "step": 28265 }, { "epoch": 1.5828200246388173, "grad_norm": 1.3223053216934204, "learning_rate": 9.782842105263158e-05, "loss": 0.5096, "step": 28266 }, { "epoch": 1.5828760219509463, "grad_norm": 1.2049095630645752, "learning_rate": 9.782815789473685e-05, "loss": 0.4288, "step": 28267 }, { "epoch": 1.5829320192630754, "grad_norm": 1.3944287300109863, "learning_rate": 9.782789473684211e-05, "loss": 0.4298, "step": 28268 }, { "epoch": 1.5829880165752044, "grad_norm": 1.3010836839675903, "learning_rate": 9.782763157894738e-05, "loss": 0.3869, "step": 28269 }, { "epoch": 1.5830440138873334, "grad_norm": 1.4387359619140625, "learning_rate": 9.782736842105263e-05, "loss": 0.5562, "step": 28270 }, { "epoch": 1.5831000111994624, "grad_norm": 1.1513671875, "learning_rate": 9.78271052631579e-05, "loss": 0.4648, "step": 28271 }, { "epoch": 1.5831560085115914, "grad_norm": 1.276419997215271, "learning_rate": 9.782684210526316e-05, "loss": 0.5492, "step": 28272 }, { "epoch": 1.5832120058237205, "grad_norm": 1.277443766593933, "learning_rate": 9.782657894736843e-05, "loss": 0.4696, "step": 28273 }, { "epoch": 1.5832680031358495, "grad_norm": 1.727570652961731, "learning_rate": 9.78263157894737e-05, "loss": 0.5006, "step": 28274 }, { "epoch": 1.5833240004479785, "grad_norm": 1.3538728952407837, 
"learning_rate": 9.782605263157895e-05, "loss": 0.5115, "step": 28275 }, { "epoch": 1.5833799977601075, "grad_norm": 1.189645767211914, "learning_rate": 9.782578947368421e-05, "loss": 0.4491, "step": 28276 }, { "epoch": 1.5834359950722365, "grad_norm": 1.426240086555481, "learning_rate": 9.782552631578947e-05, "loss": 0.5695, "step": 28277 }, { "epoch": 1.5834919923843656, "grad_norm": 1.1678203344345093, "learning_rate": 9.782526315789475e-05, "loss": 0.4941, "step": 28278 }, { "epoch": 1.5835479896964946, "grad_norm": 1.3237804174423218, "learning_rate": 9.7825e-05, "loss": 0.4183, "step": 28279 }, { "epoch": 1.5836039870086236, "grad_norm": 1.8183512687683105, "learning_rate": 9.782473684210527e-05, "loss": 0.485, "step": 28280 }, { "epoch": 1.5836599843207526, "grad_norm": 1.3586292266845703, "learning_rate": 9.782447368421053e-05, "loss": 0.5133, "step": 28281 }, { "epoch": 1.5837159816328816, "grad_norm": 1.332970142364502, "learning_rate": 9.78242105263158e-05, "loss": 0.4322, "step": 28282 }, { "epoch": 1.5837719789450107, "grad_norm": 1.2447971105575562, "learning_rate": 9.782394736842106e-05, "loss": 0.4877, "step": 28283 }, { "epoch": 1.5838279762571397, "grad_norm": 1.1528841257095337, "learning_rate": 9.782368421052632e-05, "loss": 0.411, "step": 28284 }, { "epoch": 1.5838839735692687, "grad_norm": 1.331696629524231, "learning_rate": 9.782342105263158e-05, "loss": 0.5132, "step": 28285 }, { "epoch": 1.5839399708813977, "grad_norm": 1.526389718055725, "learning_rate": 9.782315789473685e-05, "loss": 0.5063, "step": 28286 }, { "epoch": 1.5839959681935267, "grad_norm": 1.220671534538269, "learning_rate": 9.782289473684211e-05, "loss": 0.4549, "step": 28287 }, { "epoch": 1.5840519655056557, "grad_norm": 1.4934096336364746, "learning_rate": 9.782263157894737e-05, "loss": 0.5103, "step": 28288 }, { "epoch": 1.5841079628177848, "grad_norm": 1.101176381111145, "learning_rate": 9.782236842105263e-05, "loss": 0.3865, "step": 28289 }, { "epoch": 
1.5841639601299138, "grad_norm": 1.2829971313476562, "learning_rate": 9.78221052631579e-05, "loss": 0.3926, "step": 28290 }, { "epoch": 1.5842199574420428, "grad_norm": 1.3696972131729126, "learning_rate": 9.782184210526316e-05, "loss": 0.6527, "step": 28291 }, { "epoch": 1.5842759547541718, "grad_norm": 1.5396428108215332, "learning_rate": 9.782157894736844e-05, "loss": 0.5814, "step": 28292 }, { "epoch": 1.5843319520663008, "grad_norm": 1.616563081741333, "learning_rate": 9.782131578947368e-05, "loss": 0.5543, "step": 28293 }, { "epoch": 1.5843879493784299, "grad_norm": 1.4253442287445068, "learning_rate": 9.782105263157894e-05, "loss": 0.4605, "step": 28294 }, { "epoch": 1.5844439466905589, "grad_norm": 1.382038950920105, "learning_rate": 9.782078947368422e-05, "loss": 0.4453, "step": 28295 }, { "epoch": 1.584499944002688, "grad_norm": 1.2013399600982666, "learning_rate": 9.782052631578948e-05, "loss": 0.4356, "step": 28296 }, { "epoch": 1.584555941314817, "grad_norm": 1.2636768817901611, "learning_rate": 9.782026315789475e-05, "loss": 0.4449, "step": 28297 }, { "epoch": 1.584611938626946, "grad_norm": 1.3658510446548462, "learning_rate": 9.782e-05, "loss": 0.5393, "step": 28298 }, { "epoch": 1.584667935939075, "grad_norm": 1.285614252090454, "learning_rate": 9.781973684210527e-05, "loss": 0.4172, "step": 28299 }, { "epoch": 1.584723933251204, "grad_norm": 1.1509438753128052, "learning_rate": 9.781947368421053e-05, "loss": 0.4589, "step": 28300 }, { "epoch": 1.584779930563333, "grad_norm": 1.444385051727295, "learning_rate": 9.78192105263158e-05, "loss": 0.4409, "step": 28301 }, { "epoch": 1.584835927875462, "grad_norm": 1.872693657875061, "learning_rate": 9.781894736842106e-05, "loss": 0.5809, "step": 28302 }, { "epoch": 1.584891925187591, "grad_norm": 1.6845189332962036, "learning_rate": 9.781868421052632e-05, "loss": 0.4986, "step": 28303 }, { "epoch": 1.58494792249972, "grad_norm": 1.6680781841278076, "learning_rate": 9.781842105263158e-05, "loss": 0.5092, 
"step": 28304 }, { "epoch": 1.585003919811849, "grad_norm": 1.7148813009262085, "learning_rate": 9.781815789473685e-05, "loss": 0.564, "step": 28305 }, { "epoch": 1.585059917123978, "grad_norm": 1.5087623596191406, "learning_rate": 9.781789473684211e-05, "loss": 0.6449, "step": 28306 }, { "epoch": 1.585115914436107, "grad_norm": 1.4534099102020264, "learning_rate": 9.781763157894737e-05, "loss": 0.4322, "step": 28307 }, { "epoch": 1.5851719117482361, "grad_norm": 1.3164526224136353, "learning_rate": 9.781736842105263e-05, "loss": 0.354, "step": 28308 }, { "epoch": 1.5852279090603651, "grad_norm": 1.0275143384933472, "learning_rate": 9.78171052631579e-05, "loss": 0.2847, "step": 28309 }, { "epoch": 1.5852839063724942, "grad_norm": 1.2909725904464722, "learning_rate": 9.781684210526317e-05, "loss": 0.4754, "step": 28310 }, { "epoch": 1.5853399036846232, "grad_norm": 1.3653693199157715, "learning_rate": 9.781657894736843e-05, "loss": 0.3985, "step": 28311 }, { "epoch": 1.5853959009967522, "grad_norm": 1.3306163549423218, "learning_rate": 9.781631578947369e-05, "loss": 0.419, "step": 28312 }, { "epoch": 1.5854518983088812, "grad_norm": 1.3650118112564087, "learning_rate": 9.781605263157895e-05, "loss": 0.4289, "step": 28313 }, { "epoch": 1.5855078956210102, "grad_norm": 1.4015605449676514, "learning_rate": 9.781578947368422e-05, "loss": 0.4585, "step": 28314 }, { "epoch": 1.5855638929331393, "grad_norm": 1.2568800449371338, "learning_rate": 9.781552631578948e-05, "loss": 0.4393, "step": 28315 }, { "epoch": 1.5856198902452683, "grad_norm": 1.413943886756897, "learning_rate": 9.781526315789474e-05, "loss": 0.4012, "step": 28316 }, { "epoch": 1.585675887557397, "grad_norm": 1.41544771194458, "learning_rate": 9.7815e-05, "loss": 0.3876, "step": 28317 }, { "epoch": 1.585731884869526, "grad_norm": 1.333017349243164, "learning_rate": 9.781473684210527e-05, "loss": 0.5692, "step": 28318 }, { "epoch": 1.5857878821816551, "grad_norm": 1.1712980270385742, "learning_rate": 
9.781447368421053e-05, "loss": 0.3676, "step": 28319 }, { "epoch": 1.5858438794937841, "grad_norm": 1.17991042137146, "learning_rate": 9.781421052631579e-05, "loss": 0.4407, "step": 28320 }, { "epoch": 1.5858998768059132, "grad_norm": 1.4239095449447632, "learning_rate": 9.781394736842105e-05, "loss": 0.5732, "step": 28321 }, { "epoch": 1.5859558741180422, "grad_norm": 2.375763177871704, "learning_rate": 9.781368421052632e-05, "loss": 0.54, "step": 28322 }, { "epoch": 1.5860118714301712, "grad_norm": 1.3151723146438599, "learning_rate": 9.781342105263158e-05, "loss": 0.3928, "step": 28323 }, { "epoch": 1.5860678687423002, "grad_norm": 1.134048342704773, "learning_rate": 9.781315789473686e-05, "loss": 0.4171, "step": 28324 }, { "epoch": 1.5861238660544292, "grad_norm": 1.9114692211151123, "learning_rate": 9.78128947368421e-05, "loss": 0.3269, "step": 28325 }, { "epoch": 1.5861798633665583, "grad_norm": 3.0973849296569824, "learning_rate": 9.781263157894738e-05, "loss": 0.4508, "step": 28326 }, { "epoch": 1.5862358606786873, "grad_norm": 1.5860693454742432, "learning_rate": 9.781236842105264e-05, "loss": 0.5947, "step": 28327 }, { "epoch": 1.5862918579908163, "grad_norm": 1.4285081624984741, "learning_rate": 9.781210526315791e-05, "loss": 0.5213, "step": 28328 }, { "epoch": 1.5863478553029453, "grad_norm": 1.274310827255249, "learning_rate": 9.781184210526317e-05, "loss": 0.5095, "step": 28329 }, { "epoch": 1.5864038526150743, "grad_norm": 1.354880928993225, "learning_rate": 9.781157894736841e-05, "loss": 0.4487, "step": 28330 }, { "epoch": 1.5864598499272033, "grad_norm": 1.4087774753570557, "learning_rate": 9.781131578947369e-05, "loss": 0.4681, "step": 28331 }, { "epoch": 1.5865158472393324, "grad_norm": 1.4856915473937988, "learning_rate": 9.781105263157895e-05, "loss": 0.4907, "step": 28332 }, { "epoch": 1.5865718445514614, "grad_norm": 1.165053129196167, "learning_rate": 9.781078947368422e-05, "loss": 0.4071, "step": 28333 }, { "epoch": 1.5866278418635904, 
"grad_norm": 1.8275299072265625, "learning_rate": 9.781052631578948e-05, "loss": 0.7704, "step": 28334 }, { "epoch": 1.5866838391757194, "grad_norm": 1.1157145500183105, "learning_rate": 9.781026315789474e-05, "loss": 0.4555, "step": 28335 }, { "epoch": 1.5867398364878484, "grad_norm": 1.1894867420196533, "learning_rate": 9.781e-05, "loss": 0.4554, "step": 28336 }, { "epoch": 1.5867958337999775, "grad_norm": 1.4696792364120483, "learning_rate": 9.780973684210527e-05, "loss": 0.4572, "step": 28337 }, { "epoch": 1.5868518311121065, "grad_norm": 1.2457644939422607, "learning_rate": 9.780947368421053e-05, "loss": 0.4419, "step": 28338 }, { "epoch": 1.5869078284242355, "grad_norm": 1.1074076890945435, "learning_rate": 9.780921052631579e-05, "loss": 0.5484, "step": 28339 }, { "epoch": 1.5869638257363645, "grad_norm": 1.3094606399536133, "learning_rate": 9.780894736842105e-05, "loss": 0.472, "step": 28340 }, { "epoch": 1.5870198230484935, "grad_norm": 1.7311792373657227, "learning_rate": 9.780868421052633e-05, "loss": 0.549, "step": 28341 }, { "epoch": 1.5870758203606226, "grad_norm": 1.266061782836914, "learning_rate": 9.780842105263159e-05, "loss": 0.4031, "step": 28342 }, { "epoch": 1.5871318176727516, "grad_norm": 1.1189930438995361, "learning_rate": 9.780815789473685e-05, "loss": 0.4839, "step": 28343 }, { "epoch": 1.5871878149848806, "grad_norm": 1.2545877695083618, "learning_rate": 9.78078947368421e-05, "loss": 0.6219, "step": 28344 }, { "epoch": 1.5872438122970096, "grad_norm": 1.3213335275650024, "learning_rate": 9.780763157894738e-05, "loss": 0.3883, "step": 28345 }, { "epoch": 1.5872998096091386, "grad_norm": 1.3805620670318604, "learning_rate": 9.780736842105264e-05, "loss": 0.4589, "step": 28346 }, { "epoch": 1.5873558069212677, "grad_norm": 1.2606295347213745, "learning_rate": 9.78071052631579e-05, "loss": 0.5068, "step": 28347 }, { "epoch": 1.5874118042333967, "grad_norm": 1.353511929512024, "learning_rate": 9.780684210526316e-05, "loss": 0.436, "step": 
28348 }, { "epoch": 1.5874678015455257, "grad_norm": 1.7375603914260864, "learning_rate": 9.780657894736842e-05, "loss": 0.5207, "step": 28349 }, { "epoch": 1.5875237988576547, "grad_norm": 1.2530337572097778, "learning_rate": 9.780631578947369e-05, "loss": 0.3879, "step": 28350 }, { "epoch": 1.5875797961697837, "grad_norm": 1.5136656761169434, "learning_rate": 9.780605263157895e-05, "loss": 0.5764, "step": 28351 }, { "epoch": 1.5876357934819127, "grad_norm": 1.0596904754638672, "learning_rate": 9.780578947368422e-05, "loss": 0.3732, "step": 28352 }, { "epoch": 1.5876917907940418, "grad_norm": 1.2544870376586914, "learning_rate": 9.780552631578947e-05, "loss": 0.4187, "step": 28353 }, { "epoch": 1.5877477881061708, "grad_norm": 1.3218860626220703, "learning_rate": 9.780526315789474e-05, "loss": 0.5132, "step": 28354 }, { "epoch": 1.5878037854182998, "grad_norm": 1.3503254652023315, "learning_rate": 9.7805e-05, "loss": 0.4403, "step": 28355 }, { "epoch": 1.5878597827304288, "grad_norm": 1.5628440380096436, "learning_rate": 9.780473684210528e-05, "loss": 0.4707, "step": 28356 }, { "epoch": 1.5879157800425578, "grad_norm": 1.1598467826843262, "learning_rate": 9.780447368421052e-05, "loss": 0.4415, "step": 28357 }, { "epoch": 1.5879717773546869, "grad_norm": 1.7052439451217651, "learning_rate": 9.78042105263158e-05, "loss": 0.5187, "step": 28358 }, { "epoch": 1.5880277746668159, "grad_norm": 1.171898603439331, "learning_rate": 9.780394736842106e-05, "loss": 0.3662, "step": 28359 }, { "epoch": 1.588083771978945, "grad_norm": 1.285593032836914, "learning_rate": 9.780368421052633e-05, "loss": 0.4864, "step": 28360 }, { "epoch": 1.588139769291074, "grad_norm": 1.2117118835449219, "learning_rate": 9.780342105263159e-05, "loss": 0.3878, "step": 28361 }, { "epoch": 1.588195766603203, "grad_norm": 1.504900574684143, "learning_rate": 9.780315789473685e-05, "loss": 0.5126, "step": 28362 }, { "epoch": 1.588251763915332, "grad_norm": 1.3133801221847534, "learning_rate": 
9.780289473684211e-05, "loss": 0.4801, "step": 28363 }, { "epoch": 1.588307761227461, "grad_norm": 1.1876076459884644, "learning_rate": 9.780263157894737e-05, "loss": 0.4214, "step": 28364 }, { "epoch": 1.58836375853959, "grad_norm": 1.3989534378051758, "learning_rate": 9.780236842105264e-05, "loss": 0.6245, "step": 28365 }, { "epoch": 1.588419755851719, "grad_norm": 1.4644654989242554, "learning_rate": 9.78021052631579e-05, "loss": 0.5663, "step": 28366 }, { "epoch": 1.588475753163848, "grad_norm": 1.4771467447280884, "learning_rate": 9.780184210526316e-05, "loss": 0.5066, "step": 28367 }, { "epoch": 1.588531750475977, "grad_norm": 1.3771470785140991, "learning_rate": 9.780157894736842e-05, "loss": 0.7344, "step": 28368 }, { "epoch": 1.588587747788106, "grad_norm": 1.4508684873580933, "learning_rate": 9.78013157894737e-05, "loss": 0.3952, "step": 28369 }, { "epoch": 1.588643745100235, "grad_norm": 1.3457491397857666, "learning_rate": 9.780105263157895e-05, "loss": 0.4476, "step": 28370 }, { "epoch": 1.5886997424123641, "grad_norm": 1.3605365753173828, "learning_rate": 9.780078947368421e-05, "loss": 0.556, "step": 28371 }, { "epoch": 1.5887557397244931, "grad_norm": 1.2433078289031982, "learning_rate": 9.780052631578947e-05, "loss": 0.4486, "step": 28372 }, { "epoch": 1.5888117370366222, "grad_norm": 1.3366619348526, "learning_rate": 9.780026315789475e-05, "loss": 0.4229, "step": 28373 }, { "epoch": 1.5888677343487512, "grad_norm": 1.091226577758789, "learning_rate": 9.78e-05, "loss": 0.4386, "step": 28374 }, { "epoch": 1.5889237316608802, "grad_norm": 1.1753994226455688, "learning_rate": 9.779973684210527e-05, "loss": 0.3889, "step": 28375 }, { "epoch": 1.5889797289730092, "grad_norm": 1.200480341911316, "learning_rate": 9.779947368421052e-05, "loss": 0.3553, "step": 28376 }, { "epoch": 1.5890357262851382, "grad_norm": 1.3582489490509033, "learning_rate": 9.77992105263158e-05, "loss": 0.5353, "step": 28377 }, { "epoch": 1.5890917235972672, "grad_norm": 
1.1027886867523193, "learning_rate": 9.779894736842106e-05, "loss": 0.3692, "step": 28378 }, { "epoch": 1.5891477209093963, "grad_norm": 1.4722157716751099, "learning_rate": 9.779868421052633e-05, "loss": 0.3914, "step": 28379 }, { "epoch": 1.5892037182215253, "grad_norm": 1.501230001449585, "learning_rate": 9.779842105263158e-05, "loss": 0.594, "step": 28380 }, { "epoch": 1.5892597155336543, "grad_norm": 1.2601374387741089, "learning_rate": 9.779815789473684e-05, "loss": 0.4311, "step": 28381 }, { "epoch": 1.5893157128457833, "grad_norm": 1.4137557744979858, "learning_rate": 9.779789473684211e-05, "loss": 0.4823, "step": 28382 }, { "epoch": 1.5893717101579123, "grad_norm": 1.4185372591018677, "learning_rate": 9.779763157894737e-05, "loss": 0.3598, "step": 28383 }, { "epoch": 1.5894277074700414, "grad_norm": 1.58175790309906, "learning_rate": 9.779736842105264e-05, "loss": 0.5759, "step": 28384 }, { "epoch": 1.5894837047821704, "grad_norm": 1.2684824466705322, "learning_rate": 9.779710526315789e-05, "loss": 0.4728, "step": 28385 }, { "epoch": 1.5895397020942994, "grad_norm": 1.3118031024932861, "learning_rate": 9.779684210526316e-05, "loss": 0.4175, "step": 28386 }, { "epoch": 1.5895956994064284, "grad_norm": 1.0539162158966064, "learning_rate": 9.779657894736842e-05, "loss": 0.2494, "step": 28387 }, { "epoch": 1.5896516967185574, "grad_norm": 1.312360167503357, "learning_rate": 9.77963157894737e-05, "loss": 0.4386, "step": 28388 }, { "epoch": 1.5897076940306865, "grad_norm": 1.4711054563522339, "learning_rate": 9.779605263157896e-05, "loss": 0.3986, "step": 28389 }, { "epoch": 1.5897636913428155, "grad_norm": 1.13727605342865, "learning_rate": 9.779578947368422e-05, "loss": 0.4532, "step": 28390 }, { "epoch": 1.5898196886549445, "grad_norm": 1.7851922512054443, "learning_rate": 9.779552631578948e-05, "loss": 0.5963, "step": 28391 }, { "epoch": 1.5898756859670735, "grad_norm": 2.342245101928711, "learning_rate": 9.779526315789475e-05, "loss": 0.386, "step": 28392 
}, { "epoch": 1.5899316832792025, "grad_norm": 1.5506318807601929, "learning_rate": 9.779500000000001e-05, "loss": 0.4646, "step": 28393 }, { "epoch": 1.5899876805913316, "grad_norm": 1.348465085029602, "learning_rate": 9.779473684210527e-05, "loss": 0.4819, "step": 28394 }, { "epoch": 1.5900436779034606, "grad_norm": 1.213765263557434, "learning_rate": 9.779447368421053e-05, "loss": 0.4695, "step": 28395 }, { "epoch": 1.5900996752155896, "grad_norm": 1.4216108322143555, "learning_rate": 9.77942105263158e-05, "loss": 0.6269, "step": 28396 }, { "epoch": 1.5901556725277186, "grad_norm": 1.424285650253296, "learning_rate": 9.779394736842106e-05, "loss": 0.4841, "step": 28397 }, { "epoch": 1.5902116698398476, "grad_norm": 1.2293741703033447, "learning_rate": 9.779368421052632e-05, "loss": 0.4536, "step": 28398 }, { "epoch": 1.5902676671519766, "grad_norm": 1.4259518384933472, "learning_rate": 9.779342105263158e-05, "loss": 0.4597, "step": 28399 }, { "epoch": 1.5903236644641057, "grad_norm": 1.1571725606918335, "learning_rate": 9.779315789473684e-05, "loss": 0.4649, "step": 28400 }, { "epoch": 1.5903796617762347, "grad_norm": 1.0336768627166748, "learning_rate": 9.779289473684211e-05, "loss": 0.4505, "step": 28401 }, { "epoch": 1.5904356590883637, "grad_norm": 1.3144545555114746, "learning_rate": 9.779263157894737e-05, "loss": 0.462, "step": 28402 }, { "epoch": 1.5904916564004927, "grad_norm": 1.402504801750183, "learning_rate": 9.779236842105263e-05, "loss": 0.5251, "step": 28403 }, { "epoch": 1.5905476537126217, "grad_norm": 1.2226722240447998, "learning_rate": 9.779210526315789e-05, "loss": 0.4761, "step": 28404 }, { "epoch": 1.5906036510247508, "grad_norm": 1.2393138408660889, "learning_rate": 9.779184210526317e-05, "loss": 0.3775, "step": 28405 }, { "epoch": 1.5906596483368798, "grad_norm": 1.0617609024047852, "learning_rate": 9.779157894736843e-05, "loss": 0.3888, "step": 28406 }, { "epoch": 1.5907156456490088, "grad_norm": 1.089975118637085, "learning_rate": 
9.77913157894737e-05, "loss": 0.3558, "step": 28407 }, { "epoch": 1.5907716429611378, "grad_norm": 1.3500378131866455, "learning_rate": 9.779105263157894e-05, "loss": 0.4809, "step": 28408 }, { "epoch": 1.5908276402732668, "grad_norm": 1.1152628660202026, "learning_rate": 9.779078947368422e-05, "loss": 0.3475, "step": 28409 }, { "epoch": 1.5908836375853959, "grad_norm": 1.1033310890197754, "learning_rate": 9.779052631578948e-05, "loss": 0.3902, "step": 28410 }, { "epoch": 1.5909396348975249, "grad_norm": 1.1562005281448364, "learning_rate": 9.779026315789475e-05, "loss": 0.4295, "step": 28411 }, { "epoch": 1.590995632209654, "grad_norm": 1.3591700792312622, "learning_rate": 9.779e-05, "loss": 0.4184, "step": 28412 }, { "epoch": 1.591051629521783, "grad_norm": 1.2122689485549927, "learning_rate": 9.778973684210527e-05, "loss": 0.4766, "step": 28413 }, { "epoch": 1.591107626833912, "grad_norm": 1.1197035312652588, "learning_rate": 9.778947368421053e-05, "loss": 0.5783, "step": 28414 }, { "epoch": 1.591163624146041, "grad_norm": 1.5661566257476807, "learning_rate": 9.778921052631579e-05, "loss": 0.4492, "step": 28415 }, { "epoch": 1.59121962145817, "grad_norm": 1.2168840169906616, "learning_rate": 9.778894736842106e-05, "loss": 0.4883, "step": 28416 }, { "epoch": 1.591275618770299, "grad_norm": 1.9509795904159546, "learning_rate": 9.778868421052631e-05, "loss": 0.5233, "step": 28417 }, { "epoch": 1.591331616082428, "grad_norm": 1.128519058227539, "learning_rate": 9.778842105263158e-05, "loss": 0.373, "step": 28418 }, { "epoch": 1.591387613394557, "grad_norm": 1.1893380880355835, "learning_rate": 9.778815789473684e-05, "loss": 0.444, "step": 28419 }, { "epoch": 1.591443610706686, "grad_norm": 1.22463858127594, "learning_rate": 9.778789473684212e-05, "loss": 0.4, "step": 28420 }, { "epoch": 1.591499608018815, "grad_norm": 1.1782256364822388, "learning_rate": 9.778763157894738e-05, "loss": 0.3917, "step": 28421 }, { "epoch": 1.591555605330944, "grad_norm": 
1.3224610090255737, "learning_rate": 9.778736842105264e-05, "loss": 0.4997, "step": 28422 }, { "epoch": 1.591611602643073, "grad_norm": 2.5644896030426025, "learning_rate": 9.77871052631579e-05, "loss": 0.6133, "step": 28423 }, { "epoch": 1.5916675999552021, "grad_norm": 1.3538347482681274, "learning_rate": 9.778684210526317e-05, "loss": 0.4019, "step": 28424 }, { "epoch": 1.5917235972673311, "grad_norm": 1.201648473739624, "learning_rate": 9.778657894736843e-05, "loss": 0.4126, "step": 28425 }, { "epoch": 1.5917795945794602, "grad_norm": 1.3906424045562744, "learning_rate": 9.778631578947369e-05, "loss": 0.4754, "step": 28426 }, { "epoch": 1.5918355918915892, "grad_norm": 1.3985551595687866, "learning_rate": 9.778605263157895e-05, "loss": 0.4963, "step": 28427 }, { "epoch": 1.5918915892037182, "grad_norm": 1.138534426689148, "learning_rate": 9.778578947368422e-05, "loss": 0.4352, "step": 28428 }, { "epoch": 1.5919475865158472, "grad_norm": 1.607399821281433, "learning_rate": 9.778552631578948e-05, "loss": 0.4495, "step": 28429 }, { "epoch": 1.5920035838279762, "grad_norm": 1.2148011922836304, "learning_rate": 9.778526315789474e-05, "loss": 0.4655, "step": 28430 }, { "epoch": 1.5920595811401053, "grad_norm": 1.1976314783096313, "learning_rate": 9.7785e-05, "loss": 0.4267, "step": 28431 }, { "epoch": 1.5921155784522343, "grad_norm": 1.3428587913513184, "learning_rate": 9.778473684210526e-05, "loss": 0.4297, "step": 28432 }, { "epoch": 1.5921715757643633, "grad_norm": 1.3928537368774414, "learning_rate": 9.778447368421053e-05, "loss": 0.4863, "step": 28433 }, { "epoch": 1.5922275730764923, "grad_norm": 1.0944451093673706, "learning_rate": 9.778421052631579e-05, "loss": 0.3974, "step": 28434 }, { "epoch": 1.5922835703886213, "grad_norm": 1.2076185941696167, "learning_rate": 9.778394736842105e-05, "loss": 0.5395, "step": 28435 }, { "epoch": 1.5923395677007504, "grad_norm": 1.1822516918182373, "learning_rate": 9.778368421052631e-05, "loss": 0.405, "step": 28436 }, { 
"epoch": 1.5923955650128794, "grad_norm": 1.5731171369552612, "learning_rate": 9.778342105263159e-05, "loss": 0.3564, "step": 28437 }, { "epoch": 1.5924515623250084, "grad_norm": 1.2677079439163208, "learning_rate": 9.778315789473684e-05, "loss": 0.4636, "step": 28438 }, { "epoch": 1.5925075596371374, "grad_norm": 1.1874058246612549, "learning_rate": 9.778289473684212e-05, "loss": 0.3787, "step": 28439 }, { "epoch": 1.5925635569492664, "grad_norm": 1.4953478574752808, "learning_rate": 9.778263157894736e-05, "loss": 0.4372, "step": 28440 }, { "epoch": 1.5926195542613955, "grad_norm": 1.2469935417175293, "learning_rate": 9.778236842105264e-05, "loss": 0.597, "step": 28441 }, { "epoch": 1.5926755515735245, "grad_norm": 1.3644754886627197, "learning_rate": 9.77821052631579e-05, "loss": 0.3941, "step": 28442 }, { "epoch": 1.5927315488856535, "grad_norm": 1.2577742338180542, "learning_rate": 9.778184210526317e-05, "loss": 0.4344, "step": 28443 }, { "epoch": 1.5927875461977825, "grad_norm": 1.7004826068878174, "learning_rate": 9.778157894736843e-05, "loss": 0.6271, "step": 28444 }, { "epoch": 1.5928435435099115, "grad_norm": 1.211025357246399, "learning_rate": 9.778131578947369e-05, "loss": 0.3728, "step": 28445 }, { "epoch": 1.5928995408220405, "grad_norm": 1.2380868196487427, "learning_rate": 9.778105263157895e-05, "loss": 0.5776, "step": 28446 }, { "epoch": 1.5929555381341696, "grad_norm": 2.2672958374023438, "learning_rate": 9.778078947368422e-05, "loss": 0.4709, "step": 28447 }, { "epoch": 1.5930115354462986, "grad_norm": 1.4772093296051025, "learning_rate": 9.778052631578948e-05, "loss": 0.4569, "step": 28448 }, { "epoch": 1.5930675327584276, "grad_norm": 1.0881645679473877, "learning_rate": 9.778026315789474e-05, "loss": 0.3936, "step": 28449 }, { "epoch": 1.5931235300705566, "grad_norm": 1.9581612348556519, "learning_rate": 9.778e-05, "loss": 0.4379, "step": 28450 }, { "epoch": 1.5931795273826856, "grad_norm": 1.2579345703125, "learning_rate": 
9.777973684210526e-05, "loss": 0.4634, "step": 28451 }, { "epoch": 1.5932355246948147, "grad_norm": 1.2960530519485474, "learning_rate": 9.777947368421054e-05, "loss": 0.4623, "step": 28452 }, { "epoch": 1.5932915220069437, "grad_norm": 1.7639278173446655, "learning_rate": 9.77792105263158e-05, "loss": 0.3915, "step": 28453 }, { "epoch": 1.5933475193190727, "grad_norm": 1.5790356397628784, "learning_rate": 9.777894736842105e-05, "loss": 0.4792, "step": 28454 }, { "epoch": 1.5934035166312017, "grad_norm": 1.4003068208694458, "learning_rate": 9.777868421052631e-05, "loss": 0.4453, "step": 28455 }, { "epoch": 1.5934595139433307, "grad_norm": 1.4069652557373047, "learning_rate": 9.777842105263159e-05, "loss": 0.4734, "step": 28456 }, { "epoch": 1.5935155112554598, "grad_norm": 1.4430841207504272, "learning_rate": 9.777815789473685e-05, "loss": 0.4123, "step": 28457 }, { "epoch": 1.5935715085675888, "grad_norm": 1.223680019378662, "learning_rate": 9.777789473684211e-05, "loss": 0.3639, "step": 28458 }, { "epoch": 1.5936275058797178, "grad_norm": 1.3366825580596924, "learning_rate": 9.777763157894737e-05, "loss": 0.3767, "step": 28459 }, { "epoch": 1.5936835031918468, "grad_norm": 1.191957712173462, "learning_rate": 9.777736842105264e-05, "loss": 0.4411, "step": 28460 }, { "epoch": 1.5937395005039758, "grad_norm": 1.339077115058899, "learning_rate": 9.77771052631579e-05, "loss": 0.521, "step": 28461 }, { "epoch": 1.5937954978161049, "grad_norm": 1.3121421337127686, "learning_rate": 9.777684210526317e-05, "loss": 0.5048, "step": 28462 }, { "epoch": 1.5938514951282339, "grad_norm": 1.209811806678772, "learning_rate": 9.777657894736842e-05, "loss": 0.416, "step": 28463 }, { "epoch": 1.593907492440363, "grad_norm": 1.111407995223999, "learning_rate": 9.777631578947369e-05, "loss": 0.4355, "step": 28464 }, { "epoch": 1.593963489752492, "grad_norm": 1.4819329977035522, "learning_rate": 9.777605263157895e-05, "loss": 0.4893, "step": 28465 }, { "epoch": 1.594019487064621, 
"grad_norm": 1.4028327465057373, "learning_rate": 9.777578947368423e-05, "loss": 0.417, "step": 28466 }, { "epoch": 1.59407548437675, "grad_norm": 1.1550005674362183, "learning_rate": 9.777552631578947e-05, "loss": 0.4364, "step": 28467 }, { "epoch": 1.594131481688879, "grad_norm": 1.216228723526001, "learning_rate": 9.777526315789473e-05, "loss": 0.2954, "step": 28468 }, { "epoch": 1.594187479001008, "grad_norm": 1.1248055696487427, "learning_rate": 9.7775e-05, "loss": 0.3729, "step": 28469 }, { "epoch": 1.594243476313137, "grad_norm": 1.0551655292510986, "learning_rate": 9.777473684210526e-05, "loss": 0.3796, "step": 28470 }, { "epoch": 1.594299473625266, "grad_norm": 1.4725807905197144, "learning_rate": 9.777447368421054e-05, "loss": 0.5342, "step": 28471 }, { "epoch": 1.594355470937395, "grad_norm": 1.3018877506256104, "learning_rate": 9.777421052631578e-05, "loss": 0.5014, "step": 28472 }, { "epoch": 1.594411468249524, "grad_norm": 1.571959376335144, "learning_rate": 9.777394736842106e-05, "loss": 0.5121, "step": 28473 }, { "epoch": 1.594467465561653, "grad_norm": 1.4337856769561768, "learning_rate": 9.777368421052632e-05, "loss": 0.4807, "step": 28474 }, { "epoch": 1.594523462873782, "grad_norm": 1.2302309274673462, "learning_rate": 9.777342105263159e-05, "loss": 0.4135, "step": 28475 }, { "epoch": 1.5945794601859111, "grad_norm": 1.445213794708252, "learning_rate": 9.777315789473685e-05, "loss": 0.5878, "step": 28476 }, { "epoch": 1.5946354574980401, "grad_norm": 1.50583016872406, "learning_rate": 9.777289473684211e-05, "loss": 0.4031, "step": 28477 }, { "epoch": 1.5946914548101692, "grad_norm": 1.5212727785110474, "learning_rate": 9.777263157894737e-05, "loss": 0.4732, "step": 28478 }, { "epoch": 1.5947474521222982, "grad_norm": 1.2776482105255127, "learning_rate": 9.777236842105264e-05, "loss": 0.4414, "step": 28479 }, { "epoch": 1.5948034494344272, "grad_norm": 1.2676594257354736, "learning_rate": 9.77721052631579e-05, "loss": 0.5139, "step": 28480 }, { 
"epoch": 1.5948594467465562, "grad_norm": 1.4428002834320068, "learning_rate": 9.777184210526316e-05, "loss": 0.4788, "step": 28481 }, { "epoch": 1.5949154440586852, "grad_norm": 1.3846535682678223, "learning_rate": 9.777157894736842e-05, "loss": 0.5191, "step": 28482 }, { "epoch": 1.5949714413708143, "grad_norm": 1.5444915294647217, "learning_rate": 9.77713157894737e-05, "loss": 0.5192, "step": 28483 }, { "epoch": 1.5950274386829433, "grad_norm": 1.2789089679718018, "learning_rate": 9.777105263157896e-05, "loss": 0.4586, "step": 28484 }, { "epoch": 1.5950834359950723, "grad_norm": 1.6008487939834595, "learning_rate": 9.777078947368421e-05, "loss": 0.5281, "step": 28485 }, { "epoch": 1.5951394333072013, "grad_norm": 1.1789329051971436, "learning_rate": 9.777052631578947e-05, "loss": 0.4533, "step": 28486 }, { "epoch": 1.5951954306193303, "grad_norm": 1.3337887525558472, "learning_rate": 9.777026315789473e-05, "loss": 0.4568, "step": 28487 }, { "epoch": 1.5952514279314594, "grad_norm": 1.1670817136764526, "learning_rate": 9.777000000000001e-05, "loss": 0.4196, "step": 28488 }, { "epoch": 1.5953074252435884, "grad_norm": 1.2730642557144165, "learning_rate": 9.776973684210527e-05, "loss": 0.3497, "step": 28489 }, { "epoch": 1.5953634225557174, "grad_norm": 1.3642950057983398, "learning_rate": 9.776947368421053e-05, "loss": 0.4985, "step": 28490 }, { "epoch": 1.5954194198678464, "grad_norm": 1.2098121643066406, "learning_rate": 9.776921052631579e-05, "loss": 0.6366, "step": 28491 }, { "epoch": 1.5954754171799754, "grad_norm": 1.8024334907531738, "learning_rate": 9.776894736842106e-05, "loss": 0.4126, "step": 28492 }, { "epoch": 1.5955314144921044, "grad_norm": 1.2786428928375244, "learning_rate": 9.776868421052632e-05, "loss": 0.5756, "step": 28493 }, { "epoch": 1.5955874118042335, "grad_norm": 4.292102813720703, "learning_rate": 9.776842105263159e-05, "loss": 0.548, "step": 28494 }, { "epoch": 1.5956434091163625, "grad_norm": 1.196244239807129, "learning_rate": 
9.776815789473684e-05, "loss": 0.3955, "step": 28495 }, { "epoch": 1.5956994064284915, "grad_norm": 1.4397553205490112, "learning_rate": 9.776789473684211e-05, "loss": 0.5208, "step": 28496 }, { "epoch": 1.5957554037406205, "grad_norm": 1.1940295696258545, "learning_rate": 9.776763157894737e-05, "loss": 0.3456, "step": 28497 }, { "epoch": 1.5958114010527495, "grad_norm": 1.2804096937179565, "learning_rate": 9.776736842105265e-05, "loss": 0.4411, "step": 28498 }, { "epoch": 1.5958673983648786, "grad_norm": 2.038516044616699, "learning_rate": 9.77671052631579e-05, "loss": 0.3775, "step": 28499 }, { "epoch": 1.5959233956770076, "grad_norm": 1.3518465757369995, "learning_rate": 9.776684210526316e-05, "loss": 0.4528, "step": 28500 }, { "epoch": 1.5959793929891366, "grad_norm": 1.3068658113479614, "learning_rate": 9.776657894736842e-05, "loss": 0.4894, "step": 28501 }, { "epoch": 1.5960353903012656, "grad_norm": 1.3878062963485718, "learning_rate": 9.776631578947368e-05, "loss": 0.4538, "step": 28502 }, { "epoch": 1.5960913876133946, "grad_norm": 1.7153552770614624, "learning_rate": 9.776605263157896e-05, "loss": 0.5061, "step": 28503 }, { "epoch": 1.5961473849255237, "grad_norm": 1.4197717905044556, "learning_rate": 9.776578947368422e-05, "loss": 0.5573, "step": 28504 }, { "epoch": 1.5962033822376527, "grad_norm": 1.4265944957733154, "learning_rate": 9.776552631578948e-05, "loss": 0.5851, "step": 28505 }, { "epoch": 1.5962593795497817, "grad_norm": 1.3374272584915161, "learning_rate": 9.776526315789474e-05, "loss": 0.334, "step": 28506 }, { "epoch": 1.5963153768619107, "grad_norm": 1.7477167844772339, "learning_rate": 9.776500000000001e-05, "loss": 0.6123, "step": 28507 }, { "epoch": 1.5963713741740397, "grad_norm": 1.1946483850479126, "learning_rate": 9.776473684210527e-05, "loss": 0.4356, "step": 28508 }, { "epoch": 1.5964273714861688, "grad_norm": 1.7405896186828613, "learning_rate": 9.776447368421053e-05, "loss": 0.4198, "step": 28509 }, { "epoch": 
1.5964833687982978, "grad_norm": 1.3562170267105103, "learning_rate": 9.776421052631579e-05, "loss": 0.3167, "step": 28510 }, { "epoch": 1.5965393661104268, "grad_norm": 1.243408203125, "learning_rate": 9.776394736842106e-05, "loss": 0.5274, "step": 28511 }, { "epoch": 1.5965953634225558, "grad_norm": 2.603712320327759, "learning_rate": 9.776368421052632e-05, "loss": 0.4354, "step": 28512 }, { "epoch": 1.5966513607346848, "grad_norm": 1.4916447401046753, "learning_rate": 9.776342105263158e-05, "loss": 0.4733, "step": 28513 }, { "epoch": 1.5967073580468139, "grad_norm": 1.0743104219436646, "learning_rate": 9.776315789473684e-05, "loss": 0.3415, "step": 28514 }, { "epoch": 1.5967633553589429, "grad_norm": 1.333001971244812, "learning_rate": 9.776289473684212e-05, "loss": 0.6249, "step": 28515 }, { "epoch": 1.596819352671072, "grad_norm": 1.2960251569747925, "learning_rate": 9.776263157894737e-05, "loss": 0.4857, "step": 28516 }, { "epoch": 1.596875349983201, "grad_norm": 1.4187594652175903, "learning_rate": 9.776236842105265e-05, "loss": 0.5072, "step": 28517 }, { "epoch": 1.59693134729533, "grad_norm": 0.9220099449157715, "learning_rate": 9.77621052631579e-05, "loss": 0.3494, "step": 28518 }, { "epoch": 1.596987344607459, "grad_norm": 1.3434042930603027, "learning_rate": 9.776184210526315e-05, "loss": 0.4417, "step": 28519 }, { "epoch": 1.597043341919588, "grad_norm": 1.4903477430343628, "learning_rate": 9.776157894736843e-05, "loss": 0.5027, "step": 28520 }, { "epoch": 1.597099339231717, "grad_norm": 1.5842574834823608, "learning_rate": 9.776131578947369e-05, "loss": 0.688, "step": 28521 }, { "epoch": 1.597155336543846, "grad_norm": 1.6226140260696411, "learning_rate": 9.776105263157895e-05, "loss": 0.4676, "step": 28522 }, { "epoch": 1.597211333855975, "grad_norm": 1.1888446807861328, "learning_rate": 9.77607894736842e-05, "loss": 0.4191, "step": 28523 }, { "epoch": 1.597267331168104, "grad_norm": 1.2289389371871948, "learning_rate": 9.776052631578948e-05, "loss": 
0.4819, "step": 28524 }, { "epoch": 1.597323328480233, "grad_norm": 1.3530793190002441, "learning_rate": 9.776026315789474e-05, "loss": 0.5033, "step": 28525 }, { "epoch": 1.597379325792362, "grad_norm": 1.10776948928833, "learning_rate": 9.776000000000001e-05, "loss": 0.3494, "step": 28526 }, { "epoch": 1.597435323104491, "grad_norm": 1.1916927099227905, "learning_rate": 9.775973684210526e-05, "loss": 0.4193, "step": 28527 }, { "epoch": 1.5974913204166201, "grad_norm": 1.2298662662506104, "learning_rate": 9.775947368421053e-05, "loss": 0.418, "step": 28528 }, { "epoch": 1.5975473177287491, "grad_norm": 1.2551629543304443, "learning_rate": 9.775921052631579e-05, "loss": 0.3814, "step": 28529 }, { "epoch": 1.5976033150408782, "grad_norm": 1.444276213645935, "learning_rate": 9.775894736842107e-05, "loss": 0.5472, "step": 28530 }, { "epoch": 1.5976593123530072, "grad_norm": 1.3685768842697144, "learning_rate": 9.775868421052632e-05, "loss": 0.6566, "step": 28531 }, { "epoch": 1.5977153096651362, "grad_norm": 1.209527850151062, "learning_rate": 9.775842105263158e-05, "loss": 0.6138, "step": 28532 }, { "epoch": 1.5977713069772652, "grad_norm": 1.4719892740249634, "learning_rate": 9.775815789473684e-05, "loss": 0.453, "step": 28533 }, { "epoch": 1.5978273042893942, "grad_norm": 1.3141635656356812, "learning_rate": 9.775789473684212e-05, "loss": 0.4432, "step": 28534 }, { "epoch": 1.5978833016015233, "grad_norm": 1.5745213031768799, "learning_rate": 9.775763157894738e-05, "loss": 0.5168, "step": 28535 }, { "epoch": 1.5979392989136523, "grad_norm": 1.1044379472732544, "learning_rate": 9.775736842105264e-05, "loss": 0.4065, "step": 28536 }, { "epoch": 1.5979952962257813, "grad_norm": 1.2080252170562744, "learning_rate": 9.77571052631579e-05, "loss": 0.4802, "step": 28537 }, { "epoch": 1.5980512935379103, "grad_norm": 1.4356034994125366, "learning_rate": 9.775684210526316e-05, "loss": 0.383, "step": 28538 }, { "epoch": 1.5981072908500393, "grad_norm": 1.205481767654419, 
"learning_rate": 9.775657894736843e-05, "loss": 0.3951, "step": 28539 }, { "epoch": 1.5981632881621683, "grad_norm": 2.1522939205169678, "learning_rate": 9.775631578947369e-05, "loss": 0.5754, "step": 28540 }, { "epoch": 1.5982192854742974, "grad_norm": 1.5407639741897583, "learning_rate": 9.775605263157895e-05, "loss": 0.5231, "step": 28541 }, { "epoch": 1.5982752827864264, "grad_norm": 1.5534600019454956, "learning_rate": 9.775578947368421e-05, "loss": 0.4517, "step": 28542 }, { "epoch": 1.5983312800985554, "grad_norm": 1.257481575012207, "learning_rate": 9.775552631578948e-05, "loss": 0.5159, "step": 28543 }, { "epoch": 1.5983872774106844, "grad_norm": 1.5003831386566162, "learning_rate": 9.775526315789474e-05, "loss": 0.4466, "step": 28544 }, { "epoch": 1.5984432747228134, "grad_norm": 1.46065354347229, "learning_rate": 9.7755e-05, "loss": 0.517, "step": 28545 }, { "epoch": 1.5984992720349425, "grad_norm": 1.3138266801834106, "learning_rate": 9.775473684210526e-05, "loss": 0.4196, "step": 28546 }, { "epoch": 1.5985552693470715, "grad_norm": 8.69722843170166, "learning_rate": 9.775447368421053e-05, "loss": 0.4998, "step": 28547 }, { "epoch": 1.5986112666592005, "grad_norm": 1.264536738395691, "learning_rate": 9.77542105263158e-05, "loss": 0.4926, "step": 28548 }, { "epoch": 1.5986672639713295, "grad_norm": 1.1531795263290405, "learning_rate": 9.775394736842107e-05, "loss": 0.4186, "step": 28549 }, { "epoch": 1.5987232612834585, "grad_norm": 1.7917656898498535, "learning_rate": 9.775368421052631e-05, "loss": 0.4706, "step": 28550 }, { "epoch": 1.5987792585955876, "grad_norm": 1.5780102014541626, "learning_rate": 9.775342105263159e-05, "loss": 0.4027, "step": 28551 }, { "epoch": 1.5988352559077166, "grad_norm": 1.246495246887207, "learning_rate": 9.775315789473685e-05, "loss": 0.5135, "step": 28552 }, { "epoch": 1.5988912532198456, "grad_norm": 1.2440063953399658, "learning_rate": 9.77528947368421e-05, "loss": 0.4332, "step": 28553 }, { "epoch": 
1.5989472505319746, "grad_norm": 1.3712800741195679, "learning_rate": 9.775263157894738e-05, "loss": 0.4079, "step": 28554 }, { "epoch": 1.5990032478441036, "grad_norm": 1.4920446872711182, "learning_rate": 9.775236842105263e-05, "loss": 0.3994, "step": 28555 }, { "epoch": 1.5990592451562327, "grad_norm": 1.6190557479858398, "learning_rate": 9.77521052631579e-05, "loss": 0.4627, "step": 28556 }, { "epoch": 1.5991152424683617, "grad_norm": 1.5346342325210571, "learning_rate": 9.775184210526316e-05, "loss": 0.4299, "step": 28557 }, { "epoch": 1.5991712397804907, "grad_norm": 1.3129130601882935, "learning_rate": 9.775157894736843e-05, "loss": 0.5504, "step": 28558 }, { "epoch": 1.5992272370926197, "grad_norm": 1.2687749862670898, "learning_rate": 9.775131578947368e-05, "loss": 0.5004, "step": 28559 }, { "epoch": 1.5992832344047487, "grad_norm": 1.371648907661438, "learning_rate": 9.775105263157895e-05, "loss": 0.4729, "step": 28560 }, { "epoch": 1.5993392317168778, "grad_norm": 1.2943694591522217, "learning_rate": 9.775078947368421e-05, "loss": 0.6238, "step": 28561 }, { "epoch": 1.5993952290290068, "grad_norm": 1.118265151977539, "learning_rate": 9.775052631578948e-05, "loss": 0.3792, "step": 28562 }, { "epoch": 1.5994512263411358, "grad_norm": 1.4891771078109741, "learning_rate": 9.775026315789474e-05, "loss": 0.4695, "step": 28563 }, { "epoch": 1.5995072236532648, "grad_norm": 1.7308967113494873, "learning_rate": 9.775e-05, "loss": 0.4526, "step": 28564 }, { "epoch": 1.5995632209653936, "grad_norm": 1.083701252937317, "learning_rate": 9.774973684210526e-05, "loss": 0.3322, "step": 28565 }, { "epoch": 1.5996192182775226, "grad_norm": 1.39650559425354, "learning_rate": 9.774947368421054e-05, "loss": 0.3853, "step": 28566 }, { "epoch": 1.5996752155896516, "grad_norm": 1.2509013414382935, "learning_rate": 9.77492105263158e-05, "loss": 0.4567, "step": 28567 }, { "epoch": 1.5997312129017807, "grad_norm": 1.6539342403411865, "learning_rate": 9.774894736842106e-05, "loss": 
0.5734, "step": 28568 }, { "epoch": 1.5997872102139097, "grad_norm": 1.1319507360458374, "learning_rate": 9.774868421052632e-05, "loss": 0.3607, "step": 28569 }, { "epoch": 1.5998432075260387, "grad_norm": 1.6434319019317627, "learning_rate": 9.774842105263159e-05, "loss": 0.4695, "step": 28570 }, { "epoch": 1.5998992048381677, "grad_norm": 1.5648937225341797, "learning_rate": 9.774815789473685e-05, "loss": 0.6171, "step": 28571 }, { "epoch": 1.5999552021502967, "grad_norm": 1.357996940612793, "learning_rate": 9.774789473684211e-05, "loss": 0.3983, "step": 28572 }, { "epoch": 1.6000111994624258, "grad_norm": 1.4389029741287231, "learning_rate": 9.774763157894737e-05, "loss": 0.4872, "step": 28573 }, { "epoch": 1.6000671967745548, "grad_norm": 1.4677770137786865, "learning_rate": 9.774736842105263e-05, "loss": 0.526, "step": 28574 }, { "epoch": 1.6001231940866838, "grad_norm": 1.207441806793213, "learning_rate": 9.77471052631579e-05, "loss": 0.5071, "step": 28575 }, { "epoch": 1.6001791913988128, "grad_norm": 1.2272001504898071, "learning_rate": 9.774684210526316e-05, "loss": 0.5347, "step": 28576 }, { "epoch": 1.6002351887109418, "grad_norm": 1.299938440322876, "learning_rate": 9.774657894736842e-05, "loss": 0.4146, "step": 28577 }, { "epoch": 1.6002911860230709, "grad_norm": 1.4660892486572266, "learning_rate": 9.774631578947368e-05, "loss": 0.5567, "step": 28578 }, { "epoch": 1.6003471833351999, "grad_norm": 1.6837488412857056, "learning_rate": 9.774605263157895e-05, "loss": 0.5576, "step": 28579 }, { "epoch": 1.600403180647329, "grad_norm": 1.3120943307876587, "learning_rate": 9.774578947368421e-05, "loss": 0.4611, "step": 28580 }, { "epoch": 1.600459177959458, "grad_norm": 1.2361335754394531, "learning_rate": 9.774552631578949e-05, "loss": 0.4284, "step": 28581 }, { "epoch": 1.600515175271587, "grad_norm": 1.3941798210144043, "learning_rate": 9.774526315789473e-05, "loss": 0.629, "step": 28582 }, { "epoch": 1.600571172583716, "grad_norm": 1.0770739316940308, 
"learning_rate": 9.774500000000001e-05, "loss": 0.479, "step": 28583 }, { "epoch": 1.600627169895845, "grad_norm": 1.4396823644638062, "learning_rate": 9.774473684210527e-05, "loss": 0.5685, "step": 28584 }, { "epoch": 1.600683167207974, "grad_norm": 1.361149549484253, "learning_rate": 9.774447368421054e-05, "loss": 0.4855, "step": 28585 }, { "epoch": 1.600739164520103, "grad_norm": 1.5855399370193481, "learning_rate": 9.77442105263158e-05, "loss": 0.5503, "step": 28586 }, { "epoch": 1.600795161832232, "grad_norm": 1.39113450050354, "learning_rate": 9.774394736842106e-05, "loss": 0.5595, "step": 28587 }, { "epoch": 1.600851159144361, "grad_norm": 1.7800246477127075, "learning_rate": 9.774368421052632e-05, "loss": 0.4223, "step": 28588 }, { "epoch": 1.60090715645649, "grad_norm": 1.1487984657287598, "learning_rate": 9.774342105263158e-05, "loss": 0.4232, "step": 28589 }, { "epoch": 1.600963153768619, "grad_norm": 2.425628900527954, "learning_rate": 9.774315789473685e-05, "loss": 0.5678, "step": 28590 }, { "epoch": 1.601019151080748, "grad_norm": 1.4537615776062012, "learning_rate": 9.774289473684211e-05, "loss": 0.5565, "step": 28591 }, { "epoch": 1.6010751483928771, "grad_norm": 1.319662094116211, "learning_rate": 9.774263157894737e-05, "loss": 0.4901, "step": 28592 }, { "epoch": 1.6011311457050061, "grad_norm": 1.9893040657043457, "learning_rate": 9.774236842105263e-05, "loss": 0.5555, "step": 28593 }, { "epoch": 1.6011871430171352, "grad_norm": 1.6523683071136475, "learning_rate": 9.77421052631579e-05, "loss": 0.4821, "step": 28594 }, { "epoch": 1.6012431403292642, "grad_norm": 1.3857654333114624, "learning_rate": 9.774184210526316e-05, "loss": 0.8286, "step": 28595 }, { "epoch": 1.6012991376413932, "grad_norm": 1.2868489027023315, "learning_rate": 9.774157894736842e-05, "loss": 0.4676, "step": 28596 }, { "epoch": 1.6013551349535222, "grad_norm": 1.4066622257232666, "learning_rate": 9.774131578947368e-05, "loss": 0.426, "step": 28597 }, { "epoch": 
1.6014111322656512, "grad_norm": 1.2741644382476807, "learning_rate": 9.774105263157896e-05, "loss": 0.3972, "step": 28598 }, { "epoch": 1.6014671295777803, "grad_norm": 1.590599536895752, "learning_rate": 9.774078947368422e-05, "loss": 0.5083, "step": 28599 }, { "epoch": 1.6015231268899093, "grad_norm": 2.7208175659179688, "learning_rate": 9.774052631578948e-05, "loss": 0.6283, "step": 28600 }, { "epoch": 1.6015791242020383, "grad_norm": 1.4425791501998901, "learning_rate": 9.774026315789474e-05, "loss": 0.4716, "step": 28601 }, { "epoch": 1.6016351215141673, "grad_norm": 1.4351321458816528, "learning_rate": 9.774000000000001e-05, "loss": 0.445, "step": 28602 }, { "epoch": 1.6016911188262963, "grad_norm": 1.2157111167907715, "learning_rate": 9.773973684210527e-05, "loss": 0.3545, "step": 28603 }, { "epoch": 1.6017471161384254, "grad_norm": 1.4584052562713623, "learning_rate": 9.773947368421054e-05, "loss": 0.5592, "step": 28604 }, { "epoch": 1.6018031134505544, "grad_norm": 1.518430233001709, "learning_rate": 9.773921052631579e-05, "loss": 0.5112, "step": 28605 }, { "epoch": 1.6018591107626834, "grad_norm": 1.3541641235351562, "learning_rate": 9.773894736842105e-05, "loss": 0.4307, "step": 28606 }, { "epoch": 1.6019151080748124, "grad_norm": 1.5336047410964966, "learning_rate": 9.773868421052632e-05, "loss": 0.529, "step": 28607 }, { "epoch": 1.6019711053869414, "grad_norm": 1.3114652633666992, "learning_rate": 9.773842105263158e-05, "loss": 0.4296, "step": 28608 }, { "epoch": 1.6020271026990704, "grad_norm": 1.2435579299926758, "learning_rate": 9.773815789473685e-05, "loss": 0.435, "step": 28609 }, { "epoch": 1.6020831000111995, "grad_norm": 1.3253560066223145, "learning_rate": 9.77378947368421e-05, "loss": 0.4692, "step": 28610 }, { "epoch": 1.6021390973233285, "grad_norm": 1.359158992767334, "learning_rate": 9.773763157894737e-05, "loss": 0.4447, "step": 28611 }, { "epoch": 1.6021950946354575, "grad_norm": 1.135666012763977, "learning_rate": 
9.773736842105263e-05, "loss": 0.4251, "step": 28612 }, { "epoch": 1.6022510919475865, "grad_norm": 1.4796688556671143, "learning_rate": 9.773710526315791e-05, "loss": 0.6012, "step": 28613 }, { "epoch": 1.6023070892597155, "grad_norm": 1.2790898084640503, "learning_rate": 9.773684210526315e-05, "loss": 0.5324, "step": 28614 }, { "epoch": 1.6023630865718446, "grad_norm": 1.2509945631027222, "learning_rate": 9.773657894736843e-05, "loss": 0.5256, "step": 28615 }, { "epoch": 1.6024190838839736, "grad_norm": 1.4188028573989868, "learning_rate": 9.773631578947369e-05, "loss": 0.5471, "step": 28616 }, { "epoch": 1.6024750811961026, "grad_norm": 1.5398504734039307, "learning_rate": 9.773605263157896e-05, "loss": 0.6039, "step": 28617 }, { "epoch": 1.6025310785082316, "grad_norm": 1.2628508806228638, "learning_rate": 9.773578947368422e-05, "loss": 0.3908, "step": 28618 }, { "epoch": 1.6025870758203606, "grad_norm": 1.200680136680603, "learning_rate": 9.773552631578948e-05, "loss": 0.3997, "step": 28619 }, { "epoch": 1.6026430731324897, "grad_norm": 1.3624876737594604, "learning_rate": 9.773526315789474e-05, "loss": 0.4948, "step": 28620 }, { "epoch": 1.6026990704446187, "grad_norm": 1.2947224378585815, "learning_rate": 9.773500000000001e-05, "loss": 0.4947, "step": 28621 }, { "epoch": 1.6027550677567477, "grad_norm": 1.4677554368972778, "learning_rate": 9.773473684210527e-05, "loss": 0.4754, "step": 28622 }, { "epoch": 1.6028110650688767, "grad_norm": 1.371080756187439, "learning_rate": 9.773447368421053e-05, "loss": 0.4582, "step": 28623 }, { "epoch": 1.6028670623810057, "grad_norm": 1.522147536277771, "learning_rate": 9.773421052631579e-05, "loss": 0.5, "step": 28624 }, { "epoch": 1.6029230596931348, "grad_norm": 1.4593405723571777, "learning_rate": 9.773394736842105e-05, "loss": 0.4705, "step": 28625 }, { "epoch": 1.6029790570052638, "grad_norm": 1.56641685962677, "learning_rate": 9.773368421052632e-05, "loss": 0.5664, "step": 28626 }, { "epoch": 1.6030350543173928, 
"grad_norm": 1.2927019596099854, "learning_rate": 9.773342105263158e-05, "loss": 0.4037, "step": 28627 }, { "epoch": 1.6030910516295218, "grad_norm": 1.29274320602417, "learning_rate": 9.773315789473684e-05, "loss": 0.4363, "step": 28628 }, { "epoch": 1.6031470489416508, "grad_norm": 1.2716351747512817, "learning_rate": 9.77328947368421e-05, "loss": 0.457, "step": 28629 }, { "epoch": 1.6032030462537799, "grad_norm": 1.4632830619812012, "learning_rate": 9.773263157894738e-05, "loss": 0.4265, "step": 28630 }, { "epoch": 1.6032590435659089, "grad_norm": 1.4021910429000854, "learning_rate": 9.773236842105264e-05, "loss": 0.4463, "step": 28631 }, { "epoch": 1.603315040878038, "grad_norm": 1.2475794553756714, "learning_rate": 9.77321052631579e-05, "loss": 0.5772, "step": 28632 }, { "epoch": 1.603371038190167, "grad_norm": 1.1905592679977417, "learning_rate": 9.773184210526316e-05, "loss": 0.3985, "step": 28633 }, { "epoch": 1.603427035502296, "grad_norm": 3.8043150901794434, "learning_rate": 9.773157894736843e-05, "loss": 0.4323, "step": 28634 }, { "epoch": 1.603483032814425, "grad_norm": 1.1995283365249634, "learning_rate": 9.773131578947369e-05, "loss": 0.3641, "step": 28635 }, { "epoch": 1.603539030126554, "grad_norm": 1.4894057512283325, "learning_rate": 9.773105263157896e-05, "loss": 0.4722, "step": 28636 }, { "epoch": 1.603595027438683, "grad_norm": 1.453940987586975, "learning_rate": 9.773078947368421e-05, "loss": 0.4959, "step": 28637 }, { "epoch": 1.603651024750812, "grad_norm": 1.3902662992477417, "learning_rate": 9.773052631578948e-05, "loss": 0.5107, "step": 28638 }, { "epoch": 1.603707022062941, "grad_norm": 1.1786566972732544, "learning_rate": 9.773026315789474e-05, "loss": 0.4885, "step": 28639 }, { "epoch": 1.60376301937507, "grad_norm": 1.3461360931396484, "learning_rate": 9.773e-05, "loss": 0.4654, "step": 28640 }, { "epoch": 1.603819016687199, "grad_norm": 1.120482325553894, "learning_rate": 9.772973684210527e-05, "loss": 0.4848, "step": 28641 }, { 
"epoch": 1.603875013999328, "grad_norm": 2.156015396118164, "learning_rate": 9.772947368421052e-05, "loss": 0.6806, "step": 28642 }, { "epoch": 1.603931011311457, "grad_norm": 1.0594418048858643, "learning_rate": 9.77292105263158e-05, "loss": 0.3164, "step": 28643 }, { "epoch": 1.6039870086235861, "grad_norm": 1.2195169925689697, "learning_rate": 9.772894736842105e-05, "loss": 0.4343, "step": 28644 }, { "epoch": 1.6040430059357151, "grad_norm": 1.37152898311615, "learning_rate": 9.772868421052633e-05, "loss": 0.4588, "step": 28645 }, { "epoch": 1.6040990032478442, "grad_norm": 1.2625083923339844, "learning_rate": 9.772842105263159e-05, "loss": 0.4527, "step": 28646 }, { "epoch": 1.6041550005599732, "grad_norm": 1.5454283952713013, "learning_rate": 9.772815789473685e-05, "loss": 0.6302, "step": 28647 }, { "epoch": 1.604210997872102, "grad_norm": 1.2913886308670044, "learning_rate": 9.77278947368421e-05, "loss": 0.5043, "step": 28648 }, { "epoch": 1.604266995184231, "grad_norm": 1.4128942489624023, "learning_rate": 9.772763157894738e-05, "loss": 0.4755, "step": 28649 }, { "epoch": 1.60432299249636, "grad_norm": 1.3026320934295654, "learning_rate": 9.772736842105264e-05, "loss": 0.4333, "step": 28650 }, { "epoch": 1.604378989808489, "grad_norm": 1.2849268913269043, "learning_rate": 9.77271052631579e-05, "loss": 0.5542, "step": 28651 }, { "epoch": 1.604434987120618, "grad_norm": 1.3272056579589844, "learning_rate": 9.772684210526316e-05, "loss": 0.3359, "step": 28652 }, { "epoch": 1.604490984432747, "grad_norm": 1.3442047834396362, "learning_rate": 9.772657894736843e-05, "loss": 0.4682, "step": 28653 }, { "epoch": 1.604546981744876, "grad_norm": 1.4577223062515259, "learning_rate": 9.772631578947369e-05, "loss": 0.4763, "step": 28654 }, { "epoch": 1.604602979057005, "grad_norm": 1.1649953126907349, "learning_rate": 9.772605263157895e-05, "loss": 0.4263, "step": 28655 }, { "epoch": 1.6046589763691341, "grad_norm": 1.4000461101531982, "learning_rate": 
9.772578947368421e-05, "loss": 0.4431, "step": 28656 }, { "epoch": 1.6047149736812631, "grad_norm": 1.293175220489502, "learning_rate": 9.772552631578947e-05, "loss": 0.4242, "step": 28657 }, { "epoch": 1.6047709709933922, "grad_norm": 1.503591537475586, "learning_rate": 9.772526315789474e-05, "loss": 0.5596, "step": 28658 }, { "epoch": 1.6048269683055212, "grad_norm": 1.1230645179748535, "learning_rate": 9.7725e-05, "loss": 0.3241, "step": 28659 }, { "epoch": 1.6048829656176502, "grad_norm": 1.2284173965454102, "learning_rate": 9.772473684210526e-05, "loss": 0.334, "step": 28660 }, { "epoch": 1.6049389629297792, "grad_norm": 1.3745721578598022, "learning_rate": 9.772447368421052e-05, "loss": 0.3021, "step": 28661 }, { "epoch": 1.6049949602419082, "grad_norm": 1.3472864627838135, "learning_rate": 9.77242105263158e-05, "loss": 0.4249, "step": 28662 }, { "epoch": 1.6050509575540373, "grad_norm": 1.5999746322631836, "learning_rate": 9.772394736842106e-05, "loss": 0.5218, "step": 28663 }, { "epoch": 1.6051069548661663, "grad_norm": 2.2409257888793945, "learning_rate": 9.772368421052633e-05, "loss": 0.6774, "step": 28664 }, { "epoch": 1.6051629521782953, "grad_norm": 1.3826230764389038, "learning_rate": 9.772342105263158e-05, "loss": 0.4914, "step": 28665 }, { "epoch": 1.6052189494904243, "grad_norm": 1.2145477533340454, "learning_rate": 9.772315789473685e-05, "loss": 0.3978, "step": 28666 }, { "epoch": 1.6052749468025533, "grad_norm": 1.286562204360962, "learning_rate": 9.772289473684211e-05, "loss": 0.577, "step": 28667 }, { "epoch": 1.6053309441146824, "grad_norm": 1.5623323917388916, "learning_rate": 9.772263157894738e-05, "loss": 0.4105, "step": 28668 }, { "epoch": 1.6053869414268114, "grad_norm": 1.4411369562149048, "learning_rate": 9.772236842105263e-05, "loss": 0.4962, "step": 28669 }, { "epoch": 1.6054429387389404, "grad_norm": 1.319985270500183, "learning_rate": 9.77221052631579e-05, "loss": 0.6363, "step": 28670 }, { "epoch": 1.6054989360510694, "grad_norm": 
1.4259133338928223, "learning_rate": 9.772184210526316e-05, "loss": 0.4932, "step": 28671 }, { "epoch": 1.6055549333631984, "grad_norm": 1.8027509450912476, "learning_rate": 9.772157894736843e-05, "loss": 0.4294, "step": 28672 }, { "epoch": 1.6056109306753275, "grad_norm": 1.3412691354751587, "learning_rate": 9.77213157894737e-05, "loss": 0.4014, "step": 28673 }, { "epoch": 1.6056669279874565, "grad_norm": 1.2715134620666504, "learning_rate": 9.772105263157894e-05, "loss": 0.3943, "step": 28674 }, { "epoch": 1.6057229252995855, "grad_norm": 1.5118589401245117, "learning_rate": 9.772078947368421e-05, "loss": 0.5511, "step": 28675 }, { "epoch": 1.6057789226117145, "grad_norm": 1.4125322103500366, "learning_rate": 9.772052631578947e-05, "loss": 0.4525, "step": 28676 }, { "epoch": 1.6058349199238435, "grad_norm": 1.3682007789611816, "learning_rate": 9.772026315789475e-05, "loss": 0.6081, "step": 28677 }, { "epoch": 1.6058909172359725, "grad_norm": 1.3445745706558228, "learning_rate": 9.772e-05, "loss": 0.4932, "step": 28678 }, { "epoch": 1.6059469145481016, "grad_norm": 3.1188995838165283, "learning_rate": 9.771973684210527e-05, "loss": 0.5, "step": 28679 }, { "epoch": 1.6060029118602306, "grad_norm": 1.4412713050842285, "learning_rate": 9.771947368421053e-05, "loss": 0.4545, "step": 28680 }, { "epoch": 1.6060589091723596, "grad_norm": 1.889561653137207, "learning_rate": 9.77192105263158e-05, "loss": 0.5177, "step": 28681 }, { "epoch": 1.6061149064844886, "grad_norm": 1.3179881572723389, "learning_rate": 9.771894736842106e-05, "loss": 0.5502, "step": 28682 }, { "epoch": 1.6061709037966176, "grad_norm": 1.4670768976211548, "learning_rate": 9.771868421052632e-05, "loss": 0.3974, "step": 28683 }, { "epoch": 1.6062269011087467, "grad_norm": 1.2923095226287842, "learning_rate": 9.771842105263158e-05, "loss": 0.6051, "step": 28684 }, { "epoch": 1.6062828984208757, "grad_norm": 1.2992335557937622, "learning_rate": 9.771815789473685e-05, "loss": 0.5566, "step": 28685 }, { 
"epoch": 1.6063388957330047, "grad_norm": 1.779698371887207, "learning_rate": 9.771789473684211e-05, "loss": 0.5703, "step": 28686 }, { "epoch": 1.6063948930451337, "grad_norm": 1.2247023582458496, "learning_rate": 9.771763157894737e-05, "loss": 0.4534, "step": 28687 }, { "epoch": 1.6064508903572627, "grad_norm": 1.4279910326004028, "learning_rate": 9.771736842105263e-05, "loss": 0.4967, "step": 28688 }, { "epoch": 1.6065068876693918, "grad_norm": 1.3476687669754028, "learning_rate": 9.77171052631579e-05, "loss": 0.6584, "step": 28689 }, { "epoch": 1.6065628849815208, "grad_norm": 1.3156377077102661, "learning_rate": 9.771684210526316e-05, "loss": 0.5392, "step": 28690 }, { "epoch": 1.6066188822936498, "grad_norm": 1.3912566900253296, "learning_rate": 9.771657894736844e-05, "loss": 0.4671, "step": 28691 }, { "epoch": 1.6066748796057788, "grad_norm": 1.198332667350769, "learning_rate": 9.771631578947368e-05, "loss": 0.3925, "step": 28692 }, { "epoch": 1.6067308769179078, "grad_norm": 1.2896145582199097, "learning_rate": 9.771605263157894e-05, "loss": 0.4474, "step": 28693 }, { "epoch": 1.6067868742300369, "grad_norm": 1.2734466791152954, "learning_rate": 9.771578947368422e-05, "loss": 0.4353, "step": 28694 }, { "epoch": 1.6068428715421659, "grad_norm": 1.3854820728302002, "learning_rate": 9.771552631578948e-05, "loss": 0.4362, "step": 28695 }, { "epoch": 1.606898868854295, "grad_norm": 1.2498865127563477, "learning_rate": 9.771526315789475e-05, "loss": 0.4349, "step": 28696 }, { "epoch": 1.606954866166424, "grad_norm": 1.5366005897521973, "learning_rate": 9.7715e-05, "loss": 0.533, "step": 28697 }, { "epoch": 1.607010863478553, "grad_norm": 1.1614415645599365, "learning_rate": 9.771473684210527e-05, "loss": 0.5351, "step": 28698 }, { "epoch": 1.607066860790682, "grad_norm": 1.7099281549453735, "learning_rate": 9.771447368421053e-05, "loss": 0.6309, "step": 28699 }, { "epoch": 1.607122858102811, "grad_norm": 1.2391961812973022, "learning_rate": 9.77142105263158e-05, 
"loss": 0.5167, "step": 28700 }, { "epoch": 1.60717885541494, "grad_norm": 1.4294838905334473, "learning_rate": 9.771394736842106e-05, "loss": 0.4542, "step": 28701 }, { "epoch": 1.607234852727069, "grad_norm": 0.9986810684204102, "learning_rate": 9.771368421052632e-05, "loss": 0.318, "step": 28702 }, { "epoch": 1.607290850039198, "grad_norm": 1.23465895652771, "learning_rate": 9.771342105263158e-05, "loss": 0.5482, "step": 28703 }, { "epoch": 1.607346847351327, "grad_norm": 1.420244574546814, "learning_rate": 9.771315789473685e-05, "loss": 0.4165, "step": 28704 }, { "epoch": 1.607402844663456, "grad_norm": 1.8302278518676758, "learning_rate": 9.771289473684211e-05, "loss": 0.5376, "step": 28705 }, { "epoch": 1.607458841975585, "grad_norm": 1.2272710800170898, "learning_rate": 9.771263157894737e-05, "loss": 0.4202, "step": 28706 }, { "epoch": 1.607514839287714, "grad_norm": 1.1919933557510376, "learning_rate": 9.771236842105263e-05, "loss": 0.4382, "step": 28707 }, { "epoch": 1.6075708365998431, "grad_norm": 1.2344907522201538, "learning_rate": 9.77121052631579e-05, "loss": 0.3364, "step": 28708 }, { "epoch": 1.6076268339119721, "grad_norm": 1.7100778818130493, "learning_rate": 9.771184210526317e-05, "loss": 0.5967, "step": 28709 }, { "epoch": 1.6076828312241012, "grad_norm": 1.5475964546203613, "learning_rate": 9.771157894736843e-05, "loss": 0.5107, "step": 28710 }, { "epoch": 1.6077388285362302, "grad_norm": 1.8974214792251587, "learning_rate": 9.771131578947369e-05, "loss": 0.7437, "step": 28711 }, { "epoch": 1.6077948258483592, "grad_norm": 1.195861577987671, "learning_rate": 9.771105263157895e-05, "loss": 0.3528, "step": 28712 }, { "epoch": 1.6078508231604882, "grad_norm": 1.2650530338287354, "learning_rate": 9.771078947368422e-05, "loss": 0.3817, "step": 28713 }, { "epoch": 1.6079068204726172, "grad_norm": 1.4194986820220947, "learning_rate": 9.771052631578948e-05, "loss": 0.509, "step": 28714 }, { "epoch": 1.6079628177847463, "grad_norm": 1.1955994367599487, 
"learning_rate": 9.771026315789474e-05, "loss": 0.4528, "step": 28715 }, { "epoch": 1.6080188150968753, "grad_norm": 1.1781166791915894, "learning_rate": 9.771e-05, "loss": 0.5074, "step": 28716 }, { "epoch": 1.6080748124090043, "grad_norm": 1.5795073509216309, "learning_rate": 9.770973684210527e-05, "loss": 0.3698, "step": 28717 }, { "epoch": 1.6081308097211333, "grad_norm": 1.37860107421875, "learning_rate": 9.770947368421053e-05, "loss": 0.3894, "step": 28718 }, { "epoch": 1.6081868070332623, "grad_norm": 1.3968303203582764, "learning_rate": 9.77092105263158e-05, "loss": 0.5085, "step": 28719 }, { "epoch": 1.6082428043453914, "grad_norm": 1.8028346300125122, "learning_rate": 9.770894736842105e-05, "loss": 0.4945, "step": 28720 }, { "epoch": 1.6082988016575204, "grad_norm": 1.4722049236297607, "learning_rate": 9.770868421052632e-05, "loss": 0.5267, "step": 28721 }, { "epoch": 1.6083547989696494, "grad_norm": 1.2737699747085571, "learning_rate": 9.770842105263158e-05, "loss": 0.5085, "step": 28722 }, { "epoch": 1.6084107962817784, "grad_norm": 1.464971661567688, "learning_rate": 9.770815789473686e-05, "loss": 0.5654, "step": 28723 }, { "epoch": 1.6084667935939074, "grad_norm": 1.0730468034744263, "learning_rate": 9.77078947368421e-05, "loss": 0.3335, "step": 28724 }, { "epoch": 1.6085227909060364, "grad_norm": 1.6783322095870972, "learning_rate": 9.770763157894738e-05, "loss": 0.5438, "step": 28725 }, { "epoch": 1.6085787882181655, "grad_norm": 1.5127896070480347, "learning_rate": 9.770736842105264e-05, "loss": 0.4101, "step": 28726 }, { "epoch": 1.6086347855302945, "grad_norm": 1.3831170797348022, "learning_rate": 9.77071052631579e-05, "loss": 0.529, "step": 28727 }, { "epoch": 1.6086907828424235, "grad_norm": 1.2516558170318604, "learning_rate": 9.770684210526317e-05, "loss": 0.5008, "step": 28728 }, { "epoch": 1.6087467801545525, "grad_norm": 1.1753110885620117, "learning_rate": 9.770657894736842e-05, "loss": 0.4279, "step": 28729 }, { "epoch": 
1.6088027774666815, "grad_norm": 1.1439446210861206, "learning_rate": 9.770631578947369e-05, "loss": 0.4876, "step": 28730 }, { "epoch": 1.6088587747788106, "grad_norm": 1.350986361503601, "learning_rate": 9.770605263157895e-05, "loss": 0.4439, "step": 28731 }, { "epoch": 1.6089147720909396, "grad_norm": 1.4621875286102295, "learning_rate": 9.770578947368422e-05, "loss": 0.4695, "step": 28732 }, { "epoch": 1.6089707694030686, "grad_norm": 1.6829105615615845, "learning_rate": 9.770552631578948e-05, "loss": 0.4594, "step": 28733 }, { "epoch": 1.6090267667151976, "grad_norm": 1.4435217380523682, "learning_rate": 9.770526315789474e-05, "loss": 0.4287, "step": 28734 }, { "epoch": 1.6090827640273266, "grad_norm": 1.356022834777832, "learning_rate": 9.7705e-05, "loss": 0.3338, "step": 28735 }, { "epoch": 1.6091387613394557, "grad_norm": 1.1180651187896729, "learning_rate": 9.770473684210527e-05, "loss": 0.3526, "step": 28736 }, { "epoch": 1.6091947586515847, "grad_norm": 1.066878318786621, "learning_rate": 9.770447368421053e-05, "loss": 0.3508, "step": 28737 }, { "epoch": 1.6092507559637137, "grad_norm": 1.2516587972640991, "learning_rate": 9.77042105263158e-05, "loss": 0.3417, "step": 28738 }, { "epoch": 1.6093067532758427, "grad_norm": 1.305559754371643, "learning_rate": 9.770394736842105e-05, "loss": 0.4857, "step": 28739 }, { "epoch": 1.6093627505879717, "grad_norm": 3.1608378887176514, "learning_rate": 9.770368421052633e-05, "loss": 0.3657, "step": 28740 }, { "epoch": 1.6094187479001008, "grad_norm": 1.6207187175750732, "learning_rate": 9.770342105263159e-05, "loss": 0.5124, "step": 28741 }, { "epoch": 1.6094747452122298, "grad_norm": 1.2724627256393433, "learning_rate": 9.770315789473685e-05, "loss": 0.489, "step": 28742 }, { "epoch": 1.6095307425243588, "grad_norm": 1.23508620262146, "learning_rate": 9.77028947368421e-05, "loss": 0.4922, "step": 28743 }, { "epoch": 1.6095867398364878, "grad_norm": 1.3437213897705078, "learning_rate": 9.770263157894737e-05, "loss": 
0.4651, "step": 28744 }, { "epoch": 1.6096427371486168, "grad_norm": 1.3021752834320068, "learning_rate": 9.770236842105264e-05, "loss": 0.4908, "step": 28745 }, { "epoch": 1.6096987344607459, "grad_norm": 1.061396837234497, "learning_rate": 9.77021052631579e-05, "loss": 0.3948, "step": 28746 }, { "epoch": 1.6097547317728749, "grad_norm": 1.5615110397338867, "learning_rate": 9.770184210526316e-05, "loss": 0.5421, "step": 28747 }, { "epoch": 1.6098107290850039, "grad_norm": 1.4910013675689697, "learning_rate": 9.770157894736842e-05, "loss": 0.6022, "step": 28748 }, { "epoch": 1.609866726397133, "grad_norm": 1.4547667503356934, "learning_rate": 9.770131578947369e-05, "loss": 0.5313, "step": 28749 }, { "epoch": 1.609922723709262, "grad_norm": 1.4698052406311035, "learning_rate": 9.770105263157895e-05, "loss": 0.539, "step": 28750 }, { "epoch": 1.609978721021391, "grad_norm": 1.3056294918060303, "learning_rate": 9.770078947368422e-05, "loss": 0.4185, "step": 28751 }, { "epoch": 1.61003471833352, "grad_norm": 1.2521520853042603, "learning_rate": 9.770052631578947e-05, "loss": 0.4903, "step": 28752 }, { "epoch": 1.610090715645649, "grad_norm": 1.549988031387329, "learning_rate": 9.770026315789474e-05, "loss": 0.502, "step": 28753 }, { "epoch": 1.610146712957778, "grad_norm": 1.5590424537658691, "learning_rate": 9.77e-05, "loss": 0.5469, "step": 28754 }, { "epoch": 1.610202710269907, "grad_norm": 1.0338451862335205, "learning_rate": 9.769973684210528e-05, "loss": 0.4221, "step": 28755 }, { "epoch": 1.610258707582036, "grad_norm": 1.9603238105773926, "learning_rate": 9.769947368421054e-05, "loss": 0.5875, "step": 28756 }, { "epoch": 1.610314704894165, "grad_norm": 1.5836817026138306, "learning_rate": 9.76992105263158e-05, "loss": 0.4774, "step": 28757 }, { "epoch": 1.610370702206294, "grad_norm": 1.1883763074874878, "learning_rate": 9.769894736842106e-05, "loss": 0.354, "step": 28758 }, { "epoch": 1.610426699518423, "grad_norm": 1.2072845697402954, "learning_rate": 
9.769868421052633e-05, "loss": 0.3925, "step": 28759 }, { "epoch": 1.6104826968305521, "grad_norm": 1.1078659296035767, "learning_rate": 9.769842105263159e-05, "loss": 0.4503, "step": 28760 }, { "epoch": 1.6105386941426811, "grad_norm": 1.453686237335205, "learning_rate": 9.769815789473683e-05, "loss": 0.562, "step": 28761 }, { "epoch": 1.6105946914548102, "grad_norm": 1.4762941598892212, "learning_rate": 9.769789473684211e-05, "loss": 0.6003, "step": 28762 }, { "epoch": 1.6106506887669392, "grad_norm": 1.154151439666748, "learning_rate": 9.769763157894737e-05, "loss": 0.357, "step": 28763 }, { "epoch": 1.6107066860790682, "grad_norm": 1.1706411838531494, "learning_rate": 9.769736842105264e-05, "loss": 0.4488, "step": 28764 }, { "epoch": 1.6107626833911972, "grad_norm": 1.2597057819366455, "learning_rate": 9.76971052631579e-05, "loss": 0.4386, "step": 28765 }, { "epoch": 1.6108186807033262, "grad_norm": 1.1595110893249512, "learning_rate": 9.769684210526316e-05, "loss": 0.4152, "step": 28766 }, { "epoch": 1.6108746780154553, "grad_norm": 1.3331243991851807, "learning_rate": 9.769657894736842e-05, "loss": 0.5759, "step": 28767 }, { "epoch": 1.6109306753275843, "grad_norm": 1.391335129737854, "learning_rate": 9.76963157894737e-05, "loss": 0.6717, "step": 28768 }, { "epoch": 1.6109866726397133, "grad_norm": 1.2932710647583008, "learning_rate": 9.769605263157895e-05, "loss": 0.396, "step": 28769 }, { "epoch": 1.6110426699518423, "grad_norm": 1.1795724630355835, "learning_rate": 9.769578947368421e-05, "loss": 0.501, "step": 28770 }, { "epoch": 1.6110986672639713, "grad_norm": 1.2339893579483032, "learning_rate": 9.769552631578947e-05, "loss": 0.4543, "step": 28771 }, { "epoch": 1.6111546645761003, "grad_norm": 3.830420732498169, "learning_rate": 9.769526315789475e-05, "loss": 0.3997, "step": 28772 }, { "epoch": 1.6112106618882294, "grad_norm": 1.1677887439727783, "learning_rate": 9.7695e-05, "loss": 0.3785, "step": 28773 }, { "epoch": 1.6112666592003584, "grad_norm": 
1.3042711019515991, "learning_rate": 9.769473684210528e-05, "loss": 0.6053, "step": 28774 }, { "epoch": 1.6113226565124874, "grad_norm": 1.1209880113601685, "learning_rate": 9.769447368421053e-05, "loss": 0.4068, "step": 28775 }, { "epoch": 1.6113786538246164, "grad_norm": 1.3490440845489502, "learning_rate": 9.76942105263158e-05, "loss": 0.5125, "step": 28776 }, { "epoch": 1.6114346511367454, "grad_norm": 1.358336329460144, "learning_rate": 9.769394736842106e-05, "loss": 0.5652, "step": 28777 }, { "epoch": 1.6114906484488745, "grad_norm": 1.1783603429794312, "learning_rate": 9.769368421052632e-05, "loss": 0.4084, "step": 28778 }, { "epoch": 1.6115466457610035, "grad_norm": 1.4164807796478271, "learning_rate": 9.769342105263158e-05, "loss": 0.5972, "step": 28779 }, { "epoch": 1.6116026430731325, "grad_norm": 1.284678339958191, "learning_rate": 9.769315789473684e-05, "loss": 0.4398, "step": 28780 }, { "epoch": 1.6116586403852615, "grad_norm": 1.2834359407424927, "learning_rate": 9.769289473684211e-05, "loss": 0.5424, "step": 28781 }, { "epoch": 1.6117146376973905, "grad_norm": 1.4140981435775757, "learning_rate": 9.769263157894737e-05, "loss": 0.4268, "step": 28782 }, { "epoch": 1.6117706350095196, "grad_norm": 1.4729633331298828, "learning_rate": 9.769236842105264e-05, "loss": 0.4791, "step": 28783 }, { "epoch": 1.6118266323216486, "grad_norm": 1.2354273796081543, "learning_rate": 9.769210526315789e-05, "loss": 0.489, "step": 28784 }, { "epoch": 1.6118826296337776, "grad_norm": 1.4806479215621948, "learning_rate": 9.769184210526316e-05, "loss": 0.4625, "step": 28785 }, { "epoch": 1.6119386269459066, "grad_norm": 1.2784717082977295, "learning_rate": 9.769157894736842e-05, "loss": 0.4417, "step": 28786 }, { "epoch": 1.6119946242580356, "grad_norm": 1.5859538316726685, "learning_rate": 9.76913157894737e-05, "loss": 0.5529, "step": 28787 }, { "epoch": 1.6120506215701647, "grad_norm": 1.3850970268249512, "learning_rate": 9.769105263157896e-05, "loss": 0.4521, "step": 
28788 }, { "epoch": 1.6121066188822937, "grad_norm": 1.3167903423309326, "learning_rate": 9.769078947368422e-05, "loss": 0.4672, "step": 28789 }, { "epoch": 1.6121626161944227, "grad_norm": 1.0518642663955688, "learning_rate": 9.769052631578948e-05, "loss": 0.3206, "step": 28790 }, { "epoch": 1.6122186135065517, "grad_norm": 1.182880163192749, "learning_rate": 9.769026315789475e-05, "loss": 0.4477, "step": 28791 }, { "epoch": 1.6122746108186807, "grad_norm": 1.206493854522705, "learning_rate": 9.769000000000001e-05, "loss": 0.4195, "step": 28792 }, { "epoch": 1.6123306081308098, "grad_norm": 1.2070099115371704, "learning_rate": 9.768973684210527e-05, "loss": 0.5545, "step": 28793 }, { "epoch": 1.6123866054429388, "grad_norm": 1.3531379699707031, "learning_rate": 9.768947368421053e-05, "loss": 0.3874, "step": 28794 }, { "epoch": 1.6124426027550678, "grad_norm": 1.6257026195526123, "learning_rate": 9.768921052631579e-05, "loss": 0.4392, "step": 28795 }, { "epoch": 1.6124986000671968, "grad_norm": 1.2899463176727295, "learning_rate": 9.768894736842106e-05, "loss": 0.3819, "step": 28796 }, { "epoch": 1.6125545973793258, "grad_norm": 1.8360083103179932, "learning_rate": 9.768868421052632e-05, "loss": 0.4681, "step": 28797 }, { "epoch": 1.6126105946914548, "grad_norm": 1.408347249031067, "learning_rate": 9.768842105263158e-05, "loss": 0.4443, "step": 28798 }, { "epoch": 1.6126665920035839, "grad_norm": 16.916879653930664, "learning_rate": 9.768815789473684e-05, "loss": 0.5161, "step": 28799 }, { "epoch": 1.6127225893157129, "grad_norm": 1.405197262763977, "learning_rate": 9.768789473684211e-05, "loss": 0.4787, "step": 28800 }, { "epoch": 1.612778586627842, "grad_norm": 1.0854650735855103, "learning_rate": 9.768763157894737e-05, "loss": 0.4347, "step": 28801 }, { "epoch": 1.612834583939971, "grad_norm": 1.2529568672180176, "learning_rate": 9.768736842105263e-05, "loss": 0.419, "step": 28802 }, { "epoch": 1.6128905812521, "grad_norm": 1.1711766719818115, "learning_rate": 
9.768710526315789e-05, "loss": 0.3822, "step": 28803 }, { "epoch": 1.612946578564229, "grad_norm": 1.5420488119125366, "learning_rate": 9.768684210526317e-05, "loss": 0.4751, "step": 28804 }, { "epoch": 1.613002575876358, "grad_norm": 1.1327838897705078, "learning_rate": 9.768657894736843e-05, "loss": 0.4577, "step": 28805 }, { "epoch": 1.613058573188487, "grad_norm": 1.602009892463684, "learning_rate": 9.76863157894737e-05, "loss": 0.6539, "step": 28806 }, { "epoch": 1.613114570500616, "grad_norm": 1.3847897052764893, "learning_rate": 9.768605263157894e-05, "loss": 0.5669, "step": 28807 }, { "epoch": 1.613170567812745, "grad_norm": 1.1550319194793701, "learning_rate": 9.768578947368422e-05, "loss": 0.4647, "step": 28808 }, { "epoch": 1.613226565124874, "grad_norm": 1.7570668458938599, "learning_rate": 9.768552631578948e-05, "loss": 0.4337, "step": 28809 }, { "epoch": 1.613282562437003, "grad_norm": 1.6952332258224487, "learning_rate": 9.768526315789475e-05, "loss": 0.7457, "step": 28810 }, { "epoch": 1.613338559749132, "grad_norm": 1.632666826248169, "learning_rate": 9.768500000000001e-05, "loss": 0.7305, "step": 28811 }, { "epoch": 1.6133945570612611, "grad_norm": 1.2979798316955566, "learning_rate": 9.768473684210527e-05, "loss": 0.5339, "step": 28812 }, { "epoch": 1.6134505543733901, "grad_norm": 1.3651719093322754, "learning_rate": 9.768447368421053e-05, "loss": 0.5517, "step": 28813 }, { "epoch": 1.6135065516855192, "grad_norm": 1.3211630582809448, "learning_rate": 9.768421052631579e-05, "loss": 0.48, "step": 28814 }, { "epoch": 1.6135625489976482, "grad_norm": 1.4898940324783325, "learning_rate": 9.768394736842106e-05, "loss": 0.5233, "step": 28815 }, { "epoch": 1.6136185463097772, "grad_norm": 1.2606719732284546, "learning_rate": 9.768368421052631e-05, "loss": 0.5858, "step": 28816 }, { "epoch": 1.6136745436219062, "grad_norm": 1.2960206270217896, "learning_rate": 9.768342105263158e-05, "loss": 0.4371, "step": 28817 }, { "epoch": 1.6137305409340352, 
"grad_norm": 1.3298046588897705, "learning_rate": 9.768315789473684e-05, "loss": 0.3939, "step": 28818 }, { "epoch": 1.6137865382461642, "grad_norm": 1.3246947526931763, "learning_rate": 9.768289473684212e-05, "loss": 0.4704, "step": 28819 }, { "epoch": 1.6138425355582933, "grad_norm": 1.300347924232483, "learning_rate": 9.768263157894738e-05, "loss": 0.4161, "step": 28820 }, { "epoch": 1.6138985328704223, "grad_norm": 1.2787528038024902, "learning_rate": 9.768236842105264e-05, "loss": 0.4934, "step": 28821 }, { "epoch": 1.6139545301825513, "grad_norm": 1.212634563446045, "learning_rate": 9.76821052631579e-05, "loss": 0.426, "step": 28822 }, { "epoch": 1.6140105274946803, "grad_norm": 1.5091456174850464, "learning_rate": 9.768184210526317e-05, "loss": 0.4461, "step": 28823 }, { "epoch": 1.6140665248068093, "grad_norm": 1.3671553134918213, "learning_rate": 9.768157894736843e-05, "loss": 0.4442, "step": 28824 }, { "epoch": 1.6141225221189384, "grad_norm": 1.0682740211486816, "learning_rate": 9.768131578947369e-05, "loss": 0.3582, "step": 28825 }, { "epoch": 1.6141785194310674, "grad_norm": 1.0848997831344604, "learning_rate": 9.768105263157895e-05, "loss": 0.3017, "step": 28826 }, { "epoch": 1.6142345167431964, "grad_norm": 1.3373178243637085, "learning_rate": 9.768078947368422e-05, "loss": 0.4526, "step": 28827 }, { "epoch": 1.6142905140553254, "grad_norm": 1.0836713314056396, "learning_rate": 9.768052631578948e-05, "loss": 0.3695, "step": 28828 }, { "epoch": 1.6143465113674544, "grad_norm": 1.3050196170806885, "learning_rate": 9.768026315789474e-05, "loss": 0.5845, "step": 28829 }, { "epoch": 1.6144025086795835, "grad_norm": 1.3665285110473633, "learning_rate": 9.768e-05, "loss": 0.4255, "step": 28830 }, { "epoch": 1.6144585059917125, "grad_norm": 1.3075628280639648, "learning_rate": 9.767973684210526e-05, "loss": 0.5029, "step": 28831 }, { "epoch": 1.6145145033038415, "grad_norm": 1.5388578176498413, "learning_rate": 9.767947368421053e-05, "loss": 0.5079, "step": 
28832 }, { "epoch": 1.6145705006159705, "grad_norm": 1.5257887840270996, "learning_rate": 9.767921052631579e-05, "loss": 0.3415, "step": 28833 }, { "epoch": 1.6146264979280995, "grad_norm": 1.3473721742630005, "learning_rate": 9.767894736842105e-05, "loss": 0.5001, "step": 28834 }, { "epoch": 1.6146824952402286, "grad_norm": 1.303948163986206, "learning_rate": 9.767868421052631e-05, "loss": 0.4365, "step": 28835 }, { "epoch": 1.6147384925523576, "grad_norm": 1.313559651374817, "learning_rate": 9.767842105263159e-05, "loss": 0.4855, "step": 28836 }, { "epoch": 1.6147944898644866, "grad_norm": 1.1499208211898804, "learning_rate": 9.767815789473685e-05, "loss": 0.4064, "step": 28837 }, { "epoch": 1.6148504871766156, "grad_norm": 1.278723955154419, "learning_rate": 9.767789473684212e-05, "loss": 0.4662, "step": 28838 }, { "epoch": 1.6149064844887446, "grad_norm": 1.3083597421646118, "learning_rate": 9.767763157894736e-05, "loss": 0.534, "step": 28839 }, { "epoch": 1.6149624818008737, "grad_norm": 1.1727782487869263, "learning_rate": 9.767736842105264e-05, "loss": 0.4407, "step": 28840 }, { "epoch": 1.6150184791130027, "grad_norm": 1.8495877981185913, "learning_rate": 9.76771052631579e-05, "loss": 0.4931, "step": 28841 }, { "epoch": 1.6150744764251317, "grad_norm": 1.2436549663543701, "learning_rate": 9.767684210526317e-05, "loss": 0.3709, "step": 28842 }, { "epoch": 1.6151304737372607, "grad_norm": 1.3587554693222046, "learning_rate": 9.767657894736843e-05, "loss": 0.4508, "step": 28843 }, { "epoch": 1.6151864710493897, "grad_norm": 1.335359811782837, "learning_rate": 9.767631578947369e-05, "loss": 0.4305, "step": 28844 }, { "epoch": 1.6152424683615187, "grad_norm": 1.2961686849594116, "learning_rate": 9.767605263157895e-05, "loss": 0.4625, "step": 28845 }, { "epoch": 1.6152984656736478, "grad_norm": 1.3412424325942993, "learning_rate": 9.767578947368422e-05, "loss": 0.4367, "step": 28846 }, { "epoch": 1.6153544629857768, "grad_norm": 1.342451810836792, 
"learning_rate": 9.767552631578948e-05, "loss": 0.4499, "step": 28847 }, { "epoch": 1.6154104602979058, "grad_norm": 1.823162317276001, "learning_rate": 9.767526315789474e-05, "loss": 0.4182, "step": 28848 }, { "epoch": 1.6154664576100348, "grad_norm": 1.3667343854904175, "learning_rate": 9.7675e-05, "loss": 0.4889, "step": 28849 }, { "epoch": 1.6155224549221638, "grad_norm": 1.4782923460006714, "learning_rate": 9.767473684210526e-05, "loss": 0.5111, "step": 28850 }, { "epoch": 1.6155784522342929, "grad_norm": 1.3616951704025269, "learning_rate": 9.767447368421054e-05, "loss": 0.4375, "step": 28851 }, { "epoch": 1.6156344495464219, "grad_norm": 1.2682764530181885, "learning_rate": 9.76742105263158e-05, "loss": 0.4144, "step": 28852 }, { "epoch": 1.615690446858551, "grad_norm": 1.182246208190918, "learning_rate": 9.767394736842105e-05, "loss": 0.3703, "step": 28853 }, { "epoch": 1.61574644417068, "grad_norm": 1.3063621520996094, "learning_rate": 9.767368421052631e-05, "loss": 0.4596, "step": 28854 }, { "epoch": 1.615802441482809, "grad_norm": 1.2230753898620605, "learning_rate": 9.767342105263159e-05, "loss": 0.3699, "step": 28855 }, { "epoch": 1.615858438794938, "grad_norm": 2.1222188472747803, "learning_rate": 9.767315789473685e-05, "loss": 0.5617, "step": 28856 }, { "epoch": 1.615914436107067, "grad_norm": 1.2271397113800049, "learning_rate": 9.767289473684211e-05, "loss": 0.3465, "step": 28857 }, { "epoch": 1.615970433419196, "grad_norm": 2.0450069904327393, "learning_rate": 9.767263157894737e-05, "loss": 0.5504, "step": 28858 }, { "epoch": 1.616026430731325, "grad_norm": 1.3388069868087769, "learning_rate": 9.767236842105264e-05, "loss": 0.4284, "step": 28859 }, { "epoch": 1.616082428043454, "grad_norm": 1.4325214624404907, "learning_rate": 9.76721052631579e-05, "loss": 0.3864, "step": 28860 }, { "epoch": 1.616138425355583, "grad_norm": 1.2629402875900269, "learning_rate": 9.767184210526317e-05, "loss": 0.4763, "step": 28861 }, { "epoch": 1.616194422667712, 
"grad_norm": 1.146431803703308, "learning_rate": 9.767157894736842e-05, "loss": 0.3929, "step": 28862 }, { "epoch": 1.616250419979841, "grad_norm": 1.3569384813308716, "learning_rate": 9.767131578947369e-05, "loss": 0.4921, "step": 28863 }, { "epoch": 1.61630641729197, "grad_norm": 1.4281699657440186, "learning_rate": 9.767105263157895e-05, "loss": 0.454, "step": 28864 }, { "epoch": 1.6163624146040991, "grad_norm": 1.4064583778381348, "learning_rate": 9.767078947368421e-05, "loss": 0.4045, "step": 28865 }, { "epoch": 1.6164184119162281, "grad_norm": 1.4081312417984009, "learning_rate": 9.767052631578949e-05, "loss": 0.439, "step": 28866 }, { "epoch": 1.6164744092283572, "grad_norm": 1.4261716604232788, "learning_rate": 9.767026315789473e-05, "loss": 0.4116, "step": 28867 }, { "epoch": 1.6165304065404862, "grad_norm": 1.331688404083252, "learning_rate": 9.767e-05, "loss": 0.4301, "step": 28868 }, { "epoch": 1.6165864038526152, "grad_norm": 1.3330377340316772, "learning_rate": 9.766973684210526e-05, "loss": 0.4386, "step": 28869 }, { "epoch": 1.6166424011647442, "grad_norm": 1.7573132514953613, "learning_rate": 9.766947368421054e-05, "loss": 0.6382, "step": 28870 }, { "epoch": 1.6166983984768732, "grad_norm": 1.5500704050064087, "learning_rate": 9.766921052631578e-05, "loss": 0.4457, "step": 28871 }, { "epoch": 1.6167543957890023, "grad_norm": 1.4348666667938232, "learning_rate": 9.766894736842106e-05, "loss": 0.4841, "step": 28872 }, { "epoch": 1.6168103931011313, "grad_norm": 1.3392975330352783, "learning_rate": 9.766868421052632e-05, "loss": 0.5253, "step": 28873 }, { "epoch": 1.6168663904132603, "grad_norm": 1.9987597465515137, "learning_rate": 9.766842105263159e-05, "loss": 0.4609, "step": 28874 }, { "epoch": 1.6169223877253893, "grad_norm": 2.7324507236480713, "learning_rate": 9.766815789473685e-05, "loss": 0.6237, "step": 28875 }, { "epoch": 1.6169783850375183, "grad_norm": 1.508131742477417, "learning_rate": 9.766789473684211e-05, "loss": 0.5241, "step": 
28876 }, { "epoch": 1.6170343823496474, "grad_norm": 1.9047346115112305, "learning_rate": 9.766763157894737e-05, "loss": 0.4972, "step": 28877 }, { "epoch": 1.6170903796617764, "grad_norm": 1.4013547897338867, "learning_rate": 9.766736842105264e-05, "loss": 0.51, "step": 28878 }, { "epoch": 1.6171463769739054, "grad_norm": 2.806600332260132, "learning_rate": 9.76671052631579e-05, "loss": 0.4834, "step": 28879 }, { "epoch": 1.6172023742860344, "grad_norm": 1.1944141387939453, "learning_rate": 9.766684210526316e-05, "loss": 0.3531, "step": 28880 }, { "epoch": 1.6172583715981634, "grad_norm": 1.2647945880889893, "learning_rate": 9.766657894736842e-05, "loss": 0.529, "step": 28881 }, { "epoch": 1.6173143689102925, "grad_norm": 1.6566176414489746, "learning_rate": 9.766631578947368e-05, "loss": 0.4407, "step": 28882 }, { "epoch": 1.6173703662224215, "grad_norm": 1.159731388092041, "learning_rate": 9.766605263157896e-05, "loss": 0.3763, "step": 28883 }, { "epoch": 1.6174263635345505, "grad_norm": 1.4550575017929077, "learning_rate": 9.766578947368421e-05, "loss": 0.5408, "step": 28884 }, { "epoch": 1.6174823608466795, "grad_norm": 1.4085232019424438, "learning_rate": 9.766552631578947e-05, "loss": 0.4368, "step": 28885 }, { "epoch": 1.6175383581588085, "grad_norm": 1.7718677520751953, "learning_rate": 9.766526315789473e-05, "loss": 0.4229, "step": 28886 }, { "epoch": 1.6175943554709376, "grad_norm": 1.944851040840149, "learning_rate": 9.766500000000001e-05, "loss": 0.4829, "step": 28887 }, { "epoch": 1.6176503527830666, "grad_norm": 1.1601150035858154, "learning_rate": 9.766473684210527e-05, "loss": 0.3789, "step": 28888 }, { "epoch": 1.6177063500951956, "grad_norm": 1.2976466417312622, "learning_rate": 9.766447368421053e-05, "loss": 0.4909, "step": 28889 }, { "epoch": 1.6177623474073246, "grad_norm": 1.249280333518982, "learning_rate": 9.766421052631579e-05, "loss": 0.4483, "step": 28890 }, { "epoch": 1.6178183447194536, "grad_norm": 1.6155037879943848, "learning_rate": 
9.766394736842106e-05, "loss": 0.5474, "step": 28891 }, { "epoch": 1.6178743420315826, "grad_norm": 1.7790343761444092, "learning_rate": 9.766368421052632e-05, "loss": 0.4386, "step": 28892 }, { "epoch": 1.6179303393437117, "grad_norm": 1.2534726858139038, "learning_rate": 9.766342105263159e-05, "loss": 0.3634, "step": 28893 }, { "epoch": 1.6179863366558407, "grad_norm": 1.319286823272705, "learning_rate": 9.766315789473684e-05, "loss": 0.5808, "step": 28894 }, { "epoch": 1.6180423339679697, "grad_norm": 1.3928951025009155, "learning_rate": 9.766289473684211e-05, "loss": 0.3583, "step": 28895 }, { "epoch": 1.6180983312800985, "grad_norm": 1.4259158372879028, "learning_rate": 9.766263157894737e-05, "loss": 0.4083, "step": 28896 }, { "epoch": 1.6181543285922275, "grad_norm": 1.4068379402160645, "learning_rate": 9.766236842105265e-05, "loss": 0.6703, "step": 28897 }, { "epoch": 1.6182103259043565, "grad_norm": 1.14223051071167, "learning_rate": 9.76621052631579e-05, "loss": 0.4393, "step": 28898 }, { "epoch": 1.6182663232164856, "grad_norm": 1.1985951662063599, "learning_rate": 9.766184210526315e-05, "loss": 0.3575, "step": 28899 }, { "epoch": 1.6183223205286146, "grad_norm": 1.5247827768325806, "learning_rate": 9.766157894736842e-05, "loss": 0.5642, "step": 28900 }, { "epoch": 1.6183783178407436, "grad_norm": 1.274037480354309, "learning_rate": 9.766131578947368e-05, "loss": 0.4636, "step": 28901 }, { "epoch": 1.6184343151528726, "grad_norm": 1.0825755596160889, "learning_rate": 9.766105263157896e-05, "loss": 0.4752, "step": 28902 }, { "epoch": 1.6184903124650016, "grad_norm": 1.1186537742614746, "learning_rate": 9.766078947368422e-05, "loss": 0.4092, "step": 28903 }, { "epoch": 1.6185463097771307, "grad_norm": 1.2335047721862793, "learning_rate": 9.766052631578948e-05, "loss": 0.4991, "step": 28904 }, { "epoch": 1.6186023070892597, "grad_norm": 1.4402673244476318, "learning_rate": 9.766026315789474e-05, "loss": 0.4952, "step": 28905 }, { "epoch": 1.6186583044013887, 
"grad_norm": 1.5665110349655151, "learning_rate": 9.766000000000001e-05, "loss": 0.5344, "step": 28906 }, { "epoch": 1.6187143017135177, "grad_norm": 1.2146878242492676, "learning_rate": 9.765973684210527e-05, "loss": 0.4731, "step": 28907 }, { "epoch": 1.6187702990256467, "grad_norm": 1.4464036226272583, "learning_rate": 9.765947368421053e-05, "loss": 0.6648, "step": 28908 }, { "epoch": 1.6188262963377757, "grad_norm": 1.3497234582901, "learning_rate": 9.765921052631579e-05, "loss": 0.5241, "step": 28909 }, { "epoch": 1.6188822936499048, "grad_norm": 1.1629825830459595, "learning_rate": 9.765894736842106e-05, "loss": 0.3955, "step": 28910 }, { "epoch": 1.6189382909620338, "grad_norm": 1.1402976512908936, "learning_rate": 9.765868421052632e-05, "loss": 0.3949, "step": 28911 }, { "epoch": 1.6189942882741628, "grad_norm": 1.6595808267593384, "learning_rate": 9.765842105263158e-05, "loss": 0.5269, "step": 28912 }, { "epoch": 1.6190502855862918, "grad_norm": 1.1686064004898071, "learning_rate": 9.765815789473684e-05, "loss": 0.4283, "step": 28913 }, { "epoch": 1.6191062828984208, "grad_norm": 1.1514596939086914, "learning_rate": 9.765789473684212e-05, "loss": 0.4714, "step": 28914 }, { "epoch": 1.6191622802105499, "grad_norm": 1.1812119483947754, "learning_rate": 9.765763157894737e-05, "loss": 0.3457, "step": 28915 }, { "epoch": 1.6192182775226789, "grad_norm": 1.2720551490783691, "learning_rate": 9.765736842105263e-05, "loss": 0.4326, "step": 28916 }, { "epoch": 1.619274274834808, "grad_norm": 2.7686190605163574, "learning_rate": 9.76571052631579e-05, "loss": 0.701, "step": 28917 }, { "epoch": 1.619330272146937, "grad_norm": 1.5894356966018677, "learning_rate": 9.765684210526315e-05, "loss": 0.6887, "step": 28918 }, { "epoch": 1.619386269459066, "grad_norm": 1.4989386796951294, "learning_rate": 9.765657894736843e-05, "loss": 0.581, "step": 28919 }, { "epoch": 1.619442266771195, "grad_norm": 1.2035168409347534, "learning_rate": 9.765631578947369e-05, "loss": 0.4001, 
"step": 28920 }, { "epoch": 1.619498264083324, "grad_norm": 1.7772064208984375, "learning_rate": 9.765605263157896e-05, "loss": 0.4919, "step": 28921 }, { "epoch": 1.619554261395453, "grad_norm": 1.3816946744918823, "learning_rate": 9.76557894736842e-05, "loss": 0.4771, "step": 28922 }, { "epoch": 1.619610258707582, "grad_norm": 1.5412209033966064, "learning_rate": 9.765552631578948e-05, "loss": 0.4784, "step": 28923 }, { "epoch": 1.619666256019711, "grad_norm": 1.265815019607544, "learning_rate": 9.765526315789474e-05, "loss": 0.4187, "step": 28924 }, { "epoch": 1.61972225333184, "grad_norm": 1.6642132997512817, "learning_rate": 9.765500000000001e-05, "loss": 0.4875, "step": 28925 }, { "epoch": 1.619778250643969, "grad_norm": 1.099763035774231, "learning_rate": 9.765473684210526e-05, "loss": 0.3481, "step": 28926 }, { "epoch": 1.619834247956098, "grad_norm": 1.391595482826233, "learning_rate": 9.765447368421053e-05, "loss": 0.5049, "step": 28927 }, { "epoch": 1.6198902452682271, "grad_norm": 1.264090895652771, "learning_rate": 9.765421052631579e-05, "loss": 0.401, "step": 28928 }, { "epoch": 1.6199462425803561, "grad_norm": 1.3754740953445435, "learning_rate": 9.765394736842107e-05, "loss": 0.5046, "step": 28929 }, { "epoch": 1.6200022398924852, "grad_norm": 1.443232774734497, "learning_rate": 9.765368421052633e-05, "loss": 0.5517, "step": 28930 }, { "epoch": 1.6200582372046142, "grad_norm": 1.269391417503357, "learning_rate": 9.765342105263158e-05, "loss": 0.5542, "step": 28931 }, { "epoch": 1.6201142345167432, "grad_norm": 1.2511099576950073, "learning_rate": 9.765315789473684e-05, "loss": 0.4431, "step": 28932 }, { "epoch": 1.6201702318288722, "grad_norm": 1.5928725004196167, "learning_rate": 9.765289473684212e-05, "loss": 0.4087, "step": 28933 }, { "epoch": 1.6202262291410012, "grad_norm": 1.8670235872268677, "learning_rate": 9.765263157894738e-05, "loss": 0.478, "step": 28934 }, { "epoch": 1.6202822264531302, "grad_norm": 1.4178543090820312, "learning_rate": 
9.765236842105264e-05, "loss": 0.4866, "step": 28935 }, { "epoch": 1.6203382237652593, "grad_norm": 1.7292839288711548, "learning_rate": 9.76521052631579e-05, "loss": 0.5253, "step": 28936 }, { "epoch": 1.6203942210773883, "grad_norm": 1.3527981042861938, "learning_rate": 9.765184210526316e-05, "loss": 0.5333, "step": 28937 }, { "epoch": 1.6204502183895173, "grad_norm": 1.0957695245742798, "learning_rate": 9.765157894736843e-05, "loss": 0.4332, "step": 28938 }, { "epoch": 1.6205062157016463, "grad_norm": 1.1085983514785767, "learning_rate": 9.765131578947369e-05, "loss": 0.3908, "step": 28939 }, { "epoch": 1.6205622130137753, "grad_norm": 1.4848521947860718, "learning_rate": 9.765105263157895e-05, "loss": 0.3817, "step": 28940 }, { "epoch": 1.6206182103259044, "grad_norm": 1.2339684963226318, "learning_rate": 9.765078947368421e-05, "loss": 0.4563, "step": 28941 }, { "epoch": 1.6206742076380334, "grad_norm": 1.0157705545425415, "learning_rate": 9.765052631578948e-05, "loss": 0.3975, "step": 28942 }, { "epoch": 1.6207302049501624, "grad_norm": 1.3542698621749878, "learning_rate": 9.765026315789474e-05, "loss": 0.5248, "step": 28943 }, { "epoch": 1.6207862022622914, "grad_norm": 1.5024199485778809, "learning_rate": 9.765e-05, "loss": 0.5143, "step": 28944 }, { "epoch": 1.6208421995744204, "grad_norm": 1.2035006284713745, "learning_rate": 9.764973684210526e-05, "loss": 0.4675, "step": 28945 }, { "epoch": 1.6208981968865495, "grad_norm": 4.292696475982666, "learning_rate": 9.764947368421053e-05, "loss": 0.7096, "step": 28946 }, { "epoch": 1.6209541941986785, "grad_norm": 1.3858609199523926, "learning_rate": 9.76492105263158e-05, "loss": 0.45, "step": 28947 }, { "epoch": 1.6210101915108075, "grad_norm": 1.3534660339355469, "learning_rate": 9.764894736842107e-05, "loss": 0.4051, "step": 28948 }, { "epoch": 1.6210661888229365, "grad_norm": 1.4373719692230225, "learning_rate": 9.764868421052631e-05, "loss": 0.4589, "step": 28949 }, { "epoch": 1.6211221861350655, 
"grad_norm": 1.2982500791549683, "learning_rate": 9.764842105263159e-05, "loss": 0.4363, "step": 28950 }, { "epoch": 1.6211781834471946, "grad_norm": 1.3825939893722534, "learning_rate": 9.764815789473685e-05, "loss": 0.5435, "step": 28951 }, { "epoch": 1.6212341807593236, "grad_norm": 1.072726845741272, "learning_rate": 9.764789473684211e-05, "loss": 0.4476, "step": 28952 }, { "epoch": 1.6212901780714526, "grad_norm": 1.3606057167053223, "learning_rate": 9.764763157894738e-05, "loss": 0.4894, "step": 28953 }, { "epoch": 1.6213461753835816, "grad_norm": 1.4802929162979126, "learning_rate": 9.764736842105263e-05, "loss": 0.4803, "step": 28954 }, { "epoch": 1.6214021726957106, "grad_norm": 1.1817505359649658, "learning_rate": 9.76471052631579e-05, "loss": 0.3624, "step": 28955 }, { "epoch": 1.6214581700078396, "grad_norm": 1.829140067100525, "learning_rate": 9.764684210526316e-05, "loss": 0.686, "step": 28956 }, { "epoch": 1.6215141673199687, "grad_norm": 1.3381738662719727, "learning_rate": 9.764657894736843e-05, "loss": 0.4526, "step": 28957 }, { "epoch": 1.6215701646320977, "grad_norm": 1.410481572151184, "learning_rate": 9.764631578947369e-05, "loss": 0.5079, "step": 28958 }, { "epoch": 1.6216261619442267, "grad_norm": 1.2489384412765503, "learning_rate": 9.764605263157895e-05, "loss": 0.4669, "step": 28959 }, { "epoch": 1.6216821592563557, "grad_norm": 1.709755301475525, "learning_rate": 9.764578947368421e-05, "loss": 0.6166, "step": 28960 }, { "epoch": 1.6217381565684847, "grad_norm": 1.248163104057312, "learning_rate": 9.764552631578949e-05, "loss": 0.491, "step": 28961 }, { "epoch": 1.6217941538806138, "grad_norm": 1.2984687089920044, "learning_rate": 9.764526315789474e-05, "loss": 0.4477, "step": 28962 }, { "epoch": 1.6218501511927428, "grad_norm": 1.3458508253097534, "learning_rate": 9.7645e-05, "loss": 0.4577, "step": 28963 }, { "epoch": 1.6219061485048718, "grad_norm": 1.5114479064941406, "learning_rate": 9.764473684210526e-05, "loss": 0.704, "step": 
28964 }, { "epoch": 1.6219621458170008, "grad_norm": 1.4598270654678345, "learning_rate": 9.764447368421054e-05, "loss": 0.4764, "step": 28965 }, { "epoch": 1.6220181431291298, "grad_norm": 1.3080896139144897, "learning_rate": 9.76442105263158e-05, "loss": 0.4064, "step": 28966 }, { "epoch": 1.6220741404412589, "grad_norm": 1.2693126201629639, "learning_rate": 9.764394736842106e-05, "loss": 0.438, "step": 28967 }, { "epoch": 1.6221301377533879, "grad_norm": 1.0561363697052002, "learning_rate": 9.764368421052632e-05, "loss": 0.3789, "step": 28968 }, { "epoch": 1.622186135065517, "grad_norm": 1.2383800745010376, "learning_rate": 9.764342105263158e-05, "loss": 0.4333, "step": 28969 }, { "epoch": 1.622242132377646, "grad_norm": 1.2280343770980835, "learning_rate": 9.764315789473685e-05, "loss": 0.5021, "step": 28970 }, { "epoch": 1.622298129689775, "grad_norm": 1.3351688385009766, "learning_rate": 9.764289473684211e-05, "loss": 0.505, "step": 28971 }, { "epoch": 1.622354127001904, "grad_norm": 1.3184148073196411, "learning_rate": 9.764263157894737e-05, "loss": 0.5307, "step": 28972 }, { "epoch": 1.622410124314033, "grad_norm": 1.5563063621520996, "learning_rate": 9.764236842105263e-05, "loss": 0.4808, "step": 28973 }, { "epoch": 1.622466121626162, "grad_norm": 1.2767109870910645, "learning_rate": 9.76421052631579e-05, "loss": 0.5952, "step": 28974 }, { "epoch": 1.622522118938291, "grad_norm": 1.2463483810424805, "learning_rate": 9.764184210526316e-05, "loss": 0.377, "step": 28975 }, { "epoch": 1.62257811625042, "grad_norm": 1.4963417053222656, "learning_rate": 9.764157894736844e-05, "loss": 0.4538, "step": 28976 }, { "epoch": 1.622634113562549, "grad_norm": 1.3054777383804321, "learning_rate": 9.764131578947368e-05, "loss": 0.4315, "step": 28977 }, { "epoch": 1.6226901108746778, "grad_norm": 1.5979790687561035, "learning_rate": 9.764105263157895e-05, "loss": 0.4133, "step": 28978 }, { "epoch": 1.6227461081868069, "grad_norm": 1.4845672845840454, "learning_rate": 
9.764078947368421e-05, "loss": 0.457, "step": 28979 }, { "epoch": 1.6228021054989359, "grad_norm": 1.5465277433395386, "learning_rate": 9.764052631578949e-05, "loss": 0.405, "step": 28980 }, { "epoch": 1.622858102811065, "grad_norm": 1.4176063537597656, "learning_rate": 9.764026315789473e-05, "loss": 0.384, "step": 28981 }, { "epoch": 1.622914100123194, "grad_norm": 1.4219379425048828, "learning_rate": 9.764000000000001e-05, "loss": 0.4869, "step": 28982 }, { "epoch": 1.622970097435323, "grad_norm": 1.5295010805130005, "learning_rate": 9.763973684210527e-05, "loss": 0.4806, "step": 28983 }, { "epoch": 1.623026094747452, "grad_norm": 1.2485733032226562, "learning_rate": 9.763947368421054e-05, "loss": 0.4526, "step": 28984 }, { "epoch": 1.623082092059581, "grad_norm": 1.1920751333236694, "learning_rate": 9.76392105263158e-05, "loss": 0.4755, "step": 28985 }, { "epoch": 1.62313808937171, "grad_norm": 1.9188928604125977, "learning_rate": 9.763894736842105e-05, "loss": 0.5597, "step": 28986 }, { "epoch": 1.623194086683839, "grad_norm": 1.6881300210952759, "learning_rate": 9.763868421052632e-05, "loss": 0.5532, "step": 28987 }, { "epoch": 1.623250083995968, "grad_norm": 1.4397168159484863, "learning_rate": 9.763842105263158e-05, "loss": 0.5602, "step": 28988 }, { "epoch": 1.623306081308097, "grad_norm": 1.4025870561599731, "learning_rate": 9.763815789473685e-05, "loss": 0.3857, "step": 28989 }, { "epoch": 1.623362078620226, "grad_norm": 14.822052955627441, "learning_rate": 9.763789473684211e-05, "loss": 0.4411, "step": 28990 }, { "epoch": 1.623418075932355, "grad_norm": 1.1370784044265747, "learning_rate": 9.763763157894737e-05, "loss": 0.3767, "step": 28991 }, { "epoch": 1.6234740732444841, "grad_norm": 1.361096978187561, "learning_rate": 9.763736842105263e-05, "loss": 0.6225, "step": 28992 }, { "epoch": 1.6235300705566131, "grad_norm": 1.27276611328125, "learning_rate": 9.76371052631579e-05, "loss": 0.465, "step": 28993 }, { "epoch": 1.6235860678687422, "grad_norm": 
1.3719966411590576, "learning_rate": 9.763684210526316e-05, "loss": 0.6489, "step": 28994 }, { "epoch": 1.6236420651808712, "grad_norm": 1.4883934259414673, "learning_rate": 9.763657894736842e-05, "loss": 0.4197, "step": 28995 }, { "epoch": 1.6236980624930002, "grad_norm": 1.515711784362793, "learning_rate": 9.763631578947368e-05, "loss": 0.4197, "step": 28996 }, { "epoch": 1.6237540598051292, "grad_norm": 1.0941756963729858, "learning_rate": 9.763605263157896e-05, "loss": 0.4487, "step": 28997 }, { "epoch": 1.6238100571172582, "grad_norm": 1.2121586799621582, "learning_rate": 9.763578947368422e-05, "loss": 0.4212, "step": 28998 }, { "epoch": 1.6238660544293873, "grad_norm": 1.2333850860595703, "learning_rate": 9.763552631578948e-05, "loss": 0.4775, "step": 28999 }, { "epoch": 1.6239220517415163, "grad_norm": 1.2895197868347168, "learning_rate": 9.763526315789474e-05, "loss": 0.5964, "step": 29000 }, { "epoch": 1.6239780490536453, "grad_norm": 1.435691475868225, "learning_rate": 9.763500000000001e-05, "loss": 0.4094, "step": 29001 }, { "epoch": 1.6240340463657743, "grad_norm": 2.209624767303467, "learning_rate": 9.763473684210527e-05, "loss": 0.4834, "step": 29002 }, { "epoch": 1.6240900436779033, "grad_norm": 1.4166560173034668, "learning_rate": 9.763447368421053e-05, "loss": 0.6601, "step": 29003 }, { "epoch": 1.6241460409900323, "grad_norm": 1.151734471321106, "learning_rate": 9.763421052631579e-05, "loss": 0.4065, "step": 29004 }, { "epoch": 1.6242020383021614, "grad_norm": 1.2314025163650513, "learning_rate": 9.763394736842105e-05, "loss": 0.511, "step": 29005 }, { "epoch": 1.6242580356142904, "grad_norm": 1.1897450685501099, "learning_rate": 9.763368421052632e-05, "loss": 0.3494, "step": 29006 }, { "epoch": 1.6243140329264194, "grad_norm": 1.5910755395889282, "learning_rate": 9.763342105263158e-05, "loss": 0.5022, "step": 29007 }, { "epoch": 1.6243700302385484, "grad_norm": 1.2150752544403076, "learning_rate": 9.763315789473685e-05, "loss": 0.4724, "step": 
29008 }, { "epoch": 1.6244260275506774, "grad_norm": 1.533077597618103, "learning_rate": 9.76328947368421e-05, "loss": 0.5095, "step": 29009 }, { "epoch": 1.6244820248628065, "grad_norm": 1.3829405307769775, "learning_rate": 9.763263157894737e-05, "loss": 0.4628, "step": 29010 }, { "epoch": 1.6245380221749355, "grad_norm": 1.334855318069458, "learning_rate": 9.763236842105263e-05, "loss": 0.3906, "step": 29011 }, { "epoch": 1.6245940194870645, "grad_norm": 1.2197556495666504, "learning_rate": 9.763210526315791e-05, "loss": 0.5492, "step": 29012 }, { "epoch": 1.6246500167991935, "grad_norm": 1.3573681116104126, "learning_rate": 9.763184210526317e-05, "loss": 0.5445, "step": 29013 }, { "epoch": 1.6247060141113225, "grad_norm": 1.6501281261444092, "learning_rate": 9.763157894736843e-05, "loss": 0.4317, "step": 29014 }, { "epoch": 1.6247620114234516, "grad_norm": 1.2500278949737549, "learning_rate": 9.763131578947369e-05, "loss": 0.385, "step": 29015 }, { "epoch": 1.6248180087355806, "grad_norm": 1.5370055437088013, "learning_rate": 9.763105263157896e-05, "loss": 0.522, "step": 29016 }, { "epoch": 1.6248740060477096, "grad_norm": 1.313562273979187, "learning_rate": 9.763078947368422e-05, "loss": 0.3867, "step": 29017 }, { "epoch": 1.6249300033598386, "grad_norm": 1.272716760635376, "learning_rate": 9.763052631578948e-05, "loss": 0.4413, "step": 29018 }, { "epoch": 1.6249860006719676, "grad_norm": 1.325479507446289, "learning_rate": 9.763026315789474e-05, "loss": 0.4495, "step": 29019 }, { "epoch": 1.6250419979840967, "grad_norm": 1.1775710582733154, "learning_rate": 9.763e-05, "loss": 0.6156, "step": 29020 }, { "epoch": 1.6250979952962257, "grad_norm": 1.4680395126342773, "learning_rate": 9.762973684210527e-05, "loss": 0.4178, "step": 29021 }, { "epoch": 1.6251539926083547, "grad_norm": 1.3455936908721924, "learning_rate": 9.762947368421053e-05, "loss": 0.3931, "step": 29022 }, { "epoch": 1.6252099899204837, "grad_norm": 1.1602425575256348, "learning_rate": 
9.762921052631579e-05, "loss": 0.4051, "step": 29023 }, { "epoch": 1.6252659872326127, "grad_norm": 1.522652506828308, "learning_rate": 9.762894736842105e-05, "loss": 0.4474, "step": 29024 }, { "epoch": 1.6253219845447417, "grad_norm": 1.1786167621612549, "learning_rate": 9.762868421052632e-05, "loss": 0.6032, "step": 29025 }, { "epoch": 1.6253779818568708, "grad_norm": 1.346570372581482, "learning_rate": 9.762842105263158e-05, "loss": 0.4033, "step": 29026 }, { "epoch": 1.6254339791689998, "grad_norm": 1.21056067943573, "learning_rate": 9.762815789473684e-05, "loss": 0.5341, "step": 29027 }, { "epoch": 1.6254899764811288, "grad_norm": 1.5269578695297241, "learning_rate": 9.76278947368421e-05, "loss": 0.5068, "step": 29028 }, { "epoch": 1.6255459737932578, "grad_norm": 1.4292340278625488, "learning_rate": 9.762763157894738e-05, "loss": 0.4894, "step": 29029 }, { "epoch": 1.6256019711053868, "grad_norm": 1.1982123851776123, "learning_rate": 9.762736842105264e-05, "loss": 0.4273, "step": 29030 }, { "epoch": 1.6256579684175159, "grad_norm": 1.709243893623352, "learning_rate": 9.76271052631579e-05, "loss": 0.5359, "step": 29031 }, { "epoch": 1.6257139657296449, "grad_norm": 1.1783006191253662, "learning_rate": 9.762684210526316e-05, "loss": 0.3656, "step": 29032 }, { "epoch": 1.625769963041774, "grad_norm": 1.061718225479126, "learning_rate": 9.762657894736843e-05, "loss": 0.4325, "step": 29033 }, { "epoch": 1.625825960353903, "grad_norm": 2.1916258335113525, "learning_rate": 9.762631578947369e-05, "loss": 0.4931, "step": 29034 }, { "epoch": 1.625881957666032, "grad_norm": 1.1895729303359985, "learning_rate": 9.762605263157896e-05, "loss": 0.3862, "step": 29035 }, { "epoch": 1.625937954978161, "grad_norm": 1.0891497135162354, "learning_rate": 9.762578947368421e-05, "loss": 0.4223, "step": 29036 }, { "epoch": 1.62599395229029, "grad_norm": 1.2871347665786743, "learning_rate": 9.762552631578947e-05, "loss": 0.4835, "step": 29037 }, { "epoch": 1.626049949602419, 
"grad_norm": 1.1227421760559082, "learning_rate": 9.762526315789474e-05, "loss": 0.3586, "step": 29038 }, { "epoch": 1.626105946914548, "grad_norm": 1.5562020540237427, "learning_rate": 9.7625e-05, "loss": 0.3576, "step": 29039 }, { "epoch": 1.626161944226677, "grad_norm": 1.2298904657363892, "learning_rate": 9.762473684210527e-05, "loss": 0.5008, "step": 29040 }, { "epoch": 1.626217941538806, "grad_norm": 1.3883846998214722, "learning_rate": 9.762447368421052e-05, "loss": 0.4788, "step": 29041 }, { "epoch": 1.626273938850935, "grad_norm": 1.3229891061782837, "learning_rate": 9.76242105263158e-05, "loss": 0.503, "step": 29042 }, { "epoch": 1.626329936163064, "grad_norm": 1.4288462400436401, "learning_rate": 9.762394736842105e-05, "loss": 0.5985, "step": 29043 }, { "epoch": 1.6263859334751931, "grad_norm": 1.5787339210510254, "learning_rate": 9.762368421052633e-05, "loss": 0.4384, "step": 29044 }, { "epoch": 1.6264419307873221, "grad_norm": 1.0647637844085693, "learning_rate": 9.762342105263159e-05, "loss": 0.3867, "step": 29045 }, { "epoch": 1.6264979280994512, "grad_norm": 1.180756688117981, "learning_rate": 9.762315789473685e-05, "loss": 0.4342, "step": 29046 }, { "epoch": 1.6265539254115802, "grad_norm": 1.1512324810028076, "learning_rate": 9.76228947368421e-05, "loss": 0.4566, "step": 29047 }, { "epoch": 1.6266099227237092, "grad_norm": 1.1414687633514404, "learning_rate": 9.762263157894738e-05, "loss": 0.3501, "step": 29048 }, { "epoch": 1.6266659200358382, "grad_norm": 1.374139428138733, "learning_rate": 9.762236842105264e-05, "loss": 0.4822, "step": 29049 }, { "epoch": 1.6267219173479672, "grad_norm": 1.401052713394165, "learning_rate": 9.76221052631579e-05, "loss": 0.4372, "step": 29050 }, { "epoch": 1.6267779146600962, "grad_norm": 1.2061361074447632, "learning_rate": 9.762184210526316e-05, "loss": 0.3528, "step": 29051 }, { "epoch": 1.6268339119722253, "grad_norm": 1.4091142416000366, "learning_rate": 9.762157894736843e-05, "loss": 0.5102, "step": 29052 
}, { "epoch": 1.6268899092843543, "grad_norm": 1.4907644987106323, "learning_rate": 9.762131578947369e-05, "loss": 0.6561, "step": 29053 }, { "epoch": 1.6269459065964833, "grad_norm": 1.1247915029525757, "learning_rate": 9.762105263157895e-05, "loss": 0.4596, "step": 29054 }, { "epoch": 1.6270019039086123, "grad_norm": 1.152758002281189, "learning_rate": 9.762078947368421e-05, "loss": 0.4631, "step": 29055 }, { "epoch": 1.6270579012207413, "grad_norm": 1.2452892065048218, "learning_rate": 9.762052631578947e-05, "loss": 0.3635, "step": 29056 }, { "epoch": 1.6271138985328704, "grad_norm": 1.5018380880355835, "learning_rate": 9.762026315789474e-05, "loss": 0.5134, "step": 29057 }, { "epoch": 1.6271698958449994, "grad_norm": 1.26292085647583, "learning_rate": 9.762e-05, "loss": 0.4954, "step": 29058 }, { "epoch": 1.6272258931571284, "grad_norm": 1.3581428527832031, "learning_rate": 9.761973684210526e-05, "loss": 0.451, "step": 29059 }, { "epoch": 1.6272818904692574, "grad_norm": 1.4662487506866455, "learning_rate": 9.761947368421052e-05, "loss": 0.4825, "step": 29060 }, { "epoch": 1.6273378877813864, "grad_norm": 1.0710781812667847, "learning_rate": 9.76192105263158e-05, "loss": 0.3624, "step": 29061 }, { "epoch": 1.6273938850935155, "grad_norm": 1.219622254371643, "learning_rate": 9.761894736842106e-05, "loss": 0.43, "step": 29062 }, { "epoch": 1.6274498824056445, "grad_norm": 1.3126916885375977, "learning_rate": 9.761868421052633e-05, "loss": 0.4779, "step": 29063 }, { "epoch": 1.6275058797177735, "grad_norm": 1.0866769552230835, "learning_rate": 9.761842105263158e-05, "loss": 0.4377, "step": 29064 }, { "epoch": 1.6275618770299025, "grad_norm": 1.2525542974472046, "learning_rate": 9.761815789473685e-05, "loss": 0.4171, "step": 29065 }, { "epoch": 1.6276178743420315, "grad_norm": 1.3107547760009766, "learning_rate": 9.761789473684211e-05, "loss": 0.4772, "step": 29066 }, { "epoch": 1.6276738716541606, "grad_norm": 1.738831639289856, "learning_rate": 
9.761763157894738e-05, "loss": 0.5713, "step": 29067 }, { "epoch": 1.6277298689662896, "grad_norm": 1.6793580055236816, "learning_rate": 9.761736842105264e-05, "loss": 0.4374, "step": 29068 }, { "epoch": 1.6277858662784186, "grad_norm": 1.2854893207550049, "learning_rate": 9.76171052631579e-05, "loss": 0.3926, "step": 29069 }, { "epoch": 1.6278418635905476, "grad_norm": 1.1788318157196045, "learning_rate": 9.761684210526316e-05, "loss": 0.4633, "step": 29070 }, { "epoch": 1.6278978609026766, "grad_norm": 1.4562842845916748, "learning_rate": 9.761657894736843e-05, "loss": 0.3869, "step": 29071 }, { "epoch": 1.6279538582148056, "grad_norm": 1.302661657333374, "learning_rate": 9.76163157894737e-05, "loss": 0.4805, "step": 29072 }, { "epoch": 1.6280098555269347, "grad_norm": 1.3763971328735352, "learning_rate": 9.761605263157894e-05, "loss": 0.3982, "step": 29073 }, { "epoch": 1.6280658528390637, "grad_norm": 1.9479624032974243, "learning_rate": 9.761578947368421e-05, "loss": 0.4567, "step": 29074 }, { "epoch": 1.6281218501511927, "grad_norm": 1.659568190574646, "learning_rate": 9.761552631578947e-05, "loss": 0.71, "step": 29075 }, { "epoch": 1.6281778474633217, "grad_norm": 1.4795762300491333, "learning_rate": 9.761526315789475e-05, "loss": 0.4887, "step": 29076 }, { "epoch": 1.6282338447754507, "grad_norm": 1.327354073524475, "learning_rate": 9.7615e-05, "loss": 0.5031, "step": 29077 }, { "epoch": 1.6282898420875798, "grad_norm": 1.9933340549468994, "learning_rate": 9.761473684210527e-05, "loss": 0.3958, "step": 29078 }, { "epoch": 1.6283458393997088, "grad_norm": 1.9714298248291016, "learning_rate": 9.761447368421053e-05, "loss": 0.5657, "step": 29079 }, { "epoch": 1.6284018367118378, "grad_norm": 1.4459640979766846, "learning_rate": 9.76142105263158e-05, "loss": 0.4235, "step": 29080 }, { "epoch": 1.6284578340239668, "grad_norm": 1.193831443786621, "learning_rate": 9.761394736842106e-05, "loss": 0.441, "step": 29081 }, { "epoch": 1.6285138313360958, "grad_norm": 
1.1125028133392334, "learning_rate": 9.761368421052632e-05, "loss": 0.4223, "step": 29082 }, { "epoch": 1.6285698286482249, "grad_norm": 1.3093454837799072, "learning_rate": 9.761342105263158e-05, "loss": 0.4256, "step": 29083 }, { "epoch": 1.6286258259603539, "grad_norm": 2.2786388397216797, "learning_rate": 9.761315789473685e-05, "loss": 0.54, "step": 29084 }, { "epoch": 1.628681823272483, "grad_norm": 1.323691964149475, "learning_rate": 9.761289473684211e-05, "loss": 0.5686, "step": 29085 }, { "epoch": 1.628737820584612, "grad_norm": 1.310255765914917, "learning_rate": 9.761263157894737e-05, "loss": 0.5394, "step": 29086 }, { "epoch": 1.628793817896741, "grad_norm": 1.7696998119354248, "learning_rate": 9.761236842105263e-05, "loss": 0.5722, "step": 29087 }, { "epoch": 1.62884981520887, "grad_norm": 1.2077407836914062, "learning_rate": 9.76121052631579e-05, "loss": 0.3398, "step": 29088 }, { "epoch": 1.628905812520999, "grad_norm": 1.19025719165802, "learning_rate": 9.761184210526316e-05, "loss": 0.4873, "step": 29089 }, { "epoch": 1.628961809833128, "grad_norm": 1.1577588319778442, "learning_rate": 9.761157894736842e-05, "loss": 0.3893, "step": 29090 }, { "epoch": 1.629017807145257, "grad_norm": 1.648565649986267, "learning_rate": 9.761131578947368e-05, "loss": 0.5877, "step": 29091 }, { "epoch": 1.629073804457386, "grad_norm": 1.1934082508087158, "learning_rate": 9.761105263157894e-05, "loss": 0.4315, "step": 29092 }, { "epoch": 1.629129801769515, "grad_norm": 1.4596374034881592, "learning_rate": 9.761078947368422e-05, "loss": 0.5484, "step": 29093 }, { "epoch": 1.629185799081644, "grad_norm": 1.3889191150665283, "learning_rate": 9.761052631578948e-05, "loss": 0.4731, "step": 29094 }, { "epoch": 1.629241796393773, "grad_norm": 1.4306739568710327, "learning_rate": 9.761026315789475e-05, "loss": 0.6296, "step": 29095 }, { "epoch": 1.629297793705902, "grad_norm": 1.3662248849868774, "learning_rate": 9.761e-05, "loss": 0.4153, "step": 29096 }, { "epoch": 
1.6293537910180311, "grad_norm": 1.737163782119751, "learning_rate": 9.760973684210527e-05, "loss": 0.5097, "step": 29097 }, { "epoch": 1.6294097883301601, "grad_norm": 1.5683296918869019, "learning_rate": 9.760947368421053e-05, "loss": 0.4128, "step": 29098 }, { "epoch": 1.6294657856422892, "grad_norm": 1.4416677951812744, "learning_rate": 9.76092105263158e-05, "loss": 0.4875, "step": 29099 }, { "epoch": 1.6295217829544182, "grad_norm": 1.2504794597625732, "learning_rate": 9.760894736842106e-05, "loss": 0.3317, "step": 29100 }, { "epoch": 1.6295777802665472, "grad_norm": 1.3671859502792358, "learning_rate": 9.760868421052632e-05, "loss": 0.513, "step": 29101 }, { "epoch": 1.6296337775786762, "grad_norm": 1.8179361820220947, "learning_rate": 9.760842105263158e-05, "loss": 0.5065, "step": 29102 }, { "epoch": 1.6296897748908052, "grad_norm": 1.1108883619308472, "learning_rate": 9.760815789473685e-05, "loss": 0.3864, "step": 29103 }, { "epoch": 1.6297457722029343, "grad_norm": 15.233407020568848, "learning_rate": 9.760789473684211e-05, "loss": 0.3712, "step": 29104 }, { "epoch": 1.6298017695150633, "grad_norm": 1.2472684383392334, "learning_rate": 9.760763157894737e-05, "loss": 0.3965, "step": 29105 }, { "epoch": 1.6298577668271923, "grad_norm": 2.189598560333252, "learning_rate": 9.760736842105263e-05, "loss": 0.559, "step": 29106 }, { "epoch": 1.6299137641393213, "grad_norm": 1.4176472425460815, "learning_rate": 9.760710526315789e-05, "loss": 0.443, "step": 29107 }, { "epoch": 1.6299697614514503, "grad_norm": 1.387403964996338, "learning_rate": 9.760684210526317e-05, "loss": 0.5454, "step": 29108 }, { "epoch": 1.6300257587635794, "grad_norm": 1.6350126266479492, "learning_rate": 9.760657894736843e-05, "loss": 0.4206, "step": 29109 }, { "epoch": 1.6300817560757084, "grad_norm": 1.2087743282318115, "learning_rate": 9.760631578947369e-05, "loss": 0.4521, "step": 29110 }, { "epoch": 1.6301377533878374, "grad_norm": 1.3117601871490479, "learning_rate": 
9.760605263157895e-05, "loss": 0.5464, "step": 29111 }, { "epoch": 1.6301937506999664, "grad_norm": 1.255249261856079, "learning_rate": 9.760578947368422e-05, "loss": 0.4812, "step": 29112 }, { "epoch": 1.6302497480120954, "grad_norm": 1.740799903869629, "learning_rate": 9.760552631578948e-05, "loss": 0.4692, "step": 29113 }, { "epoch": 1.6303057453242245, "grad_norm": 1.5625931024551392, "learning_rate": 9.760526315789474e-05, "loss": 0.4163, "step": 29114 }, { "epoch": 1.6303617426363535, "grad_norm": 1.2539629936218262, "learning_rate": 9.7605e-05, "loss": 0.4085, "step": 29115 }, { "epoch": 1.6304177399484825, "grad_norm": 1.3470768928527832, "learning_rate": 9.760473684210527e-05, "loss": 0.4537, "step": 29116 }, { "epoch": 1.6304737372606115, "grad_norm": 1.1457774639129639, "learning_rate": 9.760447368421053e-05, "loss": 0.451, "step": 29117 }, { "epoch": 1.6305297345727405, "grad_norm": 1.6563763618469238, "learning_rate": 9.76042105263158e-05, "loss": 0.6096, "step": 29118 }, { "epoch": 1.6305857318848695, "grad_norm": 1.1781911849975586, "learning_rate": 9.760394736842105e-05, "loss": 0.4572, "step": 29119 }, { "epoch": 1.6306417291969986, "grad_norm": 1.3582149744033813, "learning_rate": 9.760368421052632e-05, "loss": 0.5164, "step": 29120 }, { "epoch": 1.6306977265091276, "grad_norm": 1.073562502861023, "learning_rate": 9.760342105263158e-05, "loss": 0.3497, "step": 29121 }, { "epoch": 1.6307537238212566, "grad_norm": 1.3410911560058594, "learning_rate": 9.760315789473686e-05, "loss": 0.488, "step": 29122 }, { "epoch": 1.6308097211333856, "grad_norm": 1.3580961227416992, "learning_rate": 9.760289473684212e-05, "loss": 0.5321, "step": 29123 }, { "epoch": 1.6308657184455146, "grad_norm": 1.290549635887146, "learning_rate": 9.760263157894736e-05, "loss": 0.5001, "step": 29124 }, { "epoch": 1.6309217157576437, "grad_norm": 1.5435512065887451, "learning_rate": 9.760236842105264e-05, "loss": 0.6849, "step": 29125 }, { "epoch": 1.6309777130697727, "grad_norm": 
1.1113272905349731, "learning_rate": 9.76021052631579e-05, "loss": 0.3134, "step": 29126 }, { "epoch": 1.6310337103819017, "grad_norm": 1.1967477798461914, "learning_rate": 9.760184210526317e-05, "loss": 0.3766, "step": 29127 }, { "epoch": 1.6310897076940307, "grad_norm": 1.4602841138839722, "learning_rate": 9.760157894736842e-05, "loss": 0.5771, "step": 29128 }, { "epoch": 1.6311457050061597, "grad_norm": 1.3339039087295532, "learning_rate": 9.760131578947369e-05, "loss": 0.4594, "step": 29129 }, { "epoch": 1.6312017023182888, "grad_norm": 1.2210233211517334, "learning_rate": 9.760105263157895e-05, "loss": 0.4366, "step": 29130 }, { "epoch": 1.6312576996304178, "grad_norm": 1.192659616470337, "learning_rate": 9.760078947368422e-05, "loss": 0.5058, "step": 29131 }, { "epoch": 1.6313136969425468, "grad_norm": 1.5911970138549805, "learning_rate": 9.760052631578948e-05, "loss": 0.4466, "step": 29132 }, { "epoch": 1.6313696942546758, "grad_norm": 1.1969664096832275, "learning_rate": 9.760026315789474e-05, "loss": 0.4689, "step": 29133 }, { "epoch": 1.6314256915668048, "grad_norm": 1.3877958059310913, "learning_rate": 9.76e-05, "loss": 0.3713, "step": 29134 }, { "epoch": 1.6314816888789339, "grad_norm": 1.6576367616653442, "learning_rate": 9.759973684210527e-05, "loss": 0.5546, "step": 29135 }, { "epoch": 1.6315376861910629, "grad_norm": 1.4091688394546509, "learning_rate": 9.759947368421053e-05, "loss": 0.4724, "step": 29136 }, { "epoch": 1.631593683503192, "grad_norm": 1.2410529851913452, "learning_rate": 9.75992105263158e-05, "loss": 0.4976, "step": 29137 }, { "epoch": 1.631649680815321, "grad_norm": 1.290068507194519, "learning_rate": 9.759894736842105e-05, "loss": 0.5175, "step": 29138 }, { "epoch": 1.63170567812745, "grad_norm": 1.3230829238891602, "learning_rate": 9.759868421052633e-05, "loss": 0.357, "step": 29139 }, { "epoch": 1.631761675439579, "grad_norm": 1.3626868724822998, "learning_rate": 9.759842105263159e-05, "loss": 0.6765, "step": 29140 }, { "epoch": 
1.631817672751708, "grad_norm": 1.5146164894104004, "learning_rate": 9.759815789473685e-05, "loss": 0.5233, "step": 29141 }, { "epoch": 1.631873670063837, "grad_norm": 1.4057866334915161, "learning_rate": 9.75978947368421e-05, "loss": 0.3804, "step": 29142 }, { "epoch": 1.631929667375966, "grad_norm": 1.1456196308135986, "learning_rate": 9.759763157894737e-05, "loss": 0.4014, "step": 29143 }, { "epoch": 1.631985664688095, "grad_norm": 1.3079019784927368, "learning_rate": 9.759736842105264e-05, "loss": 0.5138, "step": 29144 }, { "epoch": 1.632041662000224, "grad_norm": 1.0659583806991577, "learning_rate": 9.75971052631579e-05, "loss": 0.4265, "step": 29145 }, { "epoch": 1.632097659312353, "grad_norm": 1.377334713935852, "learning_rate": 9.759684210526316e-05, "loss": 0.4598, "step": 29146 }, { "epoch": 1.632153656624482, "grad_norm": 1.3103435039520264, "learning_rate": 9.759657894736842e-05, "loss": 0.3911, "step": 29147 }, { "epoch": 1.632209653936611, "grad_norm": 1.2591474056243896, "learning_rate": 9.759631578947369e-05, "loss": 0.4053, "step": 29148 }, { "epoch": 1.6322656512487401, "grad_norm": 1.6633414030075073, "learning_rate": 9.759605263157895e-05, "loss": 0.4674, "step": 29149 }, { "epoch": 1.6323216485608691, "grad_norm": 1.1004396677017212, "learning_rate": 9.759578947368422e-05, "loss": 0.3968, "step": 29150 }, { "epoch": 1.6323776458729982, "grad_norm": 1.4744101762771606, "learning_rate": 9.759552631578947e-05, "loss": 0.4244, "step": 29151 }, { "epoch": 1.6324336431851272, "grad_norm": 1.2575689554214478, "learning_rate": 9.759526315789474e-05, "loss": 0.4432, "step": 29152 }, { "epoch": 1.6324896404972562, "grad_norm": 1.2009389400482178, "learning_rate": 9.7595e-05, "loss": 0.4098, "step": 29153 }, { "epoch": 1.6325456378093852, "grad_norm": 1.4981553554534912, "learning_rate": 9.759473684210528e-05, "loss": 0.4769, "step": 29154 }, { "epoch": 1.6326016351215142, "grad_norm": 1.6977664232254028, "learning_rate": 9.759447368421054e-05, "loss": 
0.4882, "step": 29155 }, { "epoch": 1.6326576324336433, "grad_norm": 0.9824967384338379, "learning_rate": 9.75942105263158e-05, "loss": 0.3928, "step": 29156 }, { "epoch": 1.6327136297457723, "grad_norm": 1.3059180974960327, "learning_rate": 9.759394736842106e-05, "loss": 0.5373, "step": 29157 }, { "epoch": 1.6327696270579013, "grad_norm": 1.1779322624206543, "learning_rate": 9.759368421052632e-05, "loss": 0.4314, "step": 29158 }, { "epoch": 1.6328256243700303, "grad_norm": 1.2194240093231201, "learning_rate": 9.759342105263159e-05, "loss": 0.4762, "step": 29159 }, { "epoch": 1.6328816216821593, "grad_norm": 1.175771713256836, "learning_rate": 9.759315789473685e-05, "loss": 0.4765, "step": 29160 }, { "epoch": 1.6329376189942884, "grad_norm": 1.2643389701843262, "learning_rate": 9.759289473684211e-05, "loss": 0.4175, "step": 29161 }, { "epoch": 1.6329936163064174, "grad_norm": 1.2959729433059692, "learning_rate": 9.759263157894737e-05, "loss": 0.452, "step": 29162 }, { "epoch": 1.6330496136185464, "grad_norm": 1.3463983535766602, "learning_rate": 9.759236842105264e-05, "loss": 0.3897, "step": 29163 }, { "epoch": 1.6331056109306754, "grad_norm": 1.2260736227035522, "learning_rate": 9.75921052631579e-05, "loss": 0.46, "step": 29164 }, { "epoch": 1.6331616082428044, "grad_norm": 1.4508298635482788, "learning_rate": 9.759184210526316e-05, "loss": 0.5266, "step": 29165 }, { "epoch": 1.6332176055549334, "grad_norm": 1.2655651569366455, "learning_rate": 9.759157894736842e-05, "loss": 0.4594, "step": 29166 }, { "epoch": 1.6332736028670625, "grad_norm": 1.4045745134353638, "learning_rate": 9.75913157894737e-05, "loss": 0.7315, "step": 29167 }, { "epoch": 1.6333296001791915, "grad_norm": 1.1083928346633911, "learning_rate": 9.759105263157895e-05, "loss": 0.4991, "step": 29168 }, { "epoch": 1.6333855974913205, "grad_norm": 1.4445360898971558, "learning_rate": 9.759078947368421e-05, "loss": 0.5848, "step": 29169 }, { "epoch": 1.6334415948034495, "grad_norm": 1.4294407367706299, 
"learning_rate": 9.759052631578947e-05, "loss": 0.5486, "step": 29170 }, { "epoch": 1.6334975921155785, "grad_norm": 1.4833860397338867, "learning_rate": 9.759026315789475e-05, "loss": 0.5012, "step": 29171 }, { "epoch": 1.6335535894277076, "grad_norm": 1.4099397659301758, "learning_rate": 9.759e-05, "loss": 0.3433, "step": 29172 }, { "epoch": 1.6336095867398366, "grad_norm": 5.987528324127197, "learning_rate": 9.758973684210528e-05, "loss": 0.5535, "step": 29173 }, { "epoch": 1.6336655840519656, "grad_norm": 1.8035095930099487, "learning_rate": 9.758947368421053e-05, "loss": 0.4982, "step": 29174 }, { "epoch": 1.6337215813640946, "grad_norm": 1.0976383686065674, "learning_rate": 9.75892105263158e-05, "loss": 0.3637, "step": 29175 }, { "epoch": 1.6337775786762236, "grad_norm": 1.1581015586853027, "learning_rate": 9.758894736842106e-05, "loss": 0.4259, "step": 29176 }, { "epoch": 1.6338335759883527, "grad_norm": 1.3488553762435913, "learning_rate": 9.758868421052632e-05, "loss": 0.409, "step": 29177 }, { "epoch": 1.6338895733004817, "grad_norm": 1.1652346849441528, "learning_rate": 9.758842105263158e-05, "loss": 0.478, "step": 29178 }, { "epoch": 1.6339455706126107, "grad_norm": 1.2191673517227173, "learning_rate": 9.758815789473684e-05, "loss": 0.3877, "step": 29179 }, { "epoch": 1.6340015679247397, "grad_norm": 1.2604730129241943, "learning_rate": 9.758789473684211e-05, "loss": 0.4978, "step": 29180 }, { "epoch": 1.6340575652368687, "grad_norm": 1.6360100507736206, "learning_rate": 9.758763157894737e-05, "loss": 0.3549, "step": 29181 }, { "epoch": 1.6341135625489978, "grad_norm": 1.3707400560379028, "learning_rate": 9.758736842105264e-05, "loss": 0.4374, "step": 29182 }, { "epoch": 1.6341695598611268, "grad_norm": 1.3481628894805908, "learning_rate": 9.758710526315789e-05, "loss": 0.546, "step": 29183 }, { "epoch": 1.6342255571732558, "grad_norm": 1.3352290391921997, "learning_rate": 9.758684210526316e-05, "loss": 0.5488, "step": 29184 }, { "epoch": 
1.6342815544853848, "grad_norm": 1.357681393623352, "learning_rate": 9.758657894736842e-05, "loss": 0.4132, "step": 29185 }, { "epoch": 1.6343375517975138, "grad_norm": 1.2435659170150757, "learning_rate": 9.75863157894737e-05, "loss": 0.3325, "step": 29186 }, { "epoch": 1.6343935491096429, "grad_norm": 1.1963164806365967, "learning_rate": 9.758605263157896e-05, "loss": 0.448, "step": 29187 }, { "epoch": 1.6344495464217719, "grad_norm": 1.3451859951019287, "learning_rate": 9.758578947368422e-05, "loss": 0.5199, "step": 29188 }, { "epoch": 1.634505543733901, "grad_norm": 1.3353017568588257, "learning_rate": 9.758552631578948e-05, "loss": 0.6076, "step": 29189 }, { "epoch": 1.63456154104603, "grad_norm": 1.2593022584915161, "learning_rate": 9.758526315789475e-05, "loss": 0.53, "step": 29190 }, { "epoch": 1.634617538358159, "grad_norm": 1.5316128730773926, "learning_rate": 9.758500000000001e-05, "loss": 0.4194, "step": 29191 }, { "epoch": 1.634673535670288, "grad_norm": 1.4630426168441772, "learning_rate": 9.758473684210527e-05, "loss": 0.472, "step": 29192 }, { "epoch": 1.634729532982417, "grad_norm": 1.186896800994873, "learning_rate": 9.758447368421053e-05, "loss": 0.4558, "step": 29193 }, { "epoch": 1.634785530294546, "grad_norm": 1.35330331325531, "learning_rate": 9.758421052631579e-05, "loss": 0.4215, "step": 29194 }, { "epoch": 1.634841527606675, "grad_norm": 1.1766036748886108, "learning_rate": 9.758394736842106e-05, "loss": 0.3758, "step": 29195 }, { "epoch": 1.634897524918804, "grad_norm": 2.030928373336792, "learning_rate": 9.758368421052632e-05, "loss": 0.485, "step": 29196 }, { "epoch": 1.634953522230933, "grad_norm": 1.5365872383117676, "learning_rate": 9.758342105263158e-05, "loss": 0.7332, "step": 29197 }, { "epoch": 1.635009519543062, "grad_norm": 1.1595895290374756, "learning_rate": 9.758315789473684e-05, "loss": 0.3959, "step": 29198 }, { "epoch": 1.635065516855191, "grad_norm": 1.3030779361724854, "learning_rate": 9.758289473684211e-05, "loss": 
0.4442, "step": 29199 }, { "epoch": 1.63512151416732, "grad_norm": 1.7303502559661865, "learning_rate": 9.758263157894737e-05, "loss": 0.411, "step": 29200 }, { "epoch": 1.6351775114794491, "grad_norm": 1.6569221019744873, "learning_rate": 9.758236842105263e-05, "loss": 0.6445, "step": 29201 }, { "epoch": 1.6352335087915781, "grad_norm": 1.3060539960861206, "learning_rate": 9.758210526315789e-05, "loss": 0.4981, "step": 29202 }, { "epoch": 1.6352895061037072, "grad_norm": 1.2461727857589722, "learning_rate": 9.758184210526317e-05, "loss": 0.5195, "step": 29203 }, { "epoch": 1.6353455034158362, "grad_norm": 1.407566785812378, "learning_rate": 9.758157894736843e-05, "loss": 0.4992, "step": 29204 }, { "epoch": 1.6354015007279652, "grad_norm": 1.8369650840759277, "learning_rate": 9.75813157894737e-05, "loss": 0.4087, "step": 29205 }, { "epoch": 1.6354574980400942, "grad_norm": 1.3014521598815918, "learning_rate": 9.758105263157895e-05, "loss": 0.4605, "step": 29206 }, { "epoch": 1.6355134953522232, "grad_norm": 1.6740087270736694, "learning_rate": 9.758078947368422e-05, "loss": 0.4178, "step": 29207 }, { "epoch": 1.6355694926643523, "grad_norm": 1.2325019836425781, "learning_rate": 9.758052631578948e-05, "loss": 0.405, "step": 29208 }, { "epoch": 1.6356254899764813, "grad_norm": 1.3273875713348389, "learning_rate": 9.758026315789475e-05, "loss": 0.3583, "step": 29209 }, { "epoch": 1.6356814872886103, "grad_norm": 1.1015675067901611, "learning_rate": 9.758000000000001e-05, "loss": 0.3004, "step": 29210 }, { "epoch": 1.6357374846007393, "grad_norm": 1.7572027444839478, "learning_rate": 9.757973684210526e-05, "loss": 0.5247, "step": 29211 }, { "epoch": 1.6357934819128683, "grad_norm": 1.3972452878952026, "learning_rate": 9.757947368421053e-05, "loss": 0.4335, "step": 29212 }, { "epoch": 1.6358494792249973, "grad_norm": 1.4027369022369385, "learning_rate": 9.757921052631579e-05, "loss": 0.4834, "step": 29213 }, { "epoch": 1.6359054765371264, "grad_norm": 
1.4487584829330444, "learning_rate": 9.757894736842106e-05, "loss": 0.4702, "step": 29214 }, { "epoch": 1.6359614738492554, "grad_norm": 1.4246277809143066, "learning_rate": 9.757868421052632e-05, "loss": 0.4236, "step": 29215 }, { "epoch": 1.6360174711613844, "grad_norm": 1.6195552349090576, "learning_rate": 9.757842105263158e-05, "loss": 0.407, "step": 29216 }, { "epoch": 1.6360734684735134, "grad_norm": 1.6442829370498657, "learning_rate": 9.757815789473684e-05, "loss": 0.4091, "step": 29217 }, { "epoch": 1.6361294657856424, "grad_norm": 1.4494110345840454, "learning_rate": 9.757789473684212e-05, "loss": 0.4088, "step": 29218 }, { "epoch": 1.6361854630977715, "grad_norm": 1.286902666091919, "learning_rate": 9.757763157894738e-05, "loss": 0.4466, "step": 29219 }, { "epoch": 1.6362414604099005, "grad_norm": 1.5643664598464966, "learning_rate": 9.757736842105264e-05, "loss": 0.6074, "step": 29220 }, { "epoch": 1.6362974577220295, "grad_norm": 1.476778507232666, "learning_rate": 9.75771052631579e-05, "loss": 0.4695, "step": 29221 }, { "epoch": 1.6363534550341585, "grad_norm": 1.1702582836151123, "learning_rate": 9.757684210526317e-05, "loss": 0.4855, "step": 29222 }, { "epoch": 1.6364094523462875, "grad_norm": 1.292647123336792, "learning_rate": 9.757657894736843e-05, "loss": 0.4206, "step": 29223 }, { "epoch": 1.6364654496584166, "grad_norm": 1.072305679321289, "learning_rate": 9.757631578947369e-05, "loss": 0.4266, "step": 29224 }, { "epoch": 1.6365214469705456, "grad_norm": 2.024962902069092, "learning_rate": 9.757605263157895e-05, "loss": 0.5598, "step": 29225 }, { "epoch": 1.6365774442826744, "grad_norm": 1.2907689809799194, "learning_rate": 9.757578947368422e-05, "loss": 0.4269, "step": 29226 }, { "epoch": 1.6366334415948034, "grad_norm": 1.6650328636169434, "learning_rate": 9.757552631578948e-05, "loss": 0.635, "step": 29227 }, { "epoch": 1.6366894389069324, "grad_norm": 1.3826417922973633, "learning_rate": 9.757526315789474e-05, "loss": 0.4345, "step": 29228 
}, { "epoch": 1.6367454362190614, "grad_norm": 2.3610739707946777, "learning_rate": 9.7575e-05, "loss": 0.5797, "step": 29229 }, { "epoch": 1.6368014335311905, "grad_norm": 1.8455467224121094, "learning_rate": 9.757473684210526e-05, "loss": 0.7451, "step": 29230 }, { "epoch": 1.6368574308433195, "grad_norm": 1.270052433013916, "learning_rate": 9.757447368421053e-05, "loss": 0.4652, "step": 29231 }, { "epoch": 1.6369134281554485, "grad_norm": 1.677120566368103, "learning_rate": 9.757421052631579e-05, "loss": 0.4804, "step": 29232 }, { "epoch": 1.6369694254675775, "grad_norm": 1.7527693510055542, "learning_rate": 9.757394736842105e-05, "loss": 0.3979, "step": 29233 }, { "epoch": 1.6370254227797065, "grad_norm": 1.2408288717269897, "learning_rate": 9.757368421052631e-05, "loss": 0.4502, "step": 29234 }, { "epoch": 1.6370814200918355, "grad_norm": 1.2545652389526367, "learning_rate": 9.757342105263159e-05, "loss": 0.4549, "step": 29235 }, { "epoch": 1.6371374174039646, "grad_norm": 1.446791648864746, "learning_rate": 9.757315789473685e-05, "loss": 0.5472, "step": 29236 }, { "epoch": 1.6371934147160936, "grad_norm": 1.5149366855621338, "learning_rate": 9.757289473684212e-05, "loss": 0.4647, "step": 29237 }, { "epoch": 1.6372494120282226, "grad_norm": 1.4112406969070435, "learning_rate": 9.757263157894736e-05, "loss": 0.5926, "step": 29238 }, { "epoch": 1.6373054093403516, "grad_norm": 1.5062649250030518, "learning_rate": 9.757236842105264e-05, "loss": 0.3953, "step": 29239 }, { "epoch": 1.6373614066524806, "grad_norm": 1.1949267387390137, "learning_rate": 9.75721052631579e-05, "loss": 0.3226, "step": 29240 }, { "epoch": 1.6374174039646097, "grad_norm": 1.3616310358047485, "learning_rate": 9.757184210526317e-05, "loss": 0.5263, "step": 29241 }, { "epoch": 1.6374734012767387, "grad_norm": 1.2105578184127808, "learning_rate": 9.757157894736843e-05, "loss": 0.4264, "step": 29242 }, { "epoch": 1.6375293985888677, "grad_norm": 1.3974268436431885, "learning_rate": 
9.757131578947369e-05, "loss": 0.5389, "step": 29243 }, { "epoch": 1.6375853959009967, "grad_norm": 1.3129873275756836, "learning_rate": 9.757105263157895e-05, "loss": 0.4387, "step": 29244 }, { "epoch": 1.6376413932131257, "grad_norm": 1.1442760229110718, "learning_rate": 9.757078947368421e-05, "loss": 0.4952, "step": 29245 }, { "epoch": 1.6376973905252548, "grad_norm": 1.4596924781799316, "learning_rate": 9.757052631578948e-05, "loss": 0.4531, "step": 29246 }, { "epoch": 1.6377533878373838, "grad_norm": 1.195422649383545, "learning_rate": 9.757026315789474e-05, "loss": 0.4395, "step": 29247 }, { "epoch": 1.6378093851495128, "grad_norm": 1.3700623512268066, "learning_rate": 9.757e-05, "loss": 0.3675, "step": 29248 }, { "epoch": 1.6378653824616418, "grad_norm": 1.3969826698303223, "learning_rate": 9.756973684210526e-05, "loss": 0.4904, "step": 29249 }, { "epoch": 1.6379213797737708, "grad_norm": 1.2775415182113647, "learning_rate": 9.756947368421054e-05, "loss": 0.528, "step": 29250 }, { "epoch": 1.6379773770858999, "grad_norm": 1.1494224071502686, "learning_rate": 9.75692105263158e-05, "loss": 0.3905, "step": 29251 }, { "epoch": 1.6380333743980289, "grad_norm": 1.369499683380127, "learning_rate": 9.756894736842106e-05, "loss": 0.5188, "step": 29252 }, { "epoch": 1.638089371710158, "grad_norm": 1.2361911535263062, "learning_rate": 9.756868421052631e-05, "loss": 0.6881, "step": 29253 }, { "epoch": 1.638145369022287, "grad_norm": 1.3087455034255981, "learning_rate": 9.756842105263159e-05, "loss": 0.5997, "step": 29254 }, { "epoch": 1.638201366334416, "grad_norm": 1.2577117681503296, "learning_rate": 9.756815789473685e-05, "loss": 0.429, "step": 29255 }, { "epoch": 1.638257363646545, "grad_norm": 1.2478843927383423, "learning_rate": 9.756789473684211e-05, "loss": 0.5154, "step": 29256 }, { "epoch": 1.638313360958674, "grad_norm": 1.2429277896881104, "learning_rate": 9.756763157894737e-05, "loss": 0.4897, "step": 29257 }, { "epoch": 1.638369358270803, "grad_norm": 
1.3642586469650269, "learning_rate": 9.756736842105264e-05, "loss": 0.4276, "step": 29258 }, { "epoch": 1.638425355582932, "grad_norm": 1.3517820835113525, "learning_rate": 9.75671052631579e-05, "loss": 0.4568, "step": 29259 }, { "epoch": 1.638481352895061, "grad_norm": 1.2205671072006226, "learning_rate": 9.756684210526317e-05, "loss": 0.4527, "step": 29260 }, { "epoch": 1.63853735020719, "grad_norm": 1.2526224851608276, "learning_rate": 9.756657894736842e-05, "loss": 0.5031, "step": 29261 }, { "epoch": 1.638593347519319, "grad_norm": 1.354042649269104, "learning_rate": 9.756631578947368e-05, "loss": 0.3161, "step": 29262 }, { "epoch": 1.638649344831448, "grad_norm": 1.4771968126296997, "learning_rate": 9.756605263157895e-05, "loss": 0.5773, "step": 29263 }, { "epoch": 1.638705342143577, "grad_norm": 1.1587164402008057, "learning_rate": 9.756578947368421e-05, "loss": 0.3288, "step": 29264 }, { "epoch": 1.6387613394557061, "grad_norm": 1.5697356462478638, "learning_rate": 9.756552631578949e-05, "loss": 0.5518, "step": 29265 }, { "epoch": 1.6388173367678351, "grad_norm": 1.3110226392745972, "learning_rate": 9.756526315789473e-05, "loss": 0.4752, "step": 29266 }, { "epoch": 1.6388733340799642, "grad_norm": 1.389346718788147, "learning_rate": 9.7565e-05, "loss": 0.5271, "step": 29267 }, { "epoch": 1.6389293313920932, "grad_norm": 1.3497350215911865, "learning_rate": 9.756473684210526e-05, "loss": 0.341, "step": 29268 }, { "epoch": 1.6389853287042222, "grad_norm": 1.2732964754104614, "learning_rate": 9.756447368421054e-05, "loss": 0.4535, "step": 29269 }, { "epoch": 1.6390413260163512, "grad_norm": 1.4268783330917358, "learning_rate": 9.75642105263158e-05, "loss": 0.5771, "step": 29270 }, { "epoch": 1.6390973233284802, "grad_norm": 1.372564435005188, "learning_rate": 9.756394736842106e-05, "loss": 0.4146, "step": 29271 }, { "epoch": 1.6391533206406093, "grad_norm": 1.3734673261642456, "learning_rate": 9.756368421052632e-05, "loss": 0.4694, "step": 29272 }, { "epoch": 
1.6392093179527383, "grad_norm": 1.1530158519744873, "learning_rate": 9.756342105263159e-05, "loss": 0.4468, "step": 29273 }, { "epoch": 1.6392653152648673, "grad_norm": 9.442240715026855, "learning_rate": 9.756315789473685e-05, "loss": 0.4221, "step": 29274 }, { "epoch": 1.6393213125769963, "grad_norm": 1.2248588800430298, "learning_rate": 9.756289473684211e-05, "loss": 0.4177, "step": 29275 }, { "epoch": 1.6393773098891253, "grad_norm": 1.0724364519119263, "learning_rate": 9.756263157894737e-05, "loss": 0.2904, "step": 29276 }, { "epoch": 1.6394333072012544, "grad_norm": 1.2486066818237305, "learning_rate": 9.756236842105264e-05, "loss": 0.4967, "step": 29277 }, { "epoch": 1.6394893045133834, "grad_norm": 1.1921827793121338, "learning_rate": 9.75621052631579e-05, "loss": 0.4251, "step": 29278 }, { "epoch": 1.6395453018255124, "grad_norm": 1.30080246925354, "learning_rate": 9.756184210526316e-05, "loss": 0.4643, "step": 29279 }, { "epoch": 1.6396012991376414, "grad_norm": 1.4581222534179688, "learning_rate": 9.756157894736842e-05, "loss": 0.5637, "step": 29280 }, { "epoch": 1.6396572964497704, "grad_norm": 1.3218766450881958, "learning_rate": 9.756131578947368e-05, "loss": 0.416, "step": 29281 }, { "epoch": 1.6397132937618994, "grad_norm": 1.4028072357177734, "learning_rate": 9.756105263157896e-05, "loss": 0.4125, "step": 29282 }, { "epoch": 1.6397692910740285, "grad_norm": 1.1699087619781494, "learning_rate": 9.756078947368422e-05, "loss": 0.3245, "step": 29283 }, { "epoch": 1.6398252883861575, "grad_norm": 1.3882259130477905, "learning_rate": 9.756052631578947e-05, "loss": 0.456, "step": 29284 }, { "epoch": 1.6398812856982865, "grad_norm": 1.126474142074585, "learning_rate": 9.756026315789473e-05, "loss": 0.3478, "step": 29285 }, { "epoch": 1.6399372830104155, "grad_norm": 1.2113491296768188, "learning_rate": 9.756000000000001e-05, "loss": 0.4296, "step": 29286 }, { "epoch": 1.6399932803225445, "grad_norm": 1.5506101846694946, "learning_rate": 
9.755973684210527e-05, "loss": 0.5387, "step": 29287 }, { "epoch": 1.6400492776346736, "grad_norm": 1.283271074295044, "learning_rate": 9.755947368421053e-05, "loss": 0.5906, "step": 29288 }, { "epoch": 1.6401052749468026, "grad_norm": 1.549760103225708, "learning_rate": 9.755921052631579e-05, "loss": 0.7714, "step": 29289 }, { "epoch": 1.6401612722589316, "grad_norm": 1.3226968050003052, "learning_rate": 9.755894736842106e-05, "loss": 0.3748, "step": 29290 }, { "epoch": 1.6402172695710606, "grad_norm": 1.3081238269805908, "learning_rate": 9.755868421052632e-05, "loss": 0.409, "step": 29291 }, { "epoch": 1.6402732668831896, "grad_norm": 1.3023624420166016, "learning_rate": 9.75584210526316e-05, "loss": 0.5124, "step": 29292 }, { "epoch": 1.6403292641953187, "grad_norm": 1.170420527458191, "learning_rate": 9.755815789473684e-05, "loss": 0.4215, "step": 29293 }, { "epoch": 1.6403852615074477, "grad_norm": 1.242762804031372, "learning_rate": 9.755789473684211e-05, "loss": 0.4159, "step": 29294 }, { "epoch": 1.6404412588195767, "grad_norm": 1.4003139734268188, "learning_rate": 9.755763157894737e-05, "loss": 0.4027, "step": 29295 }, { "epoch": 1.6404972561317057, "grad_norm": 1.252991795539856, "learning_rate": 9.755736842105265e-05, "loss": 0.4914, "step": 29296 }, { "epoch": 1.6405532534438347, "grad_norm": 1.2703192234039307, "learning_rate": 9.75571052631579e-05, "loss": 0.4134, "step": 29297 }, { "epoch": 1.6406092507559638, "grad_norm": 1.4573822021484375, "learning_rate": 9.755684210526315e-05, "loss": 0.4338, "step": 29298 }, { "epoch": 1.6406652480680928, "grad_norm": 1.3850125074386597, "learning_rate": 9.755657894736842e-05, "loss": 0.4316, "step": 29299 }, { "epoch": 1.6407212453802218, "grad_norm": 1.1426774263381958, "learning_rate": 9.755631578947368e-05, "loss": 0.4613, "step": 29300 }, { "epoch": 1.6407772426923508, "grad_norm": 1.572794795036316, "learning_rate": 9.755605263157896e-05, "loss": 0.5873, "step": 29301 }, { "epoch": 1.6408332400044798, 
"grad_norm": 1.3179900646209717, "learning_rate": 9.755578947368422e-05, "loss": 0.4128, "step": 29302 }, { "epoch": 1.6408892373166089, "grad_norm": 1.2013174295425415, "learning_rate": 9.755552631578948e-05, "loss": 0.4532, "step": 29303 }, { "epoch": 1.6409452346287379, "grad_norm": 1.488812804222107, "learning_rate": 9.755526315789474e-05, "loss": 0.502, "step": 29304 }, { "epoch": 1.641001231940867, "grad_norm": 1.9621952772140503, "learning_rate": 9.755500000000001e-05, "loss": 0.434, "step": 29305 }, { "epoch": 1.641057229252996, "grad_norm": 1.46525239944458, "learning_rate": 9.755473684210527e-05, "loss": 0.5215, "step": 29306 }, { "epoch": 1.641113226565125, "grad_norm": 1.2031933069229126, "learning_rate": 9.755447368421053e-05, "loss": 0.373, "step": 29307 }, { "epoch": 1.641169223877254, "grad_norm": 1.5745117664337158, "learning_rate": 9.755421052631579e-05, "loss": 0.6136, "step": 29308 }, { "epoch": 1.6412252211893827, "grad_norm": 1.5272430181503296, "learning_rate": 9.755394736842106e-05, "loss": 0.5901, "step": 29309 }, { "epoch": 1.6412812185015118, "grad_norm": 1.1108843088150024, "learning_rate": 9.755368421052632e-05, "loss": 0.3861, "step": 29310 }, { "epoch": 1.6413372158136408, "grad_norm": 1.524139404296875, "learning_rate": 9.755342105263158e-05, "loss": 0.5559, "step": 29311 }, { "epoch": 1.6413932131257698, "grad_norm": 1.4420135021209717, "learning_rate": 9.755315789473684e-05, "loss": 0.4656, "step": 29312 }, { "epoch": 1.6414492104378988, "grad_norm": 1.3306641578674316, "learning_rate": 9.755289473684212e-05, "loss": 0.4025, "step": 29313 }, { "epoch": 1.6415052077500278, "grad_norm": 1.4485255479812622, "learning_rate": 9.755263157894738e-05, "loss": 0.4637, "step": 29314 }, { "epoch": 1.6415612050621569, "grad_norm": 1.257196307182312, "learning_rate": 9.755236842105263e-05, "loss": 0.4802, "step": 29315 }, { "epoch": 1.6416172023742859, "grad_norm": 1.5029897689819336, "learning_rate": 9.75521052631579e-05, "loss": 0.4612, 
"step": 29316 }, { "epoch": 1.641673199686415, "grad_norm": 1.9011369943618774, "learning_rate": 9.755184210526315e-05, "loss": 0.6143, "step": 29317 }, { "epoch": 1.641729196998544, "grad_norm": 1.32305908203125, "learning_rate": 9.755157894736843e-05, "loss": 0.4435, "step": 29318 }, { "epoch": 1.641785194310673, "grad_norm": 0.9825717806816101, "learning_rate": 9.755131578947369e-05, "loss": 0.3063, "step": 29319 }, { "epoch": 1.641841191622802, "grad_norm": 1.2988083362579346, "learning_rate": 9.755105263157896e-05, "loss": 0.3966, "step": 29320 }, { "epoch": 1.641897188934931, "grad_norm": 1.243069052696228, "learning_rate": 9.75507894736842e-05, "loss": 0.4338, "step": 29321 }, { "epoch": 1.64195318624706, "grad_norm": 1.6195769309997559, "learning_rate": 9.755052631578948e-05, "loss": 0.4953, "step": 29322 }, { "epoch": 1.642009183559189, "grad_norm": 1.1538461446762085, "learning_rate": 9.755026315789474e-05, "loss": 0.4261, "step": 29323 }, { "epoch": 1.642065180871318, "grad_norm": 1.189353346824646, "learning_rate": 9.755000000000001e-05, "loss": 0.3824, "step": 29324 }, { "epoch": 1.642121178183447, "grad_norm": 1.3259837627410889, "learning_rate": 9.754973684210527e-05, "loss": 0.4563, "step": 29325 }, { "epoch": 1.642177175495576, "grad_norm": 1.227063775062561, "learning_rate": 9.754947368421053e-05, "loss": 0.3955, "step": 29326 }, { "epoch": 1.642233172807705, "grad_norm": 1.155016303062439, "learning_rate": 9.754921052631579e-05, "loss": 0.3072, "step": 29327 }, { "epoch": 1.642289170119834, "grad_norm": 1.4582222700119019, "learning_rate": 9.754894736842107e-05, "loss": 0.5228, "step": 29328 }, { "epoch": 1.6423451674319631, "grad_norm": 1.249298095703125, "learning_rate": 9.754868421052633e-05, "loss": 0.3776, "step": 29329 }, { "epoch": 1.6424011647440921, "grad_norm": 1.5514057874679565, "learning_rate": 9.754842105263158e-05, "loss": 0.4551, "step": 29330 }, { "epoch": 1.6424571620562212, "grad_norm": 1.3318390846252441, "learning_rate": 
9.754815789473684e-05, "loss": 0.5298, "step": 29331 }, { "epoch": 1.6425131593683502, "grad_norm": 1.2294528484344482, "learning_rate": 9.75478947368421e-05, "loss": 0.4337, "step": 29332 }, { "epoch": 1.6425691566804792, "grad_norm": 1.2888906002044678, "learning_rate": 9.754763157894738e-05, "loss": 0.4862, "step": 29333 }, { "epoch": 1.6426251539926082, "grad_norm": 1.1327520608901978, "learning_rate": 9.754736842105264e-05, "loss": 0.4729, "step": 29334 }, { "epoch": 1.6426811513047372, "grad_norm": 1.1546499729156494, "learning_rate": 9.75471052631579e-05, "loss": 0.4146, "step": 29335 }, { "epoch": 1.6427371486168663, "grad_norm": 1.6673691272735596, "learning_rate": 9.754684210526316e-05, "loss": 0.6231, "step": 29336 }, { "epoch": 1.6427931459289953, "grad_norm": 1.2196277379989624, "learning_rate": 9.754657894736843e-05, "loss": 0.4973, "step": 29337 }, { "epoch": 1.6428491432411243, "grad_norm": 1.2602683305740356, "learning_rate": 9.754631578947369e-05, "loss": 0.5665, "step": 29338 }, { "epoch": 1.6429051405532533, "grad_norm": 1.1988778114318848, "learning_rate": 9.754605263157895e-05, "loss": 0.4264, "step": 29339 }, { "epoch": 1.6429611378653823, "grad_norm": 1.1710362434387207, "learning_rate": 9.754578947368421e-05, "loss": 0.3061, "step": 29340 }, { "epoch": 1.6430171351775114, "grad_norm": 1.3546621799468994, "learning_rate": 9.754552631578948e-05, "loss": 0.6396, "step": 29341 }, { "epoch": 1.6430731324896404, "grad_norm": 1.3007144927978516, "learning_rate": 9.754526315789474e-05, "loss": 0.435, "step": 29342 }, { "epoch": 1.6431291298017694, "grad_norm": 1.340864896774292, "learning_rate": 9.7545e-05, "loss": 0.4531, "step": 29343 }, { "epoch": 1.6431851271138984, "grad_norm": 1.3586357831954956, "learning_rate": 9.754473684210526e-05, "loss": 0.4163, "step": 29344 }, { "epoch": 1.6432411244260274, "grad_norm": 1.363587498664856, "learning_rate": 9.754447368421054e-05, "loss": 0.4542, "step": 29345 }, { "epoch": 1.6432971217381565, 
"grad_norm": 1.5507924556732178, "learning_rate": 9.75442105263158e-05, "loss": 0.4726, "step": 29346 }, { "epoch": 1.6433531190502855, "grad_norm": 1.2276865243911743, "learning_rate": 9.754394736842107e-05, "loss": 0.5614, "step": 29347 }, { "epoch": 1.6434091163624145, "grad_norm": 1.294532060623169, "learning_rate": 9.754368421052631e-05, "loss": 0.3719, "step": 29348 }, { "epoch": 1.6434651136745435, "grad_norm": 1.558737874031067, "learning_rate": 9.754342105263157e-05, "loss": 0.5353, "step": 29349 }, { "epoch": 1.6435211109866725, "grad_norm": 1.4003148078918457, "learning_rate": 9.754315789473685e-05, "loss": 0.5072, "step": 29350 }, { "epoch": 1.6435771082988015, "grad_norm": 1.0688210725784302, "learning_rate": 9.754289473684211e-05, "loss": 0.3453, "step": 29351 }, { "epoch": 1.6436331056109306, "grad_norm": 1.2977280616760254, "learning_rate": 9.754263157894738e-05, "loss": 0.4658, "step": 29352 }, { "epoch": 1.6436891029230596, "grad_norm": 1.1841076612472534, "learning_rate": 9.754236842105263e-05, "loss": 0.4808, "step": 29353 }, { "epoch": 1.6437451002351886, "grad_norm": 1.3413748741149902, "learning_rate": 9.75421052631579e-05, "loss": 0.5303, "step": 29354 }, { "epoch": 1.6438010975473176, "grad_norm": 1.2622005939483643, "learning_rate": 9.754184210526316e-05, "loss": 0.4256, "step": 29355 }, { "epoch": 1.6438570948594466, "grad_norm": 1.2882167100906372, "learning_rate": 9.754157894736843e-05, "loss": 0.4495, "step": 29356 }, { "epoch": 1.6439130921715757, "grad_norm": 1.2747324705123901, "learning_rate": 9.754131578947369e-05, "loss": 0.4351, "step": 29357 }, { "epoch": 1.6439690894837047, "grad_norm": 1.3000568151474, "learning_rate": 9.754105263157895e-05, "loss": 0.6083, "step": 29358 }, { "epoch": 1.6440250867958337, "grad_norm": 1.176451325416565, "learning_rate": 9.754078947368421e-05, "loss": 0.463, "step": 29359 }, { "epoch": 1.6440810841079627, "grad_norm": 1.1218034029006958, "learning_rate": 9.754052631578949e-05, "loss": 0.3896, 
"step": 29360 }, { "epoch": 1.6441370814200917, "grad_norm": 1.5688334703445435, "learning_rate": 9.754026315789474e-05, "loss": 0.4835, "step": 29361 }, { "epoch": 1.6441930787322208, "grad_norm": 1.240452527999878, "learning_rate": 9.754e-05, "loss": 0.359, "step": 29362 }, { "epoch": 1.6442490760443498, "grad_norm": 1.2389189004898071, "learning_rate": 9.753973684210526e-05, "loss": 0.6237, "step": 29363 }, { "epoch": 1.6443050733564788, "grad_norm": 1.3271735906600952, "learning_rate": 9.753947368421054e-05, "loss": 0.4437, "step": 29364 }, { "epoch": 1.6443610706686078, "grad_norm": 1.3391735553741455, "learning_rate": 9.75392105263158e-05, "loss": 0.433, "step": 29365 }, { "epoch": 1.6444170679807368, "grad_norm": 1.1607493162155151, "learning_rate": 9.753894736842106e-05, "loss": 0.3328, "step": 29366 }, { "epoch": 1.6444730652928659, "grad_norm": 1.331188678741455, "learning_rate": 9.753868421052632e-05, "loss": 0.5072, "step": 29367 }, { "epoch": 1.6445290626049949, "grad_norm": 1.3958109617233276, "learning_rate": 9.753842105263158e-05, "loss": 0.4293, "step": 29368 }, { "epoch": 1.644585059917124, "grad_norm": 1.2311660051345825, "learning_rate": 9.753815789473685e-05, "loss": 0.4149, "step": 29369 }, { "epoch": 1.644641057229253, "grad_norm": 1.254151463508606, "learning_rate": 9.753789473684211e-05, "loss": 0.3842, "step": 29370 }, { "epoch": 1.644697054541382, "grad_norm": 1.3861286640167236, "learning_rate": 9.753763157894737e-05, "loss": 0.435, "step": 29371 }, { "epoch": 1.644753051853511, "grad_norm": 1.5580637454986572, "learning_rate": 9.753736842105263e-05, "loss": 0.5089, "step": 29372 }, { "epoch": 1.64480904916564, "grad_norm": 1.239936113357544, "learning_rate": 9.75371052631579e-05, "loss": 0.3747, "step": 29373 }, { "epoch": 1.644865046477769, "grad_norm": 1.2191531658172607, "learning_rate": 9.753684210526316e-05, "loss": 0.408, "step": 29374 }, { "epoch": 1.644921043789898, "grad_norm": 1.3743925094604492, "learning_rate": 
9.753657894736844e-05, "loss": 0.4911, "step": 29375 }, { "epoch": 1.644977041102027, "grad_norm": 1.1639271974563599, "learning_rate": 9.753631578947368e-05, "loss": 0.3753, "step": 29376 }, { "epoch": 1.645033038414156, "grad_norm": 1.3580440282821655, "learning_rate": 9.753605263157895e-05, "loss": 0.4591, "step": 29377 }, { "epoch": 1.645089035726285, "grad_norm": 1.3186320066452026, "learning_rate": 9.753578947368421e-05, "loss": 0.5989, "step": 29378 }, { "epoch": 1.645145033038414, "grad_norm": 1.604811191558838, "learning_rate": 9.753552631578949e-05, "loss": 0.6839, "step": 29379 }, { "epoch": 1.645201030350543, "grad_norm": 9.71956729888916, "learning_rate": 9.753526315789473e-05, "loss": 0.5981, "step": 29380 }, { "epoch": 1.6452570276626721, "grad_norm": 1.2815930843353271, "learning_rate": 9.753500000000001e-05, "loss": 0.3662, "step": 29381 }, { "epoch": 1.6453130249748011, "grad_norm": 1.2173564434051514, "learning_rate": 9.753473684210527e-05, "loss": 0.3752, "step": 29382 }, { "epoch": 1.6453690222869302, "grad_norm": 1.4138680696487427, "learning_rate": 9.753447368421053e-05, "loss": 0.3849, "step": 29383 }, { "epoch": 1.6454250195990592, "grad_norm": 1.145095944404602, "learning_rate": 9.75342105263158e-05, "loss": 0.3243, "step": 29384 }, { "epoch": 1.6454810169111882, "grad_norm": 1.850839614868164, "learning_rate": 9.753394736842105e-05, "loss": 0.5586, "step": 29385 }, { "epoch": 1.6455370142233172, "grad_norm": 1.1812289953231812, "learning_rate": 9.753368421052632e-05, "loss": 0.3993, "step": 29386 }, { "epoch": 1.6455930115354462, "grad_norm": 1.272046446800232, "learning_rate": 9.753342105263158e-05, "loss": 0.4131, "step": 29387 }, { "epoch": 1.6456490088475753, "grad_norm": 1.8550727367401123, "learning_rate": 9.753315789473685e-05, "loss": 0.5772, "step": 29388 }, { "epoch": 1.6457050061597043, "grad_norm": 1.3328020572662354, "learning_rate": 9.753289473684211e-05, "loss": 0.5455, "step": 29389 }, { "epoch": 1.6457610034718333, 
"grad_norm": 1.267332673072815, "learning_rate": 9.753263157894737e-05, "loss": 0.4384, "step": 29390 }, { "epoch": 1.6458170007839623, "grad_norm": 1.1775612831115723, "learning_rate": 9.753236842105263e-05, "loss": 0.4278, "step": 29391 }, { "epoch": 1.6458729980960913, "grad_norm": 1.369011402130127, "learning_rate": 9.75321052631579e-05, "loss": 0.4961, "step": 29392 }, { "epoch": 1.6459289954082204, "grad_norm": 1.5223098993301392, "learning_rate": 9.753184210526316e-05, "loss": 0.5209, "step": 29393 }, { "epoch": 1.6459849927203494, "grad_norm": 1.4535424709320068, "learning_rate": 9.753157894736842e-05, "loss": 0.5633, "step": 29394 }, { "epoch": 1.6460409900324784, "grad_norm": 1.1435127258300781, "learning_rate": 9.753131578947368e-05, "loss": 0.4205, "step": 29395 }, { "epoch": 1.6460969873446074, "grad_norm": 1.2056618928909302, "learning_rate": 9.753105263157896e-05, "loss": 0.3655, "step": 29396 }, { "epoch": 1.6461529846567364, "grad_norm": 1.3217023611068726, "learning_rate": 9.753078947368422e-05, "loss": 0.3516, "step": 29397 }, { "epoch": 1.6462089819688654, "grad_norm": 1.221899151802063, "learning_rate": 9.753052631578948e-05, "loss": 0.4635, "step": 29398 }, { "epoch": 1.6462649792809945, "grad_norm": 0.9793908596038818, "learning_rate": 9.753026315789474e-05, "loss": 0.3526, "step": 29399 }, { "epoch": 1.6463209765931235, "grad_norm": 1.0542079210281372, "learning_rate": 9.753e-05, "loss": 0.3677, "step": 29400 }, { "epoch": 1.6463769739052525, "grad_norm": 1.3768837451934814, "learning_rate": 9.752973684210527e-05, "loss": 0.4844, "step": 29401 }, { "epoch": 1.6464329712173815, "grad_norm": 1.3428592681884766, "learning_rate": 9.752947368421053e-05, "loss": 0.661, "step": 29402 }, { "epoch": 1.6464889685295105, "grad_norm": 1.7047210931777954, "learning_rate": 9.752921052631579e-05, "loss": 0.5118, "step": 29403 }, { "epoch": 1.6465449658416396, "grad_norm": 1.5451815128326416, "learning_rate": 9.752894736842105e-05, "loss": 0.4501, "step": 
29404 }, { "epoch": 1.6466009631537686, "grad_norm": 1.2636375427246094, "learning_rate": 9.752868421052632e-05, "loss": 0.4475, "step": 29405 }, { "epoch": 1.6466569604658976, "grad_norm": 1.2503951787948608, "learning_rate": 9.752842105263158e-05, "loss": 0.3915, "step": 29406 }, { "epoch": 1.6467129577780266, "grad_norm": 1.2569408416748047, "learning_rate": 9.752815789473686e-05, "loss": 0.4041, "step": 29407 }, { "epoch": 1.6467689550901556, "grad_norm": 1.645736575126648, "learning_rate": 9.75278947368421e-05, "loss": 0.4398, "step": 29408 }, { "epoch": 1.6468249524022847, "grad_norm": 1.7948026657104492, "learning_rate": 9.752763157894737e-05, "loss": 0.5674, "step": 29409 }, { "epoch": 1.6468809497144137, "grad_norm": 1.4449021816253662, "learning_rate": 9.752736842105263e-05, "loss": 0.5032, "step": 29410 }, { "epoch": 1.6469369470265427, "grad_norm": 1.430108904838562, "learning_rate": 9.752710526315791e-05, "loss": 0.4738, "step": 29411 }, { "epoch": 1.6469929443386717, "grad_norm": 1.1391552686691284, "learning_rate": 9.752684210526317e-05, "loss": 0.4393, "step": 29412 }, { "epoch": 1.6470489416508007, "grad_norm": 1.2336457967758179, "learning_rate": 9.752657894736843e-05, "loss": 0.4446, "step": 29413 }, { "epoch": 1.6471049389629298, "grad_norm": 1.372226357460022, "learning_rate": 9.752631578947369e-05, "loss": 0.406, "step": 29414 }, { "epoch": 1.6471609362750588, "grad_norm": 1.3093339204788208, "learning_rate": 9.752605263157896e-05, "loss": 0.5155, "step": 29415 }, { "epoch": 1.6472169335871878, "grad_norm": 1.4605350494384766, "learning_rate": 9.752578947368422e-05, "loss": 0.4429, "step": 29416 }, { "epoch": 1.6472729308993168, "grad_norm": 1.4476892948150635, "learning_rate": 9.752552631578948e-05, "loss": 0.415, "step": 29417 }, { "epoch": 1.6473289282114458, "grad_norm": 1.3584709167480469, "learning_rate": 9.752526315789474e-05, "loss": 0.5558, "step": 29418 }, { "epoch": 1.6473849255235749, "grad_norm": 1.3899284601211548, 
"learning_rate": 9.7525e-05, "loss": 0.5136, "step": 29419 }, { "epoch": 1.6474409228357039, "grad_norm": 1.0593681335449219, "learning_rate": 9.752473684210527e-05, "loss": 0.4007, "step": 29420 }, { "epoch": 1.6474969201478329, "grad_norm": 1.3471382856369019, "learning_rate": 9.752447368421053e-05, "loss": 0.4306, "step": 29421 }, { "epoch": 1.647552917459962, "grad_norm": 1.8453447818756104, "learning_rate": 9.752421052631579e-05, "loss": 0.569, "step": 29422 }, { "epoch": 1.647608914772091, "grad_norm": 1.2868098020553589, "learning_rate": 9.752394736842105e-05, "loss": 0.4398, "step": 29423 }, { "epoch": 1.64766491208422, "grad_norm": 1.1600334644317627, "learning_rate": 9.752368421052632e-05, "loss": 0.45, "step": 29424 }, { "epoch": 1.647720909396349, "grad_norm": 1.535599946975708, "learning_rate": 9.752342105263158e-05, "loss": 0.5377, "step": 29425 }, { "epoch": 1.647776906708478, "grad_norm": 1.270965814590454, "learning_rate": 9.752315789473684e-05, "loss": 0.4662, "step": 29426 }, { "epoch": 1.647832904020607, "grad_norm": 1.2857426404953003, "learning_rate": 9.75228947368421e-05, "loss": 0.5464, "step": 29427 }, { "epoch": 1.647888901332736, "grad_norm": 1.3699164390563965, "learning_rate": 9.752263157894738e-05, "loss": 0.3875, "step": 29428 }, { "epoch": 1.647944898644865, "grad_norm": 1.0707435607910156, "learning_rate": 9.752236842105264e-05, "loss": 0.4614, "step": 29429 }, { "epoch": 1.648000895956994, "grad_norm": 1.468240737915039, "learning_rate": 9.752210526315791e-05, "loss": 0.4208, "step": 29430 }, { "epoch": 1.648056893269123, "grad_norm": 1.38775634765625, "learning_rate": 9.752184210526316e-05, "loss": 0.5273, "step": 29431 }, { "epoch": 1.648112890581252, "grad_norm": 1.7929494380950928, "learning_rate": 9.752157894736843e-05, "loss": 0.4566, "step": 29432 }, { "epoch": 1.6481688878933811, "grad_norm": 1.2799029350280762, "learning_rate": 9.752131578947369e-05, "loss": 0.5098, "step": 29433 }, { "epoch": 1.6482248852055101, 
"grad_norm": 1.3467371463775635, "learning_rate": 9.752105263157896e-05, "loss": 0.4895, "step": 29434 }, { "epoch": 1.6482808825176392, "grad_norm": 1.270985722541809, "learning_rate": 9.752078947368421e-05, "loss": 0.6181, "step": 29435 }, { "epoch": 1.6483368798297682, "grad_norm": 1.092190146446228, "learning_rate": 9.752052631578947e-05, "loss": 0.387, "step": 29436 }, { "epoch": 1.6483928771418972, "grad_norm": 1.0694005489349365, "learning_rate": 9.752026315789474e-05, "loss": 0.4356, "step": 29437 }, { "epoch": 1.6484488744540262, "grad_norm": 1.276921272277832, "learning_rate": 9.752e-05, "loss": 0.4261, "step": 29438 }, { "epoch": 1.6485048717661552, "grad_norm": 1.4633861780166626, "learning_rate": 9.751973684210527e-05, "loss": 0.5498, "step": 29439 }, { "epoch": 1.6485608690782843, "grad_norm": 1.5083926916122437, "learning_rate": 9.751947368421052e-05, "loss": 0.4237, "step": 29440 }, { "epoch": 1.6486168663904133, "grad_norm": 1.3734607696533203, "learning_rate": 9.75192105263158e-05, "loss": 0.3969, "step": 29441 }, { "epoch": 1.6486728637025423, "grad_norm": 1.3441766500473022, "learning_rate": 9.751894736842105e-05, "loss": 0.3779, "step": 29442 }, { "epoch": 1.6487288610146713, "grad_norm": 1.093099594116211, "learning_rate": 9.751868421052633e-05, "loss": 0.3103, "step": 29443 }, { "epoch": 1.6487848583268003, "grad_norm": 1.3892213106155396, "learning_rate": 9.751842105263159e-05, "loss": 0.4943, "step": 29444 }, { "epoch": 1.6488408556389293, "grad_norm": 1.4378942251205444, "learning_rate": 9.751815789473685e-05, "loss": 0.4575, "step": 29445 }, { "epoch": 1.6488968529510584, "grad_norm": 1.298954725265503, "learning_rate": 9.75178947368421e-05, "loss": 0.4933, "step": 29446 }, { "epoch": 1.6489528502631874, "grad_norm": 1.314711332321167, "learning_rate": 9.751763157894738e-05, "loss": 0.4495, "step": 29447 }, { "epoch": 1.6490088475753164, "grad_norm": 1.2707839012145996, "learning_rate": 9.751736842105264e-05, "loss": 0.4522, "step": 29448 
}, { "epoch": 1.6490648448874454, "grad_norm": 1.2846564054489136, "learning_rate": 9.75171052631579e-05, "loss": 0.3352, "step": 29449 }, { "epoch": 1.6491208421995744, "grad_norm": 1.248152494430542, "learning_rate": 9.751684210526316e-05, "loss": 0.4574, "step": 29450 }, { "epoch": 1.6491768395117035, "grad_norm": 1.2577346563339233, "learning_rate": 9.751657894736843e-05, "loss": 0.4628, "step": 29451 }, { "epoch": 1.6492328368238325, "grad_norm": 1.3272029161453247, "learning_rate": 9.751631578947369e-05, "loss": 0.4397, "step": 29452 }, { "epoch": 1.6492888341359615, "grad_norm": 1.3512473106384277, "learning_rate": 9.751605263157895e-05, "loss": 0.3505, "step": 29453 }, { "epoch": 1.6493448314480905, "grad_norm": 1.1984714269638062, "learning_rate": 9.751578947368421e-05, "loss": 0.6082, "step": 29454 }, { "epoch": 1.6494008287602195, "grad_norm": 1.3861230611801147, "learning_rate": 9.751552631578947e-05, "loss": 0.4913, "step": 29455 }, { "epoch": 1.6494568260723486, "grad_norm": 1.2818530797958374, "learning_rate": 9.751526315789474e-05, "loss": 0.443, "step": 29456 }, { "epoch": 1.6495128233844776, "grad_norm": 1.317275881767273, "learning_rate": 9.7515e-05, "loss": 0.3658, "step": 29457 }, { "epoch": 1.6495688206966066, "grad_norm": 1.3239867687225342, "learning_rate": 9.751473684210526e-05, "loss": 0.4324, "step": 29458 }, { "epoch": 1.6496248180087356, "grad_norm": 1.4137225151062012, "learning_rate": 9.751447368421052e-05, "loss": 0.4373, "step": 29459 }, { "epoch": 1.6496808153208646, "grad_norm": 1.5117820501327515, "learning_rate": 9.75142105263158e-05, "loss": 0.5513, "step": 29460 }, { "epoch": 1.6497368126329937, "grad_norm": 1.5019891262054443, "learning_rate": 9.751394736842106e-05, "loss": 0.5148, "step": 29461 }, { "epoch": 1.6497928099451227, "grad_norm": 1.271453857421875, "learning_rate": 9.751368421052633e-05, "loss": 0.4867, "step": 29462 }, { "epoch": 1.6498488072572517, "grad_norm": 1.3284165859222412, "learning_rate": 
9.751342105263158e-05, "loss": 0.4884, "step": 29463 }, { "epoch": 1.6499048045693807, "grad_norm": 1.3907990455627441, "learning_rate": 9.751315789473685e-05, "loss": 0.471, "step": 29464 }, { "epoch": 1.6499608018815097, "grad_norm": 1.2383745908737183, "learning_rate": 9.751289473684211e-05, "loss": 0.318, "step": 29465 }, { "epoch": 1.6500167991936388, "grad_norm": 1.1266353130340576, "learning_rate": 9.751263157894738e-05, "loss": 0.3896, "step": 29466 }, { "epoch": 1.6500727965057678, "grad_norm": 1.3946961164474487, "learning_rate": 9.751236842105264e-05, "loss": 0.4474, "step": 29467 }, { "epoch": 1.6501287938178968, "grad_norm": 1.186700463294983, "learning_rate": 9.75121052631579e-05, "loss": 0.319, "step": 29468 }, { "epoch": 1.6501847911300258, "grad_norm": 1.1210553646087646, "learning_rate": 9.751184210526316e-05, "loss": 0.3125, "step": 29469 }, { "epoch": 1.6502407884421548, "grad_norm": 1.0811258554458618, "learning_rate": 9.751157894736842e-05, "loss": 0.3655, "step": 29470 }, { "epoch": 1.6502967857542838, "grad_norm": 1.7336432933807373, "learning_rate": 9.75113157894737e-05, "loss": 0.4085, "step": 29471 }, { "epoch": 1.6503527830664129, "grad_norm": 1.5337207317352295, "learning_rate": 9.751105263157895e-05, "loss": 0.4996, "step": 29472 }, { "epoch": 1.6504087803785419, "grad_norm": 1.4256982803344727, "learning_rate": 9.751078947368421e-05, "loss": 0.3553, "step": 29473 }, { "epoch": 1.650464777690671, "grad_norm": 1.38419508934021, "learning_rate": 9.751052631578947e-05, "loss": 0.4517, "step": 29474 }, { "epoch": 1.6505207750028, "grad_norm": 1.5306450128555298, "learning_rate": 9.751026315789475e-05, "loss": 0.4817, "step": 29475 }, { "epoch": 1.650576772314929, "grad_norm": 1.411915898323059, "learning_rate": 9.751e-05, "loss": 0.4983, "step": 29476 }, { "epoch": 1.650632769627058, "grad_norm": 1.1324421167373657, "learning_rate": 9.750973684210527e-05, "loss": 0.3834, "step": 29477 }, { "epoch": 1.650688766939187, "grad_norm": 
1.4064148664474487, "learning_rate": 9.750947368421053e-05, "loss": 0.5747, "step": 29478 }, { "epoch": 1.650744764251316, "grad_norm": 1.3565051555633545, "learning_rate": 9.75092105263158e-05, "loss": 0.4658, "step": 29479 }, { "epoch": 1.650800761563445, "grad_norm": 1.220345377922058, "learning_rate": 9.750894736842106e-05, "loss": 0.4655, "step": 29480 }, { "epoch": 1.650856758875574, "grad_norm": 1.3398367166519165, "learning_rate": 9.750868421052632e-05, "loss": 0.5441, "step": 29481 }, { "epoch": 1.650912756187703, "grad_norm": 3.0499606132507324, "learning_rate": 9.750842105263158e-05, "loss": 0.4552, "step": 29482 }, { "epoch": 1.650968753499832, "grad_norm": 1.3581091165542603, "learning_rate": 9.750815789473685e-05, "loss": 0.5208, "step": 29483 }, { "epoch": 1.651024750811961, "grad_norm": 1.7729673385620117, "learning_rate": 9.750789473684211e-05, "loss": 0.6271, "step": 29484 }, { "epoch": 1.6510807481240901, "grad_norm": 1.3497742414474487, "learning_rate": 9.750763157894738e-05, "loss": 0.4637, "step": 29485 }, { "epoch": 1.6511367454362191, "grad_norm": 1.1094225645065308, "learning_rate": 9.750736842105263e-05, "loss": 0.5067, "step": 29486 }, { "epoch": 1.6511927427483482, "grad_norm": 1.2953336238861084, "learning_rate": 9.750710526315789e-05, "loss": 0.4458, "step": 29487 }, { "epoch": 1.6512487400604772, "grad_norm": 1.1549729108810425, "learning_rate": 9.750684210526316e-05, "loss": 0.437, "step": 29488 }, { "epoch": 1.6513047373726062, "grad_norm": 1.3755946159362793, "learning_rate": 9.750657894736842e-05, "loss": 0.4402, "step": 29489 }, { "epoch": 1.6513607346847352, "grad_norm": 1.4747800827026367, "learning_rate": 9.750631578947368e-05, "loss": 0.4794, "step": 29490 }, { "epoch": 1.6514167319968642, "grad_norm": 1.548931360244751, "learning_rate": 9.750605263157894e-05, "loss": 0.5268, "step": 29491 }, { "epoch": 1.6514727293089932, "grad_norm": 1.3267422914505005, "learning_rate": 9.750578947368422e-05, "loss": 0.4324, "step": 29492 
}, { "epoch": 1.6515287266211223, "grad_norm": 1.1592317819595337, "learning_rate": 9.750552631578948e-05, "loss": 0.4064, "step": 29493 }, { "epoch": 1.6515847239332513, "grad_norm": 1.4331164360046387, "learning_rate": 9.750526315789475e-05, "loss": 0.6338, "step": 29494 }, { "epoch": 1.6516407212453803, "grad_norm": 1.419296383857727, "learning_rate": 9.7505e-05, "loss": 0.5461, "step": 29495 }, { "epoch": 1.6516967185575093, "grad_norm": 1.5112463235855103, "learning_rate": 9.750473684210527e-05, "loss": 0.401, "step": 29496 }, { "epoch": 1.6517527158696383, "grad_norm": 1.4628111124038696, "learning_rate": 9.750447368421053e-05, "loss": 0.5966, "step": 29497 }, { "epoch": 1.6518087131817674, "grad_norm": 1.2177109718322754, "learning_rate": 9.75042105263158e-05, "loss": 0.3109, "step": 29498 }, { "epoch": 1.6518647104938964, "grad_norm": 1.3104015588760376, "learning_rate": 9.750394736842106e-05, "loss": 0.4858, "step": 29499 }, { "epoch": 1.6519207078060254, "grad_norm": 1.5395573377609253, "learning_rate": 9.750368421052632e-05, "loss": 0.437, "step": 29500 }, { "epoch": 1.6519767051181544, "grad_norm": 2.065742254257202, "learning_rate": 9.750342105263158e-05, "loss": 0.5739, "step": 29501 }, { "epoch": 1.6520327024302834, "grad_norm": 1.1836086511611938, "learning_rate": 9.750315789473685e-05, "loss": 0.4297, "step": 29502 }, { "epoch": 1.6520886997424125, "grad_norm": 1.1465251445770264, "learning_rate": 9.750289473684211e-05, "loss": 0.4416, "step": 29503 }, { "epoch": 1.6521446970545415, "grad_norm": 1.2871326208114624, "learning_rate": 9.750263157894737e-05, "loss": 0.5569, "step": 29504 }, { "epoch": 1.6522006943666705, "grad_norm": 1.2531226873397827, "learning_rate": 9.750236842105263e-05, "loss": 0.3886, "step": 29505 }, { "epoch": 1.6522566916787995, "grad_norm": 1.8033958673477173, "learning_rate": 9.75021052631579e-05, "loss": 0.4523, "step": 29506 }, { "epoch": 1.6523126889909285, "grad_norm": 1.0945812463760376, "learning_rate": 
9.750184210526317e-05, "loss": 0.3309, "step": 29507 }, { "epoch": 1.6523686863030576, "grad_norm": 1.440437912940979, "learning_rate": 9.750157894736843e-05, "loss": 0.5318, "step": 29508 }, { "epoch": 1.6524246836151866, "grad_norm": 1.3866740465164185, "learning_rate": 9.750131578947369e-05, "loss": 0.6054, "step": 29509 }, { "epoch": 1.6524806809273156, "grad_norm": 1.3404566049575806, "learning_rate": 9.750105263157895e-05, "loss": 0.7541, "step": 29510 }, { "epoch": 1.6525366782394446, "grad_norm": 1.138606071472168, "learning_rate": 9.750078947368422e-05, "loss": 0.3472, "step": 29511 }, { "epoch": 1.6525926755515736, "grad_norm": 1.5618456602096558, "learning_rate": 9.750052631578948e-05, "loss": 0.5744, "step": 29512 }, { "epoch": 1.6526486728637027, "grad_norm": 1.288644790649414, "learning_rate": 9.750026315789474e-05, "loss": 0.5016, "step": 29513 }, { "epoch": 1.6527046701758317, "grad_norm": 1.5030759572982788, "learning_rate": 9.75e-05, "loss": 0.6377, "step": 29514 }, { "epoch": 1.6527606674879607, "grad_norm": 3.824960231781006, "learning_rate": 9.749973684210527e-05, "loss": 0.4982, "step": 29515 }, { "epoch": 1.6528166648000897, "grad_norm": 1.2412022352218628, "learning_rate": 9.749947368421053e-05, "loss": 0.4165, "step": 29516 }, { "epoch": 1.6528726621122187, "grad_norm": 1.4901292324066162, "learning_rate": 9.74992105263158e-05, "loss": 0.4076, "step": 29517 }, { "epoch": 1.6529286594243477, "grad_norm": 1.3059476613998413, "learning_rate": 9.749894736842105e-05, "loss": 0.4026, "step": 29518 }, { "epoch": 1.6529846567364768, "grad_norm": 1.6507205963134766, "learning_rate": 9.749868421052632e-05, "loss": 0.5171, "step": 29519 }, { "epoch": 1.6530406540486058, "grad_norm": 1.561182975769043, "learning_rate": 9.749842105263158e-05, "loss": 0.5634, "step": 29520 }, { "epoch": 1.6530966513607348, "grad_norm": 1.259454607963562, "learning_rate": 9.749815789473684e-05, "loss": 0.4905, "step": 29521 }, { "epoch": 1.6531526486728638, "grad_norm": 
2.1463735103607178, "learning_rate": 9.749789473684212e-05, "loss": 0.6679, "step": 29522 }, { "epoch": 1.6532086459849928, "grad_norm": 5.757894515991211, "learning_rate": 9.749763157894736e-05, "loss": 0.6395, "step": 29523 }, { "epoch": 1.6532646432971219, "grad_norm": 1.4047274589538574, "learning_rate": 9.749736842105264e-05, "loss": 0.4141, "step": 29524 }, { "epoch": 1.6533206406092509, "grad_norm": 1.2539740800857544, "learning_rate": 9.74971052631579e-05, "loss": 0.4272, "step": 29525 }, { "epoch": 1.65337663792138, "grad_norm": 1.2732815742492676, "learning_rate": 9.749684210526317e-05, "loss": 0.5182, "step": 29526 }, { "epoch": 1.653432635233509, "grad_norm": 1.3196169137954712, "learning_rate": 9.749657894736842e-05, "loss": 0.3876, "step": 29527 }, { "epoch": 1.653488632545638, "grad_norm": 1.3361912965774536, "learning_rate": 9.749631578947369e-05, "loss": 0.4089, "step": 29528 }, { "epoch": 1.653544629857767, "grad_norm": 1.3709325790405273, "learning_rate": 9.749605263157895e-05, "loss": 0.6227, "step": 29529 }, { "epoch": 1.653600627169896, "grad_norm": 1.0868281126022339, "learning_rate": 9.749578947368422e-05, "loss": 0.3535, "step": 29530 }, { "epoch": 1.653656624482025, "grad_norm": 1.2153334617614746, "learning_rate": 9.749552631578948e-05, "loss": 0.3779, "step": 29531 }, { "epoch": 1.653712621794154, "grad_norm": 1.5079317092895508, "learning_rate": 9.749526315789474e-05, "loss": 0.4953, "step": 29532 }, { "epoch": 1.653768619106283, "grad_norm": 1.44725501537323, "learning_rate": 9.7495e-05, "loss": 0.5007, "step": 29533 }, { "epoch": 1.653824616418412, "grad_norm": 1.10235595703125, "learning_rate": 9.749473684210527e-05, "loss": 0.4212, "step": 29534 }, { "epoch": 1.653880613730541, "grad_norm": 1.637251615524292, "learning_rate": 9.749447368421053e-05, "loss": 0.5369, "step": 29535 }, { "epoch": 1.65393661104267, "grad_norm": 1.3870211839675903, "learning_rate": 9.74942105263158e-05, "loss": 0.4615, "step": 29536 }, { "epoch": 
1.653992608354799, "grad_norm": 1.8148595094680786, "learning_rate": 9.749394736842105e-05, "loss": 0.608, "step": 29537 }, { "epoch": 1.6540486056669281, "grad_norm": 1.1375174522399902, "learning_rate": 9.749368421052633e-05, "loss": 0.4065, "step": 29538 }, { "epoch": 1.6541046029790571, "grad_norm": 1.3817225694656372, "learning_rate": 9.749342105263159e-05, "loss": 0.5522, "step": 29539 }, { "epoch": 1.6541606002911862, "grad_norm": 1.1288411617279053, "learning_rate": 9.749315789473685e-05, "loss": 0.429, "step": 29540 }, { "epoch": 1.6542165976033152, "grad_norm": 1.1628162860870361, "learning_rate": 9.74928947368421e-05, "loss": 0.3498, "step": 29541 }, { "epoch": 1.6542725949154442, "grad_norm": 1.3848342895507812, "learning_rate": 9.749263157894737e-05, "loss": 0.4717, "step": 29542 }, { "epoch": 1.6543285922275732, "grad_norm": 1.5429258346557617, "learning_rate": 9.749236842105264e-05, "loss": 0.458, "step": 29543 }, { "epoch": 1.6543845895397022, "grad_norm": 1.1181485652923584, "learning_rate": 9.74921052631579e-05, "loss": 0.4494, "step": 29544 }, { "epoch": 1.6544405868518313, "grad_norm": 1.3679903745651245, "learning_rate": 9.749184210526316e-05, "loss": 0.5011, "step": 29545 }, { "epoch": 1.6544965841639603, "grad_norm": 1.237748622894287, "learning_rate": 9.749157894736842e-05, "loss": 0.3805, "step": 29546 }, { "epoch": 1.6545525814760893, "grad_norm": 1.2433940172195435, "learning_rate": 9.749131578947369e-05, "loss": 0.3774, "step": 29547 }, { "epoch": 1.6546085787882183, "grad_norm": 1.468341588973999, "learning_rate": 9.749105263157895e-05, "loss": 0.4329, "step": 29548 }, { "epoch": 1.6546645761003473, "grad_norm": 1.4150248765945435, "learning_rate": 9.749078947368422e-05, "loss": 0.4416, "step": 29549 }, { "epoch": 1.6547205734124764, "grad_norm": 1.1630582809448242, "learning_rate": 9.749052631578947e-05, "loss": 0.4249, "step": 29550 }, { "epoch": 1.6547765707246054, "grad_norm": 1.161422610282898, "learning_rate": 
9.749026315789474e-05, "loss": 0.3848, "step": 29551 }, { "epoch": 1.6548325680367344, "grad_norm": 1.278017520904541, "learning_rate": 9.749e-05, "loss": 0.402, "step": 29552 }, { "epoch": 1.6548885653488634, "grad_norm": 1.96346914768219, "learning_rate": 9.748973684210528e-05, "loss": 0.6422, "step": 29553 }, { "epoch": 1.6549445626609924, "grad_norm": 1.2192671298980713, "learning_rate": 9.748947368421054e-05, "loss": 0.5708, "step": 29554 }, { "epoch": 1.6550005599731215, "grad_norm": 1.2060295343399048, "learning_rate": 9.74892105263158e-05, "loss": 0.5261, "step": 29555 }, { "epoch": 1.6550565572852505, "grad_norm": 1.8292222023010254, "learning_rate": 9.748894736842106e-05, "loss": 0.5154, "step": 29556 }, { "epoch": 1.6551125545973793, "grad_norm": 1.2270359992980957, "learning_rate": 9.748868421052632e-05, "loss": 0.5626, "step": 29557 }, { "epoch": 1.6551685519095083, "grad_norm": 1.1451836824417114, "learning_rate": 9.748842105263159e-05, "loss": 0.5148, "step": 29558 }, { "epoch": 1.6552245492216373, "grad_norm": 1.3507838249206543, "learning_rate": 9.748815789473685e-05, "loss": 0.6799, "step": 29559 }, { "epoch": 1.6552805465337663, "grad_norm": 1.8475145101547241, "learning_rate": 9.748789473684211e-05, "loss": 0.6658, "step": 29560 }, { "epoch": 1.6553365438458953, "grad_norm": 1.3059606552124023, "learning_rate": 9.748763157894737e-05, "loss": 0.4534, "step": 29561 }, { "epoch": 1.6553925411580244, "grad_norm": 1.292354941368103, "learning_rate": 9.748736842105264e-05, "loss": 0.432, "step": 29562 }, { "epoch": 1.6554485384701534, "grad_norm": 1.2703644037246704, "learning_rate": 9.74871052631579e-05, "loss": 0.5647, "step": 29563 }, { "epoch": 1.6555045357822824, "grad_norm": 1.7154474258422852, "learning_rate": 9.748684210526316e-05, "loss": 0.5775, "step": 29564 }, { "epoch": 1.6555605330944114, "grad_norm": 1.4255478382110596, "learning_rate": 9.748657894736842e-05, "loss": 0.407, "step": 29565 }, { "epoch": 1.6556165304065404, "grad_norm": 
1.5124664306640625, "learning_rate": 9.74863157894737e-05, "loss": 0.4397, "step": 29566 }, { "epoch": 1.6556725277186695, "grad_norm": 1.2534633874893188, "learning_rate": 9.748605263157895e-05, "loss": 0.4175, "step": 29567 }, { "epoch": 1.6557285250307985, "grad_norm": 1.333579182624817, "learning_rate": 9.748578947368421e-05, "loss": 0.515, "step": 29568 }, { "epoch": 1.6557845223429275, "grad_norm": 1.228956937789917, "learning_rate": 9.748552631578947e-05, "loss": 0.518, "step": 29569 }, { "epoch": 1.6558405196550565, "grad_norm": 1.1497122049331665, "learning_rate": 9.748526315789475e-05, "loss": 0.4025, "step": 29570 }, { "epoch": 1.6558965169671855, "grad_norm": 1.0443910360336304, "learning_rate": 9.7485e-05, "loss": 0.427, "step": 29571 }, { "epoch": 1.6559525142793146, "grad_norm": 1.344438076019287, "learning_rate": 9.748473684210528e-05, "loss": 0.45, "step": 29572 }, { "epoch": 1.6560085115914436, "grad_norm": 1.1079256534576416, "learning_rate": 9.748447368421053e-05, "loss": 0.4042, "step": 29573 }, { "epoch": 1.6560645089035726, "grad_norm": 1.3417983055114746, "learning_rate": 9.748421052631579e-05, "loss": 0.435, "step": 29574 }, { "epoch": 1.6561205062157016, "grad_norm": 1.4069679975509644, "learning_rate": 9.748394736842106e-05, "loss": 0.46, "step": 29575 }, { "epoch": 1.6561765035278306, "grad_norm": 1.1575227975845337, "learning_rate": 9.748368421052632e-05, "loss": 0.4175, "step": 29576 }, { "epoch": 1.6562325008399597, "grad_norm": 1.3027757406234741, "learning_rate": 9.748342105263159e-05, "loss": 0.4284, "step": 29577 }, { "epoch": 1.6562884981520887, "grad_norm": 1.4100687503814697, "learning_rate": 9.748315789473684e-05, "loss": 0.8479, "step": 29578 }, { "epoch": 1.6563444954642177, "grad_norm": 1.2550665140151978, "learning_rate": 9.748289473684211e-05, "loss": 0.5583, "step": 29579 }, { "epoch": 1.6564004927763467, "grad_norm": 1.2159197330474854, "learning_rate": 9.748263157894737e-05, "loss": 0.5085, "step": 29580 }, { "epoch": 
1.6564564900884757, "grad_norm": 1.5163697004318237, "learning_rate": 9.748236842105264e-05, "loss": 0.4916, "step": 29581 }, { "epoch": 1.6565124874006047, "grad_norm": 1.3901549577713013, "learning_rate": 9.748210526315789e-05, "loss": 0.4707, "step": 29582 }, { "epoch": 1.6565684847127338, "grad_norm": 1.4969396591186523, "learning_rate": 9.748184210526316e-05, "loss": 0.5083, "step": 29583 }, { "epoch": 1.6566244820248628, "grad_norm": 1.331526279449463, "learning_rate": 9.748157894736842e-05, "loss": 0.3855, "step": 29584 }, { "epoch": 1.6566804793369918, "grad_norm": 1.305368423461914, "learning_rate": 9.74813157894737e-05, "loss": 0.4413, "step": 29585 }, { "epoch": 1.6567364766491208, "grad_norm": 1.43300461769104, "learning_rate": 9.748105263157896e-05, "loss": 0.5209, "step": 29586 }, { "epoch": 1.6567924739612498, "grad_norm": 1.1488232612609863, "learning_rate": 9.748078947368422e-05, "loss": 0.4211, "step": 29587 }, { "epoch": 1.6568484712733789, "grad_norm": 1.2985970973968506, "learning_rate": 9.748052631578948e-05, "loss": 0.531, "step": 29588 }, { "epoch": 1.6569044685855079, "grad_norm": 1.1614956855773926, "learning_rate": 9.748026315789475e-05, "loss": 0.4647, "step": 29589 }, { "epoch": 1.656960465897637, "grad_norm": 1.4463664293289185, "learning_rate": 9.748000000000001e-05, "loss": 0.5643, "step": 29590 }, { "epoch": 1.657016463209766, "grad_norm": 1.2314857244491577, "learning_rate": 9.747973684210527e-05, "loss": 0.478, "step": 29591 }, { "epoch": 1.657072460521895, "grad_norm": 1.4421050548553467, "learning_rate": 9.747947368421053e-05, "loss": 0.4519, "step": 29592 }, { "epoch": 1.657128457834024, "grad_norm": 1.3177601099014282, "learning_rate": 9.747921052631579e-05, "loss": 0.4995, "step": 29593 }, { "epoch": 1.657184455146153, "grad_norm": 1.2215352058410645, "learning_rate": 9.747894736842106e-05, "loss": 0.3068, "step": 29594 }, { "epoch": 1.657240452458282, "grad_norm": 1.2740132808685303, "learning_rate": 9.747868421052632e-05, 
"loss": 0.5852, "step": 29595 }, { "epoch": 1.657296449770411, "grad_norm": 1.2289226055145264, "learning_rate": 9.747842105263158e-05, "loss": 0.5125, "step": 29596 }, { "epoch": 1.65735244708254, "grad_norm": 1.533870816230774, "learning_rate": 9.747815789473684e-05, "loss": 0.7458, "step": 29597 }, { "epoch": 1.657408444394669, "grad_norm": 1.202687382698059, "learning_rate": 9.747789473684211e-05, "loss": 0.4193, "step": 29598 }, { "epoch": 1.657464441706798, "grad_norm": 1.5871564149856567, "learning_rate": 9.747763157894737e-05, "loss": 0.6005, "step": 29599 }, { "epoch": 1.657520439018927, "grad_norm": 1.3361384868621826, "learning_rate": 9.747736842105263e-05, "loss": 0.4297, "step": 29600 }, { "epoch": 1.6575764363310561, "grad_norm": 1.468178629875183, "learning_rate": 9.747710526315789e-05, "loss": 0.5172, "step": 29601 }, { "epoch": 1.6576324336431851, "grad_norm": 1.1791592836380005, "learning_rate": 9.747684210526317e-05, "loss": 0.3931, "step": 29602 }, { "epoch": 1.6576884309553142, "grad_norm": 1.1586310863494873, "learning_rate": 9.747657894736843e-05, "loss": 0.4085, "step": 29603 }, { "epoch": 1.6577444282674432, "grad_norm": 1.1029143333435059, "learning_rate": 9.74763157894737e-05, "loss": 0.4226, "step": 29604 }, { "epoch": 1.6578004255795722, "grad_norm": 1.5106743574142456, "learning_rate": 9.747605263157895e-05, "loss": 0.4974, "step": 29605 }, { "epoch": 1.6578564228917012, "grad_norm": 1.2638213634490967, "learning_rate": 9.747578947368422e-05, "loss": 0.3985, "step": 29606 }, { "epoch": 1.6579124202038302, "grad_norm": 1.1854311227798462, "learning_rate": 9.747552631578948e-05, "loss": 0.3951, "step": 29607 }, { "epoch": 1.6579684175159592, "grad_norm": 1.3831970691680908, "learning_rate": 9.747526315789474e-05, "loss": 0.4718, "step": 29608 }, { "epoch": 1.6580244148280883, "grad_norm": 1.2144850492477417, "learning_rate": 9.747500000000001e-05, "loss": 0.5548, "step": 29609 }, { "epoch": 1.6580804121402173, "grad_norm": 
1.3014781475067139, "learning_rate": 9.747473684210526e-05, "loss": 0.3956, "step": 29610 }, { "epoch": 1.6581364094523463, "grad_norm": 1.3508234024047852, "learning_rate": 9.747447368421053e-05, "loss": 0.4022, "step": 29611 }, { "epoch": 1.6581924067644753, "grad_norm": 1.2428338527679443, "learning_rate": 9.747421052631579e-05, "loss": 0.4778, "step": 29612 }, { "epoch": 1.6582484040766043, "grad_norm": 1.5273547172546387, "learning_rate": 9.747394736842106e-05, "loss": 0.542, "step": 29613 }, { "epoch": 1.6583044013887334, "grad_norm": 1.193739891052246, "learning_rate": 9.747368421052632e-05, "loss": 0.4343, "step": 29614 }, { "epoch": 1.6583603987008624, "grad_norm": 2.0253703594207764, "learning_rate": 9.747342105263158e-05, "loss": 0.6708, "step": 29615 }, { "epoch": 1.6584163960129914, "grad_norm": 1.3252277374267578, "learning_rate": 9.747315789473684e-05, "loss": 0.3905, "step": 29616 }, { "epoch": 1.6584723933251204, "grad_norm": 1.2533987760543823, "learning_rate": 9.747289473684212e-05, "loss": 0.384, "step": 29617 }, { "epoch": 1.6585283906372494, "grad_norm": 1.2431951761245728, "learning_rate": 9.747263157894738e-05, "loss": 0.579, "step": 29618 }, { "epoch": 1.6585843879493785, "grad_norm": 1.0912152528762817, "learning_rate": 9.747236842105264e-05, "loss": 0.4041, "step": 29619 }, { "epoch": 1.6586403852615075, "grad_norm": 1.4213159084320068, "learning_rate": 9.74721052631579e-05, "loss": 0.6027, "step": 29620 }, { "epoch": 1.6586963825736365, "grad_norm": 1.2781519889831543, "learning_rate": 9.747184210526317e-05, "loss": 0.4213, "step": 29621 }, { "epoch": 1.6587523798857655, "grad_norm": 1.4699957370758057, "learning_rate": 9.747157894736843e-05, "loss": 0.5215, "step": 29622 }, { "epoch": 1.6588083771978945, "grad_norm": 2.326874017715454, "learning_rate": 9.747131578947369e-05, "loss": 0.3919, "step": 29623 }, { "epoch": 1.6588643745100236, "grad_norm": 1.2619982957839966, "learning_rate": 9.747105263157895e-05, "loss": 0.4346, "step": 
29624 }, { "epoch": 1.6589203718221526, "grad_norm": 1.6012333631515503, "learning_rate": 9.747078947368421e-05, "loss": 0.4691, "step": 29625 }, { "epoch": 1.6589763691342816, "grad_norm": 1.347254753112793, "learning_rate": 9.747052631578948e-05, "loss": 0.3613, "step": 29626 }, { "epoch": 1.6590323664464106, "grad_norm": 1.4736576080322266, "learning_rate": 9.747026315789474e-05, "loss": 0.4603, "step": 29627 }, { "epoch": 1.6590883637585396, "grad_norm": 1.2075624465942383, "learning_rate": 9.747e-05, "loss": 0.4067, "step": 29628 }, { "epoch": 1.6591443610706686, "grad_norm": 1.2575129270553589, "learning_rate": 9.746973684210526e-05, "loss": 0.4704, "step": 29629 }, { "epoch": 1.6592003583827977, "grad_norm": 1.3325769901275635, "learning_rate": 9.746947368421053e-05, "loss": 0.5049, "step": 29630 }, { "epoch": 1.6592563556949267, "grad_norm": 1.4154554605484009, "learning_rate": 9.746921052631579e-05, "loss": 0.5334, "step": 29631 }, { "epoch": 1.6593123530070557, "grad_norm": 1.592248558998108, "learning_rate": 9.746894736842107e-05, "loss": 0.5429, "step": 29632 }, { "epoch": 1.6593683503191847, "grad_norm": 1.3198601007461548, "learning_rate": 9.746868421052631e-05, "loss": 0.4146, "step": 29633 }, { "epoch": 1.6594243476313137, "grad_norm": 1.1203079223632812, "learning_rate": 9.746842105263159e-05, "loss": 0.4375, "step": 29634 }, { "epoch": 1.6594803449434428, "grad_norm": 1.3273859024047852, "learning_rate": 9.746815789473685e-05, "loss": 0.4493, "step": 29635 }, { "epoch": 1.6595363422555718, "grad_norm": 1.3148910999298096, "learning_rate": 9.746789473684212e-05, "loss": 0.5294, "step": 29636 }, { "epoch": 1.6595923395677008, "grad_norm": 1.3773329257965088, "learning_rate": 9.746763157894736e-05, "loss": 0.4416, "step": 29637 }, { "epoch": 1.6596483368798298, "grad_norm": 1.2627286911010742, "learning_rate": 9.746736842105264e-05, "loss": 0.4194, "step": 29638 }, { "epoch": 1.6597043341919588, "grad_norm": 1.3446952104568481, "learning_rate": 
9.74671052631579e-05, "loss": 0.4122, "step": 29639 }, { "epoch": 1.6597603315040876, "grad_norm": 1.124110460281372, "learning_rate": 9.746684210526317e-05, "loss": 0.5054, "step": 29640 }, { "epoch": 1.6598163288162167, "grad_norm": 1.5453792810440063, "learning_rate": 9.746657894736843e-05, "loss": 0.5256, "step": 29641 }, { "epoch": 1.6598723261283457, "grad_norm": 1.4115320444107056, "learning_rate": 9.746631578947368e-05, "loss": 0.4691, "step": 29642 }, { "epoch": 1.6599283234404747, "grad_norm": 1.2420213222503662, "learning_rate": 9.746605263157895e-05, "loss": 0.452, "step": 29643 }, { "epoch": 1.6599843207526037, "grad_norm": 1.2624112367630005, "learning_rate": 9.746578947368421e-05, "loss": 0.4987, "step": 29644 }, { "epoch": 1.6600403180647327, "grad_norm": 1.2748420238494873, "learning_rate": 9.746552631578948e-05, "loss": 0.5516, "step": 29645 }, { "epoch": 1.6600963153768618, "grad_norm": 1.506902813911438, "learning_rate": 9.746526315789474e-05, "loss": 0.4746, "step": 29646 }, { "epoch": 1.6601523126889908, "grad_norm": 1.2483278512954712, "learning_rate": 9.7465e-05, "loss": 0.4323, "step": 29647 }, { "epoch": 1.6602083100011198, "grad_norm": 1.122362732887268, "learning_rate": 9.746473684210526e-05, "loss": 0.3422, "step": 29648 }, { "epoch": 1.6602643073132488, "grad_norm": 1.3935191631317139, "learning_rate": 9.746447368421054e-05, "loss": 0.3623, "step": 29649 }, { "epoch": 1.6603203046253778, "grad_norm": 1.3148080110549927, "learning_rate": 9.74642105263158e-05, "loss": 0.4853, "step": 29650 }, { "epoch": 1.6603763019375068, "grad_norm": 1.0957515239715576, "learning_rate": 9.746394736842106e-05, "loss": 0.4409, "step": 29651 }, { "epoch": 1.6604322992496359, "grad_norm": 1.3667657375335693, "learning_rate": 9.746368421052631e-05, "loss": 0.5785, "step": 29652 }, { "epoch": 1.6604882965617649, "grad_norm": 1.3684552907943726, "learning_rate": 9.746342105263159e-05, "loss": 0.4376, "step": 29653 }, { "epoch": 1.660544293873894, "grad_norm": 
1.152071475982666, "learning_rate": 9.746315789473685e-05, "loss": 0.4768, "step": 29654 }, { "epoch": 1.660600291186023, "grad_norm": 3.4706151485443115, "learning_rate": 9.746289473684211e-05, "loss": 0.5972, "step": 29655 }, { "epoch": 1.660656288498152, "grad_norm": 1.246666431427002, "learning_rate": 9.746263157894737e-05, "loss": 0.4079, "step": 29656 }, { "epoch": 1.660712285810281, "grad_norm": 1.2707175016403198, "learning_rate": 9.746236842105264e-05, "loss": 0.3829, "step": 29657 }, { "epoch": 1.66076828312241, "grad_norm": 1.4588186740875244, "learning_rate": 9.74621052631579e-05, "loss": 0.4542, "step": 29658 }, { "epoch": 1.660824280434539, "grad_norm": 1.4935686588287354, "learning_rate": 9.746184210526317e-05, "loss": 0.393, "step": 29659 }, { "epoch": 1.660880277746668, "grad_norm": 1.3570958375930786, "learning_rate": 9.746157894736842e-05, "loss": 0.3965, "step": 29660 }, { "epoch": 1.660936275058797, "grad_norm": 1.4252172708511353, "learning_rate": 9.746131578947368e-05, "loss": 0.5035, "step": 29661 }, { "epoch": 1.660992272370926, "grad_norm": 1.4909636974334717, "learning_rate": 9.746105263157895e-05, "loss": 0.4325, "step": 29662 }, { "epoch": 1.661048269683055, "grad_norm": 1.0536448955535889, "learning_rate": 9.746078947368421e-05, "loss": 0.2782, "step": 29663 }, { "epoch": 1.661104266995184, "grad_norm": 1.175025463104248, "learning_rate": 9.746052631578949e-05, "loss": 0.3818, "step": 29664 }, { "epoch": 1.6611602643073131, "grad_norm": 1.2664695978164673, "learning_rate": 9.746026315789473e-05, "loss": 0.399, "step": 29665 }, { "epoch": 1.6612162616194421, "grad_norm": 1.3744924068450928, "learning_rate": 9.746e-05, "loss": 0.5242, "step": 29666 }, { "epoch": 1.6612722589315712, "grad_norm": 1.2288700342178345, "learning_rate": 9.745973684210527e-05, "loss": 0.4353, "step": 29667 }, { "epoch": 1.6613282562437002, "grad_norm": 1.1383012533187866, "learning_rate": 9.745947368421054e-05, "loss": 0.3404, "step": 29668 }, { "epoch": 
1.6613842535558292, "grad_norm": 1.3235902786254883, "learning_rate": 9.74592105263158e-05, "loss": 0.4674, "step": 29669 }, { "epoch": 1.6614402508679582, "grad_norm": 1.4621721506118774, "learning_rate": 9.745894736842106e-05, "loss": 0.5222, "step": 29670 }, { "epoch": 1.6614962481800872, "grad_norm": 1.462956428527832, "learning_rate": 9.745868421052632e-05, "loss": 0.4607, "step": 29671 }, { "epoch": 1.6615522454922163, "grad_norm": 1.5089340209960938, "learning_rate": 9.745842105263159e-05, "loss": 0.4257, "step": 29672 }, { "epoch": 1.6616082428043453, "grad_norm": 1.2238178253173828, "learning_rate": 9.745815789473685e-05, "loss": 0.5535, "step": 29673 }, { "epoch": 1.6616642401164743, "grad_norm": 1.066439151763916, "learning_rate": 9.745789473684211e-05, "loss": 0.4257, "step": 29674 }, { "epoch": 1.6617202374286033, "grad_norm": 1.2708643674850464, "learning_rate": 9.745763157894737e-05, "loss": 0.3578, "step": 29675 }, { "epoch": 1.6617762347407323, "grad_norm": 1.596035361289978, "learning_rate": 9.745736842105264e-05, "loss": 0.7007, "step": 29676 }, { "epoch": 1.6618322320528613, "grad_norm": 1.338188648223877, "learning_rate": 9.74571052631579e-05, "loss": 0.4382, "step": 29677 }, { "epoch": 1.6618882293649904, "grad_norm": 1.3567324876785278, "learning_rate": 9.745684210526316e-05, "loss": 0.5165, "step": 29678 }, { "epoch": 1.6619442266771194, "grad_norm": 1.4828284978866577, "learning_rate": 9.745657894736842e-05, "loss": 0.693, "step": 29679 }, { "epoch": 1.6620002239892484, "grad_norm": 1.2866010665893555, "learning_rate": 9.745631578947368e-05, "loss": 0.4845, "step": 29680 }, { "epoch": 1.6620562213013774, "grad_norm": 1.3637531995773315, "learning_rate": 9.745605263157896e-05, "loss": 0.3814, "step": 29681 }, { "epoch": 1.6621122186135064, "grad_norm": 1.154879093170166, "learning_rate": 9.745578947368422e-05, "loss": 0.4708, "step": 29682 }, { "epoch": 1.6621682159256355, "grad_norm": 1.2102686166763306, "learning_rate": 
9.745552631578947e-05, "loss": 0.543, "step": 29683 }, { "epoch": 1.6622242132377645, "grad_norm": 1.2043113708496094, "learning_rate": 9.745526315789473e-05, "loss": 0.4621, "step": 29684 }, { "epoch": 1.6622802105498935, "grad_norm": 1.490852952003479, "learning_rate": 9.745500000000001e-05, "loss": 0.399, "step": 29685 }, { "epoch": 1.6623362078620225, "grad_norm": 1.358458161354065, "learning_rate": 9.745473684210527e-05, "loss": 0.5194, "step": 29686 }, { "epoch": 1.6623922051741515, "grad_norm": 1.3399975299835205, "learning_rate": 9.745447368421054e-05, "loss": 0.5748, "step": 29687 }, { "epoch": 1.6624482024862806, "grad_norm": 1.2152906656265259, "learning_rate": 9.745421052631579e-05, "loss": 0.4133, "step": 29688 }, { "epoch": 1.6625041997984096, "grad_norm": 1.3359047174453735, "learning_rate": 9.745394736842106e-05, "loss": 0.5731, "step": 29689 }, { "epoch": 1.6625601971105386, "grad_norm": 1.3178585767745972, "learning_rate": 9.745368421052632e-05, "loss": 0.4571, "step": 29690 }, { "epoch": 1.6626161944226676, "grad_norm": 1.138414978981018, "learning_rate": 9.74534210526316e-05, "loss": 0.4158, "step": 29691 }, { "epoch": 1.6626721917347966, "grad_norm": 1.1555383205413818, "learning_rate": 9.745315789473684e-05, "loss": 0.3688, "step": 29692 }, { "epoch": 1.6627281890469257, "grad_norm": 1.456110954284668, "learning_rate": 9.745289473684211e-05, "loss": 0.4896, "step": 29693 }, { "epoch": 1.6627841863590547, "grad_norm": 1.2308326959609985, "learning_rate": 9.745263157894737e-05, "loss": 0.3622, "step": 29694 }, { "epoch": 1.6628401836711837, "grad_norm": 1.0821744203567505, "learning_rate": 9.745236842105263e-05, "loss": 0.3717, "step": 29695 }, { "epoch": 1.6628961809833127, "grad_norm": 1.3416963815689087, "learning_rate": 9.74521052631579e-05, "loss": 0.455, "step": 29696 }, { "epoch": 1.6629521782954417, "grad_norm": 1.3744860887527466, "learning_rate": 9.745184210526315e-05, "loss": 0.3445, "step": 29697 }, { "epoch": 1.6630081756075707, 
"grad_norm": 1.2955223321914673, "learning_rate": 9.745157894736843e-05, "loss": 0.4848, "step": 29698 }, { "epoch": 1.6630641729196998, "grad_norm": 1.7026212215423584, "learning_rate": 9.745131578947368e-05, "loss": 0.4915, "step": 29699 }, { "epoch": 1.6631201702318288, "grad_norm": 1.5713258981704712, "learning_rate": 9.745105263157896e-05, "loss": 0.4763, "step": 29700 }, { "epoch": 1.6631761675439578, "grad_norm": 1.4540106058120728, "learning_rate": 9.745078947368422e-05, "loss": 0.3707, "step": 29701 }, { "epoch": 1.6632321648560868, "grad_norm": 1.380915880203247, "learning_rate": 9.745052631578948e-05, "loss": 0.477, "step": 29702 }, { "epoch": 1.6632881621682158, "grad_norm": 1.3537237644195557, "learning_rate": 9.745026315789474e-05, "loss": 0.5506, "step": 29703 }, { "epoch": 1.6633441594803449, "grad_norm": 1.5406357049942017, "learning_rate": 9.745000000000001e-05, "loss": 0.767, "step": 29704 }, { "epoch": 1.6634001567924739, "grad_norm": 1.2187687158584595, "learning_rate": 9.744973684210527e-05, "loss": 0.4966, "step": 29705 }, { "epoch": 1.663456154104603, "grad_norm": 1.3390312194824219, "learning_rate": 9.744947368421053e-05, "loss": 0.4701, "step": 29706 }, { "epoch": 1.663512151416732, "grad_norm": 1.160866618156433, "learning_rate": 9.744921052631579e-05, "loss": 0.4631, "step": 29707 }, { "epoch": 1.663568148728861, "grad_norm": 1.4030683040618896, "learning_rate": 9.744894736842106e-05, "loss": 0.5906, "step": 29708 }, { "epoch": 1.66362414604099, "grad_norm": 1.3373100757598877, "learning_rate": 9.744868421052632e-05, "loss": 0.4799, "step": 29709 }, { "epoch": 1.663680143353119, "grad_norm": 1.0900112390518188, "learning_rate": 9.744842105263158e-05, "loss": 0.3698, "step": 29710 }, { "epoch": 1.663736140665248, "grad_norm": 1.1846494674682617, "learning_rate": 9.744815789473684e-05, "loss": 0.5321, "step": 29711 }, { "epoch": 1.663792137977377, "grad_norm": 1.3239916563034058, "learning_rate": 9.74478947368421e-05, "loss": 0.4737, 
"step": 29712 }, { "epoch": 1.663848135289506, "grad_norm": 1.345616102218628, "learning_rate": 9.744763157894738e-05, "loss": 0.4982, "step": 29713 }, { "epoch": 1.663904132601635, "grad_norm": 1.169311285018921, "learning_rate": 9.744736842105263e-05, "loss": 0.3796, "step": 29714 }, { "epoch": 1.663960129913764, "grad_norm": 1.1881449222564697, "learning_rate": 9.74471052631579e-05, "loss": 0.3696, "step": 29715 }, { "epoch": 1.664016127225893, "grad_norm": 1.3634545803070068, "learning_rate": 9.744684210526315e-05, "loss": 0.4455, "step": 29716 }, { "epoch": 1.6640721245380221, "grad_norm": 1.3136138916015625, "learning_rate": 9.744657894736843e-05, "loss": 0.538, "step": 29717 }, { "epoch": 1.6641281218501511, "grad_norm": 1.3176722526550293, "learning_rate": 9.744631578947369e-05, "loss": 0.5301, "step": 29718 }, { "epoch": 1.6641841191622802, "grad_norm": 1.1613926887512207, "learning_rate": 9.744605263157896e-05, "loss": 0.4518, "step": 29719 }, { "epoch": 1.6642401164744092, "grad_norm": 1.5034033060073853, "learning_rate": 9.744578947368421e-05, "loss": 0.5178, "step": 29720 }, { "epoch": 1.6642961137865382, "grad_norm": 1.380826711654663, "learning_rate": 9.744552631578948e-05, "loss": 0.3322, "step": 29721 }, { "epoch": 1.6643521110986672, "grad_norm": 1.1774296760559082, "learning_rate": 9.744526315789474e-05, "loss": 0.4113, "step": 29722 }, { "epoch": 1.6644081084107962, "grad_norm": 1.336766004562378, "learning_rate": 9.744500000000001e-05, "loss": 0.5364, "step": 29723 }, { "epoch": 1.6644641057229252, "grad_norm": 1.1515580415725708, "learning_rate": 9.744473684210527e-05, "loss": 0.4323, "step": 29724 }, { "epoch": 1.6645201030350543, "grad_norm": 1.148767352104187, "learning_rate": 9.744447368421053e-05, "loss": 0.3941, "step": 29725 }, { "epoch": 1.6645761003471833, "grad_norm": 1.420762538909912, "learning_rate": 9.744421052631579e-05, "loss": 0.4618, "step": 29726 }, { "epoch": 1.6646320976593123, "grad_norm": 1.3800610303878784, 
"learning_rate": 9.744394736842107e-05, "loss": 0.5246, "step": 29727 }, { "epoch": 1.6646880949714413, "grad_norm": 1.1994104385375977, "learning_rate": 9.744368421052633e-05, "loss": 0.4552, "step": 29728 }, { "epoch": 1.6647440922835703, "grad_norm": 1.433768391609192, "learning_rate": 9.744342105263157e-05, "loss": 0.4113, "step": 29729 }, { "epoch": 1.6648000895956994, "grad_norm": 1.3951174020767212, "learning_rate": 9.744315789473684e-05, "loss": 0.6001, "step": 29730 }, { "epoch": 1.6648560869078284, "grad_norm": 1.1915291547775269, "learning_rate": 9.74428947368421e-05, "loss": 0.484, "step": 29731 }, { "epoch": 1.6649120842199574, "grad_norm": 4.183198928833008, "learning_rate": 9.744263157894738e-05, "loss": 0.4532, "step": 29732 }, { "epoch": 1.6649680815320864, "grad_norm": 1.5106087923049927, "learning_rate": 9.744236842105264e-05, "loss": 0.4033, "step": 29733 }, { "epoch": 1.6650240788442154, "grad_norm": 1.2340867519378662, "learning_rate": 9.74421052631579e-05, "loss": 0.3915, "step": 29734 }, { "epoch": 1.6650800761563445, "grad_norm": 1.3168102502822876, "learning_rate": 9.744184210526316e-05, "loss": 0.4267, "step": 29735 }, { "epoch": 1.6651360734684735, "grad_norm": 1.1634732484817505, "learning_rate": 9.744157894736843e-05, "loss": 0.36, "step": 29736 }, { "epoch": 1.6651920707806025, "grad_norm": 1.2173901796340942, "learning_rate": 9.744131578947369e-05, "loss": 0.5981, "step": 29737 }, { "epoch": 1.6652480680927315, "grad_norm": 1.174443244934082, "learning_rate": 9.744105263157895e-05, "loss": 0.5204, "step": 29738 }, { "epoch": 1.6653040654048605, "grad_norm": 1.5484994649887085, "learning_rate": 9.744078947368421e-05, "loss": 0.3388, "step": 29739 }, { "epoch": 1.6653600627169896, "grad_norm": 1.8700287342071533, "learning_rate": 9.744052631578948e-05, "loss": 0.6348, "step": 29740 }, { "epoch": 1.6654160600291186, "grad_norm": 1.558946132659912, "learning_rate": 9.744026315789474e-05, "loss": 0.4442, "step": 29741 }, { "epoch": 
1.6654720573412476, "grad_norm": 1.4027788639068604, "learning_rate": 9.744000000000002e-05, "loss": 0.4474, "step": 29742 }, { "epoch": 1.6655280546533766, "grad_norm": 1.3163743019104004, "learning_rate": 9.743973684210526e-05, "loss": 0.3866, "step": 29743 }, { "epoch": 1.6655840519655056, "grad_norm": 1.152384638786316, "learning_rate": 9.743947368421054e-05, "loss": 0.4037, "step": 29744 }, { "epoch": 1.6656400492776346, "grad_norm": 1.4696251153945923, "learning_rate": 9.74392105263158e-05, "loss": 0.5558, "step": 29745 }, { "epoch": 1.6656960465897637, "grad_norm": 1.2139456272125244, "learning_rate": 9.743894736842105e-05, "loss": 0.3967, "step": 29746 }, { "epoch": 1.6657520439018927, "grad_norm": 1.3266783952713013, "learning_rate": 9.743868421052631e-05, "loss": 0.4887, "step": 29747 }, { "epoch": 1.6658080412140217, "grad_norm": 1.4412658214569092, "learning_rate": 9.743842105263157e-05, "loss": 0.5102, "step": 29748 }, { "epoch": 1.6658640385261507, "grad_norm": 1.185046672821045, "learning_rate": 9.743815789473685e-05, "loss": 0.341, "step": 29749 }, { "epoch": 1.6659200358382797, "grad_norm": 1.3606398105621338, "learning_rate": 9.743789473684211e-05, "loss": 0.5179, "step": 29750 }, { "epoch": 1.6659760331504088, "grad_norm": 1.2505133152008057, "learning_rate": 9.743763157894738e-05, "loss": 0.4396, "step": 29751 }, { "epoch": 1.6660320304625378, "grad_norm": 1.2264609336853027, "learning_rate": 9.743736842105263e-05, "loss": 0.3688, "step": 29752 }, { "epoch": 1.6660880277746668, "grad_norm": 1.205929160118103, "learning_rate": 9.74371052631579e-05, "loss": 0.4127, "step": 29753 }, { "epoch": 1.6661440250867958, "grad_norm": 1.192292332649231, "learning_rate": 9.743684210526316e-05, "loss": 0.4382, "step": 29754 }, { "epoch": 1.6662000223989248, "grad_norm": 15.911165237426758, "learning_rate": 9.743657894736843e-05, "loss": 0.3512, "step": 29755 }, { "epoch": 1.6662560197110539, "grad_norm": 1.7799100875854492, "learning_rate": 
9.743631578947369e-05, "loss": 0.5259, "step": 29756 }, { "epoch": 1.6663120170231829, "grad_norm": 3.8827314376831055, "learning_rate": 9.743605263157895e-05, "loss": 0.5644, "step": 29757 }, { "epoch": 1.666368014335312, "grad_norm": 1.453129529953003, "learning_rate": 9.743578947368421e-05, "loss": 0.6702, "step": 29758 }, { "epoch": 1.666424011647441, "grad_norm": 1.8428001403808594, "learning_rate": 9.743552631578949e-05, "loss": 0.4429, "step": 29759 }, { "epoch": 1.66648000895957, "grad_norm": 1.1079034805297852, "learning_rate": 9.743526315789475e-05, "loss": 0.4014, "step": 29760 }, { "epoch": 1.666536006271699, "grad_norm": 1.9630281925201416, "learning_rate": 9.7435e-05, "loss": 0.5883, "step": 29761 }, { "epoch": 1.666592003583828, "grad_norm": 2.018019914627075, "learning_rate": 9.743473684210526e-05, "loss": 0.6178, "step": 29762 }, { "epoch": 1.666648000895957, "grad_norm": 1.1073989868164062, "learning_rate": 9.743447368421052e-05, "loss": 0.4937, "step": 29763 }, { "epoch": 1.666703998208086, "grad_norm": 1.2717527151107788, "learning_rate": 9.74342105263158e-05, "loss": 0.5471, "step": 29764 }, { "epoch": 1.666759995520215, "grad_norm": 1.579235315322876, "learning_rate": 9.743394736842106e-05, "loss": 0.4843, "step": 29765 }, { "epoch": 1.666815992832344, "grad_norm": 1.0931422710418701, "learning_rate": 9.743368421052632e-05, "loss": 0.3776, "step": 29766 }, { "epoch": 1.666871990144473, "grad_norm": 1.1754850149154663, "learning_rate": 9.743342105263158e-05, "loss": 0.3256, "step": 29767 }, { "epoch": 1.666927987456602, "grad_norm": 1.2110705375671387, "learning_rate": 9.743315789473685e-05, "loss": 0.4423, "step": 29768 }, { "epoch": 1.666983984768731, "grad_norm": 1.223608374595642, "learning_rate": 9.743289473684211e-05, "loss": 0.4686, "step": 29769 }, { "epoch": 1.6670399820808601, "grad_norm": 1.2251781225204468, "learning_rate": 9.743263157894737e-05, "loss": 0.432, "step": 29770 }, { "epoch": 1.6670959793929891, "grad_norm": 
1.308613657951355, "learning_rate": 9.743236842105263e-05, "loss": 0.531, "step": 29771 }, { "epoch": 1.6671519767051182, "grad_norm": 1.401737928390503, "learning_rate": 9.74321052631579e-05, "loss": 0.5821, "step": 29772 }, { "epoch": 1.6672079740172472, "grad_norm": 1.3729923963546753, "learning_rate": 9.743184210526316e-05, "loss": 0.4739, "step": 29773 }, { "epoch": 1.6672639713293762, "grad_norm": 1.2187713384628296, "learning_rate": 9.743157894736844e-05, "loss": 0.5502, "step": 29774 }, { "epoch": 1.6673199686415052, "grad_norm": 1.1433850526809692, "learning_rate": 9.743131578947368e-05, "loss": 0.443, "step": 29775 }, { "epoch": 1.6673759659536342, "grad_norm": 1.6556144952774048, "learning_rate": 9.743105263157895e-05, "loss": 0.5669, "step": 29776 }, { "epoch": 1.6674319632657633, "grad_norm": 1.1630947589874268, "learning_rate": 9.743078947368421e-05, "loss": 0.3854, "step": 29777 }, { "epoch": 1.6674879605778923, "grad_norm": 1.1792995929718018, "learning_rate": 9.743052631578949e-05, "loss": 0.4185, "step": 29778 }, { "epoch": 1.6675439578900213, "grad_norm": 1.5685478448867798, "learning_rate": 9.743026315789475e-05, "loss": 0.5321, "step": 29779 }, { "epoch": 1.6675999552021503, "grad_norm": 1.180819034576416, "learning_rate": 9.743000000000001e-05, "loss": 0.3929, "step": 29780 }, { "epoch": 1.6676559525142793, "grad_norm": 1.367296814918518, "learning_rate": 9.742973684210527e-05, "loss": 0.4154, "step": 29781 }, { "epoch": 1.6677119498264084, "grad_norm": 1.4089455604553223, "learning_rate": 9.742947368421053e-05, "loss": 0.4036, "step": 29782 }, { "epoch": 1.6677679471385374, "grad_norm": 1.3270117044448853, "learning_rate": 9.74292105263158e-05, "loss": 0.5185, "step": 29783 }, { "epoch": 1.6678239444506664, "grad_norm": 1.379282832145691, "learning_rate": 9.742894736842105e-05, "loss": 0.3957, "step": 29784 }, { "epoch": 1.6678799417627954, "grad_norm": 1.2152642011642456, "learning_rate": 9.742868421052632e-05, "loss": 0.4632, "step": 29785 
}, { "epoch": 1.6679359390749244, "grad_norm": 1.2232376337051392, "learning_rate": 9.742842105263158e-05, "loss": 0.4157, "step": 29786 }, { "epoch": 1.6679919363870535, "grad_norm": 2.0165438652038574, "learning_rate": 9.742815789473685e-05, "loss": 0.414, "step": 29787 }, { "epoch": 1.6680479336991825, "grad_norm": 44.752079010009766, "learning_rate": 9.742789473684211e-05, "loss": 0.388, "step": 29788 }, { "epoch": 1.6681039310113115, "grad_norm": 1.298831820487976, "learning_rate": 9.742763157894737e-05, "loss": 0.6055, "step": 29789 }, { "epoch": 1.6681599283234405, "grad_norm": 1.1803098917007446, "learning_rate": 9.742736842105263e-05, "loss": 0.3863, "step": 29790 }, { "epoch": 1.6682159256355695, "grad_norm": 1.267655849456787, "learning_rate": 9.74271052631579e-05, "loss": 0.4228, "step": 29791 }, { "epoch": 1.6682719229476985, "grad_norm": 1.3102296590805054, "learning_rate": 9.742684210526316e-05, "loss": 0.4232, "step": 29792 }, { "epoch": 1.6683279202598276, "grad_norm": 1.2054402828216553, "learning_rate": 9.742657894736842e-05, "loss": 0.4745, "step": 29793 }, { "epoch": 1.6683839175719566, "grad_norm": 1.2526195049285889, "learning_rate": 9.742631578947368e-05, "loss": 0.3803, "step": 29794 }, { "epoch": 1.6684399148840856, "grad_norm": 1.429464340209961, "learning_rate": 9.742605263157896e-05, "loss": 0.5537, "step": 29795 }, { "epoch": 1.6684959121962146, "grad_norm": 1.1986188888549805, "learning_rate": 9.742578947368422e-05, "loss": 0.3575, "step": 29796 }, { "epoch": 1.6685519095083436, "grad_norm": 1.2700387239456177, "learning_rate": 9.742552631578949e-05, "loss": 0.4072, "step": 29797 }, { "epoch": 1.6686079068204727, "grad_norm": 1.1243160963058472, "learning_rate": 9.742526315789474e-05, "loss": 0.453, "step": 29798 }, { "epoch": 1.6686639041326017, "grad_norm": 1.2256789207458496, "learning_rate": 9.7425e-05, "loss": 0.3341, "step": 29799 }, { "epoch": 1.6687199014447307, "grad_norm": 1.7087154388427734, "learning_rate": 
9.742473684210527e-05, "loss": 0.4214, "step": 29800 }, { "epoch": 1.6687758987568597, "grad_norm": 1.2494076490402222, "learning_rate": 9.742447368421053e-05, "loss": 0.609, "step": 29801 }, { "epoch": 1.6688318960689887, "grad_norm": 1.8516823053359985, "learning_rate": 9.742421052631579e-05, "loss": 0.411, "step": 29802 }, { "epoch": 1.6688878933811178, "grad_norm": 1.2978487014770508, "learning_rate": 9.742394736842105e-05, "loss": 0.5197, "step": 29803 }, { "epoch": 1.6689438906932468, "grad_norm": 1.579337239265442, "learning_rate": 9.742368421052632e-05, "loss": 0.5147, "step": 29804 }, { "epoch": 1.6689998880053758, "grad_norm": 1.4517598152160645, "learning_rate": 9.742342105263158e-05, "loss": 0.5645, "step": 29805 }, { "epoch": 1.6690558853175048, "grad_norm": 1.1208311319351196, "learning_rate": 9.742315789473686e-05, "loss": 0.4752, "step": 29806 }, { "epoch": 1.6691118826296338, "grad_norm": 1.1679915189743042, "learning_rate": 9.74228947368421e-05, "loss": 0.4083, "step": 29807 }, { "epoch": 1.6691678799417629, "grad_norm": 1.585705041885376, "learning_rate": 9.742263157894737e-05, "loss": 0.4569, "step": 29808 }, { "epoch": 1.6692238772538919, "grad_norm": 1.0873582363128662, "learning_rate": 9.742236842105263e-05, "loss": 0.39, "step": 29809 }, { "epoch": 1.669279874566021, "grad_norm": 1.1331446170806885, "learning_rate": 9.742210526315791e-05, "loss": 0.4536, "step": 29810 }, { "epoch": 1.66933587187815, "grad_norm": 1.2082889080047607, "learning_rate": 9.742184210526317e-05, "loss": 0.4531, "step": 29811 }, { "epoch": 1.669391869190279, "grad_norm": 1.4118036031723022, "learning_rate": 9.742157894736843e-05, "loss": 0.3652, "step": 29812 }, { "epoch": 1.669447866502408, "grad_norm": 1.1882736682891846, "learning_rate": 9.742131578947369e-05, "loss": 0.3823, "step": 29813 }, { "epoch": 1.669503863814537, "grad_norm": 1.3445663452148438, "learning_rate": 9.742105263157896e-05, "loss": 0.4056, "step": 29814 }, { "epoch": 1.669559861126666, 
"grad_norm": 1.6700886487960815, "learning_rate": 9.742078947368422e-05, "loss": 0.7545, "step": 29815 }, { "epoch": 1.669615858438795, "grad_norm": 1.2657943964004517, "learning_rate": 9.742052631578948e-05, "loss": 0.4648, "step": 29816 }, { "epoch": 1.669671855750924, "grad_norm": 1.2922332286834717, "learning_rate": 9.742026315789474e-05, "loss": 0.4243, "step": 29817 }, { "epoch": 1.669727853063053, "grad_norm": 1.136527419090271, "learning_rate": 9.742e-05, "loss": 0.4334, "step": 29818 }, { "epoch": 1.669783850375182, "grad_norm": 1.1991740465164185, "learning_rate": 9.741973684210527e-05, "loss": 0.4208, "step": 29819 }, { "epoch": 1.669839847687311, "grad_norm": 1.3565469980239868, "learning_rate": 9.741947368421053e-05, "loss": 0.5002, "step": 29820 }, { "epoch": 1.66989584499944, "grad_norm": 1.618970274925232, "learning_rate": 9.741921052631579e-05, "loss": 0.446, "step": 29821 }, { "epoch": 1.6699518423115691, "grad_norm": 2.0439717769622803, "learning_rate": 9.741894736842105e-05, "loss": 0.5478, "step": 29822 }, { "epoch": 1.6700078396236981, "grad_norm": 1.4054450988769531, "learning_rate": 9.741868421052632e-05, "loss": 0.4755, "step": 29823 }, { "epoch": 1.6700638369358272, "grad_norm": 1.5218238830566406, "learning_rate": 9.741842105263158e-05, "loss": 0.4832, "step": 29824 }, { "epoch": 1.6701198342479562, "grad_norm": 1.2585930824279785, "learning_rate": 9.741815789473684e-05, "loss": 0.6058, "step": 29825 }, { "epoch": 1.6701758315600852, "grad_norm": 1.4107081890106201, "learning_rate": 9.74178947368421e-05, "loss": 0.4306, "step": 29826 }, { "epoch": 1.6702318288722142, "grad_norm": 1.1761633157730103, "learning_rate": 9.741763157894738e-05, "loss": 0.4319, "step": 29827 }, { "epoch": 1.6702878261843432, "grad_norm": 1.3887361288070679, "learning_rate": 9.741736842105264e-05, "loss": 0.6162, "step": 29828 }, { "epoch": 1.6703438234964723, "grad_norm": 1.3429697751998901, "learning_rate": 9.741710526315791e-05, "loss": 0.4642, "step": 29829 
}, { "epoch": 1.6703998208086013, "grad_norm": 1.3777918815612793, "learning_rate": 9.741684210526316e-05, "loss": 0.5079, "step": 29830 }, { "epoch": 1.6704558181207303, "grad_norm": 1.3541017770767212, "learning_rate": 9.741657894736843e-05, "loss": 0.513, "step": 29831 }, { "epoch": 1.6705118154328593, "grad_norm": 1.4516394138336182, "learning_rate": 9.741631578947369e-05, "loss": 0.4262, "step": 29832 }, { "epoch": 1.6705678127449883, "grad_norm": 1.2368522882461548, "learning_rate": 9.741605263157895e-05, "loss": 0.4667, "step": 29833 }, { "epoch": 1.6706238100571174, "grad_norm": 1.223478078842163, "learning_rate": 9.741578947368422e-05, "loss": 0.4711, "step": 29834 }, { "epoch": 1.6706798073692464, "grad_norm": 1.3523377180099487, "learning_rate": 9.741552631578947e-05, "loss": 0.4679, "step": 29835 }, { "epoch": 1.6707358046813754, "grad_norm": 1.3053621053695679, "learning_rate": 9.741526315789474e-05, "loss": 0.3716, "step": 29836 }, { "epoch": 1.6707918019935044, "grad_norm": 2.675323247909546, "learning_rate": 9.7415e-05, "loss": 0.3351, "step": 29837 }, { "epoch": 1.6708477993056334, "grad_norm": 1.5996848344802856, "learning_rate": 9.741473684210527e-05, "loss": 0.3857, "step": 29838 }, { "epoch": 1.6709037966177624, "grad_norm": 1.1878904104232788, "learning_rate": 9.741447368421052e-05, "loss": 0.4158, "step": 29839 }, { "epoch": 1.6709597939298915, "grad_norm": 1.2107948064804077, "learning_rate": 9.74142105263158e-05, "loss": 0.4259, "step": 29840 }, { "epoch": 1.6710157912420205, "grad_norm": 2.0347845554351807, "learning_rate": 9.741394736842105e-05, "loss": 0.4988, "step": 29841 }, { "epoch": 1.6710717885541495, "grad_norm": 1.2873926162719727, "learning_rate": 9.741368421052633e-05, "loss": 0.4446, "step": 29842 }, { "epoch": 1.6711277858662785, "grad_norm": 1.201278805732727, "learning_rate": 9.741342105263159e-05, "loss": 0.4185, "step": 29843 }, { "epoch": 1.6711837831784075, "grad_norm": 1.1728787422180176, "learning_rate": 
9.741315789473685e-05, "loss": 0.3828, "step": 29844 }, { "epoch": 1.6712397804905366, "grad_norm": 1.3575496673583984, "learning_rate": 9.74128947368421e-05, "loss": 0.4228, "step": 29845 }, { "epoch": 1.6712957778026656, "grad_norm": 1.3685311079025269, "learning_rate": 9.741263157894738e-05, "loss": 0.4682, "step": 29846 }, { "epoch": 1.6713517751147946, "grad_norm": 1.4752414226531982, "learning_rate": 9.741236842105264e-05, "loss": 0.3884, "step": 29847 }, { "epoch": 1.6714077724269236, "grad_norm": 1.190846562385559, "learning_rate": 9.74121052631579e-05, "loss": 0.4626, "step": 29848 }, { "epoch": 1.6714637697390526, "grad_norm": 1.1097655296325684, "learning_rate": 9.741184210526316e-05, "loss": 0.414, "step": 29849 }, { "epoch": 1.6715197670511817, "grad_norm": 1.0734790563583374, "learning_rate": 9.741157894736842e-05, "loss": 0.4411, "step": 29850 }, { "epoch": 1.6715757643633107, "grad_norm": 1.2061095237731934, "learning_rate": 9.741131578947369e-05, "loss": 0.4699, "step": 29851 }, { "epoch": 1.6716317616754397, "grad_norm": 2.0681514739990234, "learning_rate": 9.741105263157895e-05, "loss": 0.4441, "step": 29852 }, { "epoch": 1.6716877589875687, "grad_norm": 1.153032660484314, "learning_rate": 9.741078947368421e-05, "loss": 0.4403, "step": 29853 }, { "epoch": 1.6717437562996977, "grad_norm": 1.0701205730438232, "learning_rate": 9.741052631578947e-05, "loss": 0.3503, "step": 29854 }, { "epoch": 1.6717997536118268, "grad_norm": 1.1991156339645386, "learning_rate": 9.741026315789474e-05, "loss": 0.4324, "step": 29855 }, { "epoch": 1.6718557509239558, "grad_norm": 1.3591337203979492, "learning_rate": 9.741e-05, "loss": 0.5427, "step": 29856 }, { "epoch": 1.6719117482360848, "grad_norm": 1.1301558017730713, "learning_rate": 9.740973684210526e-05, "loss": 0.4113, "step": 29857 }, { "epoch": 1.6719677455482138, "grad_norm": 1.305549144744873, "learning_rate": 9.740947368421052e-05, "loss": 0.5043, "step": 29858 }, { "epoch": 1.6720237428603428, "grad_norm": 
3.242582321166992, "learning_rate": 9.74092105263158e-05, "loss": 0.5073, "step": 29859 }, { "epoch": 1.6720797401724719, "grad_norm": 1.270106315612793, "learning_rate": 9.740894736842106e-05, "loss": 0.3616, "step": 29860 }, { "epoch": 1.6721357374846009, "grad_norm": 1.0806063413619995, "learning_rate": 9.740868421052633e-05, "loss": 0.4177, "step": 29861 }, { "epoch": 1.67219173479673, "grad_norm": 1.2932080030441284, "learning_rate": 9.740842105263158e-05, "loss": 0.4994, "step": 29862 }, { "epoch": 1.672247732108859, "grad_norm": 1.2522165775299072, "learning_rate": 9.740815789473685e-05, "loss": 0.3813, "step": 29863 }, { "epoch": 1.672303729420988, "grad_norm": 1.3221081495285034, "learning_rate": 9.740789473684211e-05, "loss": 0.5046, "step": 29864 }, { "epoch": 1.672359726733117, "grad_norm": 3.6724979877471924, "learning_rate": 9.740763157894738e-05, "loss": 0.4389, "step": 29865 }, { "epoch": 1.672415724045246, "grad_norm": 1.2772105932235718, "learning_rate": 9.740736842105264e-05, "loss": 0.4686, "step": 29866 }, { "epoch": 1.672471721357375, "grad_norm": 1.2992759943008423, "learning_rate": 9.740710526315789e-05, "loss": 0.5575, "step": 29867 }, { "epoch": 1.672527718669504, "grad_norm": 1.3870601654052734, "learning_rate": 9.740684210526316e-05, "loss": 0.4174, "step": 29868 }, { "epoch": 1.672583715981633, "grad_norm": 1.2118297815322876, "learning_rate": 9.740657894736842e-05, "loss": 0.4362, "step": 29869 }, { "epoch": 1.672639713293762, "grad_norm": 1.3227561712265015, "learning_rate": 9.74063157894737e-05, "loss": 0.3491, "step": 29870 }, { "epoch": 1.672695710605891, "grad_norm": 1.1811689138412476, "learning_rate": 9.740605263157895e-05, "loss": 0.5631, "step": 29871 }, { "epoch": 1.67275170791802, "grad_norm": 1.6219584941864014, "learning_rate": 9.740578947368421e-05, "loss": 0.6717, "step": 29872 }, { "epoch": 1.672807705230149, "grad_norm": 1.3326748609542847, "learning_rate": 9.740552631578947e-05, "loss": 0.4784, "step": 29873 }, { 
"epoch": 1.6728637025422781, "grad_norm": 1.8884485960006714, "learning_rate": 9.740526315789475e-05, "loss": 0.3688, "step": 29874 }, { "epoch": 1.6729196998544071, "grad_norm": 1.4678939580917358, "learning_rate": 9.7405e-05, "loss": 0.4771, "step": 29875 }, { "epoch": 1.6729756971665362, "grad_norm": 1.3649877309799194, "learning_rate": 9.740473684210527e-05, "loss": 0.4531, "step": 29876 }, { "epoch": 1.6730316944786652, "grad_norm": 1.416639804840088, "learning_rate": 9.740447368421053e-05, "loss": 0.7262, "step": 29877 }, { "epoch": 1.6730876917907942, "grad_norm": 1.2157752513885498, "learning_rate": 9.74042105263158e-05, "loss": 0.4003, "step": 29878 }, { "epoch": 1.6731436891029232, "grad_norm": 1.248354196548462, "learning_rate": 9.740394736842106e-05, "loss": 0.5036, "step": 29879 }, { "epoch": 1.6731996864150522, "grad_norm": 1.21266770362854, "learning_rate": 9.740368421052632e-05, "loss": 0.432, "step": 29880 }, { "epoch": 1.6732556837271813, "grad_norm": 1.730705976486206, "learning_rate": 9.740342105263158e-05, "loss": 0.4821, "step": 29881 }, { "epoch": 1.6733116810393103, "grad_norm": 1.1938798427581787, "learning_rate": 9.740315789473685e-05, "loss": 0.4414, "step": 29882 }, { "epoch": 1.6733676783514393, "grad_norm": 7.105051040649414, "learning_rate": 9.740289473684211e-05, "loss": 0.4072, "step": 29883 }, { "epoch": 1.6734236756635683, "grad_norm": 1.474406123161316, "learning_rate": 9.740263157894737e-05, "loss": 0.5356, "step": 29884 }, { "epoch": 1.6734796729756973, "grad_norm": 1.5218783617019653, "learning_rate": 9.740236842105263e-05, "loss": 0.5101, "step": 29885 }, { "epoch": 1.6735356702878263, "grad_norm": 1.516426682472229, "learning_rate": 9.740210526315789e-05, "loss": 0.5078, "step": 29886 }, { "epoch": 1.6735916675999554, "grad_norm": 1.0792291164398193, "learning_rate": 9.740184210526316e-05, "loss": 0.4085, "step": 29887 }, { "epoch": 1.6736476649120842, "grad_norm": 1.207808256149292, "learning_rate": 9.740157894736842e-05, 
"loss": 0.3622, "step": 29888 }, { "epoch": 1.6737036622242132, "grad_norm": 1.3278062343597412, "learning_rate": 9.74013157894737e-05, "loss": 0.4181, "step": 29889 }, { "epoch": 1.6737596595363422, "grad_norm": 1.1830133199691772, "learning_rate": 9.740105263157894e-05, "loss": 0.3895, "step": 29890 }, { "epoch": 1.6738156568484712, "grad_norm": 1.3510818481445312, "learning_rate": 9.740078947368422e-05, "loss": 0.3787, "step": 29891 }, { "epoch": 1.6738716541606002, "grad_norm": 1.8418278694152832, "learning_rate": 9.740052631578948e-05, "loss": 0.4635, "step": 29892 }, { "epoch": 1.6739276514727293, "grad_norm": 1.1231051683425903, "learning_rate": 9.740026315789475e-05, "loss": 0.417, "step": 29893 }, { "epoch": 1.6739836487848583, "grad_norm": 1.301915168762207, "learning_rate": 9.74e-05, "loss": 0.3922, "step": 29894 }, { "epoch": 1.6740396460969873, "grad_norm": 1.3169394731521606, "learning_rate": 9.739973684210527e-05, "loss": 0.372, "step": 29895 }, { "epoch": 1.6740956434091163, "grad_norm": 1.2419829368591309, "learning_rate": 9.739947368421053e-05, "loss": 0.4359, "step": 29896 }, { "epoch": 1.6741516407212453, "grad_norm": 1.370413899421692, "learning_rate": 9.73992105263158e-05, "loss": 0.4605, "step": 29897 }, { "epoch": 1.6742076380333744, "grad_norm": 1.1843878030776978, "learning_rate": 9.739894736842106e-05, "loss": 0.4267, "step": 29898 }, { "epoch": 1.6742636353455034, "grad_norm": 1.4774214029312134, "learning_rate": 9.739868421052632e-05, "loss": 0.4291, "step": 29899 }, { "epoch": 1.6743196326576324, "grad_norm": 1.1830426454544067, "learning_rate": 9.739842105263158e-05, "loss": 0.3761, "step": 29900 }, { "epoch": 1.6743756299697614, "grad_norm": 1.2707479000091553, "learning_rate": 9.739815789473685e-05, "loss": 0.3663, "step": 29901 }, { "epoch": 1.6744316272818904, "grad_norm": 1.1479617357254028, "learning_rate": 9.739789473684211e-05, "loss": 0.376, "step": 29902 }, { "epoch": 1.6744876245940195, "grad_norm": 1.423410415649414, 
"learning_rate": 9.739763157894737e-05, "loss": 0.5771, "step": 29903 }, { "epoch": 1.6745436219061485, "grad_norm": 1.1610666513442993, "learning_rate": 9.739736842105263e-05, "loss": 0.4341, "step": 29904 }, { "epoch": 1.6745996192182775, "grad_norm": 1.1887913942337036, "learning_rate": 9.73971052631579e-05, "loss": 0.5333, "step": 29905 }, { "epoch": 1.6746556165304065, "grad_norm": 1.1377102136611938, "learning_rate": 9.739684210526317e-05, "loss": 0.4063, "step": 29906 }, { "epoch": 1.6747116138425355, "grad_norm": 1.2968591451644897, "learning_rate": 9.739657894736843e-05, "loss": 0.5483, "step": 29907 }, { "epoch": 1.6747676111546645, "grad_norm": 1.3600558042526245, "learning_rate": 9.739631578947369e-05, "loss": 0.4918, "step": 29908 }, { "epoch": 1.6748236084667936, "grad_norm": 1.417197585105896, "learning_rate": 9.739605263157895e-05, "loss": 0.4186, "step": 29909 }, { "epoch": 1.6748796057789226, "grad_norm": 1.2363240718841553, "learning_rate": 9.739578947368422e-05, "loss": 0.452, "step": 29910 }, { "epoch": 1.6749356030910516, "grad_norm": 1.2443267107009888, "learning_rate": 9.739552631578948e-05, "loss": 0.5973, "step": 29911 }, { "epoch": 1.6749916004031806, "grad_norm": 1.374777913093567, "learning_rate": 9.739526315789474e-05, "loss": 0.5798, "step": 29912 }, { "epoch": 1.6750475977153096, "grad_norm": 1.2771215438842773, "learning_rate": 9.7395e-05, "loss": 0.4863, "step": 29913 }, { "epoch": 1.6751035950274387, "grad_norm": 1.2088505029678345, "learning_rate": 9.739473684210527e-05, "loss": 0.5217, "step": 29914 }, { "epoch": 1.6751595923395677, "grad_norm": 1.2444523572921753, "learning_rate": 9.739447368421053e-05, "loss": 0.3946, "step": 29915 }, { "epoch": 1.6752155896516967, "grad_norm": 1.221674919128418, "learning_rate": 9.73942105263158e-05, "loss": 0.4118, "step": 29916 }, { "epoch": 1.6752715869638257, "grad_norm": 1.1710373163223267, "learning_rate": 9.739394736842105e-05, "loss": 0.4843, "step": 29917 }, { "epoch": 
1.6753275842759547, "grad_norm": 1.4426542520523071, "learning_rate": 9.739368421052632e-05, "loss": 0.3877, "step": 29918 }, { "epoch": 1.6753835815880838, "grad_norm": 1.4047588109970093, "learning_rate": 9.739342105263158e-05, "loss": 0.5278, "step": 29919 }, { "epoch": 1.6754395789002128, "grad_norm": 1.557431697845459, "learning_rate": 9.739315789473684e-05, "loss": 0.3851, "step": 29920 }, { "epoch": 1.6754955762123418, "grad_norm": 1.5626405477523804, "learning_rate": 9.739289473684212e-05, "loss": 0.5033, "step": 29921 }, { "epoch": 1.6755515735244708, "grad_norm": 1.4701859951019287, "learning_rate": 9.739263157894736e-05, "loss": 0.4843, "step": 29922 }, { "epoch": 1.6756075708365998, "grad_norm": 1.4334521293640137, "learning_rate": 9.739236842105264e-05, "loss": 0.5705, "step": 29923 }, { "epoch": 1.6756635681487289, "grad_norm": 1.1340359449386597, "learning_rate": 9.73921052631579e-05, "loss": 0.3748, "step": 29924 }, { "epoch": 1.6757195654608579, "grad_norm": 1.2505853176116943, "learning_rate": 9.739184210526317e-05, "loss": 0.4332, "step": 29925 }, { "epoch": 1.675775562772987, "grad_norm": 1.6236600875854492, "learning_rate": 9.739157894736843e-05, "loss": 0.5216, "step": 29926 }, { "epoch": 1.675831560085116, "grad_norm": 1.5575350522994995, "learning_rate": 9.739131578947369e-05, "loss": 0.6268, "step": 29927 }, { "epoch": 1.675887557397245, "grad_norm": 1.2122241258621216, "learning_rate": 9.739105263157895e-05, "loss": 0.3967, "step": 29928 }, { "epoch": 1.675943554709374, "grad_norm": 1.1989843845367432, "learning_rate": 9.739078947368422e-05, "loss": 0.4996, "step": 29929 }, { "epoch": 1.675999552021503, "grad_norm": 1.2929365634918213, "learning_rate": 9.739052631578948e-05, "loss": 0.4767, "step": 29930 }, { "epoch": 1.676055549333632, "grad_norm": 1.5503559112548828, "learning_rate": 9.739026315789474e-05, "loss": 0.4401, "step": 29931 }, { "epoch": 1.676111546645761, "grad_norm": 1.1933990716934204, "learning_rate": 9.739e-05, "loss": 
0.4673, "step": 29932 }, { "epoch": 1.67616754395789, "grad_norm": 1.1944936513900757, "learning_rate": 9.738973684210527e-05, "loss": 0.3465, "step": 29933 }, { "epoch": 1.676223541270019, "grad_norm": 1.291595697402954, "learning_rate": 9.738947368421053e-05, "loss": 0.4419, "step": 29934 }, { "epoch": 1.676279538582148, "grad_norm": 1.2692008018493652, "learning_rate": 9.73892105263158e-05, "loss": 0.4615, "step": 29935 }, { "epoch": 1.676335535894277, "grad_norm": 1.3068135976791382, "learning_rate": 9.738894736842105e-05, "loss": 0.5755, "step": 29936 }, { "epoch": 1.676391533206406, "grad_norm": 1.5347583293914795, "learning_rate": 9.738868421052631e-05, "loss": 0.4634, "step": 29937 }, { "epoch": 1.6764475305185351, "grad_norm": 1.3170474767684937, "learning_rate": 9.738842105263159e-05, "loss": 0.4509, "step": 29938 }, { "epoch": 1.6765035278306641, "grad_norm": 1.2580183744430542, "learning_rate": 9.738815789473685e-05, "loss": 0.3957, "step": 29939 }, { "epoch": 1.6765595251427932, "grad_norm": 1.0235823392868042, "learning_rate": 9.73878947368421e-05, "loss": 0.2971, "step": 29940 }, { "epoch": 1.6766155224549222, "grad_norm": 1.274720311164856, "learning_rate": 9.738763157894737e-05, "loss": 0.401, "step": 29941 }, { "epoch": 1.6766715197670512, "grad_norm": 1.6044840812683105, "learning_rate": 9.738736842105264e-05, "loss": 0.7239, "step": 29942 }, { "epoch": 1.6767275170791802, "grad_norm": 1.2107619047164917, "learning_rate": 9.73871052631579e-05, "loss": 0.4301, "step": 29943 }, { "epoch": 1.6767835143913092, "grad_norm": 1.5246989727020264, "learning_rate": 9.738684210526317e-05, "loss": 0.5253, "step": 29944 }, { "epoch": 1.6768395117034383, "grad_norm": 1.3754037618637085, "learning_rate": 9.738657894736842e-05, "loss": 0.4578, "step": 29945 }, { "epoch": 1.6768955090155673, "grad_norm": 1.1871083974838257, "learning_rate": 9.738631578947369e-05, "loss": 0.3561, "step": 29946 }, { "epoch": 1.6769515063276963, "grad_norm": 1.3134440183639526, 
"learning_rate": 9.738605263157895e-05, "loss": 0.5167, "step": 29947 }, { "epoch": 1.6770075036398253, "grad_norm": 1.5911232233047485, "learning_rate": 9.738578947368422e-05, "loss": 0.4224, "step": 29948 }, { "epoch": 1.6770635009519543, "grad_norm": 1.1805120706558228, "learning_rate": 9.738552631578947e-05, "loss": 0.4332, "step": 29949 }, { "epoch": 1.6771194982640834, "grad_norm": 1.2180368900299072, "learning_rate": 9.738526315789474e-05, "loss": 0.4843, "step": 29950 }, { "epoch": 1.6771754955762124, "grad_norm": 0.9737374782562256, "learning_rate": 9.7385e-05, "loss": 0.3797, "step": 29951 }, { "epoch": 1.6772314928883414, "grad_norm": 1.3471542596817017, "learning_rate": 9.738473684210528e-05, "loss": 0.5777, "step": 29952 }, { "epoch": 1.6772874902004704, "grad_norm": 1.407422661781311, "learning_rate": 9.738447368421054e-05, "loss": 0.4742, "step": 29953 }, { "epoch": 1.6773434875125994, "grad_norm": 1.4307019710540771, "learning_rate": 9.738421052631578e-05, "loss": 0.4776, "step": 29954 }, { "epoch": 1.6773994848247284, "grad_norm": 1.4423656463623047, "learning_rate": 9.738394736842106e-05, "loss": 0.6163, "step": 29955 }, { "epoch": 1.6774554821368575, "grad_norm": 1.8022441864013672, "learning_rate": 9.738368421052632e-05, "loss": 0.4509, "step": 29956 }, { "epoch": 1.6775114794489865, "grad_norm": 1.2700351476669312, "learning_rate": 9.738342105263159e-05, "loss": 0.4093, "step": 29957 }, { "epoch": 1.6775674767611155, "grad_norm": 1.5666680335998535, "learning_rate": 9.738315789473685e-05, "loss": 0.5372, "step": 29958 }, { "epoch": 1.6776234740732445, "grad_norm": 1.3759901523590088, "learning_rate": 9.738289473684211e-05, "loss": 0.4121, "step": 29959 }, { "epoch": 1.6776794713853735, "grad_norm": 1.5998016595840454, "learning_rate": 9.738263157894737e-05, "loss": 0.5589, "step": 29960 }, { "epoch": 1.6777354686975026, "grad_norm": 1.2518067359924316, "learning_rate": 9.738236842105264e-05, "loss": 0.3976, "step": 29961 }, { "epoch": 
1.6777914660096316, "grad_norm": 1.2326905727386475, "learning_rate": 9.73821052631579e-05, "loss": 0.4092, "step": 29962 }, { "epoch": 1.6778474633217606, "grad_norm": 1.0324153900146484, "learning_rate": 9.738184210526316e-05, "loss": 0.433, "step": 29963 }, { "epoch": 1.6779034606338896, "grad_norm": 1.4028562307357788, "learning_rate": 9.738157894736842e-05, "loss": 0.5518, "step": 29964 }, { "epoch": 1.6779594579460186, "grad_norm": 1.1557691097259521, "learning_rate": 9.73813157894737e-05, "loss": 0.4234, "step": 29965 }, { "epoch": 1.6780154552581477, "grad_norm": 1.2471976280212402, "learning_rate": 9.738105263157895e-05, "loss": 0.4519, "step": 29966 }, { "epoch": 1.6780714525702767, "grad_norm": 1.1806623935699463, "learning_rate": 9.738078947368421e-05, "loss": 0.4599, "step": 29967 }, { "epoch": 1.6781274498824057, "grad_norm": 1.3362605571746826, "learning_rate": 9.738052631578947e-05, "loss": 0.6427, "step": 29968 }, { "epoch": 1.6781834471945347, "grad_norm": 1.3070813417434692, "learning_rate": 9.738026315789475e-05, "loss": 0.5511, "step": 29969 }, { "epoch": 1.6782394445066637, "grad_norm": 1.2259621620178223, "learning_rate": 9.738e-05, "loss": 0.3998, "step": 29970 }, { "epoch": 1.6782954418187925, "grad_norm": 1.2757374048233032, "learning_rate": 9.737973684210527e-05, "loss": 0.4778, "step": 29971 }, { "epoch": 1.6783514391309216, "grad_norm": 1.2138557434082031, "learning_rate": 9.737947368421053e-05, "loss": 0.3277, "step": 29972 }, { "epoch": 1.6784074364430506, "grad_norm": 1.3146134614944458, "learning_rate": 9.737921052631579e-05, "loss": 0.3642, "step": 29973 }, { "epoch": 1.6784634337551796, "grad_norm": 1.1987791061401367, "learning_rate": 9.737894736842106e-05, "loss": 0.4592, "step": 29974 }, { "epoch": 1.6785194310673086, "grad_norm": 1.5726855993270874, "learning_rate": 9.737868421052632e-05, "loss": 0.6088, "step": 29975 }, { "epoch": 1.6785754283794376, "grad_norm": 1.094915509223938, "learning_rate": 9.737842105263159e-05, 
"loss": 0.4098, "step": 29976 }, { "epoch": 1.6786314256915666, "grad_norm": 1.1610496044158936, "learning_rate": 9.737815789473684e-05, "loss": 0.4652, "step": 29977 }, { "epoch": 1.6786874230036957, "grad_norm": 1.342151165008545, "learning_rate": 9.737789473684211e-05, "loss": 0.392, "step": 29978 }, { "epoch": 1.6787434203158247, "grad_norm": 1.1701411008834839, "learning_rate": 9.737763157894737e-05, "loss": 0.4119, "step": 29979 }, { "epoch": 1.6787994176279537, "grad_norm": 1.7732369899749756, "learning_rate": 9.737736842105264e-05, "loss": 0.4794, "step": 29980 }, { "epoch": 1.6788554149400827, "grad_norm": 1.4591057300567627, "learning_rate": 9.73771052631579e-05, "loss": 0.4294, "step": 29981 }, { "epoch": 1.6789114122522117, "grad_norm": 1.2426187992095947, "learning_rate": 9.737684210526316e-05, "loss": 0.5808, "step": 29982 }, { "epoch": 1.6789674095643408, "grad_norm": 1.490771770477295, "learning_rate": 9.737657894736842e-05, "loss": 0.5741, "step": 29983 }, { "epoch": 1.6790234068764698, "grad_norm": 1.3071575164794922, "learning_rate": 9.73763157894737e-05, "loss": 0.4386, "step": 29984 }, { "epoch": 1.6790794041885988, "grad_norm": 1.275038480758667, "learning_rate": 9.737605263157896e-05, "loss": 0.3716, "step": 29985 }, { "epoch": 1.6791354015007278, "grad_norm": 1.1046198606491089, "learning_rate": 9.737578947368422e-05, "loss": 0.4326, "step": 29986 }, { "epoch": 1.6791913988128568, "grad_norm": 1.302770972251892, "learning_rate": 9.737552631578948e-05, "loss": 0.4648, "step": 29987 }, { "epoch": 1.6792473961249859, "grad_norm": 1.6733739376068115, "learning_rate": 9.737526315789474e-05, "loss": 0.4772, "step": 29988 }, { "epoch": 1.6793033934371149, "grad_norm": 1.2860265970230103, "learning_rate": 9.737500000000001e-05, "loss": 0.312, "step": 29989 }, { "epoch": 1.679359390749244, "grad_norm": 1.3980674743652344, "learning_rate": 9.737473684210527e-05, "loss": 0.517, "step": 29990 }, { "epoch": 1.679415388061373, "grad_norm": 
1.5490573644638062, "learning_rate": 9.737447368421053e-05, "loss": 0.6049, "step": 29991 }, { "epoch": 1.679471385373502, "grad_norm": 1.5674378871917725, "learning_rate": 9.737421052631579e-05, "loss": 0.5021, "step": 29992 }, { "epoch": 1.679527382685631, "grad_norm": 1.4125771522521973, "learning_rate": 9.737394736842106e-05, "loss": 0.4054, "step": 29993 }, { "epoch": 1.67958337999776, "grad_norm": 4.377709865570068, "learning_rate": 9.737368421052632e-05, "loss": 0.2836, "step": 29994 }, { "epoch": 1.679639377309889, "grad_norm": 1.6075434684753418, "learning_rate": 9.737342105263158e-05, "loss": 0.3507, "step": 29995 }, { "epoch": 1.679695374622018, "grad_norm": 1.4482122659683228, "learning_rate": 9.737315789473684e-05, "loss": 0.4288, "step": 29996 }, { "epoch": 1.679751371934147, "grad_norm": 1.3585349321365356, "learning_rate": 9.737289473684211e-05, "loss": 0.5288, "step": 29997 }, { "epoch": 1.679807369246276, "grad_norm": 1.1839277744293213, "learning_rate": 9.737263157894737e-05, "loss": 0.4808, "step": 29998 }, { "epoch": 1.679863366558405, "grad_norm": 1.5054590702056885, "learning_rate": 9.737236842105263e-05, "loss": 0.4966, "step": 29999 }, { "epoch": 1.679919363870534, "grad_norm": 1.5555325746536255, "learning_rate": 9.737210526315789e-05, "loss": 0.4613, "step": 30000 }, { "epoch": 1.679975361182663, "grad_norm": 1.2994585037231445, "learning_rate": 9.737184210526317e-05, "loss": 0.4076, "step": 30001 }, { "epoch": 1.6800313584947921, "grad_norm": 1.3233389854431152, "learning_rate": 9.737157894736843e-05, "loss": 0.4217, "step": 30002 }, { "epoch": 1.6800873558069211, "grad_norm": 1.224224328994751, "learning_rate": 9.73713157894737e-05, "loss": 0.3775, "step": 30003 }, { "epoch": 1.6801433531190502, "grad_norm": 1.1796660423278809, "learning_rate": 9.737105263157895e-05, "loss": 0.4234, "step": 30004 }, { "epoch": 1.6801993504311792, "grad_norm": 1.211627721786499, "learning_rate": 9.73707894736842e-05, "loss": 0.4587, "step": 30005 }, { 
"epoch": 1.6802553477433082, "grad_norm": 1.2654528617858887, "learning_rate": 9.737052631578948e-05, "loss": 0.4753, "step": 30006 }, { "epoch": 1.6803113450554372, "grad_norm": 1.498504400253296, "learning_rate": 9.737026315789474e-05, "loss": 0.3996, "step": 30007 }, { "epoch": 1.6803673423675662, "grad_norm": 1.0283262729644775, "learning_rate": 9.737000000000001e-05, "loss": 0.3684, "step": 30008 }, { "epoch": 1.6804233396796953, "grad_norm": 1.4832878112792969, "learning_rate": 9.736973684210526e-05, "loss": 0.5157, "step": 30009 }, { "epoch": 1.6804793369918243, "grad_norm": 1.3012858629226685, "learning_rate": 9.736947368421053e-05, "loss": 0.5147, "step": 30010 }, { "epoch": 1.6805353343039533, "grad_norm": 1.2735188007354736, "learning_rate": 9.736921052631579e-05, "loss": 0.5182, "step": 30011 }, { "epoch": 1.6805913316160823, "grad_norm": 1.273616909980774, "learning_rate": 9.736894736842106e-05, "loss": 0.4831, "step": 30012 }, { "epoch": 1.6806473289282113, "grad_norm": 1.2145116329193115, "learning_rate": 9.736868421052632e-05, "loss": 0.4781, "step": 30013 }, { "epoch": 1.6807033262403404, "grad_norm": 1.4690943956375122, "learning_rate": 9.736842105263158e-05, "loss": 0.677, "step": 30014 }, { "epoch": 1.6807593235524694, "grad_norm": 1.4068059921264648, "learning_rate": 9.736815789473684e-05, "loss": 0.4209, "step": 30015 }, { "epoch": 1.6808153208645984, "grad_norm": 1.0696946382522583, "learning_rate": 9.736789473684212e-05, "loss": 0.4339, "step": 30016 }, { "epoch": 1.6808713181767274, "grad_norm": 1.1998757123947144, "learning_rate": 9.736763157894738e-05, "loss": 0.4082, "step": 30017 }, { "epoch": 1.6809273154888564, "grad_norm": 1.4615795612335205, "learning_rate": 9.736736842105264e-05, "loss": 0.4241, "step": 30018 }, { "epoch": 1.6809833128009855, "grad_norm": 1.3013535737991333, "learning_rate": 9.73671052631579e-05, "loss": 0.4069, "step": 30019 }, { "epoch": 1.6810393101131145, "grad_norm": 1.12428617477417, "learning_rate": 
9.736684210526317e-05, "loss": 0.4252, "step": 30020 }, { "epoch": 1.6810953074252435, "grad_norm": 1.2561031579971313, "learning_rate": 9.736657894736843e-05, "loss": 0.4285, "step": 30021 }, { "epoch": 1.6811513047373725, "grad_norm": 1.296723484992981, "learning_rate": 9.736631578947369e-05, "loss": 0.5986, "step": 30022 }, { "epoch": 1.6812073020495015, "grad_norm": 1.1818569898605347, "learning_rate": 9.736605263157895e-05, "loss": 0.4809, "step": 30023 }, { "epoch": 1.6812632993616305, "grad_norm": 1.256476879119873, "learning_rate": 9.736578947368421e-05, "loss": 0.4643, "step": 30024 }, { "epoch": 1.6813192966737596, "grad_norm": 1.585464596748352, "learning_rate": 9.736552631578948e-05, "loss": 0.4811, "step": 30025 }, { "epoch": 1.6813752939858886, "grad_norm": 1.2282109260559082, "learning_rate": 9.736526315789474e-05, "loss": 0.4674, "step": 30026 }, { "epoch": 1.6814312912980176, "grad_norm": 2.262535810470581, "learning_rate": 9.7365e-05, "loss": 0.5656, "step": 30027 }, { "epoch": 1.6814872886101466, "grad_norm": 1.455905795097351, "learning_rate": 9.736473684210526e-05, "loss": 0.6624, "step": 30028 }, { "epoch": 1.6815432859222756, "grad_norm": 1.2129590511322021, "learning_rate": 9.736447368421053e-05, "loss": 0.3836, "step": 30029 }, { "epoch": 1.6815992832344047, "grad_norm": 1.4859960079193115, "learning_rate": 9.736421052631579e-05, "loss": 0.401, "step": 30030 }, { "epoch": 1.6816552805465337, "grad_norm": 1.437681794166565, "learning_rate": 9.736394736842107e-05, "loss": 0.4562, "step": 30031 }, { "epoch": 1.6817112778586627, "grad_norm": 1.2419911623001099, "learning_rate": 9.736368421052631e-05, "loss": 0.5159, "step": 30032 }, { "epoch": 1.6817672751707917, "grad_norm": 1.2178417444229126, "learning_rate": 9.736342105263159e-05, "loss": 0.4272, "step": 30033 }, { "epoch": 1.6818232724829207, "grad_norm": 1.9885791540145874, "learning_rate": 9.736315789473685e-05, "loss": 0.7109, "step": 30034 }, { "epoch": 1.6818792697950498, "grad_norm": 
1.2620553970336914, "learning_rate": 9.736289473684212e-05, "loss": 0.4715, "step": 30035 }, { "epoch": 1.6819352671071788, "grad_norm": 1.548359990119934, "learning_rate": 9.736263157894738e-05, "loss": 0.4702, "step": 30036 }, { "epoch": 1.6819912644193078, "grad_norm": 2.5032172203063965, "learning_rate": 9.736236842105264e-05, "loss": 0.5865, "step": 30037 }, { "epoch": 1.6820472617314368, "grad_norm": 1.3945519924163818, "learning_rate": 9.73621052631579e-05, "loss": 0.4448, "step": 30038 }, { "epoch": 1.6821032590435658, "grad_norm": 1.1375930309295654, "learning_rate": 9.736184210526317e-05, "loss": 0.3363, "step": 30039 }, { "epoch": 1.6821592563556949, "grad_norm": 1.2704322338104248, "learning_rate": 9.736157894736843e-05, "loss": 0.4223, "step": 30040 }, { "epoch": 1.6822152536678239, "grad_norm": 1.4568498134613037, "learning_rate": 9.736131578947368e-05, "loss": 0.5237, "step": 30041 }, { "epoch": 1.682271250979953, "grad_norm": 1.1758419275283813, "learning_rate": 9.736105263157895e-05, "loss": 0.4233, "step": 30042 }, { "epoch": 1.682327248292082, "grad_norm": 1.3888391256332397, "learning_rate": 9.736078947368421e-05, "loss": 0.4914, "step": 30043 }, { "epoch": 1.682383245604211, "grad_norm": 1.6250340938568115, "learning_rate": 9.736052631578948e-05, "loss": 0.5952, "step": 30044 }, { "epoch": 1.68243924291634, "grad_norm": 2.2349987030029297, "learning_rate": 9.736026315789474e-05, "loss": 0.5461, "step": 30045 }, { "epoch": 1.682495240228469, "grad_norm": 1.2714875936508179, "learning_rate": 9.736e-05, "loss": 0.301, "step": 30046 }, { "epoch": 1.682551237540598, "grad_norm": 1.311805009841919, "learning_rate": 9.735973684210526e-05, "loss": 0.4269, "step": 30047 }, { "epoch": 1.682607234852727, "grad_norm": 1.1518783569335938, "learning_rate": 9.735947368421054e-05, "loss": 0.6915, "step": 30048 }, { "epoch": 1.682663232164856, "grad_norm": 1.5244580507278442, "learning_rate": 9.73592105263158e-05, "loss": 0.599, "step": 30049 }, { "epoch": 
1.682719229476985, "grad_norm": 1.5039039850234985, "learning_rate": 9.735894736842106e-05, "loss": 0.4116, "step": 30050 }, { "epoch": 1.682775226789114, "grad_norm": 1.5512877702713013, "learning_rate": 9.735868421052632e-05, "loss": 0.4742, "step": 30051 }, { "epoch": 1.682831224101243, "grad_norm": 1.1976828575134277, "learning_rate": 9.735842105263159e-05, "loss": 0.4459, "step": 30052 }, { "epoch": 1.682887221413372, "grad_norm": 1.203157901763916, "learning_rate": 9.735815789473685e-05, "loss": 0.393, "step": 30053 }, { "epoch": 1.6829432187255011, "grad_norm": 1.2579879760742188, "learning_rate": 9.735789473684211e-05, "loss": 0.4515, "step": 30054 }, { "epoch": 1.6829992160376301, "grad_norm": 1.4593232870101929, "learning_rate": 9.735763157894737e-05, "loss": 0.5174, "step": 30055 }, { "epoch": 1.6830552133497592, "grad_norm": 1.5238014459609985, "learning_rate": 9.735736842105264e-05, "loss": 0.5091, "step": 30056 }, { "epoch": 1.6831112106618882, "grad_norm": 1.6702297925949097, "learning_rate": 9.73571052631579e-05, "loss": 0.5301, "step": 30057 }, { "epoch": 1.6831672079740172, "grad_norm": 1.409104824066162, "learning_rate": 9.735684210526316e-05, "loss": 0.5162, "step": 30058 }, { "epoch": 1.6832232052861462, "grad_norm": 1.4003217220306396, "learning_rate": 9.735657894736842e-05, "loss": 0.4034, "step": 30059 }, { "epoch": 1.6832792025982752, "grad_norm": 1.0628284215927124, "learning_rate": 9.735631578947368e-05, "loss": 0.3403, "step": 30060 }, { "epoch": 1.6833351999104043, "grad_norm": 1.4191914796829224, "learning_rate": 9.735605263157895e-05, "loss": 0.4711, "step": 30061 }, { "epoch": 1.6833911972225333, "grad_norm": 1.4225420951843262, "learning_rate": 9.735578947368421e-05, "loss": 0.4781, "step": 30062 }, { "epoch": 1.6834471945346623, "grad_norm": 1.2523552179336548, "learning_rate": 9.735552631578949e-05, "loss": 0.3511, "step": 30063 }, { "epoch": 1.6835031918467913, "grad_norm": 1.1969324350357056, "learning_rate": 
9.735526315789473e-05, "loss": 0.4782, "step": 30064 }, { "epoch": 1.6835591891589203, "grad_norm": 1.314456820487976, "learning_rate": 9.7355e-05, "loss": 0.3676, "step": 30065 }, { "epoch": 1.6836151864710494, "grad_norm": 3.1269609928131104, "learning_rate": 9.735473684210527e-05, "loss": 0.4836, "step": 30066 }, { "epoch": 1.6836711837831784, "grad_norm": 1.3982963562011719, "learning_rate": 9.735447368421054e-05, "loss": 0.5443, "step": 30067 }, { "epoch": 1.6837271810953074, "grad_norm": 1.2770054340362549, "learning_rate": 9.73542105263158e-05, "loss": 0.3972, "step": 30068 }, { "epoch": 1.6837831784074364, "grad_norm": 1.6281217336654663, "learning_rate": 9.735394736842106e-05, "loss": 0.4614, "step": 30069 }, { "epoch": 1.6838391757195654, "grad_norm": 1.2229751348495483, "learning_rate": 9.735368421052632e-05, "loss": 0.617, "step": 30070 }, { "epoch": 1.6838951730316944, "grad_norm": 1.2579398155212402, "learning_rate": 9.735342105263159e-05, "loss": 0.5079, "step": 30071 }, { "epoch": 1.6839511703438235, "grad_norm": 1.5189852714538574, "learning_rate": 9.735315789473685e-05, "loss": 0.4688, "step": 30072 }, { "epoch": 1.6840071676559525, "grad_norm": 1.2876662015914917, "learning_rate": 9.735289473684211e-05, "loss": 0.4895, "step": 30073 }, { "epoch": 1.6840631649680815, "grad_norm": 1.3178309202194214, "learning_rate": 9.735263157894737e-05, "loss": 0.5764, "step": 30074 }, { "epoch": 1.6841191622802105, "grad_norm": 1.5533462762832642, "learning_rate": 9.735236842105263e-05, "loss": 0.5602, "step": 30075 }, { "epoch": 1.6841751595923395, "grad_norm": 1.2621676921844482, "learning_rate": 9.73521052631579e-05, "loss": 0.4298, "step": 30076 }, { "epoch": 1.6842311569044686, "grad_norm": 1.3253891468048096, "learning_rate": 9.735184210526316e-05, "loss": 0.6581, "step": 30077 }, { "epoch": 1.6842871542165976, "grad_norm": 1.3398269414901733, "learning_rate": 9.735157894736842e-05, "loss": 0.5109, "step": 30078 }, { "epoch": 1.6843431515287266, 
"grad_norm": 1.0978977680206299, "learning_rate": 9.735131578947368e-05, "loss": 0.3186, "step": 30079 }, { "epoch": 1.6843991488408556, "grad_norm": 1.2364259958267212, "learning_rate": 9.735105263157896e-05, "loss": 0.4888, "step": 30080 }, { "epoch": 1.6844551461529846, "grad_norm": 1.2050082683563232, "learning_rate": 9.735078947368422e-05, "loss": 0.3848, "step": 30081 }, { "epoch": 1.6845111434651137, "grad_norm": 1.363664150238037, "learning_rate": 9.735052631578948e-05, "loss": 0.3568, "step": 30082 }, { "epoch": 1.6845671407772427, "grad_norm": 1.1327582597732544, "learning_rate": 9.735026315789473e-05, "loss": 0.3377, "step": 30083 }, { "epoch": 1.6846231380893717, "grad_norm": 1.2636672258377075, "learning_rate": 9.735000000000001e-05, "loss": 0.3855, "step": 30084 }, { "epoch": 1.6846791354015007, "grad_norm": 1.3640165328979492, "learning_rate": 9.734973684210527e-05, "loss": 0.3801, "step": 30085 }, { "epoch": 1.6847351327136297, "grad_norm": 1.1210427284240723, "learning_rate": 9.734947368421054e-05, "loss": 0.4005, "step": 30086 }, { "epoch": 1.6847911300257588, "grad_norm": 1.3407551050186157, "learning_rate": 9.734921052631579e-05, "loss": 0.5224, "step": 30087 }, { "epoch": 1.6848471273378878, "grad_norm": 1.1256792545318604, "learning_rate": 9.734894736842106e-05, "loss": 0.3795, "step": 30088 }, { "epoch": 1.6849031246500168, "grad_norm": 1.897878885269165, "learning_rate": 9.734868421052632e-05, "loss": 0.7088, "step": 30089 }, { "epoch": 1.6849591219621458, "grad_norm": 1.8243905305862427, "learning_rate": 9.73484210526316e-05, "loss": 0.6018, "step": 30090 }, { "epoch": 1.6850151192742748, "grad_norm": 1.3680564165115356, "learning_rate": 9.734815789473685e-05, "loss": 0.4472, "step": 30091 }, { "epoch": 1.6850711165864038, "grad_norm": 1.147234320640564, "learning_rate": 9.73478947368421e-05, "loss": 0.3787, "step": 30092 }, { "epoch": 1.6851271138985329, "grad_norm": 1.3384244441986084, "learning_rate": 9.734763157894737e-05, "loss": 
0.5763, "step": 30093 }, { "epoch": 1.6851831112106619, "grad_norm": 1.4484529495239258, "learning_rate": 9.734736842105263e-05, "loss": 0.5563, "step": 30094 }, { "epoch": 1.685239108522791, "grad_norm": 1.1401987075805664, "learning_rate": 9.73471052631579e-05, "loss": 0.3952, "step": 30095 }, { "epoch": 1.68529510583492, "grad_norm": 1.1800525188446045, "learning_rate": 9.734684210526315e-05, "loss": 0.4631, "step": 30096 }, { "epoch": 1.685351103147049, "grad_norm": 1.7282980680465698, "learning_rate": 9.734657894736843e-05, "loss": 0.6157, "step": 30097 }, { "epoch": 1.685407100459178, "grad_norm": 1.2098634243011475, "learning_rate": 9.734631578947368e-05, "loss": 0.49, "step": 30098 }, { "epoch": 1.685463097771307, "grad_norm": 1.2357712984085083, "learning_rate": 9.734605263157896e-05, "loss": 0.4857, "step": 30099 }, { "epoch": 1.685519095083436, "grad_norm": 1.075027585029602, "learning_rate": 9.734578947368422e-05, "loss": 0.4665, "step": 30100 }, { "epoch": 1.685575092395565, "grad_norm": 1.2004560232162476, "learning_rate": 9.734552631578948e-05, "loss": 0.5046, "step": 30101 }, { "epoch": 1.685631089707694, "grad_norm": 1.4363986253738403, "learning_rate": 9.734526315789474e-05, "loss": 0.4104, "step": 30102 }, { "epoch": 1.685687087019823, "grad_norm": 1.3236922025680542, "learning_rate": 9.734500000000001e-05, "loss": 0.4833, "step": 30103 }, { "epoch": 1.685743084331952, "grad_norm": 1.2907474040985107, "learning_rate": 9.734473684210527e-05, "loss": 0.5518, "step": 30104 }, { "epoch": 1.685799081644081, "grad_norm": 1.906058430671692, "learning_rate": 9.734447368421053e-05, "loss": 0.6542, "step": 30105 }, { "epoch": 1.6858550789562101, "grad_norm": 1.234045386314392, "learning_rate": 9.734421052631579e-05, "loss": 0.3836, "step": 30106 }, { "epoch": 1.6859110762683391, "grad_norm": 1.3779826164245605, "learning_rate": 9.734394736842106e-05, "loss": 0.5039, "step": 30107 }, { "epoch": 1.6859670735804682, "grad_norm": 1.2399606704711914, 
"learning_rate": 9.734368421052632e-05, "loss": 0.4581, "step": 30108 }, { "epoch": 1.6860230708925972, "grad_norm": 1.3214467763900757, "learning_rate": 9.734342105263158e-05, "loss": 0.4162, "step": 30109 }, { "epoch": 1.6860790682047262, "grad_norm": 1.1875327825546265, "learning_rate": 9.734315789473684e-05, "loss": 0.4735, "step": 30110 }, { "epoch": 1.6861350655168552, "grad_norm": 1.963280200958252, "learning_rate": 9.73428947368421e-05, "loss": 0.6456, "step": 30111 }, { "epoch": 1.6861910628289842, "grad_norm": 1.2360131740570068, "learning_rate": 9.734263157894738e-05, "loss": 0.5305, "step": 30112 }, { "epoch": 1.6862470601411133, "grad_norm": 1.077210545539856, "learning_rate": 9.734236842105264e-05, "loss": 0.3636, "step": 30113 }, { "epoch": 1.6863030574532423, "grad_norm": 1.512688398361206, "learning_rate": 9.73421052631579e-05, "loss": 0.5859, "step": 30114 }, { "epoch": 1.6863590547653713, "grad_norm": 2.2748279571533203, "learning_rate": 9.734184210526315e-05, "loss": 0.5212, "step": 30115 }, { "epoch": 1.6864150520775003, "grad_norm": 1.3436267375946045, "learning_rate": 9.734157894736843e-05, "loss": 0.411, "step": 30116 }, { "epoch": 1.6864710493896293, "grad_norm": 1.226438045501709, "learning_rate": 9.734131578947369e-05, "loss": 0.385, "step": 30117 }, { "epoch": 1.6865270467017583, "grad_norm": 1.1846208572387695, "learning_rate": 9.734105263157896e-05, "loss": 0.3664, "step": 30118 }, { "epoch": 1.6865830440138874, "grad_norm": 1.4303958415985107, "learning_rate": 9.734078947368421e-05, "loss": 0.6189, "step": 30119 }, { "epoch": 1.6866390413260164, "grad_norm": 1.2793289422988892, "learning_rate": 9.734052631578948e-05, "loss": 0.4675, "step": 30120 }, { "epoch": 1.6866950386381454, "grad_norm": 1.8236483335494995, "learning_rate": 9.734026315789474e-05, "loss": 0.5422, "step": 30121 }, { "epoch": 1.6867510359502744, "grad_norm": 1.384316086769104, "learning_rate": 9.734000000000001e-05, "loss": 0.4779, "step": 30122 }, { "epoch": 
1.6868070332624034, "grad_norm": 1.2739804983139038, "learning_rate": 9.733973684210527e-05, "loss": 0.5885, "step": 30123 }, { "epoch": 1.6868630305745325, "grad_norm": 1.3124525547027588, "learning_rate": 9.733947368421053e-05, "loss": 0.3851, "step": 30124 }, { "epoch": 1.6869190278866615, "grad_norm": 5.500936031341553, "learning_rate": 9.733921052631579e-05, "loss": 0.5684, "step": 30125 }, { "epoch": 1.6869750251987905, "grad_norm": 1.1165839433670044, "learning_rate": 9.733894736842105e-05, "loss": 0.4695, "step": 30126 }, { "epoch": 1.6870310225109195, "grad_norm": 17.776029586791992, "learning_rate": 9.733868421052633e-05, "loss": 0.4529, "step": 30127 }, { "epoch": 1.6870870198230485, "grad_norm": 14.615177154541016, "learning_rate": 9.733842105263159e-05, "loss": 0.6338, "step": 30128 }, { "epoch": 1.6871430171351776, "grad_norm": 1.2860603332519531, "learning_rate": 9.733815789473684e-05, "loss": 0.5732, "step": 30129 }, { "epoch": 1.6871990144473066, "grad_norm": 1.4584449529647827, "learning_rate": 9.73378947368421e-05, "loss": 0.5255, "step": 30130 }, { "epoch": 1.6872550117594356, "grad_norm": 1.2533122301101685, "learning_rate": 9.733763157894738e-05, "loss": 0.4487, "step": 30131 }, { "epoch": 1.6873110090715646, "grad_norm": 1.1864601373672485, "learning_rate": 9.733736842105264e-05, "loss": 0.3937, "step": 30132 }, { "epoch": 1.6873670063836936, "grad_norm": 1.6280063390731812, "learning_rate": 9.73371052631579e-05, "loss": 0.4634, "step": 30133 }, { "epoch": 1.6874230036958227, "grad_norm": 1.5736427307128906, "learning_rate": 9.733684210526316e-05, "loss": 0.4416, "step": 30134 }, { "epoch": 1.6874790010079517, "grad_norm": 3.5148539543151855, "learning_rate": 9.733657894736843e-05, "loss": 0.578, "step": 30135 }, { "epoch": 1.6875349983200807, "grad_norm": 1.5513710975646973, "learning_rate": 9.733631578947369e-05, "loss": 0.4843, "step": 30136 }, { "epoch": 1.6875909956322097, "grad_norm": 1.3073911666870117, "learning_rate": 
9.733605263157895e-05, "loss": 0.3689, "step": 30137 }, { "epoch": 1.6876469929443387, "grad_norm": 1.873498558998108, "learning_rate": 9.733578947368421e-05, "loss": 0.5152, "step": 30138 }, { "epoch": 1.6877029902564677, "grad_norm": 1.4693621397018433, "learning_rate": 9.733552631578948e-05, "loss": 0.5006, "step": 30139 }, { "epoch": 1.6877589875685968, "grad_norm": 1.352457880973816, "learning_rate": 9.733526315789474e-05, "loss": 0.5083, "step": 30140 }, { "epoch": 1.6878149848807258, "grad_norm": 1.1852613687515259, "learning_rate": 9.733500000000002e-05, "loss": 0.3672, "step": 30141 }, { "epoch": 1.6878709821928548, "grad_norm": 1.3586770296096802, "learning_rate": 9.733473684210526e-05, "loss": 0.5802, "step": 30142 }, { "epoch": 1.6879269795049838, "grad_norm": 1.1743924617767334, "learning_rate": 9.733447368421054e-05, "loss": 0.3909, "step": 30143 }, { "epoch": 1.6879829768171128, "grad_norm": 1.2252435684204102, "learning_rate": 9.73342105263158e-05, "loss": 0.3919, "step": 30144 }, { "epoch": 1.6880389741292419, "grad_norm": 1.4737225770950317, "learning_rate": 9.733394736842105e-05, "loss": 0.4831, "step": 30145 }, { "epoch": 1.6880949714413709, "grad_norm": 1.5884093046188354, "learning_rate": 9.733368421052633e-05, "loss": 0.3723, "step": 30146 }, { "epoch": 1.6881509687535, "grad_norm": 1.301109790802002, "learning_rate": 9.733342105263157e-05, "loss": 0.5235, "step": 30147 }, { "epoch": 1.688206966065629, "grad_norm": 1.361692190170288, "learning_rate": 9.733315789473685e-05, "loss": 0.5165, "step": 30148 }, { "epoch": 1.688262963377758, "grad_norm": 1.2293809652328491, "learning_rate": 9.733289473684211e-05, "loss": 0.4645, "step": 30149 }, { "epoch": 1.688318960689887, "grad_norm": 1.2032302618026733, "learning_rate": 9.733263157894738e-05, "loss": 0.5969, "step": 30150 }, { "epoch": 1.688374958002016, "grad_norm": 1.4731817245483398, "learning_rate": 9.733236842105263e-05, "loss": 0.4357, "step": 30151 }, { "epoch": 1.688430955314145, 
"grad_norm": 1.2632147073745728, "learning_rate": 9.73321052631579e-05, "loss": 0.4602, "step": 30152 }, { "epoch": 1.688486952626274, "grad_norm": 1.2067543268203735, "learning_rate": 9.733184210526316e-05, "loss": 0.3663, "step": 30153 }, { "epoch": 1.688542949938403, "grad_norm": 1.1950796842575073, "learning_rate": 9.733157894736843e-05, "loss": 0.359, "step": 30154 }, { "epoch": 1.688598947250532, "grad_norm": 1.176719069480896, "learning_rate": 9.733131578947369e-05, "loss": 0.4632, "step": 30155 }, { "epoch": 1.688654944562661, "grad_norm": 1.5808255672454834, "learning_rate": 9.733105263157895e-05, "loss": 0.4925, "step": 30156 }, { "epoch": 1.68871094187479, "grad_norm": 1.0797353982925415, "learning_rate": 9.733078947368421e-05, "loss": 0.3291, "step": 30157 }, { "epoch": 1.6887669391869191, "grad_norm": 1.3204952478408813, "learning_rate": 9.733052631578949e-05, "loss": 0.3987, "step": 30158 }, { "epoch": 1.6888229364990481, "grad_norm": 1.4973983764648438, "learning_rate": 9.733026315789475e-05, "loss": 0.4919, "step": 30159 }, { "epoch": 1.6888789338111772, "grad_norm": 1.1468061208724976, "learning_rate": 9.733e-05, "loss": 0.394, "step": 30160 }, { "epoch": 1.6889349311233062, "grad_norm": 1.3484851121902466, "learning_rate": 9.732973684210526e-05, "loss": 0.383, "step": 30161 }, { "epoch": 1.6889909284354352, "grad_norm": 1.2262699604034424, "learning_rate": 9.732947368421052e-05, "loss": 0.4681, "step": 30162 }, { "epoch": 1.6890469257475642, "grad_norm": 1.2382701635360718, "learning_rate": 9.73292105263158e-05, "loss": 0.3634, "step": 30163 }, { "epoch": 1.6891029230596932, "grad_norm": 1.236656904220581, "learning_rate": 9.732894736842106e-05, "loss": 0.5847, "step": 30164 }, { "epoch": 1.6891589203718222, "grad_norm": 1.5243562459945679, "learning_rate": 9.732868421052632e-05, "loss": 0.4452, "step": 30165 }, { "epoch": 1.6892149176839513, "grad_norm": 1.3775279521942139, "learning_rate": 9.732842105263158e-05, "loss": 0.466, "step": 30166 }, { 
"epoch": 1.6892709149960803, "grad_norm": 1.2446755170822144, "learning_rate": 9.732815789473685e-05, "loss": 0.4259, "step": 30167 }, { "epoch": 1.6893269123082093, "grad_norm": 1.2202911376953125, "learning_rate": 9.732789473684211e-05, "loss": 0.4717, "step": 30168 }, { "epoch": 1.6893829096203383, "grad_norm": 1.2155464887619019, "learning_rate": 9.732763157894737e-05, "loss": 0.4708, "step": 30169 }, { "epoch": 1.6894389069324673, "grad_norm": 1.3304054737091064, "learning_rate": 9.732736842105263e-05, "loss": 0.4863, "step": 30170 }, { "epoch": 1.6894949042445964, "grad_norm": 1.3756487369537354, "learning_rate": 9.73271052631579e-05, "loss": 0.5803, "step": 30171 }, { "epoch": 1.6895509015567254, "grad_norm": 1.2009427547454834, "learning_rate": 9.732684210526316e-05, "loss": 0.415, "step": 30172 }, { "epoch": 1.6896068988688544, "grad_norm": 1.3241255283355713, "learning_rate": 9.732657894736844e-05, "loss": 0.5297, "step": 30173 }, { "epoch": 1.6896628961809834, "grad_norm": 1.2313463687896729, "learning_rate": 9.732631578947368e-05, "loss": 0.4625, "step": 30174 }, { "epoch": 1.6897188934931124, "grad_norm": 1.45566725730896, "learning_rate": 9.732605263157896e-05, "loss": 0.461, "step": 30175 }, { "epoch": 1.6897748908052415, "grad_norm": 1.1152372360229492, "learning_rate": 9.732578947368421e-05, "loss": 0.4818, "step": 30176 }, { "epoch": 1.6898308881173705, "grad_norm": 1.3938652276992798, "learning_rate": 9.732552631578949e-05, "loss": 0.3975, "step": 30177 }, { "epoch": 1.6898868854294995, "grad_norm": 1.3016711473464966, "learning_rate": 9.732526315789475e-05, "loss": 0.4593, "step": 30178 }, { "epoch": 1.6899428827416285, "grad_norm": 1.3438842296600342, "learning_rate": 9.7325e-05, "loss": 0.4991, "step": 30179 }, { "epoch": 1.6899988800537575, "grad_norm": 1.6723095178604126, "learning_rate": 9.732473684210527e-05, "loss": 0.4055, "step": 30180 }, { "epoch": 1.6900548773658866, "grad_norm": 1.1403666734695435, "learning_rate": 
9.732447368421053e-05, "loss": 0.4457, "step": 30181 }, { "epoch": 1.6901108746780156, "grad_norm": 1.7278417348861694, "learning_rate": 9.73242105263158e-05, "loss": 0.6594, "step": 30182 }, { "epoch": 1.6901668719901446, "grad_norm": 1.2462953329086304, "learning_rate": 9.732394736842106e-05, "loss": 0.3927, "step": 30183 }, { "epoch": 1.6902228693022736, "grad_norm": 1.1908814907073975, "learning_rate": 9.732368421052632e-05, "loss": 0.4063, "step": 30184 }, { "epoch": 1.6902788666144026, "grad_norm": 1.24465811252594, "learning_rate": 9.732342105263158e-05, "loss": 0.4597, "step": 30185 }, { "epoch": 1.6903348639265316, "grad_norm": 1.2822506427764893, "learning_rate": 9.732315789473685e-05, "loss": 0.3647, "step": 30186 }, { "epoch": 1.6903908612386607, "grad_norm": 1.6926192045211792, "learning_rate": 9.732289473684211e-05, "loss": 0.5835, "step": 30187 }, { "epoch": 1.6904468585507897, "grad_norm": 1.2964590787887573, "learning_rate": 9.732263157894737e-05, "loss": 0.4284, "step": 30188 }, { "epoch": 1.6905028558629187, "grad_norm": 1.1388252973556519, "learning_rate": 9.732236842105263e-05, "loss": 0.5062, "step": 30189 }, { "epoch": 1.6905588531750477, "grad_norm": 1.3133678436279297, "learning_rate": 9.73221052631579e-05, "loss": 0.4705, "step": 30190 }, { "epoch": 1.6906148504871767, "grad_norm": 1.310320496559143, "learning_rate": 9.732184210526316e-05, "loss": 0.5138, "step": 30191 }, { "epoch": 1.6906708477993058, "grad_norm": 1.4054476022720337, "learning_rate": 9.732157894736842e-05, "loss": 0.5325, "step": 30192 }, { "epoch": 1.6907268451114348, "grad_norm": 1.578825831413269, "learning_rate": 9.732131578947368e-05, "loss": 0.4885, "step": 30193 }, { "epoch": 1.6907828424235638, "grad_norm": 1.1037230491638184, "learning_rate": 9.732105263157896e-05, "loss": 0.3731, "step": 30194 }, { "epoch": 1.6908388397356928, "grad_norm": 1.074224829673767, "learning_rate": 9.732078947368422e-05, "loss": 0.4612, "step": 30195 }, { "epoch": 1.6908948370478218, 
"grad_norm": 1.2621155977249146, "learning_rate": 9.732052631578948e-05, "loss": 0.41, "step": 30196 }, { "epoch": 1.6909508343599509, "grad_norm": 1.3471922874450684, "learning_rate": 9.732026315789474e-05, "loss": 0.4656, "step": 30197 }, { "epoch": 1.6910068316720799, "grad_norm": 1.1260102987289429, "learning_rate": 9.732e-05, "loss": 0.5334, "step": 30198 }, { "epoch": 1.691062828984209, "grad_norm": 1.2001433372497559, "learning_rate": 9.731973684210527e-05, "loss": 0.4569, "step": 30199 }, { "epoch": 1.691118826296338, "grad_norm": 1.347712516784668, "learning_rate": 9.731947368421053e-05, "loss": 0.553, "step": 30200 }, { "epoch": 1.691174823608467, "grad_norm": 1.417059302330017, "learning_rate": 9.731921052631579e-05, "loss": 0.6363, "step": 30201 }, { "epoch": 1.691230820920596, "grad_norm": 1.2156375646591187, "learning_rate": 9.731894736842105e-05, "loss": 0.4198, "step": 30202 }, { "epoch": 1.691286818232725, "grad_norm": 1.3916971683502197, "learning_rate": 9.731868421052632e-05, "loss": 0.4246, "step": 30203 }, { "epoch": 1.691342815544854, "grad_norm": 1.130895972251892, "learning_rate": 9.731842105263158e-05, "loss": 0.3977, "step": 30204 }, { "epoch": 1.691398812856983, "grad_norm": 1.2875492572784424, "learning_rate": 9.731815789473686e-05, "loss": 0.5469, "step": 30205 }, { "epoch": 1.691454810169112, "grad_norm": 1.2050881385803223, "learning_rate": 9.73178947368421e-05, "loss": 0.4667, "step": 30206 }, { "epoch": 1.691510807481241, "grad_norm": 1.1337792873382568, "learning_rate": 9.731763157894737e-05, "loss": 0.4197, "step": 30207 }, { "epoch": 1.69156680479337, "grad_norm": 1.0673487186431885, "learning_rate": 9.731736842105263e-05, "loss": 0.3491, "step": 30208 }, { "epoch": 1.691622802105499, "grad_norm": 1.392971158027649, "learning_rate": 9.731710526315791e-05, "loss": 0.6227, "step": 30209 }, { "epoch": 1.691678799417628, "grad_norm": 1.138738989830017, "learning_rate": 9.731684210526317e-05, "loss": 0.4073, "step": 30210 }, { 
"epoch": 1.6917347967297571, "grad_norm": 1.3383424282073975, "learning_rate": 9.731657894736843e-05, "loss": 0.4484, "step": 30211 }, { "epoch": 1.6917907940418861, "grad_norm": 1.3517189025878906, "learning_rate": 9.731631578947369e-05, "loss": 0.3765, "step": 30212 }, { "epoch": 1.6918467913540152, "grad_norm": 1.1367801427841187, "learning_rate": 9.731605263157895e-05, "loss": 0.3876, "step": 30213 }, { "epoch": 1.6919027886661442, "grad_norm": 1.1613292694091797, "learning_rate": 9.731578947368422e-05, "loss": 0.4214, "step": 30214 }, { "epoch": 1.6919587859782732, "grad_norm": 1.3283835649490356, "learning_rate": 9.731552631578948e-05, "loss": 0.382, "step": 30215 }, { "epoch": 1.6920147832904022, "grad_norm": 2.5240118503570557, "learning_rate": 9.731526315789474e-05, "loss": 0.4319, "step": 30216 }, { "epoch": 1.6920707806025312, "grad_norm": 1.573872685432434, "learning_rate": 9.7315e-05, "loss": 0.4483, "step": 30217 }, { "epoch": 1.6921267779146603, "grad_norm": 1.1742709875106812, "learning_rate": 9.731473684210527e-05, "loss": 0.5286, "step": 30218 }, { "epoch": 1.692182775226789, "grad_norm": 1.2130649089813232, "learning_rate": 9.731447368421053e-05, "loss": 0.4015, "step": 30219 }, { "epoch": 1.692238772538918, "grad_norm": 1.6368238925933838, "learning_rate": 9.731421052631579e-05, "loss": 0.6805, "step": 30220 }, { "epoch": 1.692294769851047, "grad_norm": 1.2728393077850342, "learning_rate": 9.731394736842105e-05, "loss": 0.4743, "step": 30221 }, { "epoch": 1.6923507671631761, "grad_norm": 1.1846604347229004, "learning_rate": 9.731368421052632e-05, "loss": 0.4199, "step": 30222 }, { "epoch": 1.6924067644753051, "grad_norm": 1.1958200931549072, "learning_rate": 9.731342105263158e-05, "loss": 0.5363, "step": 30223 }, { "epoch": 1.6924627617874342, "grad_norm": 1.304478645324707, "learning_rate": 9.731315789473684e-05, "loss": 0.5076, "step": 30224 }, { "epoch": 1.6925187590995632, "grad_norm": 1.5974457263946533, "learning_rate": 
9.73128947368421e-05, "loss": 0.5178, "step": 30225 }, { "epoch": 1.6925747564116922, "grad_norm": 0.9744231700897217, "learning_rate": 9.731263157894738e-05, "loss": 0.4195, "step": 30226 }, { "epoch": 1.6926307537238212, "grad_norm": 1.188340663909912, "learning_rate": 9.731236842105264e-05, "loss": 0.4446, "step": 30227 }, { "epoch": 1.6926867510359502, "grad_norm": 1.4804326295852661, "learning_rate": 9.731210526315791e-05, "loss": 0.587, "step": 30228 }, { "epoch": 1.6927427483480793, "grad_norm": 1.5981879234313965, "learning_rate": 9.731184210526316e-05, "loss": 0.4969, "step": 30229 }, { "epoch": 1.6927987456602083, "grad_norm": 1.0407193899154663, "learning_rate": 9.731157894736842e-05, "loss": 0.365, "step": 30230 }, { "epoch": 1.6928547429723373, "grad_norm": 1.2792677879333496, "learning_rate": 9.731131578947369e-05, "loss": 0.584, "step": 30231 }, { "epoch": 1.6929107402844663, "grad_norm": 1.2894998788833618, "learning_rate": 9.731105263157895e-05, "loss": 0.4905, "step": 30232 }, { "epoch": 1.6929667375965953, "grad_norm": 1.29068124294281, "learning_rate": 9.731078947368422e-05, "loss": 0.4458, "step": 30233 }, { "epoch": 1.6930227349087243, "grad_norm": 1.3974711894989014, "learning_rate": 9.731052631578947e-05, "loss": 0.5001, "step": 30234 }, { "epoch": 1.6930787322208534, "grad_norm": 1.2442253828048706, "learning_rate": 9.731026315789474e-05, "loss": 0.2887, "step": 30235 }, { "epoch": 1.6931347295329824, "grad_norm": 1.3117127418518066, "learning_rate": 9.731e-05, "loss": 0.4423, "step": 30236 }, { "epoch": 1.6931907268451114, "grad_norm": 1.3299121856689453, "learning_rate": 9.730973684210528e-05, "loss": 0.5066, "step": 30237 }, { "epoch": 1.6932467241572404, "grad_norm": 1.3412389755249023, "learning_rate": 9.730947368421053e-05, "loss": 0.3981, "step": 30238 }, { "epoch": 1.6933027214693694, "grad_norm": 1.9414891004562378, "learning_rate": 9.73092105263158e-05, "loss": 0.5383, "step": 30239 }, { "epoch": 1.6933587187814985, "grad_norm": 
1.3416924476623535, "learning_rate": 9.730894736842105e-05, "loss": 0.3939, "step": 30240 }, { "epoch": 1.6934147160936275, "grad_norm": 1.4639008045196533, "learning_rate": 9.730868421052633e-05, "loss": 0.4688, "step": 30241 }, { "epoch": 1.6934707134057565, "grad_norm": 1.4314970970153809, "learning_rate": 9.730842105263159e-05, "loss": 0.4478, "step": 30242 }, { "epoch": 1.6935267107178855, "grad_norm": 1.1324588060379028, "learning_rate": 9.730815789473685e-05, "loss": 0.4582, "step": 30243 }, { "epoch": 1.6935827080300145, "grad_norm": 1.1943910121917725, "learning_rate": 9.73078947368421e-05, "loss": 0.3673, "step": 30244 }, { "epoch": 1.6936387053421436, "grad_norm": 1.440931797027588, "learning_rate": 9.730763157894738e-05, "loss": 0.5169, "step": 30245 }, { "epoch": 1.6936947026542726, "grad_norm": 1.5068817138671875, "learning_rate": 9.730736842105264e-05, "loss": 0.4397, "step": 30246 }, { "epoch": 1.6937506999664016, "grad_norm": 1.0686894655227661, "learning_rate": 9.73071052631579e-05, "loss": 0.4375, "step": 30247 }, { "epoch": 1.6938066972785306, "grad_norm": 1.2193092107772827, "learning_rate": 9.730684210526316e-05, "loss": 0.4532, "step": 30248 }, { "epoch": 1.6938626945906596, "grad_norm": 1.2892290353775024, "learning_rate": 9.730657894736842e-05, "loss": 0.5527, "step": 30249 }, { "epoch": 1.6939186919027887, "grad_norm": 1.4779232740402222, "learning_rate": 9.730631578947369e-05, "loss": 0.4601, "step": 30250 }, { "epoch": 1.6939746892149177, "grad_norm": 1.5048413276672363, "learning_rate": 9.730605263157895e-05, "loss": 0.4469, "step": 30251 }, { "epoch": 1.6940306865270467, "grad_norm": 1.171181082725525, "learning_rate": 9.730578947368421e-05, "loss": 0.422, "step": 30252 }, { "epoch": 1.6940866838391757, "grad_norm": 1.3899632692337036, "learning_rate": 9.730552631578947e-05, "loss": 0.378, "step": 30253 }, { "epoch": 1.6941426811513047, "grad_norm": 1.1190667152404785, "learning_rate": 9.730526315789474e-05, "loss": 0.3367, "step": 
30254 }, { "epoch": 1.6941986784634337, "grad_norm": 1.3638272285461426, "learning_rate": 9.7305e-05, "loss": 0.5276, "step": 30255 }, { "epoch": 1.6942546757755628, "grad_norm": 1.307783603668213, "learning_rate": 9.730473684210526e-05, "loss": 0.5087, "step": 30256 }, { "epoch": 1.6943106730876918, "grad_norm": 1.137140154838562, "learning_rate": 9.730447368421052e-05, "loss": 0.4028, "step": 30257 }, { "epoch": 1.6943666703998208, "grad_norm": 1.8036515712738037, "learning_rate": 9.73042105263158e-05, "loss": 0.5313, "step": 30258 }, { "epoch": 1.6944226677119498, "grad_norm": 1.3460479974746704, "learning_rate": 9.730394736842106e-05, "loss": 0.4697, "step": 30259 }, { "epoch": 1.6944786650240788, "grad_norm": 1.4281642436981201, "learning_rate": 9.730368421052633e-05, "loss": 0.5596, "step": 30260 }, { "epoch": 1.6945346623362079, "grad_norm": 1.271577000617981, "learning_rate": 9.730342105263158e-05, "loss": 0.3794, "step": 30261 }, { "epoch": 1.6945906596483369, "grad_norm": 1.4523308277130127, "learning_rate": 9.730315789473685e-05, "loss": 0.5573, "step": 30262 }, { "epoch": 1.694646656960466, "grad_norm": 1.1428872346878052, "learning_rate": 9.730289473684211e-05, "loss": 0.4664, "step": 30263 }, { "epoch": 1.694702654272595, "grad_norm": 1.3204830884933472, "learning_rate": 9.730263157894738e-05, "loss": 0.611, "step": 30264 }, { "epoch": 1.694758651584724, "grad_norm": 1.1934540271759033, "learning_rate": 9.730236842105264e-05, "loss": 0.3084, "step": 30265 }, { "epoch": 1.694814648896853, "grad_norm": 1.0935078859329224, "learning_rate": 9.730210526315789e-05, "loss": 0.41, "step": 30266 }, { "epoch": 1.694870646208982, "grad_norm": 1.3033454418182373, "learning_rate": 9.730184210526316e-05, "loss": 0.5077, "step": 30267 }, { "epoch": 1.694926643521111, "grad_norm": 1.4620641469955444, "learning_rate": 9.730157894736842e-05, "loss": 0.4244, "step": 30268 }, { "epoch": 1.69498264083324, "grad_norm": 1.3887606859207153, "learning_rate": 
9.73013157894737e-05, "loss": 0.5175, "step": 30269 }, { "epoch": 1.695038638145369, "grad_norm": 1.2040472030639648, "learning_rate": 9.730105263157895e-05, "loss": 0.4533, "step": 30270 }, { "epoch": 1.695094635457498, "grad_norm": 1.8510829210281372, "learning_rate": 9.730078947368421e-05, "loss": 0.5017, "step": 30271 }, { "epoch": 1.695150632769627, "grad_norm": 1.2853960990905762, "learning_rate": 9.730052631578947e-05, "loss": 0.3734, "step": 30272 }, { "epoch": 1.695206630081756, "grad_norm": 1.4359183311462402, "learning_rate": 9.730026315789475e-05, "loss": 0.4473, "step": 30273 }, { "epoch": 1.6952626273938851, "grad_norm": 1.0984580516815186, "learning_rate": 9.730000000000001e-05, "loss": 0.3766, "step": 30274 }, { "epoch": 1.6953186247060141, "grad_norm": 1.4857829809188843, "learning_rate": 9.729973684210527e-05, "loss": 0.5342, "step": 30275 }, { "epoch": 1.6953746220181432, "grad_norm": 1.3552742004394531, "learning_rate": 9.729947368421053e-05, "loss": 0.3975, "step": 30276 }, { "epoch": 1.6954306193302722, "grad_norm": 1.2708369493484497, "learning_rate": 9.72992105263158e-05, "loss": 0.4116, "step": 30277 }, { "epoch": 1.6954866166424012, "grad_norm": 1.426080584526062, "learning_rate": 9.729894736842106e-05, "loss": 0.5349, "step": 30278 }, { "epoch": 1.6955426139545302, "grad_norm": 1.2011563777923584, "learning_rate": 9.729868421052632e-05, "loss": 0.388, "step": 30279 }, { "epoch": 1.6955986112666592, "grad_norm": 1.3876416683197021, "learning_rate": 9.729842105263158e-05, "loss": 0.3935, "step": 30280 }, { "epoch": 1.6956546085787882, "grad_norm": 1.0139832496643066, "learning_rate": 9.729815789473685e-05, "loss": 0.4902, "step": 30281 }, { "epoch": 1.6957106058909173, "grad_norm": 1.6276042461395264, "learning_rate": 9.729789473684211e-05, "loss": 0.388, "step": 30282 }, { "epoch": 1.6957666032030463, "grad_norm": 1.4506871700286865, "learning_rate": 9.729763157894737e-05, "loss": 0.4478, "step": 30283 }, { "epoch": 1.6958226005151753, 
"grad_norm": 1.4444221258163452, "learning_rate": 9.729736842105263e-05, "loss": 0.4424, "step": 30284 }, { "epoch": 1.6958785978273043, "grad_norm": 1.1168495416641235, "learning_rate": 9.729710526315789e-05, "loss": 0.3262, "step": 30285 }, { "epoch": 1.6959345951394333, "grad_norm": 1.3173397779464722, "learning_rate": 9.729684210526316e-05, "loss": 0.4328, "step": 30286 }, { "epoch": 1.6959905924515624, "grad_norm": 1.8876169919967651, "learning_rate": 9.729657894736842e-05, "loss": 0.4391, "step": 30287 }, { "epoch": 1.6960465897636914, "grad_norm": 1.3588616847991943, "learning_rate": 9.72963157894737e-05, "loss": 0.3272, "step": 30288 }, { "epoch": 1.6961025870758204, "grad_norm": 1.0950651168823242, "learning_rate": 9.729605263157894e-05, "loss": 0.4377, "step": 30289 }, { "epoch": 1.6961585843879494, "grad_norm": 1.5283358097076416, "learning_rate": 9.729578947368422e-05, "loss": 0.4147, "step": 30290 }, { "epoch": 1.6962145817000784, "grad_norm": 1.4009937047958374, "learning_rate": 9.729552631578948e-05, "loss": 0.5298, "step": 30291 }, { "epoch": 1.6962705790122075, "grad_norm": 1.4278610944747925, "learning_rate": 9.729526315789475e-05, "loss": 0.4376, "step": 30292 }, { "epoch": 1.6963265763243365, "grad_norm": 2.084202289581299, "learning_rate": 9.729500000000001e-05, "loss": 0.6569, "step": 30293 }, { "epoch": 1.6963825736364655, "grad_norm": 1.378486156463623, "learning_rate": 9.729473684210527e-05, "loss": 0.4371, "step": 30294 }, { "epoch": 1.6964385709485945, "grad_norm": 1.3103843927383423, "learning_rate": 9.729447368421053e-05, "loss": 0.4739, "step": 30295 }, { "epoch": 1.6964945682607235, "grad_norm": 1.1588494777679443, "learning_rate": 9.72942105263158e-05, "loss": 0.3367, "step": 30296 }, { "epoch": 1.6965505655728526, "grad_norm": 1.5184040069580078, "learning_rate": 9.729394736842106e-05, "loss": 0.7053, "step": 30297 }, { "epoch": 1.6966065628849816, "grad_norm": 1.2309514284133911, "learning_rate": 9.729368421052632e-05, "loss": 
0.3411, "step": 30298 }, { "epoch": 1.6966625601971106, "grad_norm": 1.477604866027832, "learning_rate": 9.729342105263158e-05, "loss": 0.6026, "step": 30299 }, { "epoch": 1.6967185575092396, "grad_norm": 1.1118342876434326, "learning_rate": 9.729315789473684e-05, "loss": 0.4385, "step": 30300 }, { "epoch": 1.6967745548213684, "grad_norm": 1.553444266319275, "learning_rate": 9.729289473684211e-05, "loss": 0.6059, "step": 30301 }, { "epoch": 1.6968305521334974, "grad_norm": 1.1063417196273804, "learning_rate": 9.729263157894737e-05, "loss": 0.3791, "step": 30302 }, { "epoch": 1.6968865494456264, "grad_norm": 1.128703236579895, "learning_rate": 9.729236842105263e-05, "loss": 0.3458, "step": 30303 }, { "epoch": 1.6969425467577555, "grad_norm": 1.374710202217102, "learning_rate": 9.72921052631579e-05, "loss": 0.5214, "step": 30304 }, { "epoch": 1.6969985440698845, "grad_norm": 1.659647822380066, "learning_rate": 9.729184210526317e-05, "loss": 0.5203, "step": 30305 }, { "epoch": 1.6970545413820135, "grad_norm": 1.23285710811615, "learning_rate": 9.729157894736843e-05, "loss": 0.588, "step": 30306 }, { "epoch": 1.6971105386941425, "grad_norm": 1.2525181770324707, "learning_rate": 9.729131578947369e-05, "loss": 0.3612, "step": 30307 }, { "epoch": 1.6971665360062715, "grad_norm": 1.2933460474014282, "learning_rate": 9.729105263157895e-05, "loss": 0.4537, "step": 30308 }, { "epoch": 1.6972225333184006, "grad_norm": 1.0990279912948608, "learning_rate": 9.729078947368422e-05, "loss": 0.3874, "step": 30309 }, { "epoch": 1.6972785306305296, "grad_norm": 1.3627982139587402, "learning_rate": 9.729052631578948e-05, "loss": 0.4063, "step": 30310 }, { "epoch": 1.6973345279426586, "grad_norm": 1.2585375308990479, "learning_rate": 9.729026315789474e-05, "loss": 0.3325, "step": 30311 }, { "epoch": 1.6973905252547876, "grad_norm": 1.0967957973480225, "learning_rate": 9.729e-05, "loss": 0.3409, "step": 30312 }, { "epoch": 1.6974465225669166, "grad_norm": 1.286448359489441, 
"learning_rate": 9.728973684210527e-05, "loss": 0.3923, "step": 30313 }, { "epoch": 1.6975025198790457, "grad_norm": 1.4949727058410645, "learning_rate": 9.728947368421053e-05, "loss": 0.4739, "step": 30314 }, { "epoch": 1.6975585171911747, "grad_norm": 1.621005654335022, "learning_rate": 9.72892105263158e-05, "loss": 0.6308, "step": 30315 }, { "epoch": 1.6976145145033037, "grad_norm": 1.20448637008667, "learning_rate": 9.728894736842105e-05, "loss": 0.4985, "step": 30316 }, { "epoch": 1.6976705118154327, "grad_norm": 1.2671122550964355, "learning_rate": 9.728868421052631e-05, "loss": 0.387, "step": 30317 }, { "epoch": 1.6977265091275617, "grad_norm": 1.430680751800537, "learning_rate": 9.728842105263158e-05, "loss": 0.574, "step": 30318 }, { "epoch": 1.6977825064396908, "grad_norm": 1.3269802331924438, "learning_rate": 9.728815789473684e-05, "loss": 0.5057, "step": 30319 }, { "epoch": 1.6978385037518198, "grad_norm": 2.183607339859009, "learning_rate": 9.728789473684212e-05, "loss": 0.5895, "step": 30320 }, { "epoch": 1.6978945010639488, "grad_norm": 1.1722875833511353, "learning_rate": 9.728763157894736e-05, "loss": 0.3825, "step": 30321 }, { "epoch": 1.6979504983760778, "grad_norm": 1.1483943462371826, "learning_rate": 9.728736842105264e-05, "loss": 0.3443, "step": 30322 }, { "epoch": 1.6980064956882068, "grad_norm": 1.5114575624465942, "learning_rate": 9.72871052631579e-05, "loss": 0.4387, "step": 30323 }, { "epoch": 1.6980624930003358, "grad_norm": 1.5740960836410522, "learning_rate": 9.728684210526317e-05, "loss": 0.4974, "step": 30324 }, { "epoch": 1.6981184903124649, "grad_norm": 1.171128273010254, "learning_rate": 9.728657894736843e-05, "loss": 0.4277, "step": 30325 }, { "epoch": 1.6981744876245939, "grad_norm": 1.138498067855835, "learning_rate": 9.728631578947369e-05, "loss": 0.4725, "step": 30326 }, { "epoch": 1.698230484936723, "grad_norm": 1.3775825500488281, "learning_rate": 9.728605263157895e-05, "loss": 0.5069, "step": 30327 }, { "epoch": 
1.698286482248852, "grad_norm": 1.1714247465133667, "learning_rate": 9.728578947368422e-05, "loss": 0.3434, "step": 30328 }, { "epoch": 1.698342479560981, "grad_norm": 1.5268391370773315, "learning_rate": 9.728552631578948e-05, "loss": 0.3696, "step": 30329 }, { "epoch": 1.69839847687311, "grad_norm": 1.343772292137146, "learning_rate": 9.728526315789474e-05, "loss": 0.5687, "step": 30330 }, { "epoch": 1.698454474185239, "grad_norm": 64.54393005371094, "learning_rate": 9.7285e-05, "loss": 0.4503, "step": 30331 }, { "epoch": 1.698510471497368, "grad_norm": 1.3343149423599243, "learning_rate": 9.728473684210527e-05, "loss": 0.4719, "step": 30332 }, { "epoch": 1.698566468809497, "grad_norm": 1.236086130142212, "learning_rate": 9.728447368421053e-05, "loss": 0.4541, "step": 30333 }, { "epoch": 1.698622466121626, "grad_norm": 1.2689766883850098, "learning_rate": 9.72842105263158e-05, "loss": 0.4627, "step": 30334 }, { "epoch": 1.698678463433755, "grad_norm": 1.5581690073013306, "learning_rate": 9.728394736842105e-05, "loss": 0.576, "step": 30335 }, { "epoch": 1.698734460745884, "grad_norm": 1.436468482017517, "learning_rate": 9.728368421052631e-05, "loss": 0.4485, "step": 30336 }, { "epoch": 1.698790458058013, "grad_norm": 1.2061339616775513, "learning_rate": 9.728342105263159e-05, "loss": 0.4498, "step": 30337 }, { "epoch": 1.6988464553701421, "grad_norm": 1.6089482307434082, "learning_rate": 9.728315789473685e-05, "loss": 0.5003, "step": 30338 }, { "epoch": 1.6989024526822711, "grad_norm": 1.0775550603866577, "learning_rate": 9.72828947368421e-05, "loss": 0.3458, "step": 30339 }, { "epoch": 1.6989584499944002, "grad_norm": 1.5468053817749023, "learning_rate": 9.728263157894737e-05, "loss": 0.7323, "step": 30340 }, { "epoch": 1.6990144473065292, "grad_norm": 1.2649682760238647, "learning_rate": 9.728236842105264e-05, "loss": 0.4295, "step": 30341 }, { "epoch": 1.6990704446186582, "grad_norm": 1.3385552167892456, "learning_rate": 9.72821052631579e-05, "loss": 0.5028, 
"step": 30342 }, { "epoch": 1.6991264419307872, "grad_norm": 1.2830957174301147, "learning_rate": 9.728184210526317e-05, "loss": 0.4697, "step": 30343 }, { "epoch": 1.6991824392429162, "grad_norm": 2.1357758045196533, "learning_rate": 9.728157894736842e-05, "loss": 0.4494, "step": 30344 }, { "epoch": 1.6992384365550453, "grad_norm": 1.2747297286987305, "learning_rate": 9.728131578947369e-05, "loss": 0.5061, "step": 30345 }, { "epoch": 1.6992944338671743, "grad_norm": 1.3982797861099243, "learning_rate": 9.728105263157895e-05, "loss": 0.4909, "step": 30346 }, { "epoch": 1.6993504311793033, "grad_norm": 1.4929579496383667, "learning_rate": 9.728078947368422e-05, "loss": 0.5826, "step": 30347 }, { "epoch": 1.6994064284914323, "grad_norm": 1.6980502605438232, "learning_rate": 9.728052631578948e-05, "loss": 0.5291, "step": 30348 }, { "epoch": 1.6994624258035613, "grad_norm": 1.1305783987045288, "learning_rate": 9.728026315789474e-05, "loss": 0.4975, "step": 30349 }, { "epoch": 1.6995184231156903, "grad_norm": 1.2212214469909668, "learning_rate": 9.728e-05, "loss": 0.4564, "step": 30350 }, { "epoch": 1.6995744204278194, "grad_norm": 1.3931679725646973, "learning_rate": 9.727973684210526e-05, "loss": 0.6455, "step": 30351 }, { "epoch": 1.6996304177399484, "grad_norm": 1.271008014678955, "learning_rate": 9.727947368421054e-05, "loss": 0.4099, "step": 30352 }, { "epoch": 1.6996864150520774, "grad_norm": 1.3081786632537842, "learning_rate": 9.727921052631578e-05, "loss": 0.4813, "step": 30353 }, { "epoch": 1.6997424123642064, "grad_norm": 1.37801194190979, "learning_rate": 9.727894736842106e-05, "loss": 0.451, "step": 30354 }, { "epoch": 1.6997984096763354, "grad_norm": 1.640711784362793, "learning_rate": 9.727868421052632e-05, "loss": 0.5858, "step": 30355 }, { "epoch": 1.6998544069884645, "grad_norm": 1.121232032775879, "learning_rate": 9.727842105263159e-05, "loss": 0.4415, "step": 30356 }, { "epoch": 1.6999104043005935, "grad_norm": 1.3477303981781006, "learning_rate": 
9.727815789473685e-05, "loss": 0.4505, "step": 30357 }, { "epoch": 1.6999664016127225, "grad_norm": 2.0943679809570312, "learning_rate": 9.727789473684211e-05, "loss": 0.4611, "step": 30358 }, { "epoch": 1.7000223989248515, "grad_norm": 1.7255926132202148, "learning_rate": 9.727763157894737e-05, "loss": 0.4871, "step": 30359 }, { "epoch": 1.7000783962369805, "grad_norm": 1.3967081308364868, "learning_rate": 9.727736842105264e-05, "loss": 0.4439, "step": 30360 }, { "epoch": 1.7001343935491096, "grad_norm": 1.5152515172958374, "learning_rate": 9.72771052631579e-05, "loss": 0.4556, "step": 30361 }, { "epoch": 1.7001903908612386, "grad_norm": 1.40230131149292, "learning_rate": 9.727684210526316e-05, "loss": 0.5182, "step": 30362 }, { "epoch": 1.7002463881733676, "grad_norm": 1.2221062183380127, "learning_rate": 9.727657894736842e-05, "loss": 0.4226, "step": 30363 }, { "epoch": 1.7003023854854966, "grad_norm": 6.97371768951416, "learning_rate": 9.72763157894737e-05, "loss": 0.4474, "step": 30364 }, { "epoch": 1.7003583827976256, "grad_norm": 1.3779109716415405, "learning_rate": 9.727605263157895e-05, "loss": 0.4449, "step": 30365 }, { "epoch": 1.7004143801097547, "grad_norm": 1.1985836029052734, "learning_rate": 9.727578947368421e-05, "loss": 0.3976, "step": 30366 }, { "epoch": 1.7004703774218837, "grad_norm": 1.293669581413269, "learning_rate": 9.727552631578947e-05, "loss": 0.4866, "step": 30367 }, { "epoch": 1.7005263747340127, "grad_norm": 1.30210280418396, "learning_rate": 9.727526315789473e-05, "loss": 0.4353, "step": 30368 }, { "epoch": 1.7005823720461417, "grad_norm": 1.4130703210830688, "learning_rate": 9.7275e-05, "loss": 0.4157, "step": 30369 }, { "epoch": 1.7006383693582707, "grad_norm": 1.3582077026367188, "learning_rate": 9.727473684210527e-05, "loss": 0.5089, "step": 30370 }, { "epoch": 1.7006943666703997, "grad_norm": 1.5402058362960815, "learning_rate": 9.727447368421053e-05, "loss": 0.5706, "step": 30371 }, { "epoch": 1.7007503639825288, "grad_norm": 
1.088727593421936, "learning_rate": 9.727421052631579e-05, "loss": 0.3592, "step": 30372 }, { "epoch": 1.7008063612946578, "grad_norm": 1.4988527297973633, "learning_rate": 9.727394736842106e-05, "loss": 0.3488, "step": 30373 }, { "epoch": 1.7008623586067868, "grad_norm": 1.0835514068603516, "learning_rate": 9.727368421052632e-05, "loss": 0.3906, "step": 30374 }, { "epoch": 1.7009183559189158, "grad_norm": 1.2537224292755127, "learning_rate": 9.727342105263159e-05, "loss": 0.4103, "step": 30375 }, { "epoch": 1.7009743532310448, "grad_norm": 1.2662135362625122, "learning_rate": 9.727315789473684e-05, "loss": 0.4934, "step": 30376 }, { "epoch": 1.7010303505431739, "grad_norm": 1.1604135036468506, "learning_rate": 9.727289473684211e-05, "loss": 0.3871, "step": 30377 }, { "epoch": 1.7010863478553029, "grad_norm": 1.2488824129104614, "learning_rate": 9.727263157894737e-05, "loss": 0.3867, "step": 30378 }, { "epoch": 1.701142345167432, "grad_norm": 1.395504355430603, "learning_rate": 9.727236842105264e-05, "loss": 0.4575, "step": 30379 }, { "epoch": 1.701198342479561, "grad_norm": 1.2531800270080566, "learning_rate": 9.72721052631579e-05, "loss": 0.4608, "step": 30380 }, { "epoch": 1.70125433979169, "grad_norm": 1.281958818435669, "learning_rate": 9.727184210526316e-05, "loss": 0.4547, "step": 30381 }, { "epoch": 1.701310337103819, "grad_norm": 1.3307981491088867, "learning_rate": 9.727157894736842e-05, "loss": 0.3593, "step": 30382 }, { "epoch": 1.701366334415948, "grad_norm": 1.6306116580963135, "learning_rate": 9.72713157894737e-05, "loss": 0.58, "step": 30383 }, { "epoch": 1.701422331728077, "grad_norm": 1.287005066871643, "learning_rate": 9.727105263157896e-05, "loss": 0.4737, "step": 30384 }, { "epoch": 1.701478329040206, "grad_norm": 1.359609603881836, "learning_rate": 9.727078947368422e-05, "loss": 0.5492, "step": 30385 }, { "epoch": 1.701534326352335, "grad_norm": 1.3711774349212646, "learning_rate": 9.727052631578948e-05, "loss": 0.4229, "step": 30386 }, { 
"epoch": 1.701590323664464, "grad_norm": 1.237109899520874, "learning_rate": 9.727026315789474e-05, "loss": 0.4392, "step": 30387 }, { "epoch": 1.701646320976593, "grad_norm": 1.3945149183273315, "learning_rate": 9.727000000000001e-05, "loss": 0.4245, "step": 30388 }, { "epoch": 1.701702318288722, "grad_norm": 1.4769854545593262, "learning_rate": 9.726973684210527e-05, "loss": 0.4906, "step": 30389 }, { "epoch": 1.7017583156008511, "grad_norm": 1.082001805305481, "learning_rate": 9.726947368421053e-05, "loss": 0.3421, "step": 30390 }, { "epoch": 1.7018143129129801, "grad_norm": 1.4194084405899048, "learning_rate": 9.726921052631579e-05, "loss": 0.4602, "step": 30391 }, { "epoch": 1.7018703102251092, "grad_norm": 1.1614925861358643, "learning_rate": 9.726894736842106e-05, "loss": 0.5332, "step": 30392 }, { "epoch": 1.7019263075372382, "grad_norm": 1.0861917734146118, "learning_rate": 9.726868421052632e-05, "loss": 0.377, "step": 30393 }, { "epoch": 1.7019823048493672, "grad_norm": 1.1531882286071777, "learning_rate": 9.726842105263158e-05, "loss": 0.4217, "step": 30394 }, { "epoch": 1.7020383021614962, "grad_norm": 1.6344202756881714, "learning_rate": 9.726815789473684e-05, "loss": 0.4903, "step": 30395 }, { "epoch": 1.7020942994736252, "grad_norm": 1.258263349533081, "learning_rate": 9.726789473684211e-05, "loss": 0.486, "step": 30396 }, { "epoch": 1.7021502967857542, "grad_norm": 1.3069782257080078, "learning_rate": 9.726763157894737e-05, "loss": 0.4587, "step": 30397 }, { "epoch": 1.7022062940978833, "grad_norm": 1.4569857120513916, "learning_rate": 9.726736842105265e-05, "loss": 0.4588, "step": 30398 }, { "epoch": 1.7022622914100123, "grad_norm": 1.7946926355361938, "learning_rate": 9.726710526315789e-05, "loss": 0.6473, "step": 30399 }, { "epoch": 1.7023182887221413, "grad_norm": 1.431936502456665, "learning_rate": 9.726684210526317e-05, "loss": 0.457, "step": 30400 }, { "epoch": 1.7023742860342703, "grad_norm": 1.1445733308792114, "learning_rate": 
9.726657894736843e-05, "loss": 0.4208, "step": 30401 }, { "epoch": 1.7024302833463993, "grad_norm": 1.2694015502929688, "learning_rate": 9.72663157894737e-05, "loss": 0.3792, "step": 30402 }, { "epoch": 1.7024862806585284, "grad_norm": 1.0210872888565063, "learning_rate": 9.726605263157895e-05, "loss": 0.4022, "step": 30403 }, { "epoch": 1.7025422779706574, "grad_norm": 4.105430603027344, "learning_rate": 9.72657894736842e-05, "loss": 0.584, "step": 30404 }, { "epoch": 1.7025982752827864, "grad_norm": 1.0736076831817627, "learning_rate": 9.726552631578948e-05, "loss": 0.4297, "step": 30405 }, { "epoch": 1.7026542725949154, "grad_norm": 1.2866970300674438, "learning_rate": 9.726526315789474e-05, "loss": 0.4977, "step": 30406 }, { "epoch": 1.7027102699070444, "grad_norm": 1.7026922702789307, "learning_rate": 9.726500000000001e-05, "loss": 0.5121, "step": 30407 }, { "epoch": 1.7027662672191735, "grad_norm": 1.3127936124801636, "learning_rate": 9.726473684210526e-05, "loss": 0.4201, "step": 30408 }, { "epoch": 1.7028222645313025, "grad_norm": 1.456664800643921, "learning_rate": 9.726447368421053e-05, "loss": 0.6082, "step": 30409 }, { "epoch": 1.7028782618434315, "grad_norm": 1.3231877088546753, "learning_rate": 9.726421052631579e-05, "loss": 0.496, "step": 30410 }, { "epoch": 1.7029342591555605, "grad_norm": 1.4809296131134033, "learning_rate": 9.726394736842106e-05, "loss": 0.4325, "step": 30411 }, { "epoch": 1.7029902564676895, "grad_norm": 1.2596771717071533, "learning_rate": 9.726368421052632e-05, "loss": 0.4379, "step": 30412 }, { "epoch": 1.7030462537798186, "grad_norm": 1.4474269151687622, "learning_rate": 9.726342105263158e-05, "loss": 0.5362, "step": 30413 }, { "epoch": 1.7031022510919476, "grad_norm": 1.1639429330825806, "learning_rate": 9.726315789473684e-05, "loss": 0.353, "step": 30414 }, { "epoch": 1.7031582484040766, "grad_norm": 1.225325107574463, "learning_rate": 9.726289473684212e-05, "loss": 0.3527, "step": 30415 }, { "epoch": 1.7032142457162056, 
"grad_norm": 1.3113058805465698, "learning_rate": 9.726263157894738e-05, "loss": 0.5815, "step": 30416 }, { "epoch": 1.7032702430283346, "grad_norm": 1.1792465448379517, "learning_rate": 9.726236842105264e-05, "loss": 0.5102, "step": 30417 }, { "epoch": 1.7033262403404636, "grad_norm": 1.6666264533996582, "learning_rate": 9.72621052631579e-05, "loss": 0.5822, "step": 30418 }, { "epoch": 1.7033822376525927, "grad_norm": 1.060243010520935, "learning_rate": 9.726184210526317e-05, "loss": 0.408, "step": 30419 }, { "epoch": 1.7034382349647217, "grad_norm": 1.1443876028060913, "learning_rate": 9.726157894736843e-05, "loss": 0.3858, "step": 30420 }, { "epoch": 1.7034942322768507, "grad_norm": 1.060678482055664, "learning_rate": 9.726131578947369e-05, "loss": 0.399, "step": 30421 }, { "epoch": 1.7035502295889797, "grad_norm": 1.4833637475967407, "learning_rate": 9.726105263157895e-05, "loss": 0.466, "step": 30422 }, { "epoch": 1.7036062269011087, "grad_norm": 1.1668732166290283, "learning_rate": 9.726078947368421e-05, "loss": 0.3098, "step": 30423 }, { "epoch": 1.7036622242132378, "grad_norm": 1.498699426651001, "learning_rate": 9.726052631578948e-05, "loss": 0.543, "step": 30424 }, { "epoch": 1.7037182215253668, "grad_norm": 1.2083587646484375, "learning_rate": 9.726026315789474e-05, "loss": 0.3934, "step": 30425 }, { "epoch": 1.7037742188374958, "grad_norm": 1.2850645780563354, "learning_rate": 9.726e-05, "loss": 0.5364, "step": 30426 }, { "epoch": 1.7038302161496248, "grad_norm": 1.451698899269104, "learning_rate": 9.725973684210526e-05, "loss": 0.4178, "step": 30427 }, { "epoch": 1.7038862134617538, "grad_norm": 1.3936408758163452, "learning_rate": 9.725947368421053e-05, "loss": 0.4768, "step": 30428 }, { "epoch": 1.7039422107738829, "grad_norm": 1.2527719736099243, "learning_rate": 9.72592105263158e-05, "loss": 0.3909, "step": 30429 }, { "epoch": 1.7039982080860119, "grad_norm": 1.081490159034729, "learning_rate": 9.725894736842107e-05, "loss": 0.4346, "step": 30430 
}, { "epoch": 1.704054205398141, "grad_norm": 1.1686931848526, "learning_rate": 9.725868421052631e-05, "loss": 0.4694, "step": 30431 }, { "epoch": 1.70411020271027, "grad_norm": 1.3607361316680908, "learning_rate": 9.725842105263159e-05, "loss": 0.4416, "step": 30432 }, { "epoch": 1.704166200022399, "grad_norm": 1.2696168422698975, "learning_rate": 9.725815789473685e-05, "loss": 0.4423, "step": 30433 }, { "epoch": 1.704222197334528, "grad_norm": 1.1625663042068481, "learning_rate": 9.725789473684212e-05, "loss": 0.3651, "step": 30434 }, { "epoch": 1.704278194646657, "grad_norm": 1.2001128196716309, "learning_rate": 9.725763157894738e-05, "loss": 0.3825, "step": 30435 }, { "epoch": 1.704334191958786, "grad_norm": 1.3356788158416748, "learning_rate": 9.725736842105264e-05, "loss": 0.4432, "step": 30436 }, { "epoch": 1.704390189270915, "grad_norm": 1.427314043045044, "learning_rate": 9.72571052631579e-05, "loss": 0.4516, "step": 30437 }, { "epoch": 1.704446186583044, "grad_norm": 1.3588881492614746, "learning_rate": 9.725684210526316e-05, "loss": 0.5084, "step": 30438 }, { "epoch": 1.704502183895173, "grad_norm": 1.1967135667800903, "learning_rate": 9.725657894736843e-05, "loss": 0.3756, "step": 30439 }, { "epoch": 1.704558181207302, "grad_norm": 1.4664340019226074, "learning_rate": 9.725631578947369e-05, "loss": 0.5995, "step": 30440 }, { "epoch": 1.704614178519431, "grad_norm": 1.5603938102722168, "learning_rate": 9.725605263157895e-05, "loss": 0.6902, "step": 30441 }, { "epoch": 1.70467017583156, "grad_norm": 1.2841347455978394, "learning_rate": 9.725578947368421e-05, "loss": 0.3792, "step": 30442 }, { "epoch": 1.7047261731436891, "grad_norm": 1.312217354774475, "learning_rate": 9.725552631578948e-05, "loss": 0.4777, "step": 30443 }, { "epoch": 1.7047821704558181, "grad_norm": 1.2578442096710205, "learning_rate": 9.725526315789474e-05, "loss": 0.4364, "step": 30444 }, { "epoch": 1.7048381677679472, "grad_norm": 1.2755889892578125, "learning_rate": 9.7255e-05, 
"loss": 0.4185, "step": 30445 }, { "epoch": 1.7048941650800762, "grad_norm": 1.2359391450881958, "learning_rate": 9.725473684210526e-05, "loss": 0.4579, "step": 30446 }, { "epoch": 1.7049501623922052, "grad_norm": 1.344716191291809, "learning_rate": 9.725447368421054e-05, "loss": 0.5177, "step": 30447 }, { "epoch": 1.7050061597043342, "grad_norm": 1.423856258392334, "learning_rate": 9.72542105263158e-05, "loss": 0.5039, "step": 30448 }, { "epoch": 1.7050621570164632, "grad_norm": 1.1566288471221924, "learning_rate": 9.725394736842106e-05, "loss": 0.4561, "step": 30449 }, { "epoch": 1.7051181543285923, "grad_norm": 1.3783998489379883, "learning_rate": 9.725368421052632e-05, "loss": 0.4719, "step": 30450 }, { "epoch": 1.7051741516407213, "grad_norm": 1.2949869632720947, "learning_rate": 9.725342105263159e-05, "loss": 0.613, "step": 30451 }, { "epoch": 1.7052301489528503, "grad_norm": 1.324256181716919, "learning_rate": 9.725315789473685e-05, "loss": 0.3451, "step": 30452 }, { "epoch": 1.7052861462649793, "grad_norm": 1.0943481922149658, "learning_rate": 9.725289473684212e-05, "loss": 0.3402, "step": 30453 }, { "epoch": 1.7053421435771083, "grad_norm": 1.1040737628936768, "learning_rate": 9.725263157894737e-05, "loss": 0.4539, "step": 30454 }, { "epoch": 1.7053981408892374, "grad_norm": 1.1457338333129883, "learning_rate": 9.725236842105263e-05, "loss": 0.4564, "step": 30455 }, { "epoch": 1.7054541382013664, "grad_norm": 1.266891360282898, "learning_rate": 9.72521052631579e-05, "loss": 0.3382, "step": 30456 }, { "epoch": 1.7055101355134954, "grad_norm": 1.2966458797454834, "learning_rate": 9.725184210526316e-05, "loss": 0.4611, "step": 30457 }, { "epoch": 1.7055661328256244, "grad_norm": 1.471893548965454, "learning_rate": 9.725157894736842e-05, "loss": 0.4955, "step": 30458 }, { "epoch": 1.7056221301377534, "grad_norm": 1.342434048652649, "learning_rate": 9.725131578947368e-05, "loss": 0.4862, "step": 30459 }, { "epoch": 1.7056781274498825, "grad_norm": 
1.1808223724365234, "learning_rate": 9.725105263157895e-05, "loss": 0.3575, "step": 30460 }, { "epoch": 1.7057341247620115, "grad_norm": 1.2905265092849731, "learning_rate": 9.725078947368421e-05, "loss": 0.5352, "step": 30461 }, { "epoch": 1.7057901220741405, "grad_norm": 1.2809385061264038, "learning_rate": 9.725052631578949e-05, "loss": 0.5049, "step": 30462 }, { "epoch": 1.7058461193862695, "grad_norm": 1.2980560064315796, "learning_rate": 9.725026315789473e-05, "loss": 0.3592, "step": 30463 }, { "epoch": 1.7059021166983985, "grad_norm": 1.2398675680160522, "learning_rate": 9.725e-05, "loss": 0.4474, "step": 30464 }, { "epoch": 1.7059581140105275, "grad_norm": 1.3723958730697632, "learning_rate": 9.724973684210527e-05, "loss": 0.6417, "step": 30465 }, { "epoch": 1.7060141113226566, "grad_norm": 1.1518769264221191, "learning_rate": 9.724947368421054e-05, "loss": 0.3915, "step": 30466 }, { "epoch": 1.7060701086347856, "grad_norm": 1.6506885290145874, "learning_rate": 9.72492105263158e-05, "loss": 0.4685, "step": 30467 }, { "epoch": 1.7061261059469146, "grad_norm": 1.1885184049606323, "learning_rate": 9.724894736842106e-05, "loss": 0.4079, "step": 30468 }, { "epoch": 1.7061821032590436, "grad_norm": 1.6102256774902344, "learning_rate": 9.724868421052632e-05, "loss": 0.4723, "step": 30469 }, { "epoch": 1.7062381005711726, "grad_norm": 1.0847893953323364, "learning_rate": 9.724842105263159e-05, "loss": 0.4572, "step": 30470 }, { "epoch": 1.7062940978833017, "grad_norm": 1.1210451126098633, "learning_rate": 9.724815789473685e-05, "loss": 0.3528, "step": 30471 }, { "epoch": 1.7063500951954307, "grad_norm": 1.3248612880706787, "learning_rate": 9.724789473684211e-05, "loss": 0.5108, "step": 30472 }, { "epoch": 1.7064060925075597, "grad_norm": 1.2521291971206665, "learning_rate": 9.724763157894737e-05, "loss": 0.4766, "step": 30473 }, { "epoch": 1.7064620898196887, "grad_norm": 1.1710253953933716, "learning_rate": 9.724736842105263e-05, "loss": 0.5097, "step": 30474 }, { 
"epoch": 1.7065180871318177, "grad_norm": 1.3462587594985962, "learning_rate": 9.72471052631579e-05, "loss": 0.5612, "step": 30475 }, { "epoch": 1.7065740844439468, "grad_norm": 1.2117505073547363, "learning_rate": 9.724684210526316e-05, "loss": 0.4326, "step": 30476 }, { "epoch": 1.7066300817560758, "grad_norm": 1.5765436887741089, "learning_rate": 9.724657894736842e-05, "loss": 0.7118, "step": 30477 }, { "epoch": 1.7066860790682048, "grad_norm": 1.361477017402649, "learning_rate": 9.724631578947368e-05, "loss": 0.3543, "step": 30478 }, { "epoch": 1.7067420763803338, "grad_norm": 1.6338622570037842, "learning_rate": 9.724605263157896e-05, "loss": 0.528, "step": 30479 }, { "epoch": 1.7067980736924628, "grad_norm": 1.4930986166000366, "learning_rate": 9.724578947368422e-05, "loss": 0.5079, "step": 30480 }, { "epoch": 1.7068540710045919, "grad_norm": 1.363598108291626, "learning_rate": 9.724552631578948e-05, "loss": 0.4515, "step": 30481 }, { "epoch": 1.7069100683167209, "grad_norm": 1.3108935356140137, "learning_rate": 9.724526315789474e-05, "loss": 0.5058, "step": 30482 }, { "epoch": 1.70696606562885, "grad_norm": 1.2128300666809082, "learning_rate": 9.724500000000001e-05, "loss": 0.5126, "step": 30483 }, { "epoch": 1.707022062940979, "grad_norm": 1.2338123321533203, "learning_rate": 9.724473684210527e-05, "loss": 0.3588, "step": 30484 }, { "epoch": 1.707078060253108, "grad_norm": 1.3296399116516113, "learning_rate": 9.724447368421054e-05, "loss": 0.4617, "step": 30485 }, { "epoch": 1.707134057565237, "grad_norm": 1.0531251430511475, "learning_rate": 9.724421052631579e-05, "loss": 0.3619, "step": 30486 }, { "epoch": 1.707190054877366, "grad_norm": 1.1226277351379395, "learning_rate": 9.724394736842106e-05, "loss": 0.3713, "step": 30487 }, { "epoch": 1.707246052189495, "grad_norm": 1.1927179098129272, "learning_rate": 9.724368421052632e-05, "loss": 0.3629, "step": 30488 }, { "epoch": 1.707302049501624, "grad_norm": 1.546406865119934, "learning_rate": 
9.724342105263158e-05, "loss": 0.6288, "step": 30489 }, { "epoch": 1.707358046813753, "grad_norm": 1.3373161554336548, "learning_rate": 9.724315789473685e-05, "loss": 0.3687, "step": 30490 }, { "epoch": 1.707414044125882, "grad_norm": 1.1055089235305786, "learning_rate": 9.72428947368421e-05, "loss": 0.4005, "step": 30491 }, { "epoch": 1.707470041438011, "grad_norm": 1.1816829442977905, "learning_rate": 9.724263157894737e-05, "loss": 0.4471, "step": 30492 }, { "epoch": 1.70752603875014, "grad_norm": 1.4974868297576904, "learning_rate": 9.724236842105263e-05, "loss": 0.4795, "step": 30493 }, { "epoch": 1.707582036062269, "grad_norm": 1.2631924152374268, "learning_rate": 9.72421052631579e-05, "loss": 0.4412, "step": 30494 }, { "epoch": 1.7076380333743981, "grad_norm": 1.2478843927383423, "learning_rate": 9.724184210526317e-05, "loss": 0.5724, "step": 30495 }, { "epoch": 1.7076940306865271, "grad_norm": 1.5070134401321411, "learning_rate": 9.724157894736843e-05, "loss": 0.5999, "step": 30496 }, { "epoch": 1.7077500279986562, "grad_norm": 1.4186972379684448, "learning_rate": 9.724131578947369e-05, "loss": 0.4678, "step": 30497 }, { "epoch": 1.7078060253107852, "grad_norm": 1.4076356887817383, "learning_rate": 9.724105263157896e-05, "loss": 0.471, "step": 30498 }, { "epoch": 1.7078620226229142, "grad_norm": 1.4378857612609863, "learning_rate": 9.724078947368422e-05, "loss": 0.4323, "step": 30499 }, { "epoch": 1.7079180199350432, "grad_norm": 1.23332941532135, "learning_rate": 9.724052631578948e-05, "loss": 0.5648, "step": 30500 }, { "epoch": 1.7079740172471722, "grad_norm": 1.2035006284713745, "learning_rate": 9.724026315789474e-05, "loss": 0.4644, "step": 30501 }, { "epoch": 1.7080300145593013, "grad_norm": 1.1639639139175415, "learning_rate": 9.724000000000001e-05, "loss": 0.4612, "step": 30502 }, { "epoch": 1.7080860118714303, "grad_norm": 1.1637418270111084, "learning_rate": 9.723973684210527e-05, "loss": 0.3284, "step": 30503 }, { "epoch": 1.7081420091835593, 
"grad_norm": 1.4097344875335693, "learning_rate": 9.723947368421053e-05, "loss": 0.5028, "step": 30504 }, { "epoch": 1.7081980064956883, "grad_norm": 1.4470453262329102, "learning_rate": 9.723921052631579e-05, "loss": 0.4567, "step": 30505 }, { "epoch": 1.7082540038078173, "grad_norm": 1.3159265518188477, "learning_rate": 9.723894736842106e-05, "loss": 0.4034, "step": 30506 }, { "epoch": 1.7083100011199464, "grad_norm": 2.2175371646881104, "learning_rate": 9.723868421052632e-05, "loss": 0.4138, "step": 30507 }, { "epoch": 1.7083659984320754, "grad_norm": 1.3051400184631348, "learning_rate": 9.723842105263158e-05, "loss": 0.4557, "step": 30508 }, { "epoch": 1.7084219957442044, "grad_norm": 1.4340587854385376, "learning_rate": 9.723815789473684e-05, "loss": 0.3551, "step": 30509 }, { "epoch": 1.7084779930563334, "grad_norm": 1.2266170978546143, "learning_rate": 9.72378947368421e-05, "loss": 0.4332, "step": 30510 }, { "epoch": 1.7085339903684624, "grad_norm": 1.3109291791915894, "learning_rate": 9.723763157894738e-05, "loss": 0.41, "step": 30511 }, { "epoch": 1.7085899876805914, "grad_norm": 1.437774658203125, "learning_rate": 9.723736842105264e-05, "loss": 0.4451, "step": 30512 }, { "epoch": 1.7086459849927205, "grad_norm": 1.1644784212112427, "learning_rate": 9.72371052631579e-05, "loss": 0.2779, "step": 30513 }, { "epoch": 1.7087019823048495, "grad_norm": 1.1141005754470825, "learning_rate": 9.723684210526315e-05, "loss": 0.4475, "step": 30514 }, { "epoch": 1.7087579796169785, "grad_norm": 1.309321403503418, "learning_rate": 9.723657894736843e-05, "loss": 0.3756, "step": 30515 }, { "epoch": 1.7088139769291075, "grad_norm": 1.527395248413086, "learning_rate": 9.723631578947369e-05, "loss": 0.5072, "step": 30516 }, { "epoch": 1.7088699742412365, "grad_norm": 1.4535458087921143, "learning_rate": 9.723605263157896e-05, "loss": 0.408, "step": 30517 }, { "epoch": 1.7089259715533656, "grad_norm": 1.2554166316986084, "learning_rate": 9.723578947368421e-05, "loss": 0.5063, 
"step": 30518 }, { "epoch": 1.7089819688654946, "grad_norm": 1.2566369771957397, "learning_rate": 9.723552631578948e-05, "loss": 0.4132, "step": 30519 }, { "epoch": 1.7090379661776236, "grad_norm": 1.10805344581604, "learning_rate": 9.723526315789474e-05, "loss": 0.4375, "step": 30520 }, { "epoch": 1.7090939634897526, "grad_norm": 1.84591543674469, "learning_rate": 9.723500000000001e-05, "loss": 0.5896, "step": 30521 }, { "epoch": 1.7091499608018816, "grad_norm": 1.3341203927993774, "learning_rate": 9.723473684210527e-05, "loss": 0.3631, "step": 30522 }, { "epoch": 1.7092059581140107, "grad_norm": 1.2211353778839111, "learning_rate": 9.723447368421053e-05, "loss": 0.4661, "step": 30523 }, { "epoch": 1.7092619554261397, "grad_norm": 1.682071566581726, "learning_rate": 9.723421052631579e-05, "loss": 0.5394, "step": 30524 }, { "epoch": 1.7093179527382687, "grad_norm": 1.1894397735595703, "learning_rate": 9.723394736842105e-05, "loss": 0.3991, "step": 30525 }, { "epoch": 1.7093739500503977, "grad_norm": 1.1776165962219238, "learning_rate": 9.723368421052633e-05, "loss": 0.3585, "step": 30526 }, { "epoch": 1.7094299473625267, "grad_norm": 1.2605681419372559, "learning_rate": 9.723342105263159e-05, "loss": 0.485, "step": 30527 }, { "epoch": 1.7094859446746558, "grad_norm": 1.5534437894821167, "learning_rate": 9.723315789473685e-05, "loss": 0.5367, "step": 30528 }, { "epoch": 1.7095419419867848, "grad_norm": 1.427203893661499, "learning_rate": 9.72328947368421e-05, "loss": 0.4518, "step": 30529 }, { "epoch": 1.7095979392989138, "grad_norm": 1.098892092704773, "learning_rate": 9.723263157894738e-05, "loss": 0.3718, "step": 30530 }, { "epoch": 1.7096539366110428, "grad_norm": 1.3872413635253906, "learning_rate": 9.723236842105264e-05, "loss": 0.4475, "step": 30531 }, { "epoch": 1.7097099339231718, "grad_norm": 1.3392170667648315, "learning_rate": 9.72321052631579e-05, "loss": 0.5314, "step": 30532 }, { "epoch": 1.7097659312353009, "grad_norm": 1.3937572240829468, 
"learning_rate": 9.723184210526316e-05, "loss": 0.4211, "step": 30533 }, { "epoch": 1.7098219285474299, "grad_norm": 1.1660832166671753, "learning_rate": 9.723157894736843e-05, "loss": 0.4424, "step": 30534 }, { "epoch": 1.709877925859559, "grad_norm": 1.8650375604629517, "learning_rate": 9.723131578947369e-05, "loss": 0.4601, "step": 30535 }, { "epoch": 1.709933923171688, "grad_norm": 1.4749939441680908, "learning_rate": 9.723105263157895e-05, "loss": 0.4636, "step": 30536 }, { "epoch": 1.709989920483817, "grad_norm": 1.7622720003128052, "learning_rate": 9.723078947368421e-05, "loss": 0.5261, "step": 30537 }, { "epoch": 1.710045917795946, "grad_norm": 1.921120047569275, "learning_rate": 9.723052631578948e-05, "loss": 0.4489, "step": 30538 }, { "epoch": 1.710101915108075, "grad_norm": 1.7017356157302856, "learning_rate": 9.723026315789474e-05, "loss": 0.4543, "step": 30539 }, { "epoch": 1.710157912420204, "grad_norm": 1.1812225580215454, "learning_rate": 9.723000000000002e-05, "loss": 0.3206, "step": 30540 }, { "epoch": 1.710213909732333, "grad_norm": 1.267765760421753, "learning_rate": 9.722973684210526e-05, "loss": 0.4081, "step": 30541 }, { "epoch": 1.710269907044462, "grad_norm": 5.9477667808532715, "learning_rate": 9.722947368421052e-05, "loss": 0.4174, "step": 30542 }, { "epoch": 1.710325904356591, "grad_norm": 1.1593431234359741, "learning_rate": 9.72292105263158e-05, "loss": 0.4871, "step": 30543 }, { "epoch": 1.71038190166872, "grad_norm": 1.4129647016525269, "learning_rate": 9.722894736842105e-05, "loss": 0.5077, "step": 30544 }, { "epoch": 1.710437898980849, "grad_norm": 1.1454287767410278, "learning_rate": 9.722868421052633e-05, "loss": 0.596, "step": 30545 }, { "epoch": 1.710493896292978, "grad_norm": 1.4092313051223755, "learning_rate": 9.722842105263157e-05, "loss": 0.5816, "step": 30546 }, { "epoch": 1.7105498936051071, "grad_norm": 1.3189082145690918, "learning_rate": 9.722815789473685e-05, "loss": 0.4882, "step": 30547 }, { "epoch": 
1.7106058909172361, "grad_norm": 1.5342934131622314, "learning_rate": 9.722789473684211e-05, "loss": 0.3852, "step": 30548 }, { "epoch": 1.710661888229365, "grad_norm": 1.220430850982666, "learning_rate": 9.722763157894738e-05, "loss": 0.5128, "step": 30549 }, { "epoch": 1.710717885541494, "grad_norm": 1.8775285482406616, "learning_rate": 9.722736842105263e-05, "loss": 0.5091, "step": 30550 }, { "epoch": 1.710773882853623, "grad_norm": 1.3726714849472046, "learning_rate": 9.72271052631579e-05, "loss": 0.5295, "step": 30551 }, { "epoch": 1.710829880165752, "grad_norm": 1.4516642093658447, "learning_rate": 9.722684210526316e-05, "loss": 0.4134, "step": 30552 }, { "epoch": 1.710885877477881, "grad_norm": 1.3525302410125732, "learning_rate": 9.722657894736843e-05, "loss": 0.4878, "step": 30553 }, { "epoch": 1.71094187479001, "grad_norm": 1.3447599411010742, "learning_rate": 9.722631578947369e-05, "loss": 0.565, "step": 30554 }, { "epoch": 1.710997872102139, "grad_norm": 1.1941272020339966, "learning_rate": 9.722605263157895e-05, "loss": 0.3563, "step": 30555 }, { "epoch": 1.711053869414268, "grad_norm": 1.3645598888397217, "learning_rate": 9.722578947368421e-05, "loss": 0.5775, "step": 30556 }, { "epoch": 1.711109866726397, "grad_norm": 1.2135733366012573, "learning_rate": 9.722552631578949e-05, "loss": 0.4698, "step": 30557 }, { "epoch": 1.711165864038526, "grad_norm": 1.188438057899475, "learning_rate": 9.722526315789475e-05, "loss": 0.3848, "step": 30558 }, { "epoch": 1.7112218613506551, "grad_norm": 1.270774006843567, "learning_rate": 9.7225e-05, "loss": 0.4707, "step": 30559 }, { "epoch": 1.7112778586627841, "grad_norm": 1.282051920890808, "learning_rate": 9.722473684210526e-05, "loss": 0.3673, "step": 30560 }, { "epoch": 1.7113338559749132, "grad_norm": 1.2208714485168457, "learning_rate": 9.722447368421052e-05, "loss": 0.406, "step": 30561 }, { "epoch": 1.7113898532870422, "grad_norm": 1.078134536743164, "learning_rate": 9.72242105263158e-05, "loss": 0.4927, 
"step": 30562 }, { "epoch": 1.7114458505991712, "grad_norm": 7.428740978240967, "learning_rate": 9.722394736842106e-05, "loss": 0.4742, "step": 30563 }, { "epoch": 1.7115018479113002, "grad_norm": 1.3124948740005493, "learning_rate": 9.722368421052632e-05, "loss": 0.4801, "step": 30564 }, { "epoch": 1.7115578452234292, "grad_norm": 1.4366769790649414, "learning_rate": 9.722342105263158e-05, "loss": 0.4579, "step": 30565 }, { "epoch": 1.7116138425355583, "grad_norm": 1.1832019090652466, "learning_rate": 9.722315789473685e-05, "loss": 0.4563, "step": 30566 }, { "epoch": 1.7116698398476873, "grad_norm": 1.219072699546814, "learning_rate": 9.722289473684211e-05, "loss": 0.3572, "step": 30567 }, { "epoch": 1.7117258371598163, "grad_norm": 1.352976679801941, "learning_rate": 9.722263157894737e-05, "loss": 0.4948, "step": 30568 }, { "epoch": 1.7117818344719453, "grad_norm": 2.1166634559631348, "learning_rate": 9.722236842105263e-05, "loss": 0.3773, "step": 30569 }, { "epoch": 1.7118378317840743, "grad_norm": 1.0951555967330933, "learning_rate": 9.72221052631579e-05, "loss": 0.4427, "step": 30570 }, { "epoch": 1.7118938290962034, "grad_norm": 1.319106936454773, "learning_rate": 9.722184210526316e-05, "loss": 0.4357, "step": 30571 }, { "epoch": 1.7119498264083324, "grad_norm": 1.6315269470214844, "learning_rate": 9.722157894736844e-05, "loss": 0.4057, "step": 30572 }, { "epoch": 1.7120058237204614, "grad_norm": 1.246069073677063, "learning_rate": 9.722131578947368e-05, "loss": 0.3893, "step": 30573 }, { "epoch": 1.7120618210325904, "grad_norm": 1.2473217248916626, "learning_rate": 9.722105263157896e-05, "loss": 0.4203, "step": 30574 }, { "epoch": 1.7121178183447194, "grad_norm": 1.3081737756729126, "learning_rate": 9.722078947368421e-05, "loss": 0.4838, "step": 30575 }, { "epoch": 1.7121738156568485, "grad_norm": 1.5456491708755493, "learning_rate": 9.722052631578947e-05, "loss": 0.4271, "step": 30576 }, { "epoch": 1.7122298129689775, "grad_norm": 1.4192774295806885, 
"learning_rate": 9.722026315789475e-05, "loss": 0.4473, "step": 30577 }, { "epoch": 1.7122858102811065, "grad_norm": 1.259068489074707, "learning_rate": 9.722e-05, "loss": 0.4821, "step": 30578 }, { "epoch": 1.7123418075932355, "grad_norm": 1.2192338705062866, "learning_rate": 9.721973684210527e-05, "loss": 0.4147, "step": 30579 }, { "epoch": 1.7123978049053645, "grad_norm": 1.3094992637634277, "learning_rate": 9.721947368421053e-05, "loss": 0.4099, "step": 30580 }, { "epoch": 1.7124538022174935, "grad_norm": 1.2361643314361572, "learning_rate": 9.72192105263158e-05, "loss": 0.4245, "step": 30581 }, { "epoch": 1.7125097995296226, "grad_norm": 1.2182124853134155, "learning_rate": 9.721894736842106e-05, "loss": 0.5122, "step": 30582 }, { "epoch": 1.7125657968417516, "grad_norm": 1.3774763345718384, "learning_rate": 9.721868421052632e-05, "loss": 0.4316, "step": 30583 }, { "epoch": 1.7126217941538806, "grad_norm": 1.2150533199310303, "learning_rate": 9.721842105263158e-05, "loss": 0.4453, "step": 30584 }, { "epoch": 1.7126777914660096, "grad_norm": 1.3658932447433472, "learning_rate": 9.721815789473685e-05, "loss": 0.5215, "step": 30585 }, { "epoch": 1.7127337887781386, "grad_norm": 1.7840657234191895, "learning_rate": 9.721789473684211e-05, "loss": 0.4515, "step": 30586 }, { "epoch": 1.7127897860902677, "grad_norm": 12.474908828735352, "learning_rate": 9.721763157894737e-05, "loss": 0.397, "step": 30587 }, { "epoch": 1.7128457834023967, "grad_norm": 1.279429316520691, "learning_rate": 9.721736842105263e-05, "loss": 0.4489, "step": 30588 }, { "epoch": 1.7129017807145257, "grad_norm": 1.2631796598434448, "learning_rate": 9.72171052631579e-05, "loss": 0.4396, "step": 30589 }, { "epoch": 1.7129577780266547, "grad_norm": 1.2091093063354492, "learning_rate": 9.721684210526317e-05, "loss": 0.4535, "step": 30590 }, { "epoch": 1.7130137753387837, "grad_norm": 1.2190179824829102, "learning_rate": 9.721657894736842e-05, "loss": 0.3891, "step": 30591 }, { "epoch": 
1.7130697726509128, "grad_norm": 4.941834926605225, "learning_rate": 9.721631578947368e-05, "loss": 0.5027, "step": 30592 }, { "epoch": 1.7131257699630418, "grad_norm": 1.272477388381958, "learning_rate": 9.721605263157894e-05, "loss": 0.4147, "step": 30593 }, { "epoch": 1.7131817672751708, "grad_norm": 1.1352440118789673, "learning_rate": 9.721578947368422e-05, "loss": 0.4997, "step": 30594 }, { "epoch": 1.7132377645872998, "grad_norm": 1.238242506980896, "learning_rate": 9.721552631578948e-05, "loss": 0.5159, "step": 30595 }, { "epoch": 1.7132937618994288, "grad_norm": 1.2218987941741943, "learning_rate": 9.721526315789474e-05, "loss": 0.5147, "step": 30596 }, { "epoch": 1.7133497592115579, "grad_norm": 1.0865470170974731, "learning_rate": 9.7215e-05, "loss": 0.3208, "step": 30597 }, { "epoch": 1.7134057565236869, "grad_norm": 1.231279969215393, "learning_rate": 9.721473684210527e-05, "loss": 0.5322, "step": 30598 }, { "epoch": 1.713461753835816, "grad_norm": 1.792553186416626, "learning_rate": 9.721447368421053e-05, "loss": 0.6053, "step": 30599 }, { "epoch": 1.713517751147945, "grad_norm": 16.398744583129883, "learning_rate": 9.72142105263158e-05, "loss": 0.6179, "step": 30600 }, { "epoch": 1.713573748460074, "grad_norm": 1.4515624046325684, "learning_rate": 9.721394736842105e-05, "loss": 0.4621, "step": 30601 }, { "epoch": 1.713629745772203, "grad_norm": 1.3135499954223633, "learning_rate": 9.721368421052632e-05, "loss": 0.4713, "step": 30602 }, { "epoch": 1.713685743084332, "grad_norm": 1.1159043312072754, "learning_rate": 9.721342105263158e-05, "loss": 0.3756, "step": 30603 }, { "epoch": 1.713741740396461, "grad_norm": 1.162896752357483, "learning_rate": 9.721315789473686e-05, "loss": 0.3915, "step": 30604 }, { "epoch": 1.71379773770859, "grad_norm": 1.2234514951705933, "learning_rate": 9.72128947368421e-05, "loss": 0.426, "step": 30605 }, { "epoch": 1.713853735020719, "grad_norm": 1.1834098100662231, "learning_rate": 9.721263157894737e-05, "loss": 0.4264, 
"step": 30606 }, { "epoch": 1.713909732332848, "grad_norm": 1.1783487796783447, "learning_rate": 9.721236842105263e-05, "loss": 0.3993, "step": 30607 }, { "epoch": 1.713965729644977, "grad_norm": 1.6386569738388062, "learning_rate": 9.721210526315791e-05, "loss": 0.5254, "step": 30608 }, { "epoch": 1.714021726957106, "grad_norm": 1.2830673456192017, "learning_rate": 9.721184210526317e-05, "loss": 0.3077, "step": 30609 }, { "epoch": 1.714077724269235, "grad_norm": 1.170202612876892, "learning_rate": 9.721157894736841e-05, "loss": 0.3864, "step": 30610 }, { "epoch": 1.7141337215813641, "grad_norm": 2.121738910675049, "learning_rate": 9.721131578947369e-05, "loss": 0.5497, "step": 30611 }, { "epoch": 1.7141897188934931, "grad_norm": 1.3131927251815796, "learning_rate": 9.721105263157895e-05, "loss": 0.461, "step": 30612 }, { "epoch": 1.7142457162056222, "grad_norm": 1.1239851713180542, "learning_rate": 9.721078947368422e-05, "loss": 0.4226, "step": 30613 }, { "epoch": 1.7143017135177512, "grad_norm": 1.1671003103256226, "learning_rate": 9.721052631578948e-05, "loss": 0.5343, "step": 30614 }, { "epoch": 1.7143577108298802, "grad_norm": 1.367677927017212, "learning_rate": 9.721026315789474e-05, "loss": 0.6234, "step": 30615 }, { "epoch": 1.7144137081420092, "grad_norm": 1.6681021451950073, "learning_rate": 9.721e-05, "loss": 0.3351, "step": 30616 }, { "epoch": 1.7144697054541382, "grad_norm": 1.8264193534851074, "learning_rate": 9.720973684210527e-05, "loss": 0.5302, "step": 30617 }, { "epoch": 1.7145257027662673, "grad_norm": 1.5470703840255737, "learning_rate": 9.720947368421053e-05, "loss": 0.594, "step": 30618 }, { "epoch": 1.7145817000783963, "grad_norm": 1.2262579202651978, "learning_rate": 9.720921052631579e-05, "loss": 0.3708, "step": 30619 }, { "epoch": 1.7146376973905253, "grad_norm": 1.1791127920150757, "learning_rate": 9.720894736842105e-05, "loss": 0.4498, "step": 30620 }, { "epoch": 1.7146936947026543, "grad_norm": 1.3748281002044678, "learning_rate": 
9.720868421052633e-05, "loss": 0.4342, "step": 30621 }, { "epoch": 1.7147496920147833, "grad_norm": 1.1307182312011719, "learning_rate": 9.720842105263158e-05, "loss": 0.4498, "step": 30622 }, { "epoch": 1.7148056893269124, "grad_norm": 1.339543104171753, "learning_rate": 9.720815789473684e-05, "loss": 0.5279, "step": 30623 }, { "epoch": 1.7148616866390414, "grad_norm": 1.3946506977081299, "learning_rate": 9.72078947368421e-05, "loss": 0.5304, "step": 30624 }, { "epoch": 1.7149176839511704, "grad_norm": 2.007319688796997, "learning_rate": 9.720763157894738e-05, "loss": 0.4803, "step": 30625 }, { "epoch": 1.7149736812632994, "grad_norm": 1.4103554487228394, "learning_rate": 9.720736842105264e-05, "loss": 0.4206, "step": 30626 }, { "epoch": 1.7150296785754284, "grad_norm": 1.0439146757125854, "learning_rate": 9.720710526315791e-05, "loss": 0.5129, "step": 30627 }, { "epoch": 1.7150856758875574, "grad_norm": 1.2654073238372803, "learning_rate": 9.720684210526316e-05, "loss": 0.4236, "step": 30628 }, { "epoch": 1.7151416731996865, "grad_norm": 1.275828242301941, "learning_rate": 9.720657894736842e-05, "loss": 0.4752, "step": 30629 }, { "epoch": 1.7151976705118155, "grad_norm": 1.4743921756744385, "learning_rate": 9.720631578947369e-05, "loss": 0.4481, "step": 30630 }, { "epoch": 1.7152536678239445, "grad_norm": 1.5907094478607178, "learning_rate": 9.720605263157895e-05, "loss": 0.4339, "step": 30631 }, { "epoch": 1.7153096651360733, "grad_norm": 2.2020251750946045, "learning_rate": 9.720578947368422e-05, "loss": 0.4501, "step": 30632 }, { "epoch": 1.7153656624482023, "grad_norm": 1.5741947889328003, "learning_rate": 9.720552631578947e-05, "loss": 0.5433, "step": 30633 }, { "epoch": 1.7154216597603313, "grad_norm": 1.4771791696548462, "learning_rate": 9.720526315789474e-05, "loss": 0.4377, "step": 30634 }, { "epoch": 1.7154776570724604, "grad_norm": 1.3025338649749756, "learning_rate": 9.7205e-05, "loss": 0.4054, "step": 30635 }, { "epoch": 1.7155336543845894, 
"grad_norm": 1.1977874040603638, "learning_rate": 9.720473684210528e-05, "loss": 0.4227, "step": 30636 }, { "epoch": 1.7155896516967184, "grad_norm": 1.209682583808899, "learning_rate": 9.720447368421053e-05, "loss": 0.3793, "step": 30637 }, { "epoch": 1.7156456490088474, "grad_norm": 1.4564845561981201, "learning_rate": 9.72042105263158e-05, "loss": 0.4768, "step": 30638 }, { "epoch": 1.7157016463209764, "grad_norm": 1.0919175148010254, "learning_rate": 9.720394736842105e-05, "loss": 0.4748, "step": 30639 }, { "epoch": 1.7157576436331055, "grad_norm": 1.1943644285202026, "learning_rate": 9.720368421052633e-05, "loss": 0.3998, "step": 30640 }, { "epoch": 1.7158136409452345, "grad_norm": 1.596779704093933, "learning_rate": 9.720342105263159e-05, "loss": 0.4699, "step": 30641 }, { "epoch": 1.7158696382573635, "grad_norm": 1.8138458728790283, "learning_rate": 9.720315789473685e-05, "loss": 0.6418, "step": 30642 }, { "epoch": 1.7159256355694925, "grad_norm": 1.606316089630127, "learning_rate": 9.720289473684211e-05, "loss": 0.602, "step": 30643 }, { "epoch": 1.7159816328816215, "grad_norm": 1.4772661924362183, "learning_rate": 9.720263157894738e-05, "loss": 0.4297, "step": 30644 }, { "epoch": 1.7160376301937506, "grad_norm": 2.572352409362793, "learning_rate": 9.720236842105264e-05, "loss": 0.3914, "step": 30645 }, { "epoch": 1.7160936275058796, "grad_norm": 1.6091371774673462, "learning_rate": 9.72021052631579e-05, "loss": 0.3716, "step": 30646 }, { "epoch": 1.7161496248180086, "grad_norm": 1.4084526300430298, "learning_rate": 9.720184210526316e-05, "loss": 0.4196, "step": 30647 }, { "epoch": 1.7162056221301376, "grad_norm": 1.1943351030349731, "learning_rate": 9.720157894736842e-05, "loss": 0.3734, "step": 30648 }, { "epoch": 1.7162616194422666, "grad_norm": 1.3019176721572876, "learning_rate": 9.720131578947369e-05, "loss": 0.5291, "step": 30649 }, { "epoch": 1.7163176167543956, "grad_norm": 1.2358137369155884, "learning_rate": 9.720105263157895e-05, "loss": 0.4952, 
"step": 30650 }, { "epoch": 1.7163736140665247, "grad_norm": 1.4986878633499146, "learning_rate": 9.720078947368421e-05, "loss": 0.6412, "step": 30651 }, { "epoch": 1.7164296113786537, "grad_norm": 1.4451597929000854, "learning_rate": 9.720052631578947e-05, "loss": 0.479, "step": 30652 }, { "epoch": 1.7164856086907827, "grad_norm": 1.2005128860473633, "learning_rate": 9.720026315789474e-05, "loss": 0.5046, "step": 30653 }, { "epoch": 1.7165416060029117, "grad_norm": 1.1480522155761719, "learning_rate": 9.72e-05, "loss": 0.489, "step": 30654 }, { "epoch": 1.7165976033150407, "grad_norm": 1.278332233428955, "learning_rate": 9.719973684210528e-05, "loss": 0.5082, "step": 30655 }, { "epoch": 1.7166536006271698, "grad_norm": 1.2766473293304443, "learning_rate": 9.719947368421052e-05, "loss": 0.4704, "step": 30656 }, { "epoch": 1.7167095979392988, "grad_norm": 1.597607135772705, "learning_rate": 9.71992105263158e-05, "loss": 0.4532, "step": 30657 }, { "epoch": 1.7167655952514278, "grad_norm": 1.3771840333938599, "learning_rate": 9.719894736842106e-05, "loss": 0.4287, "step": 30658 }, { "epoch": 1.7168215925635568, "grad_norm": 1.1253502368927002, "learning_rate": 9.719868421052633e-05, "loss": 0.3985, "step": 30659 }, { "epoch": 1.7168775898756858, "grad_norm": 1.0468671321868896, "learning_rate": 9.719842105263158e-05, "loss": 0.3813, "step": 30660 }, { "epoch": 1.7169335871878149, "grad_norm": 1.1901935338974, "learning_rate": 9.719815789473685e-05, "loss": 0.4057, "step": 30661 }, { "epoch": 1.7169895844999439, "grad_norm": 1.9568490982055664, "learning_rate": 9.719789473684211e-05, "loss": 0.3452, "step": 30662 }, { "epoch": 1.717045581812073, "grad_norm": 1.33143150806427, "learning_rate": 9.719763157894737e-05, "loss": 0.5552, "step": 30663 }, { "epoch": 1.717101579124202, "grad_norm": 1.3214884996414185, "learning_rate": 9.719736842105264e-05, "loss": 0.4655, "step": 30664 }, { "epoch": 1.717157576436331, "grad_norm": 1.1933867931365967, "learning_rate": 
9.719710526315789e-05, "loss": 0.4986, "step": 30665 }, { "epoch": 1.71721357374846, "grad_norm": 1.3275268077850342, "learning_rate": 9.719684210526316e-05, "loss": 0.3995, "step": 30666 }, { "epoch": 1.717269571060589, "grad_norm": 1.329003930091858, "learning_rate": 9.719657894736842e-05, "loss": 0.4352, "step": 30667 }, { "epoch": 1.717325568372718, "grad_norm": 1.2409541606903076, "learning_rate": 9.71963157894737e-05, "loss": 0.4716, "step": 30668 }, { "epoch": 1.717381565684847, "grad_norm": 1.4531842470169067, "learning_rate": 9.719605263157895e-05, "loss": 0.4473, "step": 30669 }, { "epoch": 1.717437562996976, "grad_norm": 1.2469524145126343, "learning_rate": 9.719578947368421e-05, "loss": 0.47, "step": 30670 }, { "epoch": 1.717493560309105, "grad_norm": 1.8494130373001099, "learning_rate": 9.719552631578947e-05, "loss": 0.42, "step": 30671 }, { "epoch": 1.717549557621234, "grad_norm": 1.1138805150985718, "learning_rate": 9.719526315789475e-05, "loss": 0.4293, "step": 30672 }, { "epoch": 1.717605554933363, "grad_norm": 1.6776938438415527, "learning_rate": 9.719500000000001e-05, "loss": 0.4414, "step": 30673 }, { "epoch": 1.717661552245492, "grad_norm": 1.2268189191818237, "learning_rate": 9.719473684210527e-05, "loss": 0.4444, "step": 30674 }, { "epoch": 1.7177175495576211, "grad_norm": 1.4933257102966309, "learning_rate": 9.719447368421053e-05, "loss": 0.5096, "step": 30675 }, { "epoch": 1.7177735468697501, "grad_norm": 1.1103745698928833, "learning_rate": 9.71942105263158e-05, "loss": 0.3784, "step": 30676 }, { "epoch": 1.7178295441818792, "grad_norm": 1.309709072113037, "learning_rate": 9.719394736842106e-05, "loss": 0.5753, "step": 30677 }, { "epoch": 1.7178855414940082, "grad_norm": 1.2489408254623413, "learning_rate": 9.719368421052632e-05, "loss": 0.3883, "step": 30678 }, { "epoch": 1.7179415388061372, "grad_norm": 1.180154800415039, "learning_rate": 9.719342105263158e-05, "loss": 0.3999, "step": 30679 }, { "epoch": 1.7179975361182662, "grad_norm": 
1.634352445602417, "learning_rate": 9.719315789473684e-05, "loss": 0.4748, "step": 30680 }, { "epoch": 1.7180535334303952, "grad_norm": 1.1118837594985962, "learning_rate": 9.719289473684211e-05, "loss": 0.4365, "step": 30681 }, { "epoch": 1.7181095307425243, "grad_norm": 1.3902416229248047, "learning_rate": 9.719263157894737e-05, "loss": 0.5039, "step": 30682 }, { "epoch": 1.7181655280546533, "grad_norm": 1.4275425672531128, "learning_rate": 9.719236842105263e-05, "loss": 0.4861, "step": 30683 }, { "epoch": 1.7182215253667823, "grad_norm": 1.4224351644515991, "learning_rate": 9.719210526315789e-05, "loss": 0.583, "step": 30684 }, { "epoch": 1.7182775226789113, "grad_norm": 1.3549447059631348, "learning_rate": 9.719184210526316e-05, "loss": 0.4253, "step": 30685 }, { "epoch": 1.7183335199910403, "grad_norm": 1.197293758392334, "learning_rate": 9.719157894736842e-05, "loss": 0.3436, "step": 30686 }, { "epoch": 1.7183895173031694, "grad_norm": 1.121922254562378, "learning_rate": 9.71913157894737e-05, "loss": 0.426, "step": 30687 }, { "epoch": 1.7184455146152984, "grad_norm": 1.2227044105529785, "learning_rate": 9.719105263157894e-05, "loss": 0.433, "step": 30688 }, { "epoch": 1.7185015119274274, "grad_norm": 1.234965443611145, "learning_rate": 9.719078947368422e-05, "loss": 0.4819, "step": 30689 }, { "epoch": 1.7185575092395564, "grad_norm": 1.185038685798645, "learning_rate": 9.719052631578948e-05, "loss": 0.4845, "step": 30690 }, { "epoch": 1.7186135065516854, "grad_norm": 1.0895192623138428, "learning_rate": 9.719026315789475e-05, "loss": 0.416, "step": 30691 }, { "epoch": 1.7186695038638145, "grad_norm": 1.145519733428955, "learning_rate": 9.719000000000001e-05, "loss": 0.5857, "step": 30692 }, { "epoch": 1.7187255011759435, "grad_norm": 1.2973244190216064, "learning_rate": 9.718973684210527e-05, "loss": 0.5575, "step": 30693 }, { "epoch": 1.7187814984880725, "grad_norm": 1.195210337638855, "learning_rate": 9.718947368421053e-05, "loss": 0.3612, "step": 30694 }, 
{ "epoch": 1.7188374958002015, "grad_norm": 1.2516976594924927, "learning_rate": 9.71892105263158e-05, "loss": 0.5291, "step": 30695 }, { "epoch": 1.7188934931123305, "grad_norm": 1.301780104637146, "learning_rate": 9.718894736842106e-05, "loss": 0.4843, "step": 30696 }, { "epoch": 1.7189494904244595, "grad_norm": 1.1459444761276245, "learning_rate": 9.718868421052632e-05, "loss": 0.479, "step": 30697 }, { "epoch": 1.7190054877365886, "grad_norm": 1.6155439615249634, "learning_rate": 9.718842105263158e-05, "loss": 0.5189, "step": 30698 }, { "epoch": 1.7190614850487176, "grad_norm": 1.2038792371749878, "learning_rate": 9.718815789473684e-05, "loss": 0.3827, "step": 30699 }, { "epoch": 1.7191174823608466, "grad_norm": 1.0903184413909912, "learning_rate": 9.718789473684211e-05, "loss": 0.3966, "step": 30700 }, { "epoch": 1.7191734796729756, "grad_norm": 1.5096611976623535, "learning_rate": 9.718763157894737e-05, "loss": 0.4004, "step": 30701 }, { "epoch": 1.7192294769851046, "grad_norm": 1.1890404224395752, "learning_rate": 9.718736842105263e-05, "loss": 0.5064, "step": 30702 }, { "epoch": 1.7192854742972337, "grad_norm": 1.2429345846176147, "learning_rate": 9.71871052631579e-05, "loss": 0.5223, "step": 30703 }, { "epoch": 1.7193414716093627, "grad_norm": 1.2582592964172363, "learning_rate": 9.718684210526317e-05, "loss": 0.5147, "step": 30704 }, { "epoch": 1.7193974689214917, "grad_norm": 1.1780757904052734, "learning_rate": 9.718657894736843e-05, "loss": 0.4832, "step": 30705 }, { "epoch": 1.7194534662336207, "grad_norm": 1.1705541610717773, "learning_rate": 9.718631578947369e-05, "loss": 0.4167, "step": 30706 }, { "epoch": 1.7195094635457497, "grad_norm": 1.7118525505065918, "learning_rate": 9.718605263157895e-05, "loss": 0.5343, "step": 30707 }, { "epoch": 1.7195654608578788, "grad_norm": 1.2762616872787476, "learning_rate": 9.718578947368422e-05, "loss": 0.441, "step": 30708 }, { "epoch": 1.7196214581700078, "grad_norm": 1.195202350616455, "learning_rate": 
9.718552631578948e-05, "loss": 0.5421, "step": 30709 }, { "epoch": 1.7196774554821368, "grad_norm": 1.2236769199371338, "learning_rate": 9.718526315789475e-05, "loss": 0.4745, "step": 30710 }, { "epoch": 1.7197334527942658, "grad_norm": 1.5898139476776123, "learning_rate": 9.7185e-05, "loss": 0.5193, "step": 30711 }, { "epoch": 1.7197894501063948, "grad_norm": 1.3720322847366333, "learning_rate": 9.718473684210527e-05, "loss": 0.4336, "step": 30712 }, { "epoch": 1.7198454474185239, "grad_norm": 1.2265517711639404, "learning_rate": 9.718447368421053e-05, "loss": 0.443, "step": 30713 }, { "epoch": 1.7199014447306529, "grad_norm": 1.514292597770691, "learning_rate": 9.718421052631579e-05, "loss": 0.4443, "step": 30714 }, { "epoch": 1.719957442042782, "grad_norm": 1.1352652311325073, "learning_rate": 9.718394736842105e-05, "loss": 0.3963, "step": 30715 }, { "epoch": 1.720013439354911, "grad_norm": 1.0488125085830688, "learning_rate": 9.718368421052631e-05, "loss": 0.4215, "step": 30716 }, { "epoch": 1.72006943666704, "grad_norm": 1.1707981824874878, "learning_rate": 9.718342105263158e-05, "loss": 0.4291, "step": 30717 }, { "epoch": 1.720125433979169, "grad_norm": 1.696366786956787, "learning_rate": 9.718315789473684e-05, "loss": 0.5962, "step": 30718 }, { "epoch": 1.720181431291298, "grad_norm": 1.5346957445144653, "learning_rate": 9.718289473684212e-05, "loss": 0.5974, "step": 30719 }, { "epoch": 1.720237428603427, "grad_norm": 1.9283950328826904, "learning_rate": 9.718263157894736e-05, "loss": 0.4745, "step": 30720 }, { "epoch": 1.720293425915556, "grad_norm": 1.3071030378341675, "learning_rate": 9.718236842105264e-05, "loss": 0.5455, "step": 30721 }, { "epoch": 1.720349423227685, "grad_norm": 1.199967384338379, "learning_rate": 9.71821052631579e-05, "loss": 0.4828, "step": 30722 }, { "epoch": 1.720405420539814, "grad_norm": 1.2231509685516357, "learning_rate": 9.718184210526317e-05, "loss": 0.4276, "step": 30723 }, { "epoch": 1.720461417851943, "grad_norm": 
1.6901391744613647, "learning_rate": 9.718157894736843e-05, "loss": 0.5318, "step": 30724 }, { "epoch": 1.720517415164072, "grad_norm": 1.1251397132873535, "learning_rate": 9.718131578947369e-05, "loss": 0.4106, "step": 30725 }, { "epoch": 1.720573412476201, "grad_norm": 1.2451826333999634, "learning_rate": 9.718105263157895e-05, "loss": 0.4808, "step": 30726 }, { "epoch": 1.7206294097883301, "grad_norm": 1.2075666189193726, "learning_rate": 9.718078947368422e-05, "loss": 0.4009, "step": 30727 }, { "epoch": 1.7206854071004591, "grad_norm": 1.3249578475952148, "learning_rate": 9.718052631578948e-05, "loss": 0.4715, "step": 30728 }, { "epoch": 1.7207414044125882, "grad_norm": 1.2314748764038086, "learning_rate": 9.718026315789474e-05, "loss": 0.6087, "step": 30729 }, { "epoch": 1.7207974017247172, "grad_norm": 1.3579349517822266, "learning_rate": 9.718e-05, "loss": 0.5412, "step": 30730 }, { "epoch": 1.7208533990368462, "grad_norm": 1.256733775138855, "learning_rate": 9.717973684210526e-05, "loss": 0.4667, "step": 30731 }, { "epoch": 1.7209093963489752, "grad_norm": 1.2888498306274414, "learning_rate": 9.717947368421053e-05, "loss": 0.4215, "step": 30732 }, { "epoch": 1.7209653936611042, "grad_norm": 1.2255321741104126, "learning_rate": 9.71792105263158e-05, "loss": 0.4204, "step": 30733 }, { "epoch": 1.7210213909732333, "grad_norm": 1.2704070806503296, "learning_rate": 9.717894736842105e-05, "loss": 0.4192, "step": 30734 }, { "epoch": 1.7210773882853623, "grad_norm": 1.1680527925491333, "learning_rate": 9.717868421052631e-05, "loss": 0.5187, "step": 30735 }, { "epoch": 1.7211333855974913, "grad_norm": 1.2011053562164307, "learning_rate": 9.717842105263159e-05, "loss": 0.5848, "step": 30736 }, { "epoch": 1.7211893829096203, "grad_norm": 1.4032227993011475, "learning_rate": 9.717815789473685e-05, "loss": 0.4313, "step": 30737 }, { "epoch": 1.7212453802217493, "grad_norm": 1.2838280200958252, "learning_rate": 9.71778947368421e-05, "loss": 0.5346, "step": 30738 }, { 
"epoch": 1.7213013775338784, "grad_norm": 1.3266584873199463, "learning_rate": 9.717763157894737e-05, "loss": 0.4865, "step": 30739 }, { "epoch": 1.7213573748460074, "grad_norm": 1.2213317155838013, "learning_rate": 9.717736842105264e-05, "loss": 0.5428, "step": 30740 }, { "epoch": 1.7214133721581364, "grad_norm": 1.1680879592895508, "learning_rate": 9.71771052631579e-05, "loss": 0.4599, "step": 30741 }, { "epoch": 1.7214693694702654, "grad_norm": 1.1384347677230835, "learning_rate": 9.717684210526317e-05, "loss": 0.3859, "step": 30742 }, { "epoch": 1.7215253667823944, "grad_norm": 1.2647172212600708, "learning_rate": 9.717657894736842e-05, "loss": 0.4213, "step": 30743 }, { "epoch": 1.7215813640945234, "grad_norm": 1.1774641275405884, "learning_rate": 9.717631578947369e-05, "loss": 0.4956, "step": 30744 }, { "epoch": 1.7216373614066525, "grad_norm": 1.4863076210021973, "learning_rate": 9.717605263157895e-05, "loss": 0.4511, "step": 30745 }, { "epoch": 1.7216933587187815, "grad_norm": 1.4243805408477783, "learning_rate": 9.717578947368422e-05, "loss": 0.4707, "step": 30746 }, { "epoch": 1.7217493560309105, "grad_norm": 1.3286114931106567, "learning_rate": 9.717552631578948e-05, "loss": 0.4154, "step": 30747 }, { "epoch": 1.7218053533430395, "grad_norm": 1.3599361181259155, "learning_rate": 9.717526315789474e-05, "loss": 0.5631, "step": 30748 }, { "epoch": 1.7218613506551685, "grad_norm": 1.6556535959243774, "learning_rate": 9.7175e-05, "loss": 0.4746, "step": 30749 }, { "epoch": 1.7219173479672976, "grad_norm": 1.2787960767745972, "learning_rate": 9.717473684210526e-05, "loss": 0.4807, "step": 30750 }, { "epoch": 1.7219733452794266, "grad_norm": 1.2840867042541504, "learning_rate": 9.717447368421054e-05, "loss": 0.3179, "step": 30751 }, { "epoch": 1.7220293425915556, "grad_norm": 1.2753773927688599, "learning_rate": 9.717421052631578e-05, "loss": 0.389, "step": 30752 }, { "epoch": 1.7220853399036846, "grad_norm": 1.3589175939559937, "learning_rate": 
9.717394736842106e-05, "loss": 0.6011, "step": 30753 }, { "epoch": 1.7221413372158136, "grad_norm": 1.447645902633667, "learning_rate": 9.717368421052632e-05, "loss": 0.3273, "step": 30754 }, { "epoch": 1.7221973345279427, "grad_norm": 1.3061238527297974, "learning_rate": 9.717342105263159e-05, "loss": 0.4034, "step": 30755 }, { "epoch": 1.7222533318400717, "grad_norm": 1.231913685798645, "learning_rate": 9.717315789473685e-05, "loss": 0.4542, "step": 30756 }, { "epoch": 1.7223093291522007, "grad_norm": 1.2924394607543945, "learning_rate": 9.717289473684211e-05, "loss": 0.5165, "step": 30757 }, { "epoch": 1.7223653264643297, "grad_norm": 1.3542739152908325, "learning_rate": 9.717263157894737e-05, "loss": 0.3954, "step": 30758 }, { "epoch": 1.7224213237764587, "grad_norm": 1.2190207242965698, "learning_rate": 9.717236842105264e-05, "loss": 0.3373, "step": 30759 }, { "epoch": 1.7224773210885878, "grad_norm": 1.327471375465393, "learning_rate": 9.71721052631579e-05, "loss": 0.5234, "step": 30760 }, { "epoch": 1.7225333184007168, "grad_norm": 1.2556498050689697, "learning_rate": 9.717184210526316e-05, "loss": 0.3365, "step": 30761 }, { "epoch": 1.7225893157128458, "grad_norm": 1.273229956626892, "learning_rate": 9.717157894736842e-05, "loss": 0.4334, "step": 30762 }, { "epoch": 1.7226453130249748, "grad_norm": 1.2415874004364014, "learning_rate": 9.71713157894737e-05, "loss": 0.3701, "step": 30763 }, { "epoch": 1.7227013103371038, "grad_norm": 1.1257222890853882, "learning_rate": 9.717105263157895e-05, "loss": 0.4524, "step": 30764 }, { "epoch": 1.7227573076492328, "grad_norm": 1.111696720123291, "learning_rate": 9.717078947368423e-05, "loss": 0.4103, "step": 30765 }, { "epoch": 1.7228133049613619, "grad_norm": 1.157478928565979, "learning_rate": 9.717052631578947e-05, "loss": 0.4071, "step": 30766 }, { "epoch": 1.7228693022734909, "grad_norm": 1.3755488395690918, "learning_rate": 9.717026315789473e-05, "loss": 0.441, "step": 30767 }, { "epoch": 1.72292529958562, 
"grad_norm": 1.4772863388061523, "learning_rate": 9.717e-05, "loss": 0.5367, "step": 30768 }, { "epoch": 1.722981296897749, "grad_norm": 1.3974822759628296, "learning_rate": 9.716973684210527e-05, "loss": 0.4845, "step": 30769 }, { "epoch": 1.723037294209878, "grad_norm": 1.4198808670043945, "learning_rate": 9.716947368421053e-05, "loss": 0.4812, "step": 30770 }, { "epoch": 1.723093291522007, "grad_norm": 1.354041337966919, "learning_rate": 9.716921052631579e-05, "loss": 0.4878, "step": 30771 }, { "epoch": 1.723149288834136, "grad_norm": 1.297965407371521, "learning_rate": 9.716894736842106e-05, "loss": 0.411, "step": 30772 }, { "epoch": 1.723205286146265, "grad_norm": 1.2439676523208618, "learning_rate": 9.716868421052632e-05, "loss": 0.4477, "step": 30773 }, { "epoch": 1.723261283458394, "grad_norm": 1.2469125986099243, "learning_rate": 9.716842105263159e-05, "loss": 0.48, "step": 30774 }, { "epoch": 1.723317280770523, "grad_norm": 1.226881980895996, "learning_rate": 9.716815789473684e-05, "loss": 0.4352, "step": 30775 }, { "epoch": 1.723373278082652, "grad_norm": 1.4902311563491821, "learning_rate": 9.716789473684211e-05, "loss": 0.3364, "step": 30776 }, { "epoch": 1.723429275394781, "grad_norm": 1.266294002532959, "learning_rate": 9.716763157894737e-05, "loss": 0.3488, "step": 30777 }, { "epoch": 1.72348527270691, "grad_norm": 1.145354986190796, "learning_rate": 9.716736842105264e-05, "loss": 0.3891, "step": 30778 }, { "epoch": 1.7235412700190391, "grad_norm": 1.250402569770813, "learning_rate": 9.71671052631579e-05, "loss": 0.5013, "step": 30779 }, { "epoch": 1.7235972673311681, "grad_norm": 1.360511064529419, "learning_rate": 9.716684210526316e-05, "loss": 0.4564, "step": 30780 }, { "epoch": 1.7236532646432972, "grad_norm": 1.220169186592102, "learning_rate": 9.716657894736842e-05, "loss": 0.4816, "step": 30781 }, { "epoch": 1.7237092619554262, "grad_norm": 1.2172590494155884, "learning_rate": 9.71663157894737e-05, "loss": 0.4198, "step": 30782 }, { "epoch": 
1.7237652592675552, "grad_norm": 1.2992833852767944, "learning_rate": 9.716605263157896e-05, "loss": 0.5251, "step": 30783 }, { "epoch": 1.7238212565796842, "grad_norm": 1.2512496709823608, "learning_rate": 9.716578947368422e-05, "loss": 0.4944, "step": 30784 }, { "epoch": 1.7238772538918132, "grad_norm": 1.255361557006836, "learning_rate": 9.716552631578948e-05, "loss": 0.4415, "step": 30785 }, { "epoch": 1.7239332512039423, "grad_norm": 1.219032883644104, "learning_rate": 9.716526315789474e-05, "loss": 0.4437, "step": 30786 }, { "epoch": 1.7239892485160713, "grad_norm": 1.262127161026001, "learning_rate": 9.716500000000001e-05, "loss": 0.4603, "step": 30787 }, { "epoch": 1.7240452458282003, "grad_norm": 1.7772603034973145, "learning_rate": 9.716473684210527e-05, "loss": 0.5027, "step": 30788 }, { "epoch": 1.7241012431403293, "grad_norm": 1.373513102531433, "learning_rate": 9.716447368421053e-05, "loss": 0.4535, "step": 30789 }, { "epoch": 1.7241572404524583, "grad_norm": 1.1996124982833862, "learning_rate": 9.716421052631579e-05, "loss": 0.429, "step": 30790 }, { "epoch": 1.7242132377645873, "grad_norm": 1.2211042642593384, "learning_rate": 9.716394736842106e-05, "loss": 0.4429, "step": 30791 }, { "epoch": 1.7242692350767164, "grad_norm": 1.1607037782669067, "learning_rate": 9.716368421052632e-05, "loss": 0.3886, "step": 30792 }, { "epoch": 1.7243252323888454, "grad_norm": 1.408096194267273, "learning_rate": 9.716342105263158e-05, "loss": 0.5755, "step": 30793 }, { "epoch": 1.7243812297009744, "grad_norm": 1.4828472137451172, "learning_rate": 9.716315789473684e-05, "loss": 0.4418, "step": 30794 }, { "epoch": 1.7244372270131034, "grad_norm": 1.319539189338684, "learning_rate": 9.716289473684211e-05, "loss": 0.4372, "step": 30795 }, { "epoch": 1.7244932243252324, "grad_norm": 1.5043408870697021, "learning_rate": 9.716263157894737e-05, "loss": 0.4326, "step": 30796 }, { "epoch": 1.7245492216373615, "grad_norm": 1.0972951650619507, "learning_rate": 
9.716236842105265e-05, "loss": 0.5003, "step": 30797 }, { "epoch": 1.7246052189494905, "grad_norm": 1.3848352432250977, "learning_rate": 9.716210526315789e-05, "loss": 0.4515, "step": 30798 }, { "epoch": 1.7246612162616195, "grad_norm": 1.3718483448028564, "learning_rate": 9.716184210526317e-05, "loss": 0.7014, "step": 30799 }, { "epoch": 1.7247172135737485, "grad_norm": 1.258340835571289, "learning_rate": 9.716157894736843e-05, "loss": 0.4507, "step": 30800 }, { "epoch": 1.7247732108858775, "grad_norm": 1.5159603357315063, "learning_rate": 9.716131578947369e-05, "loss": 0.5084, "step": 30801 }, { "epoch": 1.7248292081980066, "grad_norm": 1.6316993236541748, "learning_rate": 9.716105263157896e-05, "loss": 0.481, "step": 30802 }, { "epoch": 1.7248852055101356, "grad_norm": 1.4929754734039307, "learning_rate": 9.71607894736842e-05, "loss": 0.4773, "step": 30803 }, { "epoch": 1.7249412028222646, "grad_norm": 1.174686312675476, "learning_rate": 9.716052631578948e-05, "loss": 0.4631, "step": 30804 }, { "epoch": 1.7249972001343936, "grad_norm": 1.2033119201660156, "learning_rate": 9.716026315789474e-05, "loss": 0.5065, "step": 30805 }, { "epoch": 1.7250531974465226, "grad_norm": 1.6159331798553467, "learning_rate": 9.716000000000001e-05, "loss": 0.3934, "step": 30806 }, { "epoch": 1.7251091947586517, "grad_norm": 1.3112629652023315, "learning_rate": 9.715973684210526e-05, "loss": 0.3701, "step": 30807 }, { "epoch": 1.7251651920707807, "grad_norm": 1.2754032611846924, "learning_rate": 9.715947368421053e-05, "loss": 0.4287, "step": 30808 }, { "epoch": 1.7252211893829097, "grad_norm": 2.0877411365509033, "learning_rate": 9.715921052631579e-05, "loss": 0.6157, "step": 30809 }, { "epoch": 1.7252771866950387, "grad_norm": 1.2816237211227417, "learning_rate": 9.715894736842106e-05, "loss": 0.4074, "step": 30810 }, { "epoch": 1.7253331840071677, "grad_norm": 1.1529109477996826, "learning_rate": 9.715868421052632e-05, "loss": 0.4474, "step": 30811 }, { "epoch": 
1.7253891813192967, "grad_norm": 1.130661129951477, "learning_rate": 9.715842105263158e-05, "loss": 0.5181, "step": 30812 }, { "epoch": 1.7254451786314258, "grad_norm": 1.5895882844924927, "learning_rate": 9.715815789473684e-05, "loss": 0.4612, "step": 30813 }, { "epoch": 1.7255011759435548, "grad_norm": 1.087876796722412, "learning_rate": 9.715789473684212e-05, "loss": 0.3793, "step": 30814 }, { "epoch": 1.7255571732556838, "grad_norm": 2.8522260189056396, "learning_rate": 9.715763157894738e-05, "loss": 0.3504, "step": 30815 }, { "epoch": 1.7256131705678128, "grad_norm": 2.2068569660186768, "learning_rate": 9.715736842105264e-05, "loss": 0.5478, "step": 30816 }, { "epoch": 1.7256691678799418, "grad_norm": 1.260810136795044, "learning_rate": 9.71571052631579e-05, "loss": 0.4399, "step": 30817 }, { "epoch": 1.7257251651920709, "grad_norm": 1.6022473573684692, "learning_rate": 9.715684210526316e-05, "loss": 0.3176, "step": 30818 }, { "epoch": 1.7257811625041999, "grad_norm": 1.4457985162734985, "learning_rate": 9.715657894736843e-05, "loss": 0.4669, "step": 30819 }, { "epoch": 1.725837159816329, "grad_norm": 1.2545794248580933, "learning_rate": 9.715631578947369e-05, "loss": 0.4905, "step": 30820 }, { "epoch": 1.725893157128458, "grad_norm": 1.3766297101974487, "learning_rate": 9.715605263157895e-05, "loss": 0.432, "step": 30821 }, { "epoch": 1.725949154440587, "grad_norm": 1.2831355333328247, "learning_rate": 9.715578947368421e-05, "loss": 0.419, "step": 30822 }, { "epoch": 1.726005151752716, "grad_norm": 1.179022192955017, "learning_rate": 9.715552631578948e-05, "loss": 0.4321, "step": 30823 }, { "epoch": 1.726061149064845, "grad_norm": 1.3134585618972778, "learning_rate": 9.715526315789474e-05, "loss": 0.5058, "step": 30824 }, { "epoch": 1.726117146376974, "grad_norm": 1.1801894903182983, "learning_rate": 9.7155e-05, "loss": 0.5043, "step": 30825 }, { "epoch": 1.726173143689103, "grad_norm": 1.549644947052002, "learning_rate": 9.715473684210526e-05, "loss": 
0.4859, "step": 30826 }, { "epoch": 1.726229141001232, "grad_norm": 1.0734951496124268, "learning_rate": 9.715447368421053e-05, "loss": 0.414, "step": 30827 }, { "epoch": 1.726285138313361, "grad_norm": 1.5300109386444092, "learning_rate": 9.71542105263158e-05, "loss": 0.488, "step": 30828 }, { "epoch": 1.72634113562549, "grad_norm": 3.6961076259613037, "learning_rate": 9.715394736842107e-05, "loss": 0.5406, "step": 30829 }, { "epoch": 1.726397132937619, "grad_norm": 1.129594326019287, "learning_rate": 9.715368421052631e-05, "loss": 0.3016, "step": 30830 }, { "epoch": 1.7264531302497481, "grad_norm": 1.2709170579910278, "learning_rate": 9.715342105263159e-05, "loss": 0.452, "step": 30831 }, { "epoch": 1.7265091275618771, "grad_norm": 1.4917163848876953, "learning_rate": 9.715315789473685e-05, "loss": 0.4523, "step": 30832 }, { "epoch": 1.7265651248740062, "grad_norm": 1.2385468482971191, "learning_rate": 9.715289473684212e-05, "loss": 0.3755, "step": 30833 }, { "epoch": 1.7266211221861352, "grad_norm": 1.2770428657531738, "learning_rate": 9.715263157894738e-05, "loss": 0.4445, "step": 30834 }, { "epoch": 1.7266771194982642, "grad_norm": 1.444815754890442, "learning_rate": 9.715236842105263e-05, "loss": 0.4496, "step": 30835 }, { "epoch": 1.7267331168103932, "grad_norm": 1.4520589113235474, "learning_rate": 9.71521052631579e-05, "loss": 0.5309, "step": 30836 }, { "epoch": 1.7267891141225222, "grad_norm": 1.0482993125915527, "learning_rate": 9.715184210526316e-05, "loss": 0.4375, "step": 30837 }, { "epoch": 1.7268451114346512, "grad_norm": 1.4906712770462036, "learning_rate": 9.715157894736843e-05, "loss": 0.5349, "step": 30838 }, { "epoch": 1.7269011087467803, "grad_norm": 2.543030261993408, "learning_rate": 9.715131578947369e-05, "loss": 0.6079, "step": 30839 }, { "epoch": 1.7269571060589093, "grad_norm": 1.0798964500427246, "learning_rate": 9.715105263157895e-05, "loss": 0.4067, "step": 30840 }, { "epoch": 1.7270131033710383, "grad_norm": 1.2734647989273071, 
"learning_rate": 9.715078947368421e-05, "loss": 0.4286, "step": 30841 }, { "epoch": 1.7270691006831673, "grad_norm": 1.318042516708374, "learning_rate": 9.715052631578948e-05, "loss": 0.5077, "step": 30842 }, { "epoch": 1.7271250979952963, "grad_norm": 1.216072678565979, "learning_rate": 9.715026315789474e-05, "loss": 0.5351, "step": 30843 }, { "epoch": 1.7271810953074254, "grad_norm": 1.4882640838623047, "learning_rate": 9.715e-05, "loss": 0.475, "step": 30844 }, { "epoch": 1.7272370926195544, "grad_norm": 1.2969797849655151, "learning_rate": 9.714973684210526e-05, "loss": 0.422, "step": 30845 }, { "epoch": 1.7272930899316834, "grad_norm": 1.624219536781311, "learning_rate": 9.714947368421054e-05, "loss": 0.464, "step": 30846 }, { "epoch": 1.7273490872438124, "grad_norm": 1.4371284246444702, "learning_rate": 9.71492105263158e-05, "loss": 0.4519, "step": 30847 }, { "epoch": 1.7274050845559414, "grad_norm": 1.3296457529067993, "learning_rate": 9.714894736842106e-05, "loss": 0.4431, "step": 30848 }, { "epoch": 1.7274610818680705, "grad_norm": 1.1049184799194336, "learning_rate": 9.714868421052632e-05, "loss": 0.376, "step": 30849 }, { "epoch": 1.7275170791801995, "grad_norm": 1.1859382390975952, "learning_rate": 9.714842105263159e-05, "loss": 0.4267, "step": 30850 }, { "epoch": 1.7275730764923285, "grad_norm": 1.1705151796340942, "learning_rate": 9.714815789473685e-05, "loss": 0.3619, "step": 30851 }, { "epoch": 1.7276290738044575, "grad_norm": 1.3142000436782837, "learning_rate": 9.714789473684211e-05, "loss": 0.4021, "step": 30852 }, { "epoch": 1.7276850711165865, "grad_norm": 1.1487438678741455, "learning_rate": 9.714763157894737e-05, "loss": 0.4081, "step": 30853 }, { "epoch": 1.7277410684287156, "grad_norm": 1.276232361793518, "learning_rate": 9.714736842105263e-05, "loss": 0.4626, "step": 30854 }, { "epoch": 1.7277970657408446, "grad_norm": 1.2493582963943481, "learning_rate": 9.71471052631579e-05, "loss": 0.4618, "step": 30855 }, { "epoch": 1.7278530630529736, 
"grad_norm": 1.2609328031539917, "learning_rate": 9.714684210526316e-05, "loss": 0.3786, "step": 30856 }, { "epoch": 1.7279090603651026, "grad_norm": 1.2448943853378296, "learning_rate": 9.714657894736843e-05, "loss": 0.5076, "step": 30857 }, { "epoch": 1.7279650576772316, "grad_norm": 1.2144376039505005, "learning_rate": 9.714631578947368e-05, "loss": 0.5064, "step": 30858 }, { "epoch": 1.7280210549893606, "grad_norm": 1.4060462713241577, "learning_rate": 9.714605263157895e-05, "loss": 0.5556, "step": 30859 }, { "epoch": 1.7280770523014897, "grad_norm": 1.2787256240844727, "learning_rate": 9.714578947368421e-05, "loss": 0.3218, "step": 30860 }, { "epoch": 1.7281330496136187, "grad_norm": 1.5079458951950073, "learning_rate": 9.714552631578949e-05, "loss": 0.6093, "step": 30861 }, { "epoch": 1.7281890469257477, "grad_norm": 1.6231735944747925, "learning_rate": 9.714526315789473e-05, "loss": 0.5613, "step": 30862 }, { "epoch": 1.7282450442378767, "grad_norm": 1.4514434337615967, "learning_rate": 9.7145e-05, "loss": 0.4394, "step": 30863 }, { "epoch": 1.7283010415500057, "grad_norm": 1.2759157419204712, "learning_rate": 9.714473684210527e-05, "loss": 0.4208, "step": 30864 }, { "epoch": 1.7283570388621348, "grad_norm": 1.2782208919525146, "learning_rate": 9.714447368421054e-05, "loss": 0.5318, "step": 30865 }, { "epoch": 1.7284130361742638, "grad_norm": 2.5487778186798096, "learning_rate": 9.71442105263158e-05, "loss": 0.4648, "step": 30866 }, { "epoch": 1.7284690334863928, "grad_norm": 1.0467078685760498, "learning_rate": 9.714394736842106e-05, "loss": 0.3735, "step": 30867 }, { "epoch": 1.7285250307985218, "grad_norm": 1.2292864322662354, "learning_rate": 9.714368421052632e-05, "loss": 0.3864, "step": 30868 }, { "epoch": 1.7285810281106508, "grad_norm": 1.3292547464370728, "learning_rate": 9.714342105263159e-05, "loss": 0.6821, "step": 30869 }, { "epoch": 1.7286370254227799, "grad_norm": 1.1492819786071777, "learning_rate": 9.714315789473685e-05, "loss": 0.4084, 
"step": 30870 }, { "epoch": 1.7286930227349089, "grad_norm": 2.3479971885681152, "learning_rate": 9.714289473684211e-05, "loss": 0.4776, "step": 30871 }, { "epoch": 1.728749020047038, "grad_norm": 1.6894288063049316, "learning_rate": 9.714263157894737e-05, "loss": 0.5442, "step": 30872 }, { "epoch": 1.728805017359167, "grad_norm": 1.538828730583191, "learning_rate": 9.714236842105263e-05, "loss": 0.5265, "step": 30873 }, { "epoch": 1.728861014671296, "grad_norm": 1.340962290763855, "learning_rate": 9.71421052631579e-05, "loss": 0.5284, "step": 30874 }, { "epoch": 1.728917011983425, "grad_norm": 1.1624521017074585, "learning_rate": 9.714184210526316e-05, "loss": 0.2955, "step": 30875 }, { "epoch": 1.728973009295554, "grad_norm": 1.3382991552352905, "learning_rate": 9.714157894736842e-05, "loss": 0.4681, "step": 30876 }, { "epoch": 1.729029006607683, "grad_norm": 1.409085988998413, "learning_rate": 9.714131578947368e-05, "loss": 0.4515, "step": 30877 }, { "epoch": 1.729085003919812, "grad_norm": 1.0673515796661377, "learning_rate": 9.714105263157896e-05, "loss": 0.5178, "step": 30878 }, { "epoch": 1.729141001231941, "grad_norm": 1.5937429666519165, "learning_rate": 9.714078947368422e-05, "loss": 0.4162, "step": 30879 }, { "epoch": 1.7291969985440698, "grad_norm": 1.2507046461105347, "learning_rate": 9.714052631578948e-05, "loss": 0.392, "step": 30880 }, { "epoch": 1.7292529958561988, "grad_norm": 1.7141634225845337, "learning_rate": 9.714026315789474e-05, "loss": 0.3829, "step": 30881 }, { "epoch": 1.7293089931683279, "grad_norm": 1.924500584602356, "learning_rate": 9.714000000000001e-05, "loss": 0.5439, "step": 30882 }, { "epoch": 1.7293649904804569, "grad_norm": 1.5594667196273804, "learning_rate": 9.713973684210527e-05, "loss": 0.5527, "step": 30883 }, { "epoch": 1.729420987792586, "grad_norm": 1.3248144388198853, "learning_rate": 9.713947368421054e-05, "loss": 0.4547, "step": 30884 }, { "epoch": 1.729476985104715, "grad_norm": 1.5802890062332153, "learning_rate": 
9.713921052631579e-05, "loss": 0.6615, "step": 30885 }, { "epoch": 1.729532982416844, "grad_norm": 1.5077229738235474, "learning_rate": 9.713894736842106e-05, "loss": 0.4718, "step": 30886 }, { "epoch": 1.729588979728973, "grad_norm": 1.4542040824890137, "learning_rate": 9.713868421052632e-05, "loss": 0.6412, "step": 30887 }, { "epoch": 1.729644977041102, "grad_norm": 1.3625273704528809, "learning_rate": 9.713842105263158e-05, "loss": 0.369, "step": 30888 }, { "epoch": 1.729700974353231, "grad_norm": 1.2317105531692505, "learning_rate": 9.713815789473685e-05, "loss": 0.326, "step": 30889 }, { "epoch": 1.72975697166536, "grad_norm": 1.2325578927993774, "learning_rate": 9.71378947368421e-05, "loss": 0.4449, "step": 30890 }, { "epoch": 1.729812968977489, "grad_norm": 1.545949935913086, "learning_rate": 9.713763157894737e-05, "loss": 0.4058, "step": 30891 }, { "epoch": 1.729868966289618, "grad_norm": 1.4828636646270752, "learning_rate": 9.713736842105263e-05, "loss": 0.5084, "step": 30892 }, { "epoch": 1.729924963601747, "grad_norm": 1.7670361995697021, "learning_rate": 9.71371052631579e-05, "loss": 0.5689, "step": 30893 }, { "epoch": 1.729980960913876, "grad_norm": 1.1305797100067139, "learning_rate": 9.713684210526317e-05, "loss": 0.4592, "step": 30894 }, { "epoch": 1.7300369582260051, "grad_norm": 1.1148333549499512, "learning_rate": 9.713657894736843e-05, "loss": 0.5318, "step": 30895 }, { "epoch": 1.7300929555381341, "grad_norm": 1.5832405090332031, "learning_rate": 9.713631578947369e-05, "loss": 0.4372, "step": 30896 }, { "epoch": 1.7301489528502632, "grad_norm": 1.43873929977417, "learning_rate": 9.713605263157896e-05, "loss": 0.47, "step": 30897 }, { "epoch": 1.7302049501623922, "grad_norm": 1.550362229347229, "learning_rate": 9.713578947368422e-05, "loss": 0.5809, "step": 30898 }, { "epoch": 1.7302609474745212, "grad_norm": 1.765970230102539, "learning_rate": 9.713552631578948e-05, "loss": 0.4819, "step": 30899 }, { "epoch": 1.7303169447866502, "grad_norm": 
1.1776527166366577, "learning_rate": 9.713526315789474e-05, "loss": 0.4204, "step": 30900 }, { "epoch": 1.7303729420987792, "grad_norm": 1.1927144527435303, "learning_rate": 9.713500000000001e-05, "loss": 0.4343, "step": 30901 }, { "epoch": 1.7304289394109083, "grad_norm": 1.1371490955352783, "learning_rate": 9.713473684210527e-05, "loss": 0.5609, "step": 30902 }, { "epoch": 1.7304849367230373, "grad_norm": 1.316361665725708, "learning_rate": 9.713447368421053e-05, "loss": 0.5007, "step": 30903 }, { "epoch": 1.7305409340351663, "grad_norm": 1.2518609762191772, "learning_rate": 9.713421052631579e-05, "loss": 0.3775, "step": 30904 }, { "epoch": 1.7305969313472953, "grad_norm": 1.2294044494628906, "learning_rate": 9.713394736842105e-05, "loss": 0.4788, "step": 30905 }, { "epoch": 1.7306529286594243, "grad_norm": 1.3479539155960083, "learning_rate": 9.713368421052632e-05, "loss": 0.4313, "step": 30906 }, { "epoch": 1.7307089259715533, "grad_norm": 1.2183607816696167, "learning_rate": 9.713342105263158e-05, "loss": 0.3896, "step": 30907 }, { "epoch": 1.7307649232836824, "grad_norm": 17.11246109008789, "learning_rate": 9.713315789473684e-05, "loss": 0.636, "step": 30908 }, { "epoch": 1.7308209205958114, "grad_norm": 1.1182912588119507, "learning_rate": 9.71328947368421e-05, "loss": 0.3731, "step": 30909 }, { "epoch": 1.7308769179079404, "grad_norm": 1.2550162076950073, "learning_rate": 9.713263157894738e-05, "loss": 0.4245, "step": 30910 }, { "epoch": 1.7309329152200694, "grad_norm": 1.3438735008239746, "learning_rate": 9.713236842105264e-05, "loss": 0.4981, "step": 30911 }, { "epoch": 1.7309889125321984, "grad_norm": 1.236170768737793, "learning_rate": 9.713210526315791e-05, "loss": 0.4917, "step": 30912 }, { "epoch": 1.7310449098443275, "grad_norm": 1.380021333694458, "learning_rate": 9.713184210526315e-05, "loss": 0.5406, "step": 30913 }, { "epoch": 1.7311009071564565, "grad_norm": 1.2908653020858765, "learning_rate": 9.713157894736843e-05, "loss": 0.4543, "step": 
30914 }, { "epoch": 1.7311569044685855, "grad_norm": 2.054720640182495, "learning_rate": 9.713131578947369e-05, "loss": 0.4492, "step": 30915 }, { "epoch": 1.7312129017807145, "grad_norm": 1.1338237524032593, "learning_rate": 9.713105263157896e-05, "loss": 0.3648, "step": 30916 }, { "epoch": 1.7312688990928435, "grad_norm": 1.230956792831421, "learning_rate": 9.713078947368421e-05, "loss": 0.3829, "step": 30917 }, { "epoch": 1.7313248964049726, "grad_norm": 1.3563530445098877, "learning_rate": 9.713052631578948e-05, "loss": 0.4956, "step": 30918 }, { "epoch": 1.7313808937171016, "grad_norm": 1.4030046463012695, "learning_rate": 9.713026315789474e-05, "loss": 0.4191, "step": 30919 }, { "epoch": 1.7314368910292306, "grad_norm": 1.2252717018127441, "learning_rate": 9.713000000000001e-05, "loss": 0.4479, "step": 30920 }, { "epoch": 1.7314928883413596, "grad_norm": 1.3108750581741333, "learning_rate": 9.712973684210527e-05, "loss": 0.5127, "step": 30921 }, { "epoch": 1.7315488856534886, "grad_norm": 1.378910779953003, "learning_rate": 9.712947368421052e-05, "loss": 0.4461, "step": 30922 }, { "epoch": 1.7316048829656177, "grad_norm": 1.148742437362671, "learning_rate": 9.712921052631579e-05, "loss": 0.3799, "step": 30923 }, { "epoch": 1.7316608802777467, "grad_norm": 1.2744581699371338, "learning_rate": 9.712894736842105e-05, "loss": 0.5466, "step": 30924 }, { "epoch": 1.7317168775898757, "grad_norm": 1.3952937126159668, "learning_rate": 9.712868421052633e-05, "loss": 0.4239, "step": 30925 }, { "epoch": 1.7317728749020047, "grad_norm": 1.8824533224105835, "learning_rate": 9.712842105263159e-05, "loss": 0.4215, "step": 30926 }, { "epoch": 1.7318288722141337, "grad_norm": 1.224982500076294, "learning_rate": 9.712815789473685e-05, "loss": 0.5098, "step": 30927 }, { "epoch": 1.7318848695262627, "grad_norm": 1.2281935214996338, "learning_rate": 9.71278947368421e-05, "loss": 0.5545, "step": 30928 }, { "epoch": 1.7319408668383918, "grad_norm": 1.7226656675338745, 
"learning_rate": 9.712763157894738e-05, "loss": 0.4299, "step": 30929 }, { "epoch": 1.7319968641505208, "grad_norm": 1.298384189605713, "learning_rate": 9.712736842105264e-05, "loss": 0.4545, "step": 30930 }, { "epoch": 1.7320528614626498, "grad_norm": 1.3409208059310913, "learning_rate": 9.71271052631579e-05, "loss": 0.4533, "step": 30931 }, { "epoch": 1.7321088587747788, "grad_norm": 1.1323697566986084, "learning_rate": 9.712684210526316e-05, "loss": 0.4413, "step": 30932 }, { "epoch": 1.7321648560869078, "grad_norm": 1.2897166013717651, "learning_rate": 9.712657894736843e-05, "loss": 0.3455, "step": 30933 }, { "epoch": 1.7322208533990369, "grad_norm": 1.7649589776992798, "learning_rate": 9.712631578947369e-05, "loss": 0.4775, "step": 30934 }, { "epoch": 1.7322768507111659, "grad_norm": 1.309889316558838, "learning_rate": 9.712605263157895e-05, "loss": 0.5041, "step": 30935 }, { "epoch": 1.732332848023295, "grad_norm": 1.0974708795547485, "learning_rate": 9.712578947368421e-05, "loss": 0.4333, "step": 30936 }, { "epoch": 1.732388845335424, "grad_norm": 1.4677408933639526, "learning_rate": 9.712552631578948e-05, "loss": 0.5098, "step": 30937 }, { "epoch": 1.732444842647553, "grad_norm": 1.055536150932312, "learning_rate": 9.712526315789474e-05, "loss": 0.3425, "step": 30938 }, { "epoch": 1.732500839959682, "grad_norm": 1.0858639478683472, "learning_rate": 9.7125e-05, "loss": 0.4275, "step": 30939 }, { "epoch": 1.732556837271811, "grad_norm": 1.2390285730361938, "learning_rate": 9.712473684210526e-05, "loss": 0.5106, "step": 30940 }, { "epoch": 1.73261283458394, "grad_norm": 1.2363107204437256, "learning_rate": 9.712447368421052e-05, "loss": 0.3923, "step": 30941 }, { "epoch": 1.732668831896069, "grad_norm": 1.291670322418213, "learning_rate": 9.71242105263158e-05, "loss": 0.4968, "step": 30942 }, { "epoch": 1.732724829208198, "grad_norm": 1.3040119409561157, "learning_rate": 9.712394736842106e-05, "loss": 0.53, "step": 30943 }, { "epoch": 1.732780826520327, 
"grad_norm": 1.4760606288909912, "learning_rate": 9.712368421052633e-05, "loss": 0.6047, "step": 30944 }, { "epoch": 1.732836823832456, "grad_norm": 1.2323769330978394, "learning_rate": 9.712342105263157e-05, "loss": 0.401, "step": 30945 }, { "epoch": 1.732892821144585, "grad_norm": 1.1582738161087036, "learning_rate": 9.712315789473685e-05, "loss": 0.4747, "step": 30946 }, { "epoch": 1.7329488184567141, "grad_norm": 1.2419651746749878, "learning_rate": 9.712289473684211e-05, "loss": 0.5276, "step": 30947 }, { "epoch": 1.7330048157688431, "grad_norm": 1.2945401668548584, "learning_rate": 9.712263157894738e-05, "loss": 0.4313, "step": 30948 }, { "epoch": 1.7330608130809722, "grad_norm": 1.580261468887329, "learning_rate": 9.712236842105264e-05, "loss": 0.4265, "step": 30949 }, { "epoch": 1.7331168103931012, "grad_norm": 1.2586312294006348, "learning_rate": 9.71221052631579e-05, "loss": 0.4663, "step": 30950 }, { "epoch": 1.7331728077052302, "grad_norm": 2.0549309253692627, "learning_rate": 9.712184210526316e-05, "loss": 0.4111, "step": 30951 }, { "epoch": 1.7332288050173592, "grad_norm": 1.2690858840942383, "learning_rate": 9.712157894736843e-05, "loss": 0.4778, "step": 30952 }, { "epoch": 1.7332848023294882, "grad_norm": 1.483779788017273, "learning_rate": 9.712131578947369e-05, "loss": 0.5175, "step": 30953 }, { "epoch": 1.7333407996416172, "grad_norm": 1.263841152191162, "learning_rate": 9.712105263157895e-05, "loss": 0.4895, "step": 30954 }, { "epoch": 1.7333967969537463, "grad_norm": 1.1458534002304077, "learning_rate": 9.712078947368421e-05, "loss": 0.4272, "step": 30955 }, { "epoch": 1.7334527942658753, "grad_norm": 1.2037091255187988, "learning_rate": 9.712052631578947e-05, "loss": 0.3797, "step": 30956 }, { "epoch": 1.7335087915780043, "grad_norm": 1.631466269493103, "learning_rate": 9.712026315789475e-05, "loss": 0.6362, "step": 30957 }, { "epoch": 1.7335647888901333, "grad_norm": 1.3013267517089844, "learning_rate": 9.712e-05, "loss": 0.5013, "step": 
30958 }, { "epoch": 1.7336207862022623, "grad_norm": 1.4832836389541626, "learning_rate": 9.711973684210526e-05, "loss": 0.5935, "step": 30959 }, { "epoch": 1.7336767835143914, "grad_norm": 1.1693936586380005, "learning_rate": 9.711947368421052e-05, "loss": 0.3655, "step": 30960 }, { "epoch": 1.7337327808265204, "grad_norm": 1.3943352699279785, "learning_rate": 9.71192105263158e-05, "loss": 0.5045, "step": 30961 }, { "epoch": 1.7337887781386494, "grad_norm": 1.4526394605636597, "learning_rate": 9.711894736842106e-05, "loss": 0.4856, "step": 30962 }, { "epoch": 1.7338447754507782, "grad_norm": 1.68649423122406, "learning_rate": 9.711868421052632e-05, "loss": 0.4439, "step": 30963 }, { "epoch": 1.7339007727629072, "grad_norm": 1.2721911668777466, "learning_rate": 9.711842105263158e-05, "loss": 0.5415, "step": 30964 }, { "epoch": 1.7339567700750362, "grad_norm": 1.460144281387329, "learning_rate": 9.711815789473685e-05, "loss": 0.552, "step": 30965 }, { "epoch": 1.7340127673871653, "grad_norm": 1.6503968238830566, "learning_rate": 9.711789473684211e-05, "loss": 0.6655, "step": 30966 }, { "epoch": 1.7340687646992943, "grad_norm": 1.4066368341445923, "learning_rate": 9.711763157894738e-05, "loss": 0.5155, "step": 30967 }, { "epoch": 1.7341247620114233, "grad_norm": 1.1384308338165283, "learning_rate": 9.711736842105263e-05, "loss": 0.4905, "step": 30968 }, { "epoch": 1.7341807593235523, "grad_norm": 1.749942660331726, "learning_rate": 9.71171052631579e-05, "loss": 0.5055, "step": 30969 }, { "epoch": 1.7342367566356813, "grad_norm": 1.4464553594589233, "learning_rate": 9.711684210526316e-05, "loss": 0.452, "step": 30970 }, { "epoch": 1.7342927539478104, "grad_norm": 1.3154737949371338, "learning_rate": 9.711657894736844e-05, "loss": 0.4312, "step": 30971 }, { "epoch": 1.7343487512599394, "grad_norm": 1.425906777381897, "learning_rate": 9.711631578947368e-05, "loss": 0.615, "step": 30972 }, { "epoch": 1.7344047485720684, "grad_norm": 1.1637827157974243, "learning_rate": 
9.711605263157894e-05, "loss": 0.4332, "step": 30973 }, { "epoch": 1.7344607458841974, "grad_norm": 1.2237335443496704, "learning_rate": 9.711578947368422e-05, "loss": 0.3171, "step": 30974 }, { "epoch": 1.7345167431963264, "grad_norm": 1.2054741382598877, "learning_rate": 9.711552631578947e-05, "loss": 0.4881, "step": 30975 }, { "epoch": 1.7345727405084554, "grad_norm": 1.2754313945770264, "learning_rate": 9.711526315789475e-05, "loss": 0.4256, "step": 30976 }, { "epoch": 1.7346287378205845, "grad_norm": 1.0154242515563965, "learning_rate": 9.7115e-05, "loss": 0.3056, "step": 30977 }, { "epoch": 1.7346847351327135, "grad_norm": 1.235609531402588, "learning_rate": 9.711473684210527e-05, "loss": 0.459, "step": 30978 }, { "epoch": 1.7347407324448425, "grad_norm": 1.4229402542114258, "learning_rate": 9.711447368421053e-05, "loss": 0.4944, "step": 30979 }, { "epoch": 1.7347967297569715, "grad_norm": 1.2148220539093018, "learning_rate": 9.71142105263158e-05, "loss": 0.5224, "step": 30980 }, { "epoch": 1.7348527270691005, "grad_norm": 1.2697978019714355, "learning_rate": 9.711394736842106e-05, "loss": 0.3654, "step": 30981 }, { "epoch": 1.7349087243812296, "grad_norm": 1.1354392766952515, "learning_rate": 9.711368421052632e-05, "loss": 0.4228, "step": 30982 }, { "epoch": 1.7349647216933586, "grad_norm": 1.4376211166381836, "learning_rate": 9.711342105263158e-05, "loss": 0.4614, "step": 30983 }, { "epoch": 1.7350207190054876, "grad_norm": 1.1127361059188843, "learning_rate": 9.711315789473685e-05, "loss": 0.3225, "step": 30984 }, { "epoch": 1.7350767163176166, "grad_norm": 1.2308012247085571, "learning_rate": 9.711289473684211e-05, "loss": 0.4746, "step": 30985 }, { "epoch": 1.7351327136297456, "grad_norm": 1.4758014678955078, "learning_rate": 9.711263157894737e-05, "loss": 0.4164, "step": 30986 }, { "epoch": 1.7351887109418747, "grad_norm": 6.427015781402588, "learning_rate": 9.711236842105263e-05, "loss": 0.5271, "step": 30987 }, { "epoch": 1.7352447082540037, 
"grad_norm": 1.6755577325820923, "learning_rate": 9.71121052631579e-05, "loss": 0.5436, "step": 30988 }, { "epoch": 1.7353007055661327, "grad_norm": 1.2384088039398193, "learning_rate": 9.711184210526317e-05, "loss": 0.3598, "step": 30989 }, { "epoch": 1.7353567028782617, "grad_norm": 1.1982392072677612, "learning_rate": 9.711157894736842e-05, "loss": 0.3884, "step": 30990 }, { "epoch": 1.7354127001903907, "grad_norm": 1.1121225357055664, "learning_rate": 9.711131578947368e-05, "loss": 0.4059, "step": 30991 }, { "epoch": 1.7354686975025198, "grad_norm": 1.5421463251113892, "learning_rate": 9.711105263157894e-05, "loss": 0.395, "step": 30992 }, { "epoch": 1.7355246948146488, "grad_norm": 1.4363654851913452, "learning_rate": 9.711078947368422e-05, "loss": 0.5026, "step": 30993 }, { "epoch": 1.7355806921267778, "grad_norm": 1.8139078617095947, "learning_rate": 9.711052631578948e-05, "loss": 0.6181, "step": 30994 }, { "epoch": 1.7356366894389068, "grad_norm": 1.1919149160385132, "learning_rate": 9.711026315789474e-05, "loss": 0.4262, "step": 30995 }, { "epoch": 1.7356926867510358, "grad_norm": 1.5225441455841064, "learning_rate": 9.711e-05, "loss": 0.5237, "step": 30996 }, { "epoch": 1.7357486840631648, "grad_norm": 1.6076759099960327, "learning_rate": 9.710973684210527e-05, "loss": 0.4948, "step": 30997 }, { "epoch": 1.7358046813752939, "grad_norm": 1.1972556114196777, "learning_rate": 9.710947368421053e-05, "loss": 0.3819, "step": 30998 }, { "epoch": 1.7358606786874229, "grad_norm": 1.541182041168213, "learning_rate": 9.71092105263158e-05, "loss": 0.5491, "step": 30999 }, { "epoch": 1.735916675999552, "grad_norm": 1.5775507688522339, "learning_rate": 9.710894736842105e-05, "loss": 0.5367, "step": 31000 }, { "epoch": 1.735972673311681, "grad_norm": 1.3196219205856323, "learning_rate": 9.710868421052632e-05, "loss": 0.508, "step": 31001 }, { "epoch": 1.73602867062381, "grad_norm": 1.4850828647613525, "learning_rate": 9.710842105263158e-05, "loss": 0.5157, "step": 31002 
}, { "epoch": 1.736084667935939, "grad_norm": 1.3666611909866333, "learning_rate": 9.710815789473686e-05, "loss": 0.5597, "step": 31003 }, { "epoch": 1.736140665248068, "grad_norm": 1.231253981590271, "learning_rate": 9.710789473684212e-05, "loss": 0.4525, "step": 31004 }, { "epoch": 1.736196662560197, "grad_norm": 1.0799951553344727, "learning_rate": 9.710763157894738e-05, "loss": 0.387, "step": 31005 }, { "epoch": 1.736252659872326, "grad_norm": 1.2046945095062256, "learning_rate": 9.710736842105263e-05, "loss": 0.4121, "step": 31006 }, { "epoch": 1.736308657184455, "grad_norm": 1.1753010749816895, "learning_rate": 9.710710526315791e-05, "loss": 0.3987, "step": 31007 }, { "epoch": 1.736364654496584, "grad_norm": 1.2389849424362183, "learning_rate": 9.710684210526317e-05, "loss": 0.5673, "step": 31008 }, { "epoch": 1.736420651808713, "grad_norm": 1.3578776121139526, "learning_rate": 9.710657894736841e-05, "loss": 0.4629, "step": 31009 }, { "epoch": 1.736476649120842, "grad_norm": 1.1498032808303833, "learning_rate": 9.710631578947369e-05, "loss": 0.5048, "step": 31010 }, { "epoch": 1.7365326464329711, "grad_norm": 1.311370611190796, "learning_rate": 9.710605263157895e-05, "loss": 0.4367, "step": 31011 }, { "epoch": 1.7365886437451001, "grad_norm": 1.094753623008728, "learning_rate": 9.710578947368422e-05, "loss": 0.4911, "step": 31012 }, { "epoch": 1.7366446410572292, "grad_norm": 1.3457807302474976, "learning_rate": 9.710552631578948e-05, "loss": 0.3923, "step": 31013 }, { "epoch": 1.7367006383693582, "grad_norm": 1.321081280708313, "learning_rate": 9.710526315789474e-05, "loss": 0.402, "step": 31014 }, { "epoch": 1.7367566356814872, "grad_norm": 1.2904243469238281, "learning_rate": 9.7105e-05, "loss": 0.4171, "step": 31015 }, { "epoch": 1.7368126329936162, "grad_norm": 1.4380868673324585, "learning_rate": 9.710473684210527e-05, "loss": 0.3509, "step": 31016 }, { "epoch": 1.7368686303057452, "grad_norm": 1.2378318309783936, "learning_rate": 9.710447368421053e-05, 
"loss": 0.4268, "step": 31017 }, { "epoch": 1.7369246276178743, "grad_norm": 1.30984628200531, "learning_rate": 9.710421052631579e-05, "loss": 0.4519, "step": 31018 }, { "epoch": 1.7369806249300033, "grad_norm": 1.7147924900054932, "learning_rate": 9.710394736842105e-05, "loss": 0.536, "step": 31019 }, { "epoch": 1.7370366222421323, "grad_norm": 1.2327755689620972, "learning_rate": 9.710368421052633e-05, "loss": 0.4454, "step": 31020 }, { "epoch": 1.7370926195542613, "grad_norm": 1.0665234327316284, "learning_rate": 9.710342105263158e-05, "loss": 0.3624, "step": 31021 }, { "epoch": 1.7371486168663903, "grad_norm": 1.1231725215911865, "learning_rate": 9.710315789473684e-05, "loss": 0.398, "step": 31022 }, { "epoch": 1.7372046141785193, "grad_norm": 1.2381103038787842, "learning_rate": 9.71028947368421e-05, "loss": 0.445, "step": 31023 }, { "epoch": 1.7372606114906484, "grad_norm": 1.2365427017211914, "learning_rate": 9.710263157894738e-05, "loss": 0.4542, "step": 31024 }, { "epoch": 1.7373166088027774, "grad_norm": 1.264270305633545, "learning_rate": 9.710236842105264e-05, "loss": 0.5108, "step": 31025 }, { "epoch": 1.7373726061149064, "grad_norm": 1.1615774631500244, "learning_rate": 9.71021052631579e-05, "loss": 0.4658, "step": 31026 }, { "epoch": 1.7374286034270354, "grad_norm": 1.5205755233764648, "learning_rate": 9.710184210526316e-05, "loss": 0.7027, "step": 31027 }, { "epoch": 1.7374846007391644, "grad_norm": 1.4298266172409058, "learning_rate": 9.710157894736842e-05, "loss": 0.4847, "step": 31028 }, { "epoch": 1.7375405980512935, "grad_norm": 1.3484586477279663, "learning_rate": 9.710131578947369e-05, "loss": 0.5279, "step": 31029 }, { "epoch": 1.7375965953634225, "grad_norm": 1.2147866487503052, "learning_rate": 9.710105263157895e-05, "loss": 0.4323, "step": 31030 }, { "epoch": 1.7376525926755515, "grad_norm": 1.224869966506958, "learning_rate": 9.710078947368422e-05, "loss": 0.5024, "step": 31031 }, { "epoch": 1.7377085899876805, "grad_norm": 
1.0645188093185425, "learning_rate": 9.710052631578947e-05, "loss": 0.5958, "step": 31032 }, { "epoch": 1.7377645872998095, "grad_norm": 1.4227310419082642, "learning_rate": 9.710026315789474e-05, "loss": 0.4599, "step": 31033 }, { "epoch": 1.7378205846119386, "grad_norm": 1.4048056602478027, "learning_rate": 9.71e-05, "loss": 0.4401, "step": 31034 }, { "epoch": 1.7378765819240676, "grad_norm": 1.2840206623077393, "learning_rate": 9.709973684210528e-05, "loss": 0.4556, "step": 31035 }, { "epoch": 1.7379325792361966, "grad_norm": 1.3036516904830933, "learning_rate": 9.709947368421054e-05, "loss": 0.4384, "step": 31036 }, { "epoch": 1.7379885765483256, "grad_norm": 1.2935075759887695, "learning_rate": 9.70992105263158e-05, "loss": 0.4812, "step": 31037 }, { "epoch": 1.7380445738604546, "grad_norm": 1.361349105834961, "learning_rate": 9.709894736842105e-05, "loss": 0.42, "step": 31038 }, { "epoch": 1.7381005711725837, "grad_norm": 1.3498167991638184, "learning_rate": 9.709868421052633e-05, "loss": 0.458, "step": 31039 }, { "epoch": 1.7381565684847127, "grad_norm": 1.1751586198806763, "learning_rate": 9.709842105263159e-05, "loss": 0.4148, "step": 31040 }, { "epoch": 1.7382125657968417, "grad_norm": 1.1283135414123535, "learning_rate": 9.709815789473685e-05, "loss": 0.2819, "step": 31041 }, { "epoch": 1.7382685631089707, "grad_norm": 1.1883455514907837, "learning_rate": 9.709789473684211e-05, "loss": 0.4182, "step": 31042 }, { "epoch": 1.7383245604210997, "grad_norm": 1.396022081375122, "learning_rate": 9.709763157894737e-05, "loss": 0.4821, "step": 31043 }, { "epoch": 1.7383805577332287, "grad_norm": 1.1936545372009277, "learning_rate": 9.709736842105264e-05, "loss": 0.4972, "step": 31044 }, { "epoch": 1.7384365550453578, "grad_norm": 1.2382044792175293, "learning_rate": 9.70971052631579e-05, "loss": 0.5357, "step": 31045 }, { "epoch": 1.7384925523574868, "grad_norm": 1.3015687465667725, "learning_rate": 9.709684210526316e-05, "loss": 0.4523, "step": 31046 }, { 
"epoch": 1.7385485496696158, "grad_norm": 1.3992559909820557, "learning_rate": 9.709657894736842e-05, "loss": 0.3672, "step": 31047 }, { "epoch": 1.7386045469817448, "grad_norm": 1.282843828201294, "learning_rate": 9.709631578947369e-05, "loss": 0.4421, "step": 31048 }, { "epoch": 1.7386605442938738, "grad_norm": 1.4199289083480835, "learning_rate": 9.709605263157895e-05, "loss": 0.5129, "step": 31049 }, { "epoch": 1.7387165416060029, "grad_norm": 0.9760697484016418, "learning_rate": 9.709578947368421e-05, "loss": 0.3753, "step": 31050 }, { "epoch": 1.7387725389181319, "grad_norm": 1.2729848623275757, "learning_rate": 9.709552631578947e-05, "loss": 0.4728, "step": 31051 }, { "epoch": 1.738828536230261, "grad_norm": 1.1899892091751099, "learning_rate": 9.709526315789474e-05, "loss": 0.4492, "step": 31052 }, { "epoch": 1.73888453354239, "grad_norm": 1.205531358718872, "learning_rate": 9.7095e-05, "loss": 0.5223, "step": 31053 }, { "epoch": 1.738940530854519, "grad_norm": 1.3818005323410034, "learning_rate": 9.709473684210528e-05, "loss": 0.3802, "step": 31054 }, { "epoch": 1.738996528166648, "grad_norm": 1.2059881687164307, "learning_rate": 9.709447368421052e-05, "loss": 0.4753, "step": 31055 }, { "epoch": 1.739052525478777, "grad_norm": 1.1895244121551514, "learning_rate": 9.70942105263158e-05, "loss": 0.4158, "step": 31056 }, { "epoch": 1.739108522790906, "grad_norm": 1.4219253063201904, "learning_rate": 9.709394736842106e-05, "loss": 0.398, "step": 31057 }, { "epoch": 1.739164520103035, "grad_norm": 1.3253235816955566, "learning_rate": 9.709368421052633e-05, "loss": 0.5903, "step": 31058 }, { "epoch": 1.739220517415164, "grad_norm": 1.251280665397644, "learning_rate": 9.709342105263159e-05, "loss": 0.484, "step": 31059 }, { "epoch": 1.739276514727293, "grad_norm": 1.4518582820892334, "learning_rate": 9.709315789473684e-05, "loss": 0.5893, "step": 31060 }, { "epoch": 1.739332512039422, "grad_norm": 1.418822169303894, "learning_rate": 9.709289473684211e-05, "loss": 
0.5994, "step": 31061 }, { "epoch": 1.739388509351551, "grad_norm": 1.2382903099060059, "learning_rate": 9.709263157894737e-05, "loss": 0.4333, "step": 31062 }, { "epoch": 1.7394445066636801, "grad_norm": 1.23009192943573, "learning_rate": 9.709236842105264e-05, "loss": 0.5559, "step": 31063 }, { "epoch": 1.7395005039758091, "grad_norm": 1.3467671871185303, "learning_rate": 9.709210526315789e-05, "loss": 0.441, "step": 31064 }, { "epoch": 1.7395565012879382, "grad_norm": 1.2569304704666138, "learning_rate": 9.709184210526316e-05, "loss": 0.4839, "step": 31065 }, { "epoch": 1.7396124986000672, "grad_norm": 1.2604433298110962, "learning_rate": 9.709157894736842e-05, "loss": 0.4684, "step": 31066 }, { "epoch": 1.7396684959121962, "grad_norm": 1.1469084024429321, "learning_rate": 9.70913157894737e-05, "loss": 0.4653, "step": 31067 }, { "epoch": 1.7397244932243252, "grad_norm": 1.407639503479004, "learning_rate": 9.709105263157895e-05, "loss": 0.4579, "step": 31068 }, { "epoch": 1.7397804905364542, "grad_norm": 1.30097496509552, "learning_rate": 9.709078947368421e-05, "loss": 0.6514, "step": 31069 }, { "epoch": 1.7398364878485832, "grad_norm": 1.5303374528884888, "learning_rate": 9.709052631578947e-05, "loss": 0.477, "step": 31070 }, { "epoch": 1.7398924851607123, "grad_norm": 1.188430905342102, "learning_rate": 9.709026315789475e-05, "loss": 0.4511, "step": 31071 }, { "epoch": 1.7399484824728413, "grad_norm": 1.0792704820632935, "learning_rate": 9.709000000000001e-05, "loss": 0.3328, "step": 31072 }, { "epoch": 1.7400044797849703, "grad_norm": 1.2438716888427734, "learning_rate": 9.708973684210527e-05, "loss": 0.3821, "step": 31073 }, { "epoch": 1.7400604770970993, "grad_norm": 1.2731200456619263, "learning_rate": 9.708947368421053e-05, "loss": 0.4448, "step": 31074 }, { "epoch": 1.7401164744092283, "grad_norm": 1.2387651205062866, "learning_rate": 9.70892105263158e-05, "loss": 0.4412, "step": 31075 }, { "epoch": 1.7401724717213574, "grad_norm": 1.2949508428573608, 
"learning_rate": 9.708894736842106e-05, "loss": 0.47, "step": 31076 }, { "epoch": 1.7402284690334864, "grad_norm": 1.228320598602295, "learning_rate": 9.708868421052632e-05, "loss": 0.408, "step": 31077 }, { "epoch": 1.7402844663456154, "grad_norm": 1.0673197507858276, "learning_rate": 9.708842105263158e-05, "loss": 0.3355, "step": 31078 }, { "epoch": 1.7403404636577444, "grad_norm": 1.6339677572250366, "learning_rate": 9.708815789473684e-05, "loss": 0.5258, "step": 31079 }, { "epoch": 1.7403964609698734, "grad_norm": 1.0141102075576782, "learning_rate": 9.708789473684211e-05, "loss": 0.3434, "step": 31080 }, { "epoch": 1.7404524582820025, "grad_norm": 1.1632977724075317, "learning_rate": 9.708763157894737e-05, "loss": 0.4494, "step": 31081 }, { "epoch": 1.7405084555941315, "grad_norm": 1.386582374572754, "learning_rate": 9.708736842105263e-05, "loss": 0.5816, "step": 31082 }, { "epoch": 1.7405644529062605, "grad_norm": 1.2117961645126343, "learning_rate": 9.708710526315789e-05, "loss": 0.3829, "step": 31083 }, { "epoch": 1.7406204502183895, "grad_norm": 1.2344613075256348, "learning_rate": 9.708684210526316e-05, "loss": 0.6839, "step": 31084 }, { "epoch": 1.7406764475305185, "grad_norm": 1.557342529296875, "learning_rate": 9.708657894736842e-05, "loss": 0.6371, "step": 31085 }, { "epoch": 1.7407324448426476, "grad_norm": 1.478472352027893, "learning_rate": 9.70863157894737e-05, "loss": 0.4407, "step": 31086 }, { "epoch": 1.7407884421547766, "grad_norm": 1.3969019651412964, "learning_rate": 9.708605263157894e-05, "loss": 0.4745, "step": 31087 }, { "epoch": 1.7408444394669056, "grad_norm": 1.1291042566299438, "learning_rate": 9.708578947368422e-05, "loss": 0.3991, "step": 31088 }, { "epoch": 1.7409004367790346, "grad_norm": 1.3396730422973633, "learning_rate": 9.708552631578948e-05, "loss": 0.4702, "step": 31089 }, { "epoch": 1.7409564340911636, "grad_norm": 1.233237385749817, "learning_rate": 9.708526315789475e-05, "loss": 0.4785, "step": 31090 }, { "epoch": 
1.7410124314032926, "grad_norm": 1.2310079336166382, "learning_rate": 9.708500000000001e-05, "loss": 0.3986, "step": 31091 }, { "epoch": 1.7410684287154217, "grad_norm": 1.2281066179275513, "learning_rate": 9.708473684210527e-05, "loss": 0.3578, "step": 31092 }, { "epoch": 1.7411244260275507, "grad_norm": 1.6963021755218506, "learning_rate": 9.708447368421053e-05, "loss": 0.7621, "step": 31093 }, { "epoch": 1.7411804233396797, "grad_norm": 1.2462592124938965, "learning_rate": 9.708421052631579e-05, "loss": 0.5078, "step": 31094 }, { "epoch": 1.7412364206518087, "grad_norm": 1.1407694816589355, "learning_rate": 9.708394736842106e-05, "loss": 0.464, "step": 31095 }, { "epoch": 1.7412924179639377, "grad_norm": 1.5024384260177612, "learning_rate": 9.708368421052632e-05, "loss": 0.354, "step": 31096 }, { "epoch": 1.7413484152760668, "grad_norm": 1.2973623275756836, "learning_rate": 9.708342105263158e-05, "loss": 0.3644, "step": 31097 }, { "epoch": 1.7414044125881958, "grad_norm": 1.157495141029358, "learning_rate": 9.708315789473684e-05, "loss": 0.4756, "step": 31098 }, { "epoch": 1.7414604099003248, "grad_norm": 1.5629620552062988, "learning_rate": 9.708289473684211e-05, "loss": 0.5234, "step": 31099 }, { "epoch": 1.7415164072124538, "grad_norm": 1.3443243503570557, "learning_rate": 9.708263157894737e-05, "loss": 0.456, "step": 31100 }, { "epoch": 1.7415724045245828, "grad_norm": 1.2040119171142578, "learning_rate": 9.708236842105263e-05, "loss": 0.3734, "step": 31101 }, { "epoch": 1.7416284018367119, "grad_norm": 1.5180034637451172, "learning_rate": 9.70821052631579e-05, "loss": 0.4813, "step": 31102 }, { "epoch": 1.7416843991488409, "grad_norm": 1.3192877769470215, "learning_rate": 9.708184210526317e-05, "loss": 0.4648, "step": 31103 }, { "epoch": 1.74174039646097, "grad_norm": 1.2469027042388916, "learning_rate": 9.708157894736843e-05, "loss": 0.4622, "step": 31104 }, { "epoch": 1.741796393773099, "grad_norm": 1.5461679697036743, "learning_rate": 
9.708131578947369e-05, "loss": 0.3631, "step": 31105 }, { "epoch": 1.741852391085228, "grad_norm": 1.1623367071151733, "learning_rate": 9.708105263157895e-05, "loss": 0.5347, "step": 31106 }, { "epoch": 1.741908388397357, "grad_norm": 1.2339093685150146, "learning_rate": 9.708078947368422e-05, "loss": 0.4605, "step": 31107 }, { "epoch": 1.741964385709486, "grad_norm": 1.197686791419983, "learning_rate": 9.708052631578948e-05, "loss": 0.5984, "step": 31108 }, { "epoch": 1.742020383021615, "grad_norm": 1.1987754106521606, "learning_rate": 9.708026315789475e-05, "loss": 0.3691, "step": 31109 }, { "epoch": 1.742076380333744, "grad_norm": 1.3697737455368042, "learning_rate": 9.708e-05, "loss": 0.446, "step": 31110 }, { "epoch": 1.742132377645873, "grad_norm": 1.2985669374465942, "learning_rate": 9.707973684210527e-05, "loss": 0.4275, "step": 31111 }, { "epoch": 1.742188374958002, "grad_norm": 1.3137775659561157, "learning_rate": 9.707947368421053e-05, "loss": 0.4253, "step": 31112 }, { "epoch": 1.742244372270131, "grad_norm": 1.1247609853744507, "learning_rate": 9.707921052631579e-05, "loss": 0.4312, "step": 31113 }, { "epoch": 1.74230036958226, "grad_norm": 1.3682801723480225, "learning_rate": 9.707894736842106e-05, "loss": 0.5176, "step": 31114 }, { "epoch": 1.742356366894389, "grad_norm": 1.2950327396392822, "learning_rate": 9.707868421052631e-05, "loss": 0.4822, "step": 31115 }, { "epoch": 1.7424123642065181, "grad_norm": 1.2705243825912476, "learning_rate": 9.707842105263158e-05, "loss": 0.4913, "step": 31116 }, { "epoch": 1.7424683615186471, "grad_norm": 1.3573381900787354, "learning_rate": 9.707815789473684e-05, "loss": 0.4824, "step": 31117 }, { "epoch": 1.7425243588307762, "grad_norm": 1.2273638248443604, "learning_rate": 9.707789473684212e-05, "loss": 0.4039, "step": 31118 }, { "epoch": 1.7425803561429052, "grad_norm": 1.455743670463562, "learning_rate": 9.707763157894736e-05, "loss": 0.4769, "step": 31119 }, { "epoch": 1.7426363534550342, "grad_norm": 
1.5062799453735352, "learning_rate": 9.707736842105264e-05, "loss": 0.3431, "step": 31120 }, { "epoch": 1.7426923507671632, "grad_norm": 1.1785157918930054, "learning_rate": 9.70771052631579e-05, "loss": 0.6031, "step": 31121 }, { "epoch": 1.7427483480792922, "grad_norm": 1.2476261854171753, "learning_rate": 9.707684210526317e-05, "loss": 0.3797, "step": 31122 }, { "epoch": 1.7428043453914213, "grad_norm": 1.257611870765686, "learning_rate": 9.707657894736843e-05, "loss": 0.4104, "step": 31123 }, { "epoch": 1.7428603427035503, "grad_norm": 1.2240301370620728, "learning_rate": 9.707631578947369e-05, "loss": 0.4853, "step": 31124 }, { "epoch": 1.7429163400156793, "grad_norm": 1.4875454902648926, "learning_rate": 9.707605263157895e-05, "loss": 0.4447, "step": 31125 }, { "epoch": 1.7429723373278083, "grad_norm": 1.1892359256744385, "learning_rate": 9.707578947368422e-05, "loss": 0.4327, "step": 31126 }, { "epoch": 1.7430283346399373, "grad_norm": 1.1689327955245972, "learning_rate": 9.707552631578948e-05, "loss": 0.3766, "step": 31127 }, { "epoch": 1.7430843319520664, "grad_norm": 1.5304299592971802, "learning_rate": 9.707526315789474e-05, "loss": 0.6333, "step": 31128 }, { "epoch": 1.7431403292641954, "grad_norm": 1.2294846773147583, "learning_rate": 9.7075e-05, "loss": 0.4744, "step": 31129 }, { "epoch": 1.7431963265763244, "grad_norm": 1.062276005744934, "learning_rate": 9.707473684210526e-05, "loss": 0.3565, "step": 31130 }, { "epoch": 1.7432523238884534, "grad_norm": 1.1798160076141357, "learning_rate": 9.707447368421053e-05, "loss": 0.3753, "step": 31131 }, { "epoch": 1.7433083212005824, "grad_norm": 1.4706741571426392, "learning_rate": 9.70742105263158e-05, "loss": 0.6998, "step": 31132 }, { "epoch": 1.7433643185127115, "grad_norm": 1.335087776184082, "learning_rate": 9.707394736842105e-05, "loss": 0.4379, "step": 31133 }, { "epoch": 1.7434203158248405, "grad_norm": 1.7656105756759644, "learning_rate": 9.707368421052631e-05, "loss": 0.4847, "step": 31134 }, { 
"epoch": 1.7434763131369695, "grad_norm": 1.0609756708145142, "learning_rate": 9.707342105263159e-05, "loss": 0.3443, "step": 31135 }, { "epoch": 1.7435323104490985, "grad_norm": 1.1438069343566895, "learning_rate": 9.707315789473685e-05, "loss": 0.4442, "step": 31136 }, { "epoch": 1.7435883077612275, "grad_norm": 1.1084822416305542, "learning_rate": 9.70728947368421e-05, "loss": 0.4142, "step": 31137 }, { "epoch": 1.7436443050733565, "grad_norm": 1.147504210472107, "learning_rate": 9.707263157894737e-05, "loss": 0.3977, "step": 31138 }, { "epoch": 1.7437003023854856, "grad_norm": 1.682477355003357, "learning_rate": 9.707236842105264e-05, "loss": 0.4326, "step": 31139 }, { "epoch": 1.7437562996976146, "grad_norm": 1.200075387954712, "learning_rate": 9.70721052631579e-05, "loss": 0.3948, "step": 31140 }, { "epoch": 1.7438122970097436, "grad_norm": 1.2038331031799316, "learning_rate": 9.707184210526317e-05, "loss": 0.3892, "step": 31141 }, { "epoch": 1.7438682943218726, "grad_norm": 1.3430256843566895, "learning_rate": 9.707157894736842e-05, "loss": 0.499, "step": 31142 }, { "epoch": 1.7439242916340016, "grad_norm": 1.3392972946166992, "learning_rate": 9.707131578947369e-05, "loss": 0.5093, "step": 31143 }, { "epoch": 1.7439802889461307, "grad_norm": 1.2368052005767822, "learning_rate": 9.707105263157895e-05, "loss": 0.3795, "step": 31144 }, { "epoch": 1.7440362862582597, "grad_norm": 1.3254647254943848, "learning_rate": 9.707078947368422e-05, "loss": 0.41, "step": 31145 }, { "epoch": 1.7440922835703887, "grad_norm": 1.5145714282989502, "learning_rate": 9.707052631578948e-05, "loss": 0.4794, "step": 31146 }, { "epoch": 1.7441482808825177, "grad_norm": 1.277005910873413, "learning_rate": 9.707026315789473e-05, "loss": 0.4421, "step": 31147 }, { "epoch": 1.7442042781946467, "grad_norm": 1.0948920249938965, "learning_rate": 9.707e-05, "loss": 0.3123, "step": 31148 }, { "epoch": 1.7442602755067758, "grad_norm": 1.3008092641830444, "learning_rate": 9.706973684210526e-05, 
"loss": 0.5022, "step": 31149 }, { "epoch": 1.7443162728189048, "grad_norm": 1.3784140348434448, "learning_rate": 9.706947368421054e-05, "loss": 0.4771, "step": 31150 }, { "epoch": 1.7443722701310338, "grad_norm": 1.32255220413208, "learning_rate": 9.70692105263158e-05, "loss": 0.4237, "step": 31151 }, { "epoch": 1.7444282674431628, "grad_norm": 1.2068743705749512, "learning_rate": 9.706894736842106e-05, "loss": 0.4902, "step": 31152 }, { "epoch": 1.7444842647552918, "grad_norm": 1.5408138036727905, "learning_rate": 9.706868421052632e-05, "loss": 0.5773, "step": 31153 }, { "epoch": 1.7445402620674209, "grad_norm": 2.509183883666992, "learning_rate": 9.706842105263159e-05, "loss": 0.5349, "step": 31154 }, { "epoch": 1.7445962593795499, "grad_norm": 1.1979178190231323, "learning_rate": 9.706815789473685e-05, "loss": 0.3273, "step": 31155 }, { "epoch": 1.744652256691679, "grad_norm": 1.3609654903411865, "learning_rate": 9.706789473684211e-05, "loss": 0.4675, "step": 31156 }, { "epoch": 1.744708254003808, "grad_norm": 1.5484447479248047, "learning_rate": 9.706763157894737e-05, "loss": 0.5337, "step": 31157 }, { "epoch": 1.744764251315937, "grad_norm": 1.31448495388031, "learning_rate": 9.706736842105264e-05, "loss": 0.4891, "step": 31158 }, { "epoch": 1.744820248628066, "grad_norm": 1.3487775325775146, "learning_rate": 9.70671052631579e-05, "loss": 0.553, "step": 31159 }, { "epoch": 1.744876245940195, "grad_norm": 1.3651710748672485, "learning_rate": 9.706684210526316e-05, "loss": 0.6034, "step": 31160 }, { "epoch": 1.744932243252324, "grad_norm": 1.2114300727844238, "learning_rate": 9.706657894736842e-05, "loss": 0.3901, "step": 31161 }, { "epoch": 1.744988240564453, "grad_norm": 1.1772379875183105, "learning_rate": 9.70663157894737e-05, "loss": 0.4121, "step": 31162 }, { "epoch": 1.745044237876582, "grad_norm": 1.1749367713928223, "learning_rate": 9.706605263157895e-05, "loss": 0.4912, "step": 31163 }, { "epoch": 1.745100235188711, "grad_norm": 1.1842727661132812, 
"learning_rate": 9.706578947368421e-05, "loss": 0.4274, "step": 31164 }, { "epoch": 1.74515623250084, "grad_norm": 1.3142340183258057, "learning_rate": 9.706552631578947e-05, "loss": 0.5152, "step": 31165 }, { "epoch": 1.745212229812969, "grad_norm": 1.4865119457244873, "learning_rate": 9.706526315789473e-05, "loss": 0.486, "step": 31166 }, { "epoch": 1.745268227125098, "grad_norm": 1.3705570697784424, "learning_rate": 9.7065e-05, "loss": 0.5238, "step": 31167 }, { "epoch": 1.7453242244372271, "grad_norm": 1.315129041671753, "learning_rate": 9.706473684210527e-05, "loss": 0.5359, "step": 31168 }, { "epoch": 1.7453802217493561, "grad_norm": 1.2519727945327759, "learning_rate": 9.706447368421054e-05, "loss": 0.3968, "step": 31169 }, { "epoch": 1.7454362190614852, "grad_norm": 1.2539435625076294, "learning_rate": 9.706421052631579e-05, "loss": 0.3755, "step": 31170 }, { "epoch": 1.7454922163736142, "grad_norm": 1.0977662801742554, "learning_rate": 9.706394736842106e-05, "loss": 0.4074, "step": 31171 }, { "epoch": 1.7455482136857432, "grad_norm": 1.293164849281311, "learning_rate": 9.706368421052632e-05, "loss": 0.3691, "step": 31172 }, { "epoch": 1.7456042109978722, "grad_norm": 1.315238356590271, "learning_rate": 9.706342105263159e-05, "loss": 0.4713, "step": 31173 }, { "epoch": 1.7456602083100012, "grad_norm": 1.282739520072937, "learning_rate": 9.706315789473684e-05, "loss": 0.3478, "step": 31174 }, { "epoch": 1.7457162056221303, "grad_norm": 1.2086079120635986, "learning_rate": 9.706289473684211e-05, "loss": 0.5047, "step": 31175 }, { "epoch": 1.7457722029342593, "grad_norm": 2.1796000003814697, "learning_rate": 9.706263157894737e-05, "loss": 0.5282, "step": 31176 }, { "epoch": 1.7458282002463883, "grad_norm": 1.298432469367981, "learning_rate": 9.706236842105264e-05, "loss": 0.4878, "step": 31177 }, { "epoch": 1.7458841975585173, "grad_norm": 1.4929630756378174, "learning_rate": 9.70621052631579e-05, "loss": 0.505, "step": 31178 }, { "epoch": 1.7459401948706463, 
"grad_norm": 1.1581120491027832, "learning_rate": 9.706184210526316e-05, "loss": 0.3978, "step": 31179 }, { "epoch": 1.7459961921827754, "grad_norm": 1.0985528230667114, "learning_rate": 9.706157894736842e-05, "loss": 0.4282, "step": 31180 }, { "epoch": 1.7460521894949044, "grad_norm": 1.657619595527649, "learning_rate": 9.706131578947368e-05, "loss": 0.7524, "step": 31181 }, { "epoch": 1.7461081868070334, "grad_norm": 1.113402009010315, "learning_rate": 9.706105263157896e-05, "loss": 0.3417, "step": 31182 }, { "epoch": 1.7461641841191624, "grad_norm": 1.1263939142227173, "learning_rate": 9.706078947368422e-05, "loss": 0.4483, "step": 31183 }, { "epoch": 1.7462201814312914, "grad_norm": 1.3888018131256104, "learning_rate": 9.706052631578948e-05, "loss": 0.6088, "step": 31184 }, { "epoch": 1.7462761787434204, "grad_norm": 1.1755828857421875, "learning_rate": 9.706026315789474e-05, "loss": 0.423, "step": 31185 }, { "epoch": 1.7463321760555495, "grad_norm": 1.1729487180709839, "learning_rate": 9.706000000000001e-05, "loss": 0.4509, "step": 31186 }, { "epoch": 1.7463881733676785, "grad_norm": 1.492467999458313, "learning_rate": 9.705973684210527e-05, "loss": 0.5417, "step": 31187 }, { "epoch": 1.7464441706798075, "grad_norm": 1.4126147031784058, "learning_rate": 9.705947368421053e-05, "loss": 0.5848, "step": 31188 }, { "epoch": 1.7465001679919365, "grad_norm": 1.270298719406128, "learning_rate": 9.705921052631579e-05, "loss": 0.473, "step": 31189 }, { "epoch": 1.7465561653040655, "grad_norm": 1.3339388370513916, "learning_rate": 9.705894736842106e-05, "loss": 0.5668, "step": 31190 }, { "epoch": 1.7466121626161946, "grad_norm": 1.4679417610168457, "learning_rate": 9.705868421052632e-05, "loss": 0.6043, "step": 31191 }, { "epoch": 1.7466681599283236, "grad_norm": 1.3213615417480469, "learning_rate": 9.705842105263158e-05, "loss": 0.4504, "step": 31192 }, { "epoch": 1.7467241572404526, "grad_norm": 1.5918020009994507, "learning_rate": 9.705815789473684e-05, "loss": 
0.5611, "step": 31193 }, { "epoch": 1.7467801545525816, "grad_norm": 1.4029831886291504, "learning_rate": 9.705789473684211e-05, "loss": 0.3647, "step": 31194 }, { "epoch": 1.7468361518647106, "grad_norm": 1.5055946111679077, "learning_rate": 9.705763157894737e-05, "loss": 0.5016, "step": 31195 }, { "epoch": 1.7468921491768397, "grad_norm": 1.0396732091903687, "learning_rate": 9.705736842105265e-05, "loss": 0.3701, "step": 31196 }, { "epoch": 1.7469481464889687, "grad_norm": 1.2922356128692627, "learning_rate": 9.70571052631579e-05, "loss": 0.5646, "step": 31197 }, { "epoch": 1.7470041438010977, "grad_norm": 1.3413194417953491, "learning_rate": 9.705684210526315e-05, "loss": 0.4088, "step": 31198 }, { "epoch": 1.7470601411132267, "grad_norm": 1.216722846031189, "learning_rate": 9.705657894736843e-05, "loss": 0.5008, "step": 31199 }, { "epoch": 1.7471161384253557, "grad_norm": 1.3155068159103394, "learning_rate": 9.705631578947369e-05, "loss": 0.4174, "step": 31200 }, { "epoch": 1.7471721357374848, "grad_norm": 1.207533836364746, "learning_rate": 9.705605263157896e-05, "loss": 0.5191, "step": 31201 }, { "epoch": 1.7472281330496138, "grad_norm": 1.6117228269577026, "learning_rate": 9.70557894736842e-05, "loss": 0.5692, "step": 31202 }, { "epoch": 1.7472841303617428, "grad_norm": 1.204028844833374, "learning_rate": 9.705552631578948e-05, "loss": 0.4536, "step": 31203 }, { "epoch": 1.7473401276738718, "grad_norm": 1.4153550863265991, "learning_rate": 9.705526315789474e-05, "loss": 0.438, "step": 31204 }, { "epoch": 1.7473961249860008, "grad_norm": 1.072204828262329, "learning_rate": 9.705500000000001e-05, "loss": 0.3561, "step": 31205 }, { "epoch": 1.7474521222981299, "grad_norm": 1.1738938093185425, "learning_rate": 9.705473684210527e-05, "loss": 0.411, "step": 31206 }, { "epoch": 1.7475081196102589, "grad_norm": 1.049439549446106, "learning_rate": 9.705447368421053e-05, "loss": 0.3586, "step": 31207 }, { "epoch": 1.747564116922388, "grad_norm": 1.2313178777694702, 
"learning_rate": 9.705421052631579e-05, "loss": 0.4807, "step": 31208 }, { "epoch": 1.747620114234517, "grad_norm": 1.2798324823379517, "learning_rate": 9.705394736842106e-05, "loss": 0.3847, "step": 31209 }, { "epoch": 1.747676111546646, "grad_norm": 1.2703906297683716, "learning_rate": 9.705368421052632e-05, "loss": 0.4997, "step": 31210 }, { "epoch": 1.7477321088587747, "grad_norm": 1.3671642541885376, "learning_rate": 9.705342105263158e-05, "loss": 0.4343, "step": 31211 }, { "epoch": 1.7477881061709037, "grad_norm": 1.385200023651123, "learning_rate": 9.705315789473684e-05, "loss": 0.478, "step": 31212 }, { "epoch": 1.7478441034830328, "grad_norm": 1.0814567804336548, "learning_rate": 9.705289473684212e-05, "loss": 0.3617, "step": 31213 }, { "epoch": 1.7479001007951618, "grad_norm": 1.0792593955993652, "learning_rate": 9.705263157894738e-05, "loss": 0.3795, "step": 31214 }, { "epoch": 1.7479560981072908, "grad_norm": 1.7391676902770996, "learning_rate": 9.705236842105264e-05, "loss": 0.4514, "step": 31215 }, { "epoch": 1.7480120954194198, "grad_norm": 1.5550775527954102, "learning_rate": 9.70521052631579e-05, "loss": 0.6263, "step": 31216 }, { "epoch": 1.7480680927315488, "grad_norm": 1.538326382637024, "learning_rate": 9.705184210526316e-05, "loss": 0.5031, "step": 31217 }, { "epoch": 1.7481240900436779, "grad_norm": 1.129977822303772, "learning_rate": 9.705157894736843e-05, "loss": 0.3993, "step": 31218 }, { "epoch": 1.7481800873558069, "grad_norm": 1.1480122804641724, "learning_rate": 9.705131578947369e-05, "loss": 0.4013, "step": 31219 }, { "epoch": 1.748236084667936, "grad_norm": 1.175819754600525, "learning_rate": 9.705105263157895e-05, "loss": 0.3819, "step": 31220 }, { "epoch": 1.748292081980065, "grad_norm": 1.3679624795913696, "learning_rate": 9.705078947368421e-05, "loss": 0.442, "step": 31221 }, { "epoch": 1.748348079292194, "grad_norm": 1.1933364868164062, "learning_rate": 9.705052631578948e-05, "loss": 0.4248, "step": 31222 }, { "epoch": 
1.748404076604323, "grad_norm": 1.593381404876709, "learning_rate": 9.705026315789474e-05, "loss": 0.3883, "step": 31223 }, { "epoch": 1.748460073916452, "grad_norm": 1.7808988094329834, "learning_rate": 9.705e-05, "loss": 0.517, "step": 31224 }, { "epoch": 1.748516071228581, "grad_norm": 1.6892181634902954, "learning_rate": 9.704973684210526e-05, "loss": 0.5065, "step": 31225 }, { "epoch": 1.74857206854071, "grad_norm": 1.612975001335144, "learning_rate": 9.704947368421053e-05, "loss": 0.5153, "step": 31226 }, { "epoch": 1.748628065852839, "grad_norm": 1.4767022132873535, "learning_rate": 9.70492105263158e-05, "loss": 0.4598, "step": 31227 }, { "epoch": 1.748684063164968, "grad_norm": 1.2587836980819702, "learning_rate": 9.704894736842107e-05, "loss": 0.4505, "step": 31228 }, { "epoch": 1.748740060477097, "grad_norm": 1.2305335998535156, "learning_rate": 9.704868421052631e-05, "loss": 0.4602, "step": 31229 }, { "epoch": 1.748796057789226, "grad_norm": 1.1922690868377686, "learning_rate": 9.704842105263159e-05, "loss": 0.5075, "step": 31230 }, { "epoch": 1.748852055101355, "grad_norm": 1.8206361532211304, "learning_rate": 9.704815789473685e-05, "loss": 0.482, "step": 31231 }, { "epoch": 1.7489080524134841, "grad_norm": 1.375199556350708, "learning_rate": 9.704789473684212e-05, "loss": 0.4182, "step": 31232 }, { "epoch": 1.7489640497256131, "grad_norm": 1.3915208578109741, "learning_rate": 9.704763157894738e-05, "loss": 0.4135, "step": 31233 }, { "epoch": 1.7490200470377422, "grad_norm": 1.5195192098617554, "learning_rate": 9.704736842105263e-05, "loss": 0.4485, "step": 31234 }, { "epoch": 1.7490760443498712, "grad_norm": 1.5032994747161865, "learning_rate": 9.70471052631579e-05, "loss": 0.4478, "step": 31235 }, { "epoch": 1.7491320416620002, "grad_norm": 1.0254724025726318, "learning_rate": 9.704684210526316e-05, "loss": 0.3381, "step": 31236 }, { "epoch": 1.7491880389741292, "grad_norm": 1.2206236124038696, "learning_rate": 9.704657894736843e-05, "loss": 0.4974, 
"step": 31237 }, { "epoch": 1.7492440362862582, "grad_norm": 1.2313346862792969, "learning_rate": 9.704631578947369e-05, "loss": 0.3825, "step": 31238 }, { "epoch": 1.7493000335983873, "grad_norm": 1.2586702108383179, "learning_rate": 9.704605263157895e-05, "loss": 0.3527, "step": 31239 }, { "epoch": 1.7493560309105163, "grad_norm": 1.4547477960586548, "learning_rate": 9.704578947368421e-05, "loss": 0.5049, "step": 31240 }, { "epoch": 1.7494120282226453, "grad_norm": 1.1846121549606323, "learning_rate": 9.704552631578948e-05, "loss": 0.3511, "step": 31241 }, { "epoch": 1.7494680255347743, "grad_norm": 1.0702062845230103, "learning_rate": 9.704526315789474e-05, "loss": 0.3378, "step": 31242 }, { "epoch": 1.7495240228469033, "grad_norm": 1.2872754335403442, "learning_rate": 9.7045e-05, "loss": 0.3932, "step": 31243 }, { "epoch": 1.7495800201590324, "grad_norm": 1.1747994422912598, "learning_rate": 9.704473684210526e-05, "loss": 0.4573, "step": 31244 }, { "epoch": 1.7496360174711614, "grad_norm": 1.3304767608642578, "learning_rate": 9.704447368421054e-05, "loss": 0.4008, "step": 31245 }, { "epoch": 1.7496920147832904, "grad_norm": 1.310982584953308, "learning_rate": 9.70442105263158e-05, "loss": 0.4587, "step": 31246 }, { "epoch": 1.7497480120954194, "grad_norm": 1.4259971380233765, "learning_rate": 9.704394736842106e-05, "loss": 0.4796, "step": 31247 }, { "epoch": 1.7498040094075484, "grad_norm": 1.1854883432388306, "learning_rate": 9.704368421052632e-05, "loss": 0.5095, "step": 31248 }, { "epoch": 1.7498600067196775, "grad_norm": 1.2379854917526245, "learning_rate": 9.704342105263159e-05, "loss": 0.4267, "step": 31249 }, { "epoch": 1.7499160040318065, "grad_norm": 1.5443087816238403, "learning_rate": 9.704315789473685e-05, "loss": 0.6044, "step": 31250 }, { "epoch": 1.7499720013439355, "grad_norm": 1.7336145639419556, "learning_rate": 9.704289473684211e-05, "loss": 0.5171, "step": 31251 }, { "epoch": 1.7500279986560645, "grad_norm": 1.352027177810669, 
"learning_rate": 9.704263157894737e-05, "loss": 0.5536, "step": 31252 }, { "epoch": 1.7500839959681935, "grad_norm": 1.3359984159469604, "learning_rate": 9.704236842105263e-05, "loss": 0.4543, "step": 31253 }, { "epoch": 1.7501399932803225, "grad_norm": 1.0503405332565308, "learning_rate": 9.70421052631579e-05, "loss": 0.3971, "step": 31254 }, { "epoch": 1.7501959905924516, "grad_norm": 1.1917916536331177, "learning_rate": 9.704184210526316e-05, "loss": 0.3951, "step": 31255 }, { "epoch": 1.7502519879045806, "grad_norm": 1.2114452123641968, "learning_rate": 9.704157894736843e-05, "loss": 0.4025, "step": 31256 }, { "epoch": 1.7503079852167096, "grad_norm": 1.156714677810669, "learning_rate": 9.704131578947368e-05, "loss": 0.4568, "step": 31257 }, { "epoch": 1.7503639825288386, "grad_norm": 1.41642165184021, "learning_rate": 9.704105263157895e-05, "loss": 0.5325, "step": 31258 }, { "epoch": 1.7504199798409676, "grad_norm": 1.2985272407531738, "learning_rate": 9.704078947368421e-05, "loss": 0.4317, "step": 31259 }, { "epoch": 1.7504759771530967, "grad_norm": 1.1710045337677002, "learning_rate": 9.704052631578949e-05, "loss": 0.4385, "step": 31260 }, { "epoch": 1.7505319744652257, "grad_norm": 1.4962447881698608, "learning_rate": 9.704026315789475e-05, "loss": 0.4705, "step": 31261 }, { "epoch": 1.7505879717773547, "grad_norm": 1.2892462015151978, "learning_rate": 9.704e-05, "loss": 0.4027, "step": 31262 }, { "epoch": 1.7506439690894837, "grad_norm": 1.217913031578064, "learning_rate": 9.703973684210527e-05, "loss": 0.5367, "step": 31263 }, { "epoch": 1.7506999664016127, "grad_norm": 1.1747641563415527, "learning_rate": 9.703947368421054e-05, "loss": 0.4703, "step": 31264 }, { "epoch": 1.7507559637137418, "grad_norm": 1.229805588722229, "learning_rate": 9.70392105263158e-05, "loss": 0.3878, "step": 31265 }, { "epoch": 1.7508119610258708, "grad_norm": 1.64512038230896, "learning_rate": 9.703894736842106e-05, "loss": 0.5059, "step": 31266 }, { "epoch": 
1.7508679583379998, "grad_norm": 1.3795711994171143, "learning_rate": 9.703868421052632e-05, "loss": 0.59, "step": 31267 }, { "epoch": 1.7509239556501288, "grad_norm": 1.3502846956253052, "learning_rate": 9.703842105263158e-05, "loss": 0.5191, "step": 31268 }, { "epoch": 1.7509799529622578, "grad_norm": 1.1269354820251465, "learning_rate": 9.703815789473685e-05, "loss": 0.4288, "step": 31269 }, { "epoch": 1.7510359502743869, "grad_norm": 1.2391060590744019, "learning_rate": 9.703789473684211e-05, "loss": 0.3725, "step": 31270 }, { "epoch": 1.7510919475865159, "grad_norm": 1.2322804927825928, "learning_rate": 9.703763157894737e-05, "loss": 0.5085, "step": 31271 }, { "epoch": 1.751147944898645, "grad_norm": 1.2614835500717163, "learning_rate": 9.703736842105263e-05, "loss": 0.4143, "step": 31272 }, { "epoch": 1.751203942210774, "grad_norm": 1.2989394664764404, "learning_rate": 9.70371052631579e-05, "loss": 0.5407, "step": 31273 }, { "epoch": 1.751259939522903, "grad_norm": 1.300110101699829, "learning_rate": 9.703684210526316e-05, "loss": 0.5379, "step": 31274 }, { "epoch": 1.751315936835032, "grad_norm": 1.4079710245132446, "learning_rate": 9.703657894736842e-05, "loss": 0.4106, "step": 31275 }, { "epoch": 1.751371934147161, "grad_norm": 1.0841021537780762, "learning_rate": 9.703631578947368e-05, "loss": 0.3562, "step": 31276 }, { "epoch": 1.75142793145929, "grad_norm": 41.36603927612305, "learning_rate": 9.703605263157896e-05, "loss": 0.4181, "step": 31277 }, { "epoch": 1.751483928771419, "grad_norm": 1.1688511371612549, "learning_rate": 9.703578947368422e-05, "loss": 0.3864, "step": 31278 }, { "epoch": 1.751539926083548, "grad_norm": 1.1154115200042725, "learning_rate": 9.703552631578948e-05, "loss": 0.3512, "step": 31279 }, { "epoch": 1.751595923395677, "grad_norm": 1.5570732355117798, "learning_rate": 9.703526315789474e-05, "loss": 0.5818, "step": 31280 }, { "epoch": 1.751651920707806, "grad_norm": 1.2714945077896118, "learning_rate": 9.703500000000001e-05, 
"loss": 0.3954, "step": 31281 }, { "epoch": 1.751707918019935, "grad_norm": 1.1105530261993408, "learning_rate": 9.703473684210527e-05, "loss": 0.4644, "step": 31282 }, { "epoch": 1.751763915332064, "grad_norm": 1.3542370796203613, "learning_rate": 9.703447368421054e-05, "loss": 0.4844, "step": 31283 }, { "epoch": 1.7518199126441931, "grad_norm": 1.2920011281967163, "learning_rate": 9.703421052631579e-05, "loss": 0.3696, "step": 31284 }, { "epoch": 1.7518759099563221, "grad_norm": 1.2199699878692627, "learning_rate": 9.703394736842105e-05, "loss": 0.3776, "step": 31285 }, { "epoch": 1.7519319072684512, "grad_norm": 1.2068777084350586, "learning_rate": 9.703368421052632e-05, "loss": 0.4286, "step": 31286 }, { "epoch": 1.7519879045805802, "grad_norm": 1.2400809526443481, "learning_rate": 9.703342105263158e-05, "loss": 0.4208, "step": 31287 }, { "epoch": 1.7520439018927092, "grad_norm": 1.454540491104126, "learning_rate": 9.703315789473685e-05, "loss": 0.5424, "step": 31288 }, { "epoch": 1.7520998992048382, "grad_norm": 1.419374942779541, "learning_rate": 9.70328947368421e-05, "loss": 0.4436, "step": 31289 }, { "epoch": 1.7521558965169672, "grad_norm": 1.0190922021865845, "learning_rate": 9.703263157894737e-05, "loss": 0.4034, "step": 31290 }, { "epoch": 1.7522118938290963, "grad_norm": 1.2758339643478394, "learning_rate": 9.703236842105263e-05, "loss": 0.5161, "step": 31291 }, { "epoch": 1.7522678911412253, "grad_norm": 1.1510674953460693, "learning_rate": 9.70321052631579e-05, "loss": 0.4194, "step": 31292 }, { "epoch": 1.752323888453354, "grad_norm": 1.3816848993301392, "learning_rate": 9.703184210526317e-05, "loss": 0.5782, "step": 31293 }, { "epoch": 1.752379885765483, "grad_norm": 1.1880531311035156, "learning_rate": 9.703157894736843e-05, "loss": 0.4683, "step": 31294 }, { "epoch": 1.752435883077612, "grad_norm": 1.1447632312774658, "learning_rate": 9.703131578947369e-05, "loss": 0.5645, "step": 31295 }, { "epoch": 1.7524918803897411, "grad_norm": 
1.6359468698501587, "learning_rate": 9.703105263157896e-05, "loss": 0.6439, "step": 31296 }, { "epoch": 1.7525478777018701, "grad_norm": 1.5488758087158203, "learning_rate": 9.703078947368422e-05, "loss": 0.4205, "step": 31297 }, { "epoch": 1.7526038750139992, "grad_norm": 1.2353792190551758, "learning_rate": 9.703052631578948e-05, "loss": 0.3828, "step": 31298 }, { "epoch": 1.7526598723261282, "grad_norm": 1.3690663576126099, "learning_rate": 9.703026315789474e-05, "loss": 0.4517, "step": 31299 }, { "epoch": 1.7527158696382572, "grad_norm": 5.144920825958252, "learning_rate": 9.703000000000001e-05, "loss": 0.5015, "step": 31300 }, { "epoch": 1.7527718669503862, "grad_norm": 1.2079733610153198, "learning_rate": 9.702973684210527e-05, "loss": 0.5603, "step": 31301 }, { "epoch": 1.7528278642625152, "grad_norm": 1.635956048965454, "learning_rate": 9.702947368421053e-05, "loss": 0.438, "step": 31302 }, { "epoch": 1.7528838615746443, "grad_norm": 1.3473219871520996, "learning_rate": 9.702921052631579e-05, "loss": 0.4354, "step": 31303 }, { "epoch": 1.7529398588867733, "grad_norm": 1.3026502132415771, "learning_rate": 9.702894736842105e-05, "loss": 0.3847, "step": 31304 }, { "epoch": 1.7529958561989023, "grad_norm": 1.169182300567627, "learning_rate": 9.702868421052632e-05, "loss": 0.4121, "step": 31305 }, { "epoch": 1.7530518535110313, "grad_norm": 0.9932965040206909, "learning_rate": 9.702842105263158e-05, "loss": 0.3512, "step": 31306 }, { "epoch": 1.7531078508231603, "grad_norm": 1.2884613275527954, "learning_rate": 9.702815789473684e-05, "loss": 0.373, "step": 31307 }, { "epoch": 1.7531638481352894, "grad_norm": 1.0677106380462646, "learning_rate": 9.70278947368421e-05, "loss": 0.3906, "step": 31308 }, { "epoch": 1.7532198454474184, "grad_norm": 1.265514612197876, "learning_rate": 9.702763157894738e-05, "loss": 0.6163, "step": 31309 }, { "epoch": 1.7532758427595474, "grad_norm": 1.2068476676940918, "learning_rate": 9.702736842105264e-05, "loss": 0.3683, "step": 
31310 }, { "epoch": 1.7533318400716764, "grad_norm": 1.3675965070724487, "learning_rate": 9.702710526315791e-05, "loss": 0.4875, "step": 31311 }, { "epoch": 1.7533878373838054, "grad_norm": 1.547469139099121, "learning_rate": 9.702684210526316e-05, "loss": 0.4268, "step": 31312 }, { "epoch": 1.7534438346959345, "grad_norm": 1.2892094850540161, "learning_rate": 9.702657894736843e-05, "loss": 0.6642, "step": 31313 }, { "epoch": 1.7534998320080635, "grad_norm": 1.1886186599731445, "learning_rate": 9.702631578947369e-05, "loss": 0.4739, "step": 31314 }, { "epoch": 1.7535558293201925, "grad_norm": 1.0835310220718384, "learning_rate": 9.702605263157896e-05, "loss": 0.4139, "step": 31315 }, { "epoch": 1.7536118266323215, "grad_norm": 1.205816388130188, "learning_rate": 9.702578947368422e-05, "loss": 0.4162, "step": 31316 }, { "epoch": 1.7536678239444505, "grad_norm": 1.6776663064956665, "learning_rate": 9.702552631578948e-05, "loss": 0.4073, "step": 31317 }, { "epoch": 1.7537238212565796, "grad_norm": 4.810730934143066, "learning_rate": 9.702526315789474e-05, "loss": 0.4364, "step": 31318 }, { "epoch": 1.7537798185687086, "grad_norm": 1.4386926889419556, "learning_rate": 9.7025e-05, "loss": 0.4643, "step": 31319 }, { "epoch": 1.7538358158808376, "grad_norm": 1.1380045413970947, "learning_rate": 9.702473684210527e-05, "loss": 0.4737, "step": 31320 }, { "epoch": 1.7538918131929666, "grad_norm": 1.463484764099121, "learning_rate": 9.702447368421052e-05, "loss": 0.3943, "step": 31321 }, { "epoch": 1.7539478105050956, "grad_norm": 1.4343619346618652, "learning_rate": 9.702421052631579e-05, "loss": 0.3837, "step": 31322 }, { "epoch": 1.7540038078172246, "grad_norm": 1.1751424074172974, "learning_rate": 9.702394736842105e-05, "loss": 0.397, "step": 31323 }, { "epoch": 1.7540598051293537, "grad_norm": 1.3465496301651, "learning_rate": 9.702368421052633e-05, "loss": 0.5806, "step": 31324 }, { "epoch": 1.7541158024414827, "grad_norm": 1.3034881353378296, "learning_rate": 
9.702342105263159e-05, "loss": 0.4874, "step": 31325 }, { "epoch": 1.7541717997536117, "grad_norm": 1.4874991178512573, "learning_rate": 9.702315789473685e-05, "loss": 0.492, "step": 31326 }, { "epoch": 1.7542277970657407, "grad_norm": 1.558430552482605, "learning_rate": 9.70228947368421e-05, "loss": 0.5006, "step": 31327 }, { "epoch": 1.7542837943778697, "grad_norm": 1.315670132637024, "learning_rate": 9.702263157894738e-05, "loss": 0.5163, "step": 31328 }, { "epoch": 1.7543397916899988, "grad_norm": 1.3746200799942017, "learning_rate": 9.702236842105264e-05, "loss": 0.4011, "step": 31329 }, { "epoch": 1.7543957890021278, "grad_norm": 1.3804941177368164, "learning_rate": 9.70221052631579e-05, "loss": 0.5946, "step": 31330 }, { "epoch": 1.7544517863142568, "grad_norm": 1.374734878540039, "learning_rate": 9.702184210526316e-05, "loss": 0.3677, "step": 31331 }, { "epoch": 1.7545077836263858, "grad_norm": 1.3483905792236328, "learning_rate": 9.702157894736843e-05, "loss": 0.5115, "step": 31332 }, { "epoch": 1.7545637809385148, "grad_norm": 1.217339277267456, "learning_rate": 9.702131578947369e-05, "loss": 0.4084, "step": 31333 }, { "epoch": 1.7546197782506439, "grad_norm": 1.4455842971801758, "learning_rate": 9.702105263157895e-05, "loss": 0.5797, "step": 31334 }, { "epoch": 1.7546757755627729, "grad_norm": 1.247218370437622, "learning_rate": 9.702078947368421e-05, "loss": 0.3732, "step": 31335 }, { "epoch": 1.754731772874902, "grad_norm": 1.3854154348373413, "learning_rate": 9.702052631578947e-05, "loss": 0.5129, "step": 31336 }, { "epoch": 1.754787770187031, "grad_norm": 1.4090889692306519, "learning_rate": 9.702026315789474e-05, "loss": 0.5575, "step": 31337 }, { "epoch": 1.75484376749916, "grad_norm": 1.278238296508789, "learning_rate": 9.702e-05, "loss": 0.5516, "step": 31338 }, { "epoch": 1.754899764811289, "grad_norm": 1.2656220197677612, "learning_rate": 9.701973684210526e-05, "loss": 0.4156, "step": 31339 }, { "epoch": 1.754955762123418, "grad_norm": 
1.3355220556259155, "learning_rate": 9.701947368421052e-05, "loss": 0.4433, "step": 31340 }, { "epoch": 1.755011759435547, "grad_norm": 1.4254103899002075, "learning_rate": 9.70192105263158e-05, "loss": 0.4262, "step": 31341 }, { "epoch": 1.755067756747676, "grad_norm": 1.6784453392028809, "learning_rate": 9.701894736842106e-05, "loss": 0.5253, "step": 31342 }, { "epoch": 1.755123754059805, "grad_norm": 1.319821834564209, "learning_rate": 9.701868421052633e-05, "loss": 0.592, "step": 31343 }, { "epoch": 1.755179751371934, "grad_norm": 1.1004695892333984, "learning_rate": 9.701842105263157e-05, "loss": 0.3536, "step": 31344 }, { "epoch": 1.755235748684063, "grad_norm": 1.2576149702072144, "learning_rate": 9.701815789473685e-05, "loss": 0.4572, "step": 31345 }, { "epoch": 1.755291745996192, "grad_norm": 1.5715019702911377, "learning_rate": 9.701789473684211e-05, "loss": 0.4802, "step": 31346 }, { "epoch": 1.755347743308321, "grad_norm": 1.1008951663970947, "learning_rate": 9.701763157894738e-05, "loss": 0.4025, "step": 31347 }, { "epoch": 1.7554037406204501, "grad_norm": 1.357969045639038, "learning_rate": 9.701736842105264e-05, "loss": 0.386, "step": 31348 }, { "epoch": 1.7554597379325791, "grad_norm": 1.2820796966552734, "learning_rate": 9.70171052631579e-05, "loss": 0.4113, "step": 31349 }, { "epoch": 1.7555157352447082, "grad_norm": 1.2205705642700195, "learning_rate": 9.701684210526316e-05, "loss": 0.4281, "step": 31350 }, { "epoch": 1.7555717325568372, "grad_norm": 1.1640247106552124, "learning_rate": 9.701657894736843e-05, "loss": 0.4124, "step": 31351 }, { "epoch": 1.7556277298689662, "grad_norm": 1.018678903579712, "learning_rate": 9.70163157894737e-05, "loss": 0.3908, "step": 31352 }, { "epoch": 1.7556837271810952, "grad_norm": 1.4704970121383667, "learning_rate": 9.701605263157895e-05, "loss": 0.4808, "step": 31353 }, { "epoch": 1.7557397244932242, "grad_norm": 1.1616512537002563, "learning_rate": 9.701578947368421e-05, "loss": 0.3945, "step": 31354 }, { 
"epoch": 1.7557957218053533, "grad_norm": 1.2916069030761719, "learning_rate": 9.701552631578947e-05, "loss": 0.4441, "step": 31355 }, { "epoch": 1.7558517191174823, "grad_norm": 1.2617357969284058, "learning_rate": 9.701526315789475e-05, "loss": 0.4459, "step": 31356 }, { "epoch": 1.7559077164296113, "grad_norm": 1.7488200664520264, "learning_rate": 9.7015e-05, "loss": 0.5532, "step": 31357 }, { "epoch": 1.7559637137417403, "grad_norm": 1.1443363428115845, "learning_rate": 9.701473684210527e-05, "loss": 0.3739, "step": 31358 }, { "epoch": 1.7560197110538693, "grad_norm": 1.3983464241027832, "learning_rate": 9.701447368421052e-05, "loss": 0.4532, "step": 31359 }, { "epoch": 1.7560757083659984, "grad_norm": 1.4170891046524048, "learning_rate": 9.70142105263158e-05, "loss": 0.4413, "step": 31360 }, { "epoch": 1.7561317056781274, "grad_norm": 1.5757806301116943, "learning_rate": 9.701394736842106e-05, "loss": 0.5593, "step": 31361 }, { "epoch": 1.7561877029902564, "grad_norm": 1.6156803369522095, "learning_rate": 9.701368421052632e-05, "loss": 0.456, "step": 31362 }, { "epoch": 1.7562437003023854, "grad_norm": 1.2452237606048584, "learning_rate": 9.701342105263158e-05, "loss": 0.3817, "step": 31363 }, { "epoch": 1.7562996976145144, "grad_norm": 1.4902656078338623, "learning_rate": 9.701315789473685e-05, "loss": 0.5332, "step": 31364 }, { "epoch": 1.7563556949266435, "grad_norm": 1.3083916902542114, "learning_rate": 9.701289473684211e-05, "loss": 0.5033, "step": 31365 }, { "epoch": 1.7564116922387725, "grad_norm": 1.261059284210205, "learning_rate": 9.701263157894738e-05, "loss": 0.5927, "step": 31366 }, { "epoch": 1.7564676895509015, "grad_norm": 1.3278404474258423, "learning_rate": 9.701236842105263e-05, "loss": 0.3786, "step": 31367 }, { "epoch": 1.7565236868630305, "grad_norm": 1.5977632999420166, "learning_rate": 9.70121052631579e-05, "loss": 0.4335, "step": 31368 }, { "epoch": 1.7565796841751595, "grad_norm": 1.1443051099777222, "learning_rate": 
9.701184210526316e-05, "loss": 0.4152, "step": 31369 }, { "epoch": 1.7566356814872885, "grad_norm": 1.3411881923675537, "learning_rate": 9.701157894736844e-05, "loss": 0.443, "step": 31370 }, { "epoch": 1.7566916787994176, "grad_norm": 1.2317705154418945, "learning_rate": 9.701131578947368e-05, "loss": 0.4129, "step": 31371 }, { "epoch": 1.7567476761115466, "grad_norm": 1.2970744371414185, "learning_rate": 9.701105263157894e-05, "loss": 0.5434, "step": 31372 }, { "epoch": 1.7568036734236756, "grad_norm": 1.1788445711135864, "learning_rate": 9.701078947368422e-05, "loss": 0.3618, "step": 31373 }, { "epoch": 1.7568596707358046, "grad_norm": 1.4097920656204224, "learning_rate": 9.701052631578948e-05, "loss": 0.513, "step": 31374 }, { "epoch": 1.7569156680479336, "grad_norm": 1.1962306499481201, "learning_rate": 9.701026315789475e-05, "loss": 0.5054, "step": 31375 }, { "epoch": 1.7569716653600627, "grad_norm": 1.5471967458724976, "learning_rate": 9.701e-05, "loss": 0.4983, "step": 31376 }, { "epoch": 1.7570276626721917, "grad_norm": 1.1592347621917725, "learning_rate": 9.700973684210527e-05, "loss": 0.6716, "step": 31377 }, { "epoch": 1.7570836599843207, "grad_norm": 1.21383535861969, "learning_rate": 9.700947368421053e-05, "loss": 0.4102, "step": 31378 }, { "epoch": 1.7571396572964497, "grad_norm": 1.246001958847046, "learning_rate": 9.70092105263158e-05, "loss": 0.4585, "step": 31379 }, { "epoch": 1.7571956546085787, "grad_norm": 1.214787244796753, "learning_rate": 9.700894736842106e-05, "loss": 0.3326, "step": 31380 }, { "epoch": 1.7572516519207078, "grad_norm": 1.2083290815353394, "learning_rate": 9.700868421052632e-05, "loss": 0.3861, "step": 31381 }, { "epoch": 1.7573076492328368, "grad_norm": 1.724625825881958, "learning_rate": 9.700842105263158e-05, "loss": 0.7243, "step": 31382 }, { "epoch": 1.7573636465449658, "grad_norm": 1.2969224452972412, "learning_rate": 9.700815789473685e-05, "loss": 0.561, "step": 31383 }, { "epoch": 1.7574196438570948, "grad_norm": 
1.1480454206466675, "learning_rate": 9.700789473684211e-05, "loss": 0.5084, "step": 31384 }, { "epoch": 1.7574756411692238, "grad_norm": 1.3230129480361938, "learning_rate": 9.700763157894737e-05, "loss": 0.5435, "step": 31385 }, { "epoch": 1.7575316384813529, "grad_norm": 1.4467577934265137, "learning_rate": 9.700736842105263e-05, "loss": 0.5748, "step": 31386 }, { "epoch": 1.7575876357934819, "grad_norm": 1.3462480306625366, "learning_rate": 9.70071052631579e-05, "loss": 0.429, "step": 31387 }, { "epoch": 1.757643633105611, "grad_norm": 1.230965495109558, "learning_rate": 9.700684210526317e-05, "loss": 0.5786, "step": 31388 }, { "epoch": 1.75769963041774, "grad_norm": 1.4086639881134033, "learning_rate": 9.700657894736843e-05, "loss": 0.4782, "step": 31389 }, { "epoch": 1.757755627729869, "grad_norm": 1.478774905204773, "learning_rate": 9.700631578947368e-05, "loss": 0.3529, "step": 31390 }, { "epoch": 1.757811625041998, "grad_norm": 1.061188817024231, "learning_rate": 9.700605263157894e-05, "loss": 0.3758, "step": 31391 }, { "epoch": 1.757867622354127, "grad_norm": 1.043335199356079, "learning_rate": 9.700578947368422e-05, "loss": 0.435, "step": 31392 }, { "epoch": 1.757923619666256, "grad_norm": 1.258521318435669, "learning_rate": 9.700552631578948e-05, "loss": 0.3658, "step": 31393 }, { "epoch": 1.757979616978385, "grad_norm": 4.0094757080078125, "learning_rate": 9.700526315789474e-05, "loss": 0.3804, "step": 31394 }, { "epoch": 1.758035614290514, "grad_norm": 1.100940227508545, "learning_rate": 9.7005e-05, "loss": 0.3954, "step": 31395 }, { "epoch": 1.758091611602643, "grad_norm": 1.1431269645690918, "learning_rate": 9.700473684210527e-05, "loss": 0.3382, "step": 31396 }, { "epoch": 1.758147608914772, "grad_norm": 1.1409684419631958, "learning_rate": 9.700447368421053e-05, "loss": 0.5586, "step": 31397 }, { "epoch": 1.758203606226901, "grad_norm": 1.3768917322158813, "learning_rate": 9.70042105263158e-05, "loss": 0.4479, "step": 31398 }, { "epoch": 
1.75825960353903, "grad_norm": 1.5339504480361938, "learning_rate": 9.700394736842105e-05, "loss": 0.4439, "step": 31399 }, { "epoch": 1.7583156008511591, "grad_norm": 1.4344311952590942, "learning_rate": 9.700368421052632e-05, "loss": 0.4942, "step": 31400 }, { "epoch": 1.7583715981632881, "grad_norm": 1.1855796575546265, "learning_rate": 9.700342105263158e-05, "loss": 0.4133, "step": 31401 }, { "epoch": 1.7584275954754172, "grad_norm": 1.260161280632019, "learning_rate": 9.700315789473686e-05, "loss": 0.3815, "step": 31402 }, { "epoch": 1.7584835927875462, "grad_norm": 1.1264280080795288, "learning_rate": 9.700289473684212e-05, "loss": 0.5768, "step": 31403 }, { "epoch": 1.7585395900996752, "grad_norm": 1.344635248184204, "learning_rate": 9.700263157894738e-05, "loss": 0.5187, "step": 31404 }, { "epoch": 1.7585955874118042, "grad_norm": 1.3495181798934937, "learning_rate": 9.700236842105263e-05, "loss": 0.5547, "step": 31405 }, { "epoch": 1.7586515847239332, "grad_norm": 1.0829442739486694, "learning_rate": 9.70021052631579e-05, "loss": 0.3704, "step": 31406 }, { "epoch": 1.7587075820360623, "grad_norm": 1.1291688680648804, "learning_rate": 9.700184210526317e-05, "loss": 0.3036, "step": 31407 }, { "epoch": 1.7587635793481913, "grad_norm": 1.5994068384170532, "learning_rate": 9.700157894736843e-05, "loss": 0.4838, "step": 31408 }, { "epoch": 1.7588195766603203, "grad_norm": 1.3823555707931519, "learning_rate": 9.700131578947369e-05, "loss": 0.3572, "step": 31409 }, { "epoch": 1.7588755739724493, "grad_norm": 1.4652913808822632, "learning_rate": 9.700105263157895e-05, "loss": 0.7145, "step": 31410 }, { "epoch": 1.7589315712845783, "grad_norm": 1.3560162782669067, "learning_rate": 9.700078947368422e-05, "loss": 0.4733, "step": 31411 }, { "epoch": 1.7589875685967074, "grad_norm": 1.3950577974319458, "learning_rate": 9.700052631578948e-05, "loss": 0.5525, "step": 31412 }, { "epoch": 1.7590435659088364, "grad_norm": 1.1825885772705078, "learning_rate": 
9.700026315789474e-05, "loss": 0.3966, "step": 31413 }, { "epoch": 1.7590995632209654, "grad_norm": 1.2298526763916016, "learning_rate": 9.7e-05, "loss": 0.4035, "step": 31414 }, { "epoch": 1.7591555605330944, "grad_norm": 1.301370620727539, "learning_rate": 9.699973684210527e-05, "loss": 0.5156, "step": 31415 }, { "epoch": 1.7592115578452234, "grad_norm": 1.4733564853668213, "learning_rate": 9.699947368421053e-05, "loss": 0.4461, "step": 31416 }, { "epoch": 1.7592675551573524, "grad_norm": 1.7607483863830566, "learning_rate": 9.699921052631579e-05, "loss": 0.5734, "step": 31417 }, { "epoch": 1.7593235524694815, "grad_norm": 1.5247420072555542, "learning_rate": 9.699894736842105e-05, "loss": 0.4377, "step": 31418 }, { "epoch": 1.7593795497816105, "grad_norm": 1.4009897708892822, "learning_rate": 9.699868421052633e-05, "loss": 0.4704, "step": 31419 }, { "epoch": 1.7594355470937395, "grad_norm": 1.2151381969451904, "learning_rate": 9.699842105263159e-05, "loss": 0.4475, "step": 31420 }, { "epoch": 1.7594915444058685, "grad_norm": 1.4483447074890137, "learning_rate": 9.699815789473686e-05, "loss": 0.463, "step": 31421 }, { "epoch": 1.7595475417179975, "grad_norm": 1.5088638067245483, "learning_rate": 9.69978947368421e-05, "loss": 0.4674, "step": 31422 }, { "epoch": 1.7596035390301266, "grad_norm": 1.3373827934265137, "learning_rate": 9.699763157894736e-05, "loss": 0.4543, "step": 31423 }, { "epoch": 1.7596595363422556, "grad_norm": 1.1299997568130493, "learning_rate": 9.699736842105264e-05, "loss": 0.4847, "step": 31424 }, { "epoch": 1.7597155336543846, "grad_norm": 1.358533501625061, "learning_rate": 9.69971052631579e-05, "loss": 0.3672, "step": 31425 }, { "epoch": 1.7597715309665136, "grad_norm": 1.4248813390731812, "learning_rate": 9.699684210526316e-05, "loss": 0.3376, "step": 31426 }, { "epoch": 1.7598275282786426, "grad_norm": 1.6210975646972656, "learning_rate": 9.699657894736842e-05, "loss": 0.602, "step": 31427 }, { "epoch": 1.7598835255907717, "grad_norm": 
1.2547893524169922, "learning_rate": 9.699631578947369e-05, "loss": 0.4421, "step": 31428 }, { "epoch": 1.7599395229029007, "grad_norm": 1.2690328359603882, "learning_rate": 9.699605263157895e-05, "loss": 0.432, "step": 31429 }, { "epoch": 1.7599955202150297, "grad_norm": 1.2691785097122192, "learning_rate": 9.699578947368422e-05, "loss": 0.4426, "step": 31430 }, { "epoch": 1.7600515175271587, "grad_norm": 1.4508098363876343, "learning_rate": 9.699552631578947e-05, "loss": 0.5126, "step": 31431 }, { "epoch": 1.7601075148392877, "grad_norm": 1.277583360671997, "learning_rate": 9.699526315789474e-05, "loss": 0.4527, "step": 31432 }, { "epoch": 1.7601635121514168, "grad_norm": 1.5339820384979248, "learning_rate": 9.6995e-05, "loss": 0.459, "step": 31433 }, { "epoch": 1.7602195094635458, "grad_norm": 1.3452469110488892, "learning_rate": 9.699473684210528e-05, "loss": 0.5097, "step": 31434 }, { "epoch": 1.7602755067756748, "grad_norm": 1.2519586086273193, "learning_rate": 9.699447368421054e-05, "loss": 0.4339, "step": 31435 }, { "epoch": 1.7603315040878038, "grad_norm": 1.7836685180664062, "learning_rate": 9.69942105263158e-05, "loss": 0.4599, "step": 31436 }, { "epoch": 1.7603875013999328, "grad_norm": 1.384204387664795, "learning_rate": 9.699394736842105e-05, "loss": 0.5123, "step": 31437 }, { "epoch": 1.7604434987120618, "grad_norm": 1.1255943775177002, "learning_rate": 9.699368421052633e-05, "loss": 0.4688, "step": 31438 }, { "epoch": 1.7604994960241909, "grad_norm": 1.056462287902832, "learning_rate": 9.699342105263159e-05, "loss": 0.333, "step": 31439 }, { "epoch": 1.7605554933363199, "grad_norm": 1.203423261642456, "learning_rate": 9.699315789473685e-05, "loss": 0.4784, "step": 31440 }, { "epoch": 1.760611490648449, "grad_norm": 5.010548114776611, "learning_rate": 9.699289473684211e-05, "loss": 0.5922, "step": 31441 }, { "epoch": 1.760667487960578, "grad_norm": 1.019890546798706, "learning_rate": 9.699263157894737e-05, "loss": 0.4258, "step": 31442 }, { "epoch": 
1.760723485272707, "grad_norm": 1.202873945236206, "learning_rate": 9.699236842105264e-05, "loss": 0.539, "step": 31443 }, { "epoch": 1.760779482584836, "grad_norm": 1.1808617115020752, "learning_rate": 9.69921052631579e-05, "loss": 0.5889, "step": 31444 }, { "epoch": 1.760835479896965, "grad_norm": 1.1226969957351685, "learning_rate": 9.699184210526316e-05, "loss": 0.374, "step": 31445 }, { "epoch": 1.760891477209094, "grad_norm": 1.376338243484497, "learning_rate": 9.699157894736842e-05, "loss": 0.3882, "step": 31446 }, { "epoch": 1.760947474521223, "grad_norm": 1.1124764680862427, "learning_rate": 9.699131578947369e-05, "loss": 0.4151, "step": 31447 }, { "epoch": 1.761003471833352, "grad_norm": 1.4831949472427368, "learning_rate": 9.699105263157895e-05, "loss": 0.4305, "step": 31448 }, { "epoch": 1.761059469145481, "grad_norm": 1.3852840662002563, "learning_rate": 9.699078947368421e-05, "loss": 0.3966, "step": 31449 }, { "epoch": 1.76111546645761, "grad_norm": 1.6167633533477783, "learning_rate": 9.699052631578947e-05, "loss": 0.411, "step": 31450 }, { "epoch": 1.761171463769739, "grad_norm": 1.3998875617980957, "learning_rate": 9.699026315789475e-05, "loss": 0.5188, "step": 31451 }, { "epoch": 1.7612274610818681, "grad_norm": 1.45525062084198, "learning_rate": 9.699e-05, "loss": 0.5463, "step": 31452 }, { "epoch": 1.7612834583939971, "grad_norm": 0.9640327095985413, "learning_rate": 9.698973684210528e-05, "loss": 0.3042, "step": 31453 }, { "epoch": 1.7613394557061262, "grad_norm": 1.1937028169631958, "learning_rate": 9.698947368421052e-05, "loss": 0.4544, "step": 31454 }, { "epoch": 1.7613954530182552, "grad_norm": 1.2516857385635376, "learning_rate": 9.69892105263158e-05, "loss": 0.4241, "step": 31455 }, { "epoch": 1.7614514503303842, "grad_norm": 1.1950222253799438, "learning_rate": 9.698894736842106e-05, "loss": 0.4083, "step": 31456 }, { "epoch": 1.7615074476425132, "grad_norm": 1.199872374534607, "learning_rate": 9.698868421052632e-05, "loss": 0.5317, 
"step": 31457 }, { "epoch": 1.7615634449546422, "grad_norm": 1.4942198991775513, "learning_rate": 9.698842105263159e-05, "loss": 0.3739, "step": 31458 }, { "epoch": 1.7616194422667713, "grad_norm": 2.0371768474578857, "learning_rate": 9.698815789473684e-05, "loss": 0.5374, "step": 31459 }, { "epoch": 1.7616754395789003, "grad_norm": 1.3608322143554688, "learning_rate": 9.698789473684211e-05, "loss": 0.3797, "step": 31460 }, { "epoch": 1.7617314368910293, "grad_norm": 1.410219430923462, "learning_rate": 9.698763157894737e-05, "loss": 0.3973, "step": 31461 }, { "epoch": 1.7617874342031583, "grad_norm": 1.2892036437988281, "learning_rate": 9.698736842105264e-05, "loss": 0.5025, "step": 31462 }, { "epoch": 1.7618434315152873, "grad_norm": 1.354514241218567, "learning_rate": 9.69871052631579e-05, "loss": 0.584, "step": 31463 }, { "epoch": 1.7618994288274163, "grad_norm": 1.2871614694595337, "learning_rate": 9.698684210526316e-05, "loss": 0.4542, "step": 31464 }, { "epoch": 1.7619554261395454, "grad_norm": 0.9832156300544739, "learning_rate": 9.698657894736842e-05, "loss": 0.3554, "step": 31465 }, { "epoch": 1.7620114234516744, "grad_norm": 1.3597266674041748, "learning_rate": 9.69863157894737e-05, "loss": 0.5007, "step": 31466 }, { "epoch": 1.7620674207638034, "grad_norm": 1.1829832792282104, "learning_rate": 9.698605263157895e-05, "loss": 0.4606, "step": 31467 }, { "epoch": 1.7621234180759324, "grad_norm": 1.450077772140503, "learning_rate": 9.698578947368421e-05, "loss": 0.4254, "step": 31468 }, { "epoch": 1.7621794153880614, "grad_norm": 1.1420611143112183, "learning_rate": 9.698552631578947e-05, "loss": 0.4703, "step": 31469 }, { "epoch": 1.7622354127001905, "grad_norm": 1.348101019859314, "learning_rate": 9.698526315789475e-05, "loss": 0.514, "step": 31470 }, { "epoch": 1.7622914100123195, "grad_norm": 1.2099101543426514, "learning_rate": 9.698500000000001e-05, "loss": 0.4555, "step": 31471 }, { "epoch": 1.7623474073244485, "grad_norm": 1.2844172716140747, 
"learning_rate": 9.698473684210527e-05, "loss": 0.5856, "step": 31472 }, { "epoch": 1.7624034046365775, "grad_norm": 1.1239879131317139, "learning_rate": 9.698447368421053e-05, "loss": 0.3856, "step": 31473 }, { "epoch": 1.7624594019487065, "grad_norm": 1.1519743204116821, "learning_rate": 9.69842105263158e-05, "loss": 0.3621, "step": 31474 }, { "epoch": 1.7625153992608356, "grad_norm": 1.4256031513214111, "learning_rate": 9.698394736842106e-05, "loss": 0.4518, "step": 31475 }, { "epoch": 1.7625713965729646, "grad_norm": 1.134522557258606, "learning_rate": 9.698368421052632e-05, "loss": 0.3928, "step": 31476 }, { "epoch": 1.7626273938850936, "grad_norm": 1.2252823114395142, "learning_rate": 9.698342105263158e-05, "loss": 0.4037, "step": 31477 }, { "epoch": 1.7626833911972226, "grad_norm": 1.4766465425491333, "learning_rate": 9.698315789473684e-05, "loss": 0.6407, "step": 31478 }, { "epoch": 1.7627393885093516, "grad_norm": 1.9454439878463745, "learning_rate": 9.698289473684211e-05, "loss": 0.5116, "step": 31479 }, { "epoch": 1.7627953858214807, "grad_norm": 1.4040120840072632, "learning_rate": 9.698263157894737e-05, "loss": 0.5018, "step": 31480 }, { "epoch": 1.7628513831336097, "grad_norm": 1.3570436239242554, "learning_rate": 9.698236842105263e-05, "loss": 0.4183, "step": 31481 }, { "epoch": 1.7629073804457387, "grad_norm": 1.0709501504898071, "learning_rate": 9.698210526315789e-05, "loss": 0.4102, "step": 31482 }, { "epoch": 1.7629633777578677, "grad_norm": 1.4571372270584106, "learning_rate": 9.698184210526316e-05, "loss": 0.6113, "step": 31483 }, { "epoch": 1.7630193750699967, "grad_norm": 1.3944772481918335, "learning_rate": 9.698157894736842e-05, "loss": 0.4673, "step": 31484 }, { "epoch": 1.7630753723821257, "grad_norm": 1.5022156238555908, "learning_rate": 9.69813157894737e-05, "loss": 0.5727, "step": 31485 }, { "epoch": 1.7631313696942548, "grad_norm": 1.2592860460281372, "learning_rate": 9.698105263157894e-05, "loss": 0.4493, "step": 31486 }, { "epoch": 
1.7631873670063838, "grad_norm": 1.3246036767959595, "learning_rate": 9.698078947368422e-05, "loss": 0.4089, "step": 31487 }, { "epoch": 1.7632433643185128, "grad_norm": 1.2681739330291748, "learning_rate": 9.698052631578948e-05, "loss": 0.4553, "step": 31488 }, { "epoch": 1.7632993616306418, "grad_norm": 1.3387362957000732, "learning_rate": 9.698026315789475e-05, "loss": 0.409, "step": 31489 }, { "epoch": 1.7633553589427708, "grad_norm": 1.2997803688049316, "learning_rate": 9.698000000000001e-05, "loss": 0.4797, "step": 31490 }, { "epoch": 1.7634113562548999, "grad_norm": 1.271246075630188, "learning_rate": 9.697973684210527e-05, "loss": 0.3911, "step": 31491 }, { "epoch": 1.7634673535670289, "grad_norm": 1.261385440826416, "learning_rate": 9.697947368421053e-05, "loss": 0.4139, "step": 31492 }, { "epoch": 1.763523350879158, "grad_norm": 1.2858843803405762, "learning_rate": 9.697921052631579e-05, "loss": 0.5789, "step": 31493 }, { "epoch": 1.763579348191287, "grad_norm": 1.1422197818756104, "learning_rate": 9.697894736842106e-05, "loss": 0.3957, "step": 31494 }, { "epoch": 1.763635345503416, "grad_norm": 1.1950825452804565, "learning_rate": 9.697868421052632e-05, "loss": 0.3616, "step": 31495 }, { "epoch": 1.763691342815545, "grad_norm": 1.162674903869629, "learning_rate": 9.697842105263158e-05, "loss": 0.4708, "step": 31496 }, { "epoch": 1.763747340127674, "grad_norm": 1.1652240753173828, "learning_rate": 9.697815789473684e-05, "loss": 0.4366, "step": 31497 }, { "epoch": 1.763803337439803, "grad_norm": 1.3446221351623535, "learning_rate": 9.697789473684211e-05, "loss": 0.5055, "step": 31498 }, { "epoch": 1.763859334751932, "grad_norm": 1.241191029548645, "learning_rate": 9.697763157894737e-05, "loss": 0.4349, "step": 31499 }, { "epoch": 1.763915332064061, "grad_norm": 1.277579426765442, "learning_rate": 9.697736842105263e-05, "loss": 0.5438, "step": 31500 }, { "epoch": 1.76397132937619, "grad_norm": 1.2783100605010986, "learning_rate": 9.69771052631579e-05, 
"loss": 0.4395, "step": 31501 }, { "epoch": 1.764027326688319, "grad_norm": 1.133917212486267, "learning_rate": 9.697684210526317e-05, "loss": 0.4213, "step": 31502 }, { "epoch": 1.764083324000448, "grad_norm": 1.1544266939163208, "learning_rate": 9.697657894736843e-05, "loss": 0.4736, "step": 31503 }, { "epoch": 1.7641393213125771, "grad_norm": 1.3574148416519165, "learning_rate": 9.697631578947369e-05, "loss": 0.3828, "step": 31504 }, { "epoch": 1.7641953186247061, "grad_norm": 1.3751143217086792, "learning_rate": 9.697605263157895e-05, "loss": 0.4168, "step": 31505 }, { "epoch": 1.7642513159368352, "grad_norm": 1.498292326927185, "learning_rate": 9.697578947368422e-05, "loss": 0.5091, "step": 31506 }, { "epoch": 1.7643073132489642, "grad_norm": 1.330779790878296, "learning_rate": 9.697552631578948e-05, "loss": 0.5447, "step": 31507 }, { "epoch": 1.7643633105610932, "grad_norm": 1.4283132553100586, "learning_rate": 9.697526315789475e-05, "loss": 0.389, "step": 31508 }, { "epoch": 1.7644193078732222, "grad_norm": 1.2465740442276, "learning_rate": 9.6975e-05, "loss": 0.379, "step": 31509 }, { "epoch": 1.7644753051853512, "grad_norm": 1.3268877267837524, "learning_rate": 9.697473684210526e-05, "loss": 0.5558, "step": 31510 }, { "epoch": 1.7645313024974802, "grad_norm": 1.5753016471862793, "learning_rate": 9.697447368421053e-05, "loss": 0.4791, "step": 31511 }, { "epoch": 1.7645872998096093, "grad_norm": 1.1190284490585327, "learning_rate": 9.697421052631579e-05, "loss": 0.4404, "step": 31512 }, { "epoch": 1.7646432971217383, "grad_norm": 1.1840065717697144, "learning_rate": 9.697394736842107e-05, "loss": 0.4407, "step": 31513 }, { "epoch": 1.7646992944338673, "grad_norm": 1.1299233436584473, "learning_rate": 9.697368421052631e-05, "loss": 0.4273, "step": 31514 }, { "epoch": 1.7647552917459963, "grad_norm": 1.1534067392349243, "learning_rate": 9.697342105263158e-05, "loss": 0.4088, "step": 31515 }, { "epoch": 1.7648112890581253, "grad_norm": 1.4165939092636108, 
"learning_rate": 9.697315789473684e-05, "loss": 0.3707, "step": 31516 }, { "epoch": 1.7648672863702544, "grad_norm": 1.1244193315505981, "learning_rate": 9.697289473684212e-05, "loss": 0.5268, "step": 31517 }, { "epoch": 1.7649232836823834, "grad_norm": 1.745214819908142, "learning_rate": 9.697263157894738e-05, "loss": 0.5335, "step": 31518 }, { "epoch": 1.7649792809945124, "grad_norm": 1.2726930379867554, "learning_rate": 9.697236842105264e-05, "loss": 0.4248, "step": 31519 }, { "epoch": 1.7650352783066414, "grad_norm": 1.469014286994934, "learning_rate": 9.69721052631579e-05, "loss": 0.5316, "step": 31520 }, { "epoch": 1.7650912756187704, "grad_norm": 1.2842581272125244, "learning_rate": 9.697184210526317e-05, "loss": 0.5588, "step": 31521 }, { "epoch": 1.7651472729308995, "grad_norm": 1.3293139934539795, "learning_rate": 9.697157894736843e-05, "loss": 0.5061, "step": 31522 }, { "epoch": 1.7652032702430285, "grad_norm": 1.244208574295044, "learning_rate": 9.697131578947369e-05, "loss": 0.4003, "step": 31523 }, { "epoch": 1.7652592675551575, "grad_norm": 1.1883312463760376, "learning_rate": 9.697105263157895e-05, "loss": 0.4022, "step": 31524 }, { "epoch": 1.7653152648672865, "grad_norm": 0.9889773726463318, "learning_rate": 9.697078947368422e-05, "loss": 0.2647, "step": 31525 }, { "epoch": 1.7653712621794155, "grad_norm": 1.2561534643173218, "learning_rate": 9.697052631578948e-05, "loss": 0.4171, "step": 31526 }, { "epoch": 1.7654272594915446, "grad_norm": 1.361660361289978, "learning_rate": 9.697026315789474e-05, "loss": 0.4491, "step": 31527 }, { "epoch": 1.7654832568036736, "grad_norm": 1.2190622091293335, "learning_rate": 9.697e-05, "loss": 0.4789, "step": 31528 }, { "epoch": 1.7655392541158026, "grad_norm": 1.2869620323181152, "learning_rate": 9.696973684210526e-05, "loss": 0.4522, "step": 31529 }, { "epoch": 1.7655952514279316, "grad_norm": 1.0969651937484741, "learning_rate": 9.696947368421053e-05, "loss": 0.4566, "step": 31530 }, { "epoch": 
1.7656512487400606, "grad_norm": 1.3023502826690674, "learning_rate": 9.69692105263158e-05, "loss": 0.4476, "step": 31531 }, { "epoch": 1.7657072460521896, "grad_norm": 1.3227877616882324, "learning_rate": 9.696894736842105e-05, "loss": 0.5335, "step": 31532 }, { "epoch": 1.7657632433643187, "grad_norm": 1.197417140007019, "learning_rate": 9.696868421052631e-05, "loss": 0.444, "step": 31533 }, { "epoch": 1.7658192406764477, "grad_norm": 1.3425732851028442, "learning_rate": 9.696842105263159e-05, "loss": 0.5227, "step": 31534 }, { "epoch": 1.7658752379885767, "grad_norm": 1.4290789365768433, "learning_rate": 9.696815789473685e-05, "loss": 0.5945, "step": 31535 }, { "epoch": 1.7659312353007057, "grad_norm": 1.2781115770339966, "learning_rate": 9.69678947368421e-05, "loss": 0.3768, "step": 31536 }, { "epoch": 1.7659872326128347, "grad_norm": 1.3153653144836426, "learning_rate": 9.696763157894737e-05, "loss": 0.4119, "step": 31537 }, { "epoch": 1.7660432299249638, "grad_norm": 1.438663363456726, "learning_rate": 9.696736842105264e-05, "loss": 0.3812, "step": 31538 }, { "epoch": 1.7660992272370928, "grad_norm": 1.6345983743667603, "learning_rate": 9.69671052631579e-05, "loss": 0.808, "step": 31539 }, { "epoch": 1.7661552245492218, "grad_norm": 1.1660674810409546, "learning_rate": 9.696684210526317e-05, "loss": 0.4568, "step": 31540 }, { "epoch": 1.7662112218613506, "grad_norm": 1.3210314512252808, "learning_rate": 9.696657894736842e-05, "loss": 0.5017, "step": 31541 }, { "epoch": 1.7662672191734796, "grad_norm": 1.3592729568481445, "learning_rate": 9.696631578947369e-05, "loss": 0.5468, "step": 31542 }, { "epoch": 1.7663232164856086, "grad_norm": 1.4642986059188843, "learning_rate": 9.696605263157895e-05, "loss": 0.4787, "step": 31543 }, { "epoch": 1.7663792137977377, "grad_norm": 1.237945318222046, "learning_rate": 9.696578947368421e-05, "loss": 0.3869, "step": 31544 }, { "epoch": 1.7664352111098667, "grad_norm": 1.135084867477417, "learning_rate": 
9.696552631578948e-05, "loss": 0.3375, "step": 31545 }, { "epoch": 1.7664912084219957, "grad_norm": 1.292121171951294, "learning_rate": 9.696526315789473e-05, "loss": 0.4826, "step": 31546 }, { "epoch": 1.7665472057341247, "grad_norm": 1.1377503871917725, "learning_rate": 9.6965e-05, "loss": 0.442, "step": 31547 }, { "epoch": 1.7666032030462537, "grad_norm": 1.2419637441635132, "learning_rate": 9.696473684210526e-05, "loss": 0.5338, "step": 31548 }, { "epoch": 1.7666592003583828, "grad_norm": 1.1220835447311401, "learning_rate": 9.696447368421054e-05, "loss": 0.3862, "step": 31549 }, { "epoch": 1.7667151976705118, "grad_norm": 1.1769870519638062, "learning_rate": 9.69642105263158e-05, "loss": 0.3768, "step": 31550 }, { "epoch": 1.7667711949826408, "grad_norm": 1.450187087059021, "learning_rate": 9.696394736842106e-05, "loss": 0.4583, "step": 31551 }, { "epoch": 1.7668271922947698, "grad_norm": 1.1313263177871704, "learning_rate": 9.696368421052632e-05, "loss": 0.4579, "step": 31552 }, { "epoch": 1.7668831896068988, "grad_norm": 1.4563243389129639, "learning_rate": 9.696342105263159e-05, "loss": 0.5501, "step": 31553 }, { "epoch": 1.7669391869190278, "grad_norm": 1.1168112754821777, "learning_rate": 9.696315789473685e-05, "loss": 0.359, "step": 31554 }, { "epoch": 1.7669951842311569, "grad_norm": 1.2646751403808594, "learning_rate": 9.696289473684211e-05, "loss": 0.4124, "step": 31555 }, { "epoch": 1.7670511815432859, "grad_norm": 1.4349724054336548, "learning_rate": 9.696263157894737e-05, "loss": 0.5039, "step": 31556 }, { "epoch": 1.767107178855415, "grad_norm": 1.2223607301712036, "learning_rate": 9.696236842105264e-05, "loss": 0.3452, "step": 31557 }, { "epoch": 1.767163176167544, "grad_norm": 1.445101022720337, "learning_rate": 9.69621052631579e-05, "loss": 0.5003, "step": 31558 }, { "epoch": 1.767219173479673, "grad_norm": 1.5038913488388062, "learning_rate": 9.696184210526316e-05, "loss": 0.5452, "step": 31559 }, { "epoch": 1.767275170791802, "grad_norm": 
1.1562330722808838, "learning_rate": 9.696157894736842e-05, "loss": 0.4638, "step": 31560 }, { "epoch": 1.767331168103931, "grad_norm": 1.3902146816253662, "learning_rate": 9.696131578947368e-05, "loss": 0.4532, "step": 31561 }, { "epoch": 1.76738716541606, "grad_norm": 1.4367204904556274, "learning_rate": 9.696105263157895e-05, "loss": 0.6274, "step": 31562 }, { "epoch": 1.767443162728189, "grad_norm": 1.3518548011779785, "learning_rate": 9.696078947368421e-05, "loss": 0.4238, "step": 31563 }, { "epoch": 1.767499160040318, "grad_norm": 1.3402560949325562, "learning_rate": 9.696052631578947e-05, "loss": 0.4032, "step": 31564 }, { "epoch": 1.767555157352447, "grad_norm": 1.5832061767578125, "learning_rate": 9.696026315789473e-05, "loss": 0.4415, "step": 31565 }, { "epoch": 1.767611154664576, "grad_norm": 1.2894824743270874, "learning_rate": 9.696000000000001e-05, "loss": 0.3704, "step": 31566 }, { "epoch": 1.767667151976705, "grad_norm": 1.0190800428390503, "learning_rate": 9.695973684210527e-05, "loss": 0.3231, "step": 31567 }, { "epoch": 1.7677231492888341, "grad_norm": 1.3909094333648682, "learning_rate": 9.695947368421054e-05, "loss": 0.5809, "step": 31568 }, { "epoch": 1.7677791466009631, "grad_norm": 1.6655607223510742, "learning_rate": 9.695921052631579e-05, "loss": 0.7583, "step": 31569 }, { "epoch": 1.7678351439130922, "grad_norm": 4.349919319152832, "learning_rate": 9.695894736842106e-05, "loss": 0.5639, "step": 31570 }, { "epoch": 1.7678911412252212, "grad_norm": 9.490988731384277, "learning_rate": 9.695868421052632e-05, "loss": 0.3731, "step": 31571 }, { "epoch": 1.7679471385373502, "grad_norm": 1.2213760614395142, "learning_rate": 9.695842105263159e-05, "loss": 0.4255, "step": 31572 }, { "epoch": 1.7680031358494792, "grad_norm": 1.6073302030563354, "learning_rate": 9.695815789473684e-05, "loss": 0.4538, "step": 31573 }, { "epoch": 1.7680591331616082, "grad_norm": 1.3122426271438599, "learning_rate": 9.695789473684211e-05, "loss": 0.3353, "step": 31574 
}, { "epoch": 1.7681151304737373, "grad_norm": 1.3224055767059326, "learning_rate": 9.695763157894737e-05, "loss": 0.3719, "step": 31575 }, { "epoch": 1.7681711277858663, "grad_norm": 1.2959257364273071, "learning_rate": 9.695736842105264e-05, "loss": 0.5298, "step": 31576 }, { "epoch": 1.7682271250979953, "grad_norm": 1.9519927501678467, "learning_rate": 9.69571052631579e-05, "loss": 0.5065, "step": 31577 }, { "epoch": 1.7682831224101243, "grad_norm": 1.4113036394119263, "learning_rate": 9.695684210526315e-05, "loss": 0.5254, "step": 31578 }, { "epoch": 1.7683391197222533, "grad_norm": 1.2891052961349487, "learning_rate": 9.695657894736842e-05, "loss": 0.6011, "step": 31579 }, { "epoch": 1.7683951170343823, "grad_norm": 1.668357491493225, "learning_rate": 9.695631578947368e-05, "loss": 0.4809, "step": 31580 }, { "epoch": 1.7684511143465114, "grad_norm": 1.218103051185608, "learning_rate": 9.695605263157896e-05, "loss": 0.5375, "step": 31581 }, { "epoch": 1.7685071116586404, "grad_norm": 1.2777475118637085, "learning_rate": 9.695578947368422e-05, "loss": 0.3912, "step": 31582 }, { "epoch": 1.7685631089707694, "grad_norm": 1.4177066087722778, "learning_rate": 9.695552631578948e-05, "loss": 0.4315, "step": 31583 }, { "epoch": 1.7686191062828984, "grad_norm": 1.2517859935760498, "learning_rate": 9.695526315789474e-05, "loss": 0.4109, "step": 31584 }, { "epoch": 1.7686751035950274, "grad_norm": 1.3994783163070679, "learning_rate": 9.695500000000001e-05, "loss": 0.3294, "step": 31585 }, { "epoch": 1.7687311009071565, "grad_norm": 1.345992088317871, "learning_rate": 9.695473684210527e-05, "loss": 0.4645, "step": 31586 }, { "epoch": 1.7687870982192855, "grad_norm": 1.1893301010131836, "learning_rate": 9.695447368421053e-05, "loss": 0.4279, "step": 31587 }, { "epoch": 1.7688430955314145, "grad_norm": 1.310773253440857, "learning_rate": 9.695421052631579e-05, "loss": 0.4225, "step": 31588 }, { "epoch": 1.7688990928435435, "grad_norm": 1.2024987936019897, "learning_rate": 
9.695394736842106e-05, "loss": 0.4784, "step": 31589 }, { "epoch": 1.7689550901556725, "grad_norm": 1.2498165369033813, "learning_rate": 9.695368421052632e-05, "loss": 0.5249, "step": 31590 }, { "epoch": 1.7690110874678016, "grad_norm": 1.200305700302124, "learning_rate": 9.695342105263158e-05, "loss": 0.3937, "step": 31591 }, { "epoch": 1.7690670847799306, "grad_norm": 1.3510410785675049, "learning_rate": 9.695315789473684e-05, "loss": 0.606, "step": 31592 }, { "epoch": 1.7691230820920596, "grad_norm": 1.2939341068267822, "learning_rate": 9.695289473684211e-05, "loss": 0.4131, "step": 31593 }, { "epoch": 1.7691790794041886, "grad_norm": 1.2193002700805664, "learning_rate": 9.695263157894737e-05, "loss": 0.3932, "step": 31594 }, { "epoch": 1.7692350767163176, "grad_norm": 2.1687841415405273, "learning_rate": 9.695236842105265e-05, "loss": 0.5081, "step": 31595 }, { "epoch": 1.7692910740284467, "grad_norm": 1.2129395008087158, "learning_rate": 9.69521052631579e-05, "loss": 0.3913, "step": 31596 }, { "epoch": 1.7693470713405757, "grad_norm": 1.280544638633728, "learning_rate": 9.695184210526315e-05, "loss": 0.4863, "step": 31597 }, { "epoch": 1.7694030686527047, "grad_norm": 1.1567386388778687, "learning_rate": 9.695157894736843e-05, "loss": 0.4065, "step": 31598 }, { "epoch": 1.7694590659648337, "grad_norm": 1.4139891862869263, "learning_rate": 9.695131578947369e-05, "loss": 0.3961, "step": 31599 }, { "epoch": 1.7695150632769627, "grad_norm": 1.68169367313385, "learning_rate": 9.695105263157896e-05, "loss": 0.454, "step": 31600 }, { "epoch": 1.7695710605890917, "grad_norm": 1.2753052711486816, "learning_rate": 9.69507894736842e-05, "loss": 0.4978, "step": 31601 }, { "epoch": 1.7696270579012208, "grad_norm": 1.3094966411590576, "learning_rate": 9.695052631578948e-05, "loss": 0.4663, "step": 31602 }, { "epoch": 1.7696830552133498, "grad_norm": 1.4657502174377441, "learning_rate": 9.695026315789474e-05, "loss": 0.428, "step": 31603 }, { "epoch": 1.7697390525254788, 
"grad_norm": 1.3512753248214722, "learning_rate": 9.695000000000001e-05, "loss": 0.4366, "step": 31604 }, { "epoch": 1.7697950498376078, "grad_norm": 1.6124919652938843, "learning_rate": 9.694973684210527e-05, "loss": 0.4766, "step": 31605 }, { "epoch": 1.7698510471497368, "grad_norm": 1.2926450967788696, "learning_rate": 9.694947368421053e-05, "loss": 0.5164, "step": 31606 }, { "epoch": 1.7699070444618659, "grad_norm": 1.2213493585586548, "learning_rate": 9.694921052631579e-05, "loss": 0.3471, "step": 31607 }, { "epoch": 1.7699630417739949, "grad_norm": 1.5887097120285034, "learning_rate": 9.694894736842106e-05, "loss": 0.4045, "step": 31608 }, { "epoch": 1.770019039086124, "grad_norm": 1.4204438924789429, "learning_rate": 9.694868421052632e-05, "loss": 0.5412, "step": 31609 }, { "epoch": 1.770075036398253, "grad_norm": 1.4192122220993042, "learning_rate": 9.694842105263158e-05, "loss": 0.3931, "step": 31610 }, { "epoch": 1.770131033710382, "grad_norm": 1.5752617120742798, "learning_rate": 9.694815789473684e-05, "loss": 0.6127, "step": 31611 }, { "epoch": 1.770187031022511, "grad_norm": 1.273874044418335, "learning_rate": 9.694789473684212e-05, "loss": 0.3776, "step": 31612 }, { "epoch": 1.77024302833464, "grad_norm": 1.2905644178390503, "learning_rate": 9.694763157894738e-05, "loss": 0.4609, "step": 31613 }, { "epoch": 1.770299025646769, "grad_norm": 1.2650513648986816, "learning_rate": 9.694736842105264e-05, "loss": 0.5233, "step": 31614 }, { "epoch": 1.770355022958898, "grad_norm": 1.0819251537322998, "learning_rate": 9.69471052631579e-05, "loss": 0.4006, "step": 31615 }, { "epoch": 1.770411020271027, "grad_norm": 1.757305383682251, "learning_rate": 9.694684210526316e-05, "loss": 0.5248, "step": 31616 }, { "epoch": 1.770467017583156, "grad_norm": 1.2096786499023438, "learning_rate": 9.694657894736843e-05, "loss": 0.4087, "step": 31617 }, { "epoch": 1.770523014895285, "grad_norm": 1.274585247039795, "learning_rate": 9.694631578947369e-05, "loss": 0.5563, "step": 
31618 }, { "epoch": 1.770579012207414, "grad_norm": 1.2701423168182373, "learning_rate": 9.694605263157895e-05, "loss": 0.4777, "step": 31619 }, { "epoch": 1.7706350095195431, "grad_norm": 1.6738379001617432, "learning_rate": 9.694578947368421e-05, "loss": 0.5118, "step": 31620 }, { "epoch": 1.7706910068316721, "grad_norm": 1.7709014415740967, "learning_rate": 9.694552631578948e-05, "loss": 0.4175, "step": 31621 }, { "epoch": 1.7707470041438012, "grad_norm": 1.1367491483688354, "learning_rate": 9.694526315789474e-05, "loss": 0.4296, "step": 31622 }, { "epoch": 1.7708030014559302, "grad_norm": 1.3250112533569336, "learning_rate": 9.694500000000001e-05, "loss": 0.4725, "step": 31623 }, { "epoch": 1.770858998768059, "grad_norm": 1.6657941341400146, "learning_rate": 9.694473684210526e-05, "loss": 0.4841, "step": 31624 }, { "epoch": 1.770914996080188, "grad_norm": 1.638959527015686, "learning_rate": 9.694447368421053e-05, "loss": 0.4254, "step": 31625 }, { "epoch": 1.770970993392317, "grad_norm": 1.305229663848877, "learning_rate": 9.69442105263158e-05, "loss": 0.7032, "step": 31626 }, { "epoch": 1.771026990704446, "grad_norm": 1.2624949216842651, "learning_rate": 9.694394736842107e-05, "loss": 0.4501, "step": 31627 }, { "epoch": 1.771082988016575, "grad_norm": 1.4390772581100464, "learning_rate": 9.694368421052631e-05, "loss": 0.4499, "step": 31628 }, { "epoch": 1.771138985328704, "grad_norm": 1.0659310817718506, "learning_rate": 9.694342105263159e-05, "loss": 0.4712, "step": 31629 }, { "epoch": 1.771194982640833, "grad_norm": 1.2920441627502441, "learning_rate": 9.694315789473685e-05, "loss": 0.6832, "step": 31630 }, { "epoch": 1.771250979952962, "grad_norm": 1.3546836376190186, "learning_rate": 9.69428947368421e-05, "loss": 0.4744, "step": 31631 }, { "epoch": 1.7713069772650911, "grad_norm": 1.0631438493728638, "learning_rate": 9.694263157894738e-05, "loss": 0.3273, "step": 31632 }, { "epoch": 1.7713629745772201, "grad_norm": 1.3528141975402832, "learning_rate": 
9.694236842105263e-05, "loss": 0.5658, "step": 31633 }, { "epoch": 1.7714189718893492, "grad_norm": 1.164036750793457, "learning_rate": 9.69421052631579e-05, "loss": 0.5891, "step": 31634 }, { "epoch": 1.7714749692014782, "grad_norm": 1.2094208002090454, "learning_rate": 9.694184210526316e-05, "loss": 0.4431, "step": 31635 }, { "epoch": 1.7715309665136072, "grad_norm": 1.2523390054702759, "learning_rate": 9.694157894736843e-05, "loss": 0.4231, "step": 31636 }, { "epoch": 1.7715869638257362, "grad_norm": 1.4777216911315918, "learning_rate": 9.694131578947369e-05, "loss": 0.5889, "step": 31637 }, { "epoch": 1.7716429611378652, "grad_norm": 1.0440623760223389, "learning_rate": 9.694105263157895e-05, "loss": 0.4355, "step": 31638 }, { "epoch": 1.7716989584499943, "grad_norm": 2.2610154151916504, "learning_rate": 9.694078947368421e-05, "loss": 0.5215, "step": 31639 }, { "epoch": 1.7717549557621233, "grad_norm": 1.2607110738754272, "learning_rate": 9.694052631578948e-05, "loss": 0.4782, "step": 31640 }, { "epoch": 1.7718109530742523, "grad_norm": 1.078346848487854, "learning_rate": 9.694026315789474e-05, "loss": 0.3725, "step": 31641 }, { "epoch": 1.7718669503863813, "grad_norm": 2.2641713619232178, "learning_rate": 9.694e-05, "loss": 0.3142, "step": 31642 }, { "epoch": 1.7719229476985103, "grad_norm": 1.4688613414764404, "learning_rate": 9.693973684210526e-05, "loss": 0.3142, "step": 31643 }, { "epoch": 1.7719789450106394, "grad_norm": 1.177298665046692, "learning_rate": 9.693947368421054e-05, "loss": 0.3972, "step": 31644 }, { "epoch": 1.7720349423227684, "grad_norm": 1.3002843856811523, "learning_rate": 9.69392105263158e-05, "loss": 0.439, "step": 31645 }, { "epoch": 1.7720909396348974, "grad_norm": 1.273055911064148, "learning_rate": 9.693894736842106e-05, "loss": 0.4135, "step": 31646 }, { "epoch": 1.7721469369470264, "grad_norm": 1.3855077028274536, "learning_rate": 9.693868421052632e-05, "loss": 0.4321, "step": 31647 }, { "epoch": 1.7722029342591554, "grad_norm": 
1.1521739959716797, "learning_rate": 9.693842105263158e-05, "loss": 0.3381, "step": 31648 }, { "epoch": 1.7722589315712844, "grad_norm": 1.1715768575668335, "learning_rate": 9.693815789473685e-05, "loss": 0.4246, "step": 31649 }, { "epoch": 1.7723149288834135, "grad_norm": 1.096937894821167, "learning_rate": 9.693789473684211e-05, "loss": 0.4936, "step": 31650 }, { "epoch": 1.7723709261955425, "grad_norm": 1.292404055595398, "learning_rate": 9.693763157894737e-05, "loss": 0.4372, "step": 31651 }, { "epoch": 1.7724269235076715, "grad_norm": 1.322475790977478, "learning_rate": 9.693736842105263e-05, "loss": 0.3626, "step": 31652 }, { "epoch": 1.7724829208198005, "grad_norm": 1.5499192476272583, "learning_rate": 9.69371052631579e-05, "loss": 0.5435, "step": 31653 }, { "epoch": 1.7725389181319295, "grad_norm": 1.8029277324676514, "learning_rate": 9.693684210526316e-05, "loss": 0.5153, "step": 31654 }, { "epoch": 1.7725949154440586, "grad_norm": 1.4051363468170166, "learning_rate": 9.693657894736843e-05, "loss": 0.4287, "step": 31655 }, { "epoch": 1.7726509127561876, "grad_norm": 1.1059962511062622, "learning_rate": 9.693631578947368e-05, "loss": 0.4496, "step": 31656 }, { "epoch": 1.7727069100683166, "grad_norm": 1.195513129234314, "learning_rate": 9.693605263157895e-05, "loss": 0.4725, "step": 31657 }, { "epoch": 1.7727629073804456, "grad_norm": 1.163993239402771, "learning_rate": 9.693578947368421e-05, "loss": 0.4145, "step": 31658 }, { "epoch": 1.7728189046925746, "grad_norm": 1.1289546489715576, "learning_rate": 9.693552631578949e-05, "loss": 0.3838, "step": 31659 }, { "epoch": 1.7728749020047037, "grad_norm": 1.1771734952926636, "learning_rate": 9.693526315789475e-05, "loss": 0.5962, "step": 31660 }, { "epoch": 1.7729308993168327, "grad_norm": 1.2909501791000366, "learning_rate": 9.6935e-05, "loss": 0.4819, "step": 31661 }, { "epoch": 1.7729868966289617, "grad_norm": 1.3093260526657104, "learning_rate": 9.693473684210527e-05, "loss": 0.4384, "step": 31662 }, { 
"epoch": 1.7730428939410907, "grad_norm": 1.278106927871704, "learning_rate": 9.693447368421054e-05, "loss": 0.5425, "step": 31663 }, { "epoch": 1.7730988912532197, "grad_norm": 1.4459409713745117, "learning_rate": 9.69342105263158e-05, "loss": 0.689, "step": 31664 }, { "epoch": 1.7731548885653488, "grad_norm": 1.3192808628082275, "learning_rate": 9.693394736842106e-05, "loss": 0.4512, "step": 31665 }, { "epoch": 1.7732108858774778, "grad_norm": 1.1529093980789185, "learning_rate": 9.693368421052632e-05, "loss": 0.4549, "step": 31666 }, { "epoch": 1.7732668831896068, "grad_norm": 1.417559266090393, "learning_rate": 9.693342105263158e-05, "loss": 0.4787, "step": 31667 }, { "epoch": 1.7733228805017358, "grad_norm": 1.1452620029449463, "learning_rate": 9.693315789473685e-05, "loss": 0.4548, "step": 31668 }, { "epoch": 1.7733788778138648, "grad_norm": 1.2665936946868896, "learning_rate": 9.693289473684211e-05, "loss": 0.4287, "step": 31669 }, { "epoch": 1.7734348751259938, "grad_norm": 1.3512331247329712, "learning_rate": 9.693263157894737e-05, "loss": 0.4704, "step": 31670 }, { "epoch": 1.7734908724381229, "grad_norm": 1.2202987670898438, "learning_rate": 9.693236842105263e-05, "loss": 0.4467, "step": 31671 }, { "epoch": 1.7735468697502519, "grad_norm": 1.2009615898132324, "learning_rate": 9.69321052631579e-05, "loss": 0.4032, "step": 31672 }, { "epoch": 1.773602867062381, "grad_norm": 1.3964484930038452, "learning_rate": 9.693184210526316e-05, "loss": 0.4818, "step": 31673 }, { "epoch": 1.77365886437451, "grad_norm": 1.4268285036087036, "learning_rate": 9.693157894736842e-05, "loss": 0.4191, "step": 31674 }, { "epoch": 1.773714861686639, "grad_norm": 1.3885416984558105, "learning_rate": 9.693131578947368e-05, "loss": 0.4025, "step": 31675 }, { "epoch": 1.773770858998768, "grad_norm": 1.4444143772125244, "learning_rate": 9.693105263157896e-05, "loss": 0.4656, "step": 31676 }, { "epoch": 1.773826856310897, "grad_norm": 1.0805639028549194, "learning_rate": 
9.693078947368422e-05, "loss": 0.2582, "step": 31677 }, { "epoch": 1.773882853623026, "grad_norm": 1.2887513637542725, "learning_rate": 9.693052631578949e-05, "loss": 0.4735, "step": 31678 }, { "epoch": 1.773938850935155, "grad_norm": 1.3559648990631104, "learning_rate": 9.693026315789474e-05, "loss": 0.4658, "step": 31679 }, { "epoch": 1.773994848247284, "grad_norm": 1.340680718421936, "learning_rate": 9.693000000000001e-05, "loss": 0.4844, "step": 31680 }, { "epoch": 1.774050845559413, "grad_norm": 1.3505420684814453, "learning_rate": 9.692973684210527e-05, "loss": 0.4911, "step": 31681 }, { "epoch": 1.774106842871542, "grad_norm": 1.1470973491668701, "learning_rate": 9.692947368421053e-05, "loss": 0.4863, "step": 31682 }, { "epoch": 1.774162840183671, "grad_norm": 1.459671139717102, "learning_rate": 9.692921052631579e-05, "loss": 0.6615, "step": 31683 }, { "epoch": 1.7742188374958001, "grad_norm": 1.1560617685317993, "learning_rate": 9.692894736842105e-05, "loss": 0.3681, "step": 31684 }, { "epoch": 1.7742748348079291, "grad_norm": 1.1295814514160156, "learning_rate": 9.692868421052632e-05, "loss": 0.4043, "step": 31685 }, { "epoch": 1.7743308321200582, "grad_norm": 1.4021835327148438, "learning_rate": 9.692842105263158e-05, "loss": 0.5041, "step": 31686 }, { "epoch": 1.7743868294321872, "grad_norm": 1.4055055379867554, "learning_rate": 9.692815789473685e-05, "loss": 0.41, "step": 31687 }, { "epoch": 1.7744428267443162, "grad_norm": 3.6340811252593994, "learning_rate": 9.69278947368421e-05, "loss": 0.3583, "step": 31688 }, { "epoch": 1.7744988240564452, "grad_norm": 1.1289743185043335, "learning_rate": 9.692763157894737e-05, "loss": 0.4102, "step": 31689 }, { "epoch": 1.7745548213685742, "grad_norm": 1.2529717683792114, "learning_rate": 9.692736842105263e-05, "loss": 0.3426, "step": 31690 }, { "epoch": 1.7746108186807033, "grad_norm": 1.2105739116668701, "learning_rate": 9.69271052631579e-05, "loss": 0.3889, "step": 31691 }, { "epoch": 1.7746668159928323, 
"grad_norm": 1.1601111888885498, "learning_rate": 9.692684210526317e-05, "loss": 0.3919, "step": 31692 }, { "epoch": 1.7747228133049613, "grad_norm": 1.2139086723327637, "learning_rate": 9.692657894736843e-05, "loss": 0.3789, "step": 31693 }, { "epoch": 1.7747788106170903, "grad_norm": 1.1330914497375488, "learning_rate": 9.692631578947369e-05, "loss": 0.6112, "step": 31694 }, { "epoch": 1.7748348079292193, "grad_norm": 1.6613965034484863, "learning_rate": 9.692605263157896e-05, "loss": 0.5128, "step": 31695 }, { "epoch": 1.7748908052413483, "grad_norm": 1.1898295879364014, "learning_rate": 9.692578947368422e-05, "loss": 0.4033, "step": 31696 }, { "epoch": 1.7749468025534774, "grad_norm": 1.0605305433273315, "learning_rate": 9.692552631578948e-05, "loss": 0.4209, "step": 31697 }, { "epoch": 1.7750027998656064, "grad_norm": 1.1873576641082764, "learning_rate": 9.692526315789474e-05, "loss": 0.4714, "step": 31698 }, { "epoch": 1.7750587971777354, "grad_norm": 1.2370866537094116, "learning_rate": 9.6925e-05, "loss": 0.3921, "step": 31699 }, { "epoch": 1.7751147944898644, "grad_norm": 1.138181447982788, "learning_rate": 9.692473684210527e-05, "loss": 0.4276, "step": 31700 }, { "epoch": 1.7751707918019934, "grad_norm": 1.3021535873413086, "learning_rate": 9.692447368421053e-05, "loss": 0.3714, "step": 31701 }, { "epoch": 1.7752267891141225, "grad_norm": 1.5191811323165894, "learning_rate": 9.692421052631579e-05, "loss": 0.4694, "step": 31702 }, { "epoch": 1.7752827864262515, "grad_norm": 1.4879953861236572, "learning_rate": 9.692394736842105e-05, "loss": 0.5514, "step": 31703 }, { "epoch": 1.7753387837383805, "grad_norm": 1.4922763109207153, "learning_rate": 9.692368421052632e-05, "loss": 0.637, "step": 31704 }, { "epoch": 1.7753947810505095, "grad_norm": 1.2755488157272339, "learning_rate": 9.692342105263158e-05, "loss": 0.473, "step": 31705 }, { "epoch": 1.7754507783626385, "grad_norm": 1.2252918481826782, "learning_rate": 9.692315789473684e-05, "loss": 0.3126, 
"step": 31706 }, { "epoch": 1.7755067756747676, "grad_norm": 1.1464699506759644, "learning_rate": 9.69228947368421e-05, "loss": 0.4335, "step": 31707 }, { "epoch": 1.7755627729868966, "grad_norm": 1.3130724430084229, "learning_rate": 9.692263157894738e-05, "loss": 0.3764, "step": 31708 }, { "epoch": 1.7756187702990256, "grad_norm": 2.37186861038208, "learning_rate": 9.692236842105264e-05, "loss": 0.4217, "step": 31709 }, { "epoch": 1.7756747676111546, "grad_norm": 1.2758076190948486, "learning_rate": 9.692210526315791e-05, "loss": 0.4001, "step": 31710 }, { "epoch": 1.7757307649232836, "grad_norm": 1.1145694255828857, "learning_rate": 9.692184210526316e-05, "loss": 0.5205, "step": 31711 }, { "epoch": 1.7757867622354127, "grad_norm": 1.9133871793746948, "learning_rate": 9.692157894736843e-05, "loss": 0.4757, "step": 31712 }, { "epoch": 1.7758427595475417, "grad_norm": 1.3190933465957642, "learning_rate": 9.692131578947369e-05, "loss": 0.5608, "step": 31713 }, { "epoch": 1.7758987568596707, "grad_norm": 2.1781973838806152, "learning_rate": 9.692105263157896e-05, "loss": 0.4955, "step": 31714 }, { "epoch": 1.7759547541717997, "grad_norm": 1.279032826423645, "learning_rate": 9.692078947368422e-05, "loss": 0.4946, "step": 31715 }, { "epoch": 1.7760107514839287, "grad_norm": 1.3642264604568481, "learning_rate": 9.692052631578948e-05, "loss": 0.6054, "step": 31716 }, { "epoch": 1.7760667487960577, "grad_norm": 1.3150238990783691, "learning_rate": 9.692026315789474e-05, "loss": 0.3583, "step": 31717 }, { "epoch": 1.7761227461081868, "grad_norm": 1.5483708381652832, "learning_rate": 9.692e-05, "loss": 0.5425, "step": 31718 }, { "epoch": 1.7761787434203158, "grad_norm": 1.29545259475708, "learning_rate": 9.691973684210527e-05, "loss": 0.569, "step": 31719 }, { "epoch": 1.7762347407324448, "grad_norm": 1.1991771459579468, "learning_rate": 9.691947368421053e-05, "loss": 0.5426, "step": 31720 }, { "epoch": 1.7762907380445738, "grad_norm": 1.1908551454544067, "learning_rate": 
9.691921052631579e-05, "loss": 0.4921, "step": 31721 }, { "epoch": 1.7763467353567028, "grad_norm": 1.421525001525879, "learning_rate": 9.691894736842105e-05, "loss": 0.6218, "step": 31722 }, { "epoch": 1.7764027326688319, "grad_norm": 1.1339898109436035, "learning_rate": 9.691868421052633e-05, "loss": 0.4113, "step": 31723 }, { "epoch": 1.7764587299809609, "grad_norm": 1.093232274055481, "learning_rate": 9.691842105263159e-05, "loss": 0.362, "step": 31724 }, { "epoch": 1.77651472729309, "grad_norm": 1.2974497079849243, "learning_rate": 9.691815789473685e-05, "loss": 0.378, "step": 31725 }, { "epoch": 1.776570724605219, "grad_norm": 1.2342931032180786, "learning_rate": 9.69178947368421e-05, "loss": 0.4887, "step": 31726 }, { "epoch": 1.776626721917348, "grad_norm": 1.3725272417068481, "learning_rate": 9.691763157894738e-05, "loss": 0.4088, "step": 31727 }, { "epoch": 1.776682719229477, "grad_norm": 1.853804349899292, "learning_rate": 9.691736842105264e-05, "loss": 0.4583, "step": 31728 }, { "epoch": 1.776738716541606, "grad_norm": 1.1662169694900513, "learning_rate": 9.69171052631579e-05, "loss": 0.3787, "step": 31729 }, { "epoch": 1.776794713853735, "grad_norm": 1.2693150043487549, "learning_rate": 9.691684210526316e-05, "loss": 0.4017, "step": 31730 }, { "epoch": 1.776850711165864, "grad_norm": 1.52790367603302, "learning_rate": 9.691657894736843e-05, "loss": 0.5531, "step": 31731 }, { "epoch": 1.776906708477993, "grad_norm": 1.3320693969726562, "learning_rate": 9.691631578947369e-05, "loss": 0.4455, "step": 31732 }, { "epoch": 1.776962705790122, "grad_norm": 1.2238317728042603, "learning_rate": 9.691605263157896e-05, "loss": 0.5328, "step": 31733 }, { "epoch": 1.777018703102251, "grad_norm": 1.2253999710083008, "learning_rate": 9.691578947368421e-05, "loss": 0.4084, "step": 31734 }, { "epoch": 1.77707470041438, "grad_norm": 1.1530673503875732, "learning_rate": 9.691552631578947e-05, "loss": 0.3968, "step": 31735 }, { "epoch": 1.7771306977265091, "grad_norm": 
1.1781492233276367, "learning_rate": 9.691526315789474e-05, "loss": 0.4335, "step": 31736 }, { "epoch": 1.7771866950386381, "grad_norm": 1.2266974449157715, "learning_rate": 9.6915e-05, "loss": 0.4273, "step": 31737 }, { "epoch": 1.7772426923507672, "grad_norm": 1.2361249923706055, "learning_rate": 9.691473684210526e-05, "loss": 0.4069, "step": 31738 }, { "epoch": 1.7772986896628962, "grad_norm": 1.9679622650146484, "learning_rate": 9.691447368421052e-05, "loss": 0.5161, "step": 31739 }, { "epoch": 1.7773546869750252, "grad_norm": 1.1759806871414185, "learning_rate": 9.69142105263158e-05, "loss": 0.4516, "step": 31740 }, { "epoch": 1.7774106842871542, "grad_norm": 1.8541347980499268, "learning_rate": 9.691394736842106e-05, "loss": 0.4834, "step": 31741 }, { "epoch": 1.7774666815992832, "grad_norm": 1.1747126579284668, "learning_rate": 9.691368421052633e-05, "loss": 0.3733, "step": 31742 }, { "epoch": 1.7775226789114122, "grad_norm": 1.3951380252838135, "learning_rate": 9.691342105263157e-05, "loss": 0.4915, "step": 31743 }, { "epoch": 1.7775786762235413, "grad_norm": 1.2836487293243408, "learning_rate": 9.691315789473685e-05, "loss": 0.4666, "step": 31744 }, { "epoch": 1.7776346735356703, "grad_norm": 1.1611849069595337, "learning_rate": 9.691289473684211e-05, "loss": 0.4434, "step": 31745 }, { "epoch": 1.7776906708477993, "grad_norm": 1.3393611907958984, "learning_rate": 9.691263157894738e-05, "loss": 0.4784, "step": 31746 }, { "epoch": 1.7777466681599283, "grad_norm": 1.1358977556228638, "learning_rate": 9.691236842105264e-05, "loss": 0.4414, "step": 31747 }, { "epoch": 1.7778026654720573, "grad_norm": 1.4379245042800903, "learning_rate": 9.69121052631579e-05, "loss": 0.6311, "step": 31748 }, { "epoch": 1.7778586627841864, "grad_norm": 1.019580364227295, "learning_rate": 9.691184210526316e-05, "loss": 0.3647, "step": 31749 }, { "epoch": 1.7779146600963154, "grad_norm": 1.4219930171966553, "learning_rate": 9.691157894736843e-05, "loss": 0.5379, "step": 31750 }, { 
"epoch": 1.7779706574084444, "grad_norm": 1.107913851737976, "learning_rate": 9.69113157894737e-05, "loss": 0.3647, "step": 31751 }, { "epoch": 1.7780266547205734, "grad_norm": 1.3188385963439941, "learning_rate": 9.691105263157895e-05, "loss": 0.3607, "step": 31752 }, { "epoch": 1.7780826520327024, "grad_norm": 1.370132565498352, "learning_rate": 9.691078947368421e-05, "loss": 0.4885, "step": 31753 }, { "epoch": 1.7781386493448315, "grad_norm": 1.3688284158706665, "learning_rate": 9.691052631578947e-05, "loss": 0.4173, "step": 31754 }, { "epoch": 1.7781946466569605, "grad_norm": 1.4062292575836182, "learning_rate": 9.691026315789475e-05, "loss": 0.3259, "step": 31755 }, { "epoch": 1.7782506439690895, "grad_norm": 1.1700189113616943, "learning_rate": 9.691e-05, "loss": 0.4681, "step": 31756 }, { "epoch": 1.7783066412812185, "grad_norm": 1.588115930557251, "learning_rate": 9.690973684210527e-05, "loss": 0.4796, "step": 31757 }, { "epoch": 1.7783626385933475, "grad_norm": 1.4379734992980957, "learning_rate": 9.690947368421053e-05, "loss": 0.5249, "step": 31758 }, { "epoch": 1.7784186359054766, "grad_norm": 1.4565213918685913, "learning_rate": 9.69092105263158e-05, "loss": 0.4669, "step": 31759 }, { "epoch": 1.7784746332176056, "grad_norm": 1.3071175813674927, "learning_rate": 9.690894736842106e-05, "loss": 0.4272, "step": 31760 }, { "epoch": 1.7785306305297346, "grad_norm": 1.2956658601760864, "learning_rate": 9.690868421052632e-05, "loss": 0.4045, "step": 31761 }, { "epoch": 1.7785866278418636, "grad_norm": 1.0321497917175293, "learning_rate": 9.690842105263158e-05, "loss": 0.3443, "step": 31762 }, { "epoch": 1.7786426251539926, "grad_norm": 1.2439674139022827, "learning_rate": 9.690815789473685e-05, "loss": 0.422, "step": 31763 }, { "epoch": 1.7786986224661216, "grad_norm": 1.3908679485321045, "learning_rate": 9.690789473684211e-05, "loss": 0.5081, "step": 31764 }, { "epoch": 1.7787546197782507, "grad_norm": 1.2918809652328491, "learning_rate": 
9.690763157894738e-05, "loss": 0.5505, "step": 31765 }, { "epoch": 1.7788106170903797, "grad_norm": 1.4847381114959717, "learning_rate": 9.690736842105263e-05, "loss": 0.6401, "step": 31766 }, { "epoch": 1.7788666144025087, "grad_norm": 1.180594801902771, "learning_rate": 9.69071052631579e-05, "loss": 0.3993, "step": 31767 }, { "epoch": 1.7789226117146377, "grad_norm": 1.1299070119857788, "learning_rate": 9.690684210526316e-05, "loss": 0.3554, "step": 31768 }, { "epoch": 1.7789786090267667, "grad_norm": 1.3463633060455322, "learning_rate": 9.690657894736842e-05, "loss": 0.3062, "step": 31769 }, { "epoch": 1.7790346063388958, "grad_norm": 0.982157826423645, "learning_rate": 9.69063157894737e-05, "loss": 0.3706, "step": 31770 }, { "epoch": 1.7790906036510248, "grad_norm": 1.5274629592895508, "learning_rate": 9.690605263157894e-05, "loss": 0.4789, "step": 31771 }, { "epoch": 1.7791466009631538, "grad_norm": 1.5049922466278076, "learning_rate": 9.690578947368422e-05, "loss": 0.5137, "step": 31772 }, { "epoch": 1.7792025982752828, "grad_norm": 1.152225375175476, "learning_rate": 9.690552631578948e-05, "loss": 0.4106, "step": 31773 }, { "epoch": 1.7792585955874118, "grad_norm": 1.4260683059692383, "learning_rate": 9.690526315789475e-05, "loss": 0.5347, "step": 31774 }, { "epoch": 1.7793145928995409, "grad_norm": 2.1656501293182373, "learning_rate": 9.6905e-05, "loss": 0.6394, "step": 31775 }, { "epoch": 1.7793705902116699, "grad_norm": 1.3388731479644775, "learning_rate": 9.690473684210527e-05, "loss": 0.4603, "step": 31776 }, { "epoch": 1.779426587523799, "grad_norm": 1.748445749282837, "learning_rate": 9.690447368421053e-05, "loss": 0.5135, "step": 31777 }, { "epoch": 1.779482584835928, "grad_norm": 1.1211421489715576, "learning_rate": 9.69042105263158e-05, "loss": 0.3823, "step": 31778 }, { "epoch": 1.779538582148057, "grad_norm": 1.2308905124664307, "learning_rate": 9.690394736842106e-05, "loss": 0.6448, "step": 31779 }, { "epoch": 1.779594579460186, "grad_norm": 
1.1907392740249634, "learning_rate": 9.690368421052632e-05, "loss": 0.4445, "step": 31780 }, { "epoch": 1.779650576772315, "grad_norm": 1.2433491945266724, "learning_rate": 9.690342105263158e-05, "loss": 0.4855, "step": 31781 }, { "epoch": 1.779706574084444, "grad_norm": 1.5997979640960693, "learning_rate": 9.690315789473685e-05, "loss": 0.5141, "step": 31782 }, { "epoch": 1.779762571396573, "grad_norm": 1.5376241207122803, "learning_rate": 9.690289473684211e-05, "loss": 0.4068, "step": 31783 }, { "epoch": 1.779818568708702, "grad_norm": 1.3327856063842773, "learning_rate": 9.690263157894737e-05, "loss": 0.4554, "step": 31784 }, { "epoch": 1.779874566020831, "grad_norm": 1.2942867279052734, "learning_rate": 9.690236842105263e-05, "loss": 0.4102, "step": 31785 }, { "epoch": 1.77993056333296, "grad_norm": 1.1606141328811646, "learning_rate": 9.690210526315789e-05, "loss": 0.4507, "step": 31786 }, { "epoch": 1.779986560645089, "grad_norm": 1.7103277444839478, "learning_rate": 9.690184210526317e-05, "loss": 0.6373, "step": 31787 }, { "epoch": 1.780042557957218, "grad_norm": 1.3943488597869873, "learning_rate": 9.690157894736843e-05, "loss": 0.4965, "step": 31788 }, { "epoch": 1.7800985552693471, "grad_norm": 1.1858793497085571, "learning_rate": 9.690131578947369e-05, "loss": 0.4665, "step": 31789 }, { "epoch": 1.7801545525814761, "grad_norm": 1.5315145254135132, "learning_rate": 9.690105263157894e-05, "loss": 0.4587, "step": 31790 }, { "epoch": 1.7802105498936052, "grad_norm": 1.3065170049667358, "learning_rate": 9.690078947368422e-05, "loss": 0.4388, "step": 31791 }, { "epoch": 1.7802665472057342, "grad_norm": 1.2521973848342896, "learning_rate": 9.690052631578948e-05, "loss": 0.3834, "step": 31792 }, { "epoch": 1.7803225445178632, "grad_norm": 1.2697452306747437, "learning_rate": 9.690026315789474e-05, "loss": 0.5289, "step": 31793 }, { "epoch": 1.7803785418299922, "grad_norm": 1.2355313301086426, "learning_rate": 9.69e-05, "loss": 0.4728, "step": 31794 }, { "epoch": 
1.7804345391421212, "grad_norm": 1.1875896453857422, "learning_rate": 9.689973684210527e-05, "loss": 0.4261, "step": 31795 }, { "epoch": 1.7804905364542503, "grad_norm": 1.2282297611236572, "learning_rate": 9.689947368421053e-05, "loss": 0.5512, "step": 31796 }, { "epoch": 1.7805465337663793, "grad_norm": 1.7952834367752075, "learning_rate": 9.68992105263158e-05, "loss": 0.4245, "step": 31797 }, { "epoch": 1.7806025310785083, "grad_norm": 2.2149720191955566, "learning_rate": 9.689894736842105e-05, "loss": 0.4552, "step": 31798 }, { "epoch": 1.7806585283906373, "grad_norm": 1.2255440950393677, "learning_rate": 9.689868421052632e-05, "loss": 0.4927, "step": 31799 }, { "epoch": 1.7807145257027663, "grad_norm": 1.1667736768722534, "learning_rate": 9.689842105263158e-05, "loss": 0.3893, "step": 31800 }, { "epoch": 1.7807705230148954, "grad_norm": 1.3435007333755493, "learning_rate": 9.689815789473686e-05, "loss": 0.4568, "step": 31801 }, { "epoch": 1.7808265203270244, "grad_norm": 1.2837270498275757, "learning_rate": 9.689789473684212e-05, "loss": 0.4608, "step": 31802 }, { "epoch": 1.7808825176391534, "grad_norm": 1.213241457939148, "learning_rate": 9.689763157894736e-05, "loss": 0.4059, "step": 31803 }, { "epoch": 1.7809385149512824, "grad_norm": 1.6467320919036865, "learning_rate": 9.689736842105264e-05, "loss": 0.4102, "step": 31804 }, { "epoch": 1.7809945122634114, "grad_norm": 1.0240404605865479, "learning_rate": 9.68971052631579e-05, "loss": 0.3627, "step": 31805 }, { "epoch": 1.7810505095755405, "grad_norm": 1.316407561302185, "learning_rate": 9.689684210526317e-05, "loss": 0.4339, "step": 31806 }, { "epoch": 1.7811065068876695, "grad_norm": 1.584269404411316, "learning_rate": 9.689657894736843e-05, "loss": 0.5503, "step": 31807 }, { "epoch": 1.7811625041997985, "grad_norm": 1.0722568035125732, "learning_rate": 9.689631578947369e-05, "loss": 0.4073, "step": 31808 }, { "epoch": 1.7812185015119275, "grad_norm": 1.6375370025634766, "learning_rate": 
9.689605263157895e-05, "loss": 0.6101, "step": 31809 }, { "epoch": 1.7812744988240565, "grad_norm": 1.1459671258926392, "learning_rate": 9.689578947368422e-05, "loss": 0.4613, "step": 31810 }, { "epoch": 1.7813304961361855, "grad_norm": 1.2400115728378296, "learning_rate": 9.689552631578948e-05, "loss": 0.4566, "step": 31811 }, { "epoch": 1.7813864934483146, "grad_norm": 1.147714614868164, "learning_rate": 9.689526315789474e-05, "loss": 0.4565, "step": 31812 }, { "epoch": 1.7814424907604436, "grad_norm": 1.325195550918579, "learning_rate": 9.6895e-05, "loss": 0.4775, "step": 31813 }, { "epoch": 1.7814984880725726, "grad_norm": 1.9836220741271973, "learning_rate": 9.689473684210527e-05, "loss": 0.5257, "step": 31814 }, { "epoch": 1.7815544853847016, "grad_norm": 1.124781847000122, "learning_rate": 9.689447368421053e-05, "loss": 0.391, "step": 31815 }, { "epoch": 1.7816104826968306, "grad_norm": 1.200110673904419, "learning_rate": 9.689421052631579e-05, "loss": 0.4523, "step": 31816 }, { "epoch": 1.7816664800089597, "grad_norm": 1.1694585084915161, "learning_rate": 9.689394736842105e-05, "loss": 0.3514, "step": 31817 }, { "epoch": 1.7817224773210887, "grad_norm": 1.551188588142395, "learning_rate": 9.689368421052633e-05, "loss": 0.5292, "step": 31818 }, { "epoch": 1.7817784746332177, "grad_norm": 1.1470431089401245, "learning_rate": 9.689342105263159e-05, "loss": 0.5614, "step": 31819 }, { "epoch": 1.7818344719453467, "grad_norm": 1.3186626434326172, "learning_rate": 9.689315789473684e-05, "loss": 0.4121, "step": 31820 }, { "epoch": 1.7818904692574757, "grad_norm": 0.9956709742546082, "learning_rate": 9.68928947368421e-05, "loss": 0.3453, "step": 31821 }, { "epoch": 1.7819464665696048, "grad_norm": 1.278331995010376, "learning_rate": 9.689263157894736e-05, "loss": 0.3733, "step": 31822 }, { "epoch": 1.7820024638817338, "grad_norm": 1.5626790523529053, "learning_rate": 9.689236842105264e-05, "loss": 0.477, "step": 31823 }, { "epoch": 1.7820584611938628, "grad_norm": 
1.3329910039901733, "learning_rate": 9.68921052631579e-05, "loss": 0.5075, "step": 31824 }, { "epoch": 1.7821144585059918, "grad_norm": 1.240760087966919, "learning_rate": 9.689184210526317e-05, "loss": 0.4999, "step": 31825 }, { "epoch": 1.7821704558181208, "grad_norm": 1.1359931230545044, "learning_rate": 9.689157894736842e-05, "loss": 0.3785, "step": 31826 }, { "epoch": 1.7822264531302499, "grad_norm": 12.997991561889648, "learning_rate": 9.689131578947369e-05, "loss": 0.5553, "step": 31827 }, { "epoch": 1.7822824504423789, "grad_norm": 1.2757093906402588, "learning_rate": 9.689105263157895e-05, "loss": 0.4396, "step": 31828 }, { "epoch": 1.782338447754508, "grad_norm": 1.2319806814193726, "learning_rate": 9.689078947368422e-05, "loss": 0.4742, "step": 31829 }, { "epoch": 1.782394445066637, "grad_norm": 1.4098560810089111, "learning_rate": 9.689052631578947e-05, "loss": 0.4182, "step": 31830 }, { "epoch": 1.782450442378766, "grad_norm": 1.340309500694275, "learning_rate": 9.689026315789474e-05, "loss": 0.4386, "step": 31831 }, { "epoch": 1.782506439690895, "grad_norm": 1.3434604406356812, "learning_rate": 9.689e-05, "loss": 0.4141, "step": 31832 }, { "epoch": 1.782562437003024, "grad_norm": 1.1592700481414795, "learning_rate": 9.688973684210528e-05, "loss": 0.3565, "step": 31833 }, { "epoch": 1.782618434315153, "grad_norm": 1.4475698471069336, "learning_rate": 9.688947368421054e-05, "loss": 0.526, "step": 31834 }, { "epoch": 1.782674431627282, "grad_norm": 1.2637836933135986, "learning_rate": 9.68892105263158e-05, "loss": 0.5198, "step": 31835 }, { "epoch": 1.782730428939411, "grad_norm": 1.7472718954086304, "learning_rate": 9.688894736842105e-05, "loss": 0.5239, "step": 31836 }, { "epoch": 1.78278642625154, "grad_norm": 1.2594273090362549, "learning_rate": 9.688868421052633e-05, "loss": 0.6498, "step": 31837 }, { "epoch": 1.782842423563669, "grad_norm": 1.727853775024414, "learning_rate": 9.688842105263159e-05, "loss": 0.6166, "step": 31838 }, { "epoch": 
1.782898420875798, "grad_norm": 1.330910086631775, "learning_rate": 9.688815789473685e-05, "loss": 0.5106, "step": 31839 }, { "epoch": 1.782954418187927, "grad_norm": 1.4381299018859863, "learning_rate": 9.688789473684211e-05, "loss": 0.4625, "step": 31840 }, { "epoch": 1.7830104155000561, "grad_norm": 1.1630643606185913, "learning_rate": 9.688763157894737e-05, "loss": 0.5307, "step": 31841 }, { "epoch": 1.7830664128121851, "grad_norm": 1.0656017065048218, "learning_rate": 9.688736842105264e-05, "loss": 0.431, "step": 31842 }, { "epoch": 1.7831224101243142, "grad_norm": 1.2093061208724976, "learning_rate": 9.68871052631579e-05, "loss": 0.3468, "step": 31843 }, { "epoch": 1.7831784074364432, "grad_norm": 1.1307623386383057, "learning_rate": 9.688684210526316e-05, "loss": 0.3215, "step": 31844 }, { "epoch": 1.7832344047485722, "grad_norm": 1.2576932907104492, "learning_rate": 9.688657894736842e-05, "loss": 0.4666, "step": 31845 }, { "epoch": 1.7832904020607012, "grad_norm": 1.3892356157302856, "learning_rate": 9.688631578947369e-05, "loss": 0.3826, "step": 31846 }, { "epoch": 1.7833463993728302, "grad_norm": 1.03448486328125, "learning_rate": 9.688605263157895e-05, "loss": 0.3517, "step": 31847 }, { "epoch": 1.7834023966849593, "grad_norm": 1.18561851978302, "learning_rate": 9.688578947368421e-05, "loss": 0.4066, "step": 31848 }, { "epoch": 1.7834583939970883, "grad_norm": 1.6200153827667236, "learning_rate": 9.688552631578947e-05, "loss": 0.4927, "step": 31849 }, { "epoch": 1.7835143913092173, "grad_norm": 1.7550350427627563, "learning_rate": 9.688526315789475e-05, "loss": 0.4092, "step": 31850 }, { "epoch": 1.7835703886213463, "grad_norm": 1.1973174810409546, "learning_rate": 9.6885e-05, "loss": 0.4021, "step": 31851 }, { "epoch": 1.7836263859334753, "grad_norm": 1.3861836194992065, "learning_rate": 9.688473684210528e-05, "loss": 0.5933, "step": 31852 }, { "epoch": 1.7836823832456044, "grad_norm": 1.1747161149978638, "learning_rate": 9.688447368421052e-05, "loss": 
0.5082, "step": 31853 }, { "epoch": 1.7837383805577334, "grad_norm": 1.2632472515106201, "learning_rate": 9.68842105263158e-05, "loss": 0.4313, "step": 31854 }, { "epoch": 1.7837943778698624, "grad_norm": 1.205872893333435, "learning_rate": 9.688394736842106e-05, "loss": 0.4356, "step": 31855 }, { "epoch": 1.7838503751819914, "grad_norm": 1.2383676767349243, "learning_rate": 9.688368421052632e-05, "loss": 0.3702, "step": 31856 }, { "epoch": 1.7839063724941204, "grad_norm": 1.3884590864181519, "learning_rate": 9.688342105263159e-05, "loss": 0.6019, "step": 31857 }, { "epoch": 1.7839623698062494, "grad_norm": 1.1572792530059814, "learning_rate": 9.688315789473684e-05, "loss": 0.3724, "step": 31858 }, { "epoch": 1.7840183671183785, "grad_norm": 1.1126223802566528, "learning_rate": 9.688289473684211e-05, "loss": 0.4468, "step": 31859 }, { "epoch": 1.7840743644305075, "grad_norm": 1.447487235069275, "learning_rate": 9.688263157894737e-05, "loss": 0.4622, "step": 31860 }, { "epoch": 1.7841303617426365, "grad_norm": 1.2140978574752808, "learning_rate": 9.688236842105264e-05, "loss": 0.4691, "step": 31861 }, { "epoch": 1.7841863590547655, "grad_norm": 1.0958054065704346, "learning_rate": 9.68821052631579e-05, "loss": 0.5264, "step": 31862 }, { "epoch": 1.7842423563668945, "grad_norm": 1.5366441011428833, "learning_rate": 9.688184210526316e-05, "loss": 0.4639, "step": 31863 }, { "epoch": 1.7842983536790236, "grad_norm": 1.129201054573059, "learning_rate": 9.688157894736842e-05, "loss": 0.4285, "step": 31864 }, { "epoch": 1.7843543509911526, "grad_norm": 1.1247044801712036, "learning_rate": 9.68813157894737e-05, "loss": 0.3693, "step": 31865 }, { "epoch": 1.7844103483032816, "grad_norm": 1.5763261318206787, "learning_rate": 9.688105263157896e-05, "loss": 0.5131, "step": 31866 }, { "epoch": 1.7844663456154106, "grad_norm": 1.141681432723999, "learning_rate": 9.688078947368421e-05, "loss": 0.5572, "step": 31867 }, { "epoch": 1.7845223429275396, "grad_norm": 1.4665062427520752, 
"learning_rate": 9.688052631578947e-05, "loss": 0.5142, "step": 31868 }, { "epoch": 1.7845783402396687, "grad_norm": 1.1503149271011353, "learning_rate": 9.688026315789475e-05, "loss": 0.3358, "step": 31869 }, { "epoch": 1.7846343375517977, "grad_norm": 1.065591812133789, "learning_rate": 9.688000000000001e-05, "loss": 0.3363, "step": 31870 }, { "epoch": 1.7846903348639267, "grad_norm": 1.360927700996399, "learning_rate": 9.687973684210527e-05, "loss": 0.3881, "step": 31871 }, { "epoch": 1.7847463321760555, "grad_norm": 1.1576305627822876, "learning_rate": 9.687947368421053e-05, "loss": 0.4286, "step": 31872 }, { "epoch": 1.7848023294881845, "grad_norm": 1.765459656715393, "learning_rate": 9.687921052631579e-05, "loss": 0.6039, "step": 31873 }, { "epoch": 1.7848583268003135, "grad_norm": 1.1548362970352173, "learning_rate": 9.687894736842106e-05, "loss": 0.5625, "step": 31874 }, { "epoch": 1.7849143241124426, "grad_norm": 1.1519672870635986, "learning_rate": 9.687868421052632e-05, "loss": 0.4924, "step": 31875 }, { "epoch": 1.7849703214245716, "grad_norm": 1.2811933755874634, "learning_rate": 9.687842105263158e-05, "loss": 0.492, "step": 31876 }, { "epoch": 1.7850263187367006, "grad_norm": 1.201781153678894, "learning_rate": 9.687815789473684e-05, "loss": 0.3948, "step": 31877 }, { "epoch": 1.7850823160488296, "grad_norm": 1.2015191316604614, "learning_rate": 9.687789473684211e-05, "loss": 0.3957, "step": 31878 }, { "epoch": 1.7851383133609586, "grad_norm": 1.3081945180892944, "learning_rate": 9.687763157894737e-05, "loss": 0.4933, "step": 31879 }, { "epoch": 1.7851943106730876, "grad_norm": 1.3227640390396118, "learning_rate": 9.687736842105265e-05, "loss": 0.4365, "step": 31880 }, { "epoch": 1.7852503079852167, "grad_norm": 1.1125644445419312, "learning_rate": 9.687710526315789e-05, "loss": 0.4648, "step": 31881 }, { "epoch": 1.7853063052973457, "grad_norm": 1.2398297786712646, "learning_rate": 9.687684210526316e-05, "loss": 0.387, "step": 31882 }, { "epoch": 
1.7853623026094747, "grad_norm": 1.2380250692367554, "learning_rate": 9.687657894736842e-05, "loss": 0.4519, "step": 31883 }, { "epoch": 1.7854182999216037, "grad_norm": 1.1946462392807007, "learning_rate": 9.68763157894737e-05, "loss": 0.476, "step": 31884 }, { "epoch": 1.7854742972337327, "grad_norm": 1.0793193578720093, "learning_rate": 9.687605263157894e-05, "loss": 0.4237, "step": 31885 }, { "epoch": 1.7855302945458618, "grad_norm": 1.292043685913086, "learning_rate": 9.687578947368422e-05, "loss": 0.4286, "step": 31886 }, { "epoch": 1.7855862918579908, "grad_norm": 1.3193550109863281, "learning_rate": 9.687552631578948e-05, "loss": 0.4822, "step": 31887 }, { "epoch": 1.7856422891701198, "grad_norm": 1.0490700006484985, "learning_rate": 9.687526315789475e-05, "loss": 0.4018, "step": 31888 }, { "epoch": 1.7856982864822488, "grad_norm": 2.052964210510254, "learning_rate": 9.687500000000001e-05, "loss": 0.5584, "step": 31889 }, { "epoch": 1.7857542837943778, "grad_norm": 1.2843341827392578, "learning_rate": 9.687473684210526e-05, "loss": 0.4075, "step": 31890 }, { "epoch": 1.7858102811065069, "grad_norm": 1.354588270187378, "learning_rate": 9.687447368421053e-05, "loss": 0.3725, "step": 31891 }, { "epoch": 1.7858662784186359, "grad_norm": 1.2183945178985596, "learning_rate": 9.687421052631579e-05, "loss": 0.4345, "step": 31892 }, { "epoch": 1.785922275730765, "grad_norm": 1.8452256917953491, "learning_rate": 9.687394736842106e-05, "loss": 0.4525, "step": 31893 }, { "epoch": 1.785978273042894, "grad_norm": 1.266893744468689, "learning_rate": 9.687368421052632e-05, "loss": 0.4361, "step": 31894 }, { "epoch": 1.786034270355023, "grad_norm": 1.3150335550308228, "learning_rate": 9.687342105263158e-05, "loss": 0.4323, "step": 31895 }, { "epoch": 1.786090267667152, "grad_norm": 1.535183072090149, "learning_rate": 9.687315789473684e-05, "loss": 0.4749, "step": 31896 }, { "epoch": 1.786146264979281, "grad_norm": 1.301011085510254, "learning_rate": 9.687289473684212e-05, 
"loss": 0.4234, "step": 31897 }, { "epoch": 1.78620226229141, "grad_norm": 1.994390606880188, "learning_rate": 9.687263157894737e-05, "loss": 0.4824, "step": 31898 }, { "epoch": 1.786258259603539, "grad_norm": 1.3605796098709106, "learning_rate": 9.687236842105263e-05, "loss": 0.457, "step": 31899 }, { "epoch": 1.786314256915668, "grad_norm": 1.096146583557129, "learning_rate": 9.68721052631579e-05, "loss": 0.3761, "step": 31900 }, { "epoch": 1.786370254227797, "grad_norm": 1.2055675983428955, "learning_rate": 9.687184210526317e-05, "loss": 0.4838, "step": 31901 }, { "epoch": 1.786426251539926, "grad_norm": 1.2578544616699219, "learning_rate": 9.687157894736843e-05, "loss": 0.4031, "step": 31902 }, { "epoch": 1.786482248852055, "grad_norm": 1.4588252305984497, "learning_rate": 9.687131578947369e-05, "loss": 0.4556, "step": 31903 }, { "epoch": 1.786538246164184, "grad_norm": 2.3814427852630615, "learning_rate": 9.687105263157895e-05, "loss": 0.4625, "step": 31904 }, { "epoch": 1.7865942434763131, "grad_norm": 1.379195213317871, "learning_rate": 9.687078947368422e-05, "loss": 0.495, "step": 31905 }, { "epoch": 1.7866502407884421, "grad_norm": 1.1372191905975342, "learning_rate": 9.687052631578948e-05, "loss": 0.3828, "step": 31906 }, { "epoch": 1.7867062381005712, "grad_norm": 1.2545710802078247, "learning_rate": 9.687026315789474e-05, "loss": 0.6164, "step": 31907 }, { "epoch": 1.7867622354127002, "grad_norm": 1.309998631477356, "learning_rate": 9.687e-05, "loss": 0.4626, "step": 31908 }, { "epoch": 1.7868182327248292, "grad_norm": 1.2679873704910278, "learning_rate": 9.686973684210526e-05, "loss": 0.4296, "step": 31909 }, { "epoch": 1.7868742300369582, "grad_norm": 1.700130581855774, "learning_rate": 9.686947368421053e-05, "loss": 0.5619, "step": 31910 }, { "epoch": 1.7869302273490872, "grad_norm": 1.367583155632019, "learning_rate": 9.686921052631579e-05, "loss": 0.6026, "step": 31911 }, { "epoch": 1.7869862246612163, "grad_norm": 1.5523277521133423, 
"learning_rate": 9.686894736842107e-05, "loss": 0.3854, "step": 31912 }, { "epoch": 1.7870422219733453, "grad_norm": 1.2146656513214111, "learning_rate": 9.686868421052631e-05, "loss": 0.4527, "step": 31913 }, { "epoch": 1.7870982192854743, "grad_norm": 1.5013121366500854, "learning_rate": 9.686842105263158e-05, "loss": 0.4224, "step": 31914 }, { "epoch": 1.7871542165976033, "grad_norm": 1.2803595066070557, "learning_rate": 9.686815789473684e-05, "loss": 0.4494, "step": 31915 }, { "epoch": 1.7872102139097323, "grad_norm": 1.6458550691604614, "learning_rate": 9.686789473684212e-05, "loss": 0.4969, "step": 31916 }, { "epoch": 1.7872662112218614, "grad_norm": 1.4115514755249023, "learning_rate": 9.686763157894738e-05, "loss": 0.4788, "step": 31917 }, { "epoch": 1.7873222085339904, "grad_norm": 1.4544378519058228, "learning_rate": 9.686736842105264e-05, "loss": 0.5141, "step": 31918 }, { "epoch": 1.7873782058461194, "grad_norm": 1.1197162866592407, "learning_rate": 9.68671052631579e-05, "loss": 0.4807, "step": 31919 }, { "epoch": 1.7874342031582484, "grad_norm": 1.3073194026947021, "learning_rate": 9.686684210526317e-05, "loss": 0.5076, "step": 31920 }, { "epoch": 1.7874902004703774, "grad_norm": 1.1209121942520142, "learning_rate": 9.686657894736843e-05, "loss": 0.4475, "step": 31921 }, { "epoch": 1.7875461977825065, "grad_norm": 1.3270071744918823, "learning_rate": 9.686631578947369e-05, "loss": 0.5461, "step": 31922 }, { "epoch": 1.7876021950946355, "grad_norm": 1.3773369789123535, "learning_rate": 9.686605263157895e-05, "loss": 0.3995, "step": 31923 }, { "epoch": 1.7876581924067645, "grad_norm": 1.25208580493927, "learning_rate": 9.686578947368421e-05, "loss": 0.422, "step": 31924 }, { "epoch": 1.7877141897188935, "grad_norm": 1.4375327825546265, "learning_rate": 9.686552631578948e-05, "loss": 0.4481, "step": 31925 }, { "epoch": 1.7877701870310225, "grad_norm": 1.1717000007629395, "learning_rate": 9.686526315789474e-05, "loss": 0.3859, "step": 31926 }, { "epoch": 
1.7878261843431515, "grad_norm": 1.3534276485443115, "learning_rate": 9.6865e-05, "loss": 0.5196, "step": 31927 }, { "epoch": 1.7878821816552806, "grad_norm": 1.1850370168685913, "learning_rate": 9.686473684210526e-05, "loss": 0.4482, "step": 31928 }, { "epoch": 1.7879381789674096, "grad_norm": 1.0730195045471191, "learning_rate": 9.686447368421053e-05, "loss": 0.3513, "step": 31929 }, { "epoch": 1.7879941762795386, "grad_norm": 1.3491233587265015, "learning_rate": 9.68642105263158e-05, "loss": 0.466, "step": 31930 }, { "epoch": 1.7880501735916676, "grad_norm": 1.3401660919189453, "learning_rate": 9.686394736842105e-05, "loss": 0.5137, "step": 31931 }, { "epoch": 1.7881061709037966, "grad_norm": 1.0419365167617798, "learning_rate": 9.686368421052631e-05, "loss": 0.3629, "step": 31932 }, { "epoch": 1.7881621682159257, "grad_norm": 1.366532564163208, "learning_rate": 9.686342105263159e-05, "loss": 0.4584, "step": 31933 }, { "epoch": 1.7882181655280547, "grad_norm": 1.2646071910858154, "learning_rate": 9.686315789473685e-05, "loss": 0.5024, "step": 31934 }, { "epoch": 1.7882741628401837, "grad_norm": 1.360987663269043, "learning_rate": 9.686289473684212e-05, "loss": 0.3943, "step": 31935 }, { "epoch": 1.7883301601523127, "grad_norm": 1.5033934116363525, "learning_rate": 9.686263157894737e-05, "loss": 0.4614, "step": 31936 }, { "epoch": 1.7883861574644417, "grad_norm": 1.2284682989120483, "learning_rate": 9.686236842105264e-05, "loss": 0.4583, "step": 31937 }, { "epoch": 1.7884421547765708, "grad_norm": 1.2431849241256714, "learning_rate": 9.68621052631579e-05, "loss": 0.4344, "step": 31938 }, { "epoch": 1.7884981520886998, "grad_norm": 1.0632922649383545, "learning_rate": 9.686184210526317e-05, "loss": 0.3701, "step": 31939 }, { "epoch": 1.7885541494008288, "grad_norm": 1.4324793815612793, "learning_rate": 9.686157894736842e-05, "loss": 0.5288, "step": 31940 }, { "epoch": 1.7886101467129578, "grad_norm": 1.2565683126449585, "learning_rate": 9.686131578947368e-05, 
"loss": 0.4015, "step": 31941 }, { "epoch": 1.7886661440250868, "grad_norm": 1.568869709968567, "learning_rate": 9.686105263157895e-05, "loss": 0.4316, "step": 31942 }, { "epoch": 1.7887221413372159, "grad_norm": 1.173990249633789, "learning_rate": 9.686078947368421e-05, "loss": 0.4195, "step": 31943 }, { "epoch": 1.7887781386493449, "grad_norm": 1.4450441598892212, "learning_rate": 9.686052631578948e-05, "loss": 0.4647, "step": 31944 }, { "epoch": 1.788834135961474, "grad_norm": 1.2189844846725464, "learning_rate": 9.686026315789473e-05, "loss": 0.5044, "step": 31945 }, { "epoch": 1.788890133273603, "grad_norm": 1.457152009010315, "learning_rate": 9.686e-05, "loss": 0.5644, "step": 31946 }, { "epoch": 1.788946130585732, "grad_norm": 1.3943636417388916, "learning_rate": 9.685973684210526e-05, "loss": 0.363, "step": 31947 }, { "epoch": 1.789002127897861, "grad_norm": 1.5694568157196045, "learning_rate": 9.685947368421054e-05, "loss": 0.416, "step": 31948 }, { "epoch": 1.78905812520999, "grad_norm": 1.3233616352081299, "learning_rate": 9.68592105263158e-05, "loss": 0.5417, "step": 31949 }, { "epoch": 1.789114122522119, "grad_norm": 1.9907273054122925, "learning_rate": 9.685894736842106e-05, "loss": 0.4553, "step": 31950 }, { "epoch": 1.789170119834248, "grad_norm": 1.4094562530517578, "learning_rate": 9.685868421052632e-05, "loss": 0.5634, "step": 31951 }, { "epoch": 1.789226117146377, "grad_norm": 1.2532178163528442, "learning_rate": 9.685842105263159e-05, "loss": 0.4212, "step": 31952 }, { "epoch": 1.789282114458506, "grad_norm": 1.1450448036193848, "learning_rate": 9.685815789473685e-05, "loss": 0.4174, "step": 31953 }, { "epoch": 1.789338111770635, "grad_norm": 5.6791582107543945, "learning_rate": 9.685789473684211e-05, "loss": 0.4486, "step": 31954 }, { "epoch": 1.7893941090827639, "grad_norm": 1.223522424697876, "learning_rate": 9.685763157894737e-05, "loss": 0.5301, "step": 31955 }, { "epoch": 1.7894501063948929, "grad_norm": 1.2860651016235352, 
"learning_rate": 9.685736842105264e-05, "loss": 0.6105, "step": 31956 }, { "epoch": 1.789506103707022, "grad_norm": 1.1884591579437256, "learning_rate": 9.68571052631579e-05, "loss": 0.4867, "step": 31957 }, { "epoch": 1.789562101019151, "grad_norm": 1.3899544477462769, "learning_rate": 9.685684210526316e-05, "loss": 0.5301, "step": 31958 }, { "epoch": 1.78961809833128, "grad_norm": 1.0374916791915894, "learning_rate": 9.685657894736842e-05, "loss": 0.3354, "step": 31959 }, { "epoch": 1.789674095643409, "grad_norm": 1.2505470514297485, "learning_rate": 9.685631578947368e-05, "loss": 0.4811, "step": 31960 }, { "epoch": 1.789730092955538, "grad_norm": 3.660677671432495, "learning_rate": 9.685605263157895e-05, "loss": 0.4431, "step": 31961 }, { "epoch": 1.789786090267667, "grad_norm": 1.2718796730041504, "learning_rate": 9.685578947368421e-05, "loss": 0.439, "step": 31962 }, { "epoch": 1.789842087579796, "grad_norm": 1.0270181894302368, "learning_rate": 9.685552631578947e-05, "loss": 0.3657, "step": 31963 }, { "epoch": 1.789898084891925, "grad_norm": 1.5077179670333862, "learning_rate": 9.685526315789473e-05, "loss": 0.7349, "step": 31964 }, { "epoch": 1.789954082204054, "grad_norm": 1.1391503810882568, "learning_rate": 9.685500000000001e-05, "loss": 0.3734, "step": 31965 }, { "epoch": 1.790010079516183, "grad_norm": 1.2708402872085571, "learning_rate": 9.685473684210527e-05, "loss": 0.4743, "step": 31966 }, { "epoch": 1.790066076828312, "grad_norm": 1.5185188055038452, "learning_rate": 9.685447368421054e-05, "loss": 0.3838, "step": 31967 }, { "epoch": 1.790122074140441, "grad_norm": 1.0767401456832886, "learning_rate": 9.685421052631579e-05, "loss": 0.332, "step": 31968 }, { "epoch": 1.7901780714525701, "grad_norm": 2.1739299297332764, "learning_rate": 9.685394736842106e-05, "loss": 0.5035, "step": 31969 }, { "epoch": 1.7902340687646991, "grad_norm": 1.1915934085845947, "learning_rate": 9.685368421052632e-05, "loss": 0.3914, "step": 31970 }, { "epoch": 
1.7902900660768282, "grad_norm": 1.3421036005020142, "learning_rate": 9.685342105263159e-05, "loss": 0.4314, "step": 31971 }, { "epoch": 1.7903460633889572, "grad_norm": 1.072020411491394, "learning_rate": 9.685315789473685e-05, "loss": 0.3705, "step": 31972 }, { "epoch": 1.7904020607010862, "grad_norm": 1.4651178121566772, "learning_rate": 9.685289473684211e-05, "loss": 0.4829, "step": 31973 }, { "epoch": 1.7904580580132152, "grad_norm": 1.6425532102584839, "learning_rate": 9.685263157894737e-05, "loss": 0.5433, "step": 31974 }, { "epoch": 1.7905140553253442, "grad_norm": 1.3565775156021118, "learning_rate": 9.685236842105264e-05, "loss": 0.4022, "step": 31975 }, { "epoch": 1.7905700526374733, "grad_norm": 1.4639005661010742, "learning_rate": 9.68521052631579e-05, "loss": 0.3853, "step": 31976 }, { "epoch": 1.7906260499496023, "grad_norm": 1.363292932510376, "learning_rate": 9.685184210526315e-05, "loss": 0.5404, "step": 31977 }, { "epoch": 1.7906820472617313, "grad_norm": 1.3989355564117432, "learning_rate": 9.685157894736842e-05, "loss": 0.439, "step": 31978 }, { "epoch": 1.7907380445738603, "grad_norm": 1.1867221593856812, "learning_rate": 9.685131578947368e-05, "loss": 0.351, "step": 31979 }, { "epoch": 1.7907940418859893, "grad_norm": 1.2818231582641602, "learning_rate": 9.685105263157896e-05, "loss": 0.3876, "step": 31980 }, { "epoch": 1.7908500391981184, "grad_norm": 1.0945988893508911, "learning_rate": 9.685078947368422e-05, "loss": 0.4235, "step": 31981 }, { "epoch": 1.7909060365102474, "grad_norm": 1.424734354019165, "learning_rate": 9.685052631578948e-05, "loss": 0.4813, "step": 31982 }, { "epoch": 1.7909620338223764, "grad_norm": 1.2352250814437866, "learning_rate": 9.685026315789474e-05, "loss": 0.4418, "step": 31983 }, { "epoch": 1.7910180311345054, "grad_norm": 1.2557188272476196, "learning_rate": 9.685000000000001e-05, "loss": 0.4996, "step": 31984 }, { "epoch": 1.7910740284466344, "grad_norm": 1.4204963445663452, "learning_rate": 
9.684973684210527e-05, "loss": 0.5375, "step": 31985 }, { "epoch": 1.7911300257587635, "grad_norm": 1.0954538583755493, "learning_rate": 9.684947368421053e-05, "loss": 0.4255, "step": 31986 }, { "epoch": 1.7911860230708925, "grad_norm": 1.6155771017074585, "learning_rate": 9.684921052631579e-05, "loss": 0.5792, "step": 31987 }, { "epoch": 1.7912420203830215, "grad_norm": 1.3473423719406128, "learning_rate": 9.684894736842106e-05, "loss": 0.6393, "step": 31988 }, { "epoch": 1.7912980176951505, "grad_norm": 1.2579264640808105, "learning_rate": 9.684868421052632e-05, "loss": 0.4093, "step": 31989 }, { "epoch": 1.7913540150072795, "grad_norm": 1.2967183589935303, "learning_rate": 9.68484210526316e-05, "loss": 0.501, "step": 31990 }, { "epoch": 1.7914100123194086, "grad_norm": 1.2125723361968994, "learning_rate": 9.684815789473684e-05, "loss": 0.4765, "step": 31991 }, { "epoch": 1.7914660096315376, "grad_norm": 1.2356135845184326, "learning_rate": 9.684789473684211e-05, "loss": 0.4129, "step": 31992 }, { "epoch": 1.7915220069436666, "grad_norm": 1.2769567966461182, "learning_rate": 9.684763157894737e-05, "loss": 0.4385, "step": 31993 }, { "epoch": 1.7915780042557956, "grad_norm": 1.0065739154815674, "learning_rate": 9.684736842105263e-05, "loss": 0.381, "step": 31994 }, { "epoch": 1.7916340015679246, "grad_norm": 1.2326743602752686, "learning_rate": 9.68471052631579e-05, "loss": 0.5205, "step": 31995 }, { "epoch": 1.7916899988800536, "grad_norm": 1.2099952697753906, "learning_rate": 9.684684210526315e-05, "loss": 0.4812, "step": 31996 }, { "epoch": 1.7917459961921827, "grad_norm": 1.4518769979476929, "learning_rate": 9.684657894736843e-05, "loss": 0.5582, "step": 31997 }, { "epoch": 1.7918019935043117, "grad_norm": 1.2784314155578613, "learning_rate": 9.684631578947369e-05, "loss": 0.4311, "step": 31998 }, { "epoch": 1.7918579908164407, "grad_norm": 1.170072317123413, "learning_rate": 9.684605263157896e-05, "loss": 0.454, "step": 31999 }, { "epoch": 1.7919139881285697, 
"grad_norm": 1.7834513187408447, "learning_rate": 9.68457894736842e-05, "loss": 0.5746, "step": 32000 }, { "epoch": 1.7919699854406987, "grad_norm": 1.2325236797332764, "learning_rate": 9.684552631578948e-05, "loss": 0.4664, "step": 32001 }, { "epoch": 1.7920259827528278, "grad_norm": 1.687134861946106, "learning_rate": 9.684526315789474e-05, "loss": 0.635, "step": 32002 }, { "epoch": 1.7920819800649568, "grad_norm": 1.6744015216827393, "learning_rate": 9.684500000000001e-05, "loss": 0.6643, "step": 32003 }, { "epoch": 1.7921379773770858, "grad_norm": 1.270850658416748, "learning_rate": 9.684473684210527e-05, "loss": 0.4488, "step": 32004 }, { "epoch": 1.7921939746892148, "grad_norm": 1.3037875890731812, "learning_rate": 9.684447368421053e-05, "loss": 0.4997, "step": 32005 }, { "epoch": 1.7922499720013438, "grad_norm": 1.092988133430481, "learning_rate": 9.684421052631579e-05, "loss": 0.3485, "step": 32006 }, { "epoch": 1.7923059693134729, "grad_norm": 1.4699571132659912, "learning_rate": 9.684394736842106e-05, "loss": 0.5074, "step": 32007 }, { "epoch": 1.7923619666256019, "grad_norm": 1.195314645767212, "learning_rate": 9.684368421052632e-05, "loss": 0.3733, "step": 32008 }, { "epoch": 1.792417963937731, "grad_norm": 1.9048974514007568, "learning_rate": 9.684342105263158e-05, "loss": 0.4565, "step": 32009 }, { "epoch": 1.79247396124986, "grad_norm": 1.0765550136566162, "learning_rate": 9.684315789473684e-05, "loss": 0.4448, "step": 32010 }, { "epoch": 1.792529958561989, "grad_norm": 1.8611201047897339, "learning_rate": 9.68428947368421e-05, "loss": 0.6514, "step": 32011 }, { "epoch": 1.792585955874118, "grad_norm": 1.2042350769042969, "learning_rate": 9.684263157894738e-05, "loss": 0.3938, "step": 32012 }, { "epoch": 1.792641953186247, "grad_norm": 1.2873826026916504, "learning_rate": 9.684236842105264e-05, "loss": 0.463, "step": 32013 }, { "epoch": 1.792697950498376, "grad_norm": 1.1961383819580078, "learning_rate": 9.68421052631579e-05, "loss": 0.3753, "step": 
32014 }, { "epoch": 1.792753947810505, "grad_norm": 1.4698563814163208, "learning_rate": 9.684184210526316e-05, "loss": 0.3901, "step": 32015 }, { "epoch": 1.792809945122634, "grad_norm": 1.2973783016204834, "learning_rate": 9.684157894736843e-05, "loss": 0.4112, "step": 32016 }, { "epoch": 1.792865942434763, "grad_norm": 1.3621046543121338, "learning_rate": 9.684131578947369e-05, "loss": 0.4495, "step": 32017 }, { "epoch": 1.792921939746892, "grad_norm": 1.4783670902252197, "learning_rate": 9.684105263157895e-05, "loss": 0.5238, "step": 32018 }, { "epoch": 1.792977937059021, "grad_norm": 1.2862879037857056, "learning_rate": 9.684078947368421e-05, "loss": 0.4602, "step": 32019 }, { "epoch": 1.79303393437115, "grad_norm": 1.5063560009002686, "learning_rate": 9.684052631578948e-05, "loss": 0.4594, "step": 32020 }, { "epoch": 1.7930899316832791, "grad_norm": 1.4773988723754883, "learning_rate": 9.684026315789474e-05, "loss": 0.6903, "step": 32021 }, { "epoch": 1.7931459289954081, "grad_norm": 1.222888469696045, "learning_rate": 9.684000000000001e-05, "loss": 0.4066, "step": 32022 }, { "epoch": 1.7932019263075372, "grad_norm": 0.9128860831260681, "learning_rate": 9.683973684210526e-05, "loss": 0.3873, "step": 32023 }, { "epoch": 1.7932579236196662, "grad_norm": 1.1375364065170288, "learning_rate": 9.683947368421053e-05, "loss": 0.3439, "step": 32024 }, { "epoch": 1.7933139209317952, "grad_norm": 1.0395842790603638, "learning_rate": 9.68392105263158e-05, "loss": 0.3702, "step": 32025 }, { "epoch": 1.7933699182439242, "grad_norm": 1.6284608840942383, "learning_rate": 9.683894736842107e-05, "loss": 0.5268, "step": 32026 }, { "epoch": 1.7934259155560532, "grad_norm": 1.306712031364441, "learning_rate": 9.683868421052633e-05, "loss": 0.6079, "step": 32027 }, { "epoch": 1.7934819128681823, "grad_norm": 1.5549505949020386, "learning_rate": 9.683842105263157e-05, "loss": 0.302, "step": 32028 }, { "epoch": 1.7935379101803113, "grad_norm": 1.2894011735916138, "learning_rate": 
9.683815789473685e-05, "loss": 0.5774, "step": 32029 }, { "epoch": 1.7935939074924403, "grad_norm": 1.305033564567566, "learning_rate": 9.68378947368421e-05, "loss": 0.5882, "step": 32030 }, { "epoch": 1.7936499048045693, "grad_norm": 1.203795313835144, "learning_rate": 9.683763157894738e-05, "loss": 0.4354, "step": 32031 }, { "epoch": 1.7937059021166983, "grad_norm": 1.35097074508667, "learning_rate": 9.683736842105263e-05, "loss": 0.4676, "step": 32032 }, { "epoch": 1.7937618994288274, "grad_norm": 1.9423344135284424, "learning_rate": 9.68371052631579e-05, "loss": 0.4425, "step": 32033 }, { "epoch": 1.7938178967409564, "grad_norm": 1.3653912544250488, "learning_rate": 9.683684210526316e-05, "loss": 0.4419, "step": 32034 }, { "epoch": 1.7938738940530854, "grad_norm": 1.712348461151123, "learning_rate": 9.683657894736843e-05, "loss": 0.5226, "step": 32035 }, { "epoch": 1.7939298913652144, "grad_norm": 1.261184573173523, "learning_rate": 9.683631578947369e-05, "loss": 0.6008, "step": 32036 }, { "epoch": 1.7939858886773434, "grad_norm": 1.3308205604553223, "learning_rate": 9.683605263157895e-05, "loss": 0.4221, "step": 32037 }, { "epoch": 1.7940418859894725, "grad_norm": 1.3777445554733276, "learning_rate": 9.683578947368421e-05, "loss": 0.5306, "step": 32038 }, { "epoch": 1.7940978833016015, "grad_norm": 1.1889625787734985, "learning_rate": 9.683552631578948e-05, "loss": 0.4811, "step": 32039 }, { "epoch": 1.7941538806137305, "grad_norm": 1.2197210788726807, "learning_rate": 9.683526315789474e-05, "loss": 0.4505, "step": 32040 }, { "epoch": 1.7942098779258595, "grad_norm": 2.1066439151763916, "learning_rate": 9.6835e-05, "loss": 0.4286, "step": 32041 }, { "epoch": 1.7942658752379885, "grad_norm": 1.5215115547180176, "learning_rate": 9.683473684210526e-05, "loss": 0.4157, "step": 32042 }, { "epoch": 1.7943218725501175, "grad_norm": 1.2734651565551758, "learning_rate": 9.683447368421054e-05, "loss": 0.4494, "step": 32043 }, { "epoch": 1.7943778698622466, "grad_norm": 
1.4336392879486084, "learning_rate": 9.68342105263158e-05, "loss": 0.5462, "step": 32044 }, { "epoch": 1.7944338671743756, "grad_norm": 1.415024757385254, "learning_rate": 9.683394736842106e-05, "loss": 0.5185, "step": 32045 }, { "epoch": 1.7944898644865046, "grad_norm": 1.0889935493469238, "learning_rate": 9.683368421052632e-05, "loss": 0.3652, "step": 32046 }, { "epoch": 1.7945458617986336, "grad_norm": 1.3765455484390259, "learning_rate": 9.683342105263158e-05, "loss": 0.5184, "step": 32047 }, { "epoch": 1.7946018591107626, "grad_norm": 1.792376160621643, "learning_rate": 9.683315789473685e-05, "loss": 0.4826, "step": 32048 }, { "epoch": 1.7946578564228917, "grad_norm": 1.0766433477401733, "learning_rate": 9.683289473684211e-05, "loss": 0.386, "step": 32049 }, { "epoch": 1.7947138537350207, "grad_norm": 1.2214709520339966, "learning_rate": 9.683263157894737e-05, "loss": 0.4808, "step": 32050 }, { "epoch": 1.7947698510471497, "grad_norm": 1.2832372188568115, "learning_rate": 9.683236842105263e-05, "loss": 0.4893, "step": 32051 }, { "epoch": 1.7948258483592787, "grad_norm": 2.639239549636841, "learning_rate": 9.68321052631579e-05, "loss": 0.7086, "step": 32052 }, { "epoch": 1.7948818456714077, "grad_norm": 1.6005090475082397, "learning_rate": 9.683184210526316e-05, "loss": 0.4862, "step": 32053 }, { "epoch": 1.7949378429835368, "grad_norm": 1.281174898147583, "learning_rate": 9.683157894736843e-05, "loss": 0.5162, "step": 32054 }, { "epoch": 1.7949938402956658, "grad_norm": 1.1073137521743774, "learning_rate": 9.683131578947368e-05, "loss": 0.4177, "step": 32055 }, { "epoch": 1.7950498376077948, "grad_norm": 1.5641192197799683, "learning_rate": 9.683105263157895e-05, "loss": 0.4397, "step": 32056 }, { "epoch": 1.7951058349199238, "grad_norm": 1.0586190223693848, "learning_rate": 9.683078947368421e-05, "loss": 0.3676, "step": 32057 }, { "epoch": 1.7951618322320528, "grad_norm": 1.2605412006378174, "learning_rate": 9.683052631578949e-05, "loss": 0.4423, "step": 
32058 }, { "epoch": 1.7952178295441819, "grad_norm": 1.313506841659546, "learning_rate": 9.683026315789475e-05, "loss": 0.3356, "step": 32059 }, { "epoch": 1.7952738268563109, "grad_norm": 1.2559566497802734, "learning_rate": 9.683e-05, "loss": 0.5682, "step": 32060 }, { "epoch": 1.79532982416844, "grad_norm": 1.969361424446106, "learning_rate": 9.682973684210527e-05, "loss": 0.5176, "step": 32061 }, { "epoch": 1.795385821480569, "grad_norm": 1.121721863746643, "learning_rate": 9.682947368421053e-05, "loss": 0.3406, "step": 32062 }, { "epoch": 1.795441818792698, "grad_norm": 1.276720643043518, "learning_rate": 9.68292105263158e-05, "loss": 0.6006, "step": 32063 }, { "epoch": 1.795497816104827, "grad_norm": 1.2242330312728882, "learning_rate": 9.682894736842106e-05, "loss": 0.3921, "step": 32064 }, { "epoch": 1.795553813416956, "grad_norm": 1.31707763671875, "learning_rate": 9.682868421052632e-05, "loss": 0.4271, "step": 32065 }, { "epoch": 1.795609810729085, "grad_norm": 1.3440417051315308, "learning_rate": 9.682842105263158e-05, "loss": 0.4761, "step": 32066 }, { "epoch": 1.795665808041214, "grad_norm": 1.528328776359558, "learning_rate": 9.682815789473685e-05, "loss": 0.5074, "step": 32067 }, { "epoch": 1.795721805353343, "grad_norm": 1.130800724029541, "learning_rate": 9.682789473684211e-05, "loss": 0.3965, "step": 32068 }, { "epoch": 1.795777802665472, "grad_norm": 1.5954233407974243, "learning_rate": 9.682763157894737e-05, "loss": 0.4566, "step": 32069 }, { "epoch": 1.795833799977601, "grad_norm": 1.1379814147949219, "learning_rate": 9.682736842105263e-05, "loss": 0.4734, "step": 32070 }, { "epoch": 1.79588979728973, "grad_norm": 1.2935895919799805, "learning_rate": 9.68271052631579e-05, "loss": 0.3702, "step": 32071 }, { "epoch": 1.795945794601859, "grad_norm": 1.4047520160675049, "learning_rate": 9.682684210526316e-05, "loss": 0.5346, "step": 32072 }, { "epoch": 1.7960017919139881, "grad_norm": 1.4247809648513794, "learning_rate": 9.682657894736842e-05, 
"loss": 0.4072, "step": 32073 }, { "epoch": 1.7960577892261171, "grad_norm": 1.657099723815918, "learning_rate": 9.682631578947368e-05, "loss": 0.5088, "step": 32074 }, { "epoch": 1.7961137865382462, "grad_norm": 1.5160071849822998, "learning_rate": 9.682605263157896e-05, "loss": 0.5435, "step": 32075 }, { "epoch": 1.7961697838503752, "grad_norm": 1.0926828384399414, "learning_rate": 9.682578947368422e-05, "loss": 0.3499, "step": 32076 }, { "epoch": 1.7962257811625042, "grad_norm": 1.184757947921753, "learning_rate": 9.682552631578949e-05, "loss": 0.3446, "step": 32077 }, { "epoch": 1.7962817784746332, "grad_norm": 1.2774674892425537, "learning_rate": 9.682526315789474e-05, "loss": 0.4119, "step": 32078 }, { "epoch": 1.7963377757867622, "grad_norm": 1.1742981672286987, "learning_rate": 9.682500000000001e-05, "loss": 0.4501, "step": 32079 }, { "epoch": 1.7963937730988913, "grad_norm": 1.0841269493103027, "learning_rate": 9.682473684210527e-05, "loss": 0.3399, "step": 32080 }, { "epoch": 1.7964497704110203, "grad_norm": 1.441339373588562, "learning_rate": 9.682447368421053e-05, "loss": 0.6248, "step": 32081 }, { "epoch": 1.7965057677231493, "grad_norm": 1.1356699466705322, "learning_rate": 9.68242105263158e-05, "loss": 0.3581, "step": 32082 }, { "epoch": 1.7965617650352783, "grad_norm": 1.0941351652145386, "learning_rate": 9.682394736842105e-05, "loss": 0.4131, "step": 32083 }, { "epoch": 1.7966177623474073, "grad_norm": 1.2052688598632812, "learning_rate": 9.682368421052632e-05, "loss": 0.4974, "step": 32084 }, { "epoch": 1.7966737596595364, "grad_norm": 1.3144172430038452, "learning_rate": 9.682342105263158e-05, "loss": 0.5425, "step": 32085 }, { "epoch": 1.7967297569716654, "grad_norm": 1.3686069250106812, "learning_rate": 9.682315789473685e-05, "loss": 0.439, "step": 32086 }, { "epoch": 1.7967857542837944, "grad_norm": 1.1614123582839966, "learning_rate": 9.68228947368421e-05, "loss": 0.355, "step": 32087 }, { "epoch": 1.7968417515959234, "grad_norm": 
1.3086341619491577, "learning_rate": 9.682263157894737e-05, "loss": 0.4057, "step": 32088 }, { "epoch": 1.7968977489080524, "grad_norm": 1.2517374753952026, "learning_rate": 9.682236842105263e-05, "loss": 0.4739, "step": 32089 }, { "epoch": 1.7969537462201814, "grad_norm": 1.3251738548278809, "learning_rate": 9.68221052631579e-05, "loss": 0.4241, "step": 32090 }, { "epoch": 1.7970097435323105, "grad_norm": 1.3720954656600952, "learning_rate": 9.682184210526317e-05, "loss": 0.5435, "step": 32091 }, { "epoch": 1.7970657408444395, "grad_norm": 1.362906813621521, "learning_rate": 9.682157894736843e-05, "loss": 0.4403, "step": 32092 }, { "epoch": 1.7971217381565685, "grad_norm": 1.5304017066955566, "learning_rate": 9.682131578947369e-05, "loss": 0.3982, "step": 32093 }, { "epoch": 1.7971777354686975, "grad_norm": 1.428594946861267, "learning_rate": 9.682105263157896e-05, "loss": 0.4535, "step": 32094 }, { "epoch": 1.7972337327808265, "grad_norm": 1.2646770477294922, "learning_rate": 9.682078947368422e-05, "loss": 0.4245, "step": 32095 }, { "epoch": 1.7972897300929556, "grad_norm": 1.387730598449707, "learning_rate": 9.682052631578948e-05, "loss": 0.3672, "step": 32096 }, { "epoch": 1.7973457274050846, "grad_norm": 1.1369807720184326, "learning_rate": 9.682026315789474e-05, "loss": 0.4255, "step": 32097 }, { "epoch": 1.7974017247172136, "grad_norm": 1.5108939409255981, "learning_rate": 9.682e-05, "loss": 0.5703, "step": 32098 }, { "epoch": 1.7974577220293426, "grad_norm": 1.3130950927734375, "learning_rate": 9.681973684210527e-05, "loss": 0.503, "step": 32099 }, { "epoch": 1.7975137193414716, "grad_norm": 1.1229069232940674, "learning_rate": 9.681947368421053e-05, "loss": 0.3066, "step": 32100 }, { "epoch": 1.7975697166536007, "grad_norm": 1.3163306713104248, "learning_rate": 9.681921052631579e-05, "loss": 0.577, "step": 32101 }, { "epoch": 1.7976257139657297, "grad_norm": 1.2117362022399902, "learning_rate": 9.681894736842105e-05, "loss": 0.4055, "step": 32102 }, { 
"epoch": 1.7976817112778587, "grad_norm": 1.4718432426452637, "learning_rate": 9.681868421052632e-05, "loss": 0.453, "step": 32103 }, { "epoch": 1.7977377085899877, "grad_norm": 2.0896828174591064, "learning_rate": 9.681842105263158e-05, "loss": 0.3959, "step": 32104 }, { "epoch": 1.7977937059021167, "grad_norm": 1.7026898860931396, "learning_rate": 9.681815789473684e-05, "loss": 0.6358, "step": 32105 }, { "epoch": 1.7978497032142458, "grad_norm": 1.3427798748016357, "learning_rate": 9.68178947368421e-05, "loss": 0.5346, "step": 32106 }, { "epoch": 1.7979057005263748, "grad_norm": 1.5636309385299683, "learning_rate": 9.681763157894738e-05, "loss": 0.4554, "step": 32107 }, { "epoch": 1.7979616978385038, "grad_norm": 1.7977672815322876, "learning_rate": 9.681736842105264e-05, "loss": 0.4401, "step": 32108 }, { "epoch": 1.7980176951506328, "grad_norm": 1.3516571521759033, "learning_rate": 9.681710526315791e-05, "loss": 0.4898, "step": 32109 }, { "epoch": 1.7980736924627618, "grad_norm": 1.267030119895935, "learning_rate": 9.681684210526316e-05, "loss": 0.4257, "step": 32110 }, { "epoch": 1.7981296897748908, "grad_norm": 0.9707457423210144, "learning_rate": 9.681657894736843e-05, "loss": 0.402, "step": 32111 }, { "epoch": 1.7981856870870199, "grad_norm": 1.283591389656067, "learning_rate": 9.681631578947369e-05, "loss": 0.3039, "step": 32112 }, { "epoch": 1.7982416843991489, "grad_norm": 1.1905903816223145, "learning_rate": 9.681605263157896e-05, "loss": 0.4965, "step": 32113 }, { "epoch": 1.798297681711278, "grad_norm": 1.1125209331512451, "learning_rate": 9.681578947368422e-05, "loss": 0.3219, "step": 32114 }, { "epoch": 1.798353679023407, "grad_norm": 1.3652342557907104, "learning_rate": 9.681552631578947e-05, "loss": 0.4651, "step": 32115 }, { "epoch": 1.798409676335536, "grad_norm": 1.111520528793335, "learning_rate": 9.681526315789474e-05, "loss": 0.3665, "step": 32116 }, { "epoch": 1.798465673647665, "grad_norm": 1.493578314781189, "learning_rate": 9.6815e-05, 
"loss": 0.5879, "step": 32117 }, { "epoch": 1.798521670959794, "grad_norm": 1.2755286693572998, "learning_rate": 9.681473684210527e-05, "loss": 0.3884, "step": 32118 }, { "epoch": 1.798577668271923, "grad_norm": 1.6973812580108643, "learning_rate": 9.681447368421053e-05, "loss": 0.4738, "step": 32119 }, { "epoch": 1.798633665584052, "grad_norm": 1.2047200202941895, "learning_rate": 9.68142105263158e-05, "loss": 0.4954, "step": 32120 }, { "epoch": 1.798689662896181, "grad_norm": 1.3098191022872925, "learning_rate": 9.681394736842105e-05, "loss": 0.4377, "step": 32121 }, { "epoch": 1.79874566020831, "grad_norm": 1.3078348636627197, "learning_rate": 9.681368421052633e-05, "loss": 0.3742, "step": 32122 }, { "epoch": 1.798801657520439, "grad_norm": 4.851028919219971, "learning_rate": 9.681342105263159e-05, "loss": 0.5733, "step": 32123 }, { "epoch": 1.798857654832568, "grad_norm": 1.3678643703460693, "learning_rate": 9.681315789473685e-05, "loss": 0.352, "step": 32124 }, { "epoch": 1.7989136521446971, "grad_norm": 1.1013264656066895, "learning_rate": 9.68128947368421e-05, "loss": 0.4205, "step": 32125 }, { "epoch": 1.7989696494568261, "grad_norm": 1.7529017925262451, "learning_rate": 9.681263157894738e-05, "loss": 0.6738, "step": 32126 }, { "epoch": 1.7990256467689552, "grad_norm": 1.278640627861023, "learning_rate": 9.681236842105264e-05, "loss": 0.3978, "step": 32127 }, { "epoch": 1.7990816440810842, "grad_norm": 1.36661958694458, "learning_rate": 9.68121052631579e-05, "loss": 0.4554, "step": 32128 }, { "epoch": 1.7991376413932132, "grad_norm": 1.1438835859298706, "learning_rate": 9.681184210526316e-05, "loss": 0.3538, "step": 32129 }, { "epoch": 1.7991936387053422, "grad_norm": 1.1635104417800903, "learning_rate": 9.681157894736843e-05, "loss": 0.3943, "step": 32130 }, { "epoch": 1.7992496360174712, "grad_norm": 1.2324329614639282, "learning_rate": 9.681131578947369e-05, "loss": 0.3721, "step": 32131 }, { "epoch": 1.7993056333296003, "grad_norm": 1.1140331029891968, 
"learning_rate": 9.681105263157895e-05, "loss": 0.3159, "step": 32132 }, { "epoch": 1.7993616306417293, "grad_norm": 1.3687115907669067, "learning_rate": 9.681078947368421e-05, "loss": 0.5195, "step": 32133 }, { "epoch": 1.7994176279538583, "grad_norm": 1.2846589088439941, "learning_rate": 9.681052631578947e-05, "loss": 0.4109, "step": 32134 }, { "epoch": 1.7994736252659873, "grad_norm": 1.5153899192810059, "learning_rate": 9.681026315789474e-05, "loss": 0.3978, "step": 32135 }, { "epoch": 1.7995296225781163, "grad_norm": 1.623278021812439, "learning_rate": 9.681e-05, "loss": 0.5264, "step": 32136 }, { "epoch": 1.7995856198902453, "grad_norm": 1.4748739004135132, "learning_rate": 9.680973684210528e-05, "loss": 0.4594, "step": 32137 }, { "epoch": 1.7996416172023744, "grad_norm": 1.1571379899978638, "learning_rate": 9.680947368421052e-05, "loss": 0.3855, "step": 32138 }, { "epoch": 1.7996976145145034, "grad_norm": 1.15643310546875, "learning_rate": 9.68092105263158e-05, "loss": 0.5464, "step": 32139 }, { "epoch": 1.7997536118266324, "grad_norm": 1.2689732313156128, "learning_rate": 9.680894736842106e-05, "loss": 0.5113, "step": 32140 }, { "epoch": 1.7998096091387614, "grad_norm": 1.3981406688690186, "learning_rate": 9.680868421052633e-05, "loss": 0.4687, "step": 32141 }, { "epoch": 1.7998656064508904, "grad_norm": 1.1974506378173828, "learning_rate": 9.680842105263158e-05, "loss": 0.4869, "step": 32142 }, { "epoch": 1.7999216037630195, "grad_norm": 1.3745614290237427, "learning_rate": 9.680815789473685e-05, "loss": 0.5289, "step": 32143 }, { "epoch": 1.7999776010751485, "grad_norm": 1.3292382955551147, "learning_rate": 9.680789473684211e-05, "loss": 0.3645, "step": 32144 }, { "epoch": 1.8000335983872775, "grad_norm": 1.2569174766540527, "learning_rate": 9.680763157894738e-05, "loss": 0.3685, "step": 32145 }, { "epoch": 1.8000895956994065, "grad_norm": 1.0837546586990356, "learning_rate": 9.680736842105264e-05, "loss": 0.3407, "step": 32146 }, { "epoch": 
1.8001455930115355, "grad_norm": 1.473798155784607, "learning_rate": 9.68071052631579e-05, "loss": 0.4752, "step": 32147 }, { "epoch": 1.8002015903236646, "grad_norm": 1.2655588388442993, "learning_rate": 9.680684210526316e-05, "loss": 0.4597, "step": 32148 }, { "epoch": 1.8002575876357936, "grad_norm": 1.2842222452163696, "learning_rate": 9.680657894736842e-05, "loss": 0.3583, "step": 32149 }, { "epoch": 1.8003135849479226, "grad_norm": 1.1543163061141968, "learning_rate": 9.68063157894737e-05, "loss": 0.3795, "step": 32150 }, { "epoch": 1.8003695822600516, "grad_norm": 1.3438326120376587, "learning_rate": 9.680605263157895e-05, "loss": 0.4056, "step": 32151 }, { "epoch": 1.8004255795721806, "grad_norm": 1.7318296432495117, "learning_rate": 9.680578947368421e-05, "loss": 0.5532, "step": 32152 }, { "epoch": 1.8004815768843097, "grad_norm": 1.3657329082489014, "learning_rate": 9.680552631578947e-05, "loss": 0.5227, "step": 32153 }, { "epoch": 1.8005375741964387, "grad_norm": 1.2669062614440918, "learning_rate": 9.680526315789475e-05, "loss": 0.4564, "step": 32154 }, { "epoch": 1.8005935715085677, "grad_norm": 1.3922449350357056, "learning_rate": 9.6805e-05, "loss": 0.4705, "step": 32155 }, { "epoch": 1.8006495688206967, "grad_norm": 1.5206490755081177, "learning_rate": 9.680473684210527e-05, "loss": 0.5274, "step": 32156 }, { "epoch": 1.8007055661328257, "grad_norm": 1.0941153764724731, "learning_rate": 9.680447368421053e-05, "loss": 0.3552, "step": 32157 }, { "epoch": 1.8007615634449547, "grad_norm": 1.8165775537490845, "learning_rate": 9.68042105263158e-05, "loss": 0.606, "step": 32158 }, { "epoch": 1.8008175607570838, "grad_norm": 1.0419973134994507, "learning_rate": 9.680394736842106e-05, "loss": 0.4076, "step": 32159 }, { "epoch": 1.8008735580692128, "grad_norm": 1.3721517324447632, "learning_rate": 9.680368421052632e-05, "loss": 0.5063, "step": 32160 }, { "epoch": 1.8009295553813418, "grad_norm": 1.3869708776474, "learning_rate": 9.680342105263158e-05, "loss": 
0.4236, "step": 32161 }, { "epoch": 1.8009855526934708, "grad_norm": 1.7303255796432495, "learning_rate": 9.680315789473685e-05, "loss": 0.4363, "step": 32162 }, { "epoch": 1.8010415500055998, "grad_norm": 1.1910163164138794, "learning_rate": 9.680289473684211e-05, "loss": 0.4146, "step": 32163 }, { "epoch": 1.8010975473177289, "grad_norm": 1.8176252841949463, "learning_rate": 9.680263157894738e-05, "loss": 0.6962, "step": 32164 }, { "epoch": 1.8011535446298579, "grad_norm": 1.0348756313323975, "learning_rate": 9.680236842105263e-05, "loss": 0.3322, "step": 32165 }, { "epoch": 1.801209541941987, "grad_norm": 1.2159212827682495, "learning_rate": 9.680210526315789e-05, "loss": 0.4055, "step": 32166 }, { "epoch": 1.801265539254116, "grad_norm": 1.4628657102584839, "learning_rate": 9.680184210526316e-05, "loss": 0.4393, "step": 32167 }, { "epoch": 1.801321536566245, "grad_norm": 1.2295887470245361, "learning_rate": 9.680157894736842e-05, "loss": 0.5086, "step": 32168 }, { "epoch": 1.801377533878374, "grad_norm": 1.1949251890182495, "learning_rate": 9.68013157894737e-05, "loss": 0.4459, "step": 32169 }, { "epoch": 1.801433531190503, "grad_norm": 1.3208088874816895, "learning_rate": 9.680105263157894e-05, "loss": 0.6299, "step": 32170 }, { "epoch": 1.801489528502632, "grad_norm": 1.6019909381866455, "learning_rate": 9.680078947368422e-05, "loss": 0.5921, "step": 32171 }, { "epoch": 1.801545525814761, "grad_norm": 1.39640212059021, "learning_rate": 9.680052631578948e-05, "loss": 0.4855, "step": 32172 }, { "epoch": 1.80160152312689, "grad_norm": 1.4144102334976196, "learning_rate": 9.680026315789475e-05, "loss": 0.4963, "step": 32173 }, { "epoch": 1.801657520439019, "grad_norm": 1.6579128503799438, "learning_rate": 9.680000000000001e-05, "loss": 0.2844, "step": 32174 }, { "epoch": 1.801713517751148, "grad_norm": 1.77386474609375, "learning_rate": 9.679973684210527e-05, "loss": 0.5623, "step": 32175 }, { "epoch": 1.801769515063277, "grad_norm": 1.1909027099609375, 
"learning_rate": 9.679947368421053e-05, "loss": 0.3624, "step": 32176 }, { "epoch": 1.8018255123754061, "grad_norm": 1.2934516668319702, "learning_rate": 9.67992105263158e-05, "loss": 0.3771, "step": 32177 }, { "epoch": 1.8018815096875351, "grad_norm": 1.3932722806930542, "learning_rate": 9.679894736842106e-05, "loss": 0.4694, "step": 32178 }, { "epoch": 1.8019375069996642, "grad_norm": 1.1344374418258667, "learning_rate": 9.679868421052632e-05, "loss": 0.4734, "step": 32179 }, { "epoch": 1.8019935043117932, "grad_norm": 1.27701735496521, "learning_rate": 9.679842105263158e-05, "loss": 0.4842, "step": 32180 }, { "epoch": 1.8020495016239222, "grad_norm": 1.308442234992981, "learning_rate": 9.679815789473685e-05, "loss": 0.3844, "step": 32181 }, { "epoch": 1.8021054989360512, "grad_norm": 1.227443814277649, "learning_rate": 9.679789473684211e-05, "loss": 0.4875, "step": 32182 }, { "epoch": 1.8021614962481802, "grad_norm": 1.7548967599868774, "learning_rate": 9.679763157894737e-05, "loss": 0.424, "step": 32183 }, { "epoch": 1.8022174935603092, "grad_norm": 1.1256011724472046, "learning_rate": 9.679736842105263e-05, "loss": 0.4127, "step": 32184 }, { "epoch": 1.8022734908724383, "grad_norm": 1.2418335676193237, "learning_rate": 9.679710526315789e-05, "loss": 0.3754, "step": 32185 }, { "epoch": 1.8023294881845673, "grad_norm": 1.2596659660339355, "learning_rate": 9.679684210526317e-05, "loss": 0.4198, "step": 32186 }, { "epoch": 1.8023854854966963, "grad_norm": 1.1821552515029907, "learning_rate": 9.679657894736843e-05, "loss": 0.398, "step": 32187 }, { "epoch": 1.8024414828088253, "grad_norm": 1.5141074657440186, "learning_rate": 9.679631578947369e-05, "loss": 0.5065, "step": 32188 }, { "epoch": 1.8024974801209543, "grad_norm": 1.3615210056304932, "learning_rate": 9.679605263157894e-05, "loss": 0.3789, "step": 32189 }, { "epoch": 1.8025534774330834, "grad_norm": 1.0209513902664185, "learning_rate": 9.679578947368422e-05, "loss": 0.3548, "step": 32190 }, { "epoch": 
1.8026094747452124, "grad_norm": 1.6605288982391357, "learning_rate": 9.679552631578948e-05, "loss": 0.3921, "step": 32191 }, { "epoch": 1.8026654720573414, "grad_norm": 1.5985584259033203, "learning_rate": 9.679526315789475e-05, "loss": 0.2913, "step": 32192 }, { "epoch": 1.8027214693694704, "grad_norm": 1.179266333580017, "learning_rate": 9.6795e-05, "loss": 0.3706, "step": 32193 }, { "epoch": 1.8027774666815994, "grad_norm": 1.19562828540802, "learning_rate": 9.679473684210527e-05, "loss": 0.4548, "step": 32194 }, { "epoch": 1.8028334639937285, "grad_norm": 1.4101299047470093, "learning_rate": 9.679447368421053e-05, "loss": 0.5092, "step": 32195 }, { "epoch": 1.8028894613058575, "grad_norm": 1.1909759044647217, "learning_rate": 9.67942105263158e-05, "loss": 0.4377, "step": 32196 }, { "epoch": 1.8029454586179865, "grad_norm": 1.3512917757034302, "learning_rate": 9.679394736842105e-05, "loss": 0.472, "step": 32197 }, { "epoch": 1.8030014559301155, "grad_norm": 0.9307965636253357, "learning_rate": 9.679368421052632e-05, "loss": 0.4077, "step": 32198 }, { "epoch": 1.8030574532422445, "grad_norm": 1.3607311248779297, "learning_rate": 9.679342105263158e-05, "loss": 0.4763, "step": 32199 }, { "epoch": 1.8031134505543736, "grad_norm": 1.2147271633148193, "learning_rate": 9.679315789473686e-05, "loss": 0.5062, "step": 32200 }, { "epoch": 1.8031694478665026, "grad_norm": 1.5019962787628174, "learning_rate": 9.679289473684212e-05, "loss": 0.4833, "step": 32201 }, { "epoch": 1.8032254451786316, "grad_norm": 1.2628660202026367, "learning_rate": 9.679263157894736e-05, "loss": 0.4927, "step": 32202 }, { "epoch": 1.8032814424907604, "grad_norm": 1.1674035787582397, "learning_rate": 9.679236842105264e-05, "loss": 0.4991, "step": 32203 }, { "epoch": 1.8033374398028894, "grad_norm": 1.3083025217056274, "learning_rate": 9.67921052631579e-05, "loss": 0.4124, "step": 32204 }, { "epoch": 1.8033934371150184, "grad_norm": 1.4513611793518066, "learning_rate": 9.679184210526317e-05, 
"loss": 0.4372, "step": 32205 }, { "epoch": 1.8034494344271474, "grad_norm": 1.7680569887161255, "learning_rate": 9.679157894736843e-05, "loss": 0.4754, "step": 32206 }, { "epoch": 1.8035054317392765, "grad_norm": 1.0999014377593994, "learning_rate": 9.679131578947369e-05, "loss": 0.4982, "step": 32207 }, { "epoch": 1.8035614290514055, "grad_norm": 1.3361412286758423, "learning_rate": 9.679105263157895e-05, "loss": 0.4791, "step": 32208 }, { "epoch": 1.8036174263635345, "grad_norm": 1.3928239345550537, "learning_rate": 9.679078947368422e-05, "loss": 0.5503, "step": 32209 }, { "epoch": 1.8036734236756635, "grad_norm": 1.30655038356781, "learning_rate": 9.679052631578948e-05, "loss": 0.45, "step": 32210 }, { "epoch": 1.8037294209877925, "grad_norm": 1.2056602239608765, "learning_rate": 9.679026315789474e-05, "loss": 0.4359, "step": 32211 }, { "epoch": 1.8037854182999216, "grad_norm": 1.2480945587158203, "learning_rate": 9.679e-05, "loss": 0.503, "step": 32212 }, { "epoch": 1.8038414156120506, "grad_norm": 1.1976988315582275, "learning_rate": 9.678973684210527e-05, "loss": 0.3591, "step": 32213 }, { "epoch": 1.8038974129241796, "grad_norm": 1.318771243095398, "learning_rate": 9.678947368421053e-05, "loss": 0.5894, "step": 32214 }, { "epoch": 1.8039534102363086, "grad_norm": 1.0990079641342163, "learning_rate": 9.678921052631579e-05, "loss": 0.4771, "step": 32215 }, { "epoch": 1.8040094075484376, "grad_norm": 1.2563648223876953, "learning_rate": 9.678894736842105e-05, "loss": 0.5148, "step": 32216 }, { "epoch": 1.8040654048605667, "grad_norm": 1.1544779539108276, "learning_rate": 9.678868421052633e-05, "loss": 0.4065, "step": 32217 }, { "epoch": 1.8041214021726957, "grad_norm": 1.2515053749084473, "learning_rate": 9.678842105263159e-05, "loss": 0.3505, "step": 32218 }, { "epoch": 1.8041773994848247, "grad_norm": 1.3231868743896484, "learning_rate": 9.678815789473685e-05, "loss": 0.4272, "step": 32219 }, { "epoch": 1.8042333967969537, "grad_norm": 1.3882676362991333, 
"learning_rate": 9.67878947368421e-05, "loss": 0.4257, "step": 32220 }, { "epoch": 1.8042893941090827, "grad_norm": 1.2684223651885986, "learning_rate": 9.678763157894736e-05, "loss": 0.5404, "step": 32221 }, { "epoch": 1.8043453914212118, "grad_norm": 1.5299326181411743, "learning_rate": 9.678736842105264e-05, "loss": 0.4626, "step": 32222 }, { "epoch": 1.8044013887333408, "grad_norm": 1.2657458782196045, "learning_rate": 9.67871052631579e-05, "loss": 0.4272, "step": 32223 }, { "epoch": 1.8044573860454698, "grad_norm": 1.9942419528961182, "learning_rate": 9.678684210526317e-05, "loss": 0.4734, "step": 32224 }, { "epoch": 1.8045133833575988, "grad_norm": 1.2680203914642334, "learning_rate": 9.678657894736842e-05, "loss": 0.4573, "step": 32225 }, { "epoch": 1.8045693806697278, "grad_norm": 1.1932395696640015, "learning_rate": 9.678631578947369e-05, "loss": 0.4803, "step": 32226 }, { "epoch": 1.8046253779818568, "grad_norm": 1.2549781799316406, "learning_rate": 9.678605263157895e-05, "loss": 0.5361, "step": 32227 }, { "epoch": 1.8046813752939859, "grad_norm": 1.649200677871704, "learning_rate": 9.678578947368422e-05, "loss": 0.4761, "step": 32228 }, { "epoch": 1.8047373726061149, "grad_norm": 1.0116018056869507, "learning_rate": 9.678552631578948e-05, "loss": 0.2765, "step": 32229 }, { "epoch": 1.804793369918244, "grad_norm": 1.1805944442749023, "learning_rate": 9.678526315789474e-05, "loss": 0.4441, "step": 32230 }, { "epoch": 1.804849367230373, "grad_norm": 1.7720093727111816, "learning_rate": 9.6785e-05, "loss": 0.4193, "step": 32231 }, { "epoch": 1.804905364542502, "grad_norm": 1.3531529903411865, "learning_rate": 9.678473684210528e-05, "loss": 0.6227, "step": 32232 }, { "epoch": 1.804961361854631, "grad_norm": 1.4623477458953857, "learning_rate": 9.678447368421054e-05, "loss": 0.5265, "step": 32233 }, { "epoch": 1.80501735916676, "grad_norm": 1.2007571458816528, "learning_rate": 9.67842105263158e-05, "loss": 0.3738, "step": 32234 }, { "epoch": 1.805073356478889, 
"grad_norm": 1.392640471458435, "learning_rate": 9.678394736842106e-05, "loss": 0.6393, "step": 32235 }, { "epoch": 1.805129353791018, "grad_norm": 1.4702069759368896, "learning_rate": 9.678368421052631e-05, "loss": 0.4482, "step": 32236 }, { "epoch": 1.805185351103147, "grad_norm": 1.3520073890686035, "learning_rate": 9.678342105263159e-05, "loss": 0.6072, "step": 32237 }, { "epoch": 1.805241348415276, "grad_norm": 1.0187300443649292, "learning_rate": 9.678315789473685e-05, "loss": 0.3648, "step": 32238 }, { "epoch": 1.805297345727405, "grad_norm": 1.127990961074829, "learning_rate": 9.678289473684211e-05, "loss": 0.4802, "step": 32239 }, { "epoch": 1.805353343039534, "grad_norm": 1.6357054710388184, "learning_rate": 9.678263157894737e-05, "loss": 0.6482, "step": 32240 }, { "epoch": 1.8054093403516631, "grad_norm": 1.160599708557129, "learning_rate": 9.678236842105264e-05, "loss": 0.4286, "step": 32241 }, { "epoch": 1.8054653376637921, "grad_norm": 2.0232622623443604, "learning_rate": 9.67821052631579e-05, "loss": 0.7219, "step": 32242 }, { "epoch": 1.8055213349759212, "grad_norm": 1.1855010986328125, "learning_rate": 9.678184210526316e-05, "loss": 0.3872, "step": 32243 }, { "epoch": 1.8055773322880502, "grad_norm": 1.3260722160339355, "learning_rate": 9.678157894736842e-05, "loss": 0.4659, "step": 32244 }, { "epoch": 1.8056333296001792, "grad_norm": 1.2078649997711182, "learning_rate": 9.678131578947369e-05, "loss": 0.359, "step": 32245 }, { "epoch": 1.8056893269123082, "grad_norm": 1.1123757362365723, "learning_rate": 9.678105263157895e-05, "loss": 0.3495, "step": 32246 }, { "epoch": 1.8057453242244372, "grad_norm": 1.3541125059127808, "learning_rate": 9.678078947368421e-05, "loss": 0.4439, "step": 32247 }, { "epoch": 1.8058013215365663, "grad_norm": 1.3888378143310547, "learning_rate": 9.678052631578947e-05, "loss": 0.4234, "step": 32248 }, { "epoch": 1.8058573188486953, "grad_norm": 1.2677805423736572, "learning_rate": 9.678026315789475e-05, "loss": 0.4105, 
"step": 32249 }, { "epoch": 1.8059133161608243, "grad_norm": 1.654213309288025, "learning_rate": 9.678e-05, "loss": 0.5667, "step": 32250 }, { "epoch": 1.8059693134729533, "grad_norm": 1.298555850982666, "learning_rate": 9.677973684210528e-05, "loss": 0.4278, "step": 32251 }, { "epoch": 1.8060253107850823, "grad_norm": 1.2293976545333862, "learning_rate": 9.677947368421052e-05, "loss": 0.3329, "step": 32252 }, { "epoch": 1.8060813080972113, "grad_norm": 1.1662614345550537, "learning_rate": 9.677921052631578e-05, "loss": 0.4153, "step": 32253 }, { "epoch": 1.8061373054093404, "grad_norm": 1.3072854280471802, "learning_rate": 9.677894736842106e-05, "loss": 0.4323, "step": 32254 }, { "epoch": 1.8061933027214694, "grad_norm": 1.1795058250427246, "learning_rate": 9.677868421052632e-05, "loss": 0.4744, "step": 32255 }, { "epoch": 1.8062493000335984, "grad_norm": 1.3275505304336548, "learning_rate": 9.677842105263159e-05, "loss": 0.5024, "step": 32256 }, { "epoch": 1.8063052973457274, "grad_norm": 1.6467268466949463, "learning_rate": 9.677815789473684e-05, "loss": 0.5493, "step": 32257 }, { "epoch": 1.8063612946578564, "grad_norm": 1.3063864707946777, "learning_rate": 9.677789473684211e-05, "loss": 0.3845, "step": 32258 }, { "epoch": 1.8064172919699855, "grad_norm": 1.3723528385162354, "learning_rate": 9.677763157894737e-05, "loss": 0.3789, "step": 32259 }, { "epoch": 1.8064732892821145, "grad_norm": 1.4124640226364136, "learning_rate": 9.677736842105264e-05, "loss": 0.4726, "step": 32260 }, { "epoch": 1.8065292865942435, "grad_norm": 1.2545688152313232, "learning_rate": 9.67771052631579e-05, "loss": 0.3276, "step": 32261 }, { "epoch": 1.8065852839063725, "grad_norm": 1.3047953844070435, "learning_rate": 9.677684210526316e-05, "loss": 0.5004, "step": 32262 }, { "epoch": 1.8066412812185015, "grad_norm": 1.2162398099899292, "learning_rate": 9.677657894736842e-05, "loss": 0.6282, "step": 32263 }, { "epoch": 1.8066972785306306, "grad_norm": 1.263633131980896, "learning_rate": 
9.67763157894737e-05, "loss": 0.4557, "step": 32264 }, { "epoch": 1.8067532758427596, "grad_norm": 1.1215318441390991, "learning_rate": 9.677605263157896e-05, "loss": 0.3735, "step": 32265 }, { "epoch": 1.8068092731548886, "grad_norm": 1.6310820579528809, "learning_rate": 9.677578947368421e-05, "loss": 0.379, "step": 32266 }, { "epoch": 1.8068652704670176, "grad_norm": 1.388453483581543, "learning_rate": 9.677552631578947e-05, "loss": 0.6514, "step": 32267 }, { "epoch": 1.8069212677791466, "grad_norm": 1.3009912967681885, "learning_rate": 9.677526315789475e-05, "loss": 0.3912, "step": 32268 }, { "epoch": 1.8069772650912757, "grad_norm": 1.580469012260437, "learning_rate": 9.677500000000001e-05, "loss": 0.3718, "step": 32269 }, { "epoch": 1.8070332624034047, "grad_norm": 1.0518136024475098, "learning_rate": 9.677473684210527e-05, "loss": 0.5209, "step": 32270 }, { "epoch": 1.8070892597155337, "grad_norm": 1.1770780086517334, "learning_rate": 9.677447368421053e-05, "loss": 0.4068, "step": 32271 }, { "epoch": 1.8071452570276627, "grad_norm": 1.210191011428833, "learning_rate": 9.677421052631579e-05, "loss": 0.417, "step": 32272 }, { "epoch": 1.8072012543397917, "grad_norm": 1.0887278318405151, "learning_rate": 9.677394736842106e-05, "loss": 0.397, "step": 32273 }, { "epoch": 1.8072572516519207, "grad_norm": 1.2882227897644043, "learning_rate": 9.677368421052632e-05, "loss": 0.4956, "step": 32274 }, { "epoch": 1.8073132489640498, "grad_norm": 1.2671078443527222, "learning_rate": 9.677342105263158e-05, "loss": 0.4725, "step": 32275 }, { "epoch": 1.8073692462761788, "grad_norm": 1.2704538106918335, "learning_rate": 9.677315789473684e-05, "loss": 0.3748, "step": 32276 }, { "epoch": 1.8074252435883078, "grad_norm": 1.7363781929016113, "learning_rate": 9.677289473684211e-05, "loss": 0.5365, "step": 32277 }, { "epoch": 1.8074812409004368, "grad_norm": 1.450387954711914, "learning_rate": 9.677263157894737e-05, "loss": 0.5902, "step": 32278 }, { "epoch": 1.8075372382125658, 
"grad_norm": 1.348443627357483, "learning_rate": 9.677236842105265e-05, "loss": 0.4706, "step": 32279 }, { "epoch": 1.8075932355246949, "grad_norm": 1.037840485572815, "learning_rate": 9.677210526315789e-05, "loss": 0.4654, "step": 32280 }, { "epoch": 1.8076492328368239, "grad_norm": 1.7464728355407715, "learning_rate": 9.677184210526317e-05, "loss": 0.4769, "step": 32281 }, { "epoch": 1.807705230148953, "grad_norm": 1.4880002737045288, "learning_rate": 9.677157894736842e-05, "loss": 0.5658, "step": 32282 }, { "epoch": 1.807761227461082, "grad_norm": 1.1665414571762085, "learning_rate": 9.67713157894737e-05, "loss": 0.4121, "step": 32283 }, { "epoch": 1.807817224773211, "grad_norm": 1.2785245180130005, "learning_rate": 9.677105263157896e-05, "loss": 0.7103, "step": 32284 }, { "epoch": 1.8078732220853397, "grad_norm": 1.2456238269805908, "learning_rate": 9.677078947368422e-05, "loss": 0.5982, "step": 32285 }, { "epoch": 1.8079292193974688, "grad_norm": 1.2603510618209839, "learning_rate": 9.677052631578948e-05, "loss": 0.4032, "step": 32286 }, { "epoch": 1.8079852167095978, "grad_norm": 1.1457760334014893, "learning_rate": 9.677026315789474e-05, "loss": 0.3193, "step": 32287 }, { "epoch": 1.8080412140217268, "grad_norm": 1.6529200077056885, "learning_rate": 9.677000000000001e-05, "loss": 0.3488, "step": 32288 }, { "epoch": 1.8080972113338558, "grad_norm": 1.6476494073867798, "learning_rate": 9.676973684210526e-05, "loss": 0.5689, "step": 32289 }, { "epoch": 1.8081532086459848, "grad_norm": 1.4062116146087646, "learning_rate": 9.676947368421053e-05, "loss": 0.4178, "step": 32290 }, { "epoch": 1.8082092059581139, "grad_norm": 1.2759218215942383, "learning_rate": 9.676921052631579e-05, "loss": 0.4394, "step": 32291 }, { "epoch": 1.8082652032702429, "grad_norm": 1.1573610305786133, "learning_rate": 9.676894736842106e-05, "loss": 0.4067, "step": 32292 }, { "epoch": 1.808321200582372, "grad_norm": 1.9033753871917725, "learning_rate": 9.676868421052632e-05, "loss": 0.4422, 
"step": 32293 }, { "epoch": 1.808377197894501, "grad_norm": 1.420331358909607, "learning_rate": 9.676842105263158e-05, "loss": 0.4602, "step": 32294 }, { "epoch": 1.80843319520663, "grad_norm": 1.4490327835083008, "learning_rate": 9.676815789473684e-05, "loss": 0.4131, "step": 32295 }, { "epoch": 1.808489192518759, "grad_norm": 1.15997314453125, "learning_rate": 9.676789473684212e-05, "loss": 0.4344, "step": 32296 }, { "epoch": 1.808545189830888, "grad_norm": 1.6639409065246582, "learning_rate": 9.676763157894737e-05, "loss": 0.3995, "step": 32297 }, { "epoch": 1.808601187143017, "grad_norm": 1.2038909196853638, "learning_rate": 9.676736842105263e-05, "loss": 0.5114, "step": 32298 }, { "epoch": 1.808657184455146, "grad_norm": 4.691858291625977, "learning_rate": 9.67671052631579e-05, "loss": 0.4514, "step": 32299 }, { "epoch": 1.808713181767275, "grad_norm": 1.3198199272155762, "learning_rate": 9.676684210526317e-05, "loss": 0.4075, "step": 32300 }, { "epoch": 1.808769179079404, "grad_norm": 1.3034695386886597, "learning_rate": 9.676657894736843e-05, "loss": 0.3631, "step": 32301 }, { "epoch": 1.808825176391533, "grad_norm": 1.3898499011993408, "learning_rate": 9.676631578947369e-05, "loss": 0.4603, "step": 32302 }, { "epoch": 1.808881173703662, "grad_norm": 1.1379055976867676, "learning_rate": 9.676605263157895e-05, "loss": 0.4313, "step": 32303 }, { "epoch": 1.808937171015791, "grad_norm": 1.540268898010254, "learning_rate": 9.67657894736842e-05, "loss": 0.3844, "step": 32304 }, { "epoch": 1.8089931683279201, "grad_norm": 1.2744961977005005, "learning_rate": 9.676552631578948e-05, "loss": 0.4449, "step": 32305 }, { "epoch": 1.8090491656400491, "grad_norm": 1.347895622253418, "learning_rate": 9.676526315789474e-05, "loss": 0.5563, "step": 32306 }, { "epoch": 1.8091051629521782, "grad_norm": 1.3699471950531006, "learning_rate": 9.6765e-05, "loss": 0.4339, "step": 32307 }, { "epoch": 1.8091611602643072, "grad_norm": 1.4137951135635376, "learning_rate": 
9.676473684210526e-05, "loss": 0.4656, "step": 32308 }, { "epoch": 1.8092171575764362, "grad_norm": 1.2649034261703491, "learning_rate": 9.676447368421053e-05, "loss": 0.6339, "step": 32309 }, { "epoch": 1.8092731548885652, "grad_norm": 1.2294228076934814, "learning_rate": 9.676421052631579e-05, "loss": 0.4258, "step": 32310 }, { "epoch": 1.8093291522006942, "grad_norm": 1.2274796962738037, "learning_rate": 9.676394736842107e-05, "loss": 0.4107, "step": 32311 }, { "epoch": 1.8093851495128233, "grad_norm": 1.0296804904937744, "learning_rate": 9.676368421052631e-05, "loss": 0.34, "step": 32312 }, { "epoch": 1.8094411468249523, "grad_norm": 1.4007890224456787, "learning_rate": 9.676342105263158e-05, "loss": 0.5687, "step": 32313 }, { "epoch": 1.8094971441370813, "grad_norm": 1.0892633199691772, "learning_rate": 9.676315789473684e-05, "loss": 0.379, "step": 32314 }, { "epoch": 1.8095531414492103, "grad_norm": 1.4093111753463745, "learning_rate": 9.676289473684212e-05, "loss": 0.4331, "step": 32315 }, { "epoch": 1.8096091387613393, "grad_norm": 1.26890230178833, "learning_rate": 9.676263157894738e-05, "loss": 0.4225, "step": 32316 }, { "epoch": 1.8096651360734684, "grad_norm": 1.212093710899353, "learning_rate": 9.676236842105264e-05, "loss": 0.448, "step": 32317 }, { "epoch": 1.8097211333855974, "grad_norm": 1.0250269174575806, "learning_rate": 9.67621052631579e-05, "loss": 0.3897, "step": 32318 }, { "epoch": 1.8097771306977264, "grad_norm": 1.362004280090332, "learning_rate": 9.676184210526317e-05, "loss": 0.4284, "step": 32319 }, { "epoch": 1.8098331280098554, "grad_norm": 1.3526875972747803, "learning_rate": 9.676157894736843e-05, "loss": 0.4642, "step": 32320 }, { "epoch": 1.8098891253219844, "grad_norm": 1.1725151538848877, "learning_rate": 9.676131578947369e-05, "loss": 0.2806, "step": 32321 }, { "epoch": 1.8099451226341134, "grad_norm": 1.3579639196395874, "learning_rate": 9.676105263157895e-05, "loss": 0.3285, "step": 32322 }, { "epoch": 1.8100011199462425, 
"grad_norm": 1.3228201866149902, "learning_rate": 9.676078947368421e-05, "loss": 0.3841, "step": 32323 }, { "epoch": 1.8100571172583715, "grad_norm": 1.3598320484161377, "learning_rate": 9.676052631578948e-05, "loss": 0.4721, "step": 32324 }, { "epoch": 1.8101131145705005, "grad_norm": 1.3178752660751343, "learning_rate": 9.676026315789474e-05, "loss": 0.5529, "step": 32325 }, { "epoch": 1.8101691118826295, "grad_norm": 1.1229150295257568, "learning_rate": 9.676e-05, "loss": 0.3938, "step": 32326 }, { "epoch": 1.8102251091947585, "grad_norm": 1.2453727722167969, "learning_rate": 9.675973684210526e-05, "loss": 0.4427, "step": 32327 }, { "epoch": 1.8102811065068876, "grad_norm": 1.4503259658813477, "learning_rate": 9.675947368421053e-05, "loss": 0.6826, "step": 32328 }, { "epoch": 1.8103371038190166, "grad_norm": 1.2733267545700073, "learning_rate": 9.67592105263158e-05, "loss": 0.4243, "step": 32329 }, { "epoch": 1.8103931011311456, "grad_norm": 1.0814517736434937, "learning_rate": 9.675894736842105e-05, "loss": 0.3606, "step": 32330 }, { "epoch": 1.8104490984432746, "grad_norm": 1.253149151802063, "learning_rate": 9.675868421052631e-05, "loss": 0.4945, "step": 32331 }, { "epoch": 1.8105050957554036, "grad_norm": 1.5092262029647827, "learning_rate": 9.675842105263159e-05, "loss": 0.504, "step": 32332 }, { "epoch": 1.8105610930675327, "grad_norm": 1.3534983396530151, "learning_rate": 9.675815789473685e-05, "loss": 0.4628, "step": 32333 }, { "epoch": 1.8106170903796617, "grad_norm": 1.501723289489746, "learning_rate": 9.675789473684212e-05, "loss": 0.4423, "step": 32334 }, { "epoch": 1.8106730876917907, "grad_norm": 1.143212080001831, "learning_rate": 9.675763157894737e-05, "loss": 0.4591, "step": 32335 }, { "epoch": 1.8107290850039197, "grad_norm": 1.273430585861206, "learning_rate": 9.675736842105264e-05, "loss": 0.4814, "step": 32336 }, { "epoch": 1.8107850823160487, "grad_norm": 1.1064444780349731, "learning_rate": 9.67571052631579e-05, "loss": 0.3152, "step": 
32337 }, { "epoch": 1.8108410796281778, "grad_norm": 1.2755224704742432, "learning_rate": 9.675684210526317e-05, "loss": 0.4356, "step": 32338 }, { "epoch": 1.8108970769403068, "grad_norm": 1.177262544631958, "learning_rate": 9.675657894736843e-05, "loss": 0.4795, "step": 32339 }, { "epoch": 1.8109530742524358, "grad_norm": 2.422985315322876, "learning_rate": 9.675631578947368e-05, "loss": 0.4553, "step": 32340 }, { "epoch": 1.8110090715645648, "grad_norm": 1.331923246383667, "learning_rate": 9.675605263157895e-05, "loss": 0.5829, "step": 32341 }, { "epoch": 1.8110650688766938, "grad_norm": 1.192797064781189, "learning_rate": 9.675578947368421e-05, "loss": 0.4697, "step": 32342 }, { "epoch": 1.8111210661888228, "grad_norm": 1.3705257177352905, "learning_rate": 9.675552631578949e-05, "loss": 0.4342, "step": 32343 }, { "epoch": 1.8111770635009519, "grad_norm": 1.352030873298645, "learning_rate": 9.675526315789473e-05, "loss": 0.4028, "step": 32344 }, { "epoch": 1.8112330608130809, "grad_norm": 1.074386477470398, "learning_rate": 9.6755e-05, "loss": 0.3142, "step": 32345 }, { "epoch": 1.81128905812521, "grad_norm": 1.3192023038864136, "learning_rate": 9.675473684210526e-05, "loss": 0.3898, "step": 32346 }, { "epoch": 1.811345055437339, "grad_norm": 1.1558268070220947, "learning_rate": 9.675447368421054e-05, "loss": 0.4478, "step": 32347 }, { "epoch": 1.811401052749468, "grad_norm": 1.1593337059020996, "learning_rate": 9.67542105263158e-05, "loss": 0.45, "step": 32348 }, { "epoch": 1.811457050061597, "grad_norm": 1.3755301237106323, "learning_rate": 9.675394736842106e-05, "loss": 0.4612, "step": 32349 }, { "epoch": 1.811513047373726, "grad_norm": 1.855124592781067, "learning_rate": 9.675368421052632e-05, "loss": 0.519, "step": 32350 }, { "epoch": 1.811569044685855, "grad_norm": 1.2857928276062012, "learning_rate": 9.675342105263159e-05, "loss": 0.3673, "step": 32351 }, { "epoch": 1.811625041997984, "grad_norm": 1.2704052925109863, "learning_rate": 
9.675315789473685e-05, "loss": 0.4789, "step": 32352 }, { "epoch": 1.811681039310113, "grad_norm": 1.3460575342178345, "learning_rate": 9.675289473684211e-05, "loss": 0.5163, "step": 32353 }, { "epoch": 1.811737036622242, "grad_norm": 1.560444712638855, "learning_rate": 9.675263157894737e-05, "loss": 0.7829, "step": 32354 }, { "epoch": 1.811793033934371, "grad_norm": 2.1030690670013428, "learning_rate": 9.675236842105264e-05, "loss": 0.4952, "step": 32355 }, { "epoch": 1.8118490312465, "grad_norm": 1.3004571199417114, "learning_rate": 9.67521052631579e-05, "loss": 0.4162, "step": 32356 }, { "epoch": 1.8119050285586291, "grad_norm": 1.7338188886642456, "learning_rate": 9.675184210526316e-05, "loss": 0.6515, "step": 32357 }, { "epoch": 1.8119610258707581, "grad_norm": 1.035826563835144, "learning_rate": 9.675157894736842e-05, "loss": 0.4084, "step": 32358 }, { "epoch": 1.8120170231828872, "grad_norm": 1.3207197189331055, "learning_rate": 9.675131578947368e-05, "loss": 0.5052, "step": 32359 }, { "epoch": 1.8120730204950162, "grad_norm": 0.9274834990501404, "learning_rate": 9.675105263157895e-05, "loss": 0.3125, "step": 32360 }, { "epoch": 1.8121290178071452, "grad_norm": 1.6074193716049194, "learning_rate": 9.675078947368421e-05, "loss": 0.4257, "step": 32361 }, { "epoch": 1.8121850151192742, "grad_norm": 1.6241343021392822, "learning_rate": 9.675052631578947e-05, "loss": 0.5435, "step": 32362 }, { "epoch": 1.8122410124314032, "grad_norm": 1.4978107213974, "learning_rate": 9.675026315789473e-05, "loss": 0.374, "step": 32363 }, { "epoch": 1.8122970097435323, "grad_norm": 1.2861928939819336, "learning_rate": 9.675000000000001e-05, "loss": 0.4076, "step": 32364 }, { "epoch": 1.8123530070556613, "grad_norm": 1.4866325855255127, "learning_rate": 9.674973684210527e-05, "loss": 0.5525, "step": 32365 }, { "epoch": 1.8124090043677903, "grad_norm": 1.2640074491500854, "learning_rate": 9.674947368421054e-05, "loss": 0.4721, "step": 32366 }, { "epoch": 1.8124650016799193, 
"grad_norm": 1.274003505706787, "learning_rate": 9.674921052631579e-05, "loss": 0.5322, "step": 32367 }, { "epoch": 1.8125209989920483, "grad_norm": 1.17131507396698, "learning_rate": 9.674894736842106e-05, "loss": 0.4904, "step": 32368 }, { "epoch": 1.8125769963041773, "grad_norm": 1.154022455215454, "learning_rate": 9.674868421052632e-05, "loss": 0.3773, "step": 32369 }, { "epoch": 1.8126329936163064, "grad_norm": 1.114092469215393, "learning_rate": 9.674842105263159e-05, "loss": 0.608, "step": 32370 }, { "epoch": 1.8126889909284354, "grad_norm": 1.2449208498001099, "learning_rate": 9.674815789473685e-05, "loss": 0.4599, "step": 32371 }, { "epoch": 1.8127449882405644, "grad_norm": 1.368186354637146, "learning_rate": 9.674789473684211e-05, "loss": 0.4416, "step": 32372 }, { "epoch": 1.8128009855526934, "grad_norm": 1.2514532804489136, "learning_rate": 9.674763157894737e-05, "loss": 0.4161, "step": 32373 }, { "epoch": 1.8128569828648224, "grad_norm": 1.328495979309082, "learning_rate": 9.674736842105263e-05, "loss": 0.4454, "step": 32374 }, { "epoch": 1.8129129801769515, "grad_norm": 1.437473177909851, "learning_rate": 9.67471052631579e-05, "loss": 0.499, "step": 32375 }, { "epoch": 1.8129689774890805, "grad_norm": 1.3084028959274292, "learning_rate": 9.674684210526316e-05, "loss": 0.4292, "step": 32376 }, { "epoch": 1.8130249748012095, "grad_norm": 1.2749313116073608, "learning_rate": 9.674657894736842e-05, "loss": 0.4161, "step": 32377 }, { "epoch": 1.8130809721133385, "grad_norm": 1.0798845291137695, "learning_rate": 9.674631578947368e-05, "loss": 0.3706, "step": 32378 }, { "epoch": 1.8131369694254675, "grad_norm": 1.4673467874526978, "learning_rate": 9.674605263157896e-05, "loss": 0.4363, "step": 32379 }, { "epoch": 1.8131929667375966, "grad_norm": 1.444225549697876, "learning_rate": 9.674578947368422e-05, "loss": 0.488, "step": 32380 }, { "epoch": 1.8132489640497256, "grad_norm": 1.188883900642395, "learning_rate": 9.674552631578948e-05, "loss": 0.3659, 
"step": 32381 }, { "epoch": 1.8133049613618546, "grad_norm": 1.378848671913147, "learning_rate": 9.674526315789474e-05, "loss": 0.4077, "step": 32382 }, { "epoch": 1.8133609586739836, "grad_norm": NaN, "learning_rate": 9.674526315789474e-05, "loss": 0.6469, "step": 32383 }, { "epoch": 1.8134169559861126, "grad_norm": 1.1309772729873657, "learning_rate": 9.674500000000001e-05, "loss": 0.3757, "step": 32384 }, { "epoch": 1.8134729532982417, "grad_norm": 1.1356582641601562, "learning_rate": 9.674473684210527e-05, "loss": 0.5102, "step": 32385 }, { "epoch": 1.8135289506103707, "grad_norm": 1.3671808242797852, "learning_rate": 9.674447368421053e-05, "loss": 0.3884, "step": 32386 }, { "epoch": 1.8135849479224997, "grad_norm": 1.0610895156860352, "learning_rate": 9.674421052631579e-05, "loss": 0.4281, "step": 32387 }, { "epoch": 1.8136409452346287, "grad_norm": 1.7117756605148315, "learning_rate": 9.674394736842106e-05, "loss": 0.4621, "step": 32388 }, { "epoch": 1.8136969425467577, "grad_norm": 1.286011815071106, "learning_rate": 9.674368421052632e-05, "loss": 0.3887, "step": 32389 }, { "epoch": 1.8137529398588867, "grad_norm": 1.394737720489502, "learning_rate": 9.67434210526316e-05, "loss": 0.4349, "step": 32390 }, { "epoch": 1.8138089371710158, "grad_norm": 2.456839084625244, "learning_rate": 9.674315789473684e-05, "loss": 0.4555, "step": 32391 }, { "epoch": 1.8138649344831448, "grad_norm": 1.1063789129257202, "learning_rate": 9.67428947368421e-05, "loss": 0.3573, "step": 32392 }, { "epoch": 1.8139209317952738, "grad_norm": 1.1253880262374878, "learning_rate": 9.674263157894737e-05, "loss": 0.3738, "step": 32393 }, { "epoch": 1.8139769291074028, "grad_norm": 1.2422535419464111, "learning_rate": 9.674236842105263e-05, "loss": 0.5004, "step": 32394 }, { "epoch": 1.8140329264195318, "grad_norm": 2.7551474571228027, "learning_rate": 9.67421052631579e-05, "loss": 0.6218, "step": 32395 }, { "epoch": 1.8140889237316609, "grad_norm": 1.719006896018982, "learning_rate": 
9.674184210526315e-05, "loss": 0.6325, "step": 32396 }, { "epoch": 1.8141449210437899, "grad_norm": 1.2585749626159668, "learning_rate": 9.674157894736843e-05, "loss": 0.4662, "step": 32397 }, { "epoch": 1.814200918355919, "grad_norm": 1.2235743999481201, "learning_rate": 9.674131578947369e-05, "loss": 0.5555, "step": 32398 }, { "epoch": 1.814256915668048, "grad_norm": 1.2404729127883911, "learning_rate": 9.674105263157896e-05, "loss": 0.4315, "step": 32399 }, { "epoch": 1.814312912980177, "grad_norm": 1.3855149745941162, "learning_rate": 9.67407894736842e-05, "loss": 0.6888, "step": 32400 }, { "epoch": 1.814368910292306, "grad_norm": 1.2371548414230347, "learning_rate": 9.674052631578948e-05, "loss": 0.4986, "step": 32401 }, { "epoch": 1.814424907604435, "grad_norm": 1.169758677482605, "learning_rate": 9.674026315789474e-05, "loss": 0.3782, "step": 32402 }, { "epoch": 1.814480904916564, "grad_norm": 1.3540791273117065, "learning_rate": 9.674000000000001e-05, "loss": 0.5137, "step": 32403 }, { "epoch": 1.814536902228693, "grad_norm": 1.1753042936325073, "learning_rate": 9.673973684210527e-05, "loss": 0.5769, "step": 32404 }, { "epoch": 1.814592899540822, "grad_norm": 1.234282374382019, "learning_rate": 9.673947368421053e-05, "loss": 0.4231, "step": 32405 }, { "epoch": 1.814648896852951, "grad_norm": 1.2665352821350098, "learning_rate": 9.673921052631579e-05, "loss": 0.4462, "step": 32406 }, { "epoch": 1.81470489416508, "grad_norm": 1.6300742626190186, "learning_rate": 9.673894736842106e-05, "loss": 0.3469, "step": 32407 }, { "epoch": 1.814760891477209, "grad_norm": 1.1760287284851074, "learning_rate": 9.673868421052632e-05, "loss": 0.3751, "step": 32408 }, { "epoch": 1.8148168887893381, "grad_norm": 1.4402811527252197, "learning_rate": 9.673842105263158e-05, "loss": 0.4324, "step": 32409 }, { "epoch": 1.8148728861014671, "grad_norm": 1.525953769683838, "learning_rate": 9.673815789473684e-05, "loss": 0.4798, "step": 32410 }, { "epoch": 1.8149288834135962, 
"grad_norm": 1.136301040649414, "learning_rate": 9.67378947368421e-05, "loss": 0.351, "step": 32411 }, { "epoch": 1.8149848807257252, "grad_norm": 1.1505835056304932, "learning_rate": 9.673763157894738e-05, "loss": 0.4527, "step": 32412 }, { "epoch": 1.8150408780378542, "grad_norm": 1.2191072702407837, "learning_rate": 9.673736842105264e-05, "loss": 0.4146, "step": 32413 }, { "epoch": 1.8150968753499832, "grad_norm": 1.3683855533599854, "learning_rate": 9.67371052631579e-05, "loss": 0.5296, "step": 32414 }, { "epoch": 1.8151528726621122, "grad_norm": 1.335317611694336, "learning_rate": 9.673684210526316e-05, "loss": 0.3824, "step": 32415 }, { "epoch": 1.8152088699742412, "grad_norm": 1.258230209350586, "learning_rate": 9.673657894736843e-05, "loss": 0.408, "step": 32416 }, { "epoch": 1.8152648672863703, "grad_norm": 1.4180176258087158, "learning_rate": 9.673631578947369e-05, "loss": 0.3647, "step": 32417 }, { "epoch": 1.8153208645984993, "grad_norm": 1.151563286781311, "learning_rate": 9.673605263157895e-05, "loss": 0.3705, "step": 32418 }, { "epoch": 1.8153768619106283, "grad_norm": 1.4188717603683472, "learning_rate": 9.673578947368421e-05, "loss": 0.5693, "step": 32419 }, { "epoch": 1.8154328592227573, "grad_norm": 1.3015892505645752, "learning_rate": 9.673552631578948e-05, "loss": 0.396, "step": 32420 }, { "epoch": 1.8154888565348863, "grad_norm": 1.1772204637527466, "learning_rate": 9.673526315789474e-05, "loss": 0.4458, "step": 32421 }, { "epoch": 1.8155448538470154, "grad_norm": 1.3385714292526245, "learning_rate": 9.673500000000001e-05, "loss": 0.4674, "step": 32422 }, { "epoch": 1.8156008511591444, "grad_norm": 1.2404509782791138, "learning_rate": 9.673473684210526e-05, "loss": 0.4117, "step": 32423 }, { "epoch": 1.8156568484712734, "grad_norm": 1.6911565065383911, "learning_rate": 9.673447368421053e-05, "loss": 0.4973, "step": 32424 }, { "epoch": 1.8157128457834024, "grad_norm": 1.200289011001587, "learning_rate": 9.67342105263158e-05, "loss": 0.3987, 
"step": 32425 }, { "epoch": 1.8157688430955314, "grad_norm": 1.3031913042068481, "learning_rate": 9.673394736842105e-05, "loss": 0.4437, "step": 32426 }, { "epoch": 1.8158248404076605, "grad_norm": 1.4410406351089478, "learning_rate": 9.673368421052633e-05, "loss": 0.4704, "step": 32427 }, { "epoch": 1.8158808377197895, "grad_norm": 3.1850626468658447, "learning_rate": 9.673342105263157e-05, "loss": 0.4591, "step": 32428 }, { "epoch": 1.8159368350319185, "grad_norm": 1.22998046875, "learning_rate": 9.673315789473685e-05, "loss": 0.5411, "step": 32429 }, { "epoch": 1.8159928323440475, "grad_norm": 1.366072416305542, "learning_rate": 9.67328947368421e-05, "loss": 0.5403, "step": 32430 }, { "epoch": 1.8160488296561765, "grad_norm": 1.1950949430465698, "learning_rate": 9.673263157894738e-05, "loss": 0.5019, "step": 32431 }, { "epoch": 1.8161048269683056, "grad_norm": 1.0988450050354004, "learning_rate": 9.673236842105264e-05, "loss": 0.377, "step": 32432 }, { "epoch": 1.8161608242804346, "grad_norm": 1.191611886024475, "learning_rate": 9.67321052631579e-05, "loss": 0.3688, "step": 32433 }, { "epoch": 1.8162168215925636, "grad_norm": 1.55448579788208, "learning_rate": 9.673184210526316e-05, "loss": 0.4016, "step": 32434 }, { "epoch": 1.8162728189046926, "grad_norm": 1.2630044221878052, "learning_rate": 9.673157894736843e-05, "loss": 0.4086, "step": 32435 }, { "epoch": 1.8163288162168216, "grad_norm": 1.627791166305542, "learning_rate": 9.673131578947369e-05, "loss": 0.4262, "step": 32436 }, { "epoch": 1.8163848135289506, "grad_norm": 1.4962151050567627, "learning_rate": 9.673105263157895e-05, "loss": 0.4298, "step": 32437 }, { "epoch": 1.8164408108410797, "grad_norm": 1.174643635749817, "learning_rate": 9.673078947368421e-05, "loss": 0.4499, "step": 32438 }, { "epoch": 1.8164968081532087, "grad_norm": 1.398247480392456, "learning_rate": 9.673052631578948e-05, "loss": 0.4514, "step": 32439 }, { "epoch": 1.8165528054653377, "grad_norm": 1.5056617259979248, 
"learning_rate": 9.673026315789474e-05, "loss": 0.4985, "step": 32440 }, { "epoch": 1.8166088027774667, "grad_norm": 1.16782808303833, "learning_rate": 9.673e-05, "loss": 0.4234, "step": 32441 }, { "epoch": 1.8166648000895957, "grad_norm": 1.407913327217102, "learning_rate": 9.672973684210526e-05, "loss": 0.455, "step": 32442 }, { "epoch": 1.8167207974017248, "grad_norm": 1.251463532447815, "learning_rate": 9.672947368421054e-05, "loss": 0.4097, "step": 32443 }, { "epoch": 1.8167767947138538, "grad_norm": 1.2283661365509033, "learning_rate": 9.67292105263158e-05, "loss": 0.3914, "step": 32444 }, { "epoch": 1.8168327920259828, "grad_norm": 1.2987653017044067, "learning_rate": 9.672894736842106e-05, "loss": 0.447, "step": 32445 }, { "epoch": 1.8168887893381118, "grad_norm": 1.072947382926941, "learning_rate": 9.672868421052632e-05, "loss": 0.395, "step": 32446 }, { "epoch": 1.8169447866502408, "grad_norm": 1.221902847290039, "learning_rate": 9.672842105263158e-05, "loss": 0.4584, "step": 32447 }, { "epoch": 1.8170007839623699, "grad_norm": 1.433893084526062, "learning_rate": 9.672815789473685e-05, "loss": 0.4965, "step": 32448 }, { "epoch": 1.8170567812744989, "grad_norm": 1.2009406089782715, "learning_rate": 9.672789473684211e-05, "loss": 0.3997, "step": 32449 }, { "epoch": 1.817112778586628, "grad_norm": 1.0657004117965698, "learning_rate": 9.672763157894737e-05, "loss": 0.3921, "step": 32450 }, { "epoch": 1.817168775898757, "grad_norm": 1.0397655963897705, "learning_rate": 9.672736842105263e-05, "loss": 0.319, "step": 32451 }, { "epoch": 1.817224773210886, "grad_norm": 1.0295485258102417, "learning_rate": 9.67271052631579e-05, "loss": 0.3316, "step": 32452 }, { "epoch": 1.817280770523015, "grad_norm": 1.3678115606307983, "learning_rate": 9.672684210526316e-05, "loss": 0.4833, "step": 32453 }, { "epoch": 1.817336767835144, "grad_norm": 1.404530644416809, "learning_rate": 9.672657894736843e-05, "loss": 0.4843, "step": 32454 }, { "epoch": 1.817392765147273, 
"grad_norm": 1.1422580480575562, "learning_rate": 9.672631578947368e-05, "loss": 0.4187, "step": 32455 }, { "epoch": 1.817448762459402, "grad_norm": 1.2389475107192993, "learning_rate": 9.672605263157895e-05, "loss": 0.4367, "step": 32456 }, { "epoch": 1.817504759771531, "grad_norm": 1.4369456768035889, "learning_rate": 9.672578947368421e-05, "loss": 0.5171, "step": 32457 }, { "epoch": 1.81756075708366, "grad_norm": 1.3177380561828613, "learning_rate": 9.672552631578949e-05, "loss": 0.3973, "step": 32458 }, { "epoch": 1.817616754395789, "grad_norm": 1.4693036079406738, "learning_rate": 9.672526315789475e-05, "loss": 0.484, "step": 32459 }, { "epoch": 1.817672751707918, "grad_norm": 1.2984145879745483, "learning_rate": 9.6725e-05, "loss": 0.4323, "step": 32460 }, { "epoch": 1.817728749020047, "grad_norm": 1.3219741582870483, "learning_rate": 9.672473684210527e-05, "loss": 0.462, "step": 32461 }, { "epoch": 1.8177847463321761, "grad_norm": 1.6226540803909302, "learning_rate": 9.672447368421053e-05, "loss": 0.5406, "step": 32462 }, { "epoch": 1.8178407436443051, "grad_norm": 1.251319408416748, "learning_rate": 9.67242105263158e-05, "loss": 0.436, "step": 32463 }, { "epoch": 1.8178967409564342, "grad_norm": 1.5320277214050293, "learning_rate": 9.672394736842106e-05, "loss": 0.4934, "step": 32464 }, { "epoch": 1.8179527382685632, "grad_norm": 1.1777857542037964, "learning_rate": 9.672368421052632e-05, "loss": 0.3865, "step": 32465 }, { "epoch": 1.8180087355806922, "grad_norm": 1.2484936714172363, "learning_rate": 9.672342105263158e-05, "loss": 0.3981, "step": 32466 }, { "epoch": 1.8180647328928212, "grad_norm": 1.3128679990768433, "learning_rate": 9.672315789473685e-05, "loss": 0.4972, "step": 32467 }, { "epoch": 1.8181207302049502, "grad_norm": 1.1874022483825684, "learning_rate": 9.672289473684211e-05, "loss": 0.4869, "step": 32468 }, { "epoch": 1.8181767275170793, "grad_norm": 1.3044192790985107, "learning_rate": 9.672263157894737e-05, "loss": 0.5496, "step": 32469 
}, { "epoch": 1.8182327248292083, "grad_norm": 25.047182083129883, "learning_rate": 9.672236842105263e-05, "loss": 0.5854, "step": 32470 }, { "epoch": 1.8182887221413373, "grad_norm": 1.2622507810592651, "learning_rate": 9.67221052631579e-05, "loss": 0.3639, "step": 32471 }, { "epoch": 1.8183447194534663, "grad_norm": 1.3041987419128418, "learning_rate": 9.672184210526316e-05, "loss": 0.4311, "step": 32472 }, { "epoch": 1.8184007167655953, "grad_norm": 1.299324631690979, "learning_rate": 9.672157894736842e-05, "loss": 0.4381, "step": 32473 }, { "epoch": 1.8184567140777244, "grad_norm": 1.4443858861923218, "learning_rate": 9.672131578947368e-05, "loss": 0.4631, "step": 32474 }, { "epoch": 1.8185127113898534, "grad_norm": 1.4813605546951294, "learning_rate": 9.672105263157896e-05, "loss": 0.4762, "step": 32475 }, { "epoch": 1.8185687087019824, "grad_norm": 1.339863657951355, "learning_rate": 9.672078947368422e-05, "loss": 0.4089, "step": 32476 }, { "epoch": 1.8186247060141114, "grad_norm": 1.1854147911071777, "learning_rate": 9.672052631578949e-05, "loss": 0.5234, "step": 32477 }, { "epoch": 1.8186807033262404, "grad_norm": 1.4232052564620972, "learning_rate": 9.672026315789474e-05, "loss": 0.5493, "step": 32478 }, { "epoch": 1.8187367006383695, "grad_norm": 1.1105598211288452, "learning_rate": 9.672e-05, "loss": 0.3632, "step": 32479 }, { "epoch": 1.8187926979504985, "grad_norm": 1.3750697374343872, "learning_rate": 9.671973684210527e-05, "loss": 0.5164, "step": 32480 }, { "epoch": 1.8188486952626275, "grad_norm": 1.3002640008926392, "learning_rate": 9.671947368421053e-05, "loss": 0.4005, "step": 32481 }, { "epoch": 1.8189046925747565, "grad_norm": 1.077797532081604, "learning_rate": 9.67192105263158e-05, "loss": 0.3966, "step": 32482 }, { "epoch": 1.8189606898868855, "grad_norm": 1.2418121099472046, "learning_rate": 9.671894736842105e-05, "loss": 0.4458, "step": 32483 }, { "epoch": 1.8190166871990145, "grad_norm": 1.2358874082565308, "learning_rate": 
9.671868421052632e-05, "loss": 0.4766, "step": 32484 }, { "epoch": 1.8190726845111436, "grad_norm": 1.1712127923965454, "learning_rate": 9.671842105263158e-05, "loss": 0.4278, "step": 32485 }, { "epoch": 1.8191286818232726, "grad_norm": 1.1019481420516968, "learning_rate": 9.671815789473685e-05, "loss": 0.4316, "step": 32486 }, { "epoch": 1.8191846791354016, "grad_norm": 1.5419946908950806, "learning_rate": 9.671789473684211e-05, "loss": 0.5452, "step": 32487 }, { "epoch": 1.8192406764475306, "grad_norm": 0.9683425426483154, "learning_rate": 9.671763157894737e-05, "loss": 0.3032, "step": 32488 }, { "epoch": 1.8192966737596596, "grad_norm": 1.3100649118423462, "learning_rate": 9.671736842105263e-05, "loss": 0.4757, "step": 32489 }, { "epoch": 1.8193526710717887, "grad_norm": 1.3169389963150024, "learning_rate": 9.67171052631579e-05, "loss": 0.4721, "step": 32490 }, { "epoch": 1.8194086683839177, "grad_norm": 1.4245240688323975, "learning_rate": 9.671684210526317e-05, "loss": 0.4404, "step": 32491 }, { "epoch": 1.8194646656960467, "grad_norm": 1.2592575550079346, "learning_rate": 9.671657894736843e-05, "loss": 0.5125, "step": 32492 }, { "epoch": 1.8195206630081757, "grad_norm": 1.2397279739379883, "learning_rate": 9.671631578947369e-05, "loss": 0.5608, "step": 32493 }, { "epoch": 1.8195766603203047, "grad_norm": 1.2415943145751953, "learning_rate": 9.671605263157896e-05, "loss": 0.4949, "step": 32494 }, { "epoch": 1.8196326576324338, "grad_norm": 1.1079696416854858, "learning_rate": 9.671578947368422e-05, "loss": 0.3893, "step": 32495 }, { "epoch": 1.8196886549445628, "grad_norm": 1.2162058353424072, "learning_rate": 9.671552631578948e-05, "loss": 0.3831, "step": 32496 }, { "epoch": 1.8197446522566918, "grad_norm": 1.2957499027252197, "learning_rate": 9.671526315789474e-05, "loss": 0.474, "step": 32497 }, { "epoch": 1.8198006495688208, "grad_norm": 1.0710455179214478, "learning_rate": 9.6715e-05, "loss": 0.3684, "step": 32498 }, { "epoch": 1.8198566468809498, 
"grad_norm": 1.2035348415374756, "learning_rate": 9.671473684210527e-05, "loss": 0.3746, "step": 32499 }, { "epoch": 1.8199126441930789, "grad_norm": 1.1825233697891235, "learning_rate": 9.671447368421053e-05, "loss": 0.4409, "step": 32500 }, { "epoch": 1.8199686415052079, "grad_norm": 1.3556849956512451, "learning_rate": 9.671421052631579e-05, "loss": 0.5354, "step": 32501 }, { "epoch": 1.820024638817337, "grad_norm": 1.3185393810272217, "learning_rate": 9.671394736842105e-05, "loss": 0.4968, "step": 32502 }, { "epoch": 1.820080636129466, "grad_norm": 1.3182179927825928, "learning_rate": 9.671368421052632e-05, "loss": 0.5185, "step": 32503 }, { "epoch": 1.820136633441595, "grad_norm": 2.057490587234497, "learning_rate": 9.671342105263158e-05, "loss": 0.4132, "step": 32504 }, { "epoch": 1.820192630753724, "grad_norm": 1.2419458627700806, "learning_rate": 9.671315789473684e-05, "loss": 0.6465, "step": 32505 }, { "epoch": 1.820248628065853, "grad_norm": 1.45686936378479, "learning_rate": 9.67128947368421e-05, "loss": 0.4034, "step": 32506 }, { "epoch": 1.820304625377982, "grad_norm": 1.3267873525619507, "learning_rate": 9.671263157894738e-05, "loss": 0.5541, "step": 32507 }, { "epoch": 1.820360622690111, "grad_norm": 1.2031878232955933, "learning_rate": 9.671236842105264e-05, "loss": 0.5194, "step": 32508 }, { "epoch": 1.82041662000224, "grad_norm": 1.5494134426116943, "learning_rate": 9.671210526315791e-05, "loss": 0.5716, "step": 32509 }, { "epoch": 1.820472617314369, "grad_norm": 1.2340202331542969, "learning_rate": 9.671184210526316e-05, "loss": 0.437, "step": 32510 }, { "epoch": 1.820528614626498, "grad_norm": 1.1425443887710571, "learning_rate": 9.671157894736843e-05, "loss": 0.3416, "step": 32511 }, { "epoch": 1.820584611938627, "grad_norm": 1.1883059740066528, "learning_rate": 9.671131578947369e-05, "loss": 0.528, "step": 32512 }, { "epoch": 1.820640609250756, "grad_norm": 1.4798719882965088, "learning_rate": 9.671105263157895e-05, "loss": 0.5063, "step": 
32513 }, { "epoch": 1.8206966065628851, "grad_norm": 1.866953730583191, "learning_rate": 9.671078947368422e-05, "loss": 0.4992, "step": 32514 }, { "epoch": 1.8207526038750141, "grad_norm": 1.6121031045913696, "learning_rate": 9.671052631578947e-05, "loss": 0.4008, "step": 32515 }, { "epoch": 1.8208086011871432, "grad_norm": 1.46975576877594, "learning_rate": 9.671026315789474e-05, "loss": 0.6517, "step": 32516 }, { "epoch": 1.8208645984992722, "grad_norm": 1.711916208267212, "learning_rate": 9.671e-05, "loss": 0.6506, "step": 32517 }, { "epoch": 1.8209205958114012, "grad_norm": 1.4283182621002197, "learning_rate": 9.670973684210527e-05, "loss": 0.4752, "step": 32518 }, { "epoch": 1.8209765931235302, "grad_norm": 1.1654165983200073, "learning_rate": 9.670947368421053e-05, "loss": 0.4088, "step": 32519 }, { "epoch": 1.8210325904356592, "grad_norm": 1.0469211339950562, "learning_rate": 9.67092105263158e-05, "loss": 0.4589, "step": 32520 }, { "epoch": 1.8210885877477883, "grad_norm": 1.2741965055465698, "learning_rate": 9.670894736842105e-05, "loss": 0.46, "step": 32521 }, { "epoch": 1.8211445850599173, "grad_norm": 1.0342941284179688, "learning_rate": 9.670868421052633e-05, "loss": 0.4866, "step": 32522 }, { "epoch": 1.8212005823720463, "grad_norm": 1.4391822814941406, "learning_rate": 9.670842105263159e-05, "loss": 0.4368, "step": 32523 }, { "epoch": 1.8212565796841753, "grad_norm": 1.0916335582733154, "learning_rate": 9.670815789473685e-05, "loss": 0.4136, "step": 32524 }, { "epoch": 1.8213125769963043, "grad_norm": 1.1497094631195068, "learning_rate": 9.67078947368421e-05, "loss": 0.468, "step": 32525 }, { "epoch": 1.8213685743084334, "grad_norm": 1.96045982837677, "learning_rate": 9.670763157894738e-05, "loss": 0.3914, "step": 32526 }, { "epoch": 1.8214245716205624, "grad_norm": 1.3185077905654907, "learning_rate": 9.670736842105264e-05, "loss": 0.5111, "step": 32527 }, { "epoch": 1.8214805689326914, "grad_norm": 1.5654627084732056, "learning_rate": 
9.67071052631579e-05, "loss": 0.5916, "step": 32528 }, { "epoch": 1.8215365662448204, "grad_norm": 1.1321732997894287, "learning_rate": 9.670684210526316e-05, "loss": 0.3828, "step": 32529 }, { "epoch": 1.8215925635569494, "grad_norm": 1.2071489095687866, "learning_rate": 9.670657894736842e-05, "loss": 0.3852, "step": 32530 }, { "epoch": 1.8216485608690784, "grad_norm": 1.2667101621627808, "learning_rate": 9.670631578947369e-05, "loss": 0.5004, "step": 32531 }, { "epoch": 1.8217045581812075, "grad_norm": 1.2658908367156982, "learning_rate": 9.670605263157895e-05, "loss": 0.424, "step": 32532 }, { "epoch": 1.8217605554933363, "grad_norm": 1.3126044273376465, "learning_rate": 9.670578947368421e-05, "loss": 0.3907, "step": 32533 }, { "epoch": 1.8218165528054653, "grad_norm": 1.6182126998901367, "learning_rate": 9.670552631578947e-05, "loss": 0.3787, "step": 32534 }, { "epoch": 1.8218725501175943, "grad_norm": 1.3860315084457397, "learning_rate": 9.670526315789474e-05, "loss": 0.4279, "step": 32535 }, { "epoch": 1.8219285474297233, "grad_norm": 1.235561728477478, "learning_rate": 9.6705e-05, "loss": 0.5008, "step": 32536 }, { "epoch": 1.8219845447418523, "grad_norm": 1.148931860923767, "learning_rate": 9.670473684210528e-05, "loss": 0.4007, "step": 32537 }, { "epoch": 1.8220405420539814, "grad_norm": 1.3664186000823975, "learning_rate": 9.670447368421052e-05, "loss": 0.4464, "step": 32538 }, { "epoch": 1.8220965393661104, "grad_norm": 1.6627272367477417, "learning_rate": 9.67042105263158e-05, "loss": 0.5376, "step": 32539 }, { "epoch": 1.8221525366782394, "grad_norm": 1.3919153213500977, "learning_rate": 9.670394736842106e-05, "loss": 0.4488, "step": 32540 }, { "epoch": 1.8222085339903684, "grad_norm": 1.199384093284607, "learning_rate": 9.670368421052633e-05, "loss": 0.5649, "step": 32541 }, { "epoch": 1.8222645313024974, "grad_norm": 1.414018988609314, "learning_rate": 9.670342105263159e-05, "loss": 0.5207, "step": 32542 }, { "epoch": 1.8223205286146265, "grad_norm": 
1.116434097290039, "learning_rate": 9.670315789473685e-05, "loss": 0.3832, "step": 32543 }, { "epoch": 1.8223765259267555, "grad_norm": 1.2003717422485352, "learning_rate": 9.670289473684211e-05, "loss": 0.441, "step": 32544 }, { "epoch": 1.8224325232388845, "grad_norm": 1.4894652366638184, "learning_rate": 9.670263157894738e-05, "loss": 0.4912, "step": 32545 }, { "epoch": 1.8224885205510135, "grad_norm": 1.1877217292785645, "learning_rate": 9.670236842105264e-05, "loss": 0.4977, "step": 32546 }, { "epoch": 1.8225445178631425, "grad_norm": 1.110384464263916, "learning_rate": 9.670210526315789e-05, "loss": 0.3216, "step": 32547 }, { "epoch": 1.8226005151752716, "grad_norm": 1.4273176193237305, "learning_rate": 9.670184210526316e-05, "loss": 0.4597, "step": 32548 }, { "epoch": 1.8226565124874006, "grad_norm": 1.3708255290985107, "learning_rate": 9.670157894736842e-05, "loss": 0.4172, "step": 32549 }, { "epoch": 1.8227125097995296, "grad_norm": 1.3241392374038696, "learning_rate": 9.67013157894737e-05, "loss": 0.4817, "step": 32550 }, { "epoch": 1.8227685071116586, "grad_norm": 1.4388078451156616, "learning_rate": 9.670105263157895e-05, "loss": 0.3829, "step": 32551 }, { "epoch": 1.8228245044237876, "grad_norm": 1.3396120071411133, "learning_rate": 9.670078947368421e-05, "loss": 0.3631, "step": 32552 }, { "epoch": 1.8228805017359166, "grad_norm": 1.3354370594024658, "learning_rate": 9.670052631578947e-05, "loss": 0.5774, "step": 32553 }, { "epoch": 1.8229364990480457, "grad_norm": 1.1911250352859497, "learning_rate": 9.670026315789475e-05, "loss": 0.3018, "step": 32554 }, { "epoch": 1.8229924963601747, "grad_norm": 1.2693029642105103, "learning_rate": 9.67e-05, "loss": 0.518, "step": 32555 }, { "epoch": 1.8230484936723037, "grad_norm": 1.420599102973938, "learning_rate": 9.669973684210527e-05, "loss": 0.4161, "step": 32556 }, { "epoch": 1.8231044909844327, "grad_norm": 1.2358683347702026, "learning_rate": 9.669947368421053e-05, "loss": 0.4386, "step": 32557 }, { 
"epoch": 1.8231604882965617, "grad_norm": 1.0605735778808594, "learning_rate": 9.66992105263158e-05, "loss": 0.4498, "step": 32558 }, { "epoch": 1.8232164856086908, "grad_norm": 3.1415019035339355, "learning_rate": 9.669894736842106e-05, "loss": 0.5537, "step": 32559 }, { "epoch": 1.8232724829208198, "grad_norm": 1.2564799785614014, "learning_rate": 9.669868421052632e-05, "loss": 0.4851, "step": 32560 }, { "epoch": 1.8233284802329488, "grad_norm": 1.4091824293136597, "learning_rate": 9.669842105263158e-05, "loss": 0.4864, "step": 32561 }, { "epoch": 1.8233844775450778, "grad_norm": 1.2649959325790405, "learning_rate": 9.669815789473685e-05, "loss": 0.4714, "step": 32562 }, { "epoch": 1.8234404748572068, "grad_norm": 1.2252275943756104, "learning_rate": 9.669789473684211e-05, "loss": 0.56, "step": 32563 }, { "epoch": 1.8234964721693359, "grad_norm": 1.1678951978683472, "learning_rate": 9.669763157894738e-05, "loss": 0.4308, "step": 32564 }, { "epoch": 1.8235524694814649, "grad_norm": 1.6501609086990356, "learning_rate": 9.669736842105263e-05, "loss": 0.4936, "step": 32565 }, { "epoch": 1.823608466793594, "grad_norm": 1.3688122034072876, "learning_rate": 9.669710526315789e-05, "loss": 0.5517, "step": 32566 }, { "epoch": 1.823664464105723, "grad_norm": 1.4094111919403076, "learning_rate": 9.669684210526316e-05, "loss": 0.5128, "step": 32567 }, { "epoch": 1.823720461417852, "grad_norm": 1.262662649154663, "learning_rate": 9.669657894736842e-05, "loss": 0.4281, "step": 32568 }, { "epoch": 1.823776458729981, "grad_norm": 1.237623929977417, "learning_rate": 9.66963157894737e-05, "loss": 0.4589, "step": 32569 }, { "epoch": 1.82383245604211, "grad_norm": 1.4218124151229858, "learning_rate": 9.669605263157894e-05, "loss": 0.4109, "step": 32570 }, { "epoch": 1.823888453354239, "grad_norm": 1.2873589992523193, "learning_rate": 9.669578947368422e-05, "loss": 0.4809, "step": 32571 }, { "epoch": 1.823944450666368, "grad_norm": 1.2886723279953003, "learning_rate": 
9.669552631578948e-05, "loss": 0.5073, "step": 32572 }, { "epoch": 1.824000447978497, "grad_norm": 1.300972580909729, "learning_rate": 9.669526315789475e-05, "loss": 0.5037, "step": 32573 }, { "epoch": 1.824056445290626, "grad_norm": 1.2771070003509521, "learning_rate": 9.669500000000001e-05, "loss": 0.5444, "step": 32574 }, { "epoch": 1.824112442602755, "grad_norm": 1.7403132915496826, "learning_rate": 9.669473684210527e-05, "loss": 0.5305, "step": 32575 }, { "epoch": 1.824168439914884, "grad_norm": 1.533210277557373, "learning_rate": 9.669447368421053e-05, "loss": 0.5951, "step": 32576 }, { "epoch": 1.824224437227013, "grad_norm": 1.3646222352981567, "learning_rate": 9.66942105263158e-05, "loss": 0.5527, "step": 32577 }, { "epoch": 1.8242804345391421, "grad_norm": 1.0207258462905884, "learning_rate": 9.669394736842106e-05, "loss": 0.4674, "step": 32578 }, { "epoch": 1.8243364318512711, "grad_norm": 1.1555756330490112, "learning_rate": 9.669368421052632e-05, "loss": 0.4146, "step": 32579 }, { "epoch": 1.8243924291634002, "grad_norm": 1.2460813522338867, "learning_rate": 9.669342105263158e-05, "loss": 0.4015, "step": 32580 }, { "epoch": 1.8244484264755292, "grad_norm": 1.219386100769043, "learning_rate": 9.669315789473685e-05, "loss": 0.4457, "step": 32581 }, { "epoch": 1.8245044237876582, "grad_norm": 1.1244335174560547, "learning_rate": 9.669289473684211e-05, "loss": 0.4759, "step": 32582 }, { "epoch": 1.8245604210997872, "grad_norm": 1.125978946685791, "learning_rate": 9.669263157894737e-05, "loss": 0.503, "step": 32583 }, { "epoch": 1.8246164184119162, "grad_norm": 1.4441940784454346, "learning_rate": 9.669236842105263e-05, "loss": 0.5164, "step": 32584 }, { "epoch": 1.8246724157240453, "grad_norm": 1.0562191009521484, "learning_rate": 9.669210526315789e-05, "loss": 0.4711, "step": 32585 }, { "epoch": 1.8247284130361743, "grad_norm": 1.3122776746749878, "learning_rate": 9.669184210526317e-05, "loss": 0.4623, "step": 32586 }, { "epoch": 1.8247844103483033, 
"grad_norm": 1.3073694705963135, "learning_rate": 9.669157894736843e-05, "loss": 0.4393, "step": 32587 }, { "epoch": 1.8248404076604323, "grad_norm": 2.049767017364502, "learning_rate": 9.669131578947369e-05, "loss": 0.5651, "step": 32588 }, { "epoch": 1.8248964049725613, "grad_norm": 1.6820993423461914, "learning_rate": 9.669105263157895e-05, "loss": 0.3945, "step": 32589 }, { "epoch": 1.8249524022846904, "grad_norm": 1.2036643028259277, "learning_rate": 9.669078947368422e-05, "loss": 0.3957, "step": 32590 }, { "epoch": 1.8250083995968194, "grad_norm": 1.2300925254821777, "learning_rate": 9.669052631578948e-05, "loss": 0.4814, "step": 32591 }, { "epoch": 1.8250643969089484, "grad_norm": 1.1175167560577393, "learning_rate": 9.669026315789475e-05, "loss": 0.4314, "step": 32592 }, { "epoch": 1.8251203942210774, "grad_norm": 1.4281058311462402, "learning_rate": 9.669e-05, "loss": 0.4205, "step": 32593 }, { "epoch": 1.8251763915332064, "grad_norm": 1.2252016067504883, "learning_rate": 9.668973684210527e-05, "loss": 0.4933, "step": 32594 }, { "epoch": 1.8252323888453355, "grad_norm": 1.2222179174423218, "learning_rate": 9.668947368421053e-05, "loss": 0.3959, "step": 32595 }, { "epoch": 1.8252883861574645, "grad_norm": 1.1796728372573853, "learning_rate": 9.66892105263158e-05, "loss": 0.4052, "step": 32596 }, { "epoch": 1.8253443834695935, "grad_norm": 1.116337537765503, "learning_rate": 9.668894736842105e-05, "loss": 0.4055, "step": 32597 }, { "epoch": 1.8254003807817225, "grad_norm": 1.5841273069381714, "learning_rate": 9.668868421052632e-05, "loss": 0.3562, "step": 32598 }, { "epoch": 1.8254563780938515, "grad_norm": 1.1964962482452393, "learning_rate": 9.668842105263158e-05, "loss": 0.4176, "step": 32599 }, { "epoch": 1.8255123754059805, "grad_norm": 1.1066843271255493, "learning_rate": 9.668815789473684e-05, "loss": 0.3333, "step": 32600 }, { "epoch": 1.8255683727181096, "grad_norm": 1.0237269401550293, "learning_rate": 9.668789473684212e-05, "loss": 0.4636, "step": 
32601 }, { "epoch": 1.8256243700302386, "grad_norm": 1.3789377212524414, "learning_rate": 9.668763157894736e-05, "loss": 0.4183, "step": 32602 }, { "epoch": 1.8256803673423676, "grad_norm": 1.4113359451293945, "learning_rate": 9.668736842105264e-05, "loss": 0.5721, "step": 32603 }, { "epoch": 1.8257363646544966, "grad_norm": 1.2128055095672607, "learning_rate": 9.66871052631579e-05, "loss": 0.4906, "step": 32604 }, { "epoch": 1.8257923619666256, "grad_norm": 1.167681336402893, "learning_rate": 9.668684210526317e-05, "loss": 0.3832, "step": 32605 }, { "epoch": 1.8258483592787547, "grad_norm": 1.196630597114563, "learning_rate": 9.668657894736843e-05, "loss": 0.4218, "step": 32606 }, { "epoch": 1.8259043565908837, "grad_norm": 1.136807918548584, "learning_rate": 9.668631578947369e-05, "loss": 0.5089, "step": 32607 }, { "epoch": 1.8259603539030127, "grad_norm": 1.463698387145996, "learning_rate": 9.668605263157895e-05, "loss": 0.548, "step": 32608 }, { "epoch": 1.8260163512151417, "grad_norm": 1.3486815690994263, "learning_rate": 9.668578947368422e-05, "loss": 0.4634, "step": 32609 }, { "epoch": 1.8260723485272707, "grad_norm": 5.1178998947143555, "learning_rate": 9.668552631578948e-05, "loss": 0.4325, "step": 32610 }, { "epoch": 1.8261283458393998, "grad_norm": 1.2187914848327637, "learning_rate": 9.668526315789474e-05, "loss": 0.4171, "step": 32611 }, { "epoch": 1.8261843431515288, "grad_norm": 1.1664485931396484, "learning_rate": 9.6685e-05, "loss": 0.408, "step": 32612 }, { "epoch": 1.8262403404636578, "grad_norm": 1.2440263032913208, "learning_rate": 9.668473684210527e-05, "loss": 0.4748, "step": 32613 }, { "epoch": 1.8262963377757868, "grad_norm": 1.2527198791503906, "learning_rate": 9.668447368421053e-05, "loss": 0.514, "step": 32614 }, { "epoch": 1.8263523350879158, "grad_norm": 1.047395944595337, "learning_rate": 9.668421052631579e-05, "loss": 0.4122, "step": 32615 }, { "epoch": 1.8264083324000446, "grad_norm": 1.3422802686691284, "learning_rate": 
9.668394736842105e-05, "loss": 0.7273, "step": 32616 }, { "epoch": 1.8264643297121737, "grad_norm": 1.388197660446167, "learning_rate": 9.668368421052631e-05, "loss": 0.4851, "step": 32617 }, { "epoch": 1.8265203270243027, "grad_norm": 1.5944206714630127, "learning_rate": 9.668342105263159e-05, "loss": 0.5703, "step": 32618 }, { "epoch": 1.8265763243364317, "grad_norm": 1.9509798288345337, "learning_rate": 9.668315789473685e-05, "loss": 0.4805, "step": 32619 }, { "epoch": 1.8266323216485607, "grad_norm": 1.3672857284545898, "learning_rate": 9.66828947368421e-05, "loss": 0.5626, "step": 32620 }, { "epoch": 1.8266883189606897, "grad_norm": 1.4626967906951904, "learning_rate": 9.668263157894736e-05, "loss": 0.3086, "step": 32621 }, { "epoch": 1.8267443162728187, "grad_norm": 1.1000856161117554, "learning_rate": 9.668236842105264e-05, "loss": 0.3826, "step": 32622 }, { "epoch": 1.8268003135849478, "grad_norm": 1.2412537336349487, "learning_rate": 9.66821052631579e-05, "loss": 0.3716, "step": 32623 }, { "epoch": 1.8268563108970768, "grad_norm": 1.2429136037826538, "learning_rate": 9.668184210526317e-05, "loss": 0.4128, "step": 32624 }, { "epoch": 1.8269123082092058, "grad_norm": 1.3121851682662964, "learning_rate": 9.668157894736842e-05, "loss": 0.4995, "step": 32625 }, { "epoch": 1.8269683055213348, "grad_norm": 1.2313607931137085, "learning_rate": 9.668131578947369e-05, "loss": 0.489, "step": 32626 }, { "epoch": 1.8270243028334638, "grad_norm": 1.1633257865905762, "learning_rate": 9.668105263157895e-05, "loss": 0.4584, "step": 32627 }, { "epoch": 1.8270803001455929, "grad_norm": 1.3403372764587402, "learning_rate": 9.668078947368422e-05, "loss": 0.4156, "step": 32628 }, { "epoch": 1.8271362974577219, "grad_norm": 1.3637710809707642, "learning_rate": 9.668052631578948e-05, "loss": 0.5571, "step": 32629 }, { "epoch": 1.827192294769851, "grad_norm": 1.3844184875488281, "learning_rate": 9.668026315789474e-05, "loss": 0.483, "step": 32630 }, { "epoch": 1.82724829208198, 
"grad_norm": 1.2805343866348267, "learning_rate": 9.668e-05, "loss": 0.4295, "step": 32631 }, { "epoch": 1.827304289394109, "grad_norm": 1.489109992980957, "learning_rate": 9.667973684210528e-05, "loss": 0.5253, "step": 32632 }, { "epoch": 1.827360286706238, "grad_norm": 1.432491421699524, "learning_rate": 9.667947368421054e-05, "loss": 0.5053, "step": 32633 }, { "epoch": 1.827416284018367, "grad_norm": 1.2370774745941162, "learning_rate": 9.66792105263158e-05, "loss": 0.386, "step": 32634 }, { "epoch": 1.827472281330496, "grad_norm": 1.4065133333206177, "learning_rate": 9.667894736842106e-05, "loss": 0.4296, "step": 32635 }, { "epoch": 1.827528278642625, "grad_norm": 1.4006291627883911, "learning_rate": 9.667868421052631e-05, "loss": 0.4126, "step": 32636 }, { "epoch": 1.827584275954754, "grad_norm": 1.0816266536712646, "learning_rate": 9.667842105263159e-05, "loss": 0.3676, "step": 32637 }, { "epoch": 1.827640273266883, "grad_norm": 1.4172054529190063, "learning_rate": 9.667815789473685e-05, "loss": 0.44, "step": 32638 }, { "epoch": 1.827696270579012, "grad_norm": 1.3221161365509033, "learning_rate": 9.667789473684211e-05, "loss": 0.467, "step": 32639 }, { "epoch": 1.827752267891141, "grad_norm": 1.667936086654663, "learning_rate": 9.667763157894737e-05, "loss": 0.6794, "step": 32640 }, { "epoch": 1.82780826520327, "grad_norm": 1.3499414920806885, "learning_rate": 9.667736842105264e-05, "loss": 0.4399, "step": 32641 }, { "epoch": 1.8278642625153991, "grad_norm": 1.3167911767959595, "learning_rate": 9.66771052631579e-05, "loss": 0.511, "step": 32642 }, { "epoch": 1.8279202598275281, "grad_norm": 1.287710189819336, "learning_rate": 9.667684210526316e-05, "loss": 0.4156, "step": 32643 }, { "epoch": 1.8279762571396572, "grad_norm": 1.2779874801635742, "learning_rate": 9.667657894736842e-05, "loss": 0.4393, "step": 32644 }, { "epoch": 1.8280322544517862, "grad_norm": 1.7091296911239624, "learning_rate": 9.667631578947369e-05, "loss": 0.4468, "step": 32645 }, { 
"epoch": 1.8280882517639152, "grad_norm": 1.4625667333602905, "learning_rate": 9.667605263157895e-05, "loss": 0.4958, "step": 32646 }, { "epoch": 1.8281442490760442, "grad_norm": 1.1907398700714111, "learning_rate": 9.667578947368423e-05, "loss": 0.3929, "step": 32647 }, { "epoch": 1.8282002463881732, "grad_norm": 1.2688395977020264, "learning_rate": 9.667552631578947e-05, "loss": 0.4314, "step": 32648 }, { "epoch": 1.8282562437003023, "grad_norm": 1.3119951486587524, "learning_rate": 9.667526315789475e-05, "loss": 0.4976, "step": 32649 }, { "epoch": 1.8283122410124313, "grad_norm": 1.057431697845459, "learning_rate": 9.6675e-05, "loss": 0.3274, "step": 32650 }, { "epoch": 1.8283682383245603, "grad_norm": 1.3666818141937256, "learning_rate": 9.667473684210527e-05, "loss": 0.435, "step": 32651 }, { "epoch": 1.8284242356366893, "grad_norm": 1.2738220691680908, "learning_rate": 9.667447368421052e-05, "loss": 0.4332, "step": 32652 }, { "epoch": 1.8284802329488183, "grad_norm": 1.235003113746643, "learning_rate": 9.667421052631578e-05, "loss": 0.379, "step": 32653 }, { "epoch": 1.8285362302609474, "grad_norm": 1.3275306224822998, "learning_rate": 9.667394736842106e-05, "loss": 0.4661, "step": 32654 }, { "epoch": 1.8285922275730764, "grad_norm": 1.3707810640335083, "learning_rate": 9.667368421052632e-05, "loss": 0.3822, "step": 32655 }, { "epoch": 1.8286482248852054, "grad_norm": 1.2254188060760498, "learning_rate": 9.667342105263159e-05, "loss": 0.5657, "step": 32656 }, { "epoch": 1.8287042221973344, "grad_norm": 1.3151730298995972, "learning_rate": 9.667315789473684e-05, "loss": 0.5042, "step": 32657 }, { "epoch": 1.8287602195094634, "grad_norm": 1.1538809537887573, "learning_rate": 9.667289473684211e-05, "loss": 0.4685, "step": 32658 }, { "epoch": 1.8288162168215925, "grad_norm": 1.4197208881378174, "learning_rate": 9.667263157894737e-05, "loss": 0.5985, "step": 32659 }, { "epoch": 1.8288722141337215, "grad_norm": 1.1300543546676636, "learning_rate": 
9.667236842105264e-05, "loss": 0.4008, "step": 32660 }, { "epoch": 1.8289282114458505, "grad_norm": 1.0233668088912964, "learning_rate": 9.66721052631579e-05, "loss": 0.3974, "step": 32661 }, { "epoch": 1.8289842087579795, "grad_norm": 1.1356687545776367, "learning_rate": 9.667184210526316e-05, "loss": 0.4677, "step": 32662 }, { "epoch": 1.8290402060701085, "grad_norm": 1.3866469860076904, "learning_rate": 9.667157894736842e-05, "loss": 0.4227, "step": 32663 }, { "epoch": 1.8290962033822376, "grad_norm": 1.1879624128341675, "learning_rate": 9.66713157894737e-05, "loss": 0.3934, "step": 32664 }, { "epoch": 1.8291522006943666, "grad_norm": 1.0241892337799072, "learning_rate": 9.667105263157896e-05, "loss": 0.3762, "step": 32665 }, { "epoch": 1.8292081980064956, "grad_norm": 1.2172296047210693, "learning_rate": 9.667078947368422e-05, "loss": 0.401, "step": 32666 }, { "epoch": 1.8292641953186246, "grad_norm": 1.5045950412750244, "learning_rate": 9.667052631578947e-05, "loss": 0.4328, "step": 32667 }, { "epoch": 1.8293201926307536, "grad_norm": 1.503819227218628, "learning_rate": 9.667026315789473e-05, "loss": 0.5246, "step": 32668 }, { "epoch": 1.8293761899428826, "grad_norm": 1.2372878789901733, "learning_rate": 9.667000000000001e-05, "loss": 0.521, "step": 32669 }, { "epoch": 1.8294321872550117, "grad_norm": 1.2774956226348877, "learning_rate": 9.666973684210527e-05, "loss": 0.4547, "step": 32670 }, { "epoch": 1.8294881845671407, "grad_norm": 1.1652895212173462, "learning_rate": 9.666947368421053e-05, "loss": 0.2893, "step": 32671 }, { "epoch": 1.8295441818792697, "grad_norm": 1.0408943891525269, "learning_rate": 9.666921052631579e-05, "loss": 0.4493, "step": 32672 }, { "epoch": 1.8296001791913987, "grad_norm": 1.4637556076049805, "learning_rate": 9.666894736842106e-05, "loss": 0.5793, "step": 32673 }, { "epoch": 1.8296561765035277, "grad_norm": 1.8593851327896118, "learning_rate": 9.666868421052632e-05, "loss": 0.5905, "step": 32674 }, { "epoch": 1.8297121738156568, 
"grad_norm": 1.6842243671417236, "learning_rate": 9.666842105263158e-05, "loss": 0.528, "step": 32675 }, { "epoch": 1.8297681711277858, "grad_norm": 1.177577257156372, "learning_rate": 9.666815789473684e-05, "loss": 0.4878, "step": 32676 }, { "epoch": 1.8298241684399148, "grad_norm": 1.218920111656189, "learning_rate": 9.666789473684211e-05, "loss": 0.4243, "step": 32677 }, { "epoch": 1.8298801657520438, "grad_norm": 1.0772826671600342, "learning_rate": 9.666763157894737e-05, "loss": 0.4324, "step": 32678 }, { "epoch": 1.8299361630641728, "grad_norm": 1.2124522924423218, "learning_rate": 9.666736842105265e-05, "loss": 0.4393, "step": 32679 }, { "epoch": 1.8299921603763019, "grad_norm": 1.0846668481826782, "learning_rate": 9.666710526315789e-05, "loss": 0.3997, "step": 32680 }, { "epoch": 1.8300481576884309, "grad_norm": 1.091623306274414, "learning_rate": 9.666684210526317e-05, "loss": 0.3132, "step": 32681 }, { "epoch": 1.83010415500056, "grad_norm": 1.125016689300537, "learning_rate": 9.666657894736842e-05, "loss": 0.4582, "step": 32682 }, { "epoch": 1.830160152312689, "grad_norm": 1.3759264945983887, "learning_rate": 9.66663157894737e-05, "loss": 0.4266, "step": 32683 }, { "epoch": 1.830216149624818, "grad_norm": 1.172193169593811, "learning_rate": 9.666605263157896e-05, "loss": 0.3767, "step": 32684 }, { "epoch": 1.830272146936947, "grad_norm": 1.1010347604751587, "learning_rate": 9.666578947368422e-05, "loss": 0.3532, "step": 32685 }, { "epoch": 1.830328144249076, "grad_norm": 1.3016178607940674, "learning_rate": 9.666552631578948e-05, "loss": 0.3954, "step": 32686 }, { "epoch": 1.830384141561205, "grad_norm": 1.693276286125183, "learning_rate": 9.666526315789474e-05, "loss": 0.5813, "step": 32687 }, { "epoch": 1.830440138873334, "grad_norm": 1.37324857711792, "learning_rate": 9.666500000000001e-05, "loss": 0.5363, "step": 32688 }, { "epoch": 1.830496136185463, "grad_norm": 1.3337602615356445, "learning_rate": 9.666473684210527e-05, "loss": 0.3974, "step": 
32689 }, { "epoch": 1.830552133497592, "grad_norm": 1.5725585222244263, "learning_rate": 9.666447368421053e-05, "loss": 0.4479, "step": 32690 }, { "epoch": 1.830608130809721, "grad_norm": 1.1100361347198486, "learning_rate": 9.666421052631579e-05, "loss": 0.3584, "step": 32691 }, { "epoch": 1.83066412812185, "grad_norm": 1.5025217533111572, "learning_rate": 9.666394736842106e-05, "loss": 0.5089, "step": 32692 }, { "epoch": 1.830720125433979, "grad_norm": 1.9290908575057983, "learning_rate": 9.666368421052632e-05, "loss": 0.3646, "step": 32693 }, { "epoch": 1.8307761227461081, "grad_norm": 1.5080006122589111, "learning_rate": 9.666342105263158e-05, "loss": 0.561, "step": 32694 }, { "epoch": 1.8308321200582371, "grad_norm": 1.1862131357192993, "learning_rate": 9.666315789473684e-05, "loss": 0.3779, "step": 32695 }, { "epoch": 1.8308881173703662, "grad_norm": 1.2814775705337524, "learning_rate": 9.666289473684212e-05, "loss": 0.4245, "step": 32696 }, { "epoch": 1.8309441146824952, "grad_norm": 1.4017432928085327, "learning_rate": 9.666263157894738e-05, "loss": 0.4706, "step": 32697 }, { "epoch": 1.8310001119946242, "grad_norm": 1.4507789611816406, "learning_rate": 9.666236842105263e-05, "loss": 0.4321, "step": 32698 }, { "epoch": 1.8310561093067532, "grad_norm": 1.382832646369934, "learning_rate": 9.66621052631579e-05, "loss": 0.4984, "step": 32699 }, { "epoch": 1.8311121066188822, "grad_norm": 1.1069839000701904, "learning_rate": 9.666184210526317e-05, "loss": 0.4409, "step": 32700 }, { "epoch": 1.8311681039310113, "grad_norm": 1.4302692413330078, "learning_rate": 9.666157894736843e-05, "loss": 0.4594, "step": 32701 }, { "epoch": 1.8312241012431403, "grad_norm": 1.1549898386001587, "learning_rate": 9.66613157894737e-05, "loss": 0.3996, "step": 32702 }, { "epoch": 1.8312800985552693, "grad_norm": 1.374919056892395, "learning_rate": 9.666105263157895e-05, "loss": 0.4857, "step": 32703 }, { "epoch": 1.8313360958673983, "grad_norm": 1.1142358779907227, "learning_rate": 
9.66607894736842e-05, "loss": 0.3632, "step": 32704 }, { "epoch": 1.8313920931795273, "grad_norm": 1.3335648775100708, "learning_rate": 9.666052631578948e-05, "loss": 0.5292, "step": 32705 }, { "epoch": 1.8314480904916564, "grad_norm": 1.409423589706421, "learning_rate": 9.666026315789474e-05, "loss": 0.3871, "step": 32706 }, { "epoch": 1.8315040878037854, "grad_norm": 1.2985585927963257, "learning_rate": 9.666e-05, "loss": 0.5195, "step": 32707 }, { "epoch": 1.8315600851159144, "grad_norm": 1.509708046913147, "learning_rate": 9.665973684210526e-05, "loss": 0.4044, "step": 32708 }, { "epoch": 1.8316160824280434, "grad_norm": 1.2600221633911133, "learning_rate": 9.665947368421053e-05, "loss": 0.3483, "step": 32709 }, { "epoch": 1.8316720797401724, "grad_norm": 1.5613957643508911, "learning_rate": 9.665921052631579e-05, "loss": 0.4571, "step": 32710 }, { "epoch": 1.8317280770523015, "grad_norm": 1.3759835958480835, "learning_rate": 9.665894736842107e-05, "loss": 0.4701, "step": 32711 }, { "epoch": 1.8317840743644305, "grad_norm": 1.1702947616577148, "learning_rate": 9.665868421052631e-05, "loss": 0.3795, "step": 32712 }, { "epoch": 1.8318400716765595, "grad_norm": 3.083773374557495, "learning_rate": 9.665842105263158e-05, "loss": 0.3771, "step": 32713 }, { "epoch": 1.8318960689886885, "grad_norm": 1.134027361869812, "learning_rate": 9.665815789473684e-05, "loss": 0.5371, "step": 32714 }, { "epoch": 1.8319520663008175, "grad_norm": 9.868935585021973, "learning_rate": 9.665789473684212e-05, "loss": 0.7914, "step": 32715 }, { "epoch": 1.8320080636129465, "grad_norm": 1.3018718957901, "learning_rate": 9.665763157894738e-05, "loss": 0.4281, "step": 32716 }, { "epoch": 1.8320640609250756, "grad_norm": 1.113433837890625, "learning_rate": 9.665736842105264e-05, "loss": 0.4189, "step": 32717 }, { "epoch": 1.8321200582372046, "grad_norm": 1.243665337562561, "learning_rate": 9.66571052631579e-05, "loss": 0.49, "step": 32718 }, { "epoch": 1.8321760555493336, "grad_norm": 
1.505263090133667, "learning_rate": 9.665684210526317e-05, "loss": 0.4262, "step": 32719 }, { "epoch": 1.8322320528614626, "grad_norm": 1.5680044889450073, "learning_rate": 9.665657894736843e-05, "loss": 0.4489, "step": 32720 }, { "epoch": 1.8322880501735916, "grad_norm": 1.7051466703414917, "learning_rate": 9.665631578947369e-05, "loss": 0.559, "step": 32721 }, { "epoch": 1.8323440474857207, "grad_norm": 1.1974444389343262, "learning_rate": 9.665605263157895e-05, "loss": 0.4361, "step": 32722 }, { "epoch": 1.8324000447978497, "grad_norm": 1.1413769721984863, "learning_rate": 9.665578947368421e-05, "loss": 0.4257, "step": 32723 }, { "epoch": 1.8324560421099787, "grad_norm": 0.9643688201904297, "learning_rate": 9.665552631578948e-05, "loss": 0.3265, "step": 32724 }, { "epoch": 1.8325120394221077, "grad_norm": 1.388958215713501, "learning_rate": 9.665526315789474e-05, "loss": 0.5034, "step": 32725 }, { "epoch": 1.8325680367342367, "grad_norm": 1.190285563468933, "learning_rate": 9.6655e-05, "loss": 0.4158, "step": 32726 }, { "epoch": 1.8326240340463658, "grad_norm": 1.7620315551757812, "learning_rate": 9.665473684210526e-05, "loss": 0.3913, "step": 32727 }, { "epoch": 1.8326800313584948, "grad_norm": 1.3899251222610474, "learning_rate": 9.665447368421054e-05, "loss": 0.3877, "step": 32728 }, { "epoch": 1.8327360286706238, "grad_norm": 1.5617096424102783, "learning_rate": 9.66542105263158e-05, "loss": 0.4683, "step": 32729 }, { "epoch": 1.8327920259827528, "grad_norm": 1.2852299213409424, "learning_rate": 9.665394736842105e-05, "loss": 0.4646, "step": 32730 }, { "epoch": 1.8328480232948818, "grad_norm": 1.3972654342651367, "learning_rate": 9.665368421052631e-05, "loss": 0.5794, "step": 32731 }, { "epoch": 1.8329040206070109, "grad_norm": 1.4230235815048218, "learning_rate": 9.665342105263159e-05, "loss": 0.3749, "step": 32732 }, { "epoch": 1.8329600179191399, "grad_norm": 1.1495620012283325, "learning_rate": 9.665315789473685e-05, "loss": 0.3909, "step": 32733 }, { 
"epoch": 1.833016015231269, "grad_norm": 1.1372021436691284, "learning_rate": 9.665289473684212e-05, "loss": 0.4301, "step": 32734 }, { "epoch": 1.833072012543398, "grad_norm": 1.7445937395095825, "learning_rate": 9.665263157894737e-05, "loss": 0.4723, "step": 32735 }, { "epoch": 1.833128009855527, "grad_norm": 1.34993314743042, "learning_rate": 9.665236842105264e-05, "loss": 0.3778, "step": 32736 }, { "epoch": 1.833184007167656, "grad_norm": 1.1281663179397583, "learning_rate": 9.66521052631579e-05, "loss": 0.4953, "step": 32737 }, { "epoch": 1.833240004479785, "grad_norm": 1.7411876916885376, "learning_rate": 9.665184210526316e-05, "loss": 0.5088, "step": 32738 }, { "epoch": 1.833296001791914, "grad_norm": 8.135942459106445, "learning_rate": 9.665157894736843e-05, "loss": 0.3537, "step": 32739 }, { "epoch": 1.833351999104043, "grad_norm": 1.7054061889648438, "learning_rate": 9.665131578947368e-05, "loss": 0.4314, "step": 32740 }, { "epoch": 1.833407996416172, "grad_norm": 1.3007142543792725, "learning_rate": 9.665105263157895e-05, "loss": 0.5351, "step": 32741 }, { "epoch": 1.833463993728301, "grad_norm": 1.2788482904434204, "learning_rate": 9.665078947368421e-05, "loss": 0.4453, "step": 32742 }, { "epoch": 1.83351999104043, "grad_norm": 1.6615954637527466, "learning_rate": 9.665052631578949e-05, "loss": 0.6915, "step": 32743 }, { "epoch": 1.833575988352559, "grad_norm": 0.9844016432762146, "learning_rate": 9.665026315789474e-05, "loss": 0.3056, "step": 32744 }, { "epoch": 1.833631985664688, "grad_norm": 1.385599136352539, "learning_rate": 9.665e-05, "loss": 0.3915, "step": 32745 }, { "epoch": 1.8336879829768171, "grad_norm": 1.181168794631958, "learning_rate": 9.664973684210526e-05, "loss": 0.4216, "step": 32746 }, { "epoch": 1.8337439802889461, "grad_norm": 1.248872995376587, "learning_rate": 9.664947368421054e-05, "loss": 0.4273, "step": 32747 }, { "epoch": 1.8337999776010752, "grad_norm": 1.19883131980896, "learning_rate": 9.66492105263158e-05, "loss": 
0.4783, "step": 32748 }, { "epoch": 1.8338559749132042, "grad_norm": 1.430114507675171, "learning_rate": 9.664894736842106e-05, "loss": 0.4207, "step": 32749 }, { "epoch": 1.8339119722253332, "grad_norm": 1.2618263959884644, "learning_rate": 9.664868421052632e-05, "loss": 0.4088, "step": 32750 }, { "epoch": 1.8339679695374622, "grad_norm": 1.3619344234466553, "learning_rate": 9.664842105263159e-05, "loss": 0.4562, "step": 32751 }, { "epoch": 1.8340239668495912, "grad_norm": 1.2100218534469604, "learning_rate": 9.664815789473685e-05, "loss": 0.4778, "step": 32752 }, { "epoch": 1.8340799641617203, "grad_norm": 1.2954472303390503, "learning_rate": 9.664789473684211e-05, "loss": 0.3898, "step": 32753 }, { "epoch": 1.8341359614738493, "grad_norm": 1.3339674472808838, "learning_rate": 9.664763157894737e-05, "loss": 0.4605, "step": 32754 }, { "epoch": 1.8341919587859783, "grad_norm": 1.3754647970199585, "learning_rate": 9.664736842105263e-05, "loss": 0.5608, "step": 32755 }, { "epoch": 1.8342479560981073, "grad_norm": 1.32127046585083, "learning_rate": 9.66471052631579e-05, "loss": 0.4764, "step": 32756 }, { "epoch": 1.8343039534102363, "grad_norm": 1.9327669143676758, "learning_rate": 9.664684210526316e-05, "loss": 0.6438, "step": 32757 }, { "epoch": 1.8343599507223654, "grad_norm": 1.1660802364349365, "learning_rate": 9.664657894736842e-05, "loss": 0.3647, "step": 32758 }, { "epoch": 1.8344159480344944, "grad_norm": 1.2363452911376953, "learning_rate": 9.664631578947368e-05, "loss": 0.3862, "step": 32759 }, { "epoch": 1.8344719453466234, "grad_norm": 1.5387428998947144, "learning_rate": 9.664605263157895e-05, "loss": 0.4448, "step": 32760 }, { "epoch": 1.8345279426587524, "grad_norm": 1.165092945098877, "learning_rate": 9.664578947368421e-05, "loss": 0.4194, "step": 32761 }, { "epoch": 1.8345839399708814, "grad_norm": 1.6571848392486572, "learning_rate": 9.664552631578947e-05, "loss": 0.5064, "step": 32762 }, { "epoch": 1.8346399372830104, "grad_norm": 
1.0997291803359985, "learning_rate": 9.664526315789473e-05, "loss": 0.463, "step": 32763 }, { "epoch": 1.8346959345951395, "grad_norm": 1.1814830303192139, "learning_rate": 9.664500000000001e-05, "loss": 0.562, "step": 32764 }, { "epoch": 1.8347519319072685, "grad_norm": 1.591491937637329, "learning_rate": 9.664473684210527e-05, "loss": 0.4498, "step": 32765 }, { "epoch": 1.8348079292193975, "grad_norm": 1.3598792552947998, "learning_rate": 9.664447368421054e-05, "loss": 0.4368, "step": 32766 }, { "epoch": 1.8348639265315265, "grad_norm": 1.3111408948898315, "learning_rate": 9.664421052631579e-05, "loss": 0.4398, "step": 32767 }, { "epoch": 1.8349199238436555, "grad_norm": 1.3196347951889038, "learning_rate": 9.664394736842106e-05, "loss": 0.3836, "step": 32768 }, { "epoch": 1.8349759211557846, "grad_norm": 1.611499547958374, "learning_rate": 9.664368421052632e-05, "loss": 0.513, "step": 32769 }, { "epoch": 1.8350319184679136, "grad_norm": 1.3398953676223755, "learning_rate": 9.664342105263159e-05, "loss": 0.3723, "step": 32770 }, { "epoch": 1.8350879157800426, "grad_norm": 1.3389477729797363, "learning_rate": 9.664315789473685e-05, "loss": 0.4886, "step": 32771 }, { "epoch": 1.8351439130921716, "grad_norm": 8.145891189575195, "learning_rate": 9.66428947368421e-05, "loss": 0.4429, "step": 32772 }, { "epoch": 1.8351999104043006, "grad_norm": 1.5232269763946533, "learning_rate": 9.664263157894737e-05, "loss": 0.4692, "step": 32773 }, { "epoch": 1.8352559077164297, "grad_norm": 1.6759427785873413, "learning_rate": 9.664236842105263e-05, "loss": 0.4715, "step": 32774 }, { "epoch": 1.8353119050285587, "grad_norm": 1.2519711256027222, "learning_rate": 9.66421052631579e-05, "loss": 0.6044, "step": 32775 }, { "epoch": 1.8353679023406877, "grad_norm": 1.3026199340820312, "learning_rate": 9.664184210526316e-05, "loss": 0.5726, "step": 32776 }, { "epoch": 1.8354238996528167, "grad_norm": 1.4376665353775024, "learning_rate": 9.664157894736842e-05, "loss": 0.4096, "step": 32777 
}, { "epoch": 1.8354798969649457, "grad_norm": 1.316648244857788, "learning_rate": 9.664131578947368e-05, "loss": 0.4464, "step": 32778 }, { "epoch": 1.8355358942770748, "grad_norm": 1.3668608665466309, "learning_rate": 9.664105263157896e-05, "loss": 0.6071, "step": 32779 }, { "epoch": 1.8355918915892038, "grad_norm": 1.3676471710205078, "learning_rate": 9.664078947368422e-05, "loss": 0.4454, "step": 32780 }, { "epoch": 1.8356478889013328, "grad_norm": 1.1532204151153564, "learning_rate": 9.664052631578948e-05, "loss": 0.5273, "step": 32781 }, { "epoch": 1.8357038862134618, "grad_norm": 1.2275989055633545, "learning_rate": 9.664026315789474e-05, "loss": 0.4498, "step": 32782 }, { "epoch": 1.8357598835255908, "grad_norm": 1.100254774093628, "learning_rate": 9.664000000000001e-05, "loss": 0.5322, "step": 32783 }, { "epoch": 1.8358158808377198, "grad_norm": 1.539417028427124, "learning_rate": 9.663973684210527e-05, "loss": 0.5364, "step": 32784 }, { "epoch": 1.8358718781498489, "grad_norm": 1.8416458368301392, "learning_rate": 9.663947368421053e-05, "loss": 0.528, "step": 32785 }, { "epoch": 1.8359278754619779, "grad_norm": 1.192286729812622, "learning_rate": 9.663921052631579e-05, "loss": 0.4131, "step": 32786 }, { "epoch": 1.835983872774107, "grad_norm": 1.1289178133010864, "learning_rate": 9.663894736842106e-05, "loss": 0.4501, "step": 32787 }, { "epoch": 1.836039870086236, "grad_norm": 1.2684918642044067, "learning_rate": 9.663868421052632e-05, "loss": 0.4582, "step": 32788 }, { "epoch": 1.836095867398365, "grad_norm": 1.1682430505752563, "learning_rate": 9.663842105263158e-05, "loss": 0.458, "step": 32789 }, { "epoch": 1.836151864710494, "grad_norm": 1.2826062440872192, "learning_rate": 9.663815789473684e-05, "loss": 0.4753, "step": 32790 }, { "epoch": 1.836207862022623, "grad_norm": 1.398698329925537, "learning_rate": 9.66378947368421e-05, "loss": 0.4009, "step": 32791 }, { "epoch": 1.836263859334752, "grad_norm": 1.1002782583236694, "learning_rate": 
9.663763157894737e-05, "loss": 0.3559, "step": 32792 }, { "epoch": 1.836319856646881, "grad_norm": 1.801236629486084, "learning_rate": 9.663736842105263e-05, "loss": 0.4569, "step": 32793 }, { "epoch": 1.83637585395901, "grad_norm": 1.076566219329834, "learning_rate": 9.663710526315791e-05, "loss": 0.4302, "step": 32794 }, { "epoch": 1.836431851271139, "grad_norm": 1.3344539403915405, "learning_rate": 9.663684210526315e-05, "loss": 0.4285, "step": 32795 }, { "epoch": 1.836487848583268, "grad_norm": 1.2668132781982422, "learning_rate": 9.663657894736843e-05, "loss": 0.4649, "step": 32796 }, { "epoch": 1.836543845895397, "grad_norm": 43.08137512207031, "learning_rate": 9.663631578947369e-05, "loss": 0.4311, "step": 32797 }, { "epoch": 1.8365998432075261, "grad_norm": 1.227246642112732, "learning_rate": 9.663605263157896e-05, "loss": 0.3829, "step": 32798 }, { "epoch": 1.8366558405196551, "grad_norm": 1.2849862575531006, "learning_rate": 9.66357894736842e-05, "loss": 0.4545, "step": 32799 }, { "epoch": 1.8367118378317842, "grad_norm": 1.5289303064346313, "learning_rate": 9.663552631578948e-05, "loss": 0.5596, "step": 32800 }, { "epoch": 1.8367678351439132, "grad_norm": 1.6089023351669312, "learning_rate": 9.663526315789474e-05, "loss": 0.6024, "step": 32801 }, { "epoch": 1.8368238324560422, "grad_norm": 1.6643784046173096, "learning_rate": 9.663500000000001e-05, "loss": 0.5891, "step": 32802 }, { "epoch": 1.8368798297681712, "grad_norm": 1.2046257257461548, "learning_rate": 9.663473684210527e-05, "loss": 0.5249, "step": 32803 }, { "epoch": 1.8369358270803002, "grad_norm": 1.3768064975738525, "learning_rate": 9.663447368421053e-05, "loss": 0.6448, "step": 32804 }, { "epoch": 1.8369918243924293, "grad_norm": 1.4686816930770874, "learning_rate": 9.663421052631579e-05, "loss": 0.4023, "step": 32805 }, { "epoch": 1.8370478217045583, "grad_norm": 1.0558440685272217, "learning_rate": 9.663394736842106e-05, "loss": 0.3994, "step": 32806 }, { "epoch": 1.8371038190166873, 
"grad_norm": 1.4201335906982422, "learning_rate": 9.663368421052632e-05, "loss": 0.6545, "step": 32807 }, { "epoch": 1.8371598163288163, "grad_norm": 1.1541748046875, "learning_rate": 9.663342105263158e-05, "loss": 0.4763, "step": 32808 }, { "epoch": 1.8372158136409453, "grad_norm": 1.4335594177246094, "learning_rate": 9.663315789473684e-05, "loss": 0.4626, "step": 32809 }, { "epoch": 1.8372718109530743, "grad_norm": 1.263968825340271, "learning_rate": 9.66328947368421e-05, "loss": 0.4137, "step": 32810 }, { "epoch": 1.8373278082652034, "grad_norm": 1.3395044803619385, "learning_rate": 9.663263157894738e-05, "loss": 0.5898, "step": 32811 }, { "epoch": 1.8373838055773324, "grad_norm": 1.0702292919158936, "learning_rate": 9.663236842105264e-05, "loss": 0.3403, "step": 32812 }, { "epoch": 1.8374398028894614, "grad_norm": 1.3203871250152588, "learning_rate": 9.66321052631579e-05, "loss": 0.4055, "step": 32813 }, { "epoch": 1.8374958002015904, "grad_norm": 1.0626285076141357, "learning_rate": 9.663184210526316e-05, "loss": 0.4178, "step": 32814 }, { "epoch": 1.8375517975137194, "grad_norm": 1.1373602151870728, "learning_rate": 9.663157894736843e-05, "loss": 0.4189, "step": 32815 }, { "epoch": 1.8376077948258485, "grad_norm": 1.1357685327529907, "learning_rate": 9.663131578947369e-05, "loss": 0.3559, "step": 32816 }, { "epoch": 1.8376637921379775, "grad_norm": 1.2538461685180664, "learning_rate": 9.663105263157895e-05, "loss": 0.4359, "step": 32817 }, { "epoch": 1.8377197894501065, "grad_norm": 1.2521655559539795, "learning_rate": 9.663078947368421e-05, "loss": 0.4569, "step": 32818 }, { "epoch": 1.8377757867622355, "grad_norm": 1.338199496269226, "learning_rate": 9.663052631578948e-05, "loss": 0.454, "step": 32819 }, { "epoch": 1.8378317840743645, "grad_norm": 1.312105417251587, "learning_rate": 9.663026315789474e-05, "loss": 0.3525, "step": 32820 }, { "epoch": 1.8378877813864936, "grad_norm": 1.2985937595367432, "learning_rate": 9.663000000000002e-05, "loss": 0.4823, 
"step": 32821 }, { "epoch": 1.8379437786986226, "grad_norm": 1.1555944681167603, "learning_rate": 9.662973684210526e-05, "loss": 0.5203, "step": 32822 }, { "epoch": 1.8379997760107516, "grad_norm": 1.1490230560302734, "learning_rate": 9.662947368421053e-05, "loss": 0.4, "step": 32823 }, { "epoch": 1.8380557733228806, "grad_norm": 1.2443671226501465, "learning_rate": 9.66292105263158e-05, "loss": 0.4949, "step": 32824 }, { "epoch": 1.8381117706350096, "grad_norm": 1.3705300092697144, "learning_rate": 9.662894736842105e-05, "loss": 0.4172, "step": 32825 }, { "epoch": 1.8381677679471387, "grad_norm": 1.2172324657440186, "learning_rate": 9.662868421052633e-05, "loss": 0.485, "step": 32826 }, { "epoch": 1.8382237652592677, "grad_norm": 1.6878315210342407, "learning_rate": 9.662842105263157e-05, "loss": 0.4268, "step": 32827 }, { "epoch": 1.8382797625713967, "grad_norm": 1.7038004398345947, "learning_rate": 9.662815789473685e-05, "loss": 0.5283, "step": 32828 }, { "epoch": 1.8383357598835257, "grad_norm": 1.224631667137146, "learning_rate": 9.66278947368421e-05, "loss": 0.5011, "step": 32829 }, { "epoch": 1.8383917571956547, "grad_norm": 1.2574737071990967, "learning_rate": 9.662763157894738e-05, "loss": 0.4075, "step": 32830 }, { "epoch": 1.8384477545077837, "grad_norm": 1.220641016960144, "learning_rate": 9.662736842105264e-05, "loss": 0.483, "step": 32831 }, { "epoch": 1.8385037518199128, "grad_norm": 1.5085011720657349, "learning_rate": 9.66271052631579e-05, "loss": 0.491, "step": 32832 }, { "epoch": 1.8385597491320418, "grad_norm": 1.2420427799224854, "learning_rate": 9.662684210526316e-05, "loss": 0.4541, "step": 32833 }, { "epoch": 1.8386157464441708, "grad_norm": 1.1556251049041748, "learning_rate": 9.662657894736843e-05, "loss": 0.5116, "step": 32834 }, { "epoch": 1.8386717437562998, "grad_norm": 1.3470628261566162, "learning_rate": 9.662631578947369e-05, "loss": 0.4289, "step": 32835 }, { "epoch": 1.8387277410684288, "grad_norm": 1.3866633176803589, 
"learning_rate": 9.662605263157895e-05, "loss": 0.6079, "step": 32836 }, { "epoch": 1.8387837383805579, "grad_norm": 1.2391531467437744, "learning_rate": 9.662578947368421e-05, "loss": 0.3687, "step": 32837 }, { "epoch": 1.8388397356926869, "grad_norm": 1.0916754007339478, "learning_rate": 9.662552631578948e-05, "loss": 0.4057, "step": 32838 }, { "epoch": 1.838895733004816, "grad_norm": 1.2940764427185059, "learning_rate": 9.662526315789474e-05, "loss": 0.4639, "step": 32839 }, { "epoch": 1.838951730316945, "grad_norm": 1.2522588968276978, "learning_rate": 9.6625e-05, "loss": 0.6026, "step": 32840 }, { "epoch": 1.839007727629074, "grad_norm": 1.3938361406326294, "learning_rate": 9.662473684210526e-05, "loss": 0.5013, "step": 32841 }, { "epoch": 1.839063724941203, "grad_norm": 1.405850887298584, "learning_rate": 9.662447368421052e-05, "loss": 0.5068, "step": 32842 }, { "epoch": 1.839119722253332, "grad_norm": 1.6099187135696411, "learning_rate": 9.66242105263158e-05, "loss": 0.3717, "step": 32843 }, { "epoch": 1.839175719565461, "grad_norm": 1.1712437868118286, "learning_rate": 9.662394736842106e-05, "loss": 0.4962, "step": 32844 }, { "epoch": 1.83923171687759, "grad_norm": 1.2748817205429077, "learning_rate": 9.662368421052632e-05, "loss": 0.489, "step": 32845 }, { "epoch": 1.839287714189719, "grad_norm": 1.3727037906646729, "learning_rate": 9.662342105263158e-05, "loss": 0.4432, "step": 32846 }, { "epoch": 1.839343711501848, "grad_norm": 1.0529649257659912, "learning_rate": 9.662315789473685e-05, "loss": 0.4623, "step": 32847 }, { "epoch": 1.839399708813977, "grad_norm": 1.2337660789489746, "learning_rate": 9.662289473684211e-05, "loss": 0.3982, "step": 32848 }, { "epoch": 1.839455706126106, "grad_norm": 1.2151973247528076, "learning_rate": 9.662263157894738e-05, "loss": 0.3726, "step": 32849 }, { "epoch": 1.8395117034382351, "grad_norm": 1.1094932556152344, "learning_rate": 9.662236842105263e-05, "loss": 0.5162, "step": 32850 }, { "epoch": 1.8395677007503641, 
"grad_norm": 1.5417227745056152, "learning_rate": 9.66221052631579e-05, "loss": 0.5746, "step": 32851 }, { "epoch": 1.8396236980624932, "grad_norm": 1.2124770879745483, "learning_rate": 9.662184210526316e-05, "loss": 0.4133, "step": 32852 }, { "epoch": 1.8396796953746222, "grad_norm": 1.2000820636749268, "learning_rate": 9.662157894736843e-05, "loss": 0.428, "step": 32853 }, { "epoch": 1.8397356926867512, "grad_norm": 1.2374606132507324, "learning_rate": 9.662131578947368e-05, "loss": 0.5061, "step": 32854 }, { "epoch": 1.8397916899988802, "grad_norm": 1.285317063331604, "learning_rate": 9.662105263157895e-05, "loss": 0.534, "step": 32855 }, { "epoch": 1.8398476873110092, "grad_norm": 0.9246919751167297, "learning_rate": 9.662078947368421e-05, "loss": 0.3633, "step": 32856 }, { "epoch": 1.8399036846231382, "grad_norm": 1.323366641998291, "learning_rate": 9.662052631578949e-05, "loss": 0.5477, "step": 32857 }, { "epoch": 1.8399596819352673, "grad_norm": 1.421041488647461, "learning_rate": 9.662026315789475e-05, "loss": 0.4274, "step": 32858 }, { "epoch": 1.8400156792473963, "grad_norm": 1.307328462600708, "learning_rate": 9.661999999999999e-05, "loss": 0.3705, "step": 32859 }, { "epoch": 1.8400716765595253, "grad_norm": 1.5341895818710327, "learning_rate": 9.661973684210527e-05, "loss": 0.6223, "step": 32860 }, { "epoch": 1.8401276738716543, "grad_norm": 1.3287591934204102, "learning_rate": 9.661947368421053e-05, "loss": 0.4585, "step": 32861 }, { "epoch": 1.8401836711837833, "grad_norm": 1.434287190437317, "learning_rate": 9.66192105263158e-05, "loss": 0.4387, "step": 32862 }, { "epoch": 1.8402396684959124, "grad_norm": 1.4952448606491089, "learning_rate": 9.661894736842106e-05, "loss": 0.5614, "step": 32863 }, { "epoch": 1.8402956658080412, "grad_norm": 1.2806533575057983, "learning_rate": 9.661868421052632e-05, "loss": 0.4939, "step": 32864 }, { "epoch": 1.8403516631201702, "grad_norm": 2.0783755779266357, "learning_rate": 9.661842105263158e-05, "loss": 0.4356, 
"step": 32865 }, { "epoch": 1.8404076604322992, "grad_norm": 1.4120310544967651, "learning_rate": 9.661815789473685e-05, "loss": 0.5665, "step": 32866 }, { "epoch": 1.8404636577444282, "grad_norm": 1.0840588808059692, "learning_rate": 9.661789473684211e-05, "loss": 0.3623, "step": 32867 }, { "epoch": 1.8405196550565572, "grad_norm": 1.24738347530365, "learning_rate": 9.661763157894737e-05, "loss": 0.5108, "step": 32868 }, { "epoch": 1.8405756523686863, "grad_norm": 1.4922033548355103, "learning_rate": 9.661736842105263e-05, "loss": 0.4668, "step": 32869 }, { "epoch": 1.8406316496808153, "grad_norm": 1.4111509323120117, "learning_rate": 9.66171052631579e-05, "loss": 0.4468, "step": 32870 }, { "epoch": 1.8406876469929443, "grad_norm": 2.189815044403076, "learning_rate": 9.661684210526316e-05, "loss": 0.4086, "step": 32871 }, { "epoch": 1.8407436443050733, "grad_norm": 1.1416786909103394, "learning_rate": 9.661657894736842e-05, "loss": 0.4294, "step": 32872 }, { "epoch": 1.8407996416172023, "grad_norm": 1.2118918895721436, "learning_rate": 9.661631578947368e-05, "loss": 0.4384, "step": 32873 }, { "epoch": 1.8408556389293314, "grad_norm": 1.250840425491333, "learning_rate": 9.661605263157896e-05, "loss": 0.4667, "step": 32874 }, { "epoch": 1.8409116362414604, "grad_norm": 1.3781054019927979, "learning_rate": 9.661578947368422e-05, "loss": 0.5255, "step": 32875 }, { "epoch": 1.8409676335535894, "grad_norm": 1.709642767906189, "learning_rate": 9.661552631578948e-05, "loss": 0.7597, "step": 32876 }, { "epoch": 1.8410236308657184, "grad_norm": 1.3381803035736084, "learning_rate": 9.661526315789474e-05, "loss": 0.5986, "step": 32877 }, { "epoch": 1.8410796281778474, "grad_norm": 1.1220204830169678, "learning_rate": 9.6615e-05, "loss": 0.4215, "step": 32878 }, { "epoch": 1.8411356254899764, "grad_norm": 1.6010150909423828, "learning_rate": 9.661473684210527e-05, "loss": 0.4468, "step": 32879 }, { "epoch": 1.8411916228021055, "grad_norm": 1.2880184650421143, "learning_rate": 
9.661447368421053e-05, "loss": 0.4617, "step": 32880 }, { "epoch": 1.8412476201142345, "grad_norm": 1.3595713376998901, "learning_rate": 9.66142105263158e-05, "loss": 0.5685, "step": 32881 }, { "epoch": 1.8413036174263635, "grad_norm": 1.3904837369918823, "learning_rate": 9.661394736842105e-05, "loss": 0.5028, "step": 32882 }, { "epoch": 1.8413596147384925, "grad_norm": 1.2443723678588867, "learning_rate": 9.661368421052632e-05, "loss": 0.3857, "step": 32883 }, { "epoch": 1.8414156120506215, "grad_norm": 1.3925625085830688, "learning_rate": 9.661342105263158e-05, "loss": 0.5692, "step": 32884 }, { "epoch": 1.8414716093627506, "grad_norm": 1.2612394094467163, "learning_rate": 9.661315789473685e-05, "loss": 0.4527, "step": 32885 }, { "epoch": 1.8415276066748796, "grad_norm": 1.4269003868103027, "learning_rate": 9.661289473684211e-05, "loss": 0.5568, "step": 32886 }, { "epoch": 1.8415836039870086, "grad_norm": 1.358361005783081, "learning_rate": 9.661263157894737e-05, "loss": 0.472, "step": 32887 }, { "epoch": 1.8416396012991376, "grad_norm": 1.2917922735214233, "learning_rate": 9.661236842105263e-05, "loss": 0.3534, "step": 32888 }, { "epoch": 1.8416955986112666, "grad_norm": 1.4576170444488525, "learning_rate": 9.661210526315791e-05, "loss": 0.5876, "step": 32889 }, { "epoch": 1.8417515959233957, "grad_norm": 1.164176344871521, "learning_rate": 9.661184210526317e-05, "loss": 0.4028, "step": 32890 }, { "epoch": 1.8418075932355247, "grad_norm": 1.0955661535263062, "learning_rate": 9.661157894736843e-05, "loss": 0.3932, "step": 32891 }, { "epoch": 1.8418635905476537, "grad_norm": 1.2213622331619263, "learning_rate": 9.661131578947369e-05, "loss": 0.3994, "step": 32892 }, { "epoch": 1.8419195878597827, "grad_norm": 1.3236523866653442, "learning_rate": 9.661105263157895e-05, "loss": 0.3853, "step": 32893 }, { "epoch": 1.8419755851719117, "grad_norm": 1.3061505556106567, "learning_rate": 9.661078947368422e-05, "loss": 0.5589, "step": 32894 }, { "epoch": 
1.8420315824840408, "grad_norm": 1.585598111152649, "learning_rate": 9.661052631578948e-05, "loss": 0.4717, "step": 32895 }, { "epoch": 1.8420875797961698, "grad_norm": 1.441094994544983, "learning_rate": 9.661026315789474e-05, "loss": 0.5385, "step": 32896 }, { "epoch": 1.8421435771082988, "grad_norm": 1.3344049453735352, "learning_rate": 9.661e-05, "loss": 0.4717, "step": 32897 }, { "epoch": 1.8421995744204278, "grad_norm": 1.2032164335250854, "learning_rate": 9.660973684210527e-05, "loss": 0.336, "step": 32898 }, { "epoch": 1.8422555717325568, "grad_norm": 1.133961796760559, "learning_rate": 9.660947368421053e-05, "loss": 0.4464, "step": 32899 }, { "epoch": 1.8423115690446858, "grad_norm": 33.66267395019531, "learning_rate": 9.660921052631579e-05, "loss": 0.4082, "step": 32900 }, { "epoch": 1.8423675663568149, "grad_norm": 1.3558391332626343, "learning_rate": 9.660894736842105e-05, "loss": 0.4596, "step": 32901 }, { "epoch": 1.8424235636689439, "grad_norm": 1.2404605150222778, "learning_rate": 9.660868421052632e-05, "loss": 0.3912, "step": 32902 }, { "epoch": 1.842479560981073, "grad_norm": 1.6435612440109253, "learning_rate": 9.660842105263158e-05, "loss": 0.5293, "step": 32903 }, { "epoch": 1.842535558293202, "grad_norm": 1.2336878776550293, "learning_rate": 9.660815789473686e-05, "loss": 0.4404, "step": 32904 }, { "epoch": 1.842591555605331, "grad_norm": 1.19773268699646, "learning_rate": 9.66078947368421e-05, "loss": 0.4471, "step": 32905 }, { "epoch": 1.84264755291746, "grad_norm": 1.265150547027588, "learning_rate": 9.660763157894738e-05, "loss": 0.4126, "step": 32906 }, { "epoch": 1.842703550229589, "grad_norm": 1.2897471189498901, "learning_rate": 9.660736842105264e-05, "loss": 0.3866, "step": 32907 }, { "epoch": 1.842759547541718, "grad_norm": 1.2433923482894897, "learning_rate": 9.660710526315791e-05, "loss": 0.4181, "step": 32908 }, { "epoch": 1.842815544853847, "grad_norm": 1.3655455112457275, "learning_rate": 9.660684210526316e-05, "loss": 0.405, 
"step": 32909 }, { "epoch": 1.842871542165976, "grad_norm": 1.5096124410629272, "learning_rate": 9.660657894736842e-05, "loss": 0.3937, "step": 32910 }, { "epoch": 1.842927539478105, "grad_norm": 1.2894468307495117, "learning_rate": 9.660631578947369e-05, "loss": 0.3344, "step": 32911 }, { "epoch": 1.842983536790234, "grad_norm": 1.274783730506897, "learning_rate": 9.660605263157895e-05, "loss": 0.483, "step": 32912 }, { "epoch": 1.843039534102363, "grad_norm": 1.2660008668899536, "learning_rate": 9.660578947368422e-05, "loss": 0.429, "step": 32913 }, { "epoch": 1.8430955314144921, "grad_norm": 1.3275043964385986, "learning_rate": 9.660552631578947e-05, "loss": 0.3887, "step": 32914 }, { "epoch": 1.8431515287266211, "grad_norm": 1.3385419845581055, "learning_rate": 9.660526315789474e-05, "loss": 0.4337, "step": 32915 }, { "epoch": 1.8432075260387502, "grad_norm": 1.1293500661849976, "learning_rate": 9.6605e-05, "loss": 0.3122, "step": 32916 }, { "epoch": 1.8432635233508792, "grad_norm": 1.2302464246749878, "learning_rate": 9.660473684210527e-05, "loss": 0.4735, "step": 32917 }, { "epoch": 1.8433195206630082, "grad_norm": 1.0381650924682617, "learning_rate": 9.660447368421053e-05, "loss": 0.3316, "step": 32918 }, { "epoch": 1.8433755179751372, "grad_norm": 1.338551640510559, "learning_rate": 9.66042105263158e-05, "loss": 0.4724, "step": 32919 }, { "epoch": 1.8434315152872662, "grad_norm": 1.4086332321166992, "learning_rate": 9.660394736842105e-05, "loss": 0.3751, "step": 32920 }, { "epoch": 1.8434875125993953, "grad_norm": 2.265753746032715, "learning_rate": 9.660368421052633e-05, "loss": 0.5968, "step": 32921 }, { "epoch": 1.8435435099115243, "grad_norm": 1.1701689958572388, "learning_rate": 9.660342105263159e-05, "loss": 0.4198, "step": 32922 }, { "epoch": 1.8435995072236533, "grad_norm": 1.2380021810531616, "learning_rate": 9.660315789473685e-05, "loss": 0.3859, "step": 32923 }, { "epoch": 1.8436555045357823, "grad_norm": 1.7832541465759277, "learning_rate": 
9.66028947368421e-05, "loss": 0.4776, "step": 32924 }, { "epoch": 1.8437115018479113, "grad_norm": 1.1675121784210205, "learning_rate": 9.660263157894738e-05, "loss": 0.3027, "step": 32925 }, { "epoch": 1.8437674991600403, "grad_norm": 1.0559121370315552, "learning_rate": 9.660236842105264e-05, "loss": 0.3486, "step": 32926 }, { "epoch": 1.8438234964721694, "grad_norm": 1.1947718858718872, "learning_rate": 9.66021052631579e-05, "loss": 0.384, "step": 32927 }, { "epoch": 1.8438794937842984, "grad_norm": 1.0962809324264526, "learning_rate": 9.660184210526316e-05, "loss": 0.4411, "step": 32928 }, { "epoch": 1.8439354910964274, "grad_norm": 1.8183112144470215, "learning_rate": 9.660157894736842e-05, "loss": 0.5891, "step": 32929 }, { "epoch": 1.8439914884085564, "grad_norm": 1.551383137702942, "learning_rate": 9.660131578947369e-05, "loss": 0.5877, "step": 32930 }, { "epoch": 1.8440474857206854, "grad_norm": 1.1563749313354492, "learning_rate": 9.660105263157895e-05, "loss": 0.4114, "step": 32931 }, { "epoch": 1.8441034830328145, "grad_norm": 1.4932472705841064, "learning_rate": 9.660078947368421e-05, "loss": 0.6754, "step": 32932 }, { "epoch": 1.8441594803449435, "grad_norm": 1.3673840761184692, "learning_rate": 9.660052631578947e-05, "loss": 0.539, "step": 32933 }, { "epoch": 1.8442154776570725, "grad_norm": 0.9999203085899353, "learning_rate": 9.660026315789474e-05, "loss": 0.4372, "step": 32934 }, { "epoch": 1.8442714749692015, "grad_norm": 1.6258403062820435, "learning_rate": 9.66e-05, "loss": 0.4996, "step": 32935 }, { "epoch": 1.8443274722813305, "grad_norm": 1.0690977573394775, "learning_rate": 9.659973684210528e-05, "loss": 0.3502, "step": 32936 }, { "epoch": 1.8443834695934596, "grad_norm": 1.2587487697601318, "learning_rate": 9.659947368421052e-05, "loss": 0.504, "step": 32937 }, { "epoch": 1.8444394669055886, "grad_norm": 1.3194084167480469, "learning_rate": 9.65992105263158e-05, "loss": 0.5542, "step": 32938 }, { "epoch": 1.8444954642177176, "grad_norm": 
1.546574354171753, "learning_rate": 9.659894736842106e-05, "loss": 0.506, "step": 32939 }, { "epoch": 1.8445514615298466, "grad_norm": 1.2619967460632324, "learning_rate": 9.659868421052633e-05, "loss": 0.4009, "step": 32940 }, { "epoch": 1.8446074588419756, "grad_norm": 1.1226036548614502, "learning_rate": 9.659842105263159e-05, "loss": 0.3289, "step": 32941 }, { "epoch": 1.8446634561541047, "grad_norm": 1.6209882497787476, "learning_rate": 9.659815789473685e-05, "loss": 0.4991, "step": 32942 }, { "epoch": 1.8447194534662337, "grad_norm": 1.3435505628585815, "learning_rate": 9.659789473684211e-05, "loss": 0.4163, "step": 32943 }, { "epoch": 1.8447754507783627, "grad_norm": 1.4991600513458252, "learning_rate": 9.659763157894738e-05, "loss": 0.4847, "step": 32944 }, { "epoch": 1.8448314480904917, "grad_norm": 1.529256820678711, "learning_rate": 9.659736842105264e-05, "loss": 0.4851, "step": 32945 }, { "epoch": 1.8448874454026207, "grad_norm": 1.960693597793579, "learning_rate": 9.659710526315789e-05, "loss": 0.5274, "step": 32946 }, { "epoch": 1.8449434427147495, "grad_norm": 1.2479568719863892, "learning_rate": 9.659684210526316e-05, "loss": 0.3408, "step": 32947 }, { "epoch": 1.8449994400268785, "grad_norm": 1.3500275611877441, "learning_rate": 9.659657894736842e-05, "loss": 0.385, "step": 32948 }, { "epoch": 1.8450554373390076, "grad_norm": 1.42543625831604, "learning_rate": 9.65963157894737e-05, "loss": 0.4586, "step": 32949 }, { "epoch": 1.8451114346511366, "grad_norm": 1.253745198249817, "learning_rate": 9.659605263157895e-05, "loss": 0.4494, "step": 32950 }, { "epoch": 1.8451674319632656, "grad_norm": 1.1142747402191162, "learning_rate": 9.659578947368421e-05, "loss": 0.3687, "step": 32951 }, { "epoch": 1.8452234292753946, "grad_norm": 1.284039855003357, "learning_rate": 9.659552631578947e-05, "loss": 0.4253, "step": 32952 }, { "epoch": 1.8452794265875236, "grad_norm": 1.646706223487854, "learning_rate": 9.659526315789475e-05, "loss": 0.4558, "step": 32953 }, 
{ "epoch": 1.8453354238996527, "grad_norm": 1.3232616186141968, "learning_rate": 9.6595e-05, "loss": 0.4442, "step": 32954 }, { "epoch": 1.8453914212117817, "grad_norm": 1.055118441581726, "learning_rate": 9.659473684210527e-05, "loss": 0.3636, "step": 32955 }, { "epoch": 1.8454474185239107, "grad_norm": 1.055234670639038, "learning_rate": 9.659447368421053e-05, "loss": 0.383, "step": 32956 }, { "epoch": 1.8455034158360397, "grad_norm": 1.223559856414795, "learning_rate": 9.65942105263158e-05, "loss": 0.4492, "step": 32957 }, { "epoch": 1.8455594131481687, "grad_norm": 1.0541549921035767, "learning_rate": 9.659394736842106e-05, "loss": 0.4253, "step": 32958 }, { "epoch": 1.8456154104602978, "grad_norm": 1.2929306030273438, "learning_rate": 9.659368421052633e-05, "loss": 0.4008, "step": 32959 }, { "epoch": 1.8456714077724268, "grad_norm": 1.321595311164856, "learning_rate": 9.659342105263158e-05, "loss": 0.5088, "step": 32960 }, { "epoch": 1.8457274050845558, "grad_norm": 1.2284960746765137, "learning_rate": 9.659315789473685e-05, "loss": 0.3958, "step": 32961 }, { "epoch": 1.8457834023966848, "grad_norm": 1.2104394435882568, "learning_rate": 9.659289473684211e-05, "loss": 0.4704, "step": 32962 }, { "epoch": 1.8458393997088138, "grad_norm": 1.3578954935073853, "learning_rate": 9.659263157894737e-05, "loss": 0.411, "step": 32963 }, { "epoch": 1.8458953970209429, "grad_norm": 1.5557917356491089, "learning_rate": 9.659236842105263e-05, "loss": 0.3553, "step": 32964 }, { "epoch": 1.8459513943330719, "grad_norm": 1.177769660949707, "learning_rate": 9.659210526315789e-05, "loss": 0.3891, "step": 32965 }, { "epoch": 1.846007391645201, "grad_norm": 1.1529127359390259, "learning_rate": 9.659184210526316e-05, "loss": 0.5096, "step": 32966 }, { "epoch": 1.84606338895733, "grad_norm": 1.1034342050552368, "learning_rate": 9.659157894736842e-05, "loss": 0.3543, "step": 32967 }, { "epoch": 1.846119386269459, "grad_norm": 1.303011178970337, "learning_rate": 9.65913157894737e-05, 
"loss": 0.5574, "step": 32968 }, { "epoch": 1.846175383581588, "grad_norm": 1.259000539779663, "learning_rate": 9.659105263157894e-05, "loss": 0.5287, "step": 32969 }, { "epoch": 1.846231380893717, "grad_norm": 1.2804316282272339, "learning_rate": 9.659078947368422e-05, "loss": 0.4154, "step": 32970 }, { "epoch": 1.846287378205846, "grad_norm": 1.7210476398468018, "learning_rate": 9.659052631578948e-05, "loss": 0.4942, "step": 32971 }, { "epoch": 1.846343375517975, "grad_norm": 1.3382210731506348, "learning_rate": 9.659026315789475e-05, "loss": 0.603, "step": 32972 }, { "epoch": 1.846399372830104, "grad_norm": 1.4095916748046875, "learning_rate": 9.659000000000001e-05, "loss": 0.5391, "step": 32973 }, { "epoch": 1.846455370142233, "grad_norm": 1.493009090423584, "learning_rate": 9.658973684210527e-05, "loss": 0.5056, "step": 32974 }, { "epoch": 1.846511367454362, "grad_norm": 1.3601634502410889, "learning_rate": 9.658947368421053e-05, "loss": 0.5551, "step": 32975 }, { "epoch": 1.846567364766491, "grad_norm": 1.2282062768936157, "learning_rate": 9.65892105263158e-05, "loss": 0.4257, "step": 32976 }, { "epoch": 1.84662336207862, "grad_norm": 1.2335641384124756, "learning_rate": 9.658894736842106e-05, "loss": 0.4919, "step": 32977 }, { "epoch": 1.8466793593907491, "grad_norm": 1.1923980712890625, "learning_rate": 9.658868421052632e-05, "loss": 0.4678, "step": 32978 }, { "epoch": 1.8467353567028781, "grad_norm": 1.159430742263794, "learning_rate": 9.658842105263158e-05, "loss": 0.4693, "step": 32979 }, { "epoch": 1.8467913540150072, "grad_norm": 2.119084119796753, "learning_rate": 9.658815789473684e-05, "loss": 0.5571, "step": 32980 }, { "epoch": 1.8468473513271362, "grad_norm": 1.2369295358657837, "learning_rate": 9.658789473684211e-05, "loss": 0.5934, "step": 32981 }, { "epoch": 1.8469033486392652, "grad_norm": 1.211553692817688, "learning_rate": 9.658763157894737e-05, "loss": 0.4896, "step": 32982 }, { "epoch": 1.8469593459513942, "grad_norm": 1.5806540250778198, 
"learning_rate": 9.658736842105263e-05, "loss": 0.5435, "step": 32983 }, { "epoch": 1.8470153432635232, "grad_norm": 1.3230743408203125, "learning_rate": 9.658710526315789e-05, "loss": 0.507, "step": 32984 }, { "epoch": 1.8470713405756523, "grad_norm": 1.4513412714004517, "learning_rate": 9.658684210526317e-05, "loss": 0.4663, "step": 32985 }, { "epoch": 1.8471273378877813, "grad_norm": 1.4557348489761353, "learning_rate": 9.658657894736843e-05, "loss": 0.3542, "step": 32986 }, { "epoch": 1.8471833351999103, "grad_norm": 1.183685302734375, "learning_rate": 9.658631578947369e-05, "loss": 0.4862, "step": 32987 }, { "epoch": 1.8472393325120393, "grad_norm": 1.3003326654434204, "learning_rate": 9.658605263157895e-05, "loss": 0.5666, "step": 32988 }, { "epoch": 1.8472953298241683, "grad_norm": 1.2486693859100342, "learning_rate": 9.658578947368422e-05, "loss": 0.451, "step": 32989 }, { "epoch": 1.8473513271362973, "grad_norm": 1.376805305480957, "learning_rate": 9.658552631578948e-05, "loss": 0.5394, "step": 32990 }, { "epoch": 1.8474073244484264, "grad_norm": 1.3795126676559448, "learning_rate": 9.658526315789475e-05, "loss": 0.4197, "step": 32991 }, { "epoch": 1.8474633217605554, "grad_norm": 1.3613721132278442, "learning_rate": 9.6585e-05, "loss": 0.4942, "step": 32992 }, { "epoch": 1.8475193190726844, "grad_norm": 1.049103021621704, "learning_rate": 9.658473684210527e-05, "loss": 0.3842, "step": 32993 }, { "epoch": 1.8475753163848134, "grad_norm": 1.1341272592544556, "learning_rate": 9.658447368421053e-05, "loss": 0.4122, "step": 32994 }, { "epoch": 1.8476313136969424, "grad_norm": 4.548651218414307, "learning_rate": 9.65842105263158e-05, "loss": 0.3641, "step": 32995 }, { "epoch": 1.8476873110090715, "grad_norm": 1.4399057626724243, "learning_rate": 9.658394736842106e-05, "loss": 0.5778, "step": 32996 }, { "epoch": 1.8477433083212005, "grad_norm": 1.3400887250900269, "learning_rate": 9.658368421052631e-05, "loss": 0.3781, "step": 32997 }, { "epoch": 
1.8477993056333295, "grad_norm": 1.4371615648269653, "learning_rate": 9.658342105263158e-05, "loss": 0.397, "step": 32998 }, { "epoch": 1.8478553029454585, "grad_norm": 1.1246460676193237, "learning_rate": 9.658315789473684e-05, "loss": 0.3377, "step": 32999 }, { "epoch": 1.8479113002575875, "grad_norm": 1.2970534563064575, "learning_rate": 9.658289473684212e-05, "loss": 0.4319, "step": 33000 }, { "epoch": 1.8479672975697166, "grad_norm": 1.1445130109786987, "learning_rate": 9.658263157894736e-05, "loss": 0.4152, "step": 33001 }, { "epoch": 1.8480232948818456, "grad_norm": 1.2730820178985596, "learning_rate": 9.658236842105264e-05, "loss": 0.498, "step": 33002 }, { "epoch": 1.8480792921939746, "grad_norm": 1.1291395425796509, "learning_rate": 9.65821052631579e-05, "loss": 0.3832, "step": 33003 }, { "epoch": 1.8481352895061036, "grad_norm": 1.567795753479004, "learning_rate": 9.658184210526317e-05, "loss": 0.4268, "step": 33004 }, { "epoch": 1.8481912868182326, "grad_norm": 1.1752840280532837, "learning_rate": 9.658157894736843e-05, "loss": 0.501, "step": 33005 }, { "epoch": 1.8482472841303617, "grad_norm": 1.0759519338607788, "learning_rate": 9.658131578947369e-05, "loss": 0.3402, "step": 33006 }, { "epoch": 1.8483032814424907, "grad_norm": 1.3437083959579468, "learning_rate": 9.658105263157895e-05, "loss": 0.4244, "step": 33007 }, { "epoch": 1.8483592787546197, "grad_norm": 1.3966819047927856, "learning_rate": 9.658078947368422e-05, "loss": 0.5113, "step": 33008 }, { "epoch": 1.8484152760667487, "grad_norm": 7.844576835632324, "learning_rate": 9.658052631578948e-05, "loss": 0.5597, "step": 33009 }, { "epoch": 1.8484712733788777, "grad_norm": 1.2463321685791016, "learning_rate": 9.658026315789474e-05, "loss": 0.3794, "step": 33010 }, { "epoch": 1.8485272706910068, "grad_norm": 1.2907909154891968, "learning_rate": 9.658e-05, "loss": 0.4675, "step": 33011 }, { "epoch": 1.8485832680031358, "grad_norm": 1.0820895433425903, "learning_rate": 9.657973684210527e-05, 
"loss": 0.4019, "step": 33012 }, { "epoch": 1.8486392653152648, "grad_norm": 1.0687875747680664, "learning_rate": 9.657947368421053e-05, "loss": 0.3793, "step": 33013 }, { "epoch": 1.8486952626273938, "grad_norm": 1.2128746509552002, "learning_rate": 9.657921052631579e-05, "loss": 0.4496, "step": 33014 }, { "epoch": 1.8487512599395228, "grad_norm": 1.3045095205307007, "learning_rate": 9.657894736842105e-05, "loss": 0.5148, "step": 33015 }, { "epoch": 1.8488072572516518, "grad_norm": 1.2198787927627563, "learning_rate": 9.657868421052631e-05, "loss": 0.444, "step": 33016 }, { "epoch": 1.8488632545637809, "grad_norm": 1.0958393812179565, "learning_rate": 9.657842105263159e-05, "loss": 0.4635, "step": 33017 }, { "epoch": 1.8489192518759099, "grad_norm": 1.2258250713348389, "learning_rate": 9.657815789473685e-05, "loss": 0.4792, "step": 33018 }, { "epoch": 1.848975249188039, "grad_norm": 1.2343659400939941, "learning_rate": 9.65778947368421e-05, "loss": 0.5383, "step": 33019 }, { "epoch": 1.849031246500168, "grad_norm": 1.6550458669662476, "learning_rate": 9.657763157894736e-05, "loss": 0.4201, "step": 33020 }, { "epoch": 1.849087243812297, "grad_norm": 1.3469651937484741, "learning_rate": 9.657736842105264e-05, "loss": 0.3987, "step": 33021 }, { "epoch": 1.849143241124426, "grad_norm": 1.2976679801940918, "learning_rate": 9.65771052631579e-05, "loss": 0.4981, "step": 33022 }, { "epoch": 1.849199238436555, "grad_norm": 1.2716412544250488, "learning_rate": 9.657684210526317e-05, "loss": 0.4696, "step": 33023 }, { "epoch": 1.849255235748684, "grad_norm": 1.000976324081421, "learning_rate": 9.657657894736842e-05, "loss": 0.3443, "step": 33024 }, { "epoch": 1.849311233060813, "grad_norm": 1.4338499307632446, "learning_rate": 9.657631578947369e-05, "loss": 0.4904, "step": 33025 }, { "epoch": 1.849367230372942, "grad_norm": 1.2978743314743042, "learning_rate": 9.657605263157895e-05, "loss": 0.6887, "step": 33026 }, { "epoch": 1.849423227685071, "grad_norm": 
1.2637507915496826, "learning_rate": 9.657578947368422e-05, "loss": 0.4435, "step": 33027 }, { "epoch": 1.8494792249972, "grad_norm": 1.7632912397384644, "learning_rate": 9.657552631578948e-05, "loss": 0.4266, "step": 33028 }, { "epoch": 1.849535222309329, "grad_norm": 1.2291784286499023, "learning_rate": 9.657526315789474e-05, "loss": 0.4696, "step": 33029 }, { "epoch": 1.8495912196214581, "grad_norm": 1.2759946584701538, "learning_rate": 9.6575e-05, "loss": 0.445, "step": 33030 }, { "epoch": 1.8496472169335871, "grad_norm": 1.1573654413223267, "learning_rate": 9.657473684210526e-05, "loss": 0.4667, "step": 33031 }, { "epoch": 1.8497032142457162, "grad_norm": 1.4651817083358765, "learning_rate": 9.657447368421054e-05, "loss": 0.4681, "step": 33032 }, { "epoch": 1.8497592115578452, "grad_norm": 1.3305976390838623, "learning_rate": 9.65742105263158e-05, "loss": 0.6046, "step": 33033 }, { "epoch": 1.8498152088699742, "grad_norm": 1.2923963069915771, "learning_rate": 9.657394736842106e-05, "loss": 0.7058, "step": 33034 }, { "epoch": 1.8498712061821032, "grad_norm": 1.3880723714828491, "learning_rate": 9.657368421052632e-05, "loss": 0.5578, "step": 33035 }, { "epoch": 1.8499272034942322, "grad_norm": 1.334620714187622, "learning_rate": 9.657342105263159e-05, "loss": 0.4341, "step": 33036 }, { "epoch": 1.8499832008063612, "grad_norm": 1.2041887044906616, "learning_rate": 9.657315789473685e-05, "loss": 0.3855, "step": 33037 }, { "epoch": 1.8500391981184903, "grad_norm": 1.3424389362335205, "learning_rate": 9.657289473684211e-05, "loss": 0.4479, "step": 33038 }, { "epoch": 1.8500951954306193, "grad_norm": 1.2787641286849976, "learning_rate": 9.657263157894737e-05, "loss": 0.4199, "step": 33039 }, { "epoch": 1.8501511927427483, "grad_norm": 1.672153353691101, "learning_rate": 9.657236842105264e-05, "loss": 0.6242, "step": 33040 }, { "epoch": 1.8502071900548773, "grad_norm": 1.437294840812683, "learning_rate": 9.65721052631579e-05, "loss": 0.5694, "step": 33041 }, { 
"epoch": 1.8502631873670063, "grad_norm": 1.0827196836471558, "learning_rate": 9.657184210526316e-05, "loss": 0.3911, "step": 33042 }, { "epoch": 1.8503191846791354, "grad_norm": 1.386171817779541, "learning_rate": 9.657157894736842e-05, "loss": 0.6313, "step": 33043 }, { "epoch": 1.8503751819912644, "grad_norm": 1.3338085412979126, "learning_rate": 9.65713157894737e-05, "loss": 0.4517, "step": 33044 }, { "epoch": 1.8504311793033934, "grad_norm": 1.337959885597229, "learning_rate": 9.657105263157895e-05, "loss": 0.3323, "step": 33045 }, { "epoch": 1.8504871766155224, "grad_norm": 1.3833059072494507, "learning_rate": 9.657078947368423e-05, "loss": 0.4781, "step": 33046 }, { "epoch": 1.8505431739276514, "grad_norm": 1.4422019720077515, "learning_rate": 9.657052631578947e-05, "loss": 0.5218, "step": 33047 }, { "epoch": 1.8505991712397805, "grad_norm": 1.213630199432373, "learning_rate": 9.657026315789475e-05, "loss": 0.3653, "step": 33048 }, { "epoch": 1.8506551685519095, "grad_norm": 1.1104912757873535, "learning_rate": 9.657e-05, "loss": 0.4329, "step": 33049 }, { "epoch": 1.8507111658640385, "grad_norm": 1.2492437362670898, "learning_rate": 9.656973684210527e-05, "loss": 0.3078, "step": 33050 }, { "epoch": 1.8507671631761675, "grad_norm": 1.1975221633911133, "learning_rate": 9.656947368421054e-05, "loss": 0.4118, "step": 33051 }, { "epoch": 1.8508231604882965, "grad_norm": 1.2960602045059204, "learning_rate": 9.656921052631578e-05, "loss": 0.4938, "step": 33052 }, { "epoch": 1.8508791578004256, "grad_norm": 3.541465997695923, "learning_rate": 9.656894736842106e-05, "loss": 0.5232, "step": 33053 }, { "epoch": 1.8509351551125546, "grad_norm": 1.584956169128418, "learning_rate": 9.656868421052632e-05, "loss": 0.6109, "step": 33054 }, { "epoch": 1.8509911524246836, "grad_norm": 1.1987066268920898, "learning_rate": 9.656842105263159e-05, "loss": 0.4364, "step": 33055 }, { "epoch": 1.8510471497368126, "grad_norm": 1.2029398679733276, "learning_rate": 
9.656815789473684e-05, "loss": 0.4021, "step": 33056 }, { "epoch": 1.8511031470489416, "grad_norm": 1.3097554445266724, "learning_rate": 9.656789473684211e-05, "loss": 0.4785, "step": 33057 }, { "epoch": 1.8511591443610707, "grad_norm": 4.961433410644531, "learning_rate": 9.656763157894737e-05, "loss": 0.4511, "step": 33058 }, { "epoch": 1.8512151416731997, "grad_norm": 1.3062509298324585, "learning_rate": 9.656736842105264e-05, "loss": 0.5214, "step": 33059 }, { "epoch": 1.8512711389853287, "grad_norm": 0.9713279604911804, "learning_rate": 9.65671052631579e-05, "loss": 0.2653, "step": 33060 }, { "epoch": 1.8513271362974577, "grad_norm": 1.042035460472107, "learning_rate": 9.656684210526316e-05, "loss": 0.4177, "step": 33061 }, { "epoch": 1.8513831336095867, "grad_norm": 1.2576196193695068, "learning_rate": 9.656657894736842e-05, "loss": 0.4957, "step": 33062 }, { "epoch": 1.8514391309217157, "grad_norm": 1.2618991136550903, "learning_rate": 9.65663157894737e-05, "loss": 0.4992, "step": 33063 }, { "epoch": 1.8514951282338448, "grad_norm": 1.0386995077133179, "learning_rate": 9.656605263157896e-05, "loss": 0.3648, "step": 33064 }, { "epoch": 1.8515511255459738, "grad_norm": 1.4311909675598145, "learning_rate": 9.656578947368422e-05, "loss": 0.4981, "step": 33065 }, { "epoch": 1.8516071228581028, "grad_norm": 1.0725550651550293, "learning_rate": 9.656552631578948e-05, "loss": 0.425, "step": 33066 }, { "epoch": 1.8516631201702318, "grad_norm": 1.2727642059326172, "learning_rate": 9.656526315789473e-05, "loss": 0.464, "step": 33067 }, { "epoch": 1.8517191174823608, "grad_norm": 1.134121060371399, "learning_rate": 9.656500000000001e-05, "loss": 0.3897, "step": 33068 }, { "epoch": 1.8517751147944899, "grad_norm": 1.1708431243896484, "learning_rate": 9.656473684210527e-05, "loss": 0.4454, "step": 33069 }, { "epoch": 1.8518311121066189, "grad_norm": 1.1136401891708374, "learning_rate": 9.656447368421053e-05, "loss": 0.3273, "step": 33070 }, { "epoch": 1.851887109418748, 
"grad_norm": 1.343088150024414, "learning_rate": 9.656421052631579e-05, "loss": 0.4572, "step": 33071 }, { "epoch": 1.851943106730877, "grad_norm": 1.1251026391983032, "learning_rate": 9.656394736842106e-05, "loss": 0.3703, "step": 33072 }, { "epoch": 1.851999104043006, "grad_norm": 1.3409059047698975, "learning_rate": 9.656368421052632e-05, "loss": 0.5208, "step": 33073 }, { "epoch": 1.852055101355135, "grad_norm": 1.2337473630905151, "learning_rate": 9.656342105263158e-05, "loss": 0.4543, "step": 33074 }, { "epoch": 1.852111098667264, "grad_norm": 1.4777143001556396, "learning_rate": 9.656315789473684e-05, "loss": 0.6273, "step": 33075 }, { "epoch": 1.852167095979393, "grad_norm": 3.0945606231689453, "learning_rate": 9.656289473684211e-05, "loss": 0.4552, "step": 33076 }, { "epoch": 1.852223093291522, "grad_norm": 1.309761643409729, "learning_rate": 9.656263157894737e-05, "loss": 0.5515, "step": 33077 }, { "epoch": 1.852279090603651, "grad_norm": 1.0365557670593262, "learning_rate": 9.656236842105265e-05, "loss": 0.4608, "step": 33078 }, { "epoch": 1.85233508791578, "grad_norm": 1.2894647121429443, "learning_rate": 9.656210526315789e-05, "loss": 0.5498, "step": 33079 }, { "epoch": 1.852391085227909, "grad_norm": 1.4224568605422974, "learning_rate": 9.656184210526317e-05, "loss": 0.4155, "step": 33080 }, { "epoch": 1.852447082540038, "grad_norm": 1.1105976104736328, "learning_rate": 9.656157894736843e-05, "loss": 0.38, "step": 33081 }, { "epoch": 1.8525030798521671, "grad_norm": 1.318962574005127, "learning_rate": 9.65613157894737e-05, "loss": 0.5226, "step": 33082 }, { "epoch": 1.8525590771642961, "grad_norm": 1.0393366813659668, "learning_rate": 9.656105263157896e-05, "loss": 0.3868, "step": 33083 }, { "epoch": 1.8526150744764251, "grad_norm": 1.1849257946014404, "learning_rate": 9.65607894736842e-05, "loss": 0.5016, "step": 33084 }, { "epoch": 1.8526710717885542, "grad_norm": 1.0333153009414673, "learning_rate": 9.656052631578948e-05, "loss": 0.4353, "step": 
33085 }, { "epoch": 1.8527270691006832, "grad_norm": 1.3058979511260986, "learning_rate": 9.656026315789474e-05, "loss": 0.4755, "step": 33086 }, { "epoch": 1.8527830664128122, "grad_norm": 1.299809217453003, "learning_rate": 9.656000000000001e-05, "loss": 0.4371, "step": 33087 }, { "epoch": 1.8528390637249412, "grad_norm": 1.0798301696777344, "learning_rate": 9.655973684210527e-05, "loss": 0.3373, "step": 33088 }, { "epoch": 1.8528950610370702, "grad_norm": 1.1431992053985596, "learning_rate": 9.655947368421053e-05, "loss": 0.3428, "step": 33089 }, { "epoch": 1.8529510583491993, "grad_norm": 1.3724161386489868, "learning_rate": 9.655921052631579e-05, "loss": 0.4823, "step": 33090 }, { "epoch": 1.8530070556613283, "grad_norm": 1.2289235591888428, "learning_rate": 9.655894736842106e-05, "loss": 0.4292, "step": 33091 }, { "epoch": 1.8530630529734573, "grad_norm": 1.367658257484436, "learning_rate": 9.655868421052632e-05, "loss": 0.5845, "step": 33092 }, { "epoch": 1.8531190502855863, "grad_norm": 1.4230901002883911, "learning_rate": 9.655842105263158e-05, "loss": 0.5229, "step": 33093 }, { "epoch": 1.8531750475977153, "grad_norm": 1.417458415031433, "learning_rate": 9.655815789473684e-05, "loss": 0.556, "step": 33094 }, { "epoch": 1.8532310449098444, "grad_norm": 1.4302271604537964, "learning_rate": 9.655789473684212e-05, "loss": 0.6765, "step": 33095 }, { "epoch": 1.8532870422219734, "grad_norm": 1.3495972156524658, "learning_rate": 9.655763157894738e-05, "loss": 0.5772, "step": 33096 }, { "epoch": 1.8533430395341024, "grad_norm": 1.2109516859054565, "learning_rate": 9.655736842105264e-05, "loss": 0.5467, "step": 33097 }, { "epoch": 1.8533990368462314, "grad_norm": 1.260568618774414, "learning_rate": 9.65571052631579e-05, "loss": 0.309, "step": 33098 }, { "epoch": 1.8534550341583604, "grad_norm": 1.3763041496276855, "learning_rate": 9.655684210526317e-05, "loss": 0.4497, "step": 33099 }, { "epoch": 1.8535110314704895, "grad_norm": 1.0809141397476196, 
"learning_rate": 9.655657894736843e-05, "loss": 0.3706, "step": 33100 }, { "epoch": 1.8535670287826185, "grad_norm": 1.1887532472610474, "learning_rate": 9.655631578947369e-05, "loss": 0.3976, "step": 33101 }, { "epoch": 1.8536230260947475, "grad_norm": 1.2558282613754272, "learning_rate": 9.655605263157895e-05, "loss": 0.4437, "step": 33102 }, { "epoch": 1.8536790234068765, "grad_norm": 1.4799175262451172, "learning_rate": 9.655578947368421e-05, "loss": 0.4076, "step": 33103 }, { "epoch": 1.8537350207190055, "grad_norm": 1.3490275144577026, "learning_rate": 9.655552631578948e-05, "loss": 0.3874, "step": 33104 }, { "epoch": 1.8537910180311346, "grad_norm": 1.267166018486023, "learning_rate": 9.655526315789474e-05, "loss": 0.4538, "step": 33105 }, { "epoch": 1.8538470153432636, "grad_norm": 2.162872314453125, "learning_rate": 9.655500000000001e-05, "loss": 0.4441, "step": 33106 }, { "epoch": 1.8539030126553926, "grad_norm": 1.268487572669983, "learning_rate": 9.655473684210526e-05, "loss": 0.607, "step": 33107 }, { "epoch": 1.8539590099675216, "grad_norm": 1.4336848258972168, "learning_rate": 9.655447368421053e-05, "loss": 0.493, "step": 33108 }, { "epoch": 1.8540150072796506, "grad_norm": 1.1673438549041748, "learning_rate": 9.655421052631579e-05, "loss": 0.4071, "step": 33109 }, { "epoch": 1.8540710045917796, "grad_norm": 1.1715099811553955, "learning_rate": 9.655394736842107e-05, "loss": 0.6009, "step": 33110 }, { "epoch": 1.8541270019039087, "grad_norm": 1.4830306768417358, "learning_rate": 9.655368421052631e-05, "loss": 0.4813, "step": 33111 }, { "epoch": 1.8541829992160377, "grad_norm": 1.2398089170455933, "learning_rate": 9.655342105263159e-05, "loss": 0.4535, "step": 33112 }, { "epoch": 1.8542389965281667, "grad_norm": 1.3025530576705933, "learning_rate": 9.655315789473684e-05, "loss": 0.5483, "step": 33113 }, { "epoch": 1.8542949938402957, "grad_norm": 1.1998889446258545, "learning_rate": 9.655289473684212e-05, "loss": 0.3307, "step": 33114 }, { "epoch": 
1.8543509911524247, "grad_norm": 1.0280098915100098, "learning_rate": 9.655263157894738e-05, "loss": 0.4335, "step": 33115 }, { "epoch": 1.8544069884645538, "grad_norm": 1.661843180656433, "learning_rate": 9.655236842105264e-05, "loss": 0.5408, "step": 33116 }, { "epoch": 1.8544629857766828, "grad_norm": 1.2318053245544434, "learning_rate": 9.65521052631579e-05, "loss": 0.4169, "step": 33117 }, { "epoch": 1.8545189830888118, "grad_norm": 1.513507604598999, "learning_rate": 9.655184210526316e-05, "loss": 0.7214, "step": 33118 }, { "epoch": 1.8545749804009408, "grad_norm": 1.455310344696045, "learning_rate": 9.655157894736843e-05, "loss": 0.5072, "step": 33119 }, { "epoch": 1.8546309777130698, "grad_norm": 1.4041128158569336, "learning_rate": 9.655131578947369e-05, "loss": 0.5939, "step": 33120 }, { "epoch": 1.8546869750251989, "grad_norm": 1.0689117908477783, "learning_rate": 9.655105263157895e-05, "loss": 0.3959, "step": 33121 }, { "epoch": 1.8547429723373279, "grad_norm": 4.283168315887451, "learning_rate": 9.655078947368421e-05, "loss": 0.4627, "step": 33122 }, { "epoch": 1.854798969649457, "grad_norm": 1.3845758438110352, "learning_rate": 9.655052631578948e-05, "loss": 0.533, "step": 33123 }, { "epoch": 1.854854966961586, "grad_norm": 1.3015806674957275, "learning_rate": 9.655026315789474e-05, "loss": 0.4367, "step": 33124 }, { "epoch": 1.854910964273715, "grad_norm": 1.154928207397461, "learning_rate": 9.655e-05, "loss": 0.4463, "step": 33125 }, { "epoch": 1.854966961585844, "grad_norm": 1.3839025497436523, "learning_rate": 9.654973684210526e-05, "loss": 0.5426, "step": 33126 }, { "epoch": 1.855022958897973, "grad_norm": 1.185211181640625, "learning_rate": 9.654947368421054e-05, "loss": 0.3559, "step": 33127 }, { "epoch": 1.855078956210102, "grad_norm": 1.0509347915649414, "learning_rate": 9.65492105263158e-05, "loss": 0.426, "step": 33128 }, { "epoch": 1.855134953522231, "grad_norm": 1.454164743423462, "learning_rate": 9.654894736842105e-05, "loss": 0.3985, 
"step": 33129 }, { "epoch": 1.85519095083436, "grad_norm": 1.1675007343292236, "learning_rate": 9.654868421052631e-05, "loss": 0.3877, "step": 33130 }, { "epoch": 1.855246948146489, "grad_norm": 1.4339954853057861, "learning_rate": 9.654842105263159e-05, "loss": 0.5345, "step": 33131 }, { "epoch": 1.855302945458618, "grad_norm": 1.2303017377853394, "learning_rate": 9.654815789473685e-05, "loss": 0.4575, "step": 33132 }, { "epoch": 1.855358942770747, "grad_norm": 1.3257334232330322, "learning_rate": 9.654789473684212e-05, "loss": 0.4836, "step": 33133 }, { "epoch": 1.855414940082876, "grad_norm": 1.131144642829895, "learning_rate": 9.654763157894737e-05, "loss": 0.3676, "step": 33134 }, { "epoch": 1.8554709373950051, "grad_norm": 2.5463688373565674, "learning_rate": 9.654736842105263e-05, "loss": 0.4261, "step": 33135 }, { "epoch": 1.8555269347071341, "grad_norm": 1.3660928010940552, "learning_rate": 9.65471052631579e-05, "loss": 0.4662, "step": 33136 }, { "epoch": 1.8555829320192632, "grad_norm": 1.2900288105010986, "learning_rate": 9.654684210526316e-05, "loss": 0.4106, "step": 33137 }, { "epoch": 1.8556389293313922, "grad_norm": 1.391201138496399, "learning_rate": 9.654657894736843e-05, "loss": 0.382, "step": 33138 }, { "epoch": 1.8556949266435212, "grad_norm": 1.3388479948043823, "learning_rate": 9.654631578947368e-05, "loss": 0.5254, "step": 33139 }, { "epoch": 1.8557509239556502, "grad_norm": 1.160375952720642, "learning_rate": 9.654605263157895e-05, "loss": 0.3951, "step": 33140 }, { "epoch": 1.8558069212677792, "grad_norm": 1.3148711919784546, "learning_rate": 9.654578947368421e-05, "loss": 0.4271, "step": 33141 }, { "epoch": 1.8558629185799083, "grad_norm": 1.4214383363723755, "learning_rate": 9.654552631578949e-05, "loss": 0.5326, "step": 33142 }, { "epoch": 1.8559189158920373, "grad_norm": 1.1944375038146973, "learning_rate": 9.654526315789475e-05, "loss": 0.5269, "step": 33143 }, { "epoch": 1.8559749132041663, "grad_norm": 1.4981383085250854, 
"learning_rate": 9.6545e-05, "loss": 0.4033, "step": 33144 }, { "epoch": 1.8560309105162953, "grad_norm": 1.5162910223007202, "learning_rate": 9.654473684210526e-05, "loss": 0.4073, "step": 33145 }, { "epoch": 1.8560869078284243, "grad_norm": 1.4173718690872192, "learning_rate": 9.654447368421054e-05, "loss": 0.5271, "step": 33146 }, { "epoch": 1.8561429051405534, "grad_norm": 1.2792532444000244, "learning_rate": 9.65442105263158e-05, "loss": 0.4023, "step": 33147 }, { "epoch": 1.8561989024526824, "grad_norm": 1.521446943283081, "learning_rate": 9.654394736842106e-05, "loss": 0.5062, "step": 33148 }, { "epoch": 1.8562548997648114, "grad_norm": 1.3226897716522217, "learning_rate": 9.654368421052632e-05, "loss": 0.3883, "step": 33149 }, { "epoch": 1.8563108970769404, "grad_norm": 1.6951515674591064, "learning_rate": 9.654342105263159e-05, "loss": 0.5469, "step": 33150 }, { "epoch": 1.8563668943890694, "grad_norm": 1.3238255977630615, "learning_rate": 9.654315789473685e-05, "loss": 0.4881, "step": 33151 }, { "epoch": 1.8564228917011985, "grad_norm": 1.434852957725525, "learning_rate": 9.654289473684211e-05, "loss": 0.5531, "step": 33152 }, { "epoch": 1.8564788890133275, "grad_norm": 1.1392138004302979, "learning_rate": 9.654263157894737e-05, "loss": 0.4223, "step": 33153 }, { "epoch": 1.8565348863254565, "grad_norm": 1.3729078769683838, "learning_rate": 9.654236842105263e-05, "loss": 0.4292, "step": 33154 }, { "epoch": 1.8565908836375855, "grad_norm": 1.61236572265625, "learning_rate": 9.65421052631579e-05, "loss": 0.5432, "step": 33155 }, { "epoch": 1.8566468809497145, "grad_norm": 1.3132075071334839, "learning_rate": 9.654184210526316e-05, "loss": 0.4583, "step": 33156 }, { "epoch": 1.8567028782618435, "grad_norm": 1.4339098930358887, "learning_rate": 9.654157894736842e-05, "loss": 0.4041, "step": 33157 }, { "epoch": 1.8567588755739726, "grad_norm": 1.225520372390747, "learning_rate": 9.654131578947368e-05, "loss": 0.3977, "step": 33158 }, { "epoch": 
1.8568148728861016, "grad_norm": 1.1398911476135254, "learning_rate": 9.654105263157895e-05, "loss": 0.3641, "step": 33159 }, { "epoch": 1.8568708701982306, "grad_norm": 1.2526735067367554, "learning_rate": 9.654078947368421e-05, "loss": 0.4355, "step": 33160 }, { "epoch": 1.8569268675103596, "grad_norm": 1.4194422960281372, "learning_rate": 9.654052631578949e-05, "loss": 0.4494, "step": 33161 }, { "epoch": 1.8569828648224886, "grad_norm": 1.4049956798553467, "learning_rate": 9.654026315789473e-05, "loss": 0.4442, "step": 33162 }, { "epoch": 1.8570388621346177, "grad_norm": 1.2219620943069458, "learning_rate": 9.654000000000001e-05, "loss": 0.3741, "step": 33163 }, { "epoch": 1.8570948594467467, "grad_norm": 1.4150627851486206, "learning_rate": 9.653973684210527e-05, "loss": 0.3954, "step": 33164 }, { "epoch": 1.8571508567588757, "grad_norm": 1.2717299461364746, "learning_rate": 9.653947368421054e-05, "loss": 0.4213, "step": 33165 }, { "epoch": 1.8572068540710047, "grad_norm": 1.4136308431625366, "learning_rate": 9.653921052631579e-05, "loss": 0.4058, "step": 33166 }, { "epoch": 1.8572628513831337, "grad_norm": 1.196578860282898, "learning_rate": 9.653894736842106e-05, "loss": 0.4078, "step": 33167 }, { "epoch": 1.8573188486952628, "grad_norm": 1.098273515701294, "learning_rate": 9.653868421052632e-05, "loss": 0.5009, "step": 33168 }, { "epoch": 1.8573748460073918, "grad_norm": 1.2358180284500122, "learning_rate": 9.653842105263159e-05, "loss": 0.5651, "step": 33169 }, { "epoch": 1.8574308433195208, "grad_norm": 1.186604380607605, "learning_rate": 9.653815789473685e-05, "loss": 0.4409, "step": 33170 }, { "epoch": 1.8574868406316498, "grad_norm": 1.2703826427459717, "learning_rate": 9.65378947368421e-05, "loss": 0.5416, "step": 33171 }, { "epoch": 1.8575428379437788, "grad_norm": 1.4217480421066284, "learning_rate": 9.653763157894737e-05, "loss": 0.491, "step": 33172 }, { "epoch": 1.8575988352559079, "grad_norm": 1.5214414596557617, "learning_rate": 
9.653736842105263e-05, "loss": 0.4847, "step": 33173 }, { "epoch": 1.8576548325680369, "grad_norm": 1.191049575805664, "learning_rate": 9.65371052631579e-05, "loss": 0.3773, "step": 33174 }, { "epoch": 1.857710829880166, "grad_norm": 1.1918625831604004, "learning_rate": 9.653684210526316e-05, "loss": 0.5194, "step": 33175 }, { "epoch": 1.857766827192295, "grad_norm": 1.0788966417312622, "learning_rate": 9.653657894736842e-05, "loss": 0.3965, "step": 33176 }, { "epoch": 1.857822824504424, "grad_norm": 1.2535103559494019, "learning_rate": 9.653631578947368e-05, "loss": 0.4623, "step": 33177 }, { "epoch": 1.857878821816553, "grad_norm": 1.4373008012771606, "learning_rate": 9.653605263157896e-05, "loss": 0.5303, "step": 33178 }, { "epoch": 1.857934819128682, "grad_norm": 1.4039499759674072, "learning_rate": 9.653578947368422e-05, "loss": 0.3667, "step": 33179 }, { "epoch": 1.857990816440811, "grad_norm": 1.1685330867767334, "learning_rate": 9.653552631578948e-05, "loss": 0.5415, "step": 33180 }, { "epoch": 1.85804681375294, "grad_norm": 1.0712639093399048, "learning_rate": 9.653526315789474e-05, "loss": 0.3915, "step": 33181 }, { "epoch": 1.858102811065069, "grad_norm": 1.3238636255264282, "learning_rate": 9.653500000000001e-05, "loss": 0.4885, "step": 33182 }, { "epoch": 1.858158808377198, "grad_norm": 1.2522668838500977, "learning_rate": 9.653473684210527e-05, "loss": 0.5375, "step": 33183 }, { "epoch": 1.858214805689327, "grad_norm": 1.290726661682129, "learning_rate": 9.653447368421053e-05, "loss": 0.5262, "step": 33184 }, { "epoch": 1.858270803001456, "grad_norm": 1.303221583366394, "learning_rate": 9.653421052631579e-05, "loss": 0.4288, "step": 33185 }, { "epoch": 1.858326800313585, "grad_norm": 1.161790132522583, "learning_rate": 9.653394736842106e-05, "loss": 0.3933, "step": 33186 }, { "epoch": 1.8583827976257141, "grad_norm": 1.4410653114318848, "learning_rate": 9.653368421052632e-05, "loss": 0.4534, "step": 33187 }, { "epoch": 1.8584387949378431, "grad_norm": 
1.4781135320663452, "learning_rate": 9.653342105263158e-05, "loss": 0.7443, "step": 33188 }, { "epoch": 1.8584947922499722, "grad_norm": 1.5559567213058472, "learning_rate": 9.653315789473684e-05, "loss": 0.6003, "step": 33189 }, { "epoch": 1.8585507895621012, "grad_norm": 1.182310938835144, "learning_rate": 9.65328947368421e-05, "loss": 0.5594, "step": 33190 }, { "epoch": 1.8586067868742302, "grad_norm": 1.2010245323181152, "learning_rate": 9.653263157894737e-05, "loss": 0.5012, "step": 33191 }, { "epoch": 1.8586627841863592, "grad_norm": 1.1277631521224976, "learning_rate": 9.653236842105263e-05, "loss": 0.3654, "step": 33192 }, { "epoch": 1.8587187814984882, "grad_norm": 1.3679944276809692, "learning_rate": 9.653210526315791e-05, "loss": 0.4288, "step": 33193 }, { "epoch": 1.8587747788106173, "grad_norm": 1.3539291620254517, "learning_rate": 9.653184210526315e-05, "loss": 0.4613, "step": 33194 }, { "epoch": 1.858830776122746, "grad_norm": 1.3745754957199097, "learning_rate": 9.653157894736843e-05, "loss": 0.6353, "step": 33195 }, { "epoch": 1.858886773434875, "grad_norm": 1.0956120491027832, "learning_rate": 9.653131578947369e-05, "loss": 0.344, "step": 33196 }, { "epoch": 1.858942770747004, "grad_norm": 1.1513943672180176, "learning_rate": 9.653105263157896e-05, "loss": 0.4839, "step": 33197 }, { "epoch": 1.858998768059133, "grad_norm": 1.3860986232757568, "learning_rate": 9.653078947368422e-05, "loss": 0.4771, "step": 33198 }, { "epoch": 1.8590547653712621, "grad_norm": 1.429257869720459, "learning_rate": 9.653052631578948e-05, "loss": 0.5066, "step": 33199 }, { "epoch": 1.8591107626833911, "grad_norm": 1.1928224563598633, "learning_rate": 9.653026315789474e-05, "loss": 0.3391, "step": 33200 }, { "epoch": 1.8591667599955202, "grad_norm": 1.1016384363174438, "learning_rate": 9.653000000000001e-05, "loss": 0.3275, "step": 33201 }, { "epoch": 1.8592227573076492, "grad_norm": 1.5377134084701538, "learning_rate": 9.652973684210527e-05, "loss": 0.3946, "step": 33202 
}, { "epoch": 1.8592787546197782, "grad_norm": 1.344175100326538, "learning_rate": 9.652947368421053e-05, "loss": 0.3915, "step": 33203 }, { "epoch": 1.8593347519319072, "grad_norm": 1.3472548723220825, "learning_rate": 9.652921052631579e-05, "loss": 0.4253, "step": 33204 }, { "epoch": 1.8593907492440362, "grad_norm": 1.1115021705627441, "learning_rate": 9.652894736842105e-05, "loss": 0.3779, "step": 33205 }, { "epoch": 1.8594467465561653, "grad_norm": 1.5202127695083618, "learning_rate": 9.652868421052632e-05, "loss": 0.5438, "step": 33206 }, { "epoch": 1.8595027438682943, "grad_norm": 1.1402655839920044, "learning_rate": 9.652842105263158e-05, "loss": 0.4357, "step": 33207 }, { "epoch": 1.8595587411804233, "grad_norm": 1.2354209423065186, "learning_rate": 9.652815789473684e-05, "loss": 0.5218, "step": 33208 }, { "epoch": 1.8596147384925523, "grad_norm": 0.9355956315994263, "learning_rate": 9.65278947368421e-05, "loss": 0.3268, "step": 33209 }, { "epoch": 1.8596707358046813, "grad_norm": 1.586888313293457, "learning_rate": 9.652763157894738e-05, "loss": 0.552, "step": 33210 }, { "epoch": 1.8597267331168104, "grad_norm": 1.3255527019500732, "learning_rate": 9.652736842105264e-05, "loss": 0.4361, "step": 33211 }, { "epoch": 1.8597827304289394, "grad_norm": 1.2221790552139282, "learning_rate": 9.65271052631579e-05, "loss": 0.3618, "step": 33212 }, { "epoch": 1.8598387277410684, "grad_norm": 1.3526649475097656, "learning_rate": 9.652684210526316e-05, "loss": 0.5011, "step": 33213 }, { "epoch": 1.8598947250531974, "grad_norm": 1.154591679573059, "learning_rate": 9.652657894736843e-05, "loss": 0.3266, "step": 33214 }, { "epoch": 1.8599507223653264, "grad_norm": 1.2744574546813965, "learning_rate": 9.652631578947369e-05, "loss": 0.401, "step": 33215 }, { "epoch": 1.8600067196774555, "grad_norm": 1.0954231023788452, "learning_rate": 9.652605263157895e-05, "loss": 0.3587, "step": 33216 }, { "epoch": 1.8600627169895845, "grad_norm": 1.2176766395568848, "learning_rate": 
9.652578947368421e-05, "loss": 0.3546, "step": 33217 }, { "epoch": 1.8601187143017135, "grad_norm": 1.1566481590270996, "learning_rate": 9.652552631578948e-05, "loss": 0.5207, "step": 33218 }, { "epoch": 1.8601747116138425, "grad_norm": 1.3136396408081055, "learning_rate": 9.652526315789474e-05, "loss": 0.5068, "step": 33219 }, { "epoch": 1.8602307089259715, "grad_norm": 1.2912509441375732, "learning_rate": 9.652500000000002e-05, "loss": 0.3516, "step": 33220 }, { "epoch": 1.8602867062381006, "grad_norm": 1.2112452983856201, "learning_rate": 9.652473684210526e-05, "loss": 0.3653, "step": 33221 }, { "epoch": 1.8603427035502296, "grad_norm": 1.2557883262634277, "learning_rate": 9.652447368421052e-05, "loss": 0.4438, "step": 33222 }, { "epoch": 1.8603987008623586, "grad_norm": 1.3698614835739136, "learning_rate": 9.65242105263158e-05, "loss": 0.481, "step": 33223 }, { "epoch": 1.8604546981744876, "grad_norm": 2.157306671142578, "learning_rate": 9.652394736842105e-05, "loss": 0.4831, "step": 33224 }, { "epoch": 1.8605106954866166, "grad_norm": 1.425485610961914, "learning_rate": 9.652368421052633e-05, "loss": 0.3729, "step": 33225 }, { "epoch": 1.8605666927987456, "grad_norm": 1.3635287284851074, "learning_rate": 9.652342105263157e-05, "loss": 0.5146, "step": 33226 }, { "epoch": 1.8606226901108747, "grad_norm": 1.1912692785263062, "learning_rate": 9.652315789473685e-05, "loss": 0.3423, "step": 33227 }, { "epoch": 1.8606786874230037, "grad_norm": 1.2394254207611084, "learning_rate": 9.65228947368421e-05, "loss": 0.474, "step": 33228 }, { "epoch": 1.8607346847351327, "grad_norm": 1.2628141641616821, "learning_rate": 9.652263157894738e-05, "loss": 0.4587, "step": 33229 }, { "epoch": 1.8607906820472617, "grad_norm": 1.4132494926452637, "learning_rate": 9.652236842105264e-05, "loss": 0.5303, "step": 33230 }, { "epoch": 1.8608466793593907, "grad_norm": 1.8759733438491821, "learning_rate": 9.65221052631579e-05, "loss": 0.4537, "step": 33231 }, { "epoch": 1.8609026766715198, 
"grad_norm": 1.3873984813690186, "learning_rate": 9.652184210526316e-05, "loss": 0.5849, "step": 33232 }, { "epoch": 1.8609586739836488, "grad_norm": 1.183600902557373, "learning_rate": 9.652157894736843e-05, "loss": 0.4262, "step": 33233 }, { "epoch": 1.8610146712957778, "grad_norm": 1.477653980255127, "learning_rate": 9.652131578947369e-05, "loss": 0.4572, "step": 33234 }, { "epoch": 1.8610706686079068, "grad_norm": 1.4078872203826904, "learning_rate": 9.652105263157895e-05, "loss": 0.5162, "step": 33235 }, { "epoch": 1.8611266659200358, "grad_norm": 1.0280321836471558, "learning_rate": 9.652078947368421e-05, "loss": 0.3673, "step": 33236 }, { "epoch": 1.8611826632321649, "grad_norm": 1.45988929271698, "learning_rate": 9.652052631578948e-05, "loss": 0.495, "step": 33237 }, { "epoch": 1.8612386605442939, "grad_norm": 1.3883593082427979, "learning_rate": 9.652026315789474e-05, "loss": 0.4717, "step": 33238 }, { "epoch": 1.861294657856423, "grad_norm": 1.2350587844848633, "learning_rate": 9.652e-05, "loss": 0.4214, "step": 33239 }, { "epoch": 1.861350655168552, "grad_norm": 1.4167972803115845, "learning_rate": 9.651973684210526e-05, "loss": 0.5187, "step": 33240 }, { "epoch": 1.861406652480681, "grad_norm": 1.1706527471542358, "learning_rate": 9.651947368421052e-05, "loss": 0.3587, "step": 33241 }, { "epoch": 1.86146264979281, "grad_norm": 1.1903153657913208, "learning_rate": 9.65192105263158e-05, "loss": 0.4871, "step": 33242 }, { "epoch": 1.861518647104939, "grad_norm": 1.4145921468734741, "learning_rate": 9.651894736842106e-05, "loss": 0.5622, "step": 33243 }, { "epoch": 1.861574644417068, "grad_norm": 1.3552058935165405, "learning_rate": 9.651868421052632e-05, "loss": 0.4162, "step": 33244 }, { "epoch": 1.861630641729197, "grad_norm": 1.501378059387207, "learning_rate": 9.651842105263158e-05, "loss": 0.5254, "step": 33245 }, { "epoch": 1.861686639041326, "grad_norm": 33.333866119384766, "learning_rate": 9.651815789473685e-05, "loss": 0.4166, "step": 33246 }, { 
"epoch": 1.861742636353455, "grad_norm": 1.1257805824279785, "learning_rate": 9.651789473684211e-05, "loss": 0.4534, "step": 33247 }, { "epoch": 1.861798633665584, "grad_norm": 1.1850035190582275, "learning_rate": 9.651763157894738e-05, "loss": 0.3522, "step": 33248 }, { "epoch": 1.861854630977713, "grad_norm": 1.1929658651351929, "learning_rate": 9.651736842105263e-05, "loss": 0.3706, "step": 33249 }, { "epoch": 1.861910628289842, "grad_norm": 1.1902594566345215, "learning_rate": 9.65171052631579e-05, "loss": 0.35, "step": 33250 }, { "epoch": 1.8619666256019711, "grad_norm": 1.2255616188049316, "learning_rate": 9.651684210526316e-05, "loss": 0.4522, "step": 33251 }, { "epoch": 1.8620226229141001, "grad_norm": 3.014446973800659, "learning_rate": 9.651657894736843e-05, "loss": 0.5083, "step": 33252 }, { "epoch": 1.8620786202262292, "grad_norm": 1.3079462051391602, "learning_rate": 9.65163157894737e-05, "loss": 0.4072, "step": 33253 }, { "epoch": 1.8621346175383582, "grad_norm": 1.3268287181854248, "learning_rate": 9.651605263157895e-05, "loss": 0.3646, "step": 33254 }, { "epoch": 1.8621906148504872, "grad_norm": 1.6336162090301514, "learning_rate": 9.651578947368421e-05, "loss": 0.4509, "step": 33255 }, { "epoch": 1.8622466121626162, "grad_norm": 1.2794172763824463, "learning_rate": 9.651552631578947e-05, "loss": 0.5047, "step": 33256 }, { "epoch": 1.8623026094747452, "grad_norm": 1.1605744361877441, "learning_rate": 9.651526315789475e-05, "loss": 0.4259, "step": 33257 }, { "epoch": 1.8623586067868743, "grad_norm": 1.38705313205719, "learning_rate": 9.6515e-05, "loss": 0.4562, "step": 33258 }, { "epoch": 1.8624146040990033, "grad_norm": 1.2819559574127197, "learning_rate": 9.651473684210527e-05, "loss": 0.422, "step": 33259 }, { "epoch": 1.8624706014111323, "grad_norm": 1.3177659511566162, "learning_rate": 9.651447368421053e-05, "loss": 0.4142, "step": 33260 }, { "epoch": 1.8625265987232613, "grad_norm": 1.313639760017395, "learning_rate": 9.65142105263158e-05, 
"loss": 0.4397, "step": 33261 }, { "epoch": 1.8625825960353903, "grad_norm": 1.1930993795394897, "learning_rate": 9.651394736842106e-05, "loss": 0.5392, "step": 33262 }, { "epoch": 1.8626385933475194, "grad_norm": 1.1592968702316284, "learning_rate": 9.651368421052632e-05, "loss": 0.3514, "step": 33263 }, { "epoch": 1.8626945906596484, "grad_norm": 1.1909130811691284, "learning_rate": 9.651342105263158e-05, "loss": 0.5344, "step": 33264 }, { "epoch": 1.8627505879717774, "grad_norm": 1.3716959953308105, "learning_rate": 9.651315789473685e-05, "loss": 0.4311, "step": 33265 }, { "epoch": 1.8628065852839064, "grad_norm": 1.1980502605438232, "learning_rate": 9.651289473684211e-05, "loss": 0.3682, "step": 33266 }, { "epoch": 1.8628625825960354, "grad_norm": 1.3581562042236328, "learning_rate": 9.651263157894737e-05, "loss": 0.3964, "step": 33267 }, { "epoch": 1.8629185799081645, "grad_norm": 1.4218684434890747, "learning_rate": 9.651236842105263e-05, "loss": 0.4884, "step": 33268 }, { "epoch": 1.8629745772202935, "grad_norm": 1.450047492980957, "learning_rate": 9.65121052631579e-05, "loss": 0.7257, "step": 33269 }, { "epoch": 1.8630305745324225, "grad_norm": 1.3057308197021484, "learning_rate": 9.651184210526316e-05, "loss": 0.408, "step": 33270 }, { "epoch": 1.8630865718445515, "grad_norm": 1.2041081190109253, "learning_rate": 9.651157894736842e-05, "loss": 0.4326, "step": 33271 }, { "epoch": 1.8631425691566805, "grad_norm": 1.2635146379470825, "learning_rate": 9.651131578947368e-05, "loss": 0.4321, "step": 33272 }, { "epoch": 1.8631985664688095, "grad_norm": 1.2276989221572876, "learning_rate": 9.651105263157894e-05, "loss": 0.3515, "step": 33273 }, { "epoch": 1.8632545637809386, "grad_norm": 1.303195595741272, "learning_rate": 9.651078947368422e-05, "loss": 0.4348, "step": 33274 }, { "epoch": 1.8633105610930676, "grad_norm": 1.6392093896865845, "learning_rate": 9.651052631578948e-05, "loss": 0.6247, "step": 33275 }, { "epoch": 1.8633665584051966, "grad_norm": 
1.3800324201583862, "learning_rate": 9.651026315789474e-05, "loss": 0.5736, "step": 33276 }, { "epoch": 1.8634225557173256, "grad_norm": 1.1216520071029663, "learning_rate": 9.651e-05, "loss": 0.3868, "step": 33277 }, { "epoch": 1.8634785530294544, "grad_norm": 1.1675200462341309, "learning_rate": 9.650973684210527e-05, "loss": 0.3946, "step": 33278 }, { "epoch": 1.8635345503415834, "grad_norm": 1.2029776573181152, "learning_rate": 9.650947368421053e-05, "loss": 0.4378, "step": 33279 }, { "epoch": 1.8635905476537125, "grad_norm": 1.215852975845337, "learning_rate": 9.65092105263158e-05, "loss": 0.4484, "step": 33280 }, { "epoch": 1.8636465449658415, "grad_norm": 1.428413987159729, "learning_rate": 9.650894736842105e-05, "loss": 0.4041, "step": 33281 }, { "epoch": 1.8637025422779705, "grad_norm": 1.229519009590149, "learning_rate": 9.650868421052632e-05, "loss": 0.3805, "step": 33282 }, { "epoch": 1.8637585395900995, "grad_norm": 1.3234349489212036, "learning_rate": 9.650842105263158e-05, "loss": 0.5095, "step": 33283 }, { "epoch": 1.8638145369022285, "grad_norm": 1.1571629047393799, "learning_rate": 9.650815789473685e-05, "loss": 0.4227, "step": 33284 }, { "epoch": 1.8638705342143576, "grad_norm": 1.1840343475341797, "learning_rate": 9.650789473684211e-05, "loss": 0.464, "step": 33285 }, { "epoch": 1.8639265315264866, "grad_norm": 1.4128124713897705, "learning_rate": 9.650763157894737e-05, "loss": 0.4179, "step": 33286 }, { "epoch": 1.8639825288386156, "grad_norm": 1.331292986869812, "learning_rate": 9.650736842105263e-05, "loss": 0.528, "step": 33287 }, { "epoch": 1.8640385261507446, "grad_norm": 1.3400959968566895, "learning_rate": 9.650710526315791e-05, "loss": 0.4923, "step": 33288 }, { "epoch": 1.8640945234628736, "grad_norm": 1.6587700843811035, "learning_rate": 9.650684210526317e-05, "loss": 0.3992, "step": 33289 }, { "epoch": 1.8641505207750027, "grad_norm": 1.353323221206665, "learning_rate": 9.650657894736843e-05, "loss": 0.5483, "step": 33290 }, { 
"epoch": 1.8642065180871317, "grad_norm": 1.2740854024887085, "learning_rate": 9.650631578947369e-05, "loss": 0.3513, "step": 33291 }, { "epoch": 1.8642625153992607, "grad_norm": 1.1472843885421753, "learning_rate": 9.650605263157895e-05, "loss": 0.4573, "step": 33292 }, { "epoch": 1.8643185127113897, "grad_norm": 1.3171521425247192, "learning_rate": 9.650578947368422e-05, "loss": 0.6538, "step": 33293 }, { "epoch": 1.8643745100235187, "grad_norm": 1.2200098037719727, "learning_rate": 9.650552631578948e-05, "loss": 0.402, "step": 33294 }, { "epoch": 1.8644305073356477, "grad_norm": 1.4275422096252441, "learning_rate": 9.650526315789474e-05, "loss": 0.4681, "step": 33295 }, { "epoch": 1.8644865046477768, "grad_norm": 1.1108968257904053, "learning_rate": 9.6505e-05, "loss": 0.3743, "step": 33296 }, { "epoch": 1.8645425019599058, "grad_norm": 1.392356514930725, "learning_rate": 9.650473684210527e-05, "loss": 0.4675, "step": 33297 }, { "epoch": 1.8645984992720348, "grad_norm": 1.3910579681396484, "learning_rate": 9.650447368421053e-05, "loss": 0.4508, "step": 33298 }, { "epoch": 1.8646544965841638, "grad_norm": 1.153457522392273, "learning_rate": 9.650421052631579e-05, "loss": 0.331, "step": 33299 }, { "epoch": 1.8647104938962928, "grad_norm": 1.8094803094863892, "learning_rate": 9.650394736842105e-05, "loss": 0.5507, "step": 33300 }, { "epoch": 1.8647664912084219, "grad_norm": 1.2175483703613281, "learning_rate": 9.650368421052632e-05, "loss": 0.4674, "step": 33301 }, { "epoch": 1.8648224885205509, "grad_norm": 1.3713819980621338, "learning_rate": 9.650342105263158e-05, "loss": 0.5527, "step": 33302 }, { "epoch": 1.86487848583268, "grad_norm": 1.3288484811782837, "learning_rate": 9.650315789473686e-05, "loss": 0.4117, "step": 33303 }, { "epoch": 1.864934483144809, "grad_norm": 1.3322553634643555, "learning_rate": 9.65028947368421e-05, "loss": 0.4769, "step": 33304 }, { "epoch": 1.864990480456938, "grad_norm": 1.8681646585464478, "learning_rate": 9.650263157894738e-05, 
"loss": 0.6378, "step": 33305 }, { "epoch": 1.865046477769067, "grad_norm": 1.0922811031341553, "learning_rate": 9.650236842105264e-05, "loss": 0.4184, "step": 33306 }, { "epoch": 1.865102475081196, "grad_norm": 1.4009759426116943, "learning_rate": 9.650210526315791e-05, "loss": 0.6117, "step": 33307 }, { "epoch": 1.865158472393325, "grad_norm": 1.2001932859420776, "learning_rate": 9.650184210526317e-05, "loss": 0.3913, "step": 33308 }, { "epoch": 1.865214469705454, "grad_norm": 1.260614037513733, "learning_rate": 9.650157894736842e-05, "loss": 0.5306, "step": 33309 }, { "epoch": 1.865270467017583, "grad_norm": 1.0835615396499634, "learning_rate": 9.650131578947369e-05, "loss": 0.405, "step": 33310 }, { "epoch": 1.865326464329712, "grad_norm": 1.2805088758468628, "learning_rate": 9.650105263157895e-05, "loss": 0.4737, "step": 33311 }, { "epoch": 1.865382461641841, "grad_norm": 1.0372954607009888, "learning_rate": 9.650078947368422e-05, "loss": 0.4574, "step": 33312 }, { "epoch": 1.86543845895397, "grad_norm": 2.122236728668213, "learning_rate": 9.650052631578947e-05, "loss": 0.6584, "step": 33313 }, { "epoch": 1.865494456266099, "grad_norm": 1.1040592193603516, "learning_rate": 9.650026315789474e-05, "loss": 0.4956, "step": 33314 }, { "epoch": 1.8655504535782281, "grad_norm": 1.2051090002059937, "learning_rate": 9.65e-05, "loss": 0.4011, "step": 33315 }, { "epoch": 1.8656064508903571, "grad_norm": 1.1285511255264282, "learning_rate": 9.649973684210527e-05, "loss": 0.4076, "step": 33316 }, { "epoch": 1.8656624482024862, "grad_norm": 1.331356406211853, "learning_rate": 9.649947368421053e-05, "loss": 0.5501, "step": 33317 }, { "epoch": 1.8657184455146152, "grad_norm": 1.309225082397461, "learning_rate": 9.64992105263158e-05, "loss": 0.5664, "step": 33318 }, { "epoch": 1.8657744428267442, "grad_norm": 1.356074333190918, "learning_rate": 9.649894736842105e-05, "loss": 0.4073, "step": 33319 }, { "epoch": 1.8658304401388732, "grad_norm": 1.1765908002853394, 
"learning_rate": 9.649868421052633e-05, "loss": 0.3849, "step": 33320 }, { "epoch": 1.8658864374510022, "grad_norm": 1.1801725625991821, "learning_rate": 9.649842105263159e-05, "loss": 0.3199, "step": 33321 }, { "epoch": 1.8659424347631313, "grad_norm": 1.316142201423645, "learning_rate": 9.649815789473685e-05, "loss": 0.4739, "step": 33322 }, { "epoch": 1.8659984320752603, "grad_norm": 1.6283364295959473, "learning_rate": 9.64978947368421e-05, "loss": 0.4789, "step": 33323 }, { "epoch": 1.8660544293873893, "grad_norm": 1.1305875778198242, "learning_rate": 9.649763157894738e-05, "loss": 0.392, "step": 33324 }, { "epoch": 1.8661104266995183, "grad_norm": 1.1613478660583496, "learning_rate": 9.649736842105264e-05, "loss": 0.4457, "step": 33325 }, { "epoch": 1.8661664240116473, "grad_norm": 1.3853996992111206, "learning_rate": 9.64971052631579e-05, "loss": 0.4711, "step": 33326 }, { "epoch": 1.8662224213237764, "grad_norm": 1.3434357643127441, "learning_rate": 9.649684210526316e-05, "loss": 0.4273, "step": 33327 }, { "epoch": 1.8662784186359054, "grad_norm": 1.1406131982803345, "learning_rate": 9.649657894736842e-05, "loss": 0.4457, "step": 33328 }, { "epoch": 1.8663344159480344, "grad_norm": 1.2307461500167847, "learning_rate": 9.649631578947369e-05, "loss": 0.4473, "step": 33329 }, { "epoch": 1.8663904132601634, "grad_norm": 1.5934114456176758, "learning_rate": 9.649605263157895e-05, "loss": 0.5564, "step": 33330 }, { "epoch": 1.8664464105722924, "grad_norm": 1.3289058208465576, "learning_rate": 9.649578947368421e-05, "loss": 0.45, "step": 33331 }, { "epoch": 1.8665024078844215, "grad_norm": 2.0612432956695557, "learning_rate": 9.649552631578947e-05, "loss": 0.4376, "step": 33332 }, { "epoch": 1.8665584051965505, "grad_norm": 1.287265419960022, "learning_rate": 9.649526315789474e-05, "loss": 0.4721, "step": 33333 }, { "epoch": 1.8666144025086795, "grad_norm": 1.308773159980774, "learning_rate": 9.6495e-05, "loss": 0.3741, "step": 33334 }, { "epoch": 
1.8666703998208085, "grad_norm": 1.1810001134872437, "learning_rate": 9.649473684210528e-05, "loss": 0.373, "step": 33335 }, { "epoch": 1.8667263971329375, "grad_norm": 1.1118175983428955, "learning_rate": 9.649447368421052e-05, "loss": 0.5305, "step": 33336 }, { "epoch": 1.8667823944450666, "grad_norm": 1.206966519355774, "learning_rate": 9.64942105263158e-05, "loss": 0.437, "step": 33337 }, { "epoch": 1.8668383917571956, "grad_norm": 1.597222924232483, "learning_rate": 9.649394736842106e-05, "loss": 0.449, "step": 33338 }, { "epoch": 1.8668943890693246, "grad_norm": 1.008517861366272, "learning_rate": 9.649368421052633e-05, "loss": 0.3517, "step": 33339 }, { "epoch": 1.8669503863814536, "grad_norm": 1.286507248878479, "learning_rate": 9.649342105263159e-05, "loss": 0.4505, "step": 33340 }, { "epoch": 1.8670063836935826, "grad_norm": 1.3307602405548096, "learning_rate": 9.649315789473685e-05, "loss": 0.57, "step": 33341 }, { "epoch": 1.8670623810057116, "grad_norm": 1.8110291957855225, "learning_rate": 9.649289473684211e-05, "loss": 0.5791, "step": 33342 }, { "epoch": 1.8671183783178407, "grad_norm": 66.24345397949219, "learning_rate": 9.649263157894737e-05, "loss": 0.4023, "step": 33343 }, { "epoch": 1.8671743756299697, "grad_norm": 1.3057340383529663, "learning_rate": 9.649236842105264e-05, "loss": 0.4617, "step": 33344 }, { "epoch": 1.8672303729420987, "grad_norm": 1.3963127136230469, "learning_rate": 9.64921052631579e-05, "loss": 0.4739, "step": 33345 }, { "epoch": 1.8672863702542277, "grad_norm": 1.2040624618530273, "learning_rate": 9.649184210526316e-05, "loss": 0.4258, "step": 33346 }, { "epoch": 1.8673423675663567, "grad_norm": 1.2081611156463623, "learning_rate": 9.649157894736842e-05, "loss": 0.4192, "step": 33347 }, { "epoch": 1.8673983648784858, "grad_norm": 1.3619356155395508, "learning_rate": 9.64913157894737e-05, "loss": 0.5874, "step": 33348 }, { "epoch": 1.8674543621906148, "grad_norm": 1.1492903232574463, "learning_rate": 9.649105263157895e-05, 
"loss": 0.4294, "step": 33349 }, { "epoch": 1.8675103595027438, "grad_norm": 1.2217748165130615, "learning_rate": 9.649078947368421e-05, "loss": 0.475, "step": 33350 }, { "epoch": 1.8675663568148728, "grad_norm": 1.3813090324401855, "learning_rate": 9.649052631578947e-05, "loss": 0.3238, "step": 33351 }, { "epoch": 1.8676223541270018, "grad_norm": 1.1766788959503174, "learning_rate": 9.649026315789475e-05, "loss": 0.3393, "step": 33352 }, { "epoch": 1.8676783514391309, "grad_norm": 1.260389804840088, "learning_rate": 9.649e-05, "loss": 0.3905, "step": 33353 }, { "epoch": 1.8677343487512599, "grad_norm": 1.2006301879882812, "learning_rate": 9.648973684210527e-05, "loss": 0.5319, "step": 33354 }, { "epoch": 1.867790346063389, "grad_norm": 1.167595624923706, "learning_rate": 9.648947368421053e-05, "loss": 0.5305, "step": 33355 }, { "epoch": 1.867846343375518, "grad_norm": 2.3871161937713623, "learning_rate": 9.64892105263158e-05, "loss": 0.4194, "step": 33356 }, { "epoch": 1.867902340687647, "grad_norm": 1.462841272354126, "learning_rate": 9.648894736842106e-05, "loss": 0.5383, "step": 33357 }, { "epoch": 1.867958337999776, "grad_norm": 1.3849934339523315, "learning_rate": 9.648868421052633e-05, "loss": 0.5103, "step": 33358 }, { "epoch": 1.868014335311905, "grad_norm": 1.3383766412734985, "learning_rate": 9.648842105263158e-05, "loss": 0.4018, "step": 33359 }, { "epoch": 1.868070332624034, "grad_norm": 1.3043535947799683, "learning_rate": 9.648815789473684e-05, "loss": 0.3786, "step": 33360 }, { "epoch": 1.868126329936163, "grad_norm": 1.200724720954895, "learning_rate": 9.648789473684211e-05, "loss": 0.4577, "step": 33361 }, { "epoch": 1.868182327248292, "grad_norm": 1.3673505783081055, "learning_rate": 9.648763157894737e-05, "loss": 0.442, "step": 33362 }, { "epoch": 1.868238324560421, "grad_norm": 1.3371307849884033, "learning_rate": 9.648736842105264e-05, "loss": 0.687, "step": 33363 }, { "epoch": 1.86829432187255, "grad_norm": 1.0764373540878296, 
"learning_rate": 9.648710526315789e-05, "loss": 0.376, "step": 33364 }, { "epoch": 1.868350319184679, "grad_norm": 1.60707426071167, "learning_rate": 9.648684210526316e-05, "loss": 0.5271, "step": 33365 }, { "epoch": 1.868406316496808, "grad_norm": 1.3147951364517212, "learning_rate": 9.648657894736842e-05, "loss": 0.412, "step": 33366 }, { "epoch": 1.8684623138089371, "grad_norm": 1.316485047340393, "learning_rate": 9.64863157894737e-05, "loss": 0.3975, "step": 33367 }, { "epoch": 1.8685183111210661, "grad_norm": 0.9729315042495728, "learning_rate": 9.648605263157894e-05, "loss": 0.3303, "step": 33368 }, { "epoch": 1.8685743084331952, "grad_norm": 1.1686781644821167, "learning_rate": 9.648578947368422e-05, "loss": 0.4727, "step": 33369 }, { "epoch": 1.8686303057453242, "grad_norm": 1.7420340776443481, "learning_rate": 9.648552631578948e-05, "loss": 0.4967, "step": 33370 }, { "epoch": 1.8686863030574532, "grad_norm": 2.7304697036743164, "learning_rate": 9.648526315789475e-05, "loss": 0.5604, "step": 33371 }, { "epoch": 1.8687423003695822, "grad_norm": 1.4076279401779175, "learning_rate": 9.648500000000001e-05, "loss": 0.5262, "step": 33372 }, { "epoch": 1.8687982976817112, "grad_norm": 2.1522576808929443, "learning_rate": 9.648473684210527e-05, "loss": 0.4765, "step": 33373 }, { "epoch": 1.8688542949938403, "grad_norm": 1.5706709623336792, "learning_rate": 9.648447368421053e-05, "loss": 0.3541, "step": 33374 }, { "epoch": 1.8689102923059693, "grad_norm": 1.5873135328292847, "learning_rate": 9.64842105263158e-05, "loss": 0.5126, "step": 33375 }, { "epoch": 1.8689662896180983, "grad_norm": 1.2355859279632568, "learning_rate": 9.648394736842106e-05, "loss": 0.4603, "step": 33376 }, { "epoch": 1.8690222869302273, "grad_norm": 1.0957920551300049, "learning_rate": 9.648368421052632e-05, "loss": 0.4055, "step": 33377 }, { "epoch": 1.8690782842423563, "grad_norm": 1.0139875411987305, "learning_rate": 9.648342105263158e-05, "loss": 0.3791, "step": 33378 }, { "epoch": 
1.8691342815544854, "grad_norm": 1.4458435773849487, "learning_rate": 9.648315789473684e-05, "loss": 0.3773, "step": 33379 }, { "epoch": 1.8691902788666144, "grad_norm": 1.202217936515808, "learning_rate": 9.648289473684211e-05, "loss": 0.3829, "step": 33380 }, { "epoch": 1.8692462761787434, "grad_norm": 1.1900771856307983, "learning_rate": 9.648263157894737e-05, "loss": 0.5546, "step": 33381 }, { "epoch": 1.8693022734908724, "grad_norm": 1.2505542039871216, "learning_rate": 9.648236842105263e-05, "loss": 0.3581, "step": 33382 }, { "epoch": 1.8693582708030014, "grad_norm": 1.3131285905838013, "learning_rate": 9.648210526315789e-05, "loss": 0.5366, "step": 33383 }, { "epoch": 1.8694142681151305, "grad_norm": 1.3099415302276611, "learning_rate": 9.648184210526317e-05, "loss": 0.434, "step": 33384 }, { "epoch": 1.8694702654272595, "grad_norm": 1.3272502422332764, "learning_rate": 9.648157894736843e-05, "loss": 0.4881, "step": 33385 }, { "epoch": 1.8695262627393885, "grad_norm": 1.1791683435440063, "learning_rate": 9.648131578947369e-05, "loss": 0.3329, "step": 33386 }, { "epoch": 1.8695822600515175, "grad_norm": 1.0309921503067017, "learning_rate": 9.648105263157895e-05, "loss": 0.4096, "step": 33387 }, { "epoch": 1.8696382573636465, "grad_norm": 1.122999906539917, "learning_rate": 9.648078947368422e-05, "loss": 0.4852, "step": 33388 }, { "epoch": 1.8696942546757755, "grad_norm": 1.4413858652114868, "learning_rate": 9.648052631578948e-05, "loss": 0.541, "step": 33389 }, { "epoch": 1.8697502519879046, "grad_norm": 1.4153752326965332, "learning_rate": 9.648026315789475e-05, "loss": 0.501, "step": 33390 }, { "epoch": 1.8698062493000336, "grad_norm": 1.1813348531723022, "learning_rate": 9.648e-05, "loss": 0.4236, "step": 33391 }, { "epoch": 1.8698622466121626, "grad_norm": 1.3457003831863403, "learning_rate": 9.647973684210527e-05, "loss": 0.5359, "step": 33392 }, { "epoch": 1.8699182439242916, "grad_norm": 1.1282265186309814, "learning_rate": 9.647947368421053e-05, 
"loss": 0.367, "step": 33393 }, { "epoch": 1.8699742412364206, "grad_norm": 1.2186192274093628, "learning_rate": 9.647921052631579e-05, "loss": 0.4307, "step": 33394 }, { "epoch": 1.8700302385485497, "grad_norm": 1.0650784969329834, "learning_rate": 9.647894736842106e-05, "loss": 0.443, "step": 33395 }, { "epoch": 1.8700862358606787, "grad_norm": 1.222720980644226, "learning_rate": 9.647868421052631e-05, "loss": 0.4682, "step": 33396 }, { "epoch": 1.8701422331728077, "grad_norm": 1.4409343004226685, "learning_rate": 9.647842105263158e-05, "loss": 0.4807, "step": 33397 }, { "epoch": 1.8701982304849367, "grad_norm": 1.3927421569824219, "learning_rate": 9.647815789473684e-05, "loss": 0.4209, "step": 33398 }, { "epoch": 1.8702542277970657, "grad_norm": 1.0722668170928955, "learning_rate": 9.647789473684212e-05, "loss": 0.4513, "step": 33399 }, { "epoch": 1.8703102251091948, "grad_norm": 1.2874648571014404, "learning_rate": 9.647763157894738e-05, "loss": 0.549, "step": 33400 }, { "epoch": 1.8703662224213238, "grad_norm": 1.316003441810608, "learning_rate": 9.647736842105264e-05, "loss": 0.5344, "step": 33401 }, { "epoch": 1.8704222197334528, "grad_norm": 1.159010887145996, "learning_rate": 9.64771052631579e-05, "loss": 0.3841, "step": 33402 }, { "epoch": 1.8704782170455818, "grad_norm": 1.185390830039978, "learning_rate": 9.647684210526317e-05, "loss": 0.5582, "step": 33403 }, { "epoch": 1.8705342143577108, "grad_norm": 1.2026081085205078, "learning_rate": 9.647657894736843e-05, "loss": 0.4247, "step": 33404 }, { "epoch": 1.8705902116698399, "grad_norm": 1.1749247312545776, "learning_rate": 9.647631578947369e-05, "loss": 0.4169, "step": 33405 }, { "epoch": 1.8706462089819689, "grad_norm": 1.0058047771453857, "learning_rate": 9.647605263157895e-05, "loss": 0.4166, "step": 33406 }, { "epoch": 1.870702206294098, "grad_norm": 1.3511548042297363, "learning_rate": 9.647578947368422e-05, "loss": 0.4144, "step": 33407 }, { "epoch": 1.870758203606227, "grad_norm": 
1.2476104497909546, "learning_rate": 9.647552631578948e-05, "loss": 0.5436, "step": 33408 }, { "epoch": 1.870814200918356, "grad_norm": 1.3797138929367065, "learning_rate": 9.647526315789474e-05, "loss": 0.4511, "step": 33409 }, { "epoch": 1.870870198230485, "grad_norm": 1.3588685989379883, "learning_rate": 9.6475e-05, "loss": 0.3568, "step": 33410 }, { "epoch": 1.870926195542614, "grad_norm": 1.2693657875061035, "learning_rate": 9.647473684210527e-05, "loss": 0.4852, "step": 33411 }, { "epoch": 1.870982192854743, "grad_norm": 1.2818776369094849, "learning_rate": 9.647447368421053e-05, "loss": 0.519, "step": 33412 }, { "epoch": 1.871038190166872, "grad_norm": 1.0625146627426147, "learning_rate": 9.647421052631579e-05, "loss": 0.4105, "step": 33413 }, { "epoch": 1.871094187479001, "grad_norm": 1.251999020576477, "learning_rate": 9.647394736842105e-05, "loss": 0.386, "step": 33414 }, { "epoch": 1.87115018479113, "grad_norm": 1.3598161935806274, "learning_rate": 9.647368421052631e-05, "loss": 0.4513, "step": 33415 }, { "epoch": 1.871206182103259, "grad_norm": 1.6231831312179565, "learning_rate": 9.647342105263159e-05, "loss": 0.3562, "step": 33416 }, { "epoch": 1.871262179415388, "grad_norm": 1.338484764099121, "learning_rate": 9.647315789473685e-05, "loss": 0.5784, "step": 33417 }, { "epoch": 1.871318176727517, "grad_norm": 1.1802111864089966, "learning_rate": 9.64728947368421e-05, "loss": 0.3312, "step": 33418 }, { "epoch": 1.8713741740396461, "grad_norm": 1.439077615737915, "learning_rate": 9.647263157894737e-05, "loss": 0.5292, "step": 33419 }, { "epoch": 1.8714301713517751, "grad_norm": 1.4299787282943726, "learning_rate": 9.647236842105264e-05, "loss": 0.5413, "step": 33420 }, { "epoch": 1.8714861686639042, "grad_norm": 1.0642756223678589, "learning_rate": 9.64721052631579e-05, "loss": 0.3577, "step": 33421 }, { "epoch": 1.8715421659760332, "grad_norm": 1.4010738134384155, "learning_rate": 9.647184210526317e-05, "loss": 0.5869, "step": 33422 }, { "epoch": 
1.8715981632881622, "grad_norm": 1.336345911026001, "learning_rate": 9.647157894736842e-05, "loss": 0.4308, "step": 33423 }, { "epoch": 1.8716541606002912, "grad_norm": 1.9138880968093872, "learning_rate": 9.647131578947369e-05, "loss": 0.442, "step": 33424 }, { "epoch": 1.8717101579124202, "grad_norm": 1.2148171663284302, "learning_rate": 9.647105263157895e-05, "loss": 0.4135, "step": 33425 }, { "epoch": 1.8717661552245493, "grad_norm": 1.213161826133728, "learning_rate": 9.647078947368422e-05, "loss": 0.3819, "step": 33426 }, { "epoch": 1.8718221525366783, "grad_norm": 1.2208739519119263, "learning_rate": 9.647052631578948e-05, "loss": 0.4355, "step": 33427 }, { "epoch": 1.8718781498488073, "grad_norm": 1.170845627784729, "learning_rate": 9.647026315789474e-05, "loss": 0.432, "step": 33428 }, { "epoch": 1.8719341471609363, "grad_norm": 1.3409026861190796, "learning_rate": 9.647e-05, "loss": 0.4973, "step": 33429 }, { "epoch": 1.8719901444730653, "grad_norm": 0.9035952091217041, "learning_rate": 9.646973684210526e-05, "loss": 0.3667, "step": 33430 }, { "epoch": 1.8720461417851944, "grad_norm": 1.4720350503921509, "learning_rate": 9.646947368421054e-05, "loss": 0.4517, "step": 33431 }, { "epoch": 1.8721021390973234, "grad_norm": 1.2141084671020508, "learning_rate": 9.64692105263158e-05, "loss": 0.6211, "step": 33432 }, { "epoch": 1.8721581364094524, "grad_norm": 1.2069872617721558, "learning_rate": 9.646894736842106e-05, "loss": 0.3285, "step": 33433 }, { "epoch": 1.8722141337215814, "grad_norm": 1.1581941843032837, "learning_rate": 9.646868421052632e-05, "loss": 0.3489, "step": 33434 }, { "epoch": 1.8722701310337104, "grad_norm": 1.3856080770492554, "learning_rate": 9.646842105263159e-05, "loss": 0.4284, "step": 33435 }, { "epoch": 1.8723261283458394, "grad_norm": 1.283517599105835, "learning_rate": 9.646815789473685e-05, "loss": 0.4719, "step": 33436 }, { "epoch": 1.8723821256579685, "grad_norm": 1.2817213535308838, "learning_rate": 9.646789473684211e-05, "loss": 
0.3189, "step": 33437 }, { "epoch": 1.8724381229700975, "grad_norm": 1.4360096454620361, "learning_rate": 9.646763157894737e-05, "loss": 0.4909, "step": 33438 }, { "epoch": 1.8724941202822265, "grad_norm": 1.327418565750122, "learning_rate": 9.646736842105264e-05, "loss": 0.4441, "step": 33439 }, { "epoch": 1.8725501175943555, "grad_norm": 1.2192895412445068, "learning_rate": 9.64671052631579e-05, "loss": 0.4229, "step": 33440 }, { "epoch": 1.8726061149064845, "grad_norm": 1.1235969066619873, "learning_rate": 9.646684210526316e-05, "loss": 0.3288, "step": 33441 }, { "epoch": 1.8726621122186136, "grad_norm": 1.4548892974853516, "learning_rate": 9.646657894736842e-05, "loss": 0.4478, "step": 33442 }, { "epoch": 1.8727181095307426, "grad_norm": 1.042901873588562, "learning_rate": 9.64663157894737e-05, "loss": 0.4034, "step": 33443 }, { "epoch": 1.8727741068428716, "grad_norm": 1.234602689743042, "learning_rate": 9.646605263157895e-05, "loss": 0.4703, "step": 33444 }, { "epoch": 1.8728301041550006, "grad_norm": 1.1242870092391968, "learning_rate": 9.646578947368423e-05, "loss": 0.3587, "step": 33445 }, { "epoch": 1.8728861014671296, "grad_norm": 19.38405418395996, "learning_rate": 9.646552631578947e-05, "loss": 0.6063, "step": 33446 }, { "epoch": 1.8729420987792587, "grad_norm": 1.3827544450759888, "learning_rate": 9.646526315789473e-05, "loss": 0.5936, "step": 33447 }, { "epoch": 1.8729980960913877, "grad_norm": 1.390112280845642, "learning_rate": 9.6465e-05, "loss": 0.4984, "step": 33448 }, { "epoch": 1.8730540934035167, "grad_norm": 1.5721423625946045, "learning_rate": 9.646473684210527e-05, "loss": 0.4716, "step": 33449 }, { "epoch": 1.8731100907156457, "grad_norm": 1.161504864692688, "learning_rate": 9.646447368421054e-05, "loss": 0.5542, "step": 33450 }, { "epoch": 1.8731660880277747, "grad_norm": 1.4575273990631104, "learning_rate": 9.646421052631578e-05, "loss": 0.4453, "step": 33451 }, { "epoch": 1.8732220853399038, "grad_norm": 1.0841702222824097, 
"learning_rate": 9.646394736842106e-05, "loss": 0.4081, "step": 33452 }, { "epoch": 1.8732780826520328, "grad_norm": 2.4907963275909424, "learning_rate": 9.646368421052632e-05, "loss": 0.5758, "step": 33453 }, { "epoch": 1.8733340799641618, "grad_norm": 1.3458235263824463, "learning_rate": 9.646342105263159e-05, "loss": 0.3684, "step": 33454 }, { "epoch": 1.8733900772762908, "grad_norm": 2.1697590351104736, "learning_rate": 9.646315789473685e-05, "loss": 0.4024, "step": 33455 }, { "epoch": 1.8734460745884198, "grad_norm": 1.2358863353729248, "learning_rate": 9.646289473684211e-05, "loss": 0.4061, "step": 33456 }, { "epoch": 1.8735020719005488, "grad_norm": 1.3101366758346558, "learning_rate": 9.646263157894737e-05, "loss": 0.3517, "step": 33457 }, { "epoch": 1.8735580692126779, "grad_norm": 1.1429742574691772, "learning_rate": 9.646236842105264e-05, "loss": 0.4721, "step": 33458 }, { "epoch": 1.8736140665248069, "grad_norm": 1.4095683097839355, "learning_rate": 9.64621052631579e-05, "loss": 0.4284, "step": 33459 }, { "epoch": 1.873670063836936, "grad_norm": 1.1719272136688232, "learning_rate": 9.646184210526316e-05, "loss": 0.3813, "step": 33460 }, { "epoch": 1.873726061149065, "grad_norm": 1.4283121824264526, "learning_rate": 9.646157894736842e-05, "loss": 0.378, "step": 33461 }, { "epoch": 1.873782058461194, "grad_norm": 1.238408088684082, "learning_rate": 9.64613157894737e-05, "loss": 0.3886, "step": 33462 }, { "epoch": 1.873838055773323, "grad_norm": 1.2752630710601807, "learning_rate": 9.646105263157896e-05, "loss": 0.5643, "step": 33463 }, { "epoch": 1.873894053085452, "grad_norm": 1.4761980772018433, "learning_rate": 9.646078947368422e-05, "loss": 0.4346, "step": 33464 }, { "epoch": 1.873950050397581, "grad_norm": 1.5497807264328003, "learning_rate": 9.646052631578948e-05, "loss": 0.4997, "step": 33465 }, { "epoch": 1.87400604770971, "grad_norm": 1.2519959211349487, "learning_rate": 9.646026315789473e-05, "loss": 0.3908, "step": 33466 }, { "epoch": 
1.874062045021839, "grad_norm": 1.2081587314605713, "learning_rate": 9.646000000000001e-05, "loss": 0.3579, "step": 33467 }, { "epoch": 1.874118042333968, "grad_norm": 1.389553189277649, "learning_rate": 9.645973684210527e-05, "loss": 0.5409, "step": 33468 }, { "epoch": 1.874174039646097, "grad_norm": 1.1664785146713257, "learning_rate": 9.645947368421053e-05, "loss": 0.4342, "step": 33469 }, { "epoch": 1.874230036958226, "grad_norm": 1.5058810710906982, "learning_rate": 9.645921052631579e-05, "loss": 0.4564, "step": 33470 }, { "epoch": 1.8742860342703551, "grad_norm": 1.4256490468978882, "learning_rate": 9.645894736842106e-05, "loss": 0.7544, "step": 33471 }, { "epoch": 1.8743420315824841, "grad_norm": 1.1871120929718018, "learning_rate": 9.645868421052632e-05, "loss": 0.3614, "step": 33472 }, { "epoch": 1.8743980288946132, "grad_norm": 1.2680423259735107, "learning_rate": 9.645842105263158e-05, "loss": 0.6132, "step": 33473 }, { "epoch": 1.8744540262067422, "grad_norm": 1.3779940605163574, "learning_rate": 9.645815789473684e-05, "loss": 0.5614, "step": 33474 }, { "epoch": 1.8745100235188712, "grad_norm": 1.3336397409439087, "learning_rate": 9.645789473684211e-05, "loss": 0.7411, "step": 33475 }, { "epoch": 1.8745660208310002, "grad_norm": 1.2560302019119263, "learning_rate": 9.645763157894737e-05, "loss": 0.5966, "step": 33476 }, { "epoch": 1.8746220181431292, "grad_norm": 1.2894598245620728, "learning_rate": 9.645736842105265e-05, "loss": 0.5726, "step": 33477 }, { "epoch": 1.8746780154552583, "grad_norm": 1.4899004697799683, "learning_rate": 9.645710526315789e-05, "loss": 0.5685, "step": 33478 }, { "epoch": 1.8747340127673873, "grad_norm": 1.2647489309310913, "learning_rate": 9.645684210526317e-05, "loss": 0.5478, "step": 33479 }, { "epoch": 1.8747900100795163, "grad_norm": 1.6174085140228271, "learning_rate": 9.645657894736843e-05, "loss": 0.5126, "step": 33480 }, { "epoch": 1.8748460073916453, "grad_norm": 1.1691871881484985, "learning_rate": 
9.645631578947369e-05, "loss": 0.3347, "step": 33481 }, { "epoch": 1.8749020047037743, "grad_norm": 1.0986146926879883, "learning_rate": 9.645605263157896e-05, "loss": 0.4416, "step": 33482 }, { "epoch": 1.8749580020159033, "grad_norm": 1.0803313255310059, "learning_rate": 9.64557894736842e-05, "loss": 0.3994, "step": 33483 }, { "epoch": 1.8750139993280324, "grad_norm": 1.4178773164749146, "learning_rate": 9.645552631578948e-05, "loss": 0.4352, "step": 33484 }, { "epoch": 1.8750699966401614, "grad_norm": 1.1567668914794922, "learning_rate": 9.645526315789474e-05, "loss": 0.3542, "step": 33485 }, { "epoch": 1.8751259939522904, "grad_norm": 1.1704045534133911, "learning_rate": 9.645500000000001e-05, "loss": 0.3638, "step": 33486 }, { "epoch": 1.8751819912644194, "grad_norm": 1.0589802265167236, "learning_rate": 9.645473684210527e-05, "loss": 0.3783, "step": 33487 }, { "epoch": 1.8752379885765484, "grad_norm": 1.2962638139724731, "learning_rate": 9.645447368421053e-05, "loss": 0.3587, "step": 33488 }, { "epoch": 1.8752939858886775, "grad_norm": 1.381291151046753, "learning_rate": 9.645421052631579e-05, "loss": 0.4869, "step": 33489 }, { "epoch": 1.8753499832008065, "grad_norm": 1.4825259447097778, "learning_rate": 9.645394736842106e-05, "loss": 0.3757, "step": 33490 }, { "epoch": 1.8754059805129355, "grad_norm": 1.277748942375183, "learning_rate": 9.645368421052632e-05, "loss": 0.448, "step": 33491 }, { "epoch": 1.8754619778250645, "grad_norm": 1.4495632648468018, "learning_rate": 9.645342105263158e-05, "loss": 0.4545, "step": 33492 }, { "epoch": 1.8755179751371935, "grad_norm": 2.9469432830810547, "learning_rate": 9.645315789473684e-05, "loss": 0.5421, "step": 33493 }, { "epoch": 1.8755739724493226, "grad_norm": 1.1439541578292847, "learning_rate": 9.645289473684212e-05, "loss": 0.4455, "step": 33494 }, { "epoch": 1.8756299697614516, "grad_norm": 1.2018928527832031, "learning_rate": 9.645263157894738e-05, "loss": 0.4064, "step": 33495 }, { "epoch": 
1.8756859670735806, "grad_norm": 1.1954567432403564, "learning_rate": 9.645236842105264e-05, "loss": 0.4607, "step": 33496 }, { "epoch": 1.8757419643857096, "grad_norm": 1.1736812591552734, "learning_rate": 9.64521052631579e-05, "loss": 0.4126, "step": 33497 }, { "epoch": 1.8757979616978386, "grad_norm": 1.3685011863708496, "learning_rate": 9.645184210526315e-05, "loss": 0.537, "step": 33498 }, { "epoch": 1.8758539590099677, "grad_norm": 1.4805583953857422, "learning_rate": 9.645157894736843e-05, "loss": 0.4641, "step": 33499 }, { "epoch": 1.8759099563220967, "grad_norm": 1.1129140853881836, "learning_rate": 9.645131578947369e-05, "loss": 0.5136, "step": 33500 }, { "epoch": 1.8759659536342257, "grad_norm": 1.281879186630249, "learning_rate": 9.645105263157895e-05, "loss": 0.5016, "step": 33501 }, { "epoch": 1.8760219509463547, "grad_norm": 1.2343182563781738, "learning_rate": 9.645078947368421e-05, "loss": 0.4978, "step": 33502 }, { "epoch": 1.8760779482584837, "grad_norm": 2.5606625080108643, "learning_rate": 9.645052631578948e-05, "loss": 0.6544, "step": 33503 }, { "epoch": 1.8761339455706127, "grad_norm": 1.1873128414154053, "learning_rate": 9.645026315789474e-05, "loss": 0.4298, "step": 33504 }, { "epoch": 1.8761899428827418, "grad_norm": 1.2903032302856445, "learning_rate": 9.645000000000001e-05, "loss": 0.3659, "step": 33505 }, { "epoch": 1.8762459401948708, "grad_norm": 1.234182596206665, "learning_rate": 9.644973684210526e-05, "loss": 0.5044, "step": 33506 }, { "epoch": 1.8763019375069998, "grad_norm": 1.7295570373535156, "learning_rate": 9.644947368421053e-05, "loss": 0.5064, "step": 33507 }, { "epoch": 1.8763579348191288, "grad_norm": 1.4970859289169312, "learning_rate": 9.644921052631579e-05, "loss": 0.4238, "step": 33508 }, { "epoch": 1.8764139321312578, "grad_norm": 1.4024280309677124, "learning_rate": 9.644894736842107e-05, "loss": 0.3762, "step": 33509 }, { "epoch": 1.8764699294433869, "grad_norm": 1.2532130479812622, "learning_rate": 
9.644868421052633e-05, "loss": 0.4599, "step": 33510 }, { "epoch": 1.8765259267555159, "grad_norm": 1.2962455749511719, "learning_rate": 9.644842105263159e-05, "loss": 0.466, "step": 33511 }, { "epoch": 1.876581924067645, "grad_norm": 1.477338194847107, "learning_rate": 9.644815789473685e-05, "loss": 0.4867, "step": 33512 }, { "epoch": 1.876637921379774, "grad_norm": 1.3582735061645508, "learning_rate": 9.644789473684212e-05, "loss": 0.5876, "step": 33513 }, { "epoch": 1.876693918691903, "grad_norm": 1.4817018508911133, "learning_rate": 9.644763157894738e-05, "loss": 0.5833, "step": 33514 }, { "epoch": 1.876749916004032, "grad_norm": 1.1551045179367065, "learning_rate": 9.644736842105262e-05, "loss": 0.366, "step": 33515 }, { "epoch": 1.876805913316161, "grad_norm": 1.5476411581039429, "learning_rate": 9.64471052631579e-05, "loss": 0.4366, "step": 33516 }, { "epoch": 1.87686191062829, "grad_norm": 1.493558406829834, "learning_rate": 9.644684210526316e-05, "loss": 0.3961, "step": 33517 }, { "epoch": 1.876917907940419, "grad_norm": 1.3267251253128052, "learning_rate": 9.644657894736843e-05, "loss": 0.5182, "step": 33518 }, { "epoch": 1.876973905252548, "grad_norm": 1.1937638521194458, "learning_rate": 9.644631578947369e-05, "loss": 0.536, "step": 33519 }, { "epoch": 1.877029902564677, "grad_norm": 1.0580928325653076, "learning_rate": 9.644605263157895e-05, "loss": 0.375, "step": 33520 }, { "epoch": 1.877085899876806, "grad_norm": 1.1075624227523804, "learning_rate": 9.644578947368421e-05, "loss": 0.4144, "step": 33521 }, { "epoch": 1.877141897188935, "grad_norm": 1.1278550624847412, "learning_rate": 9.644552631578948e-05, "loss": 0.3824, "step": 33522 }, { "epoch": 1.8771978945010641, "grad_norm": 1.2467658519744873, "learning_rate": 9.644526315789474e-05, "loss": 0.4265, "step": 33523 }, { "epoch": 1.8772538918131931, "grad_norm": 1.258202314376831, "learning_rate": 9.6445e-05, "loss": 0.4668, "step": 33524 }, { "epoch": 1.8773098891253222, "grad_norm": 
1.1626869440078735, "learning_rate": 9.644473684210526e-05, "loss": 0.4467, "step": 33525 }, { "epoch": 1.877365886437451, "grad_norm": 1.2683888673782349, "learning_rate": 9.644447368421054e-05, "loss": 0.6625, "step": 33526 }, { "epoch": 1.87742188374958, "grad_norm": 1.3738536834716797, "learning_rate": 9.64442105263158e-05, "loss": 0.5151, "step": 33527 }, { "epoch": 1.877477881061709, "grad_norm": 1.4201312065124512, "learning_rate": 9.644394736842105e-05, "loss": 0.6824, "step": 33528 }, { "epoch": 1.877533878373838, "grad_norm": 1.239016056060791, "learning_rate": 9.644368421052631e-05, "loss": 0.4848, "step": 33529 }, { "epoch": 1.877589875685967, "grad_norm": 1.2624435424804688, "learning_rate": 9.644342105263159e-05, "loss": 0.3683, "step": 33530 }, { "epoch": 1.877645872998096, "grad_norm": 1.3580355644226074, "learning_rate": 9.644315789473685e-05, "loss": 0.493, "step": 33531 }, { "epoch": 1.877701870310225, "grad_norm": 6.130359172821045, "learning_rate": 9.644289473684212e-05, "loss": 0.4353, "step": 33532 }, { "epoch": 1.877757867622354, "grad_norm": 1.0849494934082031, "learning_rate": 9.644263157894737e-05, "loss": 0.5841, "step": 33533 }, { "epoch": 1.877813864934483, "grad_norm": 1.3526138067245483, "learning_rate": 9.644236842105263e-05, "loss": 0.5025, "step": 33534 }, { "epoch": 1.8778698622466121, "grad_norm": 1.5321365594863892, "learning_rate": 9.64421052631579e-05, "loss": 0.4215, "step": 33535 }, { "epoch": 1.8779258595587411, "grad_norm": 1.3473364114761353, "learning_rate": 9.644184210526316e-05, "loss": 0.4768, "step": 33536 }, { "epoch": 1.8779818568708702, "grad_norm": 2.2610108852386475, "learning_rate": 9.644157894736843e-05, "loss": 0.4832, "step": 33537 }, { "epoch": 1.8780378541829992, "grad_norm": 1.3744839429855347, "learning_rate": 9.644131578947368e-05, "loss": 0.3333, "step": 33538 }, { "epoch": 1.8780938514951282, "grad_norm": 1.569636583328247, "learning_rate": 9.644105263157895e-05, "loss": 0.4473, "step": 33539 }, { 
"epoch": 1.8781498488072572, "grad_norm": 1.3599305152893066, "learning_rate": 9.644078947368421e-05, "loss": 0.4995, "step": 33540 }, { "epoch": 1.8782058461193862, "grad_norm": 1.6744434833526611, "learning_rate": 9.644052631578949e-05, "loss": 0.4744, "step": 33541 }, { "epoch": 1.8782618434315153, "grad_norm": 1.747742772102356, "learning_rate": 9.644026315789475e-05, "loss": 0.6652, "step": 33542 }, { "epoch": 1.8783178407436443, "grad_norm": 1.4932689666748047, "learning_rate": 9.644e-05, "loss": 0.461, "step": 33543 }, { "epoch": 1.8783738380557733, "grad_norm": 1.44297194480896, "learning_rate": 9.643973684210526e-05, "loss": 0.4669, "step": 33544 }, { "epoch": 1.8784298353679023, "grad_norm": 1.219820499420166, "learning_rate": 9.643947368421054e-05, "loss": 0.4247, "step": 33545 }, { "epoch": 1.8784858326800313, "grad_norm": 1.0487533807754517, "learning_rate": 9.64392105263158e-05, "loss": 0.3219, "step": 33546 }, { "epoch": 1.8785418299921604, "grad_norm": 1.5364068746566772, "learning_rate": 9.643894736842106e-05, "loss": 0.4218, "step": 33547 }, { "epoch": 1.8785978273042894, "grad_norm": 1.849379062652588, "learning_rate": 9.643868421052632e-05, "loss": 0.5092, "step": 33548 }, { "epoch": 1.8786538246164184, "grad_norm": 1.3615421056747437, "learning_rate": 9.643842105263159e-05, "loss": 0.4887, "step": 33549 }, { "epoch": 1.8787098219285474, "grad_norm": 1.3537588119506836, "learning_rate": 9.643815789473685e-05, "loss": 0.3424, "step": 33550 }, { "epoch": 1.8787658192406764, "grad_norm": 1.3269686698913574, "learning_rate": 9.643789473684211e-05, "loss": 0.4016, "step": 33551 }, { "epoch": 1.8788218165528054, "grad_norm": 1.248303771018982, "learning_rate": 9.643763157894737e-05, "loss": 0.4266, "step": 33552 }, { "epoch": 1.8788778138649345, "grad_norm": 1.1459150314331055, "learning_rate": 9.643736842105263e-05, "loss": 0.5064, "step": 33553 }, { "epoch": 1.8789338111770635, "grad_norm": 1.3242383003234863, "learning_rate": 9.64371052631579e-05, 
"loss": 0.4664, "step": 33554 }, { "epoch": 1.8789898084891925, "grad_norm": 1.175794005393982, "learning_rate": 9.643684210526316e-05, "loss": 0.6039, "step": 33555 }, { "epoch": 1.8790458058013215, "grad_norm": 1.266629695892334, "learning_rate": 9.643657894736842e-05, "loss": 0.4808, "step": 33556 }, { "epoch": 1.8791018031134505, "grad_norm": 1.1700021028518677, "learning_rate": 9.643631578947368e-05, "loss": 0.4547, "step": 33557 }, { "epoch": 1.8791578004255796, "grad_norm": 1.6373611688613892, "learning_rate": 9.643605263157896e-05, "loss": 0.8479, "step": 33558 }, { "epoch": 1.8792137977377086, "grad_norm": 1.2791857719421387, "learning_rate": 9.643578947368421e-05, "loss": 0.6051, "step": 33559 }, { "epoch": 1.8792697950498376, "grad_norm": 2.331620454788208, "learning_rate": 9.643552631578949e-05, "loss": 0.4451, "step": 33560 }, { "epoch": 1.8793257923619666, "grad_norm": 1.241905927658081, "learning_rate": 9.643526315789473e-05, "loss": 0.4582, "step": 33561 }, { "epoch": 1.8793817896740956, "grad_norm": 1.7495547533035278, "learning_rate": 9.643500000000001e-05, "loss": 0.5253, "step": 33562 }, { "epoch": 1.8794377869862247, "grad_norm": 1.262455701828003, "learning_rate": 9.643473684210527e-05, "loss": 0.4419, "step": 33563 }, { "epoch": 1.8794937842983537, "grad_norm": 1.1354089975357056, "learning_rate": 9.643447368421054e-05, "loss": 0.3548, "step": 33564 }, { "epoch": 1.8795497816104827, "grad_norm": 1.259466528892517, "learning_rate": 9.64342105263158e-05, "loss": 0.432, "step": 33565 }, { "epoch": 1.8796057789226117, "grad_norm": 1.0078903436660767, "learning_rate": 9.643394736842106e-05, "loss": 0.5474, "step": 33566 }, { "epoch": 1.8796617762347407, "grad_norm": 1.3523410558700562, "learning_rate": 9.643368421052632e-05, "loss": 0.5023, "step": 33567 }, { "epoch": 1.8797177735468698, "grad_norm": 1.131030797958374, "learning_rate": 9.643342105263158e-05, "loss": 0.4898, "step": 33568 }, { "epoch": 1.8797737708589988, "grad_norm": 
1.4040687084197998, "learning_rate": 9.643315789473685e-05, "loss": 0.4081, "step": 33569 }, { "epoch": 1.8798297681711278, "grad_norm": 1.508417010307312, "learning_rate": 9.64328947368421e-05, "loss": 0.5026, "step": 33570 }, { "epoch": 1.8798857654832568, "grad_norm": 1.6678227186203003, "learning_rate": 9.643263157894737e-05, "loss": 0.4174, "step": 33571 }, { "epoch": 1.8799417627953858, "grad_norm": 1.1624722480773926, "learning_rate": 9.643236842105263e-05, "loss": 0.3999, "step": 33572 }, { "epoch": 1.8799977601075148, "grad_norm": 1.4845346212387085, "learning_rate": 9.64321052631579e-05, "loss": 0.5066, "step": 33573 }, { "epoch": 1.8800537574196439, "grad_norm": 1.162156105041504, "learning_rate": 9.643184210526316e-05, "loss": 0.4714, "step": 33574 }, { "epoch": 1.8801097547317729, "grad_norm": 1.4354486465454102, "learning_rate": 9.643157894736842e-05, "loss": 0.5478, "step": 33575 }, { "epoch": 1.880165752043902, "grad_norm": 1.3707231283187866, "learning_rate": 9.643131578947368e-05, "loss": 0.4741, "step": 33576 }, { "epoch": 1.880221749356031, "grad_norm": 1.2601408958435059, "learning_rate": 9.643105263157896e-05, "loss": 0.5409, "step": 33577 }, { "epoch": 1.88027774666816, "grad_norm": 1.170703411102295, "learning_rate": 9.643078947368422e-05, "loss": 0.5179, "step": 33578 }, { "epoch": 1.880333743980289, "grad_norm": 1.0930501222610474, "learning_rate": 9.643052631578948e-05, "loss": 0.3924, "step": 33579 }, { "epoch": 1.880389741292418, "grad_norm": 1.5169048309326172, "learning_rate": 9.643026315789474e-05, "loss": 0.478, "step": 33580 }, { "epoch": 1.880445738604547, "grad_norm": 1.275112509727478, "learning_rate": 9.643000000000001e-05, "loss": 0.4494, "step": 33581 }, { "epoch": 1.880501735916676, "grad_norm": 1.0645058155059814, "learning_rate": 9.642973684210527e-05, "loss": 0.3672, "step": 33582 }, { "epoch": 1.880557733228805, "grad_norm": 1.5216699838638306, "learning_rate": 9.642947368421053e-05, "loss": 0.406, "step": 33583 }, { 
"epoch": 1.880613730540934, "grad_norm": 1.1006428003311157, "learning_rate": 9.642921052631579e-05, "loss": 0.3145, "step": 33584 }, { "epoch": 1.880669727853063, "grad_norm": 1.1141917705535889, "learning_rate": 9.642894736842105e-05, "loss": 0.4025, "step": 33585 }, { "epoch": 1.880725725165192, "grad_norm": 1.3039439916610718, "learning_rate": 9.642868421052632e-05, "loss": 0.3697, "step": 33586 }, { "epoch": 1.8807817224773211, "grad_norm": 1.3562519550323486, "learning_rate": 9.642842105263158e-05, "loss": 0.4074, "step": 33587 }, { "epoch": 1.8808377197894501, "grad_norm": 1.293365240097046, "learning_rate": 9.642815789473684e-05, "loss": 0.4704, "step": 33588 }, { "epoch": 1.8808937171015792, "grad_norm": 1.014214277267456, "learning_rate": 9.64278947368421e-05, "loss": 0.3254, "step": 33589 }, { "epoch": 1.8809497144137082, "grad_norm": 1.2974371910095215, "learning_rate": 9.642763157894737e-05, "loss": 0.4649, "step": 33590 }, { "epoch": 1.8810057117258372, "grad_norm": 1.162210464477539, "learning_rate": 9.642736842105263e-05, "loss": 0.3937, "step": 33591 }, { "epoch": 1.8810617090379662, "grad_norm": 1.4351285696029663, "learning_rate": 9.642710526315791e-05, "loss": 0.3878, "step": 33592 }, { "epoch": 1.8811177063500952, "grad_norm": 1.082622766494751, "learning_rate": 9.642684210526315e-05, "loss": 0.3281, "step": 33593 }, { "epoch": 1.8811737036622243, "grad_norm": 1.0994406938552856, "learning_rate": 9.642657894736843e-05, "loss": 0.4361, "step": 33594 }, { "epoch": 1.8812297009743533, "grad_norm": 1.3685424327850342, "learning_rate": 9.642631578947369e-05, "loss": 0.3647, "step": 33595 }, { "epoch": 1.8812856982864823, "grad_norm": 1.3866658210754395, "learning_rate": 9.642605263157896e-05, "loss": 0.4928, "step": 33596 }, { "epoch": 1.8813416955986113, "grad_norm": 1.6045397520065308, "learning_rate": 9.642578947368422e-05, "loss": 0.439, "step": 33597 }, { "epoch": 1.8813976929107403, "grad_norm": 1.2860578298568726, "learning_rate": 
9.642552631578948e-05, "loss": 0.4012, "step": 33598 }, { "epoch": 1.8814536902228693, "grad_norm": 1.388733983039856, "learning_rate": 9.642526315789474e-05, "loss": 0.6161, "step": 33599 }, { "epoch": 1.8815096875349984, "grad_norm": 1.3261902332305908, "learning_rate": 9.642500000000001e-05, "loss": 0.5005, "step": 33600 }, { "epoch": 1.8815656848471274, "grad_norm": 1.3303438425064087, "learning_rate": 9.642473684210527e-05, "loss": 0.3799, "step": 33601 }, { "epoch": 1.8816216821592564, "grad_norm": 1.154757022857666, "learning_rate": 9.642447368421053e-05, "loss": 0.4278, "step": 33602 }, { "epoch": 1.8816776794713854, "grad_norm": 1.2198487520217896, "learning_rate": 9.642421052631579e-05, "loss": 0.4519, "step": 33603 }, { "epoch": 1.8817336767835144, "grad_norm": 1.1428169012069702, "learning_rate": 9.642394736842105e-05, "loss": 0.4733, "step": 33604 }, { "epoch": 1.8817896740956435, "grad_norm": 1.6005162000656128, "learning_rate": 9.642368421052632e-05, "loss": 0.4505, "step": 33605 }, { "epoch": 1.8818456714077725, "grad_norm": 1.1878433227539062, "learning_rate": 9.642342105263158e-05, "loss": 0.3358, "step": 33606 }, { "epoch": 1.8819016687199015, "grad_norm": 1.1796928644180298, "learning_rate": 9.642315789473684e-05, "loss": 0.3803, "step": 33607 }, { "epoch": 1.8819576660320303, "grad_norm": 1.4113816022872925, "learning_rate": 9.64228947368421e-05, "loss": 0.3453, "step": 33608 }, { "epoch": 1.8820136633441593, "grad_norm": 1.5846142768859863, "learning_rate": 9.642263157894738e-05, "loss": 0.4481, "step": 33609 }, { "epoch": 1.8820696606562883, "grad_norm": 1.2397921085357666, "learning_rate": 9.642236842105264e-05, "loss": 0.3869, "step": 33610 }, { "epoch": 1.8821256579684174, "grad_norm": 1.4078173637390137, "learning_rate": 9.64221052631579e-05, "loss": 0.4009, "step": 33611 }, { "epoch": 1.8821816552805464, "grad_norm": 1.3057299852371216, "learning_rate": 9.642184210526316e-05, "loss": 0.3502, "step": 33612 }, { "epoch": 
1.8822376525926754, "grad_norm": 1.337715983390808, "learning_rate": 9.642157894736843e-05, "loss": 0.374, "step": 33613 }, { "epoch": 1.8822936499048044, "grad_norm": 1.0337460041046143, "learning_rate": 9.642131578947369e-05, "loss": 0.332, "step": 33614 }, { "epoch": 1.8823496472169334, "grad_norm": 1.1945194005966187, "learning_rate": 9.642105263157896e-05, "loss": 0.4678, "step": 33615 }, { "epoch": 1.8824056445290624, "grad_norm": 1.33255934715271, "learning_rate": 9.642078947368421e-05, "loss": 0.6063, "step": 33616 }, { "epoch": 1.8824616418411915, "grad_norm": 1.1708885431289673, "learning_rate": 9.642052631578948e-05, "loss": 0.4045, "step": 33617 }, { "epoch": 1.8825176391533205, "grad_norm": 1.1798646450042725, "learning_rate": 9.642026315789474e-05, "loss": 0.4694, "step": 33618 }, { "epoch": 1.8825736364654495, "grad_norm": 1.2565056085586548, "learning_rate": 9.642e-05, "loss": 0.4368, "step": 33619 }, { "epoch": 1.8826296337775785, "grad_norm": 1.0882625579833984, "learning_rate": 9.641973684210526e-05, "loss": 0.3722, "step": 33620 }, { "epoch": 1.8826856310897075, "grad_norm": 1.31728994846344, "learning_rate": 9.641947368421052e-05, "loss": 0.4617, "step": 33621 }, { "epoch": 1.8827416284018366, "grad_norm": 1.18839693069458, "learning_rate": 9.64192105263158e-05, "loss": 0.5075, "step": 33622 }, { "epoch": 1.8827976257139656, "grad_norm": 1.2895548343658447, "learning_rate": 9.641894736842105e-05, "loss": 0.4276, "step": 33623 }, { "epoch": 1.8828536230260946, "grad_norm": 1.4911162853240967, "learning_rate": 9.641868421052633e-05, "loss": 0.445, "step": 33624 }, { "epoch": 1.8829096203382236, "grad_norm": 1.3225804567337036, "learning_rate": 9.641842105263157e-05, "loss": 0.5065, "step": 33625 }, { "epoch": 1.8829656176503526, "grad_norm": 1.3103877305984497, "learning_rate": 9.641815789473685e-05, "loss": 0.4557, "step": 33626 }, { "epoch": 1.8830216149624817, "grad_norm": 1.2465382814407349, "learning_rate": 9.64178947368421e-05, "loss": 
0.3737, "step": 33627 }, { "epoch": 1.8830776122746107, "grad_norm": 1.0563464164733887, "learning_rate": 9.641763157894738e-05, "loss": 0.3558, "step": 33628 }, { "epoch": 1.8831336095867397, "grad_norm": 1.0182586908340454, "learning_rate": 9.641736842105264e-05, "loss": 0.3644, "step": 33629 }, { "epoch": 1.8831896068988687, "grad_norm": 1.4864789247512817, "learning_rate": 9.64171052631579e-05, "loss": 0.5125, "step": 33630 }, { "epoch": 1.8832456042109977, "grad_norm": 1.320773720741272, "learning_rate": 9.641684210526316e-05, "loss": 0.4455, "step": 33631 }, { "epoch": 1.8833016015231268, "grad_norm": 1.162069320678711, "learning_rate": 9.641657894736843e-05, "loss": 0.4008, "step": 33632 }, { "epoch": 1.8833575988352558, "grad_norm": 1.2817904949188232, "learning_rate": 9.641631578947369e-05, "loss": 0.5957, "step": 33633 }, { "epoch": 1.8834135961473848, "grad_norm": 1.4701032638549805, "learning_rate": 9.641605263157895e-05, "loss": 0.4714, "step": 33634 }, { "epoch": 1.8834695934595138, "grad_norm": 1.1117165088653564, "learning_rate": 9.641578947368421e-05, "loss": 0.3716, "step": 33635 }, { "epoch": 1.8835255907716428, "grad_norm": 1.3376896381378174, "learning_rate": 9.641552631578947e-05, "loss": 0.4571, "step": 33636 }, { "epoch": 1.8835815880837719, "grad_norm": 1.3433526754379272, "learning_rate": 9.641526315789474e-05, "loss": 0.492, "step": 33637 }, { "epoch": 1.8836375853959009, "grad_norm": 1.2727469205856323, "learning_rate": 9.6415e-05, "loss": 0.5312, "step": 33638 }, { "epoch": 1.88369358270803, "grad_norm": 1.3819561004638672, "learning_rate": 9.641473684210526e-05, "loss": 0.3681, "step": 33639 }, { "epoch": 1.883749580020159, "grad_norm": 3.575835943222046, "learning_rate": 9.641447368421052e-05, "loss": 0.7263, "step": 33640 }, { "epoch": 1.883805577332288, "grad_norm": 1.2515321969985962, "learning_rate": 9.64142105263158e-05, "loss": 0.3699, "step": 33641 }, { "epoch": 1.883861574644417, "grad_norm": 1.6168538331985474, 
"learning_rate": 9.641394736842106e-05, "loss": 0.5083, "step": 33642 }, { "epoch": 1.883917571956546, "grad_norm": 1.1859713792800903, "learning_rate": 9.641368421052632e-05, "loss": 0.4167, "step": 33643 }, { "epoch": 1.883973569268675, "grad_norm": 1.1871665716171265, "learning_rate": 9.641342105263158e-05, "loss": 0.4458, "step": 33644 }, { "epoch": 1.884029566580804, "grad_norm": 1.9021202325820923, "learning_rate": 9.641315789473685e-05, "loss": 0.6099, "step": 33645 }, { "epoch": 1.884085563892933, "grad_norm": 1.1225911378860474, "learning_rate": 9.641289473684211e-05, "loss": 0.4321, "step": 33646 }, { "epoch": 1.884141561205062, "grad_norm": 1.3472858667373657, "learning_rate": 9.641263157894738e-05, "loss": 0.4517, "step": 33647 }, { "epoch": 1.884197558517191, "grad_norm": 1.1004210710525513, "learning_rate": 9.641236842105263e-05, "loss": 0.392, "step": 33648 }, { "epoch": 1.88425355582932, "grad_norm": 1.0171610116958618, "learning_rate": 9.64121052631579e-05, "loss": 0.3757, "step": 33649 }, { "epoch": 1.884309553141449, "grad_norm": 1.1212135553359985, "learning_rate": 9.641184210526316e-05, "loss": 0.3261, "step": 33650 }, { "epoch": 1.8843655504535781, "grad_norm": 1.3765286207199097, "learning_rate": 9.641157894736844e-05, "loss": 0.4012, "step": 33651 }, { "epoch": 1.8844215477657071, "grad_norm": 1.331695556640625, "learning_rate": 9.64113157894737e-05, "loss": 0.4816, "step": 33652 }, { "epoch": 1.8844775450778362, "grad_norm": 1.4452427625656128, "learning_rate": 9.641105263157895e-05, "loss": 0.4796, "step": 33653 }, { "epoch": 1.8845335423899652, "grad_norm": 1.0920870304107666, "learning_rate": 9.641078947368421e-05, "loss": 0.3887, "step": 33654 }, { "epoch": 1.8845895397020942, "grad_norm": 1.507569432258606, "learning_rate": 9.641052631578947e-05, "loss": 0.5712, "step": 33655 }, { "epoch": 1.8846455370142232, "grad_norm": 1.39818274974823, "learning_rate": 9.641026315789475e-05, "loss": 0.5199, "step": 33656 }, { "epoch": 
1.8847015343263522, "grad_norm": 1.2738715410232544, "learning_rate": 9.641000000000001e-05, "loss": 0.3823, "step": 33657 }, { "epoch": 1.8847575316384813, "grad_norm": 1.2792837619781494, "learning_rate": 9.640973684210527e-05, "loss": 0.5718, "step": 33658 }, { "epoch": 1.8848135289506103, "grad_norm": 1.134690284729004, "learning_rate": 9.640947368421053e-05, "loss": 0.4987, "step": 33659 }, { "epoch": 1.8848695262627393, "grad_norm": 1.654996395111084, "learning_rate": 9.64092105263158e-05, "loss": 0.506, "step": 33660 }, { "epoch": 1.8849255235748683, "grad_norm": 1.5433547496795654, "learning_rate": 9.640894736842106e-05, "loss": 0.4502, "step": 33661 }, { "epoch": 1.8849815208869973, "grad_norm": 1.2343541383743286, "learning_rate": 9.640868421052632e-05, "loss": 0.5119, "step": 33662 }, { "epoch": 1.8850375181991263, "grad_norm": 1.1132186651229858, "learning_rate": 9.640842105263158e-05, "loss": 0.4188, "step": 33663 }, { "epoch": 1.8850935155112554, "grad_norm": 1.0647146701812744, "learning_rate": 9.640815789473685e-05, "loss": 0.3496, "step": 33664 }, { "epoch": 1.8851495128233844, "grad_norm": 1.1932886838912964, "learning_rate": 9.640789473684211e-05, "loss": 0.3946, "step": 33665 }, { "epoch": 1.8852055101355134, "grad_norm": 1.2747304439544678, "learning_rate": 9.640763157894737e-05, "loss": 0.5523, "step": 33666 }, { "epoch": 1.8852615074476424, "grad_norm": 1.1313902139663696, "learning_rate": 9.640736842105263e-05, "loss": 0.3612, "step": 33667 }, { "epoch": 1.8853175047597714, "grad_norm": 1.4558825492858887, "learning_rate": 9.64071052631579e-05, "loss": 0.4322, "step": 33668 }, { "epoch": 1.8853735020719005, "grad_norm": 1.230788230895996, "learning_rate": 9.640684210526316e-05, "loss": 0.4886, "step": 33669 }, { "epoch": 1.8854294993840295, "grad_norm": 1.6095679998397827, "learning_rate": 9.640657894736844e-05, "loss": 0.5022, "step": 33670 }, { "epoch": 1.8854854966961585, "grad_norm": 1.2208847999572754, "learning_rate": 
9.640631578947368e-05, "loss": 0.3561, "step": 33671 }, { "epoch": 1.8855414940082875, "grad_norm": 1.3562772274017334, "learning_rate": 9.640605263157894e-05, "loss": 0.5832, "step": 33672 }, { "epoch": 1.8855974913204165, "grad_norm": 1.2518420219421387, "learning_rate": 9.640578947368422e-05, "loss": 0.4103, "step": 33673 }, { "epoch": 1.8856534886325456, "grad_norm": 1.2258661985397339, "learning_rate": 9.640552631578948e-05, "loss": 0.3607, "step": 33674 }, { "epoch": 1.8857094859446746, "grad_norm": 1.1410584449768066, "learning_rate": 9.640526315789474e-05, "loss": 0.3617, "step": 33675 }, { "epoch": 1.8857654832568036, "grad_norm": 1.5106875896453857, "learning_rate": 9.6405e-05, "loss": 0.4099, "step": 33676 }, { "epoch": 1.8858214805689326, "grad_norm": 1.303658366203308, "learning_rate": 9.640473684210527e-05, "loss": 0.4013, "step": 33677 }, { "epoch": 1.8858774778810616, "grad_norm": 1.1303516626358032, "learning_rate": 9.640447368421053e-05, "loss": 0.4688, "step": 33678 }, { "epoch": 1.8859334751931907, "grad_norm": 1.2534388303756714, "learning_rate": 9.64042105263158e-05, "loss": 0.5761, "step": 33679 }, { "epoch": 1.8859894725053197, "grad_norm": 1.3551287651062012, "learning_rate": 9.640394736842105e-05, "loss": 0.5568, "step": 33680 }, { "epoch": 1.8860454698174487, "grad_norm": 1.0844429731369019, "learning_rate": 9.640368421052632e-05, "loss": 0.4228, "step": 33681 }, { "epoch": 1.8861014671295777, "grad_norm": 1.2325901985168457, "learning_rate": 9.640342105263158e-05, "loss": 0.3286, "step": 33682 }, { "epoch": 1.8861574644417067, "grad_norm": 1.2700951099395752, "learning_rate": 9.640315789473685e-05, "loss": 0.5235, "step": 33683 }, { "epoch": 1.8862134617538358, "grad_norm": 1.2671318054199219, "learning_rate": 9.640289473684211e-05, "loss": 0.3488, "step": 33684 }, { "epoch": 1.8862694590659648, "grad_norm": 1.3767714500427246, "learning_rate": 9.640263157894737e-05, "loss": 0.535, "step": 33685 }, { "epoch": 1.8863254563780938, 
"grad_norm": 1.3846298456192017, "learning_rate": 9.640236842105263e-05, "loss": 0.4825, "step": 33686 }, { "epoch": 1.8863814536902228, "grad_norm": 1.3426178693771362, "learning_rate": 9.640210526315791e-05, "loss": 0.4473, "step": 33687 }, { "epoch": 1.8864374510023518, "grad_norm": 1.4409888982772827, "learning_rate": 9.640184210526317e-05, "loss": 0.456, "step": 33688 }, { "epoch": 1.8864934483144808, "grad_norm": 1.2739231586456299, "learning_rate": 9.640157894736843e-05, "loss": 0.5298, "step": 33689 }, { "epoch": 1.8865494456266099, "grad_norm": 1.292602777481079, "learning_rate": 9.640131578947369e-05, "loss": 0.4456, "step": 33690 }, { "epoch": 1.8866054429387389, "grad_norm": 1.380937099456787, "learning_rate": 9.640105263157895e-05, "loss": 0.4619, "step": 33691 }, { "epoch": 1.886661440250868, "grad_norm": 1.3528653383255005, "learning_rate": 9.640078947368422e-05, "loss": 0.7742, "step": 33692 }, { "epoch": 1.886717437562997, "grad_norm": 1.2947343587875366, "learning_rate": 9.640052631578948e-05, "loss": 0.4531, "step": 33693 }, { "epoch": 1.886773434875126, "grad_norm": 1.711308240890503, "learning_rate": 9.640026315789474e-05, "loss": 0.5245, "step": 33694 }, { "epoch": 1.886829432187255, "grad_norm": 1.9468669891357422, "learning_rate": 9.64e-05, "loss": 0.4723, "step": 33695 }, { "epoch": 1.886885429499384, "grad_norm": 1.1978245973587036, "learning_rate": 9.639973684210527e-05, "loss": 0.4676, "step": 33696 }, { "epoch": 1.886941426811513, "grad_norm": 1.2457702159881592, "learning_rate": 9.639947368421053e-05, "loss": 0.4324, "step": 33697 }, { "epoch": 1.886997424123642, "grad_norm": 1.3889532089233398, "learning_rate": 9.639921052631579e-05, "loss": 0.4338, "step": 33698 }, { "epoch": 1.887053421435771, "grad_norm": 3.0156965255737305, "learning_rate": 9.639894736842105e-05, "loss": 0.407, "step": 33699 }, { "epoch": 1.8871094187479, "grad_norm": 2.5987277030944824, "learning_rate": 9.639868421052632e-05, "loss": 0.4327, "step": 33700 }, { 
"epoch": 1.887165416060029, "grad_norm": 1.2317811250686646, "learning_rate": 9.639842105263158e-05, "loss": 0.4153, "step": 33701 }, { "epoch": 1.887221413372158, "grad_norm": 1.1623668670654297, "learning_rate": 9.639815789473686e-05, "loss": 0.427, "step": 33702 }, { "epoch": 1.8872774106842871, "grad_norm": 1.4816343784332275, "learning_rate": 9.63978947368421e-05, "loss": 0.3947, "step": 33703 }, { "epoch": 1.8873334079964161, "grad_norm": 1.0645761489868164, "learning_rate": 9.639763157894738e-05, "loss": 0.4634, "step": 33704 }, { "epoch": 1.8873894053085452, "grad_norm": 1.100671648979187, "learning_rate": 9.639736842105264e-05, "loss": 0.3889, "step": 33705 }, { "epoch": 1.8874454026206742, "grad_norm": 1.3065826892852783, "learning_rate": 9.63971052631579e-05, "loss": 0.3958, "step": 33706 }, { "epoch": 1.8875013999328032, "grad_norm": 1.1387032270431519, "learning_rate": 9.639684210526317e-05, "loss": 0.3602, "step": 33707 }, { "epoch": 1.8875573972449322, "grad_norm": 1.325945496559143, "learning_rate": 9.639657894736842e-05, "loss": 0.5236, "step": 33708 }, { "epoch": 1.8876133945570612, "grad_norm": 2.0520529747009277, "learning_rate": 9.639631578947369e-05, "loss": 0.5497, "step": 33709 }, { "epoch": 1.8876693918691902, "grad_norm": 2.4582436084747314, "learning_rate": 9.639605263157895e-05, "loss": 0.526, "step": 33710 }, { "epoch": 1.8877253891813193, "grad_norm": 1.141645908355713, "learning_rate": 9.639578947368422e-05, "loss": 0.4367, "step": 33711 }, { "epoch": 1.8877813864934483, "grad_norm": 1.6599266529083252, "learning_rate": 9.639552631578948e-05, "loss": 0.5174, "step": 33712 }, { "epoch": 1.8878373838055773, "grad_norm": 1.2973520755767822, "learning_rate": 9.639526315789474e-05, "loss": 0.4233, "step": 33713 }, { "epoch": 1.8878933811177063, "grad_norm": 1.4130158424377441, "learning_rate": 9.6395e-05, "loss": 0.4095, "step": 33714 }, { "epoch": 1.8879493784298353, "grad_norm": 1.0638209581375122, "learning_rate": 9.639473684210527e-05, 
"loss": 0.3541, "step": 33715 }, { "epoch": 1.8880053757419644, "grad_norm": 1.392927885055542, "learning_rate": 9.639447368421053e-05, "loss": 0.4833, "step": 33716 }, { "epoch": 1.8880613730540934, "grad_norm": 1.2576876878738403, "learning_rate": 9.63942105263158e-05, "loss": 0.4412, "step": 33717 }, { "epoch": 1.8881173703662224, "grad_norm": 1.2127974033355713, "learning_rate": 9.639394736842105e-05, "loss": 0.4262, "step": 33718 }, { "epoch": 1.8881733676783514, "grad_norm": 1.4530575275421143, "learning_rate": 9.639368421052633e-05, "loss": 0.4753, "step": 33719 }, { "epoch": 1.8882293649904804, "grad_norm": 1.5028923749923706, "learning_rate": 9.639342105263159e-05, "loss": 0.4946, "step": 33720 }, { "epoch": 1.8882853623026095, "grad_norm": 1.3612123727798462, "learning_rate": 9.639315789473685e-05, "loss": 0.5098, "step": 33721 }, { "epoch": 1.8883413596147385, "grad_norm": 1.2808541059494019, "learning_rate": 9.63928947368421e-05, "loss": 0.3995, "step": 33722 }, { "epoch": 1.8883973569268675, "grad_norm": 1.3019863367080688, "learning_rate": 9.639263157894737e-05, "loss": 0.4485, "step": 33723 }, { "epoch": 1.8884533542389965, "grad_norm": 1.5253281593322754, "learning_rate": 9.639236842105264e-05, "loss": 0.3523, "step": 33724 }, { "epoch": 1.8885093515511255, "grad_norm": 1.2433851957321167, "learning_rate": 9.63921052631579e-05, "loss": 0.3545, "step": 33725 }, { "epoch": 1.8885653488632546, "grad_norm": 1.3459885120391846, "learning_rate": 9.639184210526316e-05, "loss": 0.3981, "step": 33726 }, { "epoch": 1.8886213461753836, "grad_norm": 1.536413550376892, "learning_rate": 9.639157894736842e-05, "loss": 0.5784, "step": 33727 }, { "epoch": 1.8886773434875126, "grad_norm": 1.5997157096862793, "learning_rate": 9.639131578947369e-05, "loss": 0.5754, "step": 33728 }, { "epoch": 1.8887333407996416, "grad_norm": 1.4149631261825562, "learning_rate": 9.639105263157895e-05, "loss": 0.4858, "step": 33729 }, { "epoch": 1.8887893381117706, "grad_norm": 
1.2886008024215698, "learning_rate": 9.639078947368421e-05, "loss": 0.3614, "step": 33730 }, { "epoch": 1.8888453354238997, "grad_norm": 1.094068169593811, "learning_rate": 9.639052631578947e-05, "loss": 0.3469, "step": 33731 }, { "epoch": 1.8889013327360287, "grad_norm": 1.321854591369629, "learning_rate": 9.639026315789474e-05, "loss": 0.537, "step": 33732 }, { "epoch": 1.8889573300481577, "grad_norm": 1.2715879678726196, "learning_rate": 9.639e-05, "loss": 0.3926, "step": 33733 }, { "epoch": 1.8890133273602867, "grad_norm": 1.2562849521636963, "learning_rate": 9.638973684210528e-05, "loss": 0.5515, "step": 33734 }, { "epoch": 1.8890693246724157, "grad_norm": 1.1653449535369873, "learning_rate": 9.638947368421052e-05, "loss": 0.3282, "step": 33735 }, { "epoch": 1.8891253219845447, "grad_norm": 1.2798750400543213, "learning_rate": 9.63892105263158e-05, "loss": 0.375, "step": 33736 }, { "epoch": 1.8891813192966738, "grad_norm": 1.352647066116333, "learning_rate": 9.638894736842106e-05, "loss": 0.5331, "step": 33737 }, { "epoch": 1.8892373166088028, "grad_norm": 1.0662155151367188, "learning_rate": 9.638868421052633e-05, "loss": 0.3511, "step": 33738 }, { "epoch": 1.8892933139209318, "grad_norm": 1.7140498161315918, "learning_rate": 9.638842105263159e-05, "loss": 0.628, "step": 33739 }, { "epoch": 1.8893493112330608, "grad_norm": 1.380328893661499, "learning_rate": 9.638815789473684e-05, "loss": 0.3845, "step": 33740 }, { "epoch": 1.8894053085451898, "grad_norm": 1.2696951627731323, "learning_rate": 9.638789473684211e-05, "loss": 0.4262, "step": 33741 }, { "epoch": 1.8894613058573189, "grad_norm": 1.3804117441177368, "learning_rate": 9.638763157894737e-05, "loss": 0.4314, "step": 33742 }, { "epoch": 1.8895173031694479, "grad_norm": 1.4543256759643555, "learning_rate": 9.638736842105264e-05, "loss": 0.3924, "step": 33743 }, { "epoch": 1.889573300481577, "grad_norm": 1.3162803649902344, "learning_rate": 9.63871052631579e-05, "loss": 0.4269, "step": 33744 }, { "epoch": 
1.889629297793706, "grad_norm": 1.6088993549346924, "learning_rate": 9.638684210526316e-05, "loss": 0.4083, "step": 33745 }, { "epoch": 1.889685295105835, "grad_norm": 1.1304014921188354, "learning_rate": 9.638657894736842e-05, "loss": 0.3764, "step": 33746 }, { "epoch": 1.889741292417964, "grad_norm": 1.3694664239883423, "learning_rate": 9.63863157894737e-05, "loss": 0.5474, "step": 33747 }, { "epoch": 1.889797289730093, "grad_norm": 1.7699271440505981, "learning_rate": 9.638605263157895e-05, "loss": 0.6235, "step": 33748 }, { "epoch": 1.889853287042222, "grad_norm": 1.3306398391723633, "learning_rate": 9.638578947368421e-05, "loss": 0.5235, "step": 33749 }, { "epoch": 1.889909284354351, "grad_norm": 1.003732442855835, "learning_rate": 9.638552631578947e-05, "loss": 0.413, "step": 33750 }, { "epoch": 1.88996528166648, "grad_norm": 1.5121312141418457, "learning_rate": 9.638526315789475e-05, "loss": 0.5089, "step": 33751 }, { "epoch": 1.890021278978609, "grad_norm": 1.380193829536438, "learning_rate": 9.6385e-05, "loss": 0.4384, "step": 33752 }, { "epoch": 1.890077276290738, "grad_norm": 1.448320746421814, "learning_rate": 9.638473684210527e-05, "loss": 0.4362, "step": 33753 }, { "epoch": 1.890133273602867, "grad_norm": 1.176749348640442, "learning_rate": 9.638447368421053e-05, "loss": 0.4464, "step": 33754 }, { "epoch": 1.8901892709149961, "grad_norm": 1.3431552648544312, "learning_rate": 9.63842105263158e-05, "loss": 0.4593, "step": 33755 }, { "epoch": 1.8902452682271251, "grad_norm": 1.3157732486724854, "learning_rate": 9.638394736842106e-05, "loss": 0.5824, "step": 33756 }, { "epoch": 1.8903012655392541, "grad_norm": 1.3365122079849243, "learning_rate": 9.638368421052632e-05, "loss": 0.4323, "step": 33757 }, { "epoch": 1.8903572628513832, "grad_norm": 1.0848917961120605, "learning_rate": 9.638342105263158e-05, "loss": 0.4097, "step": 33758 }, { "epoch": 1.8904132601635122, "grad_norm": 1.2415964603424072, "learning_rate": 9.638315789473684e-05, "loss": 0.5244, 
"step": 33759 }, { "epoch": 1.8904692574756412, "grad_norm": 1.2789194583892822, "learning_rate": 9.638289473684211e-05, "loss": 0.4942, "step": 33760 }, { "epoch": 1.8905252547877702, "grad_norm": 1.1298048496246338, "learning_rate": 9.638263157894737e-05, "loss": 0.3647, "step": 33761 }, { "epoch": 1.8905812520998992, "grad_norm": 1.3419326543807983, "learning_rate": 9.638236842105264e-05, "loss": 0.4633, "step": 33762 }, { "epoch": 1.8906372494120283, "grad_norm": 1.2517443895339966, "learning_rate": 9.638210526315789e-05, "loss": 0.3826, "step": 33763 }, { "epoch": 1.8906932467241573, "grad_norm": 1.4213787317276, "learning_rate": 9.638184210526316e-05, "loss": 0.4486, "step": 33764 }, { "epoch": 1.8907492440362863, "grad_norm": 1.257437825202942, "learning_rate": 9.638157894736842e-05, "loss": 0.4895, "step": 33765 }, { "epoch": 1.8908052413484153, "grad_norm": 1.1475830078125, "learning_rate": 9.63813157894737e-05, "loss": 0.4839, "step": 33766 }, { "epoch": 1.8908612386605443, "grad_norm": 1.2176473140716553, "learning_rate": 9.638105263157894e-05, "loss": 0.4739, "step": 33767 }, { "epoch": 1.8909172359726734, "grad_norm": 1.2745640277862549, "learning_rate": 9.638078947368422e-05, "loss": 0.4743, "step": 33768 }, { "epoch": 1.8909732332848024, "grad_norm": 1.2797621488571167, "learning_rate": 9.638052631578948e-05, "loss": 0.5023, "step": 33769 }, { "epoch": 1.8910292305969314, "grad_norm": 1.512215495109558, "learning_rate": 9.638026315789475e-05, "loss": 0.5836, "step": 33770 }, { "epoch": 1.8910852279090604, "grad_norm": 1.1744507551193237, "learning_rate": 9.638000000000001e-05, "loss": 0.4569, "step": 33771 }, { "epoch": 1.8911412252211894, "grad_norm": 1.1797175407409668, "learning_rate": 9.637973684210527e-05, "loss": 0.4027, "step": 33772 }, { "epoch": 1.8911972225333185, "grad_norm": 1.2333705425262451, "learning_rate": 9.637947368421053e-05, "loss": 0.3951, "step": 33773 }, { "epoch": 1.8912532198454475, "grad_norm": 1.27533757686615, 
"learning_rate": 9.63792105263158e-05, "loss": 0.5331, "step": 33774 }, { "epoch": 1.8913092171575765, "grad_norm": 1.1606650352478027, "learning_rate": 9.637894736842106e-05, "loss": 0.3501, "step": 33775 }, { "epoch": 1.8913652144697055, "grad_norm": 1.5292431116104126, "learning_rate": 9.637868421052632e-05, "loss": 0.494, "step": 33776 }, { "epoch": 1.8914212117818345, "grad_norm": 1.5637319087982178, "learning_rate": 9.637842105263158e-05, "loss": 0.4728, "step": 33777 }, { "epoch": 1.8914772090939636, "grad_norm": 1.486598253250122, "learning_rate": 9.637815789473684e-05, "loss": 0.48, "step": 33778 }, { "epoch": 1.8915332064060926, "grad_norm": 1.3211822509765625, "learning_rate": 9.637789473684211e-05, "loss": 0.4402, "step": 33779 }, { "epoch": 1.8915892037182216, "grad_norm": 1.488054871559143, "learning_rate": 9.637763157894737e-05, "loss": 0.4652, "step": 33780 }, { "epoch": 1.8916452010303506, "grad_norm": 2.684025764465332, "learning_rate": 9.637736842105263e-05, "loss": 0.4414, "step": 33781 }, { "epoch": 1.8917011983424796, "grad_norm": 1.5047484636306763, "learning_rate": 9.637710526315789e-05, "loss": 0.629, "step": 33782 }, { "epoch": 1.8917571956546086, "grad_norm": 1.1654138565063477, "learning_rate": 9.637684210526317e-05, "loss": 0.4215, "step": 33783 }, { "epoch": 1.8918131929667377, "grad_norm": 1.5539867877960205, "learning_rate": 9.637657894736843e-05, "loss": 0.4363, "step": 33784 }, { "epoch": 1.8918691902788667, "grad_norm": 1.1967475414276123, "learning_rate": 9.637631578947369e-05, "loss": 0.4965, "step": 33785 }, { "epoch": 1.8919251875909957, "grad_norm": 1.3217979669570923, "learning_rate": 9.637605263157895e-05, "loss": 0.4689, "step": 33786 }, { "epoch": 1.8919811849031247, "grad_norm": 1.4112365245819092, "learning_rate": 9.637578947368422e-05, "loss": 0.411, "step": 33787 }, { "epoch": 1.8920371822152537, "grad_norm": 1.2487647533416748, "learning_rate": 9.637552631578948e-05, "loss": 0.4502, "step": 33788 }, { "epoch": 
1.8920931795273828, "grad_norm": 1.1966150999069214, "learning_rate": 9.637526315789475e-05, "loss": 0.4188, "step": 33789 }, { "epoch": 1.8921491768395118, "grad_norm": 1.0316742658615112, "learning_rate": 9.6375e-05, "loss": 0.3524, "step": 33790 }, { "epoch": 1.8922051741516408, "grad_norm": 1.2423183917999268, "learning_rate": 9.637473684210527e-05, "loss": 0.3844, "step": 33791 }, { "epoch": 1.8922611714637698, "grad_norm": 1.5044188499450684, "learning_rate": 9.637447368421053e-05, "loss": 0.4557, "step": 33792 }, { "epoch": 1.8923171687758988, "grad_norm": 1.5450776815414429, "learning_rate": 9.637421052631579e-05, "loss": 0.5864, "step": 33793 }, { "epoch": 1.8923731660880279, "grad_norm": 1.2004612684249878, "learning_rate": 9.637394736842106e-05, "loss": 0.4908, "step": 33794 }, { "epoch": 1.8924291634001569, "grad_norm": 1.4424163103103638, "learning_rate": 9.637368421052631e-05, "loss": 0.4344, "step": 33795 }, { "epoch": 1.892485160712286, "grad_norm": 1.1905769109725952, "learning_rate": 9.637342105263158e-05, "loss": 0.3851, "step": 33796 }, { "epoch": 1.892541158024415, "grad_norm": 1.1438097953796387, "learning_rate": 9.637315789473684e-05, "loss": 0.5024, "step": 33797 }, { "epoch": 1.892597155336544, "grad_norm": 1.3093501329421997, "learning_rate": 9.637289473684212e-05, "loss": 0.5167, "step": 33798 }, { "epoch": 1.892653152648673, "grad_norm": 1.4427118301391602, "learning_rate": 9.637263157894738e-05, "loss": 0.4535, "step": 33799 }, { "epoch": 1.892709149960802, "grad_norm": 1.4023902416229248, "learning_rate": 9.637236842105264e-05, "loss": 0.4296, "step": 33800 }, { "epoch": 1.892765147272931, "grad_norm": 1.0676928758621216, "learning_rate": 9.63721052631579e-05, "loss": 0.4416, "step": 33801 }, { "epoch": 1.89282114458506, "grad_norm": 1.659006118774414, "learning_rate": 9.637184210526317e-05, "loss": 0.3841, "step": 33802 }, { "epoch": 1.892877141897189, "grad_norm": 1.1680701971054077, "learning_rate": 9.637157894736843e-05, "loss": 
0.3974, "step": 33803 }, { "epoch": 1.892933139209318, "grad_norm": 1.1408295631408691, "learning_rate": 9.637131578947369e-05, "loss": 0.5063, "step": 33804 }, { "epoch": 1.892989136521447, "grad_norm": 1.110579490661621, "learning_rate": 9.637105263157895e-05, "loss": 0.4753, "step": 33805 }, { "epoch": 1.893045133833576, "grad_norm": 1.7396602630615234, "learning_rate": 9.637078947368422e-05, "loss": 0.5928, "step": 33806 }, { "epoch": 1.893101131145705, "grad_norm": 3.463256597518921, "learning_rate": 9.637052631578948e-05, "loss": 0.6267, "step": 33807 }, { "epoch": 1.8931571284578341, "grad_norm": 1.2343287467956543, "learning_rate": 9.637026315789474e-05, "loss": 0.5322, "step": 33808 }, { "epoch": 1.8932131257699631, "grad_norm": 1.302688717842102, "learning_rate": 9.637e-05, "loss": 0.5278, "step": 33809 }, { "epoch": 1.8932691230820922, "grad_norm": 1.4164820909500122, "learning_rate": 9.636973684210526e-05, "loss": 0.5605, "step": 33810 }, { "epoch": 1.8933251203942212, "grad_norm": 1.3477282524108887, "learning_rate": 9.636947368421053e-05, "loss": 0.5601, "step": 33811 }, { "epoch": 1.8933811177063502, "grad_norm": 1.483593463897705, "learning_rate": 9.63692105263158e-05, "loss": 0.5226, "step": 33812 }, { "epoch": 1.8934371150184792, "grad_norm": 1.3437167406082153, "learning_rate": 9.636894736842105e-05, "loss": 0.4311, "step": 33813 }, { "epoch": 1.8934931123306082, "grad_norm": 1.099973440170288, "learning_rate": 9.636868421052631e-05, "loss": 0.3685, "step": 33814 }, { "epoch": 1.8935491096427373, "grad_norm": 1.405934453010559, "learning_rate": 9.636842105263159e-05, "loss": 0.4477, "step": 33815 }, { "epoch": 1.8936051069548663, "grad_norm": 1.0704610347747803, "learning_rate": 9.636815789473685e-05, "loss": 0.3752, "step": 33816 }, { "epoch": 1.8936611042669953, "grad_norm": 1.0581151247024536, "learning_rate": 9.636789473684212e-05, "loss": 0.2809, "step": 33817 }, { "epoch": 1.8937171015791243, "grad_norm": 1.4210178852081299, 
"learning_rate": 9.636763157894737e-05, "loss": 0.4863, "step": 33818 }, { "epoch": 1.8937730988912533, "grad_norm": 1.2307201623916626, "learning_rate": 9.636736842105264e-05, "loss": 0.4447, "step": 33819 }, { "epoch": 1.8938290962033824, "grad_norm": 1.2953604459762573, "learning_rate": 9.63671052631579e-05, "loss": 0.3506, "step": 33820 }, { "epoch": 1.8938850935155114, "grad_norm": 1.4039552211761475, "learning_rate": 9.636684210526317e-05, "loss": 0.4966, "step": 33821 }, { "epoch": 1.8939410908276404, "grad_norm": 1.3947612047195435, "learning_rate": 9.636657894736842e-05, "loss": 0.6898, "step": 33822 }, { "epoch": 1.8939970881397694, "grad_norm": 1.1440707445144653, "learning_rate": 9.636631578947369e-05, "loss": 0.3723, "step": 33823 }, { "epoch": 1.8940530854518984, "grad_norm": 1.3068008422851562, "learning_rate": 9.636605263157895e-05, "loss": 0.4593, "step": 33824 }, { "epoch": 1.8941090827640275, "grad_norm": 1.2232640981674194, "learning_rate": 9.636578947368422e-05, "loss": 0.4409, "step": 33825 }, { "epoch": 1.8941650800761565, "grad_norm": 5.0967278480529785, "learning_rate": 9.636552631578948e-05, "loss": 0.6335, "step": 33826 }, { "epoch": 1.8942210773882855, "grad_norm": 1.1405287981033325, "learning_rate": 9.636526315789473e-05, "loss": 0.5729, "step": 33827 }, { "epoch": 1.8942770747004145, "grad_norm": 1.2232614755630493, "learning_rate": 9.6365e-05, "loss": 0.4041, "step": 33828 }, { "epoch": 1.8943330720125435, "grad_norm": 1.1981899738311768, "learning_rate": 9.636473684210526e-05, "loss": 0.4102, "step": 33829 }, { "epoch": 1.8943890693246725, "grad_norm": 1.337570309638977, "learning_rate": 9.636447368421054e-05, "loss": 0.5812, "step": 33830 }, { "epoch": 1.8944450666368016, "grad_norm": 1.2408257722854614, "learning_rate": 9.63642105263158e-05, "loss": 0.4288, "step": 33831 }, { "epoch": 1.8945010639489306, "grad_norm": 1.336145281791687, "learning_rate": 9.636394736842106e-05, "loss": 0.3697, "step": 33832 }, { "epoch": 
1.8945570612610596, "grad_norm": 1.1942615509033203, "learning_rate": 9.636368421052632e-05, "loss": 0.4404, "step": 33833 }, { "epoch": 1.8946130585731886, "grad_norm": 1.264601230621338, "learning_rate": 9.636342105263159e-05, "loss": 0.4135, "step": 33834 }, { "epoch": 1.8946690558853176, "grad_norm": 1.4116460084915161, "learning_rate": 9.636315789473685e-05, "loss": 0.5765, "step": 33835 }, { "epoch": 1.8947250531974467, "grad_norm": 1.257911205291748, "learning_rate": 9.636289473684211e-05, "loss": 0.3804, "step": 33836 }, { "epoch": 1.8947810505095757, "grad_norm": 1.2506638765335083, "learning_rate": 9.636263157894737e-05, "loss": 0.5428, "step": 33837 }, { "epoch": 1.8948370478217047, "grad_norm": 1.0231753587722778, "learning_rate": 9.636236842105264e-05, "loss": 0.3928, "step": 33838 }, { "epoch": 1.8948930451338337, "grad_norm": 1.066957950592041, "learning_rate": 9.63621052631579e-05, "loss": 0.3921, "step": 33839 }, { "epoch": 1.8949490424459627, "grad_norm": 1.5812498331069946, "learning_rate": 9.636184210526316e-05, "loss": 0.5301, "step": 33840 }, { "epoch": 1.8950050397580918, "grad_norm": 1.1676290035247803, "learning_rate": 9.636157894736842e-05, "loss": 0.3537, "step": 33841 }, { "epoch": 1.8950610370702208, "grad_norm": 1.1587707996368408, "learning_rate": 9.63613157894737e-05, "loss": 0.4013, "step": 33842 }, { "epoch": 1.8951170343823498, "grad_norm": 1.092280626296997, "learning_rate": 9.636105263157895e-05, "loss": 0.3923, "step": 33843 }, { "epoch": 1.8951730316944788, "grad_norm": 1.4090932607650757, "learning_rate": 9.636078947368421e-05, "loss": 0.5199, "step": 33844 }, { "epoch": 1.8952290290066078, "grad_norm": 1.1344330310821533, "learning_rate": 9.636052631578947e-05, "loss": 0.367, "step": 33845 }, { "epoch": 1.8952850263187369, "grad_norm": 1.2157957553863525, "learning_rate": 9.636026315789473e-05, "loss": 0.4706, "step": 33846 }, { "epoch": 1.8953410236308659, "grad_norm": 1.0233741998672485, "learning_rate": 9.636e-05, "loss": 
0.3999, "step": 33847 }, { "epoch": 1.895397020942995, "grad_norm": 1.2454618215560913, "learning_rate": 9.635973684210527e-05, "loss": 0.4491, "step": 33848 }, { "epoch": 1.895453018255124, "grad_norm": 1.4090995788574219, "learning_rate": 9.635947368421054e-05, "loss": 0.5261, "step": 33849 }, { "epoch": 1.895509015567253, "grad_norm": 1.2223213911056519, "learning_rate": 9.635921052631578e-05, "loss": 0.3924, "step": 33850 }, { "epoch": 1.895565012879382, "grad_norm": 1.1465595960617065, "learning_rate": 9.635894736842106e-05, "loss": 0.4231, "step": 33851 }, { "epoch": 1.895621010191511, "grad_norm": 1.3092809915542603, "learning_rate": 9.635868421052632e-05, "loss": 0.4809, "step": 33852 }, { "epoch": 1.89567700750364, "grad_norm": 1.2736303806304932, "learning_rate": 9.635842105263159e-05, "loss": 0.3895, "step": 33853 }, { "epoch": 1.895733004815769, "grad_norm": 1.46797513961792, "learning_rate": 9.635815789473685e-05, "loss": 0.6356, "step": 33854 }, { "epoch": 1.895789002127898, "grad_norm": 13.827521324157715, "learning_rate": 9.635789473684211e-05, "loss": 0.5151, "step": 33855 }, { "epoch": 1.8958449994400268, "grad_norm": 1.1565792560577393, "learning_rate": 9.635763157894737e-05, "loss": 0.3499, "step": 33856 }, { "epoch": 1.8959009967521558, "grad_norm": 1.4003320932388306, "learning_rate": 9.635736842105264e-05, "loss": 0.5926, "step": 33857 }, { "epoch": 1.8959569940642849, "grad_norm": 1.3092973232269287, "learning_rate": 9.63571052631579e-05, "loss": 0.4114, "step": 33858 }, { "epoch": 1.8960129913764139, "grad_norm": 1.2511399984359741, "learning_rate": 9.635684210526316e-05, "loss": 0.404, "step": 33859 }, { "epoch": 1.896068988688543, "grad_norm": 1.4181357622146606, "learning_rate": 9.635657894736842e-05, "loss": 0.4621, "step": 33860 }, { "epoch": 1.896124986000672, "grad_norm": 1.2863298654556274, "learning_rate": 9.635631578947368e-05, "loss": 0.4247, "step": 33861 }, { "epoch": 1.896180983312801, "grad_norm": 1.435805320739746, 
"learning_rate": 9.635605263157896e-05, "loss": 0.6558, "step": 33862 }, { "epoch": 1.89623698062493, "grad_norm": 1.1426249742507935, "learning_rate": 9.635578947368422e-05, "loss": 0.4401, "step": 33863 }, { "epoch": 1.896292977937059, "grad_norm": 1.1203047037124634, "learning_rate": 9.635552631578948e-05, "loss": 0.4032, "step": 33864 }, { "epoch": 1.896348975249188, "grad_norm": 1.984034776687622, "learning_rate": 9.635526315789474e-05, "loss": 0.5787, "step": 33865 }, { "epoch": 1.896404972561317, "grad_norm": 1.1878093481063843, "learning_rate": 9.635500000000001e-05, "loss": 0.5409, "step": 33866 }, { "epoch": 1.896460969873446, "grad_norm": 1.182365894317627, "learning_rate": 9.635473684210527e-05, "loss": 0.4109, "step": 33867 }, { "epoch": 1.896516967185575, "grad_norm": 1.4006723165512085, "learning_rate": 9.635447368421053e-05, "loss": 0.5323, "step": 33868 }, { "epoch": 1.896572964497704, "grad_norm": 1.240382432937622, "learning_rate": 9.635421052631579e-05, "loss": 0.3969, "step": 33869 }, { "epoch": 1.896628961809833, "grad_norm": 1.2382484674453735, "learning_rate": 9.635394736842106e-05, "loss": 0.4247, "step": 33870 }, { "epoch": 1.896684959121962, "grad_norm": 1.303580641746521, "learning_rate": 9.635368421052632e-05, "loss": 0.5061, "step": 33871 }, { "epoch": 1.8967409564340911, "grad_norm": 1.0175176858901978, "learning_rate": 9.63534210526316e-05, "loss": 0.4051, "step": 33872 }, { "epoch": 1.8967969537462201, "grad_norm": 1.1349375247955322, "learning_rate": 9.635315789473684e-05, "loss": 0.3357, "step": 33873 }, { "epoch": 1.8968529510583492, "grad_norm": 1.1460795402526855, "learning_rate": 9.635289473684211e-05, "loss": 0.5061, "step": 33874 }, { "epoch": 1.8969089483704782, "grad_norm": 1.1077089309692383, "learning_rate": 9.635263157894737e-05, "loss": 0.4761, "step": 33875 }, { "epoch": 1.8969649456826072, "grad_norm": 1.2439892292022705, "learning_rate": 9.635236842105265e-05, "loss": 0.6103, "step": 33876 }, { "epoch": 
1.8970209429947362, "grad_norm": 1.1198794841766357, "learning_rate": 9.635210526315789e-05, "loss": 0.377, "step": 33877 }, { "epoch": 1.8970769403068652, "grad_norm": 1.1713459491729736, "learning_rate": 9.635184210526315e-05, "loss": 0.356, "step": 33878 }, { "epoch": 1.8971329376189943, "grad_norm": 1.2828829288482666, "learning_rate": 9.635157894736843e-05, "loss": 0.4217, "step": 33879 }, { "epoch": 1.8971889349311233, "grad_norm": 1.3622703552246094, "learning_rate": 9.635131578947369e-05, "loss": 0.4352, "step": 33880 }, { "epoch": 1.8972449322432523, "grad_norm": 1.1960508823394775, "learning_rate": 9.635105263157896e-05, "loss": 0.3662, "step": 33881 }, { "epoch": 1.8973009295553813, "grad_norm": 1.1093484163284302, "learning_rate": 9.63507894736842e-05, "loss": 0.4225, "step": 33882 }, { "epoch": 1.8973569268675103, "grad_norm": 1.1489733457565308, "learning_rate": 9.635052631578948e-05, "loss": 0.3717, "step": 33883 }, { "epoch": 1.8974129241796394, "grad_norm": 1.3215217590332031, "learning_rate": 9.635026315789474e-05, "loss": 0.5286, "step": 33884 }, { "epoch": 1.8974689214917684, "grad_norm": 1.3089959621429443, "learning_rate": 9.635000000000001e-05, "loss": 0.5382, "step": 33885 }, { "epoch": 1.8975249188038974, "grad_norm": 1.2140651941299438, "learning_rate": 9.634973684210527e-05, "loss": 0.4458, "step": 33886 }, { "epoch": 1.8975809161160264, "grad_norm": 1.1865999698638916, "learning_rate": 9.634947368421053e-05, "loss": 0.4096, "step": 33887 }, { "epoch": 1.8976369134281554, "grad_norm": 1.132714867591858, "learning_rate": 9.634921052631579e-05, "loss": 0.4003, "step": 33888 }, { "epoch": 1.8976929107402845, "grad_norm": 1.1732120513916016, "learning_rate": 9.634894736842106e-05, "loss": 0.3859, "step": 33889 }, { "epoch": 1.8977489080524135, "grad_norm": 1.196721076965332, "learning_rate": 9.634868421052632e-05, "loss": 0.4944, "step": 33890 }, { "epoch": 1.8978049053645425, "grad_norm": 2.0257256031036377, "learning_rate": 
9.634842105263158e-05, "loss": 0.3944, "step": 33891 }, { "epoch": 1.8978609026766715, "grad_norm": 1.5345579385757446, "learning_rate": 9.634815789473684e-05, "loss": 0.4686, "step": 33892 }, { "epoch": 1.8979168999888005, "grad_norm": 1.37324857711792, "learning_rate": 9.634789473684212e-05, "loss": 0.3133, "step": 33893 }, { "epoch": 1.8979728973009296, "grad_norm": 1.2435295581817627, "learning_rate": 9.634763157894738e-05, "loss": 0.421, "step": 33894 }, { "epoch": 1.8980288946130586, "grad_norm": 1.1709814071655273, "learning_rate": 9.634736842105264e-05, "loss": 0.3355, "step": 33895 }, { "epoch": 1.8980848919251876, "grad_norm": 1.3459051847457886, "learning_rate": 9.63471052631579e-05, "loss": 0.432, "step": 33896 }, { "epoch": 1.8981408892373166, "grad_norm": 1.2831532955169678, "learning_rate": 9.634684210526315e-05, "loss": 0.5152, "step": 33897 }, { "epoch": 1.8981968865494456, "grad_norm": 2.883971691131592, "learning_rate": 9.634657894736843e-05, "loss": 0.3491, "step": 33898 }, { "epoch": 1.8982528838615746, "grad_norm": 1.1753042936325073, "learning_rate": 9.634631578947369e-05, "loss": 0.508, "step": 33899 }, { "epoch": 1.8983088811737037, "grad_norm": 1.3692599534988403, "learning_rate": 9.634605263157895e-05, "loss": 0.4621, "step": 33900 }, { "epoch": 1.8983648784858327, "grad_norm": 1.031097650527954, "learning_rate": 9.634578947368421e-05, "loss": 0.4122, "step": 33901 }, { "epoch": 1.8984208757979617, "grad_norm": 1.2313507795333862, "learning_rate": 9.634552631578948e-05, "loss": 0.3716, "step": 33902 }, { "epoch": 1.8984768731100907, "grad_norm": 1.2132445573806763, "learning_rate": 9.634526315789474e-05, "loss": 0.5237, "step": 33903 }, { "epoch": 1.8985328704222197, "grad_norm": 1.228338599205017, "learning_rate": 9.634500000000001e-05, "loss": 0.44, "step": 33904 }, { "epoch": 1.8985888677343488, "grad_norm": 1.3325189352035522, "learning_rate": 9.634473684210526e-05, "loss": 0.4337, "step": 33905 }, { "epoch": 1.8986448650464778, 
"grad_norm": 1.539389967918396, "learning_rate": 9.634447368421053e-05, "loss": 0.5252, "step": 33906 }, { "epoch": 1.8987008623586068, "grad_norm": 1.1752235889434814, "learning_rate": 9.634421052631579e-05, "loss": 0.4842, "step": 33907 }, { "epoch": 1.8987568596707358, "grad_norm": 1.2581361532211304, "learning_rate": 9.634394736842107e-05, "loss": 0.4375, "step": 33908 }, { "epoch": 1.8988128569828648, "grad_norm": 1.5986545085906982, "learning_rate": 9.634368421052633e-05, "loss": 0.6611, "step": 33909 }, { "epoch": 1.8988688542949939, "grad_norm": 1.125227689743042, "learning_rate": 9.634342105263159e-05, "loss": 0.4173, "step": 33910 }, { "epoch": 1.8989248516071229, "grad_norm": 1.1550233364105225, "learning_rate": 9.634315789473685e-05, "loss": 0.4341, "step": 33911 }, { "epoch": 1.898980848919252, "grad_norm": 0.9847545027732849, "learning_rate": 9.634289473684212e-05, "loss": 0.3214, "step": 33912 }, { "epoch": 1.899036846231381, "grad_norm": 1.723526954650879, "learning_rate": 9.634263157894738e-05, "loss": 0.4387, "step": 33913 }, { "epoch": 1.89909284354351, "grad_norm": 1.1564288139343262, "learning_rate": 9.634236842105264e-05, "loss": 0.3953, "step": 33914 }, { "epoch": 1.899148840855639, "grad_norm": 1.2269929647445679, "learning_rate": 9.63421052631579e-05, "loss": 0.4532, "step": 33915 }, { "epoch": 1.899204838167768, "grad_norm": 1.3798385858535767, "learning_rate": 9.634184210526316e-05, "loss": 0.4787, "step": 33916 }, { "epoch": 1.899260835479897, "grad_norm": 1.1531494855880737, "learning_rate": 9.634157894736843e-05, "loss": 0.5046, "step": 33917 }, { "epoch": 1.899316832792026, "grad_norm": 1.664993405342102, "learning_rate": 9.634131578947369e-05, "loss": 0.684, "step": 33918 }, { "epoch": 1.899372830104155, "grad_norm": 1.21270751953125, "learning_rate": 9.634105263157895e-05, "loss": 0.3913, "step": 33919 }, { "epoch": 1.899428827416284, "grad_norm": 1.0564695596694946, "learning_rate": 9.634078947368421e-05, "loss": 0.3431, "step": 
33920 }, { "epoch": 1.899484824728413, "grad_norm": 1.189333200454712, "learning_rate": 9.634052631578948e-05, "loss": 0.4347, "step": 33921 }, { "epoch": 1.899540822040542, "grad_norm": 1.3843694925308228, "learning_rate": 9.634026315789474e-05, "loss": 0.4178, "step": 33922 }, { "epoch": 1.899596819352671, "grad_norm": 1.4496698379516602, "learning_rate": 9.634e-05, "loss": 0.5207, "step": 33923 }, { "epoch": 1.8996528166648001, "grad_norm": 1.14940345287323, "learning_rate": 9.633973684210526e-05, "loss": 0.3536, "step": 33924 }, { "epoch": 1.8997088139769291, "grad_norm": 1.3769302368164062, "learning_rate": 9.633947368421054e-05, "loss": 0.641, "step": 33925 }, { "epoch": 1.8997648112890582, "grad_norm": 1.2607393264770508, "learning_rate": 9.63392105263158e-05, "loss": 0.4933, "step": 33926 }, { "epoch": 1.8998208086011872, "grad_norm": 1.3049705028533936, "learning_rate": 9.633894736842107e-05, "loss": 0.4616, "step": 33927 }, { "epoch": 1.8998768059133162, "grad_norm": 1.2081362009048462, "learning_rate": 9.633868421052631e-05, "loss": 0.4235, "step": 33928 }, { "epoch": 1.8999328032254452, "grad_norm": 1.797351598739624, "learning_rate": 9.633842105263159e-05, "loss": 0.5012, "step": 33929 }, { "epoch": 1.8999888005375742, "grad_norm": 1.1874183416366577, "learning_rate": 9.633815789473685e-05, "loss": 0.4893, "step": 33930 }, { "epoch": 1.9000447978497033, "grad_norm": 1.5537874698638916, "learning_rate": 9.633789473684211e-05, "loss": 0.553, "step": 33931 }, { "epoch": 1.9001007951618323, "grad_norm": 4.517096996307373, "learning_rate": 9.633763157894737e-05, "loss": 0.4328, "step": 33932 }, { "epoch": 1.9001567924739613, "grad_norm": 1.2272001504898071, "learning_rate": 9.633736842105263e-05, "loss": 0.4294, "step": 33933 }, { "epoch": 1.9002127897860903, "grad_norm": 1.1718171834945679, "learning_rate": 9.63371052631579e-05, "loss": 0.3851, "step": 33934 }, { "epoch": 1.9002687870982193, "grad_norm": 1.3043668270111084, "learning_rate": 
9.633684210526316e-05, "loss": 0.6295, "step": 33935 }, { "epoch": 1.9003247844103484, "grad_norm": 1.1041791439056396, "learning_rate": 9.633657894736843e-05, "loss": 0.3721, "step": 33936 }, { "epoch": 1.9003807817224774, "grad_norm": 1.1693410873413086, "learning_rate": 9.633631578947368e-05, "loss": 0.392, "step": 33937 }, { "epoch": 1.9004367790346064, "grad_norm": 1.0420026779174805, "learning_rate": 9.633605263157895e-05, "loss": 0.4449, "step": 33938 }, { "epoch": 1.9004927763467352, "grad_norm": 1.079854130744934, "learning_rate": 9.633578947368421e-05, "loss": 0.3689, "step": 33939 }, { "epoch": 1.9005487736588642, "grad_norm": 1.185213327407837, "learning_rate": 9.633552631578949e-05, "loss": 0.4248, "step": 33940 }, { "epoch": 1.9006047709709932, "grad_norm": 1.264661192893982, "learning_rate": 9.633526315789475e-05, "loss": 0.4596, "step": 33941 }, { "epoch": 1.9006607682831222, "grad_norm": 1.2667551040649414, "learning_rate": 9.6335e-05, "loss": 0.42, "step": 33942 }, { "epoch": 1.9007167655952513, "grad_norm": 1.214684247970581, "learning_rate": 9.633473684210526e-05, "loss": 0.3905, "step": 33943 }, { "epoch": 1.9007727629073803, "grad_norm": 1.3427232503890991, "learning_rate": 9.633447368421054e-05, "loss": 0.4673, "step": 33944 }, { "epoch": 1.9008287602195093, "grad_norm": 1.7394793033599854, "learning_rate": 9.63342105263158e-05, "loss": 0.4895, "step": 33945 }, { "epoch": 1.9008847575316383, "grad_norm": 1.4160462617874146, "learning_rate": 9.633394736842106e-05, "loss": 0.5204, "step": 33946 }, { "epoch": 1.9009407548437673, "grad_norm": 1.0840052366256714, "learning_rate": 9.633368421052632e-05, "loss": 0.3965, "step": 33947 }, { "epoch": 1.9009967521558964, "grad_norm": 1.3159178495407104, "learning_rate": 9.633342105263158e-05, "loss": 0.5057, "step": 33948 }, { "epoch": 1.9010527494680254, "grad_norm": 1.166968584060669, "learning_rate": 9.633315789473685e-05, "loss": 0.3823, "step": 33949 }, { "epoch": 1.9011087467801544, "grad_norm": 
1.4747583866119385, "learning_rate": 9.633289473684211e-05, "loss": 0.5492, "step": 33950 }, { "epoch": 1.9011647440922834, "grad_norm": 1.4184335470199585, "learning_rate": 9.633263157894737e-05, "loss": 0.4097, "step": 33951 }, { "epoch": 1.9012207414044124, "grad_norm": 1.1197212934494019, "learning_rate": 9.633236842105263e-05, "loss": 0.362, "step": 33952 }, { "epoch": 1.9012767387165415, "grad_norm": 1.1916241645812988, "learning_rate": 9.63321052631579e-05, "loss": 0.3744, "step": 33953 }, { "epoch": 1.9013327360286705, "grad_norm": 1.0502461194992065, "learning_rate": 9.633184210526316e-05, "loss": 0.3703, "step": 33954 }, { "epoch": 1.9013887333407995, "grad_norm": 1.2252848148345947, "learning_rate": 9.633157894736842e-05, "loss": 0.4205, "step": 33955 }, { "epoch": 1.9014447306529285, "grad_norm": 1.6981635093688965, "learning_rate": 9.633131578947368e-05, "loss": 0.6063, "step": 33956 }, { "epoch": 1.9015007279650575, "grad_norm": 1.4446275234222412, "learning_rate": 9.633105263157896e-05, "loss": 0.4902, "step": 33957 }, { "epoch": 1.9015567252771866, "grad_norm": 1.41056489944458, "learning_rate": 9.633078947368422e-05, "loss": 0.4143, "step": 33958 }, { "epoch": 1.9016127225893156, "grad_norm": 1.5445547103881836, "learning_rate": 9.633052631578949e-05, "loss": 0.5362, "step": 33959 }, { "epoch": 1.9016687199014446, "grad_norm": 1.2787317037582397, "learning_rate": 9.633026315789473e-05, "loss": 0.5314, "step": 33960 }, { "epoch": 1.9017247172135736, "grad_norm": 1.3120055198669434, "learning_rate": 9.633000000000001e-05, "loss": 0.6227, "step": 33961 }, { "epoch": 1.9017807145257026, "grad_norm": 1.3405863046646118, "learning_rate": 9.632973684210527e-05, "loss": 0.5112, "step": 33962 }, { "epoch": 1.9018367118378317, "grad_norm": 1.1099889278411865, "learning_rate": 9.632947368421054e-05, "loss": 0.3395, "step": 33963 }, { "epoch": 1.9018927091499607, "grad_norm": 1.5446983575820923, "learning_rate": 9.63292105263158e-05, "loss": 0.4659, "step": 
33964 }, { "epoch": 1.9019487064620897, "grad_norm": 1.3510504961013794, "learning_rate": 9.632894736842105e-05, "loss": 0.39, "step": 33965 }, { "epoch": 1.9020047037742187, "grad_norm": 1.6096017360687256, "learning_rate": 9.632868421052632e-05, "loss": 0.4869, "step": 33966 }, { "epoch": 1.9020607010863477, "grad_norm": 1.2717180252075195, "learning_rate": 9.632842105263158e-05, "loss": 0.3454, "step": 33967 }, { "epoch": 1.9021166983984767, "grad_norm": 1.4995439052581787, "learning_rate": 9.632815789473685e-05, "loss": 0.4375, "step": 33968 }, { "epoch": 1.9021726957106058, "grad_norm": 1.2514420747756958, "learning_rate": 9.63278947368421e-05, "loss": 0.398, "step": 33969 }, { "epoch": 1.9022286930227348, "grad_norm": 1.4572794437408447, "learning_rate": 9.632763157894737e-05, "loss": 0.4694, "step": 33970 }, { "epoch": 1.9022846903348638, "grad_norm": 1.3768348693847656, "learning_rate": 9.632736842105263e-05, "loss": 0.6099, "step": 33971 }, { "epoch": 1.9023406876469928, "grad_norm": 4.976465225219727, "learning_rate": 9.63271052631579e-05, "loss": 0.513, "step": 33972 }, { "epoch": 1.9023966849591218, "grad_norm": 1.2748594284057617, "learning_rate": 9.632684210526317e-05, "loss": 0.4999, "step": 33973 }, { "epoch": 1.9024526822712509, "grad_norm": 1.338942527770996, "learning_rate": 9.632657894736842e-05, "loss": 0.4815, "step": 33974 }, { "epoch": 1.9025086795833799, "grad_norm": 1.2265136241912842, "learning_rate": 9.632631578947368e-05, "loss": 0.3827, "step": 33975 }, { "epoch": 1.902564676895509, "grad_norm": 1.1530029773712158, "learning_rate": 9.632605263157896e-05, "loss": 0.3824, "step": 33976 }, { "epoch": 1.902620674207638, "grad_norm": 1.1153985261917114, "learning_rate": 9.632578947368422e-05, "loss": 0.4111, "step": 33977 }, { "epoch": 1.902676671519767, "grad_norm": 1.3617947101593018, "learning_rate": 9.632552631578948e-05, "loss": 0.5695, "step": 33978 }, { "epoch": 1.902732668831896, "grad_norm": 1.4597316980361938, "learning_rate": 
9.632526315789474e-05, "loss": 0.6447, "step": 33979 }, { "epoch": 1.902788666144025, "grad_norm": 1.3488487005233765, "learning_rate": 9.632500000000001e-05, "loss": 0.4099, "step": 33980 }, { "epoch": 1.902844663456154, "grad_norm": 1.186000108718872, "learning_rate": 9.632473684210527e-05, "loss": 0.4242, "step": 33981 }, { "epoch": 1.902900660768283, "grad_norm": 1.384997010231018, "learning_rate": 9.632447368421053e-05, "loss": 0.5924, "step": 33982 }, { "epoch": 1.902956658080412, "grad_norm": 1.370645523071289, "learning_rate": 9.632421052631579e-05, "loss": 0.3987, "step": 33983 }, { "epoch": 1.903012655392541, "grad_norm": 1.3196059465408325, "learning_rate": 9.632394736842105e-05, "loss": 0.5177, "step": 33984 }, { "epoch": 1.90306865270467, "grad_norm": 1.0273501873016357, "learning_rate": 9.632368421052632e-05, "loss": 0.4669, "step": 33985 }, { "epoch": 1.903124650016799, "grad_norm": 1.3591892719268799, "learning_rate": 9.632342105263158e-05, "loss": 0.4202, "step": 33986 }, { "epoch": 1.903180647328928, "grad_norm": 1.6412771940231323, "learning_rate": 9.632315789473684e-05, "loss": 0.4595, "step": 33987 }, { "epoch": 1.9032366446410571, "grad_norm": 1.127336859703064, "learning_rate": 9.63228947368421e-05, "loss": 0.3776, "step": 33988 }, { "epoch": 1.9032926419531861, "grad_norm": 1.1677864789962769, "learning_rate": 9.632263157894737e-05, "loss": 0.4411, "step": 33989 }, { "epoch": 1.9033486392653152, "grad_norm": 1.3534612655639648, "learning_rate": 9.632236842105263e-05, "loss": 0.4277, "step": 33990 }, { "epoch": 1.9034046365774442, "grad_norm": 1.0454528331756592, "learning_rate": 9.632210526315791e-05, "loss": 0.4218, "step": 33991 }, { "epoch": 1.9034606338895732, "grad_norm": 1.1414886713027954, "learning_rate": 9.632184210526315e-05, "loss": 0.3341, "step": 33992 }, { "epoch": 1.9035166312017022, "grad_norm": 1.2816598415374756, "learning_rate": 9.632157894736843e-05, "loss": 0.4588, "step": 33993 }, { "epoch": 1.9035726285138312, 
"grad_norm": 1.324050784111023, "learning_rate": 9.632131578947369e-05, "loss": 0.413, "step": 33994 }, { "epoch": 1.9036286258259603, "grad_norm": 1.1996866464614868, "learning_rate": 9.632105263157896e-05, "loss": 0.3494, "step": 33995 }, { "epoch": 1.9036846231380893, "grad_norm": 1.3523858785629272, "learning_rate": 9.632078947368422e-05, "loss": 0.3639, "step": 33996 }, { "epoch": 1.9037406204502183, "grad_norm": 1.1181321144104004, "learning_rate": 9.632052631578948e-05, "loss": 0.3104, "step": 33997 }, { "epoch": 1.9037966177623473, "grad_norm": 1.1143064498901367, "learning_rate": 9.632026315789474e-05, "loss": 0.3711, "step": 33998 }, { "epoch": 1.9038526150744763, "grad_norm": 1.3833227157592773, "learning_rate": 9.632e-05, "loss": 0.6783, "step": 33999 }, { "epoch": 1.9039086123866054, "grad_norm": 1.2196251153945923, "learning_rate": 9.631973684210527e-05, "loss": 0.3824, "step": 34000 }, { "epoch": 1.9039646096987344, "grad_norm": 1.4445823431015015, "learning_rate": 9.631947368421053e-05, "loss": 0.3967, "step": 34001 }, { "epoch": 1.9040206070108634, "grad_norm": 1.404177188873291, "learning_rate": 9.631921052631579e-05, "loss": 0.4164, "step": 34002 }, { "epoch": 1.9040766043229924, "grad_norm": 1.4708819389343262, "learning_rate": 9.631894736842105e-05, "loss": 0.4517, "step": 34003 }, { "epoch": 1.9041326016351214, "grad_norm": 1.2899205684661865, "learning_rate": 9.631868421052633e-05, "loss": 0.5623, "step": 34004 }, { "epoch": 1.9041885989472505, "grad_norm": 1.2500195503234863, "learning_rate": 9.631842105263158e-05, "loss": 0.4628, "step": 34005 }, { "epoch": 1.9042445962593795, "grad_norm": 1.3206158876419067, "learning_rate": 9.631815789473684e-05, "loss": 0.4163, "step": 34006 }, { "epoch": 1.9043005935715085, "grad_norm": 1.0282695293426514, "learning_rate": 9.63178947368421e-05, "loss": 0.3138, "step": 34007 }, { "epoch": 1.9043565908836375, "grad_norm": 1.237765908241272, "learning_rate": 9.631763157894738e-05, "loss": 0.6517, "step": 
34008 }, { "epoch": 1.9044125881957665, "grad_norm": 1.2659456729888916, "learning_rate": 9.631736842105264e-05, "loss": 0.4954, "step": 34009 }, { "epoch": 1.9044685855078956, "grad_norm": 1.0131499767303467, "learning_rate": 9.63171052631579e-05, "loss": 0.2964, "step": 34010 }, { "epoch": 1.9045245828200246, "grad_norm": 1.1911507844924927, "learning_rate": 9.631684210526316e-05, "loss": 0.5041, "step": 34011 }, { "epoch": 1.9045805801321536, "grad_norm": 1.6755865812301636, "learning_rate": 9.631657894736843e-05, "loss": 0.5075, "step": 34012 }, { "epoch": 1.9046365774442826, "grad_norm": 1.1088289022445679, "learning_rate": 9.631631578947369e-05, "loss": 0.3312, "step": 34013 }, { "epoch": 1.9046925747564116, "grad_norm": 0.9510724544525146, "learning_rate": 9.631605263157896e-05, "loss": 0.4505, "step": 34014 }, { "epoch": 1.9047485720685406, "grad_norm": 1.2416331768035889, "learning_rate": 9.631578947368421e-05, "loss": 0.3983, "step": 34015 }, { "epoch": 1.9048045693806697, "grad_norm": 1.8037056922912598, "learning_rate": 9.631552631578948e-05, "loss": 0.4658, "step": 34016 }, { "epoch": 1.9048605666927987, "grad_norm": 1.2995556592941284, "learning_rate": 9.631526315789474e-05, "loss": 0.5357, "step": 34017 }, { "epoch": 1.9049165640049277, "grad_norm": 1.3226078748703003, "learning_rate": 9.6315e-05, "loss": 0.4247, "step": 34018 }, { "epoch": 1.9049725613170567, "grad_norm": 1.5135236978530884, "learning_rate": 9.631473684210528e-05, "loss": 0.5026, "step": 34019 }, { "epoch": 1.9050285586291857, "grad_norm": 1.210835576057434, "learning_rate": 9.631447368421052e-05, "loss": 0.4925, "step": 34020 }, { "epoch": 1.9050845559413148, "grad_norm": 1.1402785778045654, "learning_rate": 9.63142105263158e-05, "loss": 0.3449, "step": 34021 }, { "epoch": 1.9051405532534438, "grad_norm": 1.345220923423767, "learning_rate": 9.631394736842105e-05, "loss": 0.4847, "step": 34022 }, { "epoch": 1.9051965505655728, "grad_norm": 1.2168327569961548, "learning_rate": 
9.631368421052633e-05, "loss": 0.5165, "step": 34023 }, { "epoch": 1.9052525478777018, "grad_norm": 1.2520406246185303, "learning_rate": 9.631342105263157e-05, "loss": 0.3482, "step": 34024 }, { "epoch": 1.9053085451898308, "grad_norm": 1.5670303106307983, "learning_rate": 9.631315789473685e-05, "loss": 0.4262, "step": 34025 }, { "epoch": 1.9053645425019599, "grad_norm": 1.2887228727340698, "learning_rate": 9.631289473684211e-05, "loss": 0.4468, "step": 34026 }, { "epoch": 1.9054205398140889, "grad_norm": 1.4221678972244263, "learning_rate": 9.631263157894738e-05, "loss": 0.3648, "step": 34027 }, { "epoch": 1.905476537126218, "grad_norm": 1.2144310474395752, "learning_rate": 9.631236842105264e-05, "loss": 0.5628, "step": 34028 }, { "epoch": 1.905532534438347, "grad_norm": 1.2756747007369995, "learning_rate": 9.63121052631579e-05, "loss": 0.4993, "step": 34029 }, { "epoch": 1.905588531750476, "grad_norm": 1.1108474731445312, "learning_rate": 9.631184210526316e-05, "loss": 0.4143, "step": 34030 }, { "epoch": 1.905644529062605, "grad_norm": 2.419938802719116, "learning_rate": 9.631157894736843e-05, "loss": 0.5744, "step": 34031 }, { "epoch": 1.905700526374734, "grad_norm": 1.4356378316879272, "learning_rate": 9.631131578947369e-05, "loss": 0.5235, "step": 34032 }, { "epoch": 1.905756523686863, "grad_norm": 1.190591812133789, "learning_rate": 9.631105263157895e-05, "loss": 0.4297, "step": 34033 }, { "epoch": 1.905812520998992, "grad_norm": 1.5549372434616089, "learning_rate": 9.631078947368421e-05, "loss": 0.5141, "step": 34034 }, { "epoch": 1.905868518311121, "grad_norm": 1.0839934349060059, "learning_rate": 9.631052631578947e-05, "loss": 0.3773, "step": 34035 }, { "epoch": 1.90592451562325, "grad_norm": 1.4727147817611694, "learning_rate": 9.631026315789474e-05, "loss": 0.497, "step": 34036 }, { "epoch": 1.905980512935379, "grad_norm": 1.1691676378250122, "learning_rate": 9.631e-05, "loss": 0.4149, "step": 34037 }, { "epoch": 1.906036510247508, "grad_norm": 
1.3910284042358398, "learning_rate": 9.630973684210526e-05, "loss": 0.5442, "step": 34038 }, { "epoch": 1.906092507559637, "grad_norm": 1.3977830410003662, "learning_rate": 9.630947368421052e-05, "loss": 0.5295, "step": 34039 }, { "epoch": 1.9061485048717661, "grad_norm": 1.445651888847351, "learning_rate": 9.63092105263158e-05, "loss": 0.4602, "step": 34040 }, { "epoch": 1.9062045021838951, "grad_norm": 1.5516797304153442, "learning_rate": 9.630894736842106e-05, "loss": 0.636, "step": 34041 }, { "epoch": 1.9062604994960242, "grad_norm": 1.1154232025146484, "learning_rate": 9.630868421052632e-05, "loss": 0.3463, "step": 34042 }, { "epoch": 1.9063164968081532, "grad_norm": 1.4853843450546265, "learning_rate": 9.630842105263158e-05, "loss": 0.4056, "step": 34043 }, { "epoch": 1.9063724941202822, "grad_norm": 1.3673241138458252, "learning_rate": 9.630815789473685e-05, "loss": 0.398, "step": 34044 }, { "epoch": 1.9064284914324112, "grad_norm": 1.294242262840271, "learning_rate": 9.630789473684211e-05, "loss": 0.5059, "step": 34045 }, { "epoch": 1.9064844887445402, "grad_norm": 1.346583604812622, "learning_rate": 9.630763157894738e-05, "loss": 0.4852, "step": 34046 }, { "epoch": 1.9065404860566693, "grad_norm": 1.1595357656478882, "learning_rate": 9.630736842105263e-05, "loss": 0.499, "step": 34047 }, { "epoch": 1.9065964833687983, "grad_norm": 1.107212781906128, "learning_rate": 9.63071052631579e-05, "loss": 0.4304, "step": 34048 }, { "epoch": 1.9066524806809273, "grad_norm": 1.377042293548584, "learning_rate": 9.630684210526316e-05, "loss": 0.6586, "step": 34049 }, { "epoch": 1.9067084779930563, "grad_norm": 1.1463828086853027, "learning_rate": 9.630657894736844e-05, "loss": 0.4227, "step": 34050 }, { "epoch": 1.9067644753051853, "grad_norm": 1.401996374130249, "learning_rate": 9.63063157894737e-05, "loss": 0.5736, "step": 34051 }, { "epoch": 1.9068204726173144, "grad_norm": 1.1936086416244507, "learning_rate": 9.630605263157894e-05, "loss": 0.3585, "step": 34052 }, { 
"epoch": 1.9068764699294434, "grad_norm": 1.1592646837234497, "learning_rate": 9.630578947368421e-05, "loss": 0.4199, "step": 34053 }, { "epoch": 1.9069324672415724, "grad_norm": 1.3792665004730225, "learning_rate": 9.630552631578947e-05, "loss": 0.5398, "step": 34054 }, { "epoch": 1.9069884645537014, "grad_norm": 1.6987804174423218, "learning_rate": 9.630526315789475e-05, "loss": 0.4082, "step": 34055 }, { "epoch": 1.9070444618658304, "grad_norm": 1.2151567935943604, "learning_rate": 9.630500000000001e-05, "loss": 0.4407, "step": 34056 }, { "epoch": 1.9071004591779595, "grad_norm": 1.377261996269226, "learning_rate": 9.630473684210527e-05, "loss": 0.4117, "step": 34057 }, { "epoch": 1.9071564564900885, "grad_norm": 1.2642101049423218, "learning_rate": 9.630447368421053e-05, "loss": 0.5015, "step": 34058 }, { "epoch": 1.9072124538022175, "grad_norm": 1.256766438484192, "learning_rate": 9.63042105263158e-05, "loss": 0.4659, "step": 34059 }, { "epoch": 1.9072684511143465, "grad_norm": 1.3358551263809204, "learning_rate": 9.630394736842106e-05, "loss": 0.3662, "step": 34060 }, { "epoch": 1.9073244484264755, "grad_norm": 1.1349291801452637, "learning_rate": 9.630368421052632e-05, "loss": 0.3684, "step": 34061 }, { "epoch": 1.9073804457386045, "grad_norm": 2.011260509490967, "learning_rate": 9.630342105263158e-05, "loss": 0.3968, "step": 34062 }, { "epoch": 1.9074364430507336, "grad_norm": 2.6817777156829834, "learning_rate": 9.630315789473685e-05, "loss": 0.4376, "step": 34063 }, { "epoch": 1.9074924403628626, "grad_norm": 1.0032994747161865, "learning_rate": 9.630289473684211e-05, "loss": 0.3115, "step": 34064 }, { "epoch": 1.9075484376749916, "grad_norm": 1.1072883605957031, "learning_rate": 9.630263157894737e-05, "loss": 0.3222, "step": 34065 }, { "epoch": 1.9076044349871206, "grad_norm": 1.3170570135116577, "learning_rate": 9.630236842105263e-05, "loss": 0.5636, "step": 34066 }, { "epoch": 1.9076604322992496, "grad_norm": 1.0354145765304565, "learning_rate": 
9.63021052631579e-05, "loss": 0.3679, "step": 34067 }, { "epoch": 1.9077164296113787, "grad_norm": 1.7400500774383545, "learning_rate": 9.630184210526316e-05, "loss": 0.5328, "step": 34068 }, { "epoch": 1.9077724269235077, "grad_norm": 1.0497963428497314, "learning_rate": 9.630157894736842e-05, "loss": 0.4762, "step": 34069 }, { "epoch": 1.9078284242356367, "grad_norm": 1.284080147743225, "learning_rate": 9.630131578947368e-05, "loss": 0.4976, "step": 34070 }, { "epoch": 1.9078844215477657, "grad_norm": 1.3971341848373413, "learning_rate": 9.630105263157894e-05, "loss": 0.5977, "step": 34071 }, { "epoch": 1.9079404188598947, "grad_norm": 1.1857272386550903, "learning_rate": 9.630078947368422e-05, "loss": 0.3346, "step": 34072 }, { "epoch": 1.9079964161720238, "grad_norm": 1.2894997596740723, "learning_rate": 9.630052631578948e-05, "loss": 0.4208, "step": 34073 }, { "epoch": 1.9080524134841528, "grad_norm": 1.0345476865768433, "learning_rate": 9.630026315789475e-05, "loss": 0.3871, "step": 34074 }, { "epoch": 1.9081084107962818, "grad_norm": 1.2449591159820557, "learning_rate": 9.63e-05, "loss": 0.509, "step": 34075 }, { "epoch": 1.9081644081084108, "grad_norm": 1.2070410251617432, "learning_rate": 9.629973684210527e-05, "loss": 0.463, "step": 34076 }, { "epoch": 1.9082204054205398, "grad_norm": 1.2701133489608765, "learning_rate": 9.629947368421053e-05, "loss": 0.4276, "step": 34077 }, { "epoch": 1.9082764027326689, "grad_norm": 1.3997082710266113, "learning_rate": 9.62992105263158e-05, "loss": 0.5083, "step": 34078 }, { "epoch": 1.9083324000447979, "grad_norm": 1.109686255455017, "learning_rate": 9.629894736842105e-05, "loss": 0.6153, "step": 34079 }, { "epoch": 1.908388397356927, "grad_norm": 1.3531676530838013, "learning_rate": 9.629868421052632e-05, "loss": 0.3774, "step": 34080 }, { "epoch": 1.908444394669056, "grad_norm": 1.209232211112976, "learning_rate": 9.629842105263158e-05, "loss": 0.4172, "step": 34081 }, { "epoch": 1.908500391981185, "grad_norm": 
1.4462954998016357, "learning_rate": 9.629815789473685e-05, "loss": 0.473, "step": 34082 }, { "epoch": 1.908556389293314, "grad_norm": 1.3191204071044922, "learning_rate": 9.629789473684211e-05, "loss": 0.4976, "step": 34083 }, { "epoch": 1.908612386605443, "grad_norm": 1.070173978805542, "learning_rate": 9.629763157894737e-05, "loss": 0.3638, "step": 34084 }, { "epoch": 1.908668383917572, "grad_norm": 1.3106045722961426, "learning_rate": 9.629736842105263e-05, "loss": 0.5458, "step": 34085 }, { "epoch": 1.908724381229701, "grad_norm": 1.342474341392517, "learning_rate": 9.62971052631579e-05, "loss": 0.4291, "step": 34086 }, { "epoch": 1.90878037854183, "grad_norm": 1.2556720972061157, "learning_rate": 9.629684210526317e-05, "loss": 0.5439, "step": 34087 }, { "epoch": 1.908836375853959, "grad_norm": 1.4221272468566895, "learning_rate": 9.629657894736843e-05, "loss": 0.3934, "step": 34088 }, { "epoch": 1.908892373166088, "grad_norm": 1.4182671308517456, "learning_rate": 9.629631578947369e-05, "loss": 0.4742, "step": 34089 }, { "epoch": 1.908948370478217, "grad_norm": 1.1342058181762695, "learning_rate": 9.629605263157895e-05, "loss": 0.4728, "step": 34090 }, { "epoch": 1.909004367790346, "grad_norm": 1.3984627723693848, "learning_rate": 9.629578947368422e-05, "loss": 0.5415, "step": 34091 }, { "epoch": 1.9090603651024751, "grad_norm": 1.110479474067688, "learning_rate": 9.629552631578948e-05, "loss": 0.4191, "step": 34092 }, { "epoch": 1.9091163624146041, "grad_norm": 1.3373271226882935, "learning_rate": 9.629526315789474e-05, "loss": 0.4739, "step": 34093 }, { "epoch": 1.9091723597267332, "grad_norm": 1.0889606475830078, "learning_rate": 9.6295e-05, "loss": 0.4729, "step": 34094 }, { "epoch": 1.9092283570388622, "grad_norm": 1.1331942081451416, "learning_rate": 9.629473684210527e-05, "loss": 0.4056, "step": 34095 }, { "epoch": 1.9092843543509912, "grad_norm": 1.4616351127624512, "learning_rate": 9.629447368421053e-05, "loss": 0.3804, "step": 34096 }, { "epoch": 
1.9093403516631202, "grad_norm": 1.1372205018997192, "learning_rate": 9.629421052631579e-05, "loss": 0.3689, "step": 34097 }, { "epoch": 1.9093963489752492, "grad_norm": 1.503621220588684, "learning_rate": 9.629394736842105e-05, "loss": 0.5472, "step": 34098 }, { "epoch": 1.9094523462873783, "grad_norm": 1.1998803615570068, "learning_rate": 9.629368421052632e-05, "loss": 0.4041, "step": 34099 }, { "epoch": 1.9095083435995073, "grad_norm": 1.2902586460113525, "learning_rate": 9.629342105263158e-05, "loss": 0.4447, "step": 34100 }, { "epoch": 1.9095643409116363, "grad_norm": 1.6612448692321777, "learning_rate": 9.629315789473686e-05, "loss": 0.4459, "step": 34101 }, { "epoch": 1.9096203382237653, "grad_norm": 1.2382559776306152, "learning_rate": 9.62928947368421e-05, "loss": 0.435, "step": 34102 }, { "epoch": 1.9096763355358943, "grad_norm": 1.1899651288986206, "learning_rate": 9.629263157894736e-05, "loss": 0.396, "step": 34103 }, { "epoch": 1.9097323328480234, "grad_norm": 1.619368314743042, "learning_rate": 9.629236842105264e-05, "loss": 0.4779, "step": 34104 }, { "epoch": 1.9097883301601524, "grad_norm": 1.3264882564544678, "learning_rate": 9.62921052631579e-05, "loss": 0.5627, "step": 34105 }, { "epoch": 1.9098443274722814, "grad_norm": 1.3368408679962158, "learning_rate": 9.629184210526317e-05, "loss": 0.3888, "step": 34106 }, { "epoch": 1.9099003247844104, "grad_norm": 1.3359593152999878, "learning_rate": 9.629157894736842e-05, "loss": 0.4386, "step": 34107 }, { "epoch": 1.9099563220965394, "grad_norm": 2.0662670135498047, "learning_rate": 9.629131578947369e-05, "loss": 0.4334, "step": 34108 }, { "epoch": 1.9100123194086684, "grad_norm": 1.4234237670898438, "learning_rate": 9.629105263157895e-05, "loss": 0.5127, "step": 34109 }, { "epoch": 1.9100683167207975, "grad_norm": 1.157505750656128, "learning_rate": 9.629078947368422e-05, "loss": 0.5082, "step": 34110 }, { "epoch": 1.9101243140329265, "grad_norm": 1.3347121477127075, "learning_rate": 
9.629052631578948e-05, "loss": 0.4526, "step": 34111 }, { "epoch": 1.9101803113450555, "grad_norm": 1.220191478729248, "learning_rate": 9.629026315789474e-05, "loss": 0.4028, "step": 34112 }, { "epoch": 1.9102363086571845, "grad_norm": 1.5360329151153564, "learning_rate": 9.629e-05, "loss": 0.52, "step": 34113 }, { "epoch": 1.9102923059693135, "grad_norm": 1.1935615539550781, "learning_rate": 9.628973684210527e-05, "loss": 0.4619, "step": 34114 }, { "epoch": 1.9103483032814426, "grad_norm": 1.2846730947494507, "learning_rate": 9.628947368421053e-05, "loss": 0.4897, "step": 34115 }, { "epoch": 1.9104043005935716, "grad_norm": 1.267817497253418, "learning_rate": 9.62892105263158e-05, "loss": 0.4019, "step": 34116 }, { "epoch": 1.9104602979057006, "grad_norm": 1.3004090785980225, "learning_rate": 9.628894736842105e-05, "loss": 0.4717, "step": 34117 }, { "epoch": 1.9105162952178296, "grad_norm": 1.3779370784759521, "learning_rate": 9.628868421052633e-05, "loss": 0.4668, "step": 34118 }, { "epoch": 1.9105722925299586, "grad_norm": 1.5220592021942139, "learning_rate": 9.628842105263159e-05, "loss": 0.649, "step": 34119 }, { "epoch": 1.9106282898420877, "grad_norm": 1.109362006187439, "learning_rate": 9.628815789473685e-05, "loss": 0.4501, "step": 34120 }, { "epoch": 1.9106842871542167, "grad_norm": 1.2758779525756836, "learning_rate": 9.62878947368421e-05, "loss": 0.5198, "step": 34121 }, { "epoch": 1.9107402844663457, "grad_norm": 1.2492681741714478, "learning_rate": 9.628763157894737e-05, "loss": 0.4845, "step": 34122 }, { "epoch": 1.9107962817784747, "grad_norm": 1.2996468544006348, "learning_rate": 9.628736842105264e-05, "loss": 0.4167, "step": 34123 }, { "epoch": 1.9108522790906037, "grad_norm": 1.2472577095031738, "learning_rate": 9.62871052631579e-05, "loss": 0.5334, "step": 34124 }, { "epoch": 1.9109082764027328, "grad_norm": 1.2333887815475464, "learning_rate": 9.628684210526316e-05, "loss": 0.4408, "step": 34125 }, { "epoch": 1.9109642737148618, "grad_norm": 
1.2807999849319458, "learning_rate": 9.628657894736842e-05, "loss": 0.4937, "step": 34126 }, { "epoch": 1.9110202710269908, "grad_norm": 1.3753105401992798, "learning_rate": 9.628631578947369e-05, "loss": 0.4081, "step": 34127 }, { "epoch": 1.9110762683391198, "grad_norm": 1.077946424484253, "learning_rate": 9.628605263157895e-05, "loss": 0.3922, "step": 34128 }, { "epoch": 1.9111322656512488, "grad_norm": 1.2039012908935547, "learning_rate": 9.628578947368422e-05, "loss": 0.4253, "step": 34129 }, { "epoch": 1.9111882629633778, "grad_norm": 1.1264578104019165, "learning_rate": 9.628552631578947e-05, "loss": 0.3595, "step": 34130 }, { "epoch": 1.9112442602755069, "grad_norm": 1.2084472179412842, "learning_rate": 9.628526315789474e-05, "loss": 0.4605, "step": 34131 }, { "epoch": 1.9113002575876359, "grad_norm": 1.6101555824279785, "learning_rate": 9.6285e-05, "loss": 0.5062, "step": 34132 }, { "epoch": 1.911356254899765, "grad_norm": 1.0796869993209839, "learning_rate": 9.628473684210528e-05, "loss": 0.423, "step": 34133 }, { "epoch": 1.911412252211894, "grad_norm": 1.3164533376693726, "learning_rate": 9.628447368421052e-05, "loss": 0.455, "step": 34134 }, { "epoch": 1.911468249524023, "grad_norm": 1.3260061740875244, "learning_rate": 9.62842105263158e-05, "loss": 0.4756, "step": 34135 }, { "epoch": 1.911524246836152, "grad_norm": 1.1585819721221924, "learning_rate": 9.628394736842106e-05, "loss": 0.3987, "step": 34136 }, { "epoch": 1.911580244148281, "grad_norm": 1.0840131044387817, "learning_rate": 9.628368421052633e-05, "loss": 0.3771, "step": 34137 }, { "epoch": 1.91163624146041, "grad_norm": 1.3092949390411377, "learning_rate": 9.628342105263159e-05, "loss": 0.2978, "step": 34138 }, { "epoch": 1.911692238772539, "grad_norm": 1.2766376733779907, "learning_rate": 9.628315789473684e-05, "loss": 0.5586, "step": 34139 }, { "epoch": 1.911748236084668, "grad_norm": 1.1610255241394043, "learning_rate": 9.628289473684211e-05, "loss": 0.3512, "step": 34140 }, { "epoch": 
1.911804233396797, "grad_norm": 1.5795600414276123, "learning_rate": 9.628263157894737e-05, "loss": 0.7731, "step": 34141 }, { "epoch": 1.911860230708926, "grad_norm": 1.1372798681259155, "learning_rate": 9.628236842105264e-05, "loss": 0.3679, "step": 34142 }, { "epoch": 1.911916228021055, "grad_norm": 1.3664319515228271, "learning_rate": 9.62821052631579e-05, "loss": 0.4945, "step": 34143 }, { "epoch": 1.9119722253331841, "grad_norm": 1.1209534406661987, "learning_rate": 9.628184210526316e-05, "loss": 0.3983, "step": 34144 }, { "epoch": 1.9120282226453131, "grad_norm": 1.691123366355896, "learning_rate": 9.628157894736842e-05, "loss": 0.3647, "step": 34145 }, { "epoch": 1.9120842199574422, "grad_norm": 1.096861481666565, "learning_rate": 9.62813157894737e-05, "loss": 0.3945, "step": 34146 }, { "epoch": 1.9121402172695712, "grad_norm": 1.6279759407043457, "learning_rate": 9.628105263157895e-05, "loss": 0.6269, "step": 34147 }, { "epoch": 1.9121962145817002, "grad_norm": 1.212383508682251, "learning_rate": 9.628078947368421e-05, "loss": 0.6039, "step": 34148 }, { "epoch": 1.9122522118938292, "grad_norm": 1.4479109048843384, "learning_rate": 9.628052631578947e-05, "loss": 0.4781, "step": 34149 }, { "epoch": 1.9123082092059582, "grad_norm": 1.1678768396377563, "learning_rate": 9.628026315789475e-05, "loss": 0.369, "step": 34150 }, { "epoch": 1.9123642065180873, "grad_norm": 1.184769630432129, "learning_rate": 9.628e-05, "loss": 0.3619, "step": 34151 }, { "epoch": 1.9124202038302163, "grad_norm": 1.2355364561080933, "learning_rate": 9.627973684210527e-05, "loss": 0.5006, "step": 34152 }, { "epoch": 1.9124762011423453, "grad_norm": 1.4290616512298584, "learning_rate": 9.627947368421053e-05, "loss": 0.4971, "step": 34153 }, { "epoch": 1.9125321984544743, "grad_norm": 1.1668577194213867, "learning_rate": 9.62792105263158e-05, "loss": 0.5216, "step": 34154 }, { "epoch": 1.9125881957666033, "grad_norm": 1.20247220993042, "learning_rate": 9.627894736842106e-05, "loss": 
0.5353, "step": 34155 }, { "epoch": 1.9126441930787323, "grad_norm": 1.2770265340805054, "learning_rate": 9.627868421052632e-05, "loss": 0.4636, "step": 34156 }, { "epoch": 1.9127001903908614, "grad_norm": 1.2879416942596436, "learning_rate": 9.627842105263158e-05, "loss": 0.3738, "step": 34157 }, { "epoch": 1.9127561877029904, "grad_norm": 1.2735261917114258, "learning_rate": 9.627815789473684e-05, "loss": 0.4967, "step": 34158 }, { "epoch": 1.9128121850151194, "grad_norm": 1.1289223432540894, "learning_rate": 9.627789473684211e-05, "loss": 0.4111, "step": 34159 }, { "epoch": 1.9128681823272484, "grad_norm": 1.3695653676986694, "learning_rate": 9.627763157894737e-05, "loss": 0.4389, "step": 34160 }, { "epoch": 1.9129241796393774, "grad_norm": 1.3141605854034424, "learning_rate": 9.627736842105264e-05, "loss": 0.5207, "step": 34161 }, { "epoch": 1.9129801769515065, "grad_norm": 1.2922433614730835, "learning_rate": 9.627710526315789e-05, "loss": 0.3467, "step": 34162 }, { "epoch": 1.9130361742636355, "grad_norm": 1.3904390335083008, "learning_rate": 9.627684210526316e-05, "loss": 0.6212, "step": 34163 }, { "epoch": 1.9130921715757645, "grad_norm": 1.306141972541809, "learning_rate": 9.627657894736842e-05, "loss": 0.4378, "step": 34164 }, { "epoch": 1.9131481688878935, "grad_norm": 3.934844732284546, "learning_rate": 9.62763157894737e-05, "loss": 0.3961, "step": 34165 }, { "epoch": 1.9132041662000225, "grad_norm": 1.4914439916610718, "learning_rate": 9.627605263157896e-05, "loss": 0.4424, "step": 34166 }, { "epoch": 1.9132601635121516, "grad_norm": 1.3697915077209473, "learning_rate": 9.627578947368422e-05, "loss": 0.3805, "step": 34167 }, { "epoch": 1.9133161608242806, "grad_norm": 1.213944673538208, "learning_rate": 9.627552631578948e-05, "loss": 0.5198, "step": 34168 }, { "epoch": 1.9133721581364096, "grad_norm": 1.2957360744476318, "learning_rate": 9.627526315789475e-05, "loss": 0.5333, "step": 34169 }, { "epoch": 1.9134281554485386, "grad_norm": 
1.2282534837722778, "learning_rate": 9.627500000000001e-05, "loss": 0.4358, "step": 34170 }, { "epoch": 1.9134841527606676, "grad_norm": 1.2574893236160278, "learning_rate": 9.627473684210527e-05, "loss": 0.447, "step": 34171 }, { "epoch": 1.9135401500727967, "grad_norm": 1.2336598634719849, "learning_rate": 9.627447368421053e-05, "loss": 0.3728, "step": 34172 }, { "epoch": 1.9135961473849257, "grad_norm": 1.1450051069259644, "learning_rate": 9.627421052631579e-05, "loss": 0.4009, "step": 34173 }, { "epoch": 1.9136521446970547, "grad_norm": 1.0521119832992554, "learning_rate": 9.627394736842106e-05, "loss": 0.3865, "step": 34174 }, { "epoch": 1.9137081420091837, "grad_norm": 1.1690138578414917, "learning_rate": 9.627368421052632e-05, "loss": 0.3348, "step": 34175 }, { "epoch": 1.9137641393213127, "grad_norm": 1.4462471008300781, "learning_rate": 9.627342105263158e-05, "loss": 0.533, "step": 34176 }, { "epoch": 1.9138201366334417, "grad_norm": 1.2312036752700806, "learning_rate": 9.627315789473684e-05, "loss": 0.4244, "step": 34177 }, { "epoch": 1.9138761339455708, "grad_norm": 1.2292710542678833, "learning_rate": 9.627289473684211e-05, "loss": 0.4981, "step": 34178 }, { "epoch": 1.9139321312576998, "grad_norm": 1.1272227764129639, "learning_rate": 9.627263157894737e-05, "loss": 0.46, "step": 34179 }, { "epoch": 1.9139881285698288, "grad_norm": 1.3013900518417358, "learning_rate": 9.627236842105263e-05, "loss": 0.4791, "step": 34180 }, { "epoch": 1.9140441258819578, "grad_norm": 1.3461647033691406, "learning_rate": 9.62721052631579e-05, "loss": 0.388, "step": 34181 }, { "epoch": 1.9141001231940868, "grad_norm": 1.478873372077942, "learning_rate": 9.627184210526317e-05, "loss": 0.5939, "step": 34182 }, { "epoch": 1.9141561205062159, "grad_norm": 1.1562503576278687, "learning_rate": 9.627157894736843e-05, "loss": 0.4228, "step": 34183 }, { "epoch": 1.9142121178183449, "grad_norm": 1.2001547813415527, "learning_rate": 9.62713157894737e-05, "loss": 0.4393, "step": 34184 
}, { "epoch": 1.914268115130474, "grad_norm": 1.3186362981796265, "learning_rate": 9.627105263157895e-05, "loss": 0.4394, "step": 34185 }, { "epoch": 1.914324112442603, "grad_norm": 1.3413364887237549, "learning_rate": 9.627078947368422e-05, "loss": 0.4327, "step": 34186 }, { "epoch": 1.9143801097547317, "grad_norm": 1.077527642250061, "learning_rate": 9.627052631578948e-05, "loss": 0.434, "step": 34187 }, { "epoch": 1.9144361070668607, "grad_norm": 1.1904281377792358, "learning_rate": 9.627026315789475e-05, "loss": 0.4208, "step": 34188 }, { "epoch": 1.9144921043789898, "grad_norm": 1.4086194038391113, "learning_rate": 9.627e-05, "loss": 0.4608, "step": 34189 }, { "epoch": 1.9145481016911188, "grad_norm": 1.7058290243148804, "learning_rate": 9.626973684210526e-05, "loss": 0.5709, "step": 34190 }, { "epoch": 1.9146040990032478, "grad_norm": 2.118589162826538, "learning_rate": 9.626947368421053e-05, "loss": 0.7264, "step": 34191 }, { "epoch": 1.9146600963153768, "grad_norm": 1.8971775770187378, "learning_rate": 9.626921052631579e-05, "loss": 0.4616, "step": 34192 }, { "epoch": 1.9147160936275058, "grad_norm": 1.6888585090637207, "learning_rate": 9.626894736842106e-05, "loss": 0.4564, "step": 34193 }, { "epoch": 1.9147720909396349, "grad_norm": 1.201965570449829, "learning_rate": 9.626868421052631e-05, "loss": 0.3806, "step": 34194 }, { "epoch": 1.9148280882517639, "grad_norm": 1.3241913318634033, "learning_rate": 9.626842105263158e-05, "loss": 0.4463, "step": 34195 }, { "epoch": 1.914884085563893, "grad_norm": 1.21066415309906, "learning_rate": 9.626815789473684e-05, "loss": 0.564, "step": 34196 }, { "epoch": 1.914940082876022, "grad_norm": 1.0874874591827393, "learning_rate": 9.626789473684212e-05, "loss": 0.3959, "step": 34197 }, { "epoch": 1.914996080188151, "grad_norm": 1.0003881454467773, "learning_rate": 9.626763157894738e-05, "loss": 0.3957, "step": 34198 }, { "epoch": 1.91505207750028, "grad_norm": 1.2091902494430542, "learning_rate": 9.626736842105264e-05, 
"loss": 0.4708, "step": 34199 }, { "epoch": 1.915108074812409, "grad_norm": 1.31708562374115, "learning_rate": 9.62671052631579e-05, "loss": 0.4529, "step": 34200 }, { "epoch": 1.915164072124538, "grad_norm": 1.1357673406600952, "learning_rate": 9.626684210526317e-05, "loss": 0.4211, "step": 34201 }, { "epoch": 1.915220069436667, "grad_norm": 1.5314455032348633, "learning_rate": 9.626657894736843e-05, "loss": 0.4606, "step": 34202 }, { "epoch": 1.915276066748796, "grad_norm": 1.7351003885269165, "learning_rate": 9.626631578947369e-05, "loss": 0.5584, "step": 34203 }, { "epoch": 1.915332064060925, "grad_norm": 1.1706600189208984, "learning_rate": 9.626605263157895e-05, "loss": 0.4167, "step": 34204 }, { "epoch": 1.915388061373054, "grad_norm": 1.2618739604949951, "learning_rate": 9.626578947368422e-05, "loss": 0.3504, "step": 34205 }, { "epoch": 1.915444058685183, "grad_norm": 2.1442837715148926, "learning_rate": 9.626552631578948e-05, "loss": 0.4904, "step": 34206 }, { "epoch": 1.915500055997312, "grad_norm": 1.4252557754516602, "learning_rate": 9.626526315789474e-05, "loss": 0.6217, "step": 34207 }, { "epoch": 1.9155560533094411, "grad_norm": 1.251801609992981, "learning_rate": 9.6265e-05, "loss": 0.4026, "step": 34208 }, { "epoch": 1.9156120506215701, "grad_norm": 1.2736146450042725, "learning_rate": 9.626473684210526e-05, "loss": 0.4117, "step": 34209 }, { "epoch": 1.9156680479336992, "grad_norm": 1.229191780090332, "learning_rate": 9.626447368421053e-05, "loss": 0.3847, "step": 34210 }, { "epoch": 1.9157240452458282, "grad_norm": 1.1892379522323608, "learning_rate": 9.62642105263158e-05, "loss": 0.3912, "step": 34211 }, { "epoch": 1.9157800425579572, "grad_norm": 0.9947324395179749, "learning_rate": 9.626394736842105e-05, "loss": 0.3347, "step": 34212 }, { "epoch": 1.9158360398700862, "grad_norm": 1.2207244634628296, "learning_rate": 9.626368421052631e-05, "loss": 0.4815, "step": 34213 }, { "epoch": 1.9158920371822152, "grad_norm": 1.3298957347869873, 
"learning_rate": 9.626342105263159e-05, "loss": 0.4308, "step": 34214 }, { "epoch": 1.9159480344943443, "grad_norm": 1.2501661777496338, "learning_rate": 9.626315789473685e-05, "loss": 0.4146, "step": 34215 }, { "epoch": 1.9160040318064733, "grad_norm": 1.244620442390442, "learning_rate": 9.626289473684212e-05, "loss": 0.4207, "step": 34216 }, { "epoch": 1.9160600291186023, "grad_norm": 1.4204108715057373, "learning_rate": 9.626263157894737e-05, "loss": 0.4191, "step": 34217 }, { "epoch": 1.9161160264307313, "grad_norm": 1.298211693763733, "learning_rate": 9.626236842105264e-05, "loss": 0.4649, "step": 34218 }, { "epoch": 1.9161720237428603, "grad_norm": 1.203240156173706, "learning_rate": 9.62621052631579e-05, "loss": 0.4979, "step": 34219 }, { "epoch": 1.9162280210549893, "grad_norm": 1.6693533658981323, "learning_rate": 9.626184210526317e-05, "loss": 0.4998, "step": 34220 }, { "epoch": 1.9162840183671184, "grad_norm": 1.4997769594192505, "learning_rate": 9.626157894736843e-05, "loss": 0.5273, "step": 34221 }, { "epoch": 1.9163400156792474, "grad_norm": 1.489009976387024, "learning_rate": 9.626131578947369e-05, "loss": 0.4585, "step": 34222 }, { "epoch": 1.9163960129913764, "grad_norm": 3.5480387210845947, "learning_rate": 9.626105263157895e-05, "loss": 0.4267, "step": 34223 }, { "epoch": 1.9164520103035054, "grad_norm": 1.8703699111938477, "learning_rate": 9.626078947368421e-05, "loss": 0.494, "step": 34224 }, { "epoch": 1.9165080076156344, "grad_norm": 1.5600225925445557, "learning_rate": 9.626052631578948e-05, "loss": 0.4672, "step": 34225 }, { "epoch": 1.9165640049277635, "grad_norm": 1.241592288017273, "learning_rate": 9.626026315789473e-05, "loss": 0.4302, "step": 34226 }, { "epoch": 1.9166200022398925, "grad_norm": 1.4656599760055542, "learning_rate": 9.626e-05, "loss": 0.6568, "step": 34227 }, { "epoch": 1.9166759995520215, "grad_norm": 1.0733925104141235, "learning_rate": 9.625973684210526e-05, "loss": 0.3584, "step": 34228 }, { "epoch": 
1.9167319968641505, "grad_norm": 1.3101646900177002, "learning_rate": 9.625947368421054e-05, "loss": 0.5529, "step": 34229 }, { "epoch": 1.9167879941762795, "grad_norm": 1.2405332326889038, "learning_rate": 9.62592105263158e-05, "loss": 0.4275, "step": 34230 }, { "epoch": 1.9168439914884086, "grad_norm": 1.251200556755066, "learning_rate": 9.625894736842106e-05, "loss": 0.3557, "step": 34231 }, { "epoch": 1.9168999888005376, "grad_norm": 1.316773772239685, "learning_rate": 9.625868421052632e-05, "loss": 0.5058, "step": 34232 }, { "epoch": 1.9169559861126666, "grad_norm": 1.567847490310669, "learning_rate": 9.625842105263159e-05, "loss": 0.4152, "step": 34233 }, { "epoch": 1.9170119834247956, "grad_norm": 1.6433348655700684, "learning_rate": 9.625815789473685e-05, "loss": 0.5167, "step": 34234 }, { "epoch": 1.9170679807369246, "grad_norm": 1.2534894943237305, "learning_rate": 9.625789473684211e-05, "loss": 0.3704, "step": 34235 }, { "epoch": 1.9171239780490537, "grad_norm": 1.6590468883514404, "learning_rate": 9.625763157894737e-05, "loss": 0.4596, "step": 34236 }, { "epoch": 1.9171799753611827, "grad_norm": 1.4893572330474854, "learning_rate": 9.625736842105264e-05, "loss": 0.6242, "step": 34237 }, { "epoch": 1.9172359726733117, "grad_norm": 1.1379451751708984, "learning_rate": 9.62571052631579e-05, "loss": 0.4604, "step": 34238 }, { "epoch": 1.9172919699854407, "grad_norm": 1.4879343509674072, "learning_rate": 9.625684210526316e-05, "loss": 0.6344, "step": 34239 }, { "epoch": 1.9173479672975697, "grad_norm": 1.1161693334579468, "learning_rate": 9.625657894736842e-05, "loss": 0.4123, "step": 34240 }, { "epoch": 1.9174039646096988, "grad_norm": 1.2039239406585693, "learning_rate": 9.625631578947368e-05, "loss": 0.4103, "step": 34241 }, { "epoch": 1.9174599619218278, "grad_norm": 1.24535071849823, "learning_rate": 9.625605263157895e-05, "loss": 0.4896, "step": 34242 }, { "epoch": 1.9175159592339568, "grad_norm": 1.0158687829971313, "learning_rate": 
9.625578947368421e-05, "loss": 0.3767, "step": 34243 }, { "epoch": 1.9175719565460858, "grad_norm": 1.027435541152954, "learning_rate": 9.625552631578947e-05, "loss": 0.4488, "step": 34244 }, { "epoch": 1.9176279538582148, "grad_norm": 1.675846815109253, "learning_rate": 9.625526315789473e-05, "loss": 0.6597, "step": 34245 }, { "epoch": 1.9176839511703438, "grad_norm": 1.5310463905334473, "learning_rate": 9.6255e-05, "loss": 0.549, "step": 34246 }, { "epoch": 1.9177399484824729, "grad_norm": 1.384231448173523, "learning_rate": 9.625473684210527e-05, "loss": 0.4122, "step": 34247 }, { "epoch": 1.9177959457946019, "grad_norm": 1.0827211141586304, "learning_rate": 9.625447368421054e-05, "loss": 0.3269, "step": 34248 }, { "epoch": 1.917851943106731, "grad_norm": 1.2690229415893555, "learning_rate": 9.625421052631579e-05, "loss": 0.4455, "step": 34249 }, { "epoch": 1.91790794041886, "grad_norm": 1.355848789215088, "learning_rate": 9.625394736842106e-05, "loss": 0.468, "step": 34250 }, { "epoch": 1.917963937730989, "grad_norm": 1.2106949090957642, "learning_rate": 9.625368421052632e-05, "loss": 0.4324, "step": 34251 }, { "epoch": 1.918019935043118, "grad_norm": 1.2169551849365234, "learning_rate": 9.625342105263159e-05, "loss": 0.5631, "step": 34252 }, { "epoch": 1.918075932355247, "grad_norm": 1.2722358703613281, "learning_rate": 9.625315789473685e-05, "loss": 0.3317, "step": 34253 }, { "epoch": 1.918131929667376, "grad_norm": 1.2074919939041138, "learning_rate": 9.625289473684211e-05, "loss": 0.5792, "step": 34254 }, { "epoch": 1.918187926979505, "grad_norm": 1.2313235998153687, "learning_rate": 9.625263157894737e-05, "loss": 0.4815, "step": 34255 }, { "epoch": 1.918243924291634, "grad_norm": 1.1061846017837524, "learning_rate": 9.625236842105264e-05, "loss": 0.4265, "step": 34256 }, { "epoch": 1.918299921603763, "grad_norm": 1.473401427268982, "learning_rate": 9.62521052631579e-05, "loss": 0.4969, "step": 34257 }, { "epoch": 1.918355918915892, "grad_norm": 
1.4823957681655884, "learning_rate": 9.625184210526316e-05, "loss": 0.3321, "step": 34258 }, { "epoch": 1.918411916228021, "grad_norm": 1.396756887435913, "learning_rate": 9.625157894736842e-05, "loss": 0.5285, "step": 34259 }, { "epoch": 1.9184679135401501, "grad_norm": 1.2933837175369263, "learning_rate": 9.625131578947368e-05, "loss": 0.4284, "step": 34260 }, { "epoch": 1.9185239108522791, "grad_norm": 1.6208646297454834, "learning_rate": 9.625105263157896e-05, "loss": 0.496, "step": 34261 }, { "epoch": 1.9185799081644082, "grad_norm": 1.1522470712661743, "learning_rate": 9.625078947368422e-05, "loss": 0.5184, "step": 34262 }, { "epoch": 1.9186359054765372, "grad_norm": 1.5574489831924438, "learning_rate": 9.625052631578948e-05, "loss": 0.3969, "step": 34263 }, { "epoch": 1.9186919027886662, "grad_norm": 1.2130779027938843, "learning_rate": 9.625026315789474e-05, "loss": 0.3794, "step": 34264 }, { "epoch": 1.9187479001007952, "grad_norm": 1.3396785259246826, "learning_rate": 9.625000000000001e-05, "loss": 0.338, "step": 34265 }, { "epoch": 1.9188038974129242, "grad_norm": 1.2985972166061401, "learning_rate": 9.624973684210527e-05, "loss": 0.488, "step": 34266 }, { "epoch": 1.9188598947250533, "grad_norm": 1.207932472229004, "learning_rate": 9.624947368421053e-05, "loss": 0.4687, "step": 34267 }, { "epoch": 1.9189158920371823, "grad_norm": 1.3236061334609985, "learning_rate": 9.624921052631579e-05, "loss": 0.3557, "step": 34268 }, { "epoch": 1.9189718893493113, "grad_norm": 1.395732045173645, "learning_rate": 9.624894736842106e-05, "loss": 0.7787, "step": 34269 }, { "epoch": 1.91902788666144, "grad_norm": 1.53282630443573, "learning_rate": 9.624868421052632e-05, "loss": 0.4336, "step": 34270 }, { "epoch": 1.919083883973569, "grad_norm": 1.1582788228988647, "learning_rate": 9.62484210526316e-05, "loss": 0.3405, "step": 34271 }, { "epoch": 1.9191398812856981, "grad_norm": 1.5056872367858887, "learning_rate": 9.624815789473684e-05, "loss": 0.5928, "step": 34272 }, { 
"epoch": 1.9191958785978271, "grad_norm": 1.3590638637542725, "learning_rate": 9.624789473684211e-05, "loss": 0.5295, "step": 34273 }, { "epoch": 1.9192518759099562, "grad_norm": 1.653045892715454, "learning_rate": 9.624763157894737e-05, "loss": 0.4595, "step": 34274 }, { "epoch": 1.9193078732220852, "grad_norm": 1.2127115726470947, "learning_rate": 9.624736842105265e-05, "loss": 0.485, "step": 34275 }, { "epoch": 1.9193638705342142, "grad_norm": 1.2260850667953491, "learning_rate": 9.62471052631579e-05, "loss": 0.4518, "step": 34276 }, { "epoch": 1.9194198678463432, "grad_norm": 1.5495177507400513, "learning_rate": 9.624684210526315e-05, "loss": 0.5465, "step": 34277 }, { "epoch": 1.9194758651584722, "grad_norm": 1.3879324197769165, "learning_rate": 9.624657894736843e-05, "loss": 0.6113, "step": 34278 }, { "epoch": 1.9195318624706013, "grad_norm": 1.4819798469543457, "learning_rate": 9.624631578947369e-05, "loss": 0.5579, "step": 34279 }, { "epoch": 1.9195878597827303, "grad_norm": 1.6818416118621826, "learning_rate": 9.624605263157896e-05, "loss": 0.7239, "step": 34280 }, { "epoch": 1.9196438570948593, "grad_norm": 1.1855067014694214, "learning_rate": 9.62457894736842e-05, "loss": 0.425, "step": 34281 }, { "epoch": 1.9196998544069883, "grad_norm": 1.3974952697753906, "learning_rate": 9.624552631578948e-05, "loss": 0.6626, "step": 34282 }, { "epoch": 1.9197558517191173, "grad_norm": 1.5829144716262817, "learning_rate": 9.624526315789474e-05, "loss": 0.5308, "step": 34283 }, { "epoch": 1.9198118490312464, "grad_norm": 1.2658365964889526, "learning_rate": 9.624500000000001e-05, "loss": 0.4676, "step": 34284 }, { "epoch": 1.9198678463433754, "grad_norm": 1.507955551147461, "learning_rate": 9.624473684210527e-05, "loss": 0.5393, "step": 34285 }, { "epoch": 1.9199238436555044, "grad_norm": 1.2962963581085205, "learning_rate": 9.624447368421053e-05, "loss": 0.442, "step": 34286 }, { "epoch": 1.9199798409676334, "grad_norm": 1.0144649744033813, "learning_rate": 
9.624421052631579e-05, "loss": 0.3287, "step": 34287 }, { "epoch": 1.9200358382797624, "grad_norm": 1.274845004081726, "learning_rate": 9.624394736842106e-05, "loss": 0.3465, "step": 34288 }, { "epoch": 1.9200918355918914, "grad_norm": 1.1636922359466553, "learning_rate": 9.624368421052632e-05, "loss": 0.3874, "step": 34289 }, { "epoch": 1.9201478329040205, "grad_norm": 1.9486541748046875, "learning_rate": 9.624342105263158e-05, "loss": 0.3908, "step": 34290 }, { "epoch": 1.9202038302161495, "grad_norm": 1.5582003593444824, "learning_rate": 9.624315789473684e-05, "loss": 0.6293, "step": 34291 }, { "epoch": 1.9202598275282785, "grad_norm": 1.5689884424209595, "learning_rate": 9.624289473684212e-05, "loss": 0.519, "step": 34292 }, { "epoch": 1.9203158248404075, "grad_norm": 1.3671067953109741, "learning_rate": 9.624263157894738e-05, "loss": 0.4712, "step": 34293 }, { "epoch": 1.9203718221525365, "grad_norm": 1.3742132186889648, "learning_rate": 9.624236842105264e-05, "loss": 0.3905, "step": 34294 }, { "epoch": 1.9204278194646656, "grad_norm": 2.128695011138916, "learning_rate": 9.62421052631579e-05, "loss": 0.4296, "step": 34295 }, { "epoch": 1.9204838167767946, "grad_norm": 1.2733598947525024, "learning_rate": 9.624184210526315e-05, "loss": 0.4557, "step": 34296 }, { "epoch": 1.9205398140889236, "grad_norm": 1.1369787454605103, "learning_rate": 9.624157894736843e-05, "loss": 0.3241, "step": 34297 }, { "epoch": 1.9205958114010526, "grad_norm": 1.1625676155090332, "learning_rate": 9.624131578947369e-05, "loss": 0.4057, "step": 34298 }, { "epoch": 1.9206518087131816, "grad_norm": 1.3650845289230347, "learning_rate": 9.624105263157895e-05, "loss": 0.5414, "step": 34299 }, { "epoch": 1.9207078060253107, "grad_norm": 1.284866213798523, "learning_rate": 9.624078947368421e-05, "loss": 0.615, "step": 34300 }, { "epoch": 1.9207638033374397, "grad_norm": 1.1933172941207886, "learning_rate": 9.624052631578948e-05, "loss": 0.4239, "step": 34301 }, { "epoch": 1.9208198006495687, 
"grad_norm": 1.2017443180084229, "learning_rate": 9.624026315789474e-05, "loss": 0.4367, "step": 34302 }, { "epoch": 1.9208757979616977, "grad_norm": 1.1451778411865234, "learning_rate": 9.624000000000001e-05, "loss": 0.4988, "step": 34303 }, { "epoch": 1.9209317952738267, "grad_norm": 1.1466450691223145, "learning_rate": 9.623973684210526e-05, "loss": 0.3327, "step": 34304 }, { "epoch": 1.9209877925859558, "grad_norm": 1.1683193445205688, "learning_rate": 9.623947368421053e-05, "loss": 0.4286, "step": 34305 }, { "epoch": 1.9210437898980848, "grad_norm": 2.1987383365631104, "learning_rate": 9.623921052631579e-05, "loss": 0.5721, "step": 34306 }, { "epoch": 1.9210997872102138, "grad_norm": 1.4699146747589111, "learning_rate": 9.623894736842107e-05, "loss": 0.3228, "step": 34307 }, { "epoch": 1.9211557845223428, "grad_norm": 1.1275240182876587, "learning_rate": 9.623868421052633e-05, "loss": 0.4266, "step": 34308 }, { "epoch": 1.9212117818344718, "grad_norm": 1.1163877248764038, "learning_rate": 9.623842105263159e-05, "loss": 0.3134, "step": 34309 }, { "epoch": 1.9212677791466009, "grad_norm": 1.2665023803710938, "learning_rate": 9.623815789473685e-05, "loss": 0.4593, "step": 34310 }, { "epoch": 1.9213237764587299, "grad_norm": 1.1689059734344482, "learning_rate": 9.62378947368421e-05, "loss": 0.3862, "step": 34311 }, { "epoch": 1.921379773770859, "grad_norm": 2.7677831649780273, "learning_rate": 9.623763157894738e-05, "loss": 0.6233, "step": 34312 }, { "epoch": 1.921435771082988, "grad_norm": 1.601834774017334, "learning_rate": 9.623736842105264e-05, "loss": 0.4761, "step": 34313 }, { "epoch": 1.921491768395117, "grad_norm": 1.2268656492233276, "learning_rate": 9.62371052631579e-05, "loss": 0.4992, "step": 34314 }, { "epoch": 1.921547765707246, "grad_norm": 1.2537119388580322, "learning_rate": 9.623684210526316e-05, "loss": 0.3951, "step": 34315 }, { "epoch": 1.921603763019375, "grad_norm": 1.186688780784607, "learning_rate": 9.623657894736843e-05, "loss": 0.4199, 
"step": 34316 }, { "epoch": 1.921659760331504, "grad_norm": 1.0517016649246216, "learning_rate": 9.623631578947369e-05, "loss": 0.3358, "step": 34317 }, { "epoch": 1.921715757643633, "grad_norm": 1.111101508140564, "learning_rate": 9.623605263157895e-05, "loss": 0.5862, "step": 34318 }, { "epoch": 1.921771754955762, "grad_norm": 1.5175127983093262, "learning_rate": 9.623578947368421e-05, "loss": 0.5376, "step": 34319 }, { "epoch": 1.921827752267891, "grad_norm": 1.1358342170715332, "learning_rate": 9.623552631578948e-05, "loss": 0.4517, "step": 34320 }, { "epoch": 1.92188374958002, "grad_norm": 1.2149403095245361, "learning_rate": 9.623526315789474e-05, "loss": 0.3261, "step": 34321 }, { "epoch": 1.921939746892149, "grad_norm": 1.4926055669784546, "learning_rate": 9.6235e-05, "loss": 0.3937, "step": 34322 }, { "epoch": 1.921995744204278, "grad_norm": 1.2512887716293335, "learning_rate": 9.623473684210526e-05, "loss": 0.4034, "step": 34323 }, { "epoch": 1.9220517415164071, "grad_norm": 1.4327195882797241, "learning_rate": 9.623447368421054e-05, "loss": 0.5098, "step": 34324 }, { "epoch": 1.9221077388285361, "grad_norm": 3.018702745437622, "learning_rate": 9.62342105263158e-05, "loss": 0.4752, "step": 34325 }, { "epoch": 1.9221637361406652, "grad_norm": 1.2417402267456055, "learning_rate": 9.623394736842107e-05, "loss": 0.4311, "step": 34326 }, { "epoch": 1.9222197334527942, "grad_norm": 1.3895093202590942, "learning_rate": 9.623368421052631e-05, "loss": 0.7125, "step": 34327 }, { "epoch": 1.9222757307649232, "grad_norm": 1.3928189277648926, "learning_rate": 9.623342105263157e-05, "loss": 0.4179, "step": 34328 }, { "epoch": 1.9223317280770522, "grad_norm": 1.251164197921753, "learning_rate": 9.623315789473685e-05, "loss": 0.4703, "step": 34329 }, { "epoch": 1.9223877253891812, "grad_norm": 1.836476445198059, "learning_rate": 9.623289473684211e-05, "loss": 0.3875, "step": 34330 }, { "epoch": 1.9224437227013103, "grad_norm": 1.1445401906967163, "learning_rate": 
9.623263157894738e-05, "loss": 0.4549, "step": 34331 }, { "epoch": 1.9224997200134393, "grad_norm": 1.2746981382369995, "learning_rate": 9.623236842105263e-05, "loss": 0.5712, "step": 34332 }, { "epoch": 1.9225557173255683, "grad_norm": 1.4008656740188599, "learning_rate": 9.62321052631579e-05, "loss": 0.5445, "step": 34333 }, { "epoch": 1.9226117146376973, "grad_norm": 1.5213289260864258, "learning_rate": 9.623184210526316e-05, "loss": 0.4742, "step": 34334 }, { "epoch": 1.9226677119498263, "grad_norm": 1.2487987279891968, "learning_rate": 9.623157894736843e-05, "loss": 0.4239, "step": 34335 }, { "epoch": 1.9227237092619553, "grad_norm": 1.5003831386566162, "learning_rate": 9.623131578947368e-05, "loss": 0.6254, "step": 34336 }, { "epoch": 1.9227797065740844, "grad_norm": 1.2354856729507446, "learning_rate": 9.623105263157895e-05, "loss": 0.4269, "step": 34337 }, { "epoch": 1.9228357038862134, "grad_norm": 1.4254043102264404, "learning_rate": 9.623078947368421e-05, "loss": 0.6442, "step": 34338 }, { "epoch": 1.9228917011983424, "grad_norm": 1.3578306436538696, "learning_rate": 9.623052631578949e-05, "loss": 0.4991, "step": 34339 }, { "epoch": 1.9229476985104714, "grad_norm": 1.0492668151855469, "learning_rate": 9.623026315789475e-05, "loss": 0.3883, "step": 34340 }, { "epoch": 1.9230036958226004, "grad_norm": 1.4757280349731445, "learning_rate": 9.623e-05, "loss": 0.4509, "step": 34341 }, { "epoch": 1.9230596931347295, "grad_norm": 1.4086908102035522, "learning_rate": 9.622973684210527e-05, "loss": 0.4788, "step": 34342 }, { "epoch": 1.9231156904468585, "grad_norm": 1.3750075101852417, "learning_rate": 9.622947368421054e-05, "loss": 0.5515, "step": 34343 }, { "epoch": 1.9231716877589875, "grad_norm": 1.1854740381240845, "learning_rate": 9.62292105263158e-05, "loss": 0.4634, "step": 34344 }, { "epoch": 1.9232276850711165, "grad_norm": 1.2228724956512451, "learning_rate": 9.622894736842106e-05, "loss": 0.4002, "step": 34345 }, { "epoch": 1.9232836823832455, 
"grad_norm": 1.1499624252319336, "learning_rate": 9.622868421052632e-05, "loss": 0.5372, "step": 34346 }, { "epoch": 1.9233396796953746, "grad_norm": 1.2971465587615967, "learning_rate": 9.622842105263158e-05, "loss": 0.4611, "step": 34347 }, { "epoch": 1.9233956770075036, "grad_norm": 1.2563623189926147, "learning_rate": 9.622815789473685e-05, "loss": 0.4014, "step": 34348 }, { "epoch": 1.9234516743196326, "grad_norm": 1.2086490392684937, "learning_rate": 9.622789473684211e-05, "loss": 0.5549, "step": 34349 }, { "epoch": 1.9235076716317616, "grad_norm": 1.3588988780975342, "learning_rate": 9.622763157894737e-05, "loss": 0.5435, "step": 34350 }, { "epoch": 1.9235636689438906, "grad_norm": 1.1672650575637817, "learning_rate": 9.622736842105263e-05, "loss": 0.6131, "step": 34351 }, { "epoch": 1.9236196662560197, "grad_norm": 1.523383617401123, "learning_rate": 9.62271052631579e-05, "loss": 0.4516, "step": 34352 }, { "epoch": 1.9236756635681487, "grad_norm": 1.1716790199279785, "learning_rate": 9.622684210526316e-05, "loss": 0.4739, "step": 34353 }, { "epoch": 1.9237316608802777, "grad_norm": 1.0879019498825073, "learning_rate": 9.622657894736842e-05, "loss": 0.442, "step": 34354 }, { "epoch": 1.9237876581924067, "grad_norm": 1.4493409395217896, "learning_rate": 9.622631578947368e-05, "loss": 0.4492, "step": 34355 }, { "epoch": 1.9238436555045357, "grad_norm": 1.2001742124557495, "learning_rate": 9.622605263157896e-05, "loss": 0.4314, "step": 34356 }, { "epoch": 1.9238996528166648, "grad_norm": 1.4988598823547363, "learning_rate": 9.622578947368422e-05, "loss": 0.3817, "step": 34357 }, { "epoch": 1.9239556501287938, "grad_norm": 1.5891660451889038, "learning_rate": 9.622552631578949e-05, "loss": 0.5123, "step": 34358 }, { "epoch": 1.9240116474409228, "grad_norm": 3.146915912628174, "learning_rate": 9.622526315789473e-05, "loss": 0.4199, "step": 34359 }, { "epoch": 1.9240676447530518, "grad_norm": 1.290448784828186, "learning_rate": 9.622500000000001e-05, "loss": 
0.4193, "step": 34360 }, { "epoch": 1.9241236420651808, "grad_norm": 1.2967422008514404, "learning_rate": 9.622473684210527e-05, "loss": 0.6199, "step": 34361 }, { "epoch": 1.9241796393773098, "grad_norm": 1.1139644384384155, "learning_rate": 9.622447368421053e-05, "loss": 0.4343, "step": 34362 }, { "epoch": 1.9242356366894389, "grad_norm": 2.0440478324890137, "learning_rate": 9.62242105263158e-05, "loss": 0.4569, "step": 34363 }, { "epoch": 1.9242916340015679, "grad_norm": 1.8078516721725464, "learning_rate": 9.622394736842105e-05, "loss": 0.5124, "step": 34364 }, { "epoch": 1.924347631313697, "grad_norm": 1.470478892326355, "learning_rate": 9.622368421052632e-05, "loss": 0.4409, "step": 34365 }, { "epoch": 1.924403628625826, "grad_norm": 1.3908190727233887, "learning_rate": 9.622342105263158e-05, "loss": 0.4864, "step": 34366 }, { "epoch": 1.924459625937955, "grad_norm": 1.82606041431427, "learning_rate": 9.622315789473685e-05, "loss": 0.6269, "step": 34367 }, { "epoch": 1.924515623250084, "grad_norm": 1.1784051656723022, "learning_rate": 9.622289473684211e-05, "loss": 0.4439, "step": 34368 }, { "epoch": 1.924571620562213, "grad_norm": 2.00205397605896, "learning_rate": 9.622263157894737e-05, "loss": 0.6574, "step": 34369 }, { "epoch": 1.924627617874342, "grad_norm": 1.235109806060791, "learning_rate": 9.622236842105263e-05, "loss": 0.5686, "step": 34370 }, { "epoch": 1.924683615186471, "grad_norm": 1.3443928956985474, "learning_rate": 9.62221052631579e-05, "loss": 0.4126, "step": 34371 }, { "epoch": 1.9247396124986, "grad_norm": 1.8188425302505493, "learning_rate": 9.622184210526317e-05, "loss": 0.63, "step": 34372 }, { "epoch": 1.924795609810729, "grad_norm": 0.9170876741409302, "learning_rate": 9.622157894736843e-05, "loss": 0.3381, "step": 34373 }, { "epoch": 1.924851607122858, "grad_norm": 1.3750289678573608, "learning_rate": 9.622131578947368e-05, "loss": 0.4304, "step": 34374 }, { "epoch": 1.924907604434987, "grad_norm": 1.1242152452468872, 
"learning_rate": 9.622105263157896e-05, "loss": 0.3476, "step": 34375 }, { "epoch": 1.9249636017471161, "grad_norm": 1.2126795053482056, "learning_rate": 9.622078947368422e-05, "loss": 0.3974, "step": 34376 }, { "epoch": 1.9250195990592451, "grad_norm": 1.937725305557251, "learning_rate": 9.622052631578948e-05, "loss": 0.4846, "step": 34377 }, { "epoch": 1.9250755963713742, "grad_norm": 1.781497597694397, "learning_rate": 9.622026315789474e-05, "loss": 0.512, "step": 34378 }, { "epoch": 1.9251315936835032, "grad_norm": 1.374320149421692, "learning_rate": 9.622000000000001e-05, "loss": 0.4105, "step": 34379 }, { "epoch": 1.9251875909956322, "grad_norm": 1.2288329601287842, "learning_rate": 9.621973684210527e-05, "loss": 0.4994, "step": 34380 }, { "epoch": 1.9252435883077612, "grad_norm": 1.423977017402649, "learning_rate": 9.621947368421053e-05, "loss": 0.5332, "step": 34381 }, { "epoch": 1.9252995856198902, "grad_norm": 1.233642339706421, "learning_rate": 9.621921052631579e-05, "loss": 0.4645, "step": 34382 }, { "epoch": 1.9253555829320192, "grad_norm": 1.2181953191757202, "learning_rate": 9.621894736842105e-05, "loss": 0.3515, "step": 34383 }, { "epoch": 1.9254115802441483, "grad_norm": 1.1944061517715454, "learning_rate": 9.621868421052632e-05, "loss": 0.3579, "step": 34384 }, { "epoch": 1.9254675775562773, "grad_norm": 1.306414246559143, "learning_rate": 9.621842105263158e-05, "loss": 0.4828, "step": 34385 }, { "epoch": 1.9255235748684063, "grad_norm": 1.5317976474761963, "learning_rate": 9.621815789473686e-05, "loss": 0.5148, "step": 34386 }, { "epoch": 1.9255795721805353, "grad_norm": 1.1759620904922485, "learning_rate": 9.62178947368421e-05, "loss": 0.3584, "step": 34387 }, { "epoch": 1.9256355694926643, "grad_norm": 1.2523274421691895, "learning_rate": 9.621763157894738e-05, "loss": 0.4177, "step": 34388 }, { "epoch": 1.9256915668047934, "grad_norm": 1.1623507738113403, "learning_rate": 9.621736842105263e-05, "loss": 0.4529, "step": 34389 }, { "epoch": 
1.9257475641169224, "grad_norm": 1.6678543090820312, "learning_rate": 9.621710526315791e-05, "loss": 0.517, "step": 34390 }, { "epoch": 1.9258035614290514, "grad_norm": 1.3336390256881714, "learning_rate": 9.621684210526315e-05, "loss": 0.6577, "step": 34391 }, { "epoch": 1.9258595587411804, "grad_norm": 1.9441115856170654, "learning_rate": 9.621657894736843e-05, "loss": 0.5595, "step": 34392 }, { "epoch": 1.9259155560533094, "grad_norm": 1.3937158584594727, "learning_rate": 9.621631578947369e-05, "loss": 0.515, "step": 34393 }, { "epoch": 1.9259715533654385, "grad_norm": 1.1185667514801025, "learning_rate": 9.621605263157896e-05, "loss": 0.372, "step": 34394 }, { "epoch": 1.9260275506775675, "grad_norm": 1.1725096702575684, "learning_rate": 9.621578947368422e-05, "loss": 0.3736, "step": 34395 }, { "epoch": 1.9260835479896965, "grad_norm": 1.2225680351257324, "learning_rate": 9.621552631578948e-05, "loss": 0.434, "step": 34396 }, { "epoch": 1.9261395453018255, "grad_norm": 1.1347873210906982, "learning_rate": 9.621526315789474e-05, "loss": 0.3333, "step": 34397 }, { "epoch": 1.9261955426139545, "grad_norm": 1.5155844688415527, "learning_rate": 9.6215e-05, "loss": 0.4664, "step": 34398 }, { "epoch": 1.9262515399260836, "grad_norm": 1.376240611076355, "learning_rate": 9.621473684210527e-05, "loss": 0.4451, "step": 34399 }, { "epoch": 1.9263075372382126, "grad_norm": 1.0406979322433472, "learning_rate": 9.621447368421053e-05, "loss": 0.3307, "step": 34400 }, { "epoch": 1.9263635345503416, "grad_norm": 1.2049260139465332, "learning_rate": 9.621421052631579e-05, "loss": 0.5193, "step": 34401 }, { "epoch": 1.9264195318624706, "grad_norm": 1.3064241409301758, "learning_rate": 9.621394736842105e-05, "loss": 0.4781, "step": 34402 }, { "epoch": 1.9264755291745996, "grad_norm": 1.2402961254119873, "learning_rate": 9.621368421052633e-05, "loss": 0.4365, "step": 34403 }, { "epoch": 1.9265315264867287, "grad_norm": 1.203587532043457, "learning_rate": 9.621342105263158e-05, 
"loss": 0.3792, "step": 34404 }, { "epoch": 1.9265875237988577, "grad_norm": 1.3120694160461426, "learning_rate": 9.621315789473684e-05, "loss": 0.412, "step": 34405 }, { "epoch": 1.9266435211109867, "grad_norm": 1.1383551359176636, "learning_rate": 9.62128947368421e-05, "loss": 0.3611, "step": 34406 }, { "epoch": 1.9266995184231157, "grad_norm": 1.8300246000289917, "learning_rate": 9.621263157894738e-05, "loss": 0.577, "step": 34407 }, { "epoch": 1.9267555157352447, "grad_norm": 1.2871698141098022, "learning_rate": 9.621236842105264e-05, "loss": 0.405, "step": 34408 }, { "epoch": 1.9268115130473737, "grad_norm": 1.2269333600997925, "learning_rate": 9.62121052631579e-05, "loss": 0.4462, "step": 34409 }, { "epoch": 1.9268675103595028, "grad_norm": 1.1481660604476929, "learning_rate": 9.621184210526316e-05, "loss": 0.4579, "step": 34410 }, { "epoch": 1.9269235076716318, "grad_norm": 1.5203206539154053, "learning_rate": 9.621157894736843e-05, "loss": 0.4338, "step": 34411 }, { "epoch": 1.9269795049837608, "grad_norm": 1.0951775312423706, "learning_rate": 9.621131578947369e-05, "loss": 0.4529, "step": 34412 }, { "epoch": 1.9270355022958898, "grad_norm": 1.3437350988388062, "learning_rate": 9.621105263157896e-05, "loss": 0.4369, "step": 34413 }, { "epoch": 1.9270914996080188, "grad_norm": 1.1797075271606445, "learning_rate": 9.621078947368421e-05, "loss": 0.4991, "step": 34414 }, { "epoch": 1.9271474969201479, "grad_norm": 1.4596105813980103, "learning_rate": 9.621052631578947e-05, "loss": 0.4578, "step": 34415 }, { "epoch": 1.9272034942322769, "grad_norm": 1.2185550928115845, "learning_rate": 9.621026315789474e-05, "loss": 0.3894, "step": 34416 }, { "epoch": 1.927259491544406, "grad_norm": 1.472474217414856, "learning_rate": 9.621e-05, "loss": 0.3932, "step": 34417 }, { "epoch": 1.927315488856535, "grad_norm": 1.316421627998352, "learning_rate": 9.620973684210528e-05, "loss": 0.4032, "step": 34418 }, { "epoch": 1.927371486168664, "grad_norm": 1.1761480569839478, 
"learning_rate": 9.620947368421052e-05, "loss": 0.4049, "step": 34419 }, { "epoch": 1.927427483480793, "grad_norm": 1.1489301919937134, "learning_rate": 9.62092105263158e-05, "loss": 0.4376, "step": 34420 }, { "epoch": 1.927483480792922, "grad_norm": 1.3674211502075195, "learning_rate": 9.620894736842105e-05, "loss": 0.324, "step": 34421 }, { "epoch": 1.927539478105051, "grad_norm": 1.2294304370880127, "learning_rate": 9.620868421052633e-05, "loss": 0.5572, "step": 34422 }, { "epoch": 1.92759547541718, "grad_norm": 1.4763152599334717, "learning_rate": 9.620842105263159e-05, "loss": 0.4053, "step": 34423 }, { "epoch": 1.927651472729309, "grad_norm": 1.1901642084121704, "learning_rate": 9.620815789473685e-05, "loss": 0.4247, "step": 34424 }, { "epoch": 1.927707470041438, "grad_norm": 1.2052173614501953, "learning_rate": 9.620789473684211e-05, "loss": 0.4945, "step": 34425 }, { "epoch": 1.927763467353567, "grad_norm": 2.0695602893829346, "learning_rate": 9.620763157894738e-05, "loss": 0.464, "step": 34426 }, { "epoch": 1.927819464665696, "grad_norm": 1.0273263454437256, "learning_rate": 9.620736842105264e-05, "loss": 0.307, "step": 34427 }, { "epoch": 1.9278754619778251, "grad_norm": 1.4338146448135376, "learning_rate": 9.62071052631579e-05, "loss": 0.5388, "step": 34428 }, { "epoch": 1.9279314592899541, "grad_norm": 1.180341362953186, "learning_rate": 9.620684210526316e-05, "loss": 0.3366, "step": 34429 }, { "epoch": 1.9279874566020831, "grad_norm": 1.208812952041626, "learning_rate": 9.620657894736843e-05, "loss": 0.4029, "step": 34430 }, { "epoch": 1.9280434539142122, "grad_norm": 1.3337002992630005, "learning_rate": 9.620631578947369e-05, "loss": 0.5443, "step": 34431 }, { "epoch": 1.9280994512263412, "grad_norm": 1.5183266401290894, "learning_rate": 9.620605263157895e-05, "loss": 0.6829, "step": 34432 }, { "epoch": 1.9281554485384702, "grad_norm": 0.9501885175704956, "learning_rate": 9.620578947368421e-05, "loss": 0.2856, "step": 34433 }, { "epoch": 
1.9282114458505992, "grad_norm": 1.473787546157837, "learning_rate": 9.620552631578947e-05, "loss": 0.6641, "step": 34434 }, { "epoch": 1.9282674431627282, "grad_norm": 1.1880011558532715, "learning_rate": 9.620526315789474e-05, "loss": 0.4315, "step": 34435 }, { "epoch": 1.9283234404748573, "grad_norm": 3.8558762073516846, "learning_rate": 9.6205e-05, "loss": 0.432, "step": 34436 }, { "epoch": 1.9283794377869863, "grad_norm": 1.1921058893203735, "learning_rate": 9.620473684210526e-05, "loss": 0.4802, "step": 34437 }, { "epoch": 1.9284354350991153, "grad_norm": 1.0664736032485962, "learning_rate": 9.620447368421052e-05, "loss": 0.4363, "step": 34438 }, { "epoch": 1.9284914324112443, "grad_norm": 1.2945587635040283, "learning_rate": 9.62042105263158e-05, "loss": 0.3728, "step": 34439 }, { "epoch": 1.9285474297233733, "grad_norm": 1.3429768085479736, "learning_rate": 9.620394736842106e-05, "loss": 0.4986, "step": 34440 }, { "epoch": 1.9286034270355024, "grad_norm": 1.4482563734054565, "learning_rate": 9.620368421052632e-05, "loss": 0.3196, "step": 34441 }, { "epoch": 1.9286594243476314, "grad_norm": 1.043186902999878, "learning_rate": 9.620342105263158e-05, "loss": 0.3654, "step": 34442 }, { "epoch": 1.9287154216597604, "grad_norm": 1.4350152015686035, "learning_rate": 9.620315789473685e-05, "loss": 0.6509, "step": 34443 }, { "epoch": 1.9287714189718894, "grad_norm": 1.1923483610153198, "learning_rate": 9.620289473684211e-05, "loss": 0.3241, "step": 34444 }, { "epoch": 1.9288274162840184, "grad_norm": 1.2620973587036133, "learning_rate": 9.620263157894738e-05, "loss": 0.4405, "step": 34445 }, { "epoch": 1.9288834135961475, "grad_norm": 1.403353214263916, "learning_rate": 9.620236842105263e-05, "loss": 0.4476, "step": 34446 }, { "epoch": 1.9289394109082765, "grad_norm": 1.1871943473815918, "learning_rate": 9.62021052631579e-05, "loss": 0.5248, "step": 34447 }, { "epoch": 1.9289954082204055, "grad_norm": 1.3543674945831299, "learning_rate": 9.620184210526316e-05, 
"loss": 0.4392, "step": 34448 }, { "epoch": 1.9290514055325345, "grad_norm": 1.3094923496246338, "learning_rate": 9.620157894736842e-05, "loss": 0.4446, "step": 34449 }, { "epoch": 1.9291074028446635, "grad_norm": 1.3161166906356812, "learning_rate": 9.62013157894737e-05, "loss": 0.4432, "step": 34450 }, { "epoch": 1.9291634001567926, "grad_norm": 1.849827766418457, "learning_rate": 9.620105263157894e-05, "loss": 0.4328, "step": 34451 }, { "epoch": 1.9292193974689216, "grad_norm": 1.1753054857254028, "learning_rate": 9.620078947368421e-05, "loss": 0.3896, "step": 34452 }, { "epoch": 1.9292753947810506, "grad_norm": 1.3388580083847046, "learning_rate": 9.620052631578947e-05, "loss": 0.5128, "step": 34453 }, { "epoch": 1.9293313920931796, "grad_norm": 1.2258212566375732, "learning_rate": 9.620026315789475e-05, "loss": 0.3657, "step": 34454 }, { "epoch": 1.9293873894053086, "grad_norm": 1.276568055152893, "learning_rate": 9.620000000000001e-05, "loss": 0.3678, "step": 34455 }, { "epoch": 1.9294433867174376, "grad_norm": 1.1893410682678223, "learning_rate": 9.619973684210527e-05, "loss": 0.4745, "step": 34456 }, { "epoch": 1.9294993840295667, "grad_norm": 1.2533750534057617, "learning_rate": 9.619947368421053e-05, "loss": 0.4409, "step": 34457 }, { "epoch": 1.9295553813416957, "grad_norm": 7.574336528778076, "learning_rate": 9.61992105263158e-05, "loss": 0.4121, "step": 34458 }, { "epoch": 1.9296113786538247, "grad_norm": 1.3594753742218018, "learning_rate": 9.619894736842106e-05, "loss": 0.411, "step": 34459 }, { "epoch": 1.9296673759659537, "grad_norm": 1.322891116142273, "learning_rate": 9.619868421052632e-05, "loss": 0.3755, "step": 34460 }, { "epoch": 1.9297233732780827, "grad_norm": 1.327690601348877, "learning_rate": 9.619842105263158e-05, "loss": 0.4629, "step": 34461 }, { "epoch": 1.9297793705902118, "grad_norm": 1.4175219535827637, "learning_rate": 9.619815789473685e-05, "loss": 0.5439, "step": 34462 }, { "epoch": 1.9298353679023408, "grad_norm": 
1.6120556592941284, "learning_rate": 9.619789473684211e-05, "loss": 0.4773, "step": 34463 }, { "epoch": 1.9298913652144698, "grad_norm": 1.348920464515686, "learning_rate": 9.619763157894737e-05, "loss": 0.3932, "step": 34464 }, { "epoch": 1.9299473625265988, "grad_norm": 1.186859369277954, "learning_rate": 9.619736842105263e-05, "loss": 0.5529, "step": 34465 }, { "epoch": 1.9300033598387278, "grad_norm": 0.9572727680206299, "learning_rate": 9.619710526315789e-05, "loss": 0.3139, "step": 34466 }, { "epoch": 1.9300593571508569, "grad_norm": 1.2366197109222412, "learning_rate": 9.619684210526316e-05, "loss": 0.3454, "step": 34467 }, { "epoch": 1.9301153544629859, "grad_norm": 1.113550066947937, "learning_rate": 9.619657894736842e-05, "loss": 0.4842, "step": 34468 }, { "epoch": 1.930171351775115, "grad_norm": 0.9857762455940247, "learning_rate": 9.619631578947368e-05, "loss": 0.3599, "step": 34469 }, { "epoch": 1.930227349087244, "grad_norm": 1.1586251258850098, "learning_rate": 9.619605263157894e-05, "loss": 0.3874, "step": 34470 }, { "epoch": 1.930283346399373, "grad_norm": 1.244394063949585, "learning_rate": 9.619578947368422e-05, "loss": 0.4, "step": 34471 }, { "epoch": 1.930339343711502, "grad_norm": 1.2778180837631226, "learning_rate": 9.619552631578948e-05, "loss": 0.4722, "step": 34472 }, { "epoch": 1.930395341023631, "grad_norm": 5.057855606079102, "learning_rate": 9.619526315789475e-05, "loss": 0.447, "step": 34473 }, { "epoch": 1.93045133833576, "grad_norm": 1.5484793186187744, "learning_rate": 9.6195e-05, "loss": 0.5107, "step": 34474 }, { "epoch": 1.930507335647889, "grad_norm": 1.6093711853027344, "learning_rate": 9.619473684210527e-05, "loss": 0.6132, "step": 34475 }, { "epoch": 1.930563332960018, "grad_norm": 1.5866259336471558, "learning_rate": 9.619447368421053e-05, "loss": 0.4621, "step": 34476 }, { "epoch": 1.930619330272147, "grad_norm": 1.2830190658569336, "learning_rate": 9.61942105263158e-05, "loss": 0.4974, "step": 34477 }, { "epoch": 
1.930675327584276, "grad_norm": 1.5656461715698242, "learning_rate": 9.619394736842106e-05, "loss": 0.4832, "step": 34478 }, { "epoch": 1.930731324896405, "grad_norm": 1.625942349433899, "learning_rate": 9.619368421052632e-05, "loss": 0.4119, "step": 34479 }, { "epoch": 1.930787322208534, "grad_norm": 1.3033757209777832, "learning_rate": 9.619342105263158e-05, "loss": 0.689, "step": 34480 }, { "epoch": 1.9308433195206631, "grad_norm": 1.2183730602264404, "learning_rate": 9.619315789473686e-05, "loss": 0.4074, "step": 34481 }, { "epoch": 1.9308993168327921, "grad_norm": 1.1757538318634033, "learning_rate": 9.619289473684211e-05, "loss": 0.3777, "step": 34482 }, { "epoch": 1.9309553141449212, "grad_norm": 2.666792392730713, "learning_rate": 9.619263157894736e-05, "loss": 0.467, "step": 34483 }, { "epoch": 1.9310113114570502, "grad_norm": 1.1633663177490234, "learning_rate": 9.619236842105263e-05, "loss": 0.4984, "step": 34484 }, { "epoch": 1.9310673087691792, "grad_norm": 1.1002119779586792, "learning_rate": 9.61921052631579e-05, "loss": 0.4097, "step": 34485 }, { "epoch": 1.9311233060813082, "grad_norm": 1.0700511932373047, "learning_rate": 9.619184210526317e-05, "loss": 0.403, "step": 34486 }, { "epoch": 1.9311793033934372, "grad_norm": 1.2530592679977417, "learning_rate": 9.619157894736843e-05, "loss": 0.5182, "step": 34487 }, { "epoch": 1.9312353007055663, "grad_norm": 1.1896612644195557, "learning_rate": 9.619131578947369e-05, "loss": 0.4402, "step": 34488 }, { "epoch": 1.9312912980176953, "grad_norm": 1.3995798826217651, "learning_rate": 9.619105263157895e-05, "loss": 0.4661, "step": 34489 }, { "epoch": 1.9313472953298243, "grad_norm": 1.2056599855422974, "learning_rate": 9.619078947368422e-05, "loss": 0.3773, "step": 34490 }, { "epoch": 1.9314032926419533, "grad_norm": 1.3707834482192993, "learning_rate": 9.619052631578948e-05, "loss": 0.4755, "step": 34491 }, { "epoch": 1.9314592899540823, "grad_norm": 1.335739254951477, "learning_rate": 
9.619026315789474e-05, "loss": 0.4215, "step": 34492 }, { "epoch": 1.9315152872662114, "grad_norm": 1.5348047018051147, "learning_rate": 9.619e-05, "loss": 0.4623, "step": 34493 }, { "epoch": 1.9315712845783404, "grad_norm": 1.5599212646484375, "learning_rate": 9.618973684210527e-05, "loss": 0.4511, "step": 34494 }, { "epoch": 1.9316272818904694, "grad_norm": 1.3879213333129883, "learning_rate": 9.618947368421053e-05, "loss": 0.3944, "step": 34495 }, { "epoch": 1.9316832792025984, "grad_norm": 1.3457173109054565, "learning_rate": 9.618921052631579e-05, "loss": 0.4529, "step": 34496 }, { "epoch": 1.9317392765147274, "grad_norm": 1.2318543195724487, "learning_rate": 9.618894736842105e-05, "loss": 0.5488, "step": 34497 }, { "epoch": 1.9317952738268565, "grad_norm": 1.1020747423171997, "learning_rate": 9.618868421052632e-05, "loss": 0.3901, "step": 34498 }, { "epoch": 1.9318512711389855, "grad_norm": 5.155155658721924, "learning_rate": 9.618842105263158e-05, "loss": 0.5578, "step": 34499 }, { "epoch": 1.9319072684511145, "grad_norm": 1.4636818170547485, "learning_rate": 9.618815789473686e-05, "loss": 0.3723, "step": 34500 }, { "epoch": 1.9319632657632435, "grad_norm": 1.1043155193328857, "learning_rate": 9.61878947368421e-05, "loss": 0.5302, "step": 34501 }, { "epoch": 1.9320192630753725, "grad_norm": 1.0371291637420654, "learning_rate": 9.618763157894736e-05, "loss": 0.3842, "step": 34502 }, { "epoch": 1.9320752603875015, "grad_norm": 1.4405776262283325, "learning_rate": 9.618736842105264e-05, "loss": 0.4688, "step": 34503 }, { "epoch": 1.9321312576996306, "grad_norm": 1.1060543060302734, "learning_rate": 9.61871052631579e-05, "loss": 0.3822, "step": 34504 }, { "epoch": 1.9321872550117596, "grad_norm": 1.216041922569275, "learning_rate": 9.618684210526317e-05, "loss": 0.415, "step": 34505 }, { "epoch": 1.9322432523238886, "grad_norm": 1.065531611442566, "learning_rate": 9.618657894736842e-05, "loss": 0.4025, "step": 34506 }, { "epoch": 1.9322992496360176, "grad_norm": 
1.2006736993789673, "learning_rate": 9.618631578947369e-05, "loss": 0.4577, "step": 34507 }, { "epoch": 1.9323552469481466, "grad_norm": 1.2080492973327637, "learning_rate": 9.618605263157895e-05, "loss": 0.4196, "step": 34508 }, { "epoch": 1.9324112442602757, "grad_norm": 1.1670516729354858, "learning_rate": 9.618578947368422e-05, "loss": 0.4129, "step": 34509 }, { "epoch": 1.9324672415724047, "grad_norm": 1.0263789892196655, "learning_rate": 9.618552631578948e-05, "loss": 0.3277, "step": 34510 }, { "epoch": 1.9325232388845337, "grad_norm": 1.4198226928710938, "learning_rate": 9.618526315789474e-05, "loss": 0.3526, "step": 34511 }, { "epoch": 1.9325792361966627, "grad_norm": 1.7080702781677246, "learning_rate": 9.6185e-05, "loss": 0.457, "step": 34512 }, { "epoch": 1.9326352335087917, "grad_norm": 1.5729179382324219, "learning_rate": 9.618473684210527e-05, "loss": 0.4614, "step": 34513 }, { "epoch": 1.9326912308209208, "grad_norm": 1.2717292308807373, "learning_rate": 9.618447368421053e-05, "loss": 0.3934, "step": 34514 }, { "epoch": 1.9327472281330498, "grad_norm": 2.0297207832336426, "learning_rate": 9.61842105263158e-05, "loss": 0.4119, "step": 34515 }, { "epoch": 1.9328032254451788, "grad_norm": 1.2161298990249634, "learning_rate": 9.618394736842105e-05, "loss": 0.4797, "step": 34516 }, { "epoch": 1.9328592227573078, "grad_norm": 1.2220958471298218, "learning_rate": 9.618368421052633e-05, "loss": 0.4447, "step": 34517 }, { "epoch": 1.9329152200694366, "grad_norm": 1.3285832405090332, "learning_rate": 9.618342105263159e-05, "loss": 0.4445, "step": 34518 }, { "epoch": 1.9329712173815656, "grad_norm": 1.2737245559692383, "learning_rate": 9.618315789473685e-05, "loss": 0.4862, "step": 34519 }, { "epoch": 1.9330272146936947, "grad_norm": 1.5205929279327393, "learning_rate": 9.61828947368421e-05, "loss": 0.6421, "step": 34520 }, { "epoch": 1.9330832120058237, "grad_norm": 1.1003897190093994, "learning_rate": 9.618263157894737e-05, "loss": 0.4398, "step": 34521 }, { 
"epoch": 1.9331392093179527, "grad_norm": 1.3158518075942993, "learning_rate": 9.618236842105264e-05, "loss": 0.3468, "step": 34522 }, { "epoch": 1.9331952066300817, "grad_norm": 1.0968856811523438, "learning_rate": 9.61821052631579e-05, "loss": 0.5677, "step": 34523 }, { "epoch": 1.9332512039422107, "grad_norm": 1.324910044670105, "learning_rate": 9.618184210526316e-05, "loss": 0.4995, "step": 34524 }, { "epoch": 1.9333072012543397, "grad_norm": 1.264108419418335, "learning_rate": 9.618157894736842e-05, "loss": 0.4034, "step": 34525 }, { "epoch": 1.9333631985664688, "grad_norm": 1.2521198987960815, "learning_rate": 9.618131578947369e-05, "loss": 0.4265, "step": 34526 }, { "epoch": 1.9334191958785978, "grad_norm": 1.1728318929672241, "learning_rate": 9.618105263157895e-05, "loss": 0.4831, "step": 34527 }, { "epoch": 1.9334751931907268, "grad_norm": 1.2251523733139038, "learning_rate": 9.618078947368422e-05, "loss": 0.3995, "step": 34528 }, { "epoch": 1.9335311905028558, "grad_norm": 1.1888328790664673, "learning_rate": 9.618052631578947e-05, "loss": 0.6765, "step": 34529 }, { "epoch": 1.9335871878149848, "grad_norm": 1.165860891342163, "learning_rate": 9.618026315789474e-05, "loss": 0.3834, "step": 34530 }, { "epoch": 1.9336431851271139, "grad_norm": 1.2785743474960327, "learning_rate": 9.618e-05, "loss": 0.4218, "step": 34531 }, { "epoch": 1.9336991824392429, "grad_norm": 1.2545628547668457, "learning_rate": 9.617973684210528e-05, "loss": 0.5435, "step": 34532 }, { "epoch": 1.933755179751372, "grad_norm": 1.1461656093597412, "learning_rate": 9.617947368421054e-05, "loss": 0.3866, "step": 34533 }, { "epoch": 1.933811177063501, "grad_norm": 1.384101390838623, "learning_rate": 9.61792105263158e-05, "loss": 0.4265, "step": 34534 }, { "epoch": 1.93386717437563, "grad_norm": 1.1068389415740967, "learning_rate": 9.617894736842106e-05, "loss": 0.4518, "step": 34535 }, { "epoch": 1.933923171687759, "grad_norm": 1.266049861907959, "learning_rate": 9.617868421052632e-05, 
"loss": 0.5008, "step": 34536 }, { "epoch": 1.933979168999888, "grad_norm": 1.5910966396331787, "learning_rate": 9.617842105263159e-05, "loss": 0.5164, "step": 34537 }, { "epoch": 1.934035166312017, "grad_norm": 1.6353645324707031, "learning_rate": 9.617815789473684e-05, "loss": 0.4413, "step": 34538 }, { "epoch": 1.934091163624146, "grad_norm": 1.3025604486465454, "learning_rate": 9.617789473684211e-05, "loss": 0.4211, "step": 34539 }, { "epoch": 1.934147160936275, "grad_norm": 1.4288135766983032, "learning_rate": 9.617763157894737e-05, "loss": 0.6824, "step": 34540 }, { "epoch": 1.934203158248404, "grad_norm": 1.650879144668579, "learning_rate": 9.617736842105264e-05, "loss": 0.4972, "step": 34541 }, { "epoch": 1.934259155560533, "grad_norm": 1.1612884998321533, "learning_rate": 9.61771052631579e-05, "loss": 0.4288, "step": 34542 }, { "epoch": 1.934315152872662, "grad_norm": 1.3062304258346558, "learning_rate": 9.617684210526316e-05, "loss": 0.4666, "step": 34543 }, { "epoch": 1.934371150184791, "grad_norm": 1.217829704284668, "learning_rate": 9.617657894736842e-05, "loss": 0.4608, "step": 34544 }, { "epoch": 1.9344271474969201, "grad_norm": 1.2446705102920532, "learning_rate": 9.61763157894737e-05, "loss": 0.4185, "step": 34545 }, { "epoch": 1.9344831448090491, "grad_norm": 1.3282055854797363, "learning_rate": 9.617605263157895e-05, "loss": 0.4442, "step": 34546 }, { "epoch": 1.9345391421211782, "grad_norm": 1.196389079093933, "learning_rate": 9.617578947368421e-05, "loss": 0.3877, "step": 34547 }, { "epoch": 1.9345951394333072, "grad_norm": 1.754717469215393, "learning_rate": 9.617552631578947e-05, "loss": 0.4662, "step": 34548 }, { "epoch": 1.9346511367454362, "grad_norm": 1.138705849647522, "learning_rate": 9.617526315789475e-05, "loss": 0.4915, "step": 34549 }, { "epoch": 1.9347071340575652, "grad_norm": 1.3363920450210571, "learning_rate": 9.6175e-05, "loss": 0.4322, "step": 34550 }, { "epoch": 1.9347631313696942, "grad_norm": 1.40372896194458, 
"learning_rate": 9.617473684210527e-05, "loss": 0.3927, "step": 34551 }, { "epoch": 1.9348191286818233, "grad_norm": 1.1117277145385742, "learning_rate": 9.617447368421053e-05, "loss": 0.4147, "step": 34552 }, { "epoch": 1.9348751259939523, "grad_norm": 1.5525130033493042, "learning_rate": 9.617421052631579e-05, "loss": 0.4016, "step": 34553 }, { "epoch": 1.9349311233060813, "grad_norm": 1.1578190326690674, "learning_rate": 9.617394736842106e-05, "loss": 0.4673, "step": 34554 }, { "epoch": 1.9349871206182103, "grad_norm": 1.208340048789978, "learning_rate": 9.617368421052632e-05, "loss": 0.4391, "step": 34555 }, { "epoch": 1.9350431179303393, "grad_norm": 1.1800040006637573, "learning_rate": 9.617342105263158e-05, "loss": 0.4267, "step": 34556 }, { "epoch": 1.9350991152424684, "grad_norm": 1.0394799709320068, "learning_rate": 9.617315789473684e-05, "loss": 0.4812, "step": 34557 }, { "epoch": 1.9351551125545974, "grad_norm": 1.281531572341919, "learning_rate": 9.617289473684211e-05, "loss": 0.4384, "step": 34558 }, { "epoch": 1.9352111098667264, "grad_norm": 1.0990893840789795, "learning_rate": 9.617263157894737e-05, "loss": 0.4131, "step": 34559 }, { "epoch": 1.9352671071788554, "grad_norm": 1.271069884300232, "learning_rate": 9.617236842105264e-05, "loss": 0.4101, "step": 34560 }, { "epoch": 1.9353231044909844, "grad_norm": 1.4581999778747559, "learning_rate": 9.617210526315789e-05, "loss": 0.5431, "step": 34561 }, { "epoch": 1.9353791018031135, "grad_norm": 1.7357470989227295, "learning_rate": 9.617184210526316e-05, "loss": 0.6159, "step": 34562 }, { "epoch": 1.9354350991152425, "grad_norm": 1.191070318222046, "learning_rate": 9.617157894736842e-05, "loss": 0.3845, "step": 34563 }, { "epoch": 1.9354910964273715, "grad_norm": 1.4071005582809448, "learning_rate": 9.61713157894737e-05, "loss": 0.6476, "step": 34564 }, { "epoch": 1.9355470937395005, "grad_norm": 1.4647846221923828, "learning_rate": 9.617105263157896e-05, "loss": 0.4518, "step": 34565 }, { "epoch": 
1.9356030910516295, "grad_norm": 1.1088801622390747, "learning_rate": 9.617078947368422e-05, "loss": 0.4062, "step": 34566 }, { "epoch": 1.9356590883637586, "grad_norm": 1.1932700872421265, "learning_rate": 9.617052631578948e-05, "loss": 0.4368, "step": 34567 }, { "epoch": 1.9357150856758876, "grad_norm": 1.3187135457992554, "learning_rate": 9.617026315789475e-05, "loss": 0.5061, "step": 34568 }, { "epoch": 1.9357710829880166, "grad_norm": 1.1500115394592285, "learning_rate": 9.617000000000001e-05, "loss": 0.4545, "step": 34569 }, { "epoch": 1.9358270803001456, "grad_norm": 1.350380778312683, "learning_rate": 9.616973684210527e-05, "loss": 0.4808, "step": 34570 }, { "epoch": 1.9358830776122746, "grad_norm": 1.2474403381347656, "learning_rate": 9.616947368421053e-05, "loss": 0.4078, "step": 34571 }, { "epoch": 1.9359390749244036, "grad_norm": 1.3160289525985718, "learning_rate": 9.616921052631579e-05, "loss": 0.552, "step": 34572 }, { "epoch": 1.9359950722365327, "grad_norm": 1.6678022146224976, "learning_rate": 9.616894736842106e-05, "loss": 0.4072, "step": 34573 }, { "epoch": 1.9360510695486617, "grad_norm": 1.8979817628860474, "learning_rate": 9.616868421052632e-05, "loss": 0.4502, "step": 34574 }, { "epoch": 1.9361070668607907, "grad_norm": 1.8159265518188477, "learning_rate": 9.616842105263158e-05, "loss": 0.4216, "step": 34575 }, { "epoch": 1.9361630641729197, "grad_norm": 1.3641353845596313, "learning_rate": 9.616815789473684e-05, "loss": 0.2937, "step": 34576 }, { "epoch": 1.9362190614850487, "grad_norm": 1.1954460144042969, "learning_rate": 9.616789473684211e-05, "loss": 0.4449, "step": 34577 }, { "epoch": 1.9362750587971778, "grad_norm": 1.7169078588485718, "learning_rate": 9.616763157894737e-05, "loss": 0.6248, "step": 34578 }, { "epoch": 1.9363310561093068, "grad_norm": 1.1840386390686035, "learning_rate": 9.616736842105263e-05, "loss": 0.5037, "step": 34579 }, { "epoch": 1.9363870534214358, "grad_norm": 1.3257672786712646, "learning_rate": 
9.61671052631579e-05, "loss": 0.4713, "step": 34580 }, { "epoch": 1.9364430507335648, "grad_norm": 1.4435157775878906, "learning_rate": 9.616684210526317e-05, "loss": 0.4189, "step": 34581 }, { "epoch": 1.9364990480456938, "grad_norm": 1.2479151487350464, "learning_rate": 9.616657894736843e-05, "loss": 0.4395, "step": 34582 }, { "epoch": 1.9365550453578229, "grad_norm": 1.1391650438308716, "learning_rate": 9.61663157894737e-05, "loss": 0.359, "step": 34583 }, { "epoch": 1.9366110426699519, "grad_norm": 1.1199320554733276, "learning_rate": 9.616605263157895e-05, "loss": 0.4245, "step": 34584 }, { "epoch": 1.936667039982081, "grad_norm": 1.3649721145629883, "learning_rate": 9.616578947368422e-05, "loss": 0.4402, "step": 34585 }, { "epoch": 1.93672303729421, "grad_norm": 1.2751247882843018, "learning_rate": 9.616552631578948e-05, "loss": 0.4888, "step": 34586 }, { "epoch": 1.936779034606339, "grad_norm": 1.2136073112487793, "learning_rate": 9.616526315789474e-05, "loss": 0.4776, "step": 34587 }, { "epoch": 1.936835031918468, "grad_norm": 1.2153640985488892, "learning_rate": 9.616500000000001e-05, "loss": 0.5074, "step": 34588 }, { "epoch": 1.936891029230597, "grad_norm": 1.2673554420471191, "learning_rate": 9.616473684210526e-05, "loss": 0.4076, "step": 34589 }, { "epoch": 1.936947026542726, "grad_norm": 1.2590012550354004, "learning_rate": 9.616447368421053e-05, "loss": 0.4068, "step": 34590 }, { "epoch": 1.937003023854855, "grad_norm": 1.1679997444152832, "learning_rate": 9.616421052631579e-05, "loss": 0.5273, "step": 34591 }, { "epoch": 1.937059021166984, "grad_norm": 1.422092318534851, "learning_rate": 9.616394736842106e-05, "loss": 0.413, "step": 34592 }, { "epoch": 1.937115018479113, "grad_norm": 1.30519700050354, "learning_rate": 9.616368421052631e-05, "loss": 0.458, "step": 34593 }, { "epoch": 1.937171015791242, "grad_norm": 1.289374828338623, "learning_rate": 9.616342105263158e-05, "loss": 0.3867, "step": 34594 }, { "epoch": 1.937227013103371, "grad_norm": 
1.5045796632766724, "learning_rate": 9.616315789473684e-05, "loss": 0.5411, "step": 34595 }, { "epoch": 1.9372830104155, "grad_norm": 1.4894495010375977, "learning_rate": 9.616289473684212e-05, "loss": 0.4823, "step": 34596 }, { "epoch": 1.9373390077276291, "grad_norm": 1.1748589277267456, "learning_rate": 9.616263157894738e-05, "loss": 0.4262, "step": 34597 }, { "epoch": 1.9373950050397581, "grad_norm": 1.3518860340118408, "learning_rate": 9.616236842105264e-05, "loss": 0.4314, "step": 34598 }, { "epoch": 1.9374510023518872, "grad_norm": 1.6337566375732422, "learning_rate": 9.61621052631579e-05, "loss": 0.4184, "step": 34599 }, { "epoch": 1.937506999664016, "grad_norm": 1.634097933769226, "learning_rate": 9.616184210526317e-05, "loss": 0.3884, "step": 34600 }, { "epoch": 1.937562996976145, "grad_norm": 1.6530539989471436, "learning_rate": 9.616157894736843e-05, "loss": 0.4732, "step": 34601 }, { "epoch": 1.937618994288274, "grad_norm": 1.3201847076416016, "learning_rate": 9.616131578947369e-05, "loss": 0.4004, "step": 34602 }, { "epoch": 1.937674991600403, "grad_norm": 1.0525574684143066, "learning_rate": 9.616105263157895e-05, "loss": 0.3802, "step": 34603 }, { "epoch": 1.937730988912532, "grad_norm": 1.343254804611206, "learning_rate": 9.616078947368421e-05, "loss": 0.4366, "step": 34604 }, { "epoch": 1.937786986224661, "grad_norm": 1.4546691179275513, "learning_rate": 9.616052631578948e-05, "loss": 0.5366, "step": 34605 }, { "epoch": 1.93784298353679, "grad_norm": 1.1656324863433838, "learning_rate": 9.616026315789474e-05, "loss": 0.488, "step": 34606 }, { "epoch": 1.937898980848919, "grad_norm": 1.2189165353775024, "learning_rate": 9.616e-05, "loss": 0.4051, "step": 34607 }, { "epoch": 1.9379549781610481, "grad_norm": 1.1972731351852417, "learning_rate": 9.615973684210526e-05, "loss": 0.5443, "step": 34608 }, { "epoch": 1.9380109754731771, "grad_norm": 1.43869149684906, "learning_rate": 9.615947368421053e-05, "loss": 0.4481, "step": 34609 }, { "epoch": 
1.9380669727853062, "grad_norm": 1.5553572177886963, "learning_rate": 9.61592105263158e-05, "loss": 0.5815, "step": 34610 }, { "epoch": 1.9381229700974352, "grad_norm": 1.141389012336731, "learning_rate": 9.615894736842105e-05, "loss": 0.4766, "step": 34611 }, { "epoch": 1.9381789674095642, "grad_norm": 1.2720290422439575, "learning_rate": 9.615868421052631e-05, "loss": 0.48, "step": 34612 }, { "epoch": 1.9382349647216932, "grad_norm": 1.0373291969299316, "learning_rate": 9.615842105263159e-05, "loss": 0.4641, "step": 34613 }, { "epoch": 1.9382909620338222, "grad_norm": 1.37346351146698, "learning_rate": 9.615815789473685e-05, "loss": 0.498, "step": 34614 }, { "epoch": 1.9383469593459512, "grad_norm": 1.2834287881851196, "learning_rate": 9.615789473684212e-05, "loss": 0.4999, "step": 34615 }, { "epoch": 1.9384029566580803, "grad_norm": 1.1452680826187134, "learning_rate": 9.615763157894737e-05, "loss": 0.4217, "step": 34616 }, { "epoch": 1.9384589539702093, "grad_norm": 1.2596207857131958, "learning_rate": 9.615736842105264e-05, "loss": 0.5259, "step": 34617 }, { "epoch": 1.9385149512823383, "grad_norm": 1.3268545866012573, "learning_rate": 9.61571052631579e-05, "loss": 0.3686, "step": 34618 }, { "epoch": 1.9385709485944673, "grad_norm": 1.4154986143112183, "learning_rate": 9.615684210526317e-05, "loss": 0.4974, "step": 34619 }, { "epoch": 1.9386269459065963, "grad_norm": 0.9902254939079285, "learning_rate": 9.615657894736843e-05, "loss": 0.3613, "step": 34620 }, { "epoch": 1.9386829432187254, "grad_norm": 1.471644401550293, "learning_rate": 9.615631578947369e-05, "loss": 0.4858, "step": 34621 }, { "epoch": 1.9387389405308544, "grad_norm": 1.3542932271957397, "learning_rate": 9.615605263157895e-05, "loss": 0.4886, "step": 34622 }, { "epoch": 1.9387949378429834, "grad_norm": 1.045552134513855, "learning_rate": 9.615578947368421e-05, "loss": 0.3742, "step": 34623 }, { "epoch": 1.9388509351551124, "grad_norm": 1.6668481826782227, "learning_rate": 
9.615552631578948e-05, "loss": 0.5614, "step": 34624 }, { "epoch": 1.9389069324672414, "grad_norm": 1.5938810110092163, "learning_rate": 9.615526315789474e-05, "loss": 0.3317, "step": 34625 }, { "epoch": 1.9389629297793705, "grad_norm": 1.4155513048171997, "learning_rate": 9.6155e-05, "loss": 0.4982, "step": 34626 }, { "epoch": 1.9390189270914995, "grad_norm": 1.1545783281326294, "learning_rate": 9.615473684210526e-05, "loss": 0.4106, "step": 34627 }, { "epoch": 1.9390749244036285, "grad_norm": 1.1907387971878052, "learning_rate": 9.615447368421054e-05, "loss": 0.3877, "step": 34628 }, { "epoch": 1.9391309217157575, "grad_norm": 1.9938383102416992, "learning_rate": 9.61542105263158e-05, "loss": 0.4124, "step": 34629 }, { "epoch": 1.9391869190278865, "grad_norm": 1.1957322359085083, "learning_rate": 9.615394736842106e-05, "loss": 0.5507, "step": 34630 }, { "epoch": 1.9392429163400156, "grad_norm": 1.529099941253662, "learning_rate": 9.615368421052632e-05, "loss": 0.3914, "step": 34631 }, { "epoch": 1.9392989136521446, "grad_norm": 1.3665045499801636, "learning_rate": 9.615342105263159e-05, "loss": 0.5274, "step": 34632 }, { "epoch": 1.9393549109642736, "grad_norm": 1.3016235828399658, "learning_rate": 9.615315789473685e-05, "loss": 0.5063, "step": 34633 }, { "epoch": 1.9394109082764026, "grad_norm": 1.4059265851974487, "learning_rate": 9.615289473684211e-05, "loss": 0.4747, "step": 34634 }, { "epoch": 1.9394669055885316, "grad_norm": 1.2913436889648438, "learning_rate": 9.615263157894737e-05, "loss": 0.4082, "step": 34635 }, { "epoch": 1.9395229029006607, "grad_norm": 1.2261064052581787, "learning_rate": 9.615236842105264e-05, "loss": 0.4238, "step": 34636 }, { "epoch": 1.9395789002127897, "grad_norm": 1.096225619316101, "learning_rate": 9.61521052631579e-05, "loss": 0.4179, "step": 34637 }, { "epoch": 1.9396348975249187, "grad_norm": 1.0602062940597534, "learning_rate": 9.615184210526317e-05, "loss": 0.4675, "step": 34638 }, { "epoch": 1.9396908948370477, 
"grad_norm": 1.3400038480758667, "learning_rate": 9.615157894736842e-05, "loss": 0.3999, "step": 34639 }, { "epoch": 1.9397468921491767, "grad_norm": 1.1630491018295288, "learning_rate": 9.615131578947368e-05, "loss": 0.3586, "step": 34640 }, { "epoch": 1.9398028894613057, "grad_norm": 1.1611605882644653, "learning_rate": 9.615105263157895e-05, "loss": 0.54, "step": 34641 }, { "epoch": 1.9398588867734348, "grad_norm": 1.250673770904541, "learning_rate": 9.615078947368421e-05, "loss": 0.4018, "step": 34642 }, { "epoch": 1.9399148840855638, "grad_norm": 1.2914491891860962, "learning_rate": 9.615052631578947e-05, "loss": 0.566, "step": 34643 }, { "epoch": 1.9399708813976928, "grad_norm": 1.1089885234832764, "learning_rate": 9.615026315789473e-05, "loss": 0.5059, "step": 34644 }, { "epoch": 1.9400268787098218, "grad_norm": 1.4148918390274048, "learning_rate": 9.615e-05, "loss": 0.3945, "step": 34645 }, { "epoch": 1.9400828760219508, "grad_norm": 1.3594365119934082, "learning_rate": 9.614973684210527e-05, "loss": 0.4928, "step": 34646 }, { "epoch": 1.9401388733340799, "grad_norm": 1.026719331741333, "learning_rate": 9.614947368421054e-05, "loss": 0.2757, "step": 34647 }, { "epoch": 1.9401948706462089, "grad_norm": 1.564383625984192, "learning_rate": 9.614921052631579e-05, "loss": 0.6749, "step": 34648 }, { "epoch": 1.940250867958338, "grad_norm": 1.378625750541687, "learning_rate": 9.614894736842106e-05, "loss": 0.333, "step": 34649 }, { "epoch": 1.940306865270467, "grad_norm": 1.2058113813400269, "learning_rate": 9.614868421052632e-05, "loss": 0.4204, "step": 34650 }, { "epoch": 1.940362862582596, "grad_norm": 1.3051544427871704, "learning_rate": 9.614842105263159e-05, "loss": 0.3762, "step": 34651 }, { "epoch": 1.940418859894725, "grad_norm": 1.471980094909668, "learning_rate": 9.614815789473685e-05, "loss": 0.3494, "step": 34652 }, { "epoch": 1.940474857206854, "grad_norm": 1.1293929815292358, "learning_rate": 9.614789473684211e-05, "loss": 0.4105, "step": 34653 }, { 
"epoch": 1.940530854518983, "grad_norm": 1.0405559539794922, "learning_rate": 9.614763157894737e-05, "loss": 0.4182, "step": 34654 }, { "epoch": 1.940586851831112, "grad_norm": 1.2232109308242798, "learning_rate": 9.614736842105264e-05, "loss": 0.3722, "step": 34655 }, { "epoch": 1.940642849143241, "grad_norm": 1.5417680740356445, "learning_rate": 9.61471052631579e-05, "loss": 0.4721, "step": 34656 }, { "epoch": 1.94069884645537, "grad_norm": 1.6847895383834839, "learning_rate": 9.614684210526316e-05, "loss": 0.4847, "step": 34657 }, { "epoch": 1.940754843767499, "grad_norm": 1.2873189449310303, "learning_rate": 9.614657894736842e-05, "loss": 0.4079, "step": 34658 }, { "epoch": 1.940810841079628, "grad_norm": 1.2061678171157837, "learning_rate": 9.614631578947368e-05, "loss": 0.5552, "step": 34659 }, { "epoch": 1.940866838391757, "grad_norm": 31.84278106689453, "learning_rate": 9.614605263157896e-05, "loss": 0.354, "step": 34660 }, { "epoch": 1.9409228357038861, "grad_norm": 1.3271760940551758, "learning_rate": 9.614578947368422e-05, "loss": 0.4848, "step": 34661 }, { "epoch": 1.9409788330160151, "grad_norm": 1.1252739429473877, "learning_rate": 9.614552631578948e-05, "loss": 0.4151, "step": 34662 }, { "epoch": 1.9410348303281442, "grad_norm": 1.287384271621704, "learning_rate": 9.614526315789474e-05, "loss": 0.5251, "step": 34663 }, { "epoch": 1.9410908276402732, "grad_norm": 1.4087750911712646, "learning_rate": 9.614500000000001e-05, "loss": 0.5181, "step": 34664 }, { "epoch": 1.9411468249524022, "grad_norm": 1.1600946187973022, "learning_rate": 9.614473684210527e-05, "loss": 0.3616, "step": 34665 }, { "epoch": 1.9412028222645312, "grad_norm": 1.1475861072540283, "learning_rate": 9.614447368421053e-05, "loss": 0.5085, "step": 34666 }, { "epoch": 1.9412588195766602, "grad_norm": 1.1850228309631348, "learning_rate": 9.614421052631579e-05, "loss": 0.4361, "step": 34667 }, { "epoch": 1.9413148168887893, "grad_norm": 1.1274324655532837, "learning_rate": 
9.614394736842106e-05, "loss": 0.3552, "step": 34668 }, { "epoch": 1.9413708142009183, "grad_norm": 1.6500643491744995, "learning_rate": 9.614368421052632e-05, "loss": 0.4626, "step": 34669 }, { "epoch": 1.9414268115130473, "grad_norm": 1.5146517753601074, "learning_rate": 9.61434210526316e-05, "loss": 0.6171, "step": 34670 }, { "epoch": 1.9414828088251763, "grad_norm": 1.4760044813156128, "learning_rate": 9.614315789473684e-05, "loss": 0.5422, "step": 34671 }, { "epoch": 1.9415388061373053, "grad_norm": 1.1378986835479736, "learning_rate": 9.614289473684211e-05, "loss": 0.4081, "step": 34672 }, { "epoch": 1.9415948034494344, "grad_norm": 1.2853001356124878, "learning_rate": 9.614263157894737e-05, "loss": 0.5046, "step": 34673 }, { "epoch": 1.9416508007615634, "grad_norm": 1.8204479217529297, "learning_rate": 9.614236842105263e-05, "loss": 0.5259, "step": 34674 }, { "epoch": 1.9417067980736924, "grad_norm": 1.2837580442428589, "learning_rate": 9.61421052631579e-05, "loss": 0.4325, "step": 34675 }, { "epoch": 1.9417627953858214, "grad_norm": 1.2756407260894775, "learning_rate": 9.614184210526315e-05, "loss": 0.4198, "step": 34676 }, { "epoch": 1.9418187926979504, "grad_norm": 1.205938696861267, "learning_rate": 9.614157894736843e-05, "loss": 0.4451, "step": 34677 }, { "epoch": 1.9418747900100795, "grad_norm": 1.0049076080322266, "learning_rate": 9.614131578947369e-05, "loss": 0.401, "step": 34678 }, { "epoch": 1.9419307873222085, "grad_norm": 1.1169884204864502, "learning_rate": 9.614105263157896e-05, "loss": 0.3919, "step": 34679 }, { "epoch": 1.9419867846343375, "grad_norm": 1.808873176574707, "learning_rate": 9.614078947368422e-05, "loss": 0.3812, "step": 34680 }, { "epoch": 1.9420427819464665, "grad_norm": 1.1550613641738892, "learning_rate": 9.614052631578948e-05, "loss": 0.3846, "step": 34681 }, { "epoch": 1.9420987792585955, "grad_norm": 1.4847160577774048, "learning_rate": 9.614026315789474e-05, "loss": 0.4704, "step": 34682 }, { "epoch": 1.9421547765707246, 
"grad_norm": 1.2507047653198242, "learning_rate": 9.614000000000001e-05, "loss": 0.4187, "step": 34683 }, { "epoch": 1.9422107738828536, "grad_norm": 1.1527881622314453, "learning_rate": 9.613973684210527e-05, "loss": 0.4345, "step": 34684 }, { "epoch": 1.9422667711949826, "grad_norm": 1.1177562475204468, "learning_rate": 9.613947368421053e-05, "loss": 0.4265, "step": 34685 }, { "epoch": 1.9423227685071116, "grad_norm": 1.0668704509735107, "learning_rate": 9.613921052631579e-05, "loss": 0.4395, "step": 34686 }, { "epoch": 1.9423787658192406, "grad_norm": 1.3068642616271973, "learning_rate": 9.613894736842106e-05, "loss": 0.3916, "step": 34687 }, { "epoch": 1.9424347631313696, "grad_norm": 1.35270094871521, "learning_rate": 9.613868421052632e-05, "loss": 0.3953, "step": 34688 }, { "epoch": 1.9424907604434987, "grad_norm": 1.1668734550476074, "learning_rate": 9.613842105263158e-05, "loss": 0.4772, "step": 34689 }, { "epoch": 1.9425467577556277, "grad_norm": 1.3212227821350098, "learning_rate": 9.613815789473684e-05, "loss": 0.4353, "step": 34690 }, { "epoch": 1.9426027550677567, "grad_norm": 1.2311409711837769, "learning_rate": 9.61378947368421e-05, "loss": 0.4164, "step": 34691 }, { "epoch": 1.9426587523798857, "grad_norm": 0.9628854393959045, "learning_rate": 9.613763157894738e-05, "loss": 0.3499, "step": 34692 }, { "epoch": 1.9427147496920147, "grad_norm": 1.4028321504592896, "learning_rate": 9.613736842105264e-05, "loss": 0.3137, "step": 34693 }, { "epoch": 1.9427707470041438, "grad_norm": 1.1342920064926147, "learning_rate": 9.61371052631579e-05, "loss": 0.363, "step": 34694 }, { "epoch": 1.9428267443162728, "grad_norm": 1.2898186445236206, "learning_rate": 9.613684210526316e-05, "loss": 0.4357, "step": 34695 }, { "epoch": 1.9428827416284018, "grad_norm": 1.3033515214920044, "learning_rate": 9.613657894736843e-05, "loss": 0.3959, "step": 34696 }, { "epoch": 1.9429387389405308, "grad_norm": 1.0831857919692993, "learning_rate": 9.613631578947369e-05, "loss": 
0.5126, "step": 34697 }, { "epoch": 1.9429947362526598, "grad_norm": 1.411524772644043, "learning_rate": 9.613605263157895e-05, "loss": 0.4417, "step": 34698 }, { "epoch": 1.9430507335647889, "grad_norm": 1.2662649154663086, "learning_rate": 9.613578947368421e-05, "loss": 0.46, "step": 34699 }, { "epoch": 1.9431067308769179, "grad_norm": 0.9521045088768005, "learning_rate": 9.613552631578948e-05, "loss": 0.2692, "step": 34700 }, { "epoch": 1.943162728189047, "grad_norm": 1.4040699005126953, "learning_rate": 9.613526315789474e-05, "loss": 0.4689, "step": 34701 }, { "epoch": 1.943218725501176, "grad_norm": 1.1717408895492554, "learning_rate": 9.613500000000001e-05, "loss": 0.3754, "step": 34702 }, { "epoch": 1.943274722813305, "grad_norm": 25.933670043945312, "learning_rate": 9.613473684210526e-05, "loss": 0.5469, "step": 34703 }, { "epoch": 1.943330720125434, "grad_norm": 1.6813251972198486, "learning_rate": 9.613447368421053e-05, "loss": 0.5512, "step": 34704 }, { "epoch": 1.943386717437563, "grad_norm": 1.6740896701812744, "learning_rate": 9.613421052631579e-05, "loss": 0.5387, "step": 34705 }, { "epoch": 1.943442714749692, "grad_norm": 1.402829885482788, "learning_rate": 9.613394736842107e-05, "loss": 0.4147, "step": 34706 }, { "epoch": 1.943498712061821, "grad_norm": 1.3453236818313599, "learning_rate": 9.613368421052633e-05, "loss": 0.7019, "step": 34707 }, { "epoch": 1.94355470937395, "grad_norm": 1.191650390625, "learning_rate": 9.613342105263157e-05, "loss": 0.4072, "step": 34708 }, { "epoch": 1.943610706686079, "grad_norm": 0.9482975006103516, "learning_rate": 9.613315789473685e-05, "loss": 0.2927, "step": 34709 }, { "epoch": 1.943666703998208, "grad_norm": 1.2613580226898193, "learning_rate": 9.61328947368421e-05, "loss": 0.4612, "step": 34710 }, { "epoch": 1.943722701310337, "grad_norm": 1.2545610666275024, "learning_rate": 9.613263157894738e-05, "loss": 0.5167, "step": 34711 }, { "epoch": 1.943778698622466, "grad_norm": 1.2578083276748657, 
"learning_rate": 9.613236842105264e-05, "loss": 0.4072, "step": 34712 }, { "epoch": 1.9438346959345951, "grad_norm": 1.382576584815979, "learning_rate": 9.61321052631579e-05, "loss": 0.516, "step": 34713 }, { "epoch": 1.9438906932467241, "grad_norm": 1.1403379440307617, "learning_rate": 9.613184210526316e-05, "loss": 0.3981, "step": 34714 }, { "epoch": 1.9439466905588532, "grad_norm": 1.2607324123382568, "learning_rate": 9.613157894736843e-05, "loss": 0.5121, "step": 34715 }, { "epoch": 1.9440026878709822, "grad_norm": 1.231096625328064, "learning_rate": 9.613131578947369e-05, "loss": 0.3824, "step": 34716 }, { "epoch": 1.9440586851831112, "grad_norm": 1.4585281610488892, "learning_rate": 9.613105263157895e-05, "loss": 0.575, "step": 34717 }, { "epoch": 1.9441146824952402, "grad_norm": 1.4718551635742188, "learning_rate": 9.613078947368421e-05, "loss": 0.5371, "step": 34718 }, { "epoch": 1.9441706798073692, "grad_norm": 1.2319722175598145, "learning_rate": 9.613052631578948e-05, "loss": 0.4254, "step": 34719 }, { "epoch": 1.9442266771194983, "grad_norm": 1.2203540802001953, "learning_rate": 9.613026315789474e-05, "loss": 0.4326, "step": 34720 }, { "epoch": 1.9442826744316273, "grad_norm": 1.7114763259887695, "learning_rate": 9.613e-05, "loss": 0.3861, "step": 34721 }, { "epoch": 1.9443386717437563, "grad_norm": 1.3191580772399902, "learning_rate": 9.612973684210526e-05, "loss": 0.4873, "step": 34722 }, { "epoch": 1.9443946690558853, "grad_norm": 1.222227931022644, "learning_rate": 9.612947368421054e-05, "loss": 0.5539, "step": 34723 }, { "epoch": 1.9444506663680143, "grad_norm": 1.337488055229187, "learning_rate": 9.61292105263158e-05, "loss": 0.4003, "step": 34724 }, { "epoch": 1.9445066636801434, "grad_norm": 1.2201447486877441, "learning_rate": 9.612894736842106e-05, "loss": 0.3754, "step": 34725 }, { "epoch": 1.9445626609922724, "grad_norm": 1.3758704662322998, "learning_rate": 9.612868421052632e-05, "loss": 0.4832, "step": 34726 }, { "epoch": 
1.9446186583044014, "grad_norm": 1.0471324920654297, "learning_rate": 9.612842105263157e-05, "loss": 0.4696, "step": 34727 }, { "epoch": 1.9446746556165304, "grad_norm": 2.534958839416504, "learning_rate": 9.612815789473685e-05, "loss": 0.6174, "step": 34728 }, { "epoch": 1.9447306529286594, "grad_norm": 1.1390732526779175, "learning_rate": 9.612789473684211e-05, "loss": 0.4587, "step": 34729 }, { "epoch": 1.9447866502407885, "grad_norm": 1.5098438262939453, "learning_rate": 9.612763157894738e-05, "loss": 0.4986, "step": 34730 }, { "epoch": 1.9448426475529175, "grad_norm": 1.1739249229431152, "learning_rate": 9.612736842105263e-05, "loss": 0.5055, "step": 34731 }, { "epoch": 1.9448986448650465, "grad_norm": 1.2637979984283447, "learning_rate": 9.61271052631579e-05, "loss": 0.4017, "step": 34732 }, { "epoch": 1.9449546421771755, "grad_norm": 1.1154446601867676, "learning_rate": 9.612684210526316e-05, "loss": 0.4069, "step": 34733 }, { "epoch": 1.9450106394893045, "grad_norm": 1.1956051588058472, "learning_rate": 9.612657894736843e-05, "loss": 0.406, "step": 34734 }, { "epoch": 1.9450666368014335, "grad_norm": 1.6822385787963867, "learning_rate": 9.61263157894737e-05, "loss": 0.4457, "step": 34735 }, { "epoch": 1.9451226341135626, "grad_norm": 1.1216477155685425, "learning_rate": 9.612605263157895e-05, "loss": 0.4441, "step": 34736 }, { "epoch": 1.9451786314256916, "grad_norm": 1.3763424158096313, "learning_rate": 9.612578947368421e-05, "loss": 0.4056, "step": 34737 }, { "epoch": 1.9452346287378206, "grad_norm": 1.4477684497833252, "learning_rate": 9.612552631578949e-05, "loss": 0.5025, "step": 34738 }, { "epoch": 1.9452906260499496, "grad_norm": 1.247282862663269, "learning_rate": 9.612526315789475e-05, "loss": 0.4293, "step": 34739 }, { "epoch": 1.9453466233620786, "grad_norm": 1.4127693176269531, "learning_rate": 9.6125e-05, "loss": 0.4844, "step": 34740 }, { "epoch": 1.9454026206742077, "grad_norm": 1.2757805585861206, "learning_rate": 9.612473684210527e-05, 
"loss": 0.5124, "step": 34741 }, { "epoch": 1.9454586179863367, "grad_norm": 1.2004910707473755, "learning_rate": 9.612447368421054e-05, "loss": 0.4907, "step": 34742 }, { "epoch": 1.9455146152984657, "grad_norm": 3.766026735305786, "learning_rate": 9.61242105263158e-05, "loss": 0.4712, "step": 34743 }, { "epoch": 1.9455706126105947, "grad_norm": 1.2574459314346313, "learning_rate": 9.612394736842106e-05, "loss": 0.5574, "step": 34744 }, { "epoch": 1.9456266099227237, "grad_norm": 1.0154584646224976, "learning_rate": 9.612368421052632e-05, "loss": 0.3641, "step": 34745 }, { "epoch": 1.9456826072348528, "grad_norm": 1.2783890962600708, "learning_rate": 9.612342105263158e-05, "loss": 0.4259, "step": 34746 }, { "epoch": 1.9457386045469818, "grad_norm": 1.3876056671142578, "learning_rate": 9.612315789473685e-05, "loss": 0.4498, "step": 34747 }, { "epoch": 1.9457946018591108, "grad_norm": 1.3685499429702759, "learning_rate": 9.612289473684211e-05, "loss": 0.6399, "step": 34748 }, { "epoch": 1.9458505991712398, "grad_norm": 1.4972935914993286, "learning_rate": 9.612263157894737e-05, "loss": 0.4284, "step": 34749 }, { "epoch": 1.9459065964833688, "grad_norm": 1.1798095703125, "learning_rate": 9.612236842105263e-05, "loss": 0.3664, "step": 34750 }, { "epoch": 1.9459625937954979, "grad_norm": 1.0813255310058594, "learning_rate": 9.61221052631579e-05, "loss": 0.3635, "step": 34751 }, { "epoch": 1.9460185911076269, "grad_norm": 1.1028759479522705, "learning_rate": 9.612184210526316e-05, "loss": 0.3612, "step": 34752 }, { "epoch": 1.946074588419756, "grad_norm": 1.4469988346099854, "learning_rate": 9.612157894736842e-05, "loss": 0.5838, "step": 34753 }, { "epoch": 1.946130585731885, "grad_norm": 1.3017935752868652, "learning_rate": 9.612131578947368e-05, "loss": 0.3959, "step": 34754 }, { "epoch": 1.946186583044014, "grad_norm": 1.4254018068313599, "learning_rate": 9.612105263157896e-05, "loss": 0.5403, "step": 34755 }, { "epoch": 1.946242580356143, "grad_norm": 
1.5793259143829346, "learning_rate": 9.612078947368422e-05, "loss": 0.3522, "step": 34756 }, { "epoch": 1.946298577668272, "grad_norm": 1.7903027534484863, "learning_rate": 9.612052631578949e-05, "loss": 0.4963, "step": 34757 }, { "epoch": 1.946354574980401, "grad_norm": 1.037590503692627, "learning_rate": 9.612026315789473e-05, "loss": 0.335, "step": 34758 }, { "epoch": 1.94641057229253, "grad_norm": 1.1669597625732422, "learning_rate": 9.612000000000001e-05, "loss": 0.557, "step": 34759 }, { "epoch": 1.946466569604659, "grad_norm": 1.1176064014434814, "learning_rate": 9.611973684210527e-05, "loss": 0.34, "step": 34760 }, { "epoch": 1.946522566916788, "grad_norm": 1.3815439939498901, "learning_rate": 9.611947368421053e-05, "loss": 0.5729, "step": 34761 }, { "epoch": 1.946578564228917, "grad_norm": 1.4713155031204224, "learning_rate": 9.61192105263158e-05, "loss": 0.6223, "step": 34762 }, { "epoch": 1.946634561541046, "grad_norm": 1.3211703300476074, "learning_rate": 9.611894736842105e-05, "loss": 0.4572, "step": 34763 }, { "epoch": 1.946690558853175, "grad_norm": 1.4970693588256836, "learning_rate": 9.611868421052632e-05, "loss": 0.4681, "step": 34764 }, { "epoch": 1.9467465561653041, "grad_norm": 1.1335227489471436, "learning_rate": 9.611842105263158e-05, "loss": 0.3592, "step": 34765 }, { "epoch": 1.9468025534774331, "grad_norm": 1.2893025875091553, "learning_rate": 9.611815789473685e-05, "loss": 0.428, "step": 34766 }, { "epoch": 1.9468585507895622, "grad_norm": 1.251278281211853, "learning_rate": 9.611789473684211e-05, "loss": 0.4115, "step": 34767 }, { "epoch": 1.9469145481016912, "grad_norm": 1.3840177059173584, "learning_rate": 9.611763157894737e-05, "loss": 0.5638, "step": 34768 }, { "epoch": 1.9469705454138202, "grad_norm": 1.361457109451294, "learning_rate": 9.611736842105263e-05, "loss": 0.5049, "step": 34769 }, { "epoch": 1.9470265427259492, "grad_norm": 1.1503407955169678, "learning_rate": 9.61171052631579e-05, "loss": 0.4856, "step": 34770 }, { 
"epoch": 1.9470825400380782, "grad_norm": 1.0933759212493896, "learning_rate": 9.611684210526317e-05, "loss": 0.4899, "step": 34771 }, { "epoch": 1.9471385373502073, "grad_norm": 1.3940249681472778, "learning_rate": 9.611657894736843e-05, "loss": 0.5338, "step": 34772 }, { "epoch": 1.9471945346623363, "grad_norm": 1.3528846502304077, "learning_rate": 9.611631578947368e-05, "loss": 0.4647, "step": 34773 }, { "epoch": 1.9472505319744653, "grad_norm": 1.2399296760559082, "learning_rate": 9.611605263157896e-05, "loss": 0.4262, "step": 34774 }, { "epoch": 1.9473065292865943, "grad_norm": 1.2943211793899536, "learning_rate": 9.611578947368422e-05, "loss": 0.4055, "step": 34775 }, { "epoch": 1.9473625265987233, "grad_norm": 1.335343837738037, "learning_rate": 9.611552631578948e-05, "loss": 0.4456, "step": 34776 }, { "epoch": 1.9474185239108524, "grad_norm": 1.2676340341567993, "learning_rate": 9.611526315789474e-05, "loss": 0.422, "step": 34777 }, { "epoch": 1.9474745212229814, "grad_norm": 1.2184985876083374, "learning_rate": 9.6115e-05, "loss": 0.3713, "step": 34778 }, { "epoch": 1.9475305185351104, "grad_norm": 1.112838625907898, "learning_rate": 9.611473684210527e-05, "loss": 0.4254, "step": 34779 }, { "epoch": 1.9475865158472394, "grad_norm": 1.2566959857940674, "learning_rate": 9.611447368421053e-05, "loss": 0.3828, "step": 34780 }, { "epoch": 1.9476425131593684, "grad_norm": 1.3487221002578735, "learning_rate": 9.611421052631579e-05, "loss": 0.4829, "step": 34781 }, { "epoch": 1.9476985104714974, "grad_norm": 1.2248846292495728, "learning_rate": 9.611394736842105e-05, "loss": 0.5197, "step": 34782 }, { "epoch": 1.9477545077836265, "grad_norm": 1.3060572147369385, "learning_rate": 9.611368421052632e-05, "loss": 0.3828, "step": 34783 }, { "epoch": 1.9478105050957555, "grad_norm": 1.038963794708252, "learning_rate": 9.611342105263158e-05, "loss": 0.3626, "step": 34784 }, { "epoch": 1.9478665024078845, "grad_norm": 1.215692400932312, "learning_rate": 
9.611315789473686e-05, "loss": 0.4817, "step": 34785 }, { "epoch": 1.9479224997200135, "grad_norm": 1.2252408266067505, "learning_rate": 9.61128947368421e-05, "loss": 0.3279, "step": 34786 }, { "epoch": 1.9479784970321425, "grad_norm": 1.3991550207138062, "learning_rate": 9.611263157894738e-05, "loss": 0.4552, "step": 34787 }, { "epoch": 1.9480344943442716, "grad_norm": 1.2616218328475952, "learning_rate": 9.611236842105264e-05, "loss": 0.4275, "step": 34788 }, { "epoch": 1.9480904916564006, "grad_norm": 1.5421687364578247, "learning_rate": 9.611210526315791e-05, "loss": 0.5387, "step": 34789 }, { "epoch": 1.9481464889685296, "grad_norm": 1.352452278137207, "learning_rate": 9.611184210526315e-05, "loss": 0.4082, "step": 34790 }, { "epoch": 1.9482024862806586, "grad_norm": 1.048331379890442, "learning_rate": 9.611157894736843e-05, "loss": 0.3665, "step": 34791 }, { "epoch": 1.9482584835927876, "grad_norm": 1.5675545930862427, "learning_rate": 9.611131578947369e-05, "loss": 0.4322, "step": 34792 }, { "epoch": 1.9483144809049167, "grad_norm": 1.3555084466934204, "learning_rate": 9.611105263157896e-05, "loss": 0.4585, "step": 34793 }, { "epoch": 1.9483704782170457, "grad_norm": 1.2935302257537842, "learning_rate": 9.611078947368422e-05, "loss": 0.4026, "step": 34794 }, { "epoch": 1.9484264755291747, "grad_norm": 1.2858777046203613, "learning_rate": 9.611052631578947e-05, "loss": 0.4629, "step": 34795 }, { "epoch": 1.9484824728413037, "grad_norm": 1.4356635808944702, "learning_rate": 9.611026315789474e-05, "loss": 0.457, "step": 34796 }, { "epoch": 1.9485384701534327, "grad_norm": 1.1528637409210205, "learning_rate": 9.611e-05, "loss": 0.4287, "step": 34797 }, { "epoch": 1.9485944674655618, "grad_norm": 1.2664382457733154, "learning_rate": 9.610973684210527e-05, "loss": 0.4413, "step": 34798 }, { "epoch": 1.9486504647776908, "grad_norm": 1.2803250551223755, "learning_rate": 9.610947368421053e-05, "loss": 0.3677, "step": 34799 }, { "epoch": 1.9487064620898198, 
"grad_norm": 1.3596066236495972, "learning_rate": 9.610921052631579e-05, "loss": 0.4128, "step": 34800 }, { "epoch": 1.9487624594019488, "grad_norm": 1.3163148164749146, "learning_rate": 9.610894736842105e-05, "loss": 0.4701, "step": 34801 }, { "epoch": 1.9488184567140778, "grad_norm": 1.1898585557937622, "learning_rate": 9.610868421052633e-05, "loss": 0.4409, "step": 34802 }, { "epoch": 1.9488744540262068, "grad_norm": 1.1987719535827637, "learning_rate": 9.610842105263159e-05, "loss": 0.4209, "step": 34803 }, { "epoch": 1.9489304513383359, "grad_norm": 1.1841790676116943, "learning_rate": 9.610815789473684e-05, "loss": 0.4202, "step": 34804 }, { "epoch": 1.9489864486504649, "grad_norm": 5.456706523895264, "learning_rate": 9.61078947368421e-05, "loss": 0.404, "step": 34805 }, { "epoch": 1.949042445962594, "grad_norm": 1.2145456075668335, "learning_rate": 9.610763157894738e-05, "loss": 0.4286, "step": 34806 }, { "epoch": 1.949098443274723, "grad_norm": 1.3066177368164062, "learning_rate": 9.610736842105264e-05, "loss": 0.4138, "step": 34807 }, { "epoch": 1.949154440586852, "grad_norm": 1.01899254322052, "learning_rate": 9.61071052631579e-05, "loss": 0.4498, "step": 34808 }, { "epoch": 1.949210437898981, "grad_norm": 1.2671171426773071, "learning_rate": 9.610684210526316e-05, "loss": 0.4301, "step": 34809 }, { "epoch": 1.94926643521111, "grad_norm": 1.398147702217102, "learning_rate": 9.610657894736843e-05, "loss": 0.5394, "step": 34810 }, { "epoch": 1.949322432523239, "grad_norm": 1.791809320449829, "learning_rate": 9.610631578947369e-05, "loss": 0.5838, "step": 34811 }, { "epoch": 1.949378429835368, "grad_norm": 2.0739619731903076, "learning_rate": 9.610605263157895e-05, "loss": 0.3496, "step": 34812 }, { "epoch": 1.949434427147497, "grad_norm": 1.506271243095398, "learning_rate": 9.610578947368421e-05, "loss": 0.5248, "step": 34813 }, { "epoch": 1.949490424459626, "grad_norm": 1.480880618095398, "learning_rate": 9.610552631578947e-05, "loss": 0.4734, "step": 
34814 }, { "epoch": 1.949546421771755, "grad_norm": 1.0420907735824585, "learning_rate": 9.610526315789474e-05, "loss": 0.3872, "step": 34815 }, { "epoch": 1.949602419083884, "grad_norm": 1.2384741306304932, "learning_rate": 9.6105e-05, "loss": 0.5088, "step": 34816 }, { "epoch": 1.9496584163960131, "grad_norm": 1.5229109525680542, "learning_rate": 9.610473684210528e-05, "loss": 0.5538, "step": 34817 }, { "epoch": 1.9497144137081421, "grad_norm": 1.1750011444091797, "learning_rate": 9.610447368421052e-05, "loss": 0.49, "step": 34818 }, { "epoch": 1.9497704110202712, "grad_norm": 1.2158242464065552, "learning_rate": 9.61042105263158e-05, "loss": 0.5002, "step": 34819 }, { "epoch": 1.9498264083324002, "grad_norm": 1.3357243537902832, "learning_rate": 9.610394736842105e-05, "loss": 0.4838, "step": 34820 }, { "epoch": 1.9498824056445292, "grad_norm": 1.097816824913025, "learning_rate": 9.610368421052633e-05, "loss": 0.4567, "step": 34821 }, { "epoch": 1.9499384029566582, "grad_norm": 1.206210732460022, "learning_rate": 9.610342105263159e-05, "loss": 0.4362, "step": 34822 }, { "epoch": 1.9499944002687872, "grad_norm": 1.408158540725708, "learning_rate": 9.610315789473685e-05, "loss": 0.5342, "step": 34823 }, { "epoch": 1.9500503975809163, "grad_norm": 1.2987158298492432, "learning_rate": 9.610289473684211e-05, "loss": 0.4069, "step": 34824 }, { "epoch": 1.9501063948930453, "grad_norm": 1.2456448078155518, "learning_rate": 9.610263157894738e-05, "loss": 0.4683, "step": 34825 }, { "epoch": 1.9501623922051743, "grad_norm": 1.2461268901824951, "learning_rate": 9.610236842105264e-05, "loss": 0.4657, "step": 34826 }, { "epoch": 1.9502183895173033, "grad_norm": 1.597239375114441, "learning_rate": 9.61021052631579e-05, "loss": 0.5867, "step": 34827 }, { "epoch": 1.9502743868294323, "grad_norm": 1.554483413696289, "learning_rate": 9.610184210526316e-05, "loss": 0.4291, "step": 34828 }, { "epoch": 1.9503303841415613, "grad_norm": 1.6899200677871704, "learning_rate": 
9.610157894736842e-05, "loss": 0.4518, "step": 34829 }, { "epoch": 1.9503863814536904, "grad_norm": 2.320903778076172, "learning_rate": 9.610131578947369e-05, "loss": 0.6214, "step": 34830 }, { "epoch": 1.9504423787658194, "grad_norm": 1.515985369682312, "learning_rate": 9.610105263157895e-05, "loss": 0.6437, "step": 34831 }, { "epoch": 1.9504983760779484, "grad_norm": 1.6087281703948975, "learning_rate": 9.610078947368421e-05, "loss": 0.4387, "step": 34832 }, { "epoch": 1.9505543733900774, "grad_norm": 1.1448251008987427, "learning_rate": 9.610052631578947e-05, "loss": 0.5262, "step": 34833 }, { "epoch": 1.9506103707022064, "grad_norm": 1.3280251026153564, "learning_rate": 9.610026315789475e-05, "loss": 0.4794, "step": 34834 }, { "epoch": 1.9506663680143355, "grad_norm": 1.1608219146728516, "learning_rate": 9.61e-05, "loss": 0.4233, "step": 34835 }, { "epoch": 1.9507223653264645, "grad_norm": 1.6304550170898438, "learning_rate": 9.609973684210526e-05, "loss": 0.5261, "step": 34836 }, { "epoch": 1.9507783626385935, "grad_norm": 1.2674919366836548, "learning_rate": 9.609947368421052e-05, "loss": 0.4454, "step": 34837 }, { "epoch": 1.9508343599507225, "grad_norm": 1.1969736814498901, "learning_rate": 9.60992105263158e-05, "loss": 0.3657, "step": 34838 }, { "epoch": 1.9508903572628515, "grad_norm": 1.1888172626495361, "learning_rate": 9.609894736842106e-05, "loss": 0.43, "step": 34839 }, { "epoch": 1.9509463545749806, "grad_norm": 1.346352458000183, "learning_rate": 9.609868421052633e-05, "loss": 0.388, "step": 34840 }, { "epoch": 1.9510023518871096, "grad_norm": 1.221828579902649, "learning_rate": 9.609842105263158e-05, "loss": 0.3512, "step": 34841 }, { "epoch": 1.9510583491992386, "grad_norm": 1.2034492492675781, "learning_rate": 9.609815789473685e-05, "loss": 0.4418, "step": 34842 }, { "epoch": 1.9511143465113676, "grad_norm": 1.2048699855804443, "learning_rate": 9.609789473684211e-05, "loss": 0.4099, "step": 34843 }, { "epoch": 1.9511703438234966, "grad_norm": 
1.17389976978302, "learning_rate": 9.609763157894738e-05, "loss": 0.3761, "step": 34844 }, { "epoch": 1.9512263411356257, "grad_norm": 1.4838505983352661, "learning_rate": 9.609736842105263e-05, "loss": 0.4248, "step": 34845 }, { "epoch": 1.9512823384477547, "grad_norm": 1.2922229766845703, "learning_rate": 9.609710526315789e-05, "loss": 0.3797, "step": 34846 }, { "epoch": 1.9513383357598837, "grad_norm": 1.8989818096160889, "learning_rate": 9.609684210526316e-05, "loss": 0.373, "step": 34847 }, { "epoch": 1.9513943330720125, "grad_norm": 1.9176208972930908, "learning_rate": 9.609657894736842e-05, "loss": 0.5793, "step": 34848 }, { "epoch": 1.9514503303841415, "grad_norm": 1.0938307046890259, "learning_rate": 9.60963157894737e-05, "loss": 0.3922, "step": 34849 }, { "epoch": 1.9515063276962705, "grad_norm": 1.2032626867294312, "learning_rate": 9.609605263157894e-05, "loss": 0.3815, "step": 34850 }, { "epoch": 1.9515623250083995, "grad_norm": 1.1053210496902466, "learning_rate": 9.609578947368421e-05, "loss": 0.3817, "step": 34851 }, { "epoch": 1.9516183223205286, "grad_norm": 1.1857454776763916, "learning_rate": 9.609552631578947e-05, "loss": 0.5533, "step": 34852 }, { "epoch": 1.9516743196326576, "grad_norm": 1.1610991954803467, "learning_rate": 9.609526315789475e-05, "loss": 0.3912, "step": 34853 }, { "epoch": 1.9517303169447866, "grad_norm": 1.2906380891799927, "learning_rate": 9.609500000000001e-05, "loss": 0.5849, "step": 34854 }, { "epoch": 1.9517863142569156, "grad_norm": 1.6633220911026, "learning_rate": 9.609473684210527e-05, "loss": 0.4415, "step": 34855 }, { "epoch": 1.9518423115690446, "grad_norm": 1.0566508769989014, "learning_rate": 9.609447368421053e-05, "loss": 0.4489, "step": 34856 }, { "epoch": 1.9518983088811737, "grad_norm": 1.4683705568313599, "learning_rate": 9.60942105263158e-05, "loss": 0.4707, "step": 34857 }, { "epoch": 1.9519543061933027, "grad_norm": 1.2812076807022095, "learning_rate": 9.609394736842106e-05, "loss": 0.3667, "step": 34858 
}, { "epoch": 1.9520103035054317, "grad_norm": 1.259263038635254, "learning_rate": 9.609368421052632e-05, "loss": 0.3893, "step": 34859 }, { "epoch": 1.9520663008175607, "grad_norm": 1.582943081855774, "learning_rate": 9.609342105263158e-05, "loss": 0.4785, "step": 34860 }, { "epoch": 1.9521222981296897, "grad_norm": 1.4122960567474365, "learning_rate": 9.609315789473685e-05, "loss": 0.489, "step": 34861 }, { "epoch": 1.9521782954418188, "grad_norm": 1.5275075435638428, "learning_rate": 9.609289473684211e-05, "loss": 0.5193, "step": 34862 }, { "epoch": 1.9522342927539478, "grad_norm": 1.2714399099349976, "learning_rate": 9.609263157894737e-05, "loss": 0.3157, "step": 34863 }, { "epoch": 1.9522902900660768, "grad_norm": 1.472785234451294, "learning_rate": 9.609236842105263e-05, "loss": 0.6184, "step": 34864 }, { "epoch": 1.9523462873782058, "grad_norm": 0.9683521389961243, "learning_rate": 9.609210526315789e-05, "loss": 0.3978, "step": 34865 }, { "epoch": 1.9524022846903348, "grad_norm": 1.248137354850769, "learning_rate": 9.609184210526316e-05, "loss": 0.4805, "step": 34866 }, { "epoch": 1.9524582820024639, "grad_norm": 1.1852248907089233, "learning_rate": 9.609157894736842e-05, "loss": 0.4315, "step": 34867 }, { "epoch": 1.9525142793145929, "grad_norm": 1.2343366146087646, "learning_rate": 9.609131578947368e-05, "loss": 0.3617, "step": 34868 }, { "epoch": 1.952570276626722, "grad_norm": 1.2805016040802002, "learning_rate": 9.609105263157894e-05, "loss": 0.4281, "step": 34869 }, { "epoch": 1.952626273938851, "grad_norm": 1.263858675956726, "learning_rate": 9.609078947368422e-05, "loss": 0.4489, "step": 34870 }, { "epoch": 1.95268227125098, "grad_norm": 1.2028876543045044, "learning_rate": 9.609052631578948e-05, "loss": 0.4111, "step": 34871 }, { "epoch": 1.952738268563109, "grad_norm": 1.2021613121032715, "learning_rate": 9.609026315789475e-05, "loss": 0.3796, "step": 34872 }, { "epoch": 1.952794265875238, "grad_norm": 1.22563636302948, "learning_rate": 9.609e-05, 
"loss": 0.3896, "step": 34873 }, { "epoch": 1.952850263187367, "grad_norm": 1.5378974676132202, "learning_rate": 9.608973684210527e-05, "loss": 0.4585, "step": 34874 }, { "epoch": 1.952906260499496, "grad_norm": 1.2581027746200562, "learning_rate": 9.608947368421053e-05, "loss": 0.458, "step": 34875 }, { "epoch": 1.952962257811625, "grad_norm": 1.5139538049697876, "learning_rate": 9.60892105263158e-05, "loss": 0.4948, "step": 34876 }, { "epoch": 1.953018255123754, "grad_norm": 1.8972543478012085, "learning_rate": 9.608894736842106e-05, "loss": 0.4417, "step": 34877 }, { "epoch": 1.953074252435883, "grad_norm": 1.1931805610656738, "learning_rate": 9.608868421052632e-05, "loss": 0.5338, "step": 34878 }, { "epoch": 1.953130249748012, "grad_norm": 1.2430371046066284, "learning_rate": 9.608842105263158e-05, "loss": 0.4099, "step": 34879 }, { "epoch": 1.953186247060141, "grad_norm": 1.3579869270324707, "learning_rate": 9.608815789473686e-05, "loss": 0.6221, "step": 34880 }, { "epoch": 1.9532422443722701, "grad_norm": 1.0779386758804321, "learning_rate": 9.608789473684211e-05, "loss": 0.361, "step": 34881 }, { "epoch": 1.9532982416843991, "grad_norm": 1.4587692022323608, "learning_rate": 9.608763157894737e-05, "loss": 0.4174, "step": 34882 }, { "epoch": 1.9533542389965282, "grad_norm": 1.1698614358901978, "learning_rate": 9.608736842105263e-05, "loss": 0.4295, "step": 34883 }, { "epoch": 1.9534102363086572, "grad_norm": 1.2938059568405151, "learning_rate": 9.60871052631579e-05, "loss": 0.4211, "step": 34884 }, { "epoch": 1.9534662336207862, "grad_norm": 1.0886733531951904, "learning_rate": 9.608684210526317e-05, "loss": 0.4006, "step": 34885 }, { "epoch": 1.9535222309329152, "grad_norm": 1.319171667098999, "learning_rate": 9.608657894736843e-05, "loss": 0.3378, "step": 34886 }, { "epoch": 1.9535782282450442, "grad_norm": 1.29500412940979, "learning_rate": 9.608631578947369e-05, "loss": 0.449, "step": 34887 }, { "epoch": 1.9536342255571733, "grad_norm": 1.24713134765625, 
"learning_rate": 9.608605263157895e-05, "loss": 0.4236, "step": 34888 }, { "epoch": 1.9536902228693023, "grad_norm": 6.881576061248779, "learning_rate": 9.608578947368422e-05, "loss": 0.3765, "step": 34889 }, { "epoch": 1.9537462201814313, "grad_norm": 1.4172170162200928, "learning_rate": 9.608552631578948e-05, "loss": 0.4722, "step": 34890 }, { "epoch": 1.9538022174935603, "grad_norm": 1.6852965354919434, "learning_rate": 9.608526315789474e-05, "loss": 0.4372, "step": 34891 }, { "epoch": 1.9538582148056893, "grad_norm": 1.4383089542388916, "learning_rate": 9.6085e-05, "loss": 0.5763, "step": 34892 }, { "epoch": 1.9539142121178183, "grad_norm": 1.2315882444381714, "learning_rate": 9.608473684210527e-05, "loss": 0.3473, "step": 34893 }, { "epoch": 1.9539702094299474, "grad_norm": 1.3040111064910889, "learning_rate": 9.608447368421053e-05, "loss": 0.432, "step": 34894 }, { "epoch": 1.9540262067420764, "grad_norm": 1.5814647674560547, "learning_rate": 9.60842105263158e-05, "loss": 0.5433, "step": 34895 }, { "epoch": 1.9540822040542054, "grad_norm": 1.3125485181808472, "learning_rate": 9.608394736842105e-05, "loss": 0.4704, "step": 34896 }, { "epoch": 1.9541382013663344, "grad_norm": 1.228325605392456, "learning_rate": 9.608368421052632e-05, "loss": 0.3786, "step": 34897 }, { "epoch": 1.9541941986784634, "grad_norm": 1.4973866939544678, "learning_rate": 9.608342105263158e-05, "loss": 0.39, "step": 34898 }, { "epoch": 1.9542501959905925, "grad_norm": 1.1977494955062866, "learning_rate": 9.608315789473684e-05, "loss": 0.4249, "step": 34899 }, { "epoch": 1.9543061933027215, "grad_norm": 1.2408409118652344, "learning_rate": 9.60828947368421e-05, "loss": 0.4704, "step": 34900 }, { "epoch": 1.9543621906148505, "grad_norm": 1.1670175790786743, "learning_rate": 9.608263157894736e-05, "loss": 0.5145, "step": 34901 }, { "epoch": 1.9544181879269795, "grad_norm": 1.4870555400848389, "learning_rate": 9.608236842105264e-05, "loss": 0.4691, "step": 34902 }, { "epoch": 
1.9544741852391085, "grad_norm": 1.1747153997421265, "learning_rate": 9.60821052631579e-05, "loss": 0.3679, "step": 34903 }, { "epoch": 1.9545301825512376, "grad_norm": 1.1523088216781616, "learning_rate": 9.608184210526317e-05, "loss": 0.4698, "step": 34904 }, { "epoch": 1.9545861798633666, "grad_norm": 1.3853576183319092, "learning_rate": 9.608157894736842e-05, "loss": 0.4367, "step": 34905 }, { "epoch": 1.9546421771754956, "grad_norm": 1.1110098361968994, "learning_rate": 9.608131578947369e-05, "loss": 0.456, "step": 34906 }, { "epoch": 1.9546981744876246, "grad_norm": 0.9134936332702637, "learning_rate": 9.608105263157895e-05, "loss": 0.2954, "step": 34907 }, { "epoch": 1.9547541717997536, "grad_norm": 1.0795981884002686, "learning_rate": 9.608078947368422e-05, "loss": 0.3645, "step": 34908 }, { "epoch": 1.9548101691118827, "grad_norm": 1.0911567211151123, "learning_rate": 9.608052631578948e-05, "loss": 0.376, "step": 34909 }, { "epoch": 1.9548661664240117, "grad_norm": 1.2310270071029663, "learning_rate": 9.608026315789474e-05, "loss": 0.5349, "step": 34910 }, { "epoch": 1.9549221637361407, "grad_norm": 1.1885923147201538, "learning_rate": 9.608e-05, "loss": 0.3642, "step": 34911 }, { "epoch": 1.9549781610482697, "grad_norm": 1.2332559823989868, "learning_rate": 9.607973684210527e-05, "loss": 0.4253, "step": 34912 }, { "epoch": 1.9550341583603987, "grad_norm": 1.3652539253234863, "learning_rate": 9.607947368421053e-05, "loss": 0.3514, "step": 34913 }, { "epoch": 1.9550901556725278, "grad_norm": 1.0595307350158691, "learning_rate": 9.60792105263158e-05, "loss": 0.3562, "step": 34914 }, { "epoch": 1.9551461529846568, "grad_norm": 1.3542340993881226, "learning_rate": 9.607894736842105e-05, "loss": 0.342, "step": 34915 }, { "epoch": 1.9552021502967858, "grad_norm": 1.07725191116333, "learning_rate": 9.607868421052631e-05, "loss": 0.3868, "step": 34916 }, { "epoch": 1.9552581476089148, "grad_norm": 1.141705870628357, "learning_rate": 9.607842105263159e-05, "loss": 
0.3354, "step": 34917 }, { "epoch": 1.9553141449210438, "grad_norm": 1.359151005744934, "learning_rate": 9.607815789473685e-05, "loss": 0.5093, "step": 34918 }, { "epoch": 1.9553701422331728, "grad_norm": 1.2323857545852661, "learning_rate": 9.60778947368421e-05, "loss": 0.4207, "step": 34919 }, { "epoch": 1.9554261395453019, "grad_norm": 1.1758780479431152, "learning_rate": 9.607763157894737e-05, "loss": 0.4891, "step": 34920 }, { "epoch": 1.9554821368574309, "grad_norm": 1.0693532228469849, "learning_rate": 9.607736842105264e-05, "loss": 0.3488, "step": 34921 }, { "epoch": 1.95553813416956, "grad_norm": 1.350893497467041, "learning_rate": 9.60771052631579e-05, "loss": 0.4233, "step": 34922 }, { "epoch": 1.955594131481689, "grad_norm": 1.453223705291748, "learning_rate": 9.607684210526316e-05, "loss": 0.4068, "step": 34923 }, { "epoch": 1.955650128793818, "grad_norm": 1.3900030851364136, "learning_rate": 9.607657894736842e-05, "loss": 0.4189, "step": 34924 }, { "epoch": 1.955706126105947, "grad_norm": 1.5687403678894043, "learning_rate": 9.607631578947369e-05, "loss": 0.5702, "step": 34925 }, { "epoch": 1.955762123418076, "grad_norm": 1.1448888778686523, "learning_rate": 9.607605263157895e-05, "loss": 0.5064, "step": 34926 }, { "epoch": 1.955818120730205, "grad_norm": 1.2428531646728516, "learning_rate": 9.607578947368423e-05, "loss": 0.4836, "step": 34927 }, { "epoch": 1.955874118042334, "grad_norm": 1.1179463863372803, "learning_rate": 9.607552631578947e-05, "loss": 0.406, "step": 34928 }, { "epoch": 1.955930115354463, "grad_norm": 1.4065165519714355, "learning_rate": 9.607526315789474e-05, "loss": 0.4229, "step": 34929 }, { "epoch": 1.955986112666592, "grad_norm": 1.1828871965408325, "learning_rate": 9.6075e-05, "loss": 0.569, "step": 34930 }, { "epoch": 1.9560421099787209, "grad_norm": 1.2322351932525635, "learning_rate": 9.607473684210528e-05, "loss": 0.4409, "step": 34931 }, { "epoch": 1.9560981072908499, "grad_norm": 1.1226717233657837, "learning_rate": 
9.607447368421054e-05, "loss": 0.4559, "step": 34932 }, { "epoch": 1.956154104602979, "grad_norm": 1.2956340312957764, "learning_rate": 9.607421052631578e-05, "loss": 0.3332, "step": 34933 }, { "epoch": 1.956210101915108, "grad_norm": 1.3241641521453857, "learning_rate": 9.607394736842106e-05, "loss": 0.4315, "step": 34934 }, { "epoch": 1.956266099227237, "grad_norm": 1.2096668481826782, "learning_rate": 9.607368421052632e-05, "loss": 0.5941, "step": 34935 }, { "epoch": 1.956322096539366, "grad_norm": 1.2623282670974731, "learning_rate": 9.607342105263159e-05, "loss": 0.4363, "step": 34936 }, { "epoch": 1.956378093851495, "grad_norm": 1.1424297094345093, "learning_rate": 9.607315789473685e-05, "loss": 0.4514, "step": 34937 }, { "epoch": 1.956434091163624, "grad_norm": 1.1829099655151367, "learning_rate": 9.607289473684211e-05, "loss": 0.595, "step": 34938 }, { "epoch": 1.956490088475753, "grad_norm": 1.2078707218170166, "learning_rate": 9.607263157894737e-05, "loss": 0.5205, "step": 34939 }, { "epoch": 1.956546085787882, "grad_norm": 1.5366955995559692, "learning_rate": 9.607236842105264e-05, "loss": 0.4761, "step": 34940 }, { "epoch": 1.956602083100011, "grad_norm": 1.944976806640625, "learning_rate": 9.60721052631579e-05, "loss": 0.5052, "step": 34941 }, { "epoch": 1.95665808041214, "grad_norm": 1.1572849750518799, "learning_rate": 9.607184210526316e-05, "loss": 0.4216, "step": 34942 }, { "epoch": 1.956714077724269, "grad_norm": 1.136043906211853, "learning_rate": 9.607157894736842e-05, "loss": 0.5111, "step": 34943 }, { "epoch": 1.956770075036398, "grad_norm": 1.2086769342422485, "learning_rate": 9.60713157894737e-05, "loss": 0.4202, "step": 34944 }, { "epoch": 1.9568260723485271, "grad_norm": 1.2215800285339355, "learning_rate": 9.607105263157895e-05, "loss": 0.3991, "step": 34945 }, { "epoch": 1.9568820696606561, "grad_norm": 1.1990710496902466, "learning_rate": 9.607078947368421e-05, "loss": 0.6209, "step": 34946 }, { "epoch": 1.9569380669727852, "grad_norm": 
1.2651270627975464, "learning_rate": 9.607052631578947e-05, "loss": 0.5738, "step": 34947 }, { "epoch": 1.9569940642849142, "grad_norm": 1.4180033206939697, "learning_rate": 9.607026315789475e-05, "loss": 0.5229, "step": 34948 }, { "epoch": 1.9570500615970432, "grad_norm": 1.2521084547042847, "learning_rate": 9.607000000000001e-05, "loss": 0.449, "step": 34949 }, { "epoch": 1.9571060589091722, "grad_norm": 1.58376944065094, "learning_rate": 9.606973684210527e-05, "loss": 0.4505, "step": 34950 }, { "epoch": 1.9571620562213012, "grad_norm": 1.3174633979797363, "learning_rate": 9.606947368421053e-05, "loss": 0.4375, "step": 34951 }, { "epoch": 1.9572180535334303, "grad_norm": 5.596231937408447, "learning_rate": 9.606921052631579e-05, "loss": 0.5268, "step": 34952 }, { "epoch": 1.9572740508455593, "grad_norm": 1.0874046087265015, "learning_rate": 9.606894736842106e-05, "loss": 0.3645, "step": 34953 }, { "epoch": 1.9573300481576883, "grad_norm": 1.0293734073638916, "learning_rate": 9.606868421052632e-05, "loss": 0.4514, "step": 34954 }, { "epoch": 1.9573860454698173, "grad_norm": 1.1275116205215454, "learning_rate": 9.606842105263158e-05, "loss": 0.366, "step": 34955 }, { "epoch": 1.9574420427819463, "grad_norm": 1.2125002145767212, "learning_rate": 9.606815789473684e-05, "loss": 0.3208, "step": 34956 }, { "epoch": 1.9574980400940754, "grad_norm": 1.4821226596832275, "learning_rate": 9.606789473684211e-05, "loss": 0.4257, "step": 34957 }, { "epoch": 1.9575540374062044, "grad_norm": 1.2202311754226685, "learning_rate": 9.606763157894737e-05, "loss": 0.4591, "step": 34958 }, { "epoch": 1.9576100347183334, "grad_norm": 1.2566643953323364, "learning_rate": 9.606736842105264e-05, "loss": 0.4185, "step": 34959 }, { "epoch": 1.9576660320304624, "grad_norm": 1.2568758726119995, "learning_rate": 9.606710526315789e-05, "loss": 0.4621, "step": 34960 }, { "epoch": 1.9577220293425914, "grad_norm": 1.2447750568389893, "learning_rate": 9.606684210526316e-05, "loss": 0.3596, "step": 
34961 }, { "epoch": 1.9577780266547204, "grad_norm": 1.2768527269363403, "learning_rate": 9.606657894736842e-05, "loss": 0.3643, "step": 34962 }, { "epoch": 1.9578340239668495, "grad_norm": 1.2024080753326416, "learning_rate": 9.60663157894737e-05, "loss": 0.3704, "step": 34963 }, { "epoch": 1.9578900212789785, "grad_norm": 1.0775671005249023, "learning_rate": 9.606605263157896e-05, "loss": 0.4106, "step": 34964 }, { "epoch": 1.9579460185911075, "grad_norm": 1.339693546295166, "learning_rate": 9.606578947368422e-05, "loss": 0.4383, "step": 34965 }, { "epoch": 1.9580020159032365, "grad_norm": 1.3563727140426636, "learning_rate": 9.606552631578948e-05, "loss": 0.6099, "step": 34966 }, { "epoch": 1.9580580132153655, "grad_norm": 1.1151005029678345, "learning_rate": 9.606526315789474e-05, "loss": 0.4951, "step": 34967 }, { "epoch": 1.9581140105274946, "grad_norm": 1.5692121982574463, "learning_rate": 9.606500000000001e-05, "loss": 0.5409, "step": 34968 }, { "epoch": 1.9581700078396236, "grad_norm": 1.467974305152893, "learning_rate": 9.606473684210527e-05, "loss": 0.5498, "step": 34969 }, { "epoch": 1.9582260051517526, "grad_norm": 1.652017593383789, "learning_rate": 9.606447368421053e-05, "loss": 0.6711, "step": 34970 }, { "epoch": 1.9582820024638816, "grad_norm": 1.2267835140228271, "learning_rate": 9.606421052631579e-05, "loss": 0.4458, "step": 34971 }, { "epoch": 1.9583379997760106, "grad_norm": 1.3156280517578125, "learning_rate": 9.606394736842106e-05, "loss": 0.5704, "step": 34972 }, { "epoch": 1.9583939970881397, "grad_norm": 1.1479923725128174, "learning_rate": 9.606368421052632e-05, "loss": 0.4117, "step": 34973 }, { "epoch": 1.9584499944002687, "grad_norm": 1.268945574760437, "learning_rate": 9.606342105263158e-05, "loss": 0.4164, "step": 34974 }, { "epoch": 1.9585059917123977, "grad_norm": 1.2137863636016846, "learning_rate": 9.606315789473684e-05, "loss": 0.5185, "step": 34975 }, { "epoch": 1.9585619890245267, "grad_norm": 1.1835424900054932, 
"learning_rate": 9.606289473684211e-05, "loss": 0.4456, "step": 34976 }, { "epoch": 1.9586179863366557, "grad_norm": 1.148664951324463, "learning_rate": 9.606263157894737e-05, "loss": 0.574, "step": 34977 }, { "epoch": 1.9586739836487848, "grad_norm": 1.1673240661621094, "learning_rate": 9.606236842105263e-05, "loss": 0.3927, "step": 34978 }, { "epoch": 1.9587299809609138, "grad_norm": 1.0870554447174072, "learning_rate": 9.60621052631579e-05, "loss": 0.488, "step": 34979 }, { "epoch": 1.9587859782730428, "grad_norm": 1.343524694442749, "learning_rate": 9.606184210526317e-05, "loss": 0.6659, "step": 34980 }, { "epoch": 1.9588419755851718, "grad_norm": 1.2094429731369019, "learning_rate": 9.606157894736843e-05, "loss": 0.3796, "step": 34981 }, { "epoch": 1.9588979728973008, "grad_norm": 1.2557742595672607, "learning_rate": 9.60613157894737e-05, "loss": 0.4631, "step": 34982 }, { "epoch": 1.9589539702094299, "grad_norm": 1.2624608278274536, "learning_rate": 9.606105263157895e-05, "loss": 0.3857, "step": 34983 }, { "epoch": 1.9590099675215589, "grad_norm": 1.361048698425293, "learning_rate": 9.606078947368422e-05, "loss": 0.4774, "step": 34984 }, { "epoch": 1.959065964833688, "grad_norm": 1.2811528444290161, "learning_rate": 9.606052631578948e-05, "loss": 0.5225, "step": 34985 }, { "epoch": 1.959121962145817, "grad_norm": 1.0644234418869019, "learning_rate": 9.606026315789474e-05, "loss": 0.4803, "step": 34986 }, { "epoch": 1.959177959457946, "grad_norm": 1.3712126016616821, "learning_rate": 9.606000000000001e-05, "loss": 0.5319, "step": 34987 }, { "epoch": 1.959233956770075, "grad_norm": 1.1343204975128174, "learning_rate": 9.605973684210526e-05, "loss": 0.3498, "step": 34988 }, { "epoch": 1.959289954082204, "grad_norm": 1.7177571058273315, "learning_rate": 9.605947368421053e-05, "loss": 0.603, "step": 34989 }, { "epoch": 1.959345951394333, "grad_norm": 1.4027554988861084, "learning_rate": 9.605921052631579e-05, "loss": 0.506, "step": 34990 }, { "epoch": 
1.959401948706462, "grad_norm": 1.1770683526992798, "learning_rate": 9.605894736842106e-05, "loss": 0.491, "step": 34991 }, { "epoch": 1.959457946018591, "grad_norm": 1.2766592502593994, "learning_rate": 9.605868421052631e-05, "loss": 0.545, "step": 34992 }, { "epoch": 1.95951394333072, "grad_norm": 1.336032748222351, "learning_rate": 9.605842105263158e-05, "loss": 0.4822, "step": 34993 }, { "epoch": 1.959569940642849, "grad_norm": 1.2164918184280396, "learning_rate": 9.605815789473684e-05, "loss": 0.402, "step": 34994 }, { "epoch": 1.959625937954978, "grad_norm": 1.2803086042404175, "learning_rate": 9.605789473684212e-05, "loss": 0.4547, "step": 34995 }, { "epoch": 1.959681935267107, "grad_norm": 1.3348603248596191, "learning_rate": 9.605763157894738e-05, "loss": 0.467, "step": 34996 }, { "epoch": 1.9597379325792361, "grad_norm": 1.2480894327163696, "learning_rate": 9.605736842105264e-05, "loss": 0.4815, "step": 34997 }, { "epoch": 1.9597939298913651, "grad_norm": 1.3180657625198364, "learning_rate": 9.60571052631579e-05, "loss": 0.3948, "step": 34998 }, { "epoch": 1.9598499272034942, "grad_norm": 1.760079264640808, "learning_rate": 9.605684210526317e-05, "loss": 0.4587, "step": 34999 }, { "epoch": 1.9599059245156232, "grad_norm": 1.2017862796783447, "learning_rate": 9.605657894736843e-05, "loss": 0.384, "step": 35000 }, { "epoch": 1.9599619218277522, "grad_norm": 1.046779751777649, "learning_rate": 9.605631578947369e-05, "loss": 0.4045, "step": 35001 }, { "epoch": 1.9600179191398812, "grad_norm": 1.3169909715652466, "learning_rate": 9.605605263157895e-05, "loss": 0.5204, "step": 35002 }, { "epoch": 1.9600739164520102, "grad_norm": 1.1759002208709717, "learning_rate": 9.605578947368421e-05, "loss": 0.5466, "step": 35003 }, { "epoch": 1.9601299137641393, "grad_norm": 1.4461716413497925, "learning_rate": 9.605552631578948e-05, "loss": 0.511, "step": 35004 }, { "epoch": 1.9601859110762683, "grad_norm": 1.152579426765442, "learning_rate": 9.605526315789474e-05, 
"loss": 0.387, "step": 35005 }, { "epoch": 1.9602419083883973, "grad_norm": 1.0224753618240356, "learning_rate": 9.6055e-05, "loss": 0.5103, "step": 35006 }, { "epoch": 1.9602979057005263, "grad_norm": 1.0869301557540894, "learning_rate": 9.605473684210526e-05, "loss": 0.3547, "step": 35007 }, { "epoch": 1.9603539030126553, "grad_norm": 1.0646460056304932, "learning_rate": 9.605447368421053e-05, "loss": 0.3916, "step": 35008 }, { "epoch": 1.9604099003247843, "grad_norm": 1.681850552558899, "learning_rate": 9.60542105263158e-05, "loss": 0.3659, "step": 35009 }, { "epoch": 1.9604658976369134, "grad_norm": 1.224524974822998, "learning_rate": 9.605394736842105e-05, "loss": 0.4143, "step": 35010 }, { "epoch": 1.9605218949490424, "grad_norm": 2.4472427368164062, "learning_rate": 9.605368421052631e-05, "loss": 0.3781, "step": 35011 }, { "epoch": 1.9605778922611714, "grad_norm": 1.1017571687698364, "learning_rate": 9.605342105263159e-05, "loss": 0.4201, "step": 35012 }, { "epoch": 1.9606338895733004, "grad_norm": 1.1993101835250854, "learning_rate": 9.605315789473685e-05, "loss": 0.4085, "step": 35013 }, { "epoch": 1.9606898868854294, "grad_norm": 1.0888237953186035, "learning_rate": 9.605289473684212e-05, "loss": 0.4382, "step": 35014 }, { "epoch": 1.9607458841975585, "grad_norm": 1.1229703426361084, "learning_rate": 9.605263157894737e-05, "loss": 0.379, "step": 35015 }, { "epoch": 1.9608018815096875, "grad_norm": 1.3178211450576782, "learning_rate": 9.605236842105264e-05, "loss": 0.3898, "step": 35016 }, { "epoch": 1.9608578788218165, "grad_norm": 1.2040702104568481, "learning_rate": 9.60521052631579e-05, "loss": 0.5029, "step": 35017 }, { "epoch": 1.9609138761339455, "grad_norm": 1.3323034048080444, "learning_rate": 9.605184210526317e-05, "loss": 0.4765, "step": 35018 }, { "epoch": 1.9609698734460745, "grad_norm": 1.4703279733657837, "learning_rate": 9.605157894736843e-05, "loss": 0.5044, "step": 35019 }, { "epoch": 1.9610258707582036, "grad_norm": 1.125908374786377, 
"learning_rate": 9.605131578947368e-05, "loss": 0.4539, "step": 35020 }, { "epoch": 1.9610818680703326, "grad_norm": 1.385450839996338, "learning_rate": 9.605105263157895e-05, "loss": 0.3706, "step": 35021 }, { "epoch": 1.9611378653824616, "grad_norm": 1.18792724609375, "learning_rate": 9.605078947368421e-05, "loss": 0.3762, "step": 35022 }, { "epoch": 1.9611938626945906, "grad_norm": 1.3209445476531982, "learning_rate": 9.605052631578948e-05, "loss": 0.4242, "step": 35023 }, { "epoch": 1.9612498600067196, "grad_norm": 1.3063178062438965, "learning_rate": 9.605026315789474e-05, "loss": 0.429, "step": 35024 }, { "epoch": 1.9613058573188487, "grad_norm": 1.4127497673034668, "learning_rate": 9.605e-05, "loss": 0.4488, "step": 35025 }, { "epoch": 1.9613618546309777, "grad_norm": 1.2043349742889404, "learning_rate": 9.604973684210526e-05, "loss": 0.3887, "step": 35026 }, { "epoch": 1.9614178519431067, "grad_norm": 1.132593035697937, "learning_rate": 9.604947368421054e-05, "loss": 0.4677, "step": 35027 }, { "epoch": 1.9614738492552357, "grad_norm": 1.2407653331756592, "learning_rate": 9.60492105263158e-05, "loss": 0.3957, "step": 35028 }, { "epoch": 1.9615298465673647, "grad_norm": 1.4393200874328613, "learning_rate": 9.604894736842106e-05, "loss": 0.4343, "step": 35029 }, { "epoch": 1.9615858438794938, "grad_norm": 1.5096509456634521, "learning_rate": 9.604868421052632e-05, "loss": 0.3698, "step": 35030 }, { "epoch": 1.9616418411916228, "grad_norm": 1.0236238241195679, "learning_rate": 9.604842105263159e-05, "loss": 0.3599, "step": 35031 }, { "epoch": 1.9616978385037518, "grad_norm": 1.436355471611023, "learning_rate": 9.604815789473685e-05, "loss": 0.5627, "step": 35032 }, { "epoch": 1.9617538358158808, "grad_norm": 1.5737653970718384, "learning_rate": 9.604789473684211e-05, "loss": 0.5378, "step": 35033 }, { "epoch": 1.9618098331280098, "grad_norm": 1.166304588317871, "learning_rate": 9.604763157894737e-05, "loss": 0.489, "step": 35034 }, { "epoch": 
1.9618658304401388, "grad_norm": 1.2401537895202637, "learning_rate": 9.604736842105264e-05, "loss": 0.4317, "step": 35035 }, { "epoch": 1.9619218277522679, "grad_norm": 1.3277195692062378, "learning_rate": 9.60471052631579e-05, "loss": 0.357, "step": 35036 }, { "epoch": 1.9619778250643969, "grad_norm": 3.106485605239868, "learning_rate": 9.604684210526316e-05, "loss": 0.5722, "step": 35037 }, { "epoch": 1.962033822376526, "grad_norm": 1.1998252868652344, "learning_rate": 9.604657894736842e-05, "loss": 0.4039, "step": 35038 }, { "epoch": 1.962089819688655, "grad_norm": 1.1706624031066895, "learning_rate": 9.604631578947368e-05, "loss": 0.4741, "step": 35039 }, { "epoch": 1.962145817000784, "grad_norm": 1.3105096817016602, "learning_rate": 9.604605263157895e-05, "loss": 0.6032, "step": 35040 }, { "epoch": 1.962201814312913, "grad_norm": 1.3602538108825684, "learning_rate": 9.604578947368421e-05, "loss": 0.4094, "step": 35041 }, { "epoch": 1.962257811625042, "grad_norm": 1.3215928077697754, "learning_rate": 9.604552631578949e-05, "loss": 0.4288, "step": 35042 }, { "epoch": 1.962313808937171, "grad_norm": 1.5521959066390991, "learning_rate": 9.604526315789473e-05, "loss": 0.5863, "step": 35043 }, { "epoch": 1.9623698062493, "grad_norm": 1.3338600397109985, "learning_rate": 9.6045e-05, "loss": 0.4718, "step": 35044 }, { "epoch": 1.962425803561429, "grad_norm": 1.1608291864395142, "learning_rate": 9.604473684210527e-05, "loss": 0.4493, "step": 35045 }, { "epoch": 1.962481800873558, "grad_norm": 1.3158161640167236, "learning_rate": 9.604447368421054e-05, "loss": 0.4648, "step": 35046 }, { "epoch": 1.962537798185687, "grad_norm": 1.10756254196167, "learning_rate": 9.604421052631579e-05, "loss": 0.4545, "step": 35047 }, { "epoch": 1.962593795497816, "grad_norm": 1.2917457818984985, "learning_rate": 9.604394736842106e-05, "loss": 0.454, "step": 35048 }, { "epoch": 1.9626497928099451, "grad_norm": 1.4271832704544067, "learning_rate": 9.604368421052632e-05, "loss": 0.4267, 
"step": 35049 }, { "epoch": 1.9627057901220741, "grad_norm": 1.290459156036377, "learning_rate": 9.604342105263159e-05, "loss": 0.3639, "step": 35050 }, { "epoch": 1.9627617874342032, "grad_norm": 1.2243764400482178, "learning_rate": 9.604315789473685e-05, "loss": 0.3729, "step": 35051 }, { "epoch": 1.9628177847463322, "grad_norm": 1.481799840927124, "learning_rate": 9.604289473684211e-05, "loss": 0.507, "step": 35052 }, { "epoch": 1.9628737820584612, "grad_norm": 1.734131932258606, "learning_rate": 9.604263157894737e-05, "loss": 0.5044, "step": 35053 }, { "epoch": 1.9629297793705902, "grad_norm": 1.3061918020248413, "learning_rate": 9.604236842105263e-05, "loss": 0.5672, "step": 35054 }, { "epoch": 1.9629857766827192, "grad_norm": 1.0983169078826904, "learning_rate": 9.60421052631579e-05, "loss": 0.3827, "step": 35055 }, { "epoch": 1.9630417739948482, "grad_norm": 1.2631009817123413, "learning_rate": 9.604184210526316e-05, "loss": 0.422, "step": 35056 }, { "epoch": 1.9630977713069773, "grad_norm": 1.4536410570144653, "learning_rate": 9.604157894736842e-05, "loss": 0.5509, "step": 35057 }, { "epoch": 1.9631537686191063, "grad_norm": 1.2138433456420898, "learning_rate": 9.604131578947368e-05, "loss": 0.5279, "step": 35058 }, { "epoch": 1.9632097659312353, "grad_norm": 1.2809592485427856, "learning_rate": 9.604105263157896e-05, "loss": 0.4929, "step": 35059 }, { "epoch": 1.9632657632433643, "grad_norm": 1.2691136598587036, "learning_rate": 9.604078947368422e-05, "loss": 0.4245, "step": 35060 }, { "epoch": 1.9633217605554933, "grad_norm": 1.5423649549484253, "learning_rate": 9.604052631578948e-05, "loss": 0.5136, "step": 35061 }, { "epoch": 1.9633777578676224, "grad_norm": 1.168105959892273, "learning_rate": 9.604026315789474e-05, "loss": 0.4363, "step": 35062 }, { "epoch": 1.9634337551797514, "grad_norm": 1.3851975202560425, "learning_rate": 9.604000000000001e-05, "loss": 0.4109, "step": 35063 }, { "epoch": 1.9634897524918804, "grad_norm": 1.7474687099456787, 
"learning_rate": 9.603973684210527e-05, "loss": 0.4735, "step": 35064 }, { "epoch": 1.9635457498040094, "grad_norm": 1.3522891998291016, "learning_rate": 9.603947368421053e-05, "loss": 0.4313, "step": 35065 }, { "epoch": 1.9636017471161384, "grad_norm": 1.3846291303634644, "learning_rate": 9.603921052631579e-05, "loss": 0.4365, "step": 35066 }, { "epoch": 1.9636577444282675, "grad_norm": 1.3774664402008057, "learning_rate": 9.603894736842106e-05, "loss": 0.5081, "step": 35067 }, { "epoch": 1.9637137417403965, "grad_norm": 1.0783714056015015, "learning_rate": 9.603868421052632e-05, "loss": 0.3773, "step": 35068 }, { "epoch": 1.9637697390525255, "grad_norm": 1.2375034093856812, "learning_rate": 9.60384210526316e-05, "loss": 0.431, "step": 35069 }, { "epoch": 1.9638257363646545, "grad_norm": 1.3854904174804688, "learning_rate": 9.603815789473684e-05, "loss": 0.5805, "step": 35070 }, { "epoch": 1.9638817336767835, "grad_norm": 1.1335335969924927, "learning_rate": 9.60378947368421e-05, "loss": 0.4852, "step": 35071 }, { "epoch": 1.9639377309889126, "grad_norm": 1.6117701530456543, "learning_rate": 9.603763157894737e-05, "loss": 0.4522, "step": 35072 }, { "epoch": 1.9639937283010416, "grad_norm": 1.0952625274658203, "learning_rate": 9.603736842105263e-05, "loss": 0.4284, "step": 35073 }, { "epoch": 1.9640497256131706, "grad_norm": 1.6762878894805908, "learning_rate": 9.60371052631579e-05, "loss": 0.4316, "step": 35074 }, { "epoch": 1.9641057229252996, "grad_norm": 1.3426074981689453, "learning_rate": 9.603684210526315e-05, "loss": 0.4859, "step": 35075 }, { "epoch": 1.9641617202374286, "grad_norm": 1.27347731590271, "learning_rate": 9.603657894736843e-05, "loss": 0.5075, "step": 35076 }, { "epoch": 1.9642177175495577, "grad_norm": 1.244230031967163, "learning_rate": 9.603631578947369e-05, "loss": 0.4861, "step": 35077 }, { "epoch": 1.9642737148616867, "grad_norm": 1.130865216255188, "learning_rate": 9.603605263157896e-05, "loss": 0.5262, "step": 35078 }, { "epoch": 
1.9643297121738157, "grad_norm": 1.3578885793685913, "learning_rate": 9.603578947368422e-05, "loss": 0.437, "step": 35079 }, { "epoch": 1.9643857094859447, "grad_norm": 1.1074336767196655, "learning_rate": 9.603552631578948e-05, "loss": 0.4588, "step": 35080 }, { "epoch": 1.9644417067980737, "grad_norm": 1.6396186351776123, "learning_rate": 9.603526315789474e-05, "loss": 0.3646, "step": 35081 }, { "epoch": 1.9644977041102027, "grad_norm": 0.9948999881744385, "learning_rate": 9.603500000000001e-05, "loss": 0.3672, "step": 35082 }, { "epoch": 1.9645537014223318, "grad_norm": 1.91913902759552, "learning_rate": 9.603473684210527e-05, "loss": 0.5184, "step": 35083 }, { "epoch": 1.9646096987344608, "grad_norm": 2.5451409816741943, "learning_rate": 9.603447368421053e-05, "loss": 0.4887, "step": 35084 }, { "epoch": 1.9646656960465898, "grad_norm": 1.2871602773666382, "learning_rate": 9.603421052631579e-05, "loss": 0.4339, "step": 35085 }, { "epoch": 1.9647216933587188, "grad_norm": 1.2026474475860596, "learning_rate": 9.603394736842106e-05, "loss": 0.3858, "step": 35086 }, { "epoch": 1.9647776906708478, "grad_norm": 1.516147494316101, "learning_rate": 9.603368421052632e-05, "loss": 0.4047, "step": 35087 }, { "epoch": 1.9648336879829769, "grad_norm": 1.432829737663269, "learning_rate": 9.603342105263158e-05, "loss": 0.4611, "step": 35088 }, { "epoch": 1.9648896852951059, "grad_norm": 1.2240043878555298, "learning_rate": 9.603315789473684e-05, "loss": 0.402, "step": 35089 }, { "epoch": 1.964945682607235, "grad_norm": 1.1273319721221924, "learning_rate": 9.60328947368421e-05, "loss": 0.4584, "step": 35090 }, { "epoch": 1.965001679919364, "grad_norm": 1.4825795888900757, "learning_rate": 9.603263157894738e-05, "loss": 0.6356, "step": 35091 }, { "epoch": 1.965057677231493, "grad_norm": 1.0835208892822266, "learning_rate": 9.603236842105264e-05, "loss": 0.4487, "step": 35092 }, { "epoch": 1.965113674543622, "grad_norm": 1.8915773630142212, "learning_rate": 9.60321052631579e-05, 
"loss": 0.5894, "step": 35093 }, { "epoch": 1.965169671855751, "grad_norm": 1.3294094800949097, "learning_rate": 9.603184210526316e-05, "loss": 0.452, "step": 35094 }, { "epoch": 1.96522566916788, "grad_norm": 1.1674472093582153, "learning_rate": 9.603157894736843e-05, "loss": 0.3874, "step": 35095 }, { "epoch": 1.965281666480009, "grad_norm": 1.1369107961654663, "learning_rate": 9.603131578947369e-05, "loss": 0.5593, "step": 35096 }, { "epoch": 1.965337663792138, "grad_norm": 1.2223368883132935, "learning_rate": 9.603105263157896e-05, "loss": 0.4747, "step": 35097 }, { "epoch": 1.965393661104267, "grad_norm": 1.2368556261062622, "learning_rate": 9.603078947368421e-05, "loss": 0.5067, "step": 35098 }, { "epoch": 1.965449658416396, "grad_norm": 1.1085302829742432, "learning_rate": 9.603052631578948e-05, "loss": 0.4162, "step": 35099 }, { "epoch": 1.965505655728525, "grad_norm": 1.0740025043487549, "learning_rate": 9.603026315789474e-05, "loss": 0.3344, "step": 35100 }, { "epoch": 1.965561653040654, "grad_norm": 1.219307541847229, "learning_rate": 9.603000000000001e-05, "loss": 0.536, "step": 35101 }, { "epoch": 1.9656176503527831, "grad_norm": 1.2384260892868042, "learning_rate": 9.602973684210526e-05, "loss": 0.361, "step": 35102 }, { "epoch": 1.9656736476649121, "grad_norm": 1.2756088972091675, "learning_rate": 9.602947368421053e-05, "loss": 0.4436, "step": 35103 }, { "epoch": 1.9657296449770412, "grad_norm": 1.2073445320129395, "learning_rate": 9.602921052631579e-05, "loss": 0.3952, "step": 35104 }, { "epoch": 1.9657856422891702, "grad_norm": 1.1042813062667847, "learning_rate": 9.602894736842107e-05, "loss": 0.3114, "step": 35105 }, { "epoch": 1.9658416396012992, "grad_norm": 1.1160542964935303, "learning_rate": 9.602868421052633e-05, "loss": 0.4083, "step": 35106 }, { "epoch": 1.9658976369134282, "grad_norm": 1.1463243961334229, "learning_rate": 9.602842105263157e-05, "loss": 0.4085, "step": 35107 }, { "epoch": 1.9659536342255572, "grad_norm": 
1.1548633575439453, "learning_rate": 9.602815789473685e-05, "loss": 0.3938, "step": 35108 }, { "epoch": 1.9660096315376863, "grad_norm": 1.4418126344680786, "learning_rate": 9.60278947368421e-05, "loss": 0.4592, "step": 35109 }, { "epoch": 1.9660656288498153, "grad_norm": 1.1367462873458862, "learning_rate": 9.602763157894738e-05, "loss": 0.3902, "step": 35110 }, { "epoch": 1.9661216261619443, "grad_norm": 1.1419309377670288, "learning_rate": 9.602736842105264e-05, "loss": 0.4165, "step": 35111 }, { "epoch": 1.9661776234740733, "grad_norm": 1.337330937385559, "learning_rate": 9.60271052631579e-05, "loss": 0.4509, "step": 35112 }, { "epoch": 1.9662336207862023, "grad_norm": 1.4922298192977905, "learning_rate": 9.602684210526316e-05, "loss": 0.3353, "step": 35113 }, { "epoch": 1.9662896180983314, "grad_norm": 1.513649344444275, "learning_rate": 9.602657894736843e-05, "loss": 0.5482, "step": 35114 }, { "epoch": 1.9663456154104604, "grad_norm": 1.0241405963897705, "learning_rate": 9.602631578947369e-05, "loss": 0.4572, "step": 35115 }, { "epoch": 1.9664016127225894, "grad_norm": 1.1362054347991943, "learning_rate": 9.602605263157895e-05, "loss": 0.4123, "step": 35116 }, { "epoch": 1.9664576100347184, "grad_norm": 1.225488543510437, "learning_rate": 9.602578947368421e-05, "loss": 0.5372, "step": 35117 }, { "epoch": 1.9665136073468474, "grad_norm": 1.6217612028121948, "learning_rate": 9.602552631578948e-05, "loss": 0.4319, "step": 35118 }, { "epoch": 1.9665696046589765, "grad_norm": 1.2428851127624512, "learning_rate": 9.602526315789474e-05, "loss": 0.4407, "step": 35119 }, { "epoch": 1.9666256019711055, "grad_norm": 1.1516507863998413, "learning_rate": 9.6025e-05, "loss": 0.5326, "step": 35120 }, { "epoch": 1.9666815992832345, "grad_norm": 1.1538132429122925, "learning_rate": 9.602473684210526e-05, "loss": 0.3408, "step": 35121 }, { "epoch": 1.9667375965953635, "grad_norm": 1.265317678451538, "learning_rate": 9.602447368421054e-05, "loss": 0.4446, "step": 35122 }, { 
"epoch": 1.9667935939074925, "grad_norm": 1.145520806312561, "learning_rate": 9.60242105263158e-05, "loss": 0.3781, "step": 35123 }, { "epoch": 1.9668495912196216, "grad_norm": 1.13011634349823, "learning_rate": 9.602394736842106e-05, "loss": 0.4152, "step": 35124 }, { "epoch": 1.9669055885317506, "grad_norm": 1.4513499736785889, "learning_rate": 9.602368421052632e-05, "loss": 0.5631, "step": 35125 }, { "epoch": 1.9669615858438796, "grad_norm": 1.3004796504974365, "learning_rate": 9.602342105263157e-05, "loss": 0.4229, "step": 35126 }, { "epoch": 1.9670175831560086, "grad_norm": 1.1182887554168701, "learning_rate": 9.602315789473685e-05, "loss": 0.3526, "step": 35127 }, { "epoch": 1.9670735804681376, "grad_norm": 1.6951583623886108, "learning_rate": 9.602289473684211e-05, "loss": 0.3822, "step": 35128 }, { "epoch": 1.9671295777802666, "grad_norm": 1.7151211500167847, "learning_rate": 9.602263157894738e-05, "loss": 0.5269, "step": 35129 }, { "epoch": 1.9671855750923957, "grad_norm": 1.3853042125701904, "learning_rate": 9.602236842105263e-05, "loss": 0.4884, "step": 35130 }, { "epoch": 1.9672415724045247, "grad_norm": 1.8636727333068848, "learning_rate": 9.60221052631579e-05, "loss": 0.4545, "step": 35131 }, { "epoch": 1.9672975697166537, "grad_norm": 1.3396034240722656, "learning_rate": 9.602184210526316e-05, "loss": 0.5325, "step": 35132 }, { "epoch": 1.9673535670287827, "grad_norm": 5.555102348327637, "learning_rate": 9.602157894736843e-05, "loss": 0.4265, "step": 35133 }, { "epoch": 1.9674095643409117, "grad_norm": 1.3711774349212646, "learning_rate": 9.60213157894737e-05, "loss": 0.3228, "step": 35134 }, { "epoch": 1.9674655616530408, "grad_norm": 1.1278173923492432, "learning_rate": 9.602105263157895e-05, "loss": 0.4273, "step": 35135 }, { "epoch": 1.9675215589651698, "grad_norm": 1.406720519065857, "learning_rate": 9.602078947368421e-05, "loss": 0.507, "step": 35136 }, { "epoch": 1.9675775562772988, "grad_norm": 1.2888717651367188, "learning_rate": 
9.602052631578949e-05, "loss": 0.5419, "step": 35137 }, { "epoch": 1.9676335535894278, "grad_norm": 2.0798962116241455, "learning_rate": 9.602026315789475e-05, "loss": 0.5126, "step": 35138 }, { "epoch": 1.9676895509015568, "grad_norm": 1.1820476055145264, "learning_rate": 9.602e-05, "loss": 0.5528, "step": 35139 }, { "epoch": 1.9677455482136859, "grad_norm": 1.4389581680297852, "learning_rate": 9.601973684210527e-05, "loss": 0.4277, "step": 35140 }, { "epoch": 1.9678015455258149, "grad_norm": 1.1537623405456543, "learning_rate": 9.601947368421053e-05, "loss": 0.4237, "step": 35141 }, { "epoch": 1.967857542837944, "grad_norm": 1.3462848663330078, "learning_rate": 9.60192105263158e-05, "loss": 0.4196, "step": 35142 }, { "epoch": 1.967913540150073, "grad_norm": 1.516627550125122, "learning_rate": 9.601894736842106e-05, "loss": 0.5463, "step": 35143 }, { "epoch": 1.967969537462202, "grad_norm": 1.5292978286743164, "learning_rate": 9.601868421052632e-05, "loss": 0.4922, "step": 35144 }, { "epoch": 1.968025534774331, "grad_norm": 1.6563143730163574, "learning_rate": 9.601842105263158e-05, "loss": 0.6334, "step": 35145 }, { "epoch": 1.96808153208646, "grad_norm": 1.3477717638015747, "learning_rate": 9.601815789473685e-05, "loss": 0.3483, "step": 35146 }, { "epoch": 1.968137529398589, "grad_norm": 1.1159671545028687, "learning_rate": 9.601789473684211e-05, "loss": 0.4548, "step": 35147 }, { "epoch": 1.968193526710718, "grad_norm": 1.2092177867889404, "learning_rate": 9.601763157894737e-05, "loss": 0.461, "step": 35148 }, { "epoch": 1.968249524022847, "grad_norm": 1.286135196685791, "learning_rate": 9.601736842105263e-05, "loss": 0.448, "step": 35149 }, { "epoch": 1.968305521334976, "grad_norm": 1.833780288696289, "learning_rate": 9.60171052631579e-05, "loss": 0.5254, "step": 35150 }, { "epoch": 1.968361518647105, "grad_norm": 1.0451806783676147, "learning_rate": 9.601684210526316e-05, "loss": 0.4319, "step": 35151 }, { "epoch": 1.968417515959234, "grad_norm": 
1.2672401666641235, "learning_rate": 9.601657894736844e-05, "loss": 0.4771, "step": 35152 }, { "epoch": 1.968473513271363, "grad_norm": 1.365888237953186, "learning_rate": 9.601631578947368e-05, "loss": 0.3488, "step": 35153 }, { "epoch": 1.9685295105834921, "grad_norm": 1.1900831460952759, "learning_rate": 9.601605263157896e-05, "loss": 0.3894, "step": 35154 }, { "epoch": 1.9685855078956211, "grad_norm": 1.4087401628494263, "learning_rate": 9.601578947368422e-05, "loss": 0.3823, "step": 35155 }, { "epoch": 1.9686415052077502, "grad_norm": 1.3083221912384033, "learning_rate": 9.601552631578949e-05, "loss": 0.5053, "step": 35156 }, { "epoch": 1.9686975025198792, "grad_norm": 1.2480884790420532, "learning_rate": 9.601526315789473e-05, "loss": 0.4257, "step": 35157 }, { "epoch": 1.9687534998320082, "grad_norm": 1.2438790798187256, "learning_rate": 9.6015e-05, "loss": 0.3915, "step": 35158 }, { "epoch": 1.9688094971441372, "grad_norm": 1.1964640617370605, "learning_rate": 9.601473684210527e-05, "loss": 0.3754, "step": 35159 }, { "epoch": 1.9688654944562662, "grad_norm": 1.8770874738693237, "learning_rate": 9.601447368421053e-05, "loss": 0.3471, "step": 35160 }, { "epoch": 1.9689214917683953, "grad_norm": 1.4485785961151123, "learning_rate": 9.60142105263158e-05, "loss": 0.6088, "step": 35161 }, { "epoch": 1.9689774890805243, "grad_norm": 1.4133427143096924, "learning_rate": 9.601394736842105e-05, "loss": 0.5865, "step": 35162 }, { "epoch": 1.9690334863926533, "grad_norm": 1.1782273054122925, "learning_rate": 9.601368421052632e-05, "loss": 0.4794, "step": 35163 }, { "epoch": 1.9690894837047823, "grad_norm": 1.2187813520431519, "learning_rate": 9.601342105263158e-05, "loss": 0.3984, "step": 35164 }, { "epoch": 1.9691454810169113, "grad_norm": 1.7584083080291748, "learning_rate": 9.601315789473685e-05, "loss": 0.496, "step": 35165 }, { "epoch": 1.9692014783290404, "grad_norm": 1.381347894668579, "learning_rate": 9.601289473684211e-05, "loss": 0.4182, "step": 35166 }, { 
"epoch": 1.9692574756411694, "grad_norm": 1.3327884674072266, "learning_rate": 9.601263157894737e-05, "loss": 0.5346, "step": 35167 }, { "epoch": 1.9693134729532984, "grad_norm": 1.390777349472046, "learning_rate": 9.601236842105263e-05, "loss": 0.4846, "step": 35168 }, { "epoch": 1.9693694702654274, "grad_norm": 1.268707275390625, "learning_rate": 9.60121052631579e-05, "loss": 0.3648, "step": 35169 }, { "epoch": 1.9694254675775564, "grad_norm": 1.09173583984375, "learning_rate": 9.601184210526317e-05, "loss": 0.368, "step": 35170 }, { "epoch": 1.9694814648896855, "grad_norm": 1.4485660791397095, "learning_rate": 9.601157894736843e-05, "loss": 0.462, "step": 35171 }, { "epoch": 1.9695374622018145, "grad_norm": 1.302014708518982, "learning_rate": 9.601131578947369e-05, "loss": 0.4345, "step": 35172 }, { "epoch": 1.9695934595139435, "grad_norm": 1.0750302076339722, "learning_rate": 9.601105263157896e-05, "loss": 0.3762, "step": 35173 }, { "epoch": 1.9696494568260725, "grad_norm": 1.3170346021652222, "learning_rate": 9.601078947368422e-05, "loss": 0.4624, "step": 35174 }, { "epoch": 1.9697054541382015, "grad_norm": 1.2083524465560913, "learning_rate": 9.601052631578948e-05, "loss": 0.4129, "step": 35175 }, { "epoch": 1.9697614514503305, "grad_norm": 1.0780861377716064, "learning_rate": 9.601026315789474e-05, "loss": 0.3793, "step": 35176 }, { "epoch": 1.9698174487624596, "grad_norm": 1.239497184753418, "learning_rate": 9.601e-05, "loss": 0.5152, "step": 35177 }, { "epoch": 1.9698734460745886, "grad_norm": 1.0269932746887207, "learning_rate": 9.600973684210527e-05, "loss": 0.3444, "step": 35178 }, { "epoch": 1.9699294433867174, "grad_norm": 1.3105684518814087, "learning_rate": 9.600947368421053e-05, "loss": 0.4618, "step": 35179 }, { "epoch": 1.9699854406988464, "grad_norm": 1.0756056308746338, "learning_rate": 9.600921052631579e-05, "loss": 0.3688, "step": 35180 }, { "epoch": 1.9700414380109754, "grad_norm": 1.748188853263855, "learning_rate": 9.600894736842105e-05, 
"loss": 0.6373, "step": 35181 }, { "epoch": 1.9700974353231044, "grad_norm": 1.3499687910079956, "learning_rate": 9.600868421052632e-05, "loss": 0.5859, "step": 35182 }, { "epoch": 1.9701534326352335, "grad_norm": 1.5669392347335815, "learning_rate": 9.600842105263158e-05, "loss": 0.4076, "step": 35183 }, { "epoch": 1.9702094299473625, "grad_norm": 1.1693569421768188, "learning_rate": 9.600815789473686e-05, "loss": 0.5019, "step": 35184 }, { "epoch": 1.9702654272594915, "grad_norm": 1.2108407020568848, "learning_rate": 9.60078947368421e-05, "loss": 0.4216, "step": 35185 }, { "epoch": 1.9703214245716205, "grad_norm": 1.2133065462112427, "learning_rate": 9.600763157894738e-05, "loss": 0.3984, "step": 35186 }, { "epoch": 1.9703774218837495, "grad_norm": 3.837827205657959, "learning_rate": 9.600736842105264e-05, "loss": 0.3495, "step": 35187 }, { "epoch": 1.9704334191958786, "grad_norm": 1.2708792686462402, "learning_rate": 9.600710526315791e-05, "loss": 0.4606, "step": 35188 }, { "epoch": 1.9704894165080076, "grad_norm": 1.0991908311843872, "learning_rate": 9.600684210526317e-05, "loss": 0.3249, "step": 35189 }, { "epoch": 1.9705454138201366, "grad_norm": 1.2925313711166382, "learning_rate": 9.600657894736843e-05, "loss": 0.4992, "step": 35190 }, { "epoch": 1.9706014111322656, "grad_norm": 1.2970471382141113, "learning_rate": 9.600631578947369e-05, "loss": 0.5117, "step": 35191 }, { "epoch": 1.9706574084443946, "grad_norm": 1.265629529953003, "learning_rate": 9.600605263157895e-05, "loss": 0.4824, "step": 35192 }, { "epoch": 1.9707134057565237, "grad_norm": 1.2016805410385132, "learning_rate": 9.600578947368422e-05, "loss": 0.4069, "step": 35193 }, { "epoch": 1.9707694030686527, "grad_norm": 1.1600769758224487, "learning_rate": 9.600552631578947e-05, "loss": 0.3889, "step": 35194 }, { "epoch": 1.9708254003807817, "grad_norm": 2.4386913776397705, "learning_rate": 9.600526315789474e-05, "loss": 0.3638, "step": 35195 }, { "epoch": 1.9708813976929107, "grad_norm": 
1.1993727684020996, "learning_rate": 9.6005e-05, "loss": 0.5336, "step": 35196 }, { "epoch": 1.9709373950050397, "grad_norm": 1.071684718132019, "learning_rate": 9.600473684210527e-05, "loss": 0.4054, "step": 35197 }, { "epoch": 1.9709933923171687, "grad_norm": 1.1378376483917236, "learning_rate": 9.600447368421053e-05, "loss": 0.4152, "step": 35198 }, { "epoch": 1.9710493896292978, "grad_norm": 1.4601490497589111, "learning_rate": 9.600421052631579e-05, "loss": 0.5032, "step": 35199 }, { "epoch": 1.9711053869414268, "grad_norm": 1.1496762037277222, "learning_rate": 9.600394736842105e-05, "loss": 0.3943, "step": 35200 }, { "epoch": 1.9711613842535558, "grad_norm": 1.4457305669784546, "learning_rate": 9.600368421052633e-05, "loss": 0.4355, "step": 35201 }, { "epoch": 1.9712173815656848, "grad_norm": 1.0772647857666016, "learning_rate": 9.600342105263159e-05, "loss": 0.4467, "step": 35202 }, { "epoch": 1.9712733788778138, "grad_norm": 1.120788812637329, "learning_rate": 9.600315789473685e-05, "loss": 0.3564, "step": 35203 }, { "epoch": 1.9713293761899429, "grad_norm": 1.5987862348556519, "learning_rate": 9.60028947368421e-05, "loss": 0.4844, "step": 35204 }, { "epoch": 1.9713853735020719, "grad_norm": 1.2389825582504272, "learning_rate": 9.600263157894738e-05, "loss": 0.5305, "step": 35205 }, { "epoch": 1.971441370814201, "grad_norm": 1.3866772651672363, "learning_rate": 9.600236842105264e-05, "loss": 0.4768, "step": 35206 }, { "epoch": 1.97149736812633, "grad_norm": 1.2331037521362305, "learning_rate": 9.600210526315791e-05, "loss": 0.5245, "step": 35207 }, { "epoch": 1.971553365438459, "grad_norm": 1.3958488702774048, "learning_rate": 9.600184210526316e-05, "loss": 0.5676, "step": 35208 }, { "epoch": 1.971609362750588, "grad_norm": 1.4546359777450562, "learning_rate": 9.600157894736842e-05, "loss": 0.4586, "step": 35209 }, { "epoch": 1.971665360062717, "grad_norm": 1.0753148794174194, "learning_rate": 9.600131578947369e-05, "loss": 0.4909, "step": 35210 }, { 
"epoch": 1.971721357374846, "grad_norm": 1.093936800956726, "learning_rate": 9.600105263157895e-05, "loss": 0.4042, "step": 35211 }, { "epoch": 1.971777354686975, "grad_norm": 1.210041880607605, "learning_rate": 9.600078947368421e-05, "loss": 0.4656, "step": 35212 }, { "epoch": 1.971833351999104, "grad_norm": 1.2268257141113281, "learning_rate": 9.600052631578947e-05, "loss": 0.5345, "step": 35213 }, { "epoch": 1.971889349311233, "grad_norm": 1.0761581659317017, "learning_rate": 9.600026315789474e-05, "loss": 0.3414, "step": 35214 }, { "epoch": 1.971945346623362, "grad_norm": 1.282917857170105, "learning_rate": 9.6e-05, "loss": 0.4907, "step": 35215 }, { "epoch": 1.972001343935491, "grad_norm": 1.3124475479125977, "learning_rate": 9.599973684210528e-05, "loss": 0.4386, "step": 35216 }, { "epoch": 1.97205734124762, "grad_norm": 1.1063408851623535, "learning_rate": 9.599947368421052e-05, "loss": 0.3806, "step": 35217 }, { "epoch": 1.9721133385597491, "grad_norm": 1.3533333539962769, "learning_rate": 9.59992105263158e-05, "loss": 0.4063, "step": 35218 }, { "epoch": 1.9721693358718781, "grad_norm": 1.1129554510116577, "learning_rate": 9.599894736842105e-05, "loss": 0.4131, "step": 35219 }, { "epoch": 1.9722253331840072, "grad_norm": 1.182013750076294, "learning_rate": 9.599868421052633e-05, "loss": 0.4233, "step": 35220 }, { "epoch": 1.9722813304961362, "grad_norm": 1.1397662162780762, "learning_rate": 9.599842105263159e-05, "loss": 0.4027, "step": 35221 }, { "epoch": 1.9723373278082652, "grad_norm": 1.0963093042373657, "learning_rate": 9.599815789473685e-05, "loss": 0.3613, "step": 35222 }, { "epoch": 1.9723933251203942, "grad_norm": 1.1787842512130737, "learning_rate": 9.599789473684211e-05, "loss": 0.4715, "step": 35223 }, { "epoch": 1.9724493224325232, "grad_norm": 1.245827078819275, "learning_rate": 9.599763157894738e-05, "loss": 0.448, "step": 35224 }, { "epoch": 1.9725053197446523, "grad_norm": 1.415168046951294, "learning_rate": 9.599736842105264e-05, "loss": 
0.4246, "step": 35225 }, { "epoch": 1.9725613170567813, "grad_norm": 1.21942937374115, "learning_rate": 9.59971052631579e-05, "loss": 0.5603, "step": 35226 }, { "epoch": 1.9726173143689103, "grad_norm": 1.5196648836135864, "learning_rate": 9.599684210526316e-05, "loss": 0.4951, "step": 35227 }, { "epoch": 1.9726733116810393, "grad_norm": 1.3526822328567505, "learning_rate": 9.599657894736842e-05, "loss": 0.5213, "step": 35228 }, { "epoch": 1.9727293089931683, "grad_norm": 1.5978784561157227, "learning_rate": 9.599631578947369e-05, "loss": 0.4612, "step": 35229 }, { "epoch": 1.9727853063052974, "grad_norm": 1.035378336906433, "learning_rate": 9.599605263157895e-05, "loss": 0.3565, "step": 35230 }, { "epoch": 1.9728413036174264, "grad_norm": 1.2160054445266724, "learning_rate": 9.599578947368421e-05, "loss": 0.4434, "step": 35231 }, { "epoch": 1.9728973009295554, "grad_norm": 1.7078431844711304, "learning_rate": 9.599552631578947e-05, "loss": 0.5087, "step": 35232 }, { "epoch": 1.9729532982416844, "grad_norm": 1.4370431900024414, "learning_rate": 9.599526315789475e-05, "loss": 0.6214, "step": 35233 }, { "epoch": 1.9730092955538134, "grad_norm": 1.3612529039382935, "learning_rate": 9.5995e-05, "loss": 0.3608, "step": 35234 }, { "epoch": 1.9730652928659425, "grad_norm": 1.5312025547027588, "learning_rate": 9.599473684210526e-05, "loss": 0.6006, "step": 35235 }, { "epoch": 1.9731212901780715, "grad_norm": 1.21440851688385, "learning_rate": 9.599447368421052e-05, "loss": 0.4015, "step": 35236 }, { "epoch": 1.9731772874902005, "grad_norm": 1.1395652294158936, "learning_rate": 9.59942105263158e-05, "loss": 0.3825, "step": 35237 }, { "epoch": 1.9732332848023295, "grad_norm": 1.1796191930770874, "learning_rate": 9.599394736842106e-05, "loss": 0.3271, "step": 35238 }, { "epoch": 1.9732892821144585, "grad_norm": 1.3698086738586426, "learning_rate": 9.599368421052633e-05, "loss": 0.4568, "step": 35239 }, { "epoch": 1.9733452794265876, "grad_norm": 1.324042797088623, 
"learning_rate": 9.599342105263158e-05, "loss": 0.6767, "step": 35240 }, { "epoch": 1.9734012767387166, "grad_norm": 1.1679190397262573, "learning_rate": 9.599315789473685e-05, "loss": 0.4078, "step": 35241 }, { "epoch": 1.9734572740508456, "grad_norm": 1.365000605583191, "learning_rate": 9.599289473684211e-05, "loss": 0.4339, "step": 35242 }, { "epoch": 1.9735132713629746, "grad_norm": 1.2375390529632568, "learning_rate": 9.599263157894738e-05, "loss": 0.4164, "step": 35243 }, { "epoch": 1.9735692686751036, "grad_norm": 1.109804630279541, "learning_rate": 9.599236842105264e-05, "loss": 0.4037, "step": 35244 }, { "epoch": 1.9736252659872326, "grad_norm": 1.1338765621185303, "learning_rate": 9.599210526315789e-05, "loss": 0.406, "step": 35245 }, { "epoch": 1.9736812632993617, "grad_norm": 1.3704458475112915, "learning_rate": 9.599184210526316e-05, "loss": 0.5781, "step": 35246 }, { "epoch": 1.9737372606114907, "grad_norm": 1.5784547328948975, "learning_rate": 9.599157894736842e-05, "loss": 0.5274, "step": 35247 }, { "epoch": 1.9737932579236197, "grad_norm": 1.4383635520935059, "learning_rate": 9.59913157894737e-05, "loss": 0.5551, "step": 35248 }, { "epoch": 1.9738492552357487, "grad_norm": 1.0930918455123901, "learning_rate": 9.599105263157894e-05, "loss": 0.4649, "step": 35249 }, { "epoch": 1.9739052525478777, "grad_norm": 1.5893990993499756, "learning_rate": 9.599078947368421e-05, "loss": 0.3964, "step": 35250 }, { "epoch": 1.9739612498600068, "grad_norm": 1.1147282123565674, "learning_rate": 9.599052631578947e-05, "loss": 0.5089, "step": 35251 }, { "epoch": 1.9740172471721358, "grad_norm": 1.2224620580673218, "learning_rate": 9.599026315789475e-05, "loss": 0.384, "step": 35252 }, { "epoch": 1.9740732444842648, "grad_norm": 1.1296567916870117, "learning_rate": 9.599000000000001e-05, "loss": 0.3237, "step": 35253 }, { "epoch": 1.9741292417963938, "grad_norm": 1.2474920749664307, "learning_rate": 9.598973684210527e-05, "loss": 0.3414, "step": 35254 }, { "epoch": 
1.9741852391085228, "grad_norm": 1.3443057537078857, "learning_rate": 9.598947368421053e-05, "loss": 0.3491, "step": 35255 }, { "epoch": 1.9742412364206519, "grad_norm": 1.239501714706421, "learning_rate": 9.59892105263158e-05, "loss": 0.4116, "step": 35256 }, { "epoch": 1.9742972337327809, "grad_norm": 1.2772403955459595, "learning_rate": 9.598894736842106e-05, "loss": 0.4355, "step": 35257 }, { "epoch": 1.97435323104491, "grad_norm": 1.468505620956421, "learning_rate": 9.598868421052632e-05, "loss": 0.5805, "step": 35258 }, { "epoch": 1.974409228357039, "grad_norm": 1.5236645936965942, "learning_rate": 9.598842105263158e-05, "loss": 0.5462, "step": 35259 }, { "epoch": 1.974465225669168, "grad_norm": 1.3489094972610474, "learning_rate": 9.598815789473685e-05, "loss": 0.4622, "step": 35260 }, { "epoch": 1.974521222981297, "grad_norm": 1.4805444478988647, "learning_rate": 9.598789473684211e-05, "loss": 0.4942, "step": 35261 }, { "epoch": 1.9745772202934257, "grad_norm": 1.370435118675232, "learning_rate": 9.598763157894737e-05, "loss": 0.3483, "step": 35262 }, { "epoch": 1.9746332176055548, "grad_norm": 1.4066795110702515, "learning_rate": 9.598736842105263e-05, "loss": 0.5351, "step": 35263 }, { "epoch": 1.9746892149176838, "grad_norm": 1.5087624788284302, "learning_rate": 9.598710526315789e-05, "loss": 0.5398, "step": 35264 }, { "epoch": 1.9747452122298128, "grad_norm": 1.315524935722351, "learning_rate": 9.598684210526317e-05, "loss": 0.4398, "step": 35265 }, { "epoch": 1.9748012095419418, "grad_norm": 1.300810694694519, "learning_rate": 9.598657894736842e-05, "loss": 0.4372, "step": 35266 }, { "epoch": 1.9748572068540708, "grad_norm": 1.5306553840637207, "learning_rate": 9.598631578947368e-05, "loss": 0.4294, "step": 35267 }, { "epoch": 1.9749132041661999, "grad_norm": 1.1755743026733398, "learning_rate": 9.598605263157894e-05, "loss": 0.4432, "step": 35268 }, { "epoch": 1.9749692014783289, "grad_norm": 1.3118199110031128, "learning_rate": 9.598578947368422e-05, 
"loss": 0.4627, "step": 35269 }, { "epoch": 1.975025198790458, "grad_norm": 1.2914881706237793, "learning_rate": 9.598552631578948e-05, "loss": 0.4053, "step": 35270 }, { "epoch": 1.975081196102587, "grad_norm": 1.8204461336135864, "learning_rate": 9.598526315789475e-05, "loss": 0.3838, "step": 35271 }, { "epoch": 1.975137193414716, "grad_norm": 1.2272690534591675, "learning_rate": 9.5985e-05, "loss": 0.3862, "step": 35272 }, { "epoch": 1.975193190726845, "grad_norm": 1.3409082889556885, "learning_rate": 9.598473684210527e-05, "loss": 0.4829, "step": 35273 }, { "epoch": 1.975249188038974, "grad_norm": 1.3153965473175049, "learning_rate": 9.598447368421053e-05, "loss": 0.421, "step": 35274 }, { "epoch": 1.975305185351103, "grad_norm": 1.3150523900985718, "learning_rate": 9.59842105263158e-05, "loss": 0.3628, "step": 35275 }, { "epoch": 1.975361182663232, "grad_norm": 1.2833306789398193, "learning_rate": 9.598394736842106e-05, "loss": 0.4401, "step": 35276 }, { "epoch": 1.975417179975361, "grad_norm": 1.3704347610473633, "learning_rate": 9.598368421052632e-05, "loss": 0.3553, "step": 35277 }, { "epoch": 1.97547317728749, "grad_norm": 1.6373692750930786, "learning_rate": 9.598342105263158e-05, "loss": 0.5005, "step": 35278 }, { "epoch": 1.975529174599619, "grad_norm": 1.0903794765472412, "learning_rate": 9.598315789473684e-05, "loss": 0.3671, "step": 35279 }, { "epoch": 1.975585171911748, "grad_norm": 1.225213646888733, "learning_rate": 9.598289473684212e-05, "loss": 0.341, "step": 35280 }, { "epoch": 1.9756411692238771, "grad_norm": 1.1427005529403687, "learning_rate": 9.598263157894737e-05, "loss": 0.551, "step": 35281 }, { "epoch": 1.9756971665360061, "grad_norm": 1.1421732902526855, "learning_rate": 9.598236842105263e-05, "loss": 0.3506, "step": 35282 }, { "epoch": 1.9757531638481352, "grad_norm": 1.3825337886810303, "learning_rate": 9.59821052631579e-05, "loss": 0.3827, "step": 35283 }, { "epoch": 1.9758091611602642, "grad_norm": 1.250551700592041, 
"learning_rate": 9.598184210526317e-05, "loss": 0.3922, "step": 35284 }, { "epoch": 1.9758651584723932, "grad_norm": 1.4467190504074097, "learning_rate": 9.598157894736843e-05, "loss": 0.608, "step": 35285 }, { "epoch": 1.9759211557845222, "grad_norm": 1.1037590503692627, "learning_rate": 9.598131578947369e-05, "loss": 0.4356, "step": 35286 }, { "epoch": 1.9759771530966512, "grad_norm": 1.0927555561065674, "learning_rate": 9.598105263157895e-05, "loss": 0.4336, "step": 35287 }, { "epoch": 1.9760331504087802, "grad_norm": 1.1293150186538696, "learning_rate": 9.598078947368422e-05, "loss": 0.3492, "step": 35288 }, { "epoch": 1.9760891477209093, "grad_norm": 1.0118589401245117, "learning_rate": 9.598052631578948e-05, "loss": 0.3937, "step": 35289 }, { "epoch": 1.9761451450330383, "grad_norm": 1.2155112028121948, "learning_rate": 9.598026315789474e-05, "loss": 0.4235, "step": 35290 }, { "epoch": 1.9762011423451673, "grad_norm": 1.6978845596313477, "learning_rate": 9.598e-05, "loss": 0.447, "step": 35291 }, { "epoch": 1.9762571396572963, "grad_norm": 1.2969763278961182, "learning_rate": 9.597973684210527e-05, "loss": 0.5313, "step": 35292 }, { "epoch": 1.9763131369694253, "grad_norm": 1.598415732383728, "learning_rate": 9.597947368421053e-05, "loss": 0.5412, "step": 35293 }, { "epoch": 1.9763691342815544, "grad_norm": 1.4855791330337524, "learning_rate": 9.59792105263158e-05, "loss": 0.5057, "step": 35294 }, { "epoch": 1.9764251315936834, "grad_norm": 1.07868492603302, "learning_rate": 9.597894736842105e-05, "loss": 0.6179, "step": 35295 }, { "epoch": 1.9764811289058124, "grad_norm": 1.5606224536895752, "learning_rate": 9.597868421052631e-05, "loss": 0.4144, "step": 35296 }, { "epoch": 1.9765371262179414, "grad_norm": 1.1112538576126099, "learning_rate": 9.597842105263158e-05, "loss": 0.401, "step": 35297 }, { "epoch": 1.9765931235300704, "grad_norm": 1.1307101249694824, "learning_rate": 9.597815789473684e-05, "loss": 0.4075, "step": 35298 }, { "epoch": 
1.9766491208421995, "grad_norm": 1.7817635536193848, "learning_rate": 9.597789473684212e-05, "loss": 0.4521, "step": 35299 }, { "epoch": 1.9767051181543285, "grad_norm": 1.455461859703064, "learning_rate": 9.597763157894736e-05, "loss": 0.4451, "step": 35300 }, { "epoch": 1.9767611154664575, "grad_norm": 1.3352333307266235, "learning_rate": 9.597736842105264e-05, "loss": 0.408, "step": 35301 }, { "epoch": 1.9768171127785865, "grad_norm": 1.4200162887573242, "learning_rate": 9.59771052631579e-05, "loss": 0.6055, "step": 35302 }, { "epoch": 1.9768731100907155, "grad_norm": 1.4622924327850342, "learning_rate": 9.597684210526317e-05, "loss": 0.5753, "step": 35303 }, { "epoch": 1.9769291074028446, "grad_norm": 1.3756738901138306, "learning_rate": 9.597657894736842e-05, "loss": 0.3705, "step": 35304 }, { "epoch": 1.9769851047149736, "grad_norm": 1.5039740800857544, "learning_rate": 9.597631578947369e-05, "loss": 0.5865, "step": 35305 }, { "epoch": 1.9770411020271026, "grad_norm": 1.6791962385177612, "learning_rate": 9.597605263157895e-05, "loss": 0.4755, "step": 35306 }, { "epoch": 1.9770970993392316, "grad_norm": 1.1733907461166382, "learning_rate": 9.597578947368422e-05, "loss": 0.5278, "step": 35307 }, { "epoch": 1.9771530966513606, "grad_norm": 1.375545859336853, "learning_rate": 9.597552631578948e-05, "loss": 0.4223, "step": 35308 }, { "epoch": 1.9772090939634897, "grad_norm": 1.0620664358139038, "learning_rate": 9.597526315789474e-05, "loss": 0.3884, "step": 35309 }, { "epoch": 1.9772650912756187, "grad_norm": 1.1718782186508179, "learning_rate": 9.5975e-05, "loss": 0.4194, "step": 35310 }, { "epoch": 1.9773210885877477, "grad_norm": 1.4529876708984375, "learning_rate": 9.597473684210528e-05, "loss": 0.4884, "step": 35311 }, { "epoch": 1.9773770858998767, "grad_norm": 1.1633386611938477, "learning_rate": 9.597447368421053e-05, "loss": 0.4522, "step": 35312 }, { "epoch": 1.9774330832120057, "grad_norm": 1.3067140579223633, "learning_rate": 9.59742105263158e-05, 
"loss": 0.5606, "step": 35313 }, { "epoch": 1.9774890805241347, "grad_norm": 1.4889006614685059, "learning_rate": 9.597394736842105e-05, "loss": 0.3932, "step": 35314 }, { "epoch": 1.9775450778362638, "grad_norm": 1.1453803777694702, "learning_rate": 9.597368421052631e-05, "loss": 0.4174, "step": 35315 }, { "epoch": 1.9776010751483928, "grad_norm": 1.5066471099853516, "learning_rate": 9.597342105263159e-05, "loss": 0.4884, "step": 35316 }, { "epoch": 1.9776570724605218, "grad_norm": 1.270275592803955, "learning_rate": 9.597315789473685e-05, "loss": 0.4955, "step": 35317 }, { "epoch": 1.9777130697726508, "grad_norm": 1.5205512046813965, "learning_rate": 9.59728947368421e-05, "loss": 0.4563, "step": 35318 }, { "epoch": 1.9777690670847798, "grad_norm": 1.362046718597412, "learning_rate": 9.597263157894737e-05, "loss": 0.5449, "step": 35319 }, { "epoch": 1.9778250643969089, "grad_norm": 1.1937780380249023, "learning_rate": 9.597236842105264e-05, "loss": 0.4954, "step": 35320 }, { "epoch": 1.9778810617090379, "grad_norm": 1.38205885887146, "learning_rate": 9.59721052631579e-05, "loss": 0.5093, "step": 35321 }, { "epoch": 1.977937059021167, "grad_norm": 1.2679609060287476, "learning_rate": 9.597184210526316e-05, "loss": 0.3756, "step": 35322 }, { "epoch": 1.977993056333296, "grad_norm": 1.0841118097305298, "learning_rate": 9.597157894736842e-05, "loss": 0.3772, "step": 35323 }, { "epoch": 1.978049053645425, "grad_norm": 1.3491220474243164, "learning_rate": 9.597131578947369e-05, "loss": 0.4203, "step": 35324 }, { "epoch": 1.978105050957554, "grad_norm": 1.5062943696975708, "learning_rate": 9.597105263157895e-05, "loss": 0.4433, "step": 35325 }, { "epoch": 1.978161048269683, "grad_norm": 1.08420991897583, "learning_rate": 9.597078947368423e-05, "loss": 0.4247, "step": 35326 }, { "epoch": 1.978217045581812, "grad_norm": 1.4956640005111694, "learning_rate": 9.597052631578947e-05, "loss": 0.5669, "step": 35327 }, { "epoch": 1.978273042893941, "grad_norm": 1.1101624965667725, 
"learning_rate": 9.597026315789474e-05, "loss": 0.4106, "step": 35328 }, { "epoch": 1.97832904020607, "grad_norm": 1.8630064725875854, "learning_rate": 9.597e-05, "loss": 0.7349, "step": 35329 }, { "epoch": 1.978385037518199, "grad_norm": 1.4439631700515747, "learning_rate": 9.596973684210526e-05, "loss": 0.4387, "step": 35330 }, { "epoch": 1.978441034830328, "grad_norm": 1.2561832666397095, "learning_rate": 9.596947368421054e-05, "loss": 0.4245, "step": 35331 }, { "epoch": 1.978497032142457, "grad_norm": 1.7012962102890015, "learning_rate": 9.596921052631578e-05, "loss": 0.5488, "step": 35332 }, { "epoch": 1.978553029454586, "grad_norm": 1.9780102968215942, "learning_rate": 9.596894736842106e-05, "loss": 0.3328, "step": 35333 }, { "epoch": 1.9786090267667151, "grad_norm": 1.3113173246383667, "learning_rate": 9.596868421052632e-05, "loss": 0.4253, "step": 35334 }, { "epoch": 1.9786650240788441, "grad_norm": 1.534112811088562, "learning_rate": 9.596842105263159e-05, "loss": 0.5875, "step": 35335 }, { "epoch": 1.9787210213909732, "grad_norm": 1.8889906406402588, "learning_rate": 9.596815789473685e-05, "loss": 0.4843, "step": 35336 }, { "epoch": 1.9787770187031022, "grad_norm": 1.081467866897583, "learning_rate": 9.596789473684211e-05, "loss": 0.3644, "step": 35337 }, { "epoch": 1.9788330160152312, "grad_norm": 1.1088752746582031, "learning_rate": 9.596763157894737e-05, "loss": 0.3999, "step": 35338 }, { "epoch": 1.9788890133273602, "grad_norm": 1.027759313583374, "learning_rate": 9.596736842105264e-05, "loss": 0.36, "step": 35339 }, { "epoch": 1.9789450106394892, "grad_norm": 1.1924670934677124, "learning_rate": 9.59671052631579e-05, "loss": 0.3514, "step": 35340 }, { "epoch": 1.9790010079516183, "grad_norm": 1.1977674961090088, "learning_rate": 9.596684210526316e-05, "loss": 0.3228, "step": 35341 }, { "epoch": 1.9790570052637473, "grad_norm": 3.1098453998565674, "learning_rate": 9.596657894736842e-05, "loss": 0.5448, "step": 35342 }, { "epoch": 1.9791130025758763, 
"grad_norm": 1.3640179634094238, "learning_rate": 9.59663157894737e-05, "loss": 0.4023, "step": 35343 }, { "epoch": 1.9791689998880053, "grad_norm": 1.2926201820373535, "learning_rate": 9.596605263157895e-05, "loss": 0.4943, "step": 35344 }, { "epoch": 1.9792249972001343, "grad_norm": 1.4971150159835815, "learning_rate": 9.596578947368421e-05, "loss": 0.6926, "step": 35345 }, { "epoch": 1.9792809945122634, "grad_norm": 1.3296997547149658, "learning_rate": 9.596552631578947e-05, "loss": 0.5384, "step": 35346 }, { "epoch": 1.9793369918243924, "grad_norm": 1.3212518692016602, "learning_rate": 9.596526315789475e-05, "loss": 0.4779, "step": 35347 }, { "epoch": 1.9793929891365214, "grad_norm": 1.2288786172866821, "learning_rate": 9.596500000000001e-05, "loss": 0.3691, "step": 35348 }, { "epoch": 1.9794489864486504, "grad_norm": 1.3563345670700073, "learning_rate": 9.596473684210527e-05, "loss": 0.5572, "step": 35349 }, { "epoch": 1.9795049837607794, "grad_norm": 1.5157183408737183, "learning_rate": 9.596447368421053e-05, "loss": 0.4621, "step": 35350 }, { "epoch": 1.9795609810729085, "grad_norm": 1.6226670742034912, "learning_rate": 9.596421052631579e-05, "loss": 0.5195, "step": 35351 }, { "epoch": 1.9796169783850375, "grad_norm": 1.340502381324768, "learning_rate": 9.596394736842106e-05, "loss": 0.4098, "step": 35352 }, { "epoch": 1.9796729756971665, "grad_norm": 1.1395243406295776, "learning_rate": 9.596368421052632e-05, "loss": 0.5622, "step": 35353 }, { "epoch": 1.9797289730092955, "grad_norm": 1.2077263593673706, "learning_rate": 9.596342105263159e-05, "loss": 0.4037, "step": 35354 }, { "epoch": 1.9797849703214245, "grad_norm": 1.1225641965866089, "learning_rate": 9.596315789473684e-05, "loss": 0.3685, "step": 35355 }, { "epoch": 1.9798409676335536, "grad_norm": 1.0156373977661133, "learning_rate": 9.596289473684211e-05, "loss": 0.4296, "step": 35356 }, { "epoch": 1.9798969649456826, "grad_norm": 1.2100605964660645, "learning_rate": 9.596263157894737e-05, "loss": 
0.3318, "step": 35357 }, { "epoch": 1.9799529622578116, "grad_norm": 1.1853482723236084, "learning_rate": 9.596236842105264e-05, "loss": 0.3517, "step": 35358 }, { "epoch": 1.9800089595699406, "grad_norm": 1.1259390115737915, "learning_rate": 9.596210526315789e-05, "loss": 0.4718, "step": 35359 }, { "epoch": 1.9800649568820696, "grad_norm": 0.9772047996520996, "learning_rate": 9.596184210526316e-05, "loss": 0.3278, "step": 35360 }, { "epoch": 1.9801209541941986, "grad_norm": 1.421338677406311, "learning_rate": 9.596157894736842e-05, "loss": 0.5354, "step": 35361 }, { "epoch": 1.9801769515063277, "grad_norm": 1.678537368774414, "learning_rate": 9.59613157894737e-05, "loss": 0.4781, "step": 35362 }, { "epoch": 1.9802329488184567, "grad_norm": 1.3583216667175293, "learning_rate": 9.596105263157896e-05, "loss": 0.5838, "step": 35363 }, { "epoch": 1.9802889461305857, "grad_norm": 1.3110512495040894, "learning_rate": 9.596078947368422e-05, "loss": 0.4631, "step": 35364 }, { "epoch": 1.9803449434427147, "grad_norm": 1.4307591915130615, "learning_rate": 9.596052631578948e-05, "loss": 0.4516, "step": 35365 }, { "epoch": 1.9804009407548437, "grad_norm": 1.3773220777511597, "learning_rate": 9.596026315789474e-05, "loss": 0.5839, "step": 35366 }, { "epoch": 1.9804569380669728, "grad_norm": 1.1750822067260742, "learning_rate": 9.596000000000001e-05, "loss": 0.4467, "step": 35367 }, { "epoch": 1.9805129353791018, "grad_norm": 1.1129287481307983, "learning_rate": 9.595973684210527e-05, "loss": 0.5654, "step": 35368 }, { "epoch": 1.9805689326912308, "grad_norm": 1.5263557434082031, "learning_rate": 9.595947368421053e-05, "loss": 0.4007, "step": 35369 }, { "epoch": 1.9806249300033598, "grad_norm": 1.5154064893722534, "learning_rate": 9.595921052631579e-05, "loss": 0.5398, "step": 35370 }, { "epoch": 1.9806809273154888, "grad_norm": 1.120880126953125, "learning_rate": 9.595894736842106e-05, "loss": 0.368, "step": 35371 }, { "epoch": 1.9807369246276179, "grad_norm": 
1.1270954608917236, "learning_rate": 9.595868421052632e-05, "loss": 0.4904, "step": 35372 }, { "epoch": 1.9807929219397469, "grad_norm": 1.2989213466644287, "learning_rate": 9.595842105263158e-05, "loss": 0.4836, "step": 35373 }, { "epoch": 1.980848919251876, "grad_norm": 0.9655188918113708, "learning_rate": 9.595815789473684e-05, "loss": 0.3337, "step": 35374 }, { "epoch": 1.980904916564005, "grad_norm": 1.3269989490509033, "learning_rate": 9.595789473684211e-05, "loss": 0.4409, "step": 35375 }, { "epoch": 1.980960913876134, "grad_norm": 1.2914983034133911, "learning_rate": 9.595763157894737e-05, "loss": 0.466, "step": 35376 }, { "epoch": 1.981016911188263, "grad_norm": 1.32680082321167, "learning_rate": 9.595736842105263e-05, "loss": 0.466, "step": 35377 }, { "epoch": 1.981072908500392, "grad_norm": 1.374399185180664, "learning_rate": 9.59571052631579e-05, "loss": 0.5656, "step": 35378 }, { "epoch": 1.981128905812521, "grad_norm": 1.424728512763977, "learning_rate": 9.595684210526317e-05, "loss": 0.4942, "step": 35379 }, { "epoch": 1.98118490312465, "grad_norm": 1.3107686042785645, "learning_rate": 9.595657894736843e-05, "loss": 0.3764, "step": 35380 }, { "epoch": 1.981240900436779, "grad_norm": 1.2910714149475098, "learning_rate": 9.59563157894737e-05, "loss": 0.3745, "step": 35381 }, { "epoch": 1.981296897748908, "grad_norm": 1.1032922267913818, "learning_rate": 9.595605263157895e-05, "loss": 0.3212, "step": 35382 }, { "epoch": 1.981352895061037, "grad_norm": 0.9643816947937012, "learning_rate": 9.59557894736842e-05, "loss": 0.3388, "step": 35383 }, { "epoch": 1.981408892373166, "grad_norm": 1.2272729873657227, "learning_rate": 9.595552631578948e-05, "loss": 0.3363, "step": 35384 }, { "epoch": 1.981464889685295, "grad_norm": 1.1242787837982178, "learning_rate": 9.595526315789474e-05, "loss": 0.4673, "step": 35385 }, { "epoch": 1.9815208869974241, "grad_norm": 1.1283066272735596, "learning_rate": 9.595500000000001e-05, "loss": 0.4302, "step": 35386 }, { "epoch": 
1.9815768843095531, "grad_norm": 1.655296802520752, "learning_rate": 9.595473684210526e-05, "loss": 0.4352, "step": 35387 }, { "epoch": 1.9816328816216822, "grad_norm": 1.2834354639053345, "learning_rate": 9.595447368421053e-05, "loss": 0.4599, "step": 35388 }, { "epoch": 1.9816888789338112, "grad_norm": 1.2974046468734741, "learning_rate": 9.595421052631579e-05, "loss": 0.5437, "step": 35389 }, { "epoch": 1.9817448762459402, "grad_norm": 1.1857694387435913, "learning_rate": 9.595394736842106e-05, "loss": 0.477, "step": 35390 }, { "epoch": 1.9818008735580692, "grad_norm": 1.22774076461792, "learning_rate": 9.595368421052632e-05, "loss": 0.4467, "step": 35391 }, { "epoch": 1.9818568708701982, "grad_norm": 1.5466434955596924, "learning_rate": 9.595342105263158e-05, "loss": 0.5265, "step": 35392 }, { "epoch": 1.9819128681823273, "grad_norm": 1.5745201110839844, "learning_rate": 9.595315789473684e-05, "loss": 0.4461, "step": 35393 }, { "epoch": 1.9819688654944563, "grad_norm": 1.8737821578979492, "learning_rate": 9.595289473684212e-05, "loss": 0.6758, "step": 35394 }, { "epoch": 1.9820248628065853, "grad_norm": 1.8726141452789307, "learning_rate": 9.595263157894738e-05, "loss": 0.6814, "step": 35395 }, { "epoch": 1.9820808601187143, "grad_norm": 1.350630760192871, "learning_rate": 9.595236842105264e-05, "loss": 0.3601, "step": 35396 }, { "epoch": 1.9821368574308433, "grad_norm": 1.2303060293197632, "learning_rate": 9.59521052631579e-05, "loss": 0.407, "step": 35397 }, { "epoch": 1.9821928547429724, "grad_norm": 1.2057762145996094, "learning_rate": 9.595184210526317e-05, "loss": 0.4318, "step": 35398 }, { "epoch": 1.9822488520551014, "grad_norm": 1.0356110334396362, "learning_rate": 9.595157894736843e-05, "loss": 0.4149, "step": 35399 }, { "epoch": 1.9823048493672304, "grad_norm": 1.197208285331726, "learning_rate": 9.595131578947369e-05, "loss": 0.4863, "step": 35400 }, { "epoch": 1.9823608466793594, "grad_norm": 1.2359369993209839, "learning_rate": 
9.595105263157895e-05, "loss": 0.4589, "step": 35401 }, { "epoch": 1.9824168439914884, "grad_norm": 1.2901225090026855, "learning_rate": 9.595078947368421e-05, "loss": 0.471, "step": 35402 }, { "epoch": 1.9824728413036175, "grad_norm": 1.406449317932129, "learning_rate": 9.595052631578948e-05, "loss": 0.4205, "step": 35403 }, { "epoch": 1.9825288386157465, "grad_norm": 1.2992604970932007, "learning_rate": 9.595026315789474e-05, "loss": 0.4876, "step": 35404 }, { "epoch": 1.9825848359278755, "grad_norm": 1.2177059650421143, "learning_rate": 9.595e-05, "loss": 0.3949, "step": 35405 }, { "epoch": 1.9826408332400045, "grad_norm": 1.4046787023544312, "learning_rate": 9.594973684210526e-05, "loss": 0.467, "step": 35406 }, { "epoch": 1.9826968305521335, "grad_norm": 1.054486632347107, "learning_rate": 9.594947368421053e-05, "loss": 0.3462, "step": 35407 }, { "epoch": 1.9827528278642625, "grad_norm": 1.1303553581237793, "learning_rate": 9.59492105263158e-05, "loss": 0.3987, "step": 35408 }, { "epoch": 1.9828088251763916, "grad_norm": 1.245105266571045, "learning_rate": 9.594894736842107e-05, "loss": 0.5129, "step": 35409 }, { "epoch": 1.9828648224885206, "grad_norm": 1.9922682046890259, "learning_rate": 9.594868421052631e-05, "loss": 0.3798, "step": 35410 }, { "epoch": 1.9829208198006496, "grad_norm": 1.4982882738113403, "learning_rate": 9.594842105263159e-05, "loss": 0.4651, "step": 35411 }, { "epoch": 1.9829768171127786, "grad_norm": 1.1572262048721313, "learning_rate": 9.594815789473685e-05, "loss": 0.4673, "step": 35412 }, { "epoch": 1.9830328144249076, "grad_norm": 1.0734959840774536, "learning_rate": 9.594789473684212e-05, "loss": 0.5939, "step": 35413 }, { "epoch": 1.9830888117370367, "grad_norm": 1.112282633781433, "learning_rate": 9.594763157894737e-05, "loss": 0.344, "step": 35414 }, { "epoch": 1.9831448090491657, "grad_norm": 1.1996608972549438, "learning_rate": 9.594736842105264e-05, "loss": 0.658, "step": 35415 }, { "epoch": 1.9832008063612947, "grad_norm": 
1.3755563497543335, "learning_rate": 9.59471052631579e-05, "loss": 0.407, "step": 35416 }, { "epoch": 1.9832568036734237, "grad_norm": 1.1868852376937866, "learning_rate": 9.594684210526316e-05, "loss": 0.4186, "step": 35417 }, { "epoch": 1.9833128009855527, "grad_norm": 1.6150925159454346, "learning_rate": 9.594657894736843e-05, "loss": 0.5411, "step": 35418 }, { "epoch": 1.9833687982976818, "grad_norm": 1.4090509414672852, "learning_rate": 9.594631578947368e-05, "loss": 0.5802, "step": 35419 }, { "epoch": 1.9834247956098108, "grad_norm": 1.4140605926513672, "learning_rate": 9.594605263157895e-05, "loss": 0.4161, "step": 35420 }, { "epoch": 1.9834807929219398, "grad_norm": 1.141640305519104, "learning_rate": 9.594578947368421e-05, "loss": 0.5105, "step": 35421 }, { "epoch": 1.9835367902340688, "grad_norm": 1.4441699981689453, "learning_rate": 9.594552631578948e-05, "loss": 0.401, "step": 35422 }, { "epoch": 1.9835927875461978, "grad_norm": 1.2866114377975464, "learning_rate": 9.594526315789474e-05, "loss": 0.5019, "step": 35423 }, { "epoch": 1.9836487848583269, "grad_norm": 1.2182586193084717, "learning_rate": 9.5945e-05, "loss": 0.4528, "step": 35424 }, { "epoch": 1.9837047821704559, "grad_norm": 1.0991075038909912, "learning_rate": 9.594473684210526e-05, "loss": 0.413, "step": 35425 }, { "epoch": 1.983760779482585, "grad_norm": 1.2246930599212646, "learning_rate": 9.594447368421054e-05, "loss": 0.5544, "step": 35426 }, { "epoch": 1.983816776794714, "grad_norm": 1.139660120010376, "learning_rate": 9.59442105263158e-05, "loss": 0.3881, "step": 35427 }, { "epoch": 1.983872774106843, "grad_norm": 1.272331714630127, "learning_rate": 9.594394736842106e-05, "loss": 0.4805, "step": 35428 }, { "epoch": 1.983928771418972, "grad_norm": 1.4612683057785034, "learning_rate": 9.594368421052632e-05, "loss": 0.4432, "step": 35429 }, { "epoch": 1.983984768731101, "grad_norm": 1.177104115486145, "learning_rate": 9.594342105263159e-05, "loss": 0.4355, "step": 35430 }, { "epoch": 
1.98404076604323, "grad_norm": 1.148566484451294, "learning_rate": 9.594315789473685e-05, "loss": 0.3758, "step": 35431 }, { "epoch": 1.984096763355359, "grad_norm": 1.3939623832702637, "learning_rate": 9.594289473684211e-05, "loss": 0.4636, "step": 35432 }, { "epoch": 1.984152760667488, "grad_norm": 1.4306120872497559, "learning_rate": 9.594263157894737e-05, "loss": 0.3576, "step": 35433 }, { "epoch": 1.984208757979617, "grad_norm": 1.126480221748352, "learning_rate": 9.594236842105263e-05, "loss": 0.4908, "step": 35434 }, { "epoch": 1.984264755291746, "grad_norm": 1.2852604389190674, "learning_rate": 9.59421052631579e-05, "loss": 0.3717, "step": 35435 }, { "epoch": 1.984320752603875, "grad_norm": 1.189316987991333, "learning_rate": 9.594184210526316e-05, "loss": 0.4749, "step": 35436 }, { "epoch": 1.984376749916004, "grad_norm": 1.1891878843307495, "learning_rate": 9.594157894736842e-05, "loss": 0.4139, "step": 35437 }, { "epoch": 1.9844327472281331, "grad_norm": 1.2388026714324951, "learning_rate": 9.594131578947368e-05, "loss": 0.5164, "step": 35438 }, { "epoch": 1.9844887445402621, "grad_norm": 1.4272000789642334, "learning_rate": 9.594105263157895e-05, "loss": 0.4422, "step": 35439 }, { "epoch": 1.9845447418523912, "grad_norm": 1.5697245597839355, "learning_rate": 9.594078947368421e-05, "loss": 0.3517, "step": 35440 }, { "epoch": 1.9846007391645202, "grad_norm": 1.6420040130615234, "learning_rate": 9.594052631578949e-05, "loss": 0.4172, "step": 35441 }, { "epoch": 1.9846567364766492, "grad_norm": 1.081558346748352, "learning_rate": 9.594026315789473e-05, "loss": 0.3497, "step": 35442 }, { "epoch": 1.9847127337887782, "grad_norm": 1.1123874187469482, "learning_rate": 9.594e-05, "loss": 0.3874, "step": 35443 }, { "epoch": 1.9847687311009072, "grad_norm": 1.313855528831482, "learning_rate": 9.593973684210527e-05, "loss": 0.5523, "step": 35444 }, { "epoch": 1.9848247284130363, "grad_norm": 1.5975446701049805, "learning_rate": 9.593947368421054e-05, "loss": 
0.4747, "step": 35445 }, { "epoch": 1.9848807257251653, "grad_norm": 1.0752410888671875, "learning_rate": 9.59392105263158e-05, "loss": 0.439, "step": 35446 }, { "epoch": 1.9849367230372943, "grad_norm": 1.4096988439559937, "learning_rate": 9.593894736842106e-05, "loss": 0.605, "step": 35447 }, { "epoch": 1.9849927203494233, "grad_norm": 1.220782995223999, "learning_rate": 9.593868421052632e-05, "loss": 0.4369, "step": 35448 }, { "epoch": 1.9850487176615523, "grad_norm": 1.346310019493103, "learning_rate": 9.593842105263159e-05, "loss": 0.5336, "step": 35449 }, { "epoch": 1.9851047149736814, "grad_norm": 1.2538485527038574, "learning_rate": 9.593815789473685e-05, "loss": 0.4565, "step": 35450 }, { "epoch": 1.9851607122858104, "grad_norm": 1.2544595003128052, "learning_rate": 9.59378947368421e-05, "loss": 0.4645, "step": 35451 }, { "epoch": 1.9852167095979394, "grad_norm": 1.2146624326705933, "learning_rate": 9.593763157894737e-05, "loss": 0.3921, "step": 35452 }, { "epoch": 1.9852727069100684, "grad_norm": 1.038583517074585, "learning_rate": 9.593736842105263e-05, "loss": 0.3153, "step": 35453 }, { "epoch": 1.9853287042221974, "grad_norm": 1.4133057594299316, "learning_rate": 9.59371052631579e-05, "loss": 0.4592, "step": 35454 }, { "epoch": 1.9853847015343264, "grad_norm": 1.3205456733703613, "learning_rate": 9.593684210526316e-05, "loss": 0.3537, "step": 35455 }, { "epoch": 1.9854406988464555, "grad_norm": 1.0945783853530884, "learning_rate": 9.593657894736842e-05, "loss": 0.4419, "step": 35456 }, { "epoch": 1.9854966961585845, "grad_norm": 1.373948097229004, "learning_rate": 9.593631578947368e-05, "loss": 0.5903, "step": 35457 }, { "epoch": 1.9855526934707135, "grad_norm": 1.7060059309005737, "learning_rate": 9.593605263157896e-05, "loss": 0.5437, "step": 35458 }, { "epoch": 1.9856086907828425, "grad_norm": 3.1456387042999268, "learning_rate": 9.593578947368422e-05, "loss": 0.5352, "step": 35459 }, { "epoch": 1.9856646880949715, "grad_norm": 1.105337142944336, 
"learning_rate": 9.593552631578948e-05, "loss": 0.3794, "step": 35460 }, { "epoch": 1.9857206854071006, "grad_norm": 1.2466400861740112, "learning_rate": 9.593526315789474e-05, "loss": 0.4434, "step": 35461 }, { "epoch": 1.9857766827192296, "grad_norm": 1.237947702407837, "learning_rate": 9.593500000000001e-05, "loss": 0.3995, "step": 35462 }, { "epoch": 1.9858326800313586, "grad_norm": 1.208706259727478, "learning_rate": 9.593473684210527e-05, "loss": 0.4239, "step": 35463 }, { "epoch": 1.9858886773434876, "grad_norm": 1.50117027759552, "learning_rate": 9.593447368421053e-05, "loss": 0.4797, "step": 35464 }, { "epoch": 1.9859446746556166, "grad_norm": 1.306710958480835, "learning_rate": 9.593421052631579e-05, "loss": 0.503, "step": 35465 }, { "epoch": 1.9860006719677457, "grad_norm": 4.107534408569336, "learning_rate": 9.593394736842106e-05, "loss": 0.7216, "step": 35466 }, { "epoch": 1.9860566692798747, "grad_norm": 1.4712620973587036, "learning_rate": 9.593368421052632e-05, "loss": 0.5996, "step": 35467 }, { "epoch": 1.9861126665920037, "grad_norm": 1.2280992269515991, "learning_rate": 9.59334210526316e-05, "loss": 0.4118, "step": 35468 }, { "epoch": 1.9861686639041327, "grad_norm": 1.087149977684021, "learning_rate": 9.593315789473684e-05, "loss": 0.3858, "step": 35469 }, { "epoch": 1.9862246612162617, "grad_norm": 1.139447569847107, "learning_rate": 9.59328947368421e-05, "loss": 0.5448, "step": 35470 }, { "epoch": 1.9862806585283908, "grad_norm": 1.401938557624817, "learning_rate": 9.593263157894737e-05, "loss": 0.4129, "step": 35471 }, { "epoch": 1.9863366558405198, "grad_norm": 1.299832820892334, "learning_rate": 9.593236842105263e-05, "loss": 0.4094, "step": 35472 }, { "epoch": 1.9863926531526488, "grad_norm": 1.196219801902771, "learning_rate": 9.59321052631579e-05, "loss": 0.4212, "step": 35473 }, { "epoch": 1.9864486504647778, "grad_norm": 1.2520298957824707, "learning_rate": 9.593184210526315e-05, "loss": 0.5987, "step": 35474 }, { "epoch": 
1.9865046477769068, "grad_norm": 1.2894660234451294, "learning_rate": 9.593157894736843e-05, "loss": 0.4341, "step": 35475 }, { "epoch": 1.9865606450890358, "grad_norm": 1.1469789743423462, "learning_rate": 9.593131578947369e-05, "loss": 0.4556, "step": 35476 }, { "epoch": 1.9866166424011649, "grad_norm": 1.070273995399475, "learning_rate": 9.593105263157896e-05, "loss": 0.404, "step": 35477 }, { "epoch": 1.9866726397132939, "grad_norm": 1.1394150257110596, "learning_rate": 9.593078947368422e-05, "loss": 0.5053, "step": 35478 }, { "epoch": 1.986728637025423, "grad_norm": 1.0825096368789673, "learning_rate": 9.593052631578948e-05, "loss": 0.3979, "step": 35479 }, { "epoch": 1.986784634337552, "grad_norm": 1.304796814918518, "learning_rate": 9.593026315789474e-05, "loss": 0.4139, "step": 35480 }, { "epoch": 1.986840631649681, "grad_norm": 1.3459256887435913, "learning_rate": 9.593000000000001e-05, "loss": 0.5829, "step": 35481 }, { "epoch": 1.98689662896181, "grad_norm": 1.2086055278778076, "learning_rate": 9.592973684210527e-05, "loss": 0.4489, "step": 35482 }, { "epoch": 1.986952626273939, "grad_norm": 1.0581413507461548, "learning_rate": 9.592947368421053e-05, "loss": 0.5134, "step": 35483 }, { "epoch": 1.987008623586068, "grad_norm": 1.3694082498550415, "learning_rate": 9.592921052631579e-05, "loss": 0.5376, "step": 35484 }, { "epoch": 1.987064620898197, "grad_norm": 1.1337991952896118, "learning_rate": 9.592894736842106e-05, "loss": 0.4801, "step": 35485 }, { "epoch": 1.987120618210326, "grad_norm": 1.1556999683380127, "learning_rate": 9.592868421052632e-05, "loss": 0.4499, "step": 35486 }, { "epoch": 1.987176615522455, "grad_norm": 1.1705234050750732, "learning_rate": 9.592842105263158e-05, "loss": 0.3402, "step": 35487 }, { "epoch": 1.987232612834584, "grad_norm": 1.2650971412658691, "learning_rate": 9.592815789473684e-05, "loss": 0.4602, "step": 35488 }, { "epoch": 1.987288610146713, "grad_norm": 1.4428532123565674, "learning_rate": 9.59278947368421e-05, 
"loss": 0.4304, "step": 35489 }, { "epoch": 1.9873446074588421, "grad_norm": 1.4514931440353394, "learning_rate": 9.592763157894738e-05, "loss": 0.4974, "step": 35490 }, { "epoch": 1.9874006047709711, "grad_norm": 1.5715280771255493, "learning_rate": 9.592736842105264e-05, "loss": 0.5744, "step": 35491 }, { "epoch": 1.9874566020831002, "grad_norm": 1.3635389804840088, "learning_rate": 9.59271052631579e-05, "loss": 0.3976, "step": 35492 }, { "epoch": 1.9875125993952292, "grad_norm": 1.2272080183029175, "learning_rate": 9.592684210526316e-05, "loss": 0.4523, "step": 35493 }, { "epoch": 1.9875685967073582, "grad_norm": 1.288355827331543, "learning_rate": 9.592657894736843e-05, "loss": 0.4742, "step": 35494 }, { "epoch": 1.9876245940194872, "grad_norm": 1.020338773727417, "learning_rate": 9.592631578947369e-05, "loss": 0.4291, "step": 35495 }, { "epoch": 1.9876805913316162, "grad_norm": 1.267419457435608, "learning_rate": 9.592605263157896e-05, "loss": 0.4002, "step": 35496 }, { "epoch": 1.9877365886437453, "grad_norm": 1.3968697786331177, "learning_rate": 9.592578947368421e-05, "loss": 0.5103, "step": 35497 }, { "epoch": 1.9877925859558743, "grad_norm": 1.4301570653915405, "learning_rate": 9.592552631578948e-05, "loss": 0.4989, "step": 35498 }, { "epoch": 1.9878485832680033, "grad_norm": 1.1554651260375977, "learning_rate": 9.592526315789474e-05, "loss": 0.5512, "step": 35499 }, { "epoch": 1.9879045805801323, "grad_norm": 1.2478835582733154, "learning_rate": 9.592500000000001e-05, "loss": 0.4239, "step": 35500 }, { "epoch": 1.9879605778922613, "grad_norm": 1.1728347539901733, "learning_rate": 9.592473684210527e-05, "loss": 0.4987, "step": 35501 }, { "epoch": 1.9880165752043903, "grad_norm": 1.4364714622497559, "learning_rate": 9.592447368421053e-05, "loss": 0.3506, "step": 35502 }, { "epoch": 1.9880725725165194, "grad_norm": 1.3623409271240234, "learning_rate": 9.59242105263158e-05, "loss": 0.6151, "step": 35503 }, { "epoch": 1.9881285698286484, "grad_norm": 
1.1457279920578003, "learning_rate": 9.592394736842105e-05, "loss": 0.4293, "step": 35504 }, { "epoch": 1.9881845671407774, "grad_norm": 1.4999864101409912, "learning_rate": 9.592368421052633e-05, "loss": 0.5702, "step": 35505 }, { "epoch": 1.9882405644529064, "grad_norm": 1.3519184589385986, "learning_rate": 9.592342105263157e-05, "loss": 0.443, "step": 35506 }, { "epoch": 1.9882965617650354, "grad_norm": 2.0611536502838135, "learning_rate": 9.592315789473685e-05, "loss": 0.5067, "step": 35507 }, { "epoch": 1.9883525590771645, "grad_norm": 1.4516867399215698, "learning_rate": 9.59228947368421e-05, "loss": 0.5665, "step": 35508 }, { "epoch": 1.9884085563892935, "grad_norm": 1.0792677402496338, "learning_rate": 9.592263157894738e-05, "loss": 0.3083, "step": 35509 }, { "epoch": 1.9884645537014223, "grad_norm": 1.0905338525772095, "learning_rate": 9.592236842105264e-05, "loss": 0.4128, "step": 35510 }, { "epoch": 1.9885205510135513, "grad_norm": 1.0899282693862915, "learning_rate": 9.59221052631579e-05, "loss": 0.4503, "step": 35511 }, { "epoch": 1.9885765483256803, "grad_norm": 1.273254156112671, "learning_rate": 9.592184210526316e-05, "loss": 0.4026, "step": 35512 }, { "epoch": 1.9886325456378093, "grad_norm": 1.2546851634979248, "learning_rate": 9.592157894736843e-05, "loss": 0.4344, "step": 35513 }, { "epoch": 1.9886885429499384, "grad_norm": 1.2890563011169434, "learning_rate": 9.592131578947369e-05, "loss": 0.5002, "step": 35514 }, { "epoch": 1.9887445402620674, "grad_norm": 1.2466799020767212, "learning_rate": 9.592105263157895e-05, "loss": 0.4146, "step": 35515 }, { "epoch": 1.9888005375741964, "grad_norm": 1.6226491928100586, "learning_rate": 9.592078947368421e-05, "loss": 0.4102, "step": 35516 }, { "epoch": 1.9888565348863254, "grad_norm": 1.557784914970398, "learning_rate": 9.592052631578948e-05, "loss": 0.4721, "step": 35517 }, { "epoch": 1.9889125321984544, "grad_norm": 1.1472299098968506, "learning_rate": 9.592026315789474e-05, "loss": 0.4538, "step": 
35518 }, { "epoch": 1.9889685295105834, "grad_norm": 1.212196707725525, "learning_rate": 9.592e-05, "loss": 0.4624, "step": 35519 }, { "epoch": 1.9890245268227125, "grad_norm": 1.2951369285583496, "learning_rate": 9.591973684210526e-05, "loss": 0.361, "step": 35520 }, { "epoch": 1.9890805241348415, "grad_norm": 1.0945374965667725, "learning_rate": 9.591947368421052e-05, "loss": 0.4849, "step": 35521 }, { "epoch": 1.9891365214469705, "grad_norm": 1.3309515714645386, "learning_rate": 9.59192105263158e-05, "loss": 0.6063, "step": 35522 }, { "epoch": 1.9891925187590995, "grad_norm": 1.2102982997894287, "learning_rate": 9.591894736842106e-05, "loss": 0.4288, "step": 35523 }, { "epoch": 1.9892485160712285, "grad_norm": 1.8596972227096558, "learning_rate": 9.591868421052632e-05, "loss": 0.5483, "step": 35524 }, { "epoch": 1.9893045133833576, "grad_norm": 1.093278408050537, "learning_rate": 9.591842105263158e-05, "loss": 0.3525, "step": 35525 }, { "epoch": 1.9893605106954866, "grad_norm": 1.171967625617981, "learning_rate": 9.591815789473685e-05, "loss": 0.4379, "step": 35526 }, { "epoch": 1.9894165080076156, "grad_norm": 1.3306617736816406, "learning_rate": 9.591789473684211e-05, "loss": 0.5521, "step": 35527 }, { "epoch": 1.9894725053197446, "grad_norm": 1.178497314453125, "learning_rate": 9.591763157894738e-05, "loss": 0.3866, "step": 35528 }, { "epoch": 1.9895285026318736, "grad_norm": 1.358841061592102, "learning_rate": 9.591736842105263e-05, "loss": 0.3975, "step": 35529 }, { "epoch": 1.9895844999440027, "grad_norm": 1.3383830785751343, "learning_rate": 9.59171052631579e-05, "loss": 0.4068, "step": 35530 }, { "epoch": 1.9896404972561317, "grad_norm": 1.126112461090088, "learning_rate": 9.591684210526316e-05, "loss": 0.4247, "step": 35531 }, { "epoch": 1.9896964945682607, "grad_norm": 1.2307801246643066, "learning_rate": 9.591657894736843e-05, "loss": 0.4971, "step": 35532 }, { "epoch": 1.9897524918803897, "grad_norm": 1.073792815208435, "learning_rate": 
9.59163157894737e-05, "loss": 0.4611, "step": 35533 }, { "epoch": 1.9898084891925187, "grad_norm": 1.284218192100525, "learning_rate": 9.591605263157895e-05, "loss": 0.3711, "step": 35534 }, { "epoch": 1.9898644865046478, "grad_norm": 2.1291611194610596, "learning_rate": 9.591578947368421e-05, "loss": 0.5127, "step": 35535 }, { "epoch": 1.9899204838167768, "grad_norm": 0.9030032753944397, "learning_rate": 9.591552631578949e-05, "loss": 0.208, "step": 35536 }, { "epoch": 1.9899764811289058, "grad_norm": 1.2215075492858887, "learning_rate": 9.591526315789475e-05, "loss": 0.35, "step": 35537 }, { "epoch": 1.9900324784410348, "grad_norm": 1.2426862716674805, "learning_rate": 9.5915e-05, "loss": 0.3986, "step": 35538 }, { "epoch": 1.9900884757531638, "grad_norm": 1.435858130455017, "learning_rate": 9.591473684210527e-05, "loss": 0.4092, "step": 35539 }, { "epoch": 1.9901444730652929, "grad_norm": 1.5430365800857544, "learning_rate": 9.591447368421053e-05, "loss": 0.5414, "step": 35540 }, { "epoch": 1.9902004703774219, "grad_norm": 1.3427220582962036, "learning_rate": 9.59142105263158e-05, "loss": 0.444, "step": 35541 }, { "epoch": 1.990256467689551, "grad_norm": 1.5588865280151367, "learning_rate": 9.591394736842106e-05, "loss": 0.6043, "step": 35542 }, { "epoch": 1.99031246500168, "grad_norm": 1.4000496864318848, "learning_rate": 9.591368421052632e-05, "loss": 0.4929, "step": 35543 }, { "epoch": 1.990368462313809, "grad_norm": 1.0791112184524536, "learning_rate": 9.591342105263158e-05, "loss": 0.3904, "step": 35544 }, { "epoch": 1.990424459625938, "grad_norm": 1.1854305267333984, "learning_rate": 9.591315789473685e-05, "loss": 0.3382, "step": 35545 }, { "epoch": 1.990480456938067, "grad_norm": 1.3216811418533325, "learning_rate": 9.591289473684211e-05, "loss": 0.5435, "step": 35546 }, { "epoch": 1.990536454250196, "grad_norm": 1.1134005784988403, "learning_rate": 9.591263157894737e-05, "loss": 0.3386, "step": 35547 }, { "epoch": 1.990592451562325, "grad_norm": 
1.3041775226593018, "learning_rate": 9.591236842105263e-05, "loss": 0.5231, "step": 35548 }, { "epoch": 1.990648448874454, "grad_norm": 1.1259886026382446, "learning_rate": 9.59121052631579e-05, "loss": 0.4198, "step": 35549 }, { "epoch": 1.990704446186583, "grad_norm": 1.188585638999939, "learning_rate": 9.591184210526316e-05, "loss": 0.4411, "step": 35550 }, { "epoch": 1.990760443498712, "grad_norm": 1.2205523252487183, "learning_rate": 9.591157894736844e-05, "loss": 0.3702, "step": 35551 }, { "epoch": 1.990816440810841, "grad_norm": 1.3695698976516724, "learning_rate": 9.591131578947368e-05, "loss": 0.4383, "step": 35552 }, { "epoch": 1.99087243812297, "grad_norm": 1.3751276731491089, "learning_rate": 9.591105263157896e-05, "loss": 0.4639, "step": 35553 }, { "epoch": 1.9909284354350991, "grad_norm": 1.538450837135315, "learning_rate": 9.591078947368422e-05, "loss": 0.4652, "step": 35554 }, { "epoch": 1.9909844327472281, "grad_norm": 1.5518817901611328, "learning_rate": 9.591052631578948e-05, "loss": 0.5109, "step": 35555 }, { "epoch": 1.9910404300593572, "grad_norm": 1.3193330764770508, "learning_rate": 9.591026315789475e-05, "loss": 0.4631, "step": 35556 }, { "epoch": 1.9910964273714862, "grad_norm": 1.244462490081787, "learning_rate": 9.591e-05, "loss": 0.422, "step": 35557 }, { "epoch": 1.9911524246836152, "grad_norm": 1.370192050933838, "learning_rate": 9.590973684210527e-05, "loss": 0.4698, "step": 35558 }, { "epoch": 1.9912084219957442, "grad_norm": 1.4153363704681396, "learning_rate": 9.590947368421053e-05, "loss": 0.5244, "step": 35559 }, { "epoch": 1.9912644193078732, "grad_norm": 1.6399774551391602, "learning_rate": 9.59092105263158e-05, "loss": 0.4686, "step": 35560 }, { "epoch": 1.9913204166200023, "grad_norm": 1.1923521757125854, "learning_rate": 9.590894736842105e-05, "loss": 0.3494, "step": 35561 }, { "epoch": 1.9913764139321313, "grad_norm": 1.447489857673645, "learning_rate": 9.590868421052632e-05, "loss": 0.4389, "step": 35562 }, { "epoch": 
1.9914324112442603, "grad_norm": 1.191446304321289, "learning_rate": 9.590842105263158e-05, "loss": 0.4822, "step": 35563 }, { "epoch": 1.9914884085563893, "grad_norm": 2.585723876953125, "learning_rate": 9.590815789473685e-05, "loss": 0.5502, "step": 35564 }, { "epoch": 1.9915444058685183, "grad_norm": 1.7500213384628296, "learning_rate": 9.590789473684211e-05, "loss": 0.3284, "step": 35565 }, { "epoch": 1.9916004031806473, "grad_norm": 1.3805952072143555, "learning_rate": 9.590763157894737e-05, "loss": 0.5546, "step": 35566 }, { "epoch": 1.9916564004927764, "grad_norm": 1.2405415773391724, "learning_rate": 9.590736842105263e-05, "loss": 0.4891, "step": 35567 }, { "epoch": 1.9917123978049054, "grad_norm": 1.6159594058990479, "learning_rate": 9.59071052631579e-05, "loss": 0.5214, "step": 35568 }, { "epoch": 1.9917683951170344, "grad_norm": 1.3045800924301147, "learning_rate": 9.590684210526317e-05, "loss": 0.5356, "step": 35569 }, { "epoch": 1.9918243924291634, "grad_norm": 1.3108443021774292, "learning_rate": 9.590657894736843e-05, "loss": 0.4313, "step": 35570 }, { "epoch": 1.9918803897412924, "grad_norm": 1.1155539751052856, "learning_rate": 9.590631578947369e-05, "loss": 0.3851, "step": 35571 }, { "epoch": 1.9919363870534215, "grad_norm": 1.0812451839447021, "learning_rate": 9.590605263157894e-05, "loss": 0.4684, "step": 35572 }, { "epoch": 1.9919923843655505, "grad_norm": 1.2073543071746826, "learning_rate": 9.590578947368422e-05, "loss": 0.541, "step": 35573 }, { "epoch": 1.9920483816776795, "grad_norm": 1.2165262699127197, "learning_rate": 9.590552631578948e-05, "loss": 0.3822, "step": 35574 }, { "epoch": 1.9921043789898085, "grad_norm": 1.5016649961471558, "learning_rate": 9.590526315789474e-05, "loss": 0.5133, "step": 35575 }, { "epoch": 1.9921603763019375, "grad_norm": 1.2360635995864868, "learning_rate": 9.5905e-05, "loss": 0.4435, "step": 35576 }, { "epoch": 1.9922163736140666, "grad_norm": 1.1395368576049805, "learning_rate": 9.590473684210527e-05, 
"loss": 0.4172, "step": 35577 }, { "epoch": 1.9922723709261956, "grad_norm": 1.3615334033966064, "learning_rate": 9.590447368421053e-05, "loss": 0.4527, "step": 35578 }, { "epoch": 1.9923283682383246, "grad_norm": 1.1413980722427368, "learning_rate": 9.590421052631579e-05, "loss": 0.4149, "step": 35579 }, { "epoch": 1.9923843655504536, "grad_norm": 1.5299428701400757, "learning_rate": 9.590394736842105e-05, "loss": 0.4049, "step": 35580 }, { "epoch": 1.9924403628625826, "grad_norm": 1.1139276027679443, "learning_rate": 9.590368421052632e-05, "loss": 0.5362, "step": 35581 }, { "epoch": 1.9924963601747117, "grad_norm": 1.3127061128616333, "learning_rate": 9.590342105263158e-05, "loss": 0.4276, "step": 35582 }, { "epoch": 1.9925523574868407, "grad_norm": 3.077648878097534, "learning_rate": 9.590315789473686e-05, "loss": 0.3951, "step": 35583 }, { "epoch": 1.9926083547989697, "grad_norm": 1.3226358890533447, "learning_rate": 9.59028947368421e-05, "loss": 0.3977, "step": 35584 }, { "epoch": 1.9926643521110987, "grad_norm": 1.651672601699829, "learning_rate": 9.590263157894738e-05, "loss": 0.4528, "step": 35585 }, { "epoch": 1.9927203494232277, "grad_norm": 1.2801048755645752, "learning_rate": 9.590236842105264e-05, "loss": 0.401, "step": 35586 }, { "epoch": 1.9927763467353568, "grad_norm": 1.3332183361053467, "learning_rate": 9.590210526315791e-05, "loss": 0.3641, "step": 35587 }, { "epoch": 1.9928323440474858, "grad_norm": 1.0959124565124512, "learning_rate": 9.590184210526317e-05, "loss": 0.3598, "step": 35588 }, { "epoch": 1.9928883413596148, "grad_norm": 1.2634996175765991, "learning_rate": 9.590157894736843e-05, "loss": 0.4075, "step": 35589 }, { "epoch": 1.9929443386717438, "grad_norm": 1.2024871110916138, "learning_rate": 9.590131578947369e-05, "loss": 0.4666, "step": 35590 }, { "epoch": 1.9930003359838728, "grad_norm": 1.1232930421829224, "learning_rate": 9.590105263157895e-05, "loss": 0.3872, "step": 35591 }, { "epoch": 1.9930563332960016, "grad_norm": 
3.0282576084136963, "learning_rate": 9.590078947368422e-05, "loss": 0.3748, "step": 35592 }, { "epoch": 1.9931123306081306, "grad_norm": 1.2452056407928467, "learning_rate": 9.590052631578948e-05, "loss": 0.3512, "step": 35593 }, { "epoch": 1.9931683279202597, "grad_norm": 1.3730556964874268, "learning_rate": 9.590026315789474e-05, "loss": 0.603, "step": 35594 }, { "epoch": 1.9932243252323887, "grad_norm": 1.37942373752594, "learning_rate": 9.59e-05, "loss": 0.4558, "step": 35595 }, { "epoch": 1.9932803225445177, "grad_norm": 1.5861713886260986, "learning_rate": 9.589973684210527e-05, "loss": 0.5294, "step": 35596 }, { "epoch": 1.9933363198566467, "grad_norm": 1.404109001159668, "learning_rate": 9.589947368421053e-05, "loss": 0.447, "step": 35597 }, { "epoch": 1.9933923171687757, "grad_norm": 1.2659451961517334, "learning_rate": 9.589921052631579e-05, "loss": 0.5307, "step": 35598 }, { "epoch": 1.9934483144809048, "grad_norm": 1.0489815473556519, "learning_rate": 9.589894736842105e-05, "loss": 0.355, "step": 35599 }, { "epoch": 1.9935043117930338, "grad_norm": 1.2136744260787964, "learning_rate": 9.589868421052633e-05, "loss": 0.441, "step": 35600 }, { "epoch": 1.9935603091051628, "grad_norm": 1.2241686582565308, "learning_rate": 9.589842105263159e-05, "loss": 0.3985, "step": 35601 }, { "epoch": 1.9936163064172918, "grad_norm": 7.419262886047363, "learning_rate": 9.589815789473685e-05, "loss": 0.5303, "step": 35602 }, { "epoch": 1.9936723037294208, "grad_norm": 1.1942400932312012, "learning_rate": 9.58978947368421e-05, "loss": 0.2767, "step": 35603 }, { "epoch": 1.9937283010415499, "grad_norm": 1.4366904497146606, "learning_rate": 9.589763157894738e-05, "loss": 0.4674, "step": 35604 }, { "epoch": 1.9937842983536789, "grad_norm": 1.48849618434906, "learning_rate": 9.589736842105264e-05, "loss": 0.5505, "step": 35605 }, { "epoch": 1.993840295665808, "grad_norm": 1.5724461078643799, "learning_rate": 9.589710526315791e-05, "loss": 0.5464, "step": 35606 }, { "epoch": 
1.993896292977937, "grad_norm": 1.2811603546142578, "learning_rate": 9.589684210526316e-05, "loss": 0.4419, "step": 35607 }, { "epoch": 1.993952290290066, "grad_norm": 1.5148351192474365, "learning_rate": 9.589657894736842e-05, "loss": 0.3906, "step": 35608 }, { "epoch": 1.994008287602195, "grad_norm": 1.3868930339813232, "learning_rate": 9.589631578947369e-05, "loss": 0.3752, "step": 35609 }, { "epoch": 1.994064284914324, "grad_norm": 1.1747761964797974, "learning_rate": 9.589605263157895e-05, "loss": 0.4691, "step": 35610 }, { "epoch": 1.994120282226453, "grad_norm": 1.198513150215149, "learning_rate": 9.589578947368421e-05, "loss": 0.4682, "step": 35611 }, { "epoch": 1.994176279538582, "grad_norm": 1.2227568626403809, "learning_rate": 9.589552631578947e-05, "loss": 0.4135, "step": 35612 }, { "epoch": 1.994232276850711, "grad_norm": 1.4125950336456299, "learning_rate": 9.589526315789474e-05, "loss": 0.5474, "step": 35613 }, { "epoch": 1.99428827416284, "grad_norm": 1.240922212600708, "learning_rate": 9.5895e-05, "loss": 0.5191, "step": 35614 }, { "epoch": 1.994344271474969, "grad_norm": 1.2878451347351074, "learning_rate": 9.589473684210528e-05, "loss": 0.5735, "step": 35615 }, { "epoch": 1.994400268787098, "grad_norm": 1.459401249885559, "learning_rate": 9.589447368421052e-05, "loss": 0.5433, "step": 35616 }, { "epoch": 1.994456266099227, "grad_norm": 1.2540624141693115, "learning_rate": 9.58942105263158e-05, "loss": 0.4477, "step": 35617 }, { "epoch": 1.9945122634113561, "grad_norm": 1.094946026802063, "learning_rate": 9.589394736842106e-05, "loss": 0.354, "step": 35618 }, { "epoch": 1.9945682607234851, "grad_norm": 1.1434732675552368, "learning_rate": 9.589368421052633e-05, "loss": 0.4327, "step": 35619 }, { "epoch": 1.9946242580356142, "grad_norm": 1.153523564338684, "learning_rate": 9.589342105263159e-05, "loss": 0.3438, "step": 35620 }, { "epoch": 1.9946802553477432, "grad_norm": 1.1065376996994019, "learning_rate": 9.589315789473685e-05, "loss": 0.4217, 
"step": 35621 }, { "epoch": 1.9947362526598722, "grad_norm": 1.3761993646621704, "learning_rate": 9.589289473684211e-05, "loss": 0.4477, "step": 35622 }, { "epoch": 1.9947922499720012, "grad_norm": 1.2070022821426392, "learning_rate": 9.589263157894738e-05, "loss": 0.4723, "step": 35623 }, { "epoch": 1.9948482472841302, "grad_norm": 1.2414604425430298, "learning_rate": 9.589236842105264e-05, "loss": 0.422, "step": 35624 }, { "epoch": 1.9949042445962593, "grad_norm": 1.2085981369018555, "learning_rate": 9.58921052631579e-05, "loss": 0.531, "step": 35625 }, { "epoch": 1.9949602419083883, "grad_norm": 1.2342300415039062, "learning_rate": 9.589184210526316e-05, "loss": 0.4963, "step": 35626 }, { "epoch": 1.9950162392205173, "grad_norm": 1.2143809795379639, "learning_rate": 9.589157894736842e-05, "loss": 0.512, "step": 35627 }, { "epoch": 1.9950722365326463, "grad_norm": 1.24850332736969, "learning_rate": 9.589131578947369e-05, "loss": 0.4538, "step": 35628 }, { "epoch": 1.9951282338447753, "grad_norm": 1.295248031616211, "learning_rate": 9.589105263157895e-05, "loss": 0.5132, "step": 35629 }, { "epoch": 1.9951842311569044, "grad_norm": 1.2572659254074097, "learning_rate": 9.589078947368421e-05, "loss": 0.393, "step": 35630 }, { "epoch": 1.9952402284690334, "grad_norm": 1.2328392267227173, "learning_rate": 9.589052631578947e-05, "loss": 0.4205, "step": 35631 }, { "epoch": 1.9952962257811624, "grad_norm": 15.718681335449219, "learning_rate": 9.589026315789475e-05, "loss": 0.4222, "step": 35632 }, { "epoch": 1.9953522230932914, "grad_norm": 0.9863313436508179, "learning_rate": 9.589e-05, "loss": 0.3564, "step": 35633 }, { "epoch": 1.9954082204054204, "grad_norm": 1.335634469985962, "learning_rate": 9.588973684210526e-05, "loss": 0.4485, "step": 35634 }, { "epoch": 1.9954642177175494, "grad_norm": 1.133293628692627, "learning_rate": 9.588947368421052e-05, "loss": 0.3722, "step": 35635 }, { "epoch": 1.9955202150296785, "grad_norm": 1.2661166191101074, "learning_rate": 
9.58892105263158e-05, "loss": 0.367, "step": 35636 }, { "epoch": 1.9955762123418075, "grad_norm": 1.321421504020691, "learning_rate": 9.588894736842106e-05, "loss": 0.4103, "step": 35637 }, { "epoch": 1.9956322096539365, "grad_norm": 2.1491875648498535, "learning_rate": 9.588868421052633e-05, "loss": 0.4192, "step": 35638 }, { "epoch": 1.9956882069660655, "grad_norm": 1.3580129146575928, "learning_rate": 9.588842105263158e-05, "loss": 0.4743, "step": 35639 }, { "epoch": 1.9957442042781945, "grad_norm": 1.3840094804763794, "learning_rate": 9.588815789473685e-05, "loss": 0.6083, "step": 35640 }, { "epoch": 1.9958002015903236, "grad_norm": 1.2663124799728394, "learning_rate": 9.588789473684211e-05, "loss": 0.3718, "step": 35641 }, { "epoch": 1.9958561989024526, "grad_norm": 1.0742474794387817, "learning_rate": 9.588763157894737e-05, "loss": 0.411, "step": 35642 }, { "epoch": 1.9959121962145816, "grad_norm": 1.621517300605774, "learning_rate": 9.588736842105264e-05, "loss": 0.402, "step": 35643 }, { "epoch": 1.9959681935267106, "grad_norm": 1.3091566562652588, "learning_rate": 9.588710526315789e-05, "loss": 0.4042, "step": 35644 }, { "epoch": 1.9960241908388396, "grad_norm": 1.3030307292938232, "learning_rate": 9.588684210526316e-05, "loss": 0.4972, "step": 35645 }, { "epoch": 1.9960801881509687, "grad_norm": 1.2336479425430298, "learning_rate": 9.588657894736842e-05, "loss": 0.3389, "step": 35646 }, { "epoch": 1.9961361854630977, "grad_norm": 1.6403483152389526, "learning_rate": 9.58863157894737e-05, "loss": 0.5168, "step": 35647 }, { "epoch": 1.9961921827752267, "grad_norm": 1.3678525686264038, "learning_rate": 9.588605263157896e-05, "loss": 0.3554, "step": 35648 }, { "epoch": 1.9962481800873557, "grad_norm": 1.223106026649475, "learning_rate": 9.588578947368422e-05, "loss": 0.5403, "step": 35649 }, { "epoch": 1.9963041773994847, "grad_norm": 1.5367188453674316, "learning_rate": 9.588552631578947e-05, "loss": 0.5844, "step": 35650 }, { "epoch": 1.9963601747116138, 
"grad_norm": 1.2097409963607788, "learning_rate": 9.588526315789475e-05, "loss": 0.5129, "step": 35651 }, { "epoch": 1.9964161720237428, "grad_norm": 1.184790849685669, "learning_rate": 9.588500000000001e-05, "loss": 0.55, "step": 35652 }, { "epoch": 1.9964721693358718, "grad_norm": 1.5261874198913574, "learning_rate": 9.588473684210527e-05, "loss": 0.4259, "step": 35653 }, { "epoch": 1.9965281666480008, "grad_norm": 1.1492356061935425, "learning_rate": 9.588447368421053e-05, "loss": 0.5255, "step": 35654 }, { "epoch": 1.9965841639601298, "grad_norm": 1.4155291318893433, "learning_rate": 9.58842105263158e-05, "loss": 0.6317, "step": 35655 }, { "epoch": 1.9966401612722589, "grad_norm": 1.1190556287765503, "learning_rate": 9.588394736842106e-05, "loss": 0.4293, "step": 35656 }, { "epoch": 1.9966961585843879, "grad_norm": 1.1112204790115356, "learning_rate": 9.588368421052632e-05, "loss": 0.4493, "step": 35657 }, { "epoch": 1.996752155896517, "grad_norm": 1.1368733644485474, "learning_rate": 9.588342105263158e-05, "loss": 0.378, "step": 35658 }, { "epoch": 1.996808153208646, "grad_norm": 1.2593837976455688, "learning_rate": 9.588315789473684e-05, "loss": 0.3516, "step": 35659 }, { "epoch": 1.996864150520775, "grad_norm": 1.0613576173782349, "learning_rate": 9.588289473684211e-05, "loss": 0.3938, "step": 35660 }, { "epoch": 1.996920147832904, "grad_norm": 1.4086543321609497, "learning_rate": 9.588263157894737e-05, "loss": 0.596, "step": 35661 }, { "epoch": 1.996976145145033, "grad_norm": 1.358246088027954, "learning_rate": 9.588236842105263e-05, "loss": 0.4761, "step": 35662 }, { "epoch": 1.997032142457162, "grad_norm": 1.2722880840301514, "learning_rate": 9.588210526315789e-05, "loss": 0.3117, "step": 35663 }, { "epoch": 1.997088139769291, "grad_norm": 1.9498416185379028, "learning_rate": 9.588184210526317e-05, "loss": 0.8083, "step": 35664 }, { "epoch": 1.99714413708142, "grad_norm": 1.12445068359375, "learning_rate": 9.588157894736842e-05, "loss": 0.3728, "step": 
35665 }, { "epoch": 1.997200134393549, "grad_norm": 1.3108285665512085, "learning_rate": 9.588131578947368e-05, "loss": 0.4414, "step": 35666 }, { "epoch": 1.997256131705678, "grad_norm": 1.588301658630371, "learning_rate": 9.588105263157894e-05, "loss": 0.5313, "step": 35667 }, { "epoch": 1.997312129017807, "grad_norm": 1.1827144622802734, "learning_rate": 9.588078947368422e-05, "loss": 0.3846, "step": 35668 }, { "epoch": 1.997368126329936, "grad_norm": 1.1902782917022705, "learning_rate": 9.588052631578948e-05, "loss": 0.3731, "step": 35669 }, { "epoch": 1.9974241236420651, "grad_norm": 1.0918718576431274, "learning_rate": 9.588026315789475e-05, "loss": 0.4079, "step": 35670 }, { "epoch": 1.9974801209541941, "grad_norm": 1.2798343896865845, "learning_rate": 9.588e-05, "loss": 0.4568, "step": 35671 }, { "epoch": 1.9975361182663232, "grad_norm": 1.6583809852600098, "learning_rate": 9.587973684210527e-05, "loss": 0.5875, "step": 35672 }, { "epoch": 1.9975921155784522, "grad_norm": 1.1150529384613037, "learning_rate": 9.587947368421053e-05, "loss": 0.4319, "step": 35673 }, { "epoch": 1.9976481128905812, "grad_norm": 1.0876377820968628, "learning_rate": 9.58792105263158e-05, "loss": 0.4841, "step": 35674 }, { "epoch": 1.9977041102027102, "grad_norm": 1.3036870956420898, "learning_rate": 9.587894736842106e-05, "loss": 0.4038, "step": 35675 }, { "epoch": 1.9977601075148392, "grad_norm": 1.0480951070785522, "learning_rate": 9.587868421052631e-05, "loss": 0.3024, "step": 35676 }, { "epoch": 1.9978161048269683, "grad_norm": 1.4002890586853027, "learning_rate": 9.587842105263158e-05, "loss": 0.4513, "step": 35677 }, { "epoch": 1.9978721021390973, "grad_norm": 1.1184619665145874, "learning_rate": 9.587815789473684e-05, "loss": 0.5511, "step": 35678 }, { "epoch": 1.9979280994512263, "grad_norm": 1.3167643547058105, "learning_rate": 9.587789473684212e-05, "loss": 0.3597, "step": 35679 }, { "epoch": 1.9979840967633553, "grad_norm": 1.3305026292800903, "learning_rate": 
9.587763157894738e-05, "loss": 0.4304, "step": 35680 }, { "epoch": 1.9980400940754843, "grad_norm": 1.1540409326553345, "learning_rate": 9.587736842105263e-05, "loss": 0.4654, "step": 35681 }, { "epoch": 1.9980960913876133, "grad_norm": 1.2186146974563599, "learning_rate": 9.58771052631579e-05, "loss": 0.4228, "step": 35682 }, { "epoch": 1.9981520886997424, "grad_norm": 1.2066798210144043, "learning_rate": 9.587684210526317e-05, "loss": 0.3922, "step": 35683 }, { "epoch": 1.9982080860118714, "grad_norm": 1.2881914377212524, "learning_rate": 9.587657894736843e-05, "loss": 0.6669, "step": 35684 }, { "epoch": 1.9982640833240004, "grad_norm": 1.508577585220337, "learning_rate": 9.587631578947369e-05, "loss": 0.6911, "step": 35685 }, { "epoch": 1.9983200806361294, "grad_norm": 1.2731300592422485, "learning_rate": 9.587605263157895e-05, "loss": 0.4137, "step": 35686 }, { "epoch": 1.9983760779482584, "grad_norm": 1.101730465888977, "learning_rate": 9.587578947368422e-05, "loss": 0.3451, "step": 35687 }, { "epoch": 1.9984320752603875, "grad_norm": 1.026218295097351, "learning_rate": 9.587552631578948e-05, "loss": 0.3838, "step": 35688 }, { "epoch": 1.9984880725725165, "grad_norm": 1.415786623954773, "learning_rate": 9.587526315789474e-05, "loss": 0.4131, "step": 35689 }, { "epoch": 1.9985440698846455, "grad_norm": 1.2542513608932495, "learning_rate": 9.5875e-05, "loss": 0.4029, "step": 35690 }, { "epoch": 1.9986000671967745, "grad_norm": 1.2519434690475464, "learning_rate": 9.587473684210527e-05, "loss": 0.4694, "step": 35691 }, { "epoch": 1.9986560645089035, "grad_norm": 1.3884254693984985, "learning_rate": 9.587447368421053e-05, "loss": 0.5692, "step": 35692 }, { "epoch": 1.9987120618210326, "grad_norm": 1.1429213285446167, "learning_rate": 9.587421052631579e-05, "loss": 0.3987, "step": 35693 }, { "epoch": 1.9987680591331616, "grad_norm": 1.4305481910705566, "learning_rate": 9.587394736842105e-05, "loss": 0.4446, "step": 35694 }, { "epoch": 1.9988240564452906, 
"grad_norm": 1.106413722038269, "learning_rate": 9.587368421052631e-05, "loss": 0.3701, "step": 35695 }, { "epoch": 1.9988800537574196, "grad_norm": 1.6494601964950562, "learning_rate": 9.587342105263158e-05, "loss": 0.6221, "step": 35696 }, { "epoch": 1.9989360510695486, "grad_norm": 1.0754121541976929, "learning_rate": 9.587315789473684e-05, "loss": 0.4384, "step": 35697 }, { "epoch": 1.9989920483816777, "grad_norm": 1.6394118070602417, "learning_rate": 9.587289473684212e-05, "loss": 0.4564, "step": 35698 }, { "epoch": 1.9990480456938067, "grad_norm": 1.4092940092086792, "learning_rate": 9.587263157894736e-05, "loss": 0.4772, "step": 35699 }, { "epoch": 1.9991040430059357, "grad_norm": 1.2801393270492554, "learning_rate": 9.587236842105264e-05, "loss": 0.4322, "step": 35700 }, { "epoch": 1.9991600403180647, "grad_norm": 1.380327582359314, "learning_rate": 9.58721052631579e-05, "loss": 0.4788, "step": 35701 }, { "epoch": 1.9992160376301937, "grad_norm": 1.2255809307098389, "learning_rate": 9.587184210526317e-05, "loss": 0.5594, "step": 35702 }, { "epoch": 1.9992720349423228, "grad_norm": 1.1452220678329468, "learning_rate": 9.587157894736843e-05, "loss": 0.395, "step": 35703 }, { "epoch": 1.9993280322544518, "grad_norm": 1.340274691581726, "learning_rate": 9.587131578947369e-05, "loss": 0.4126, "step": 35704 }, { "epoch": 1.9993840295665808, "grad_norm": 1.1576058864593506, "learning_rate": 9.587105263157895e-05, "loss": 0.4399, "step": 35705 }, { "epoch": 1.9994400268787098, "grad_norm": 1.8450422286987305, "learning_rate": 9.587078947368422e-05, "loss": 0.6133, "step": 35706 }, { "epoch": 1.9994960241908388, "grad_norm": 1.3574519157409668, "learning_rate": 9.587052631578948e-05, "loss": 0.491, "step": 35707 }, { "epoch": 1.9995520215029678, "grad_norm": 1.2326457500457764, "learning_rate": 9.587026315789474e-05, "loss": 0.4273, "step": 35708 }, { "epoch": 1.9996080188150969, "grad_norm": 1.2883415222167969, "learning_rate": 9.587e-05, "loss": 0.5397, "step": 
35709 }, { "epoch": 1.9996640161272259, "grad_norm": 1.2907953262329102, "learning_rate": 9.586973684210528e-05, "loss": 0.4966, "step": 35710 }, { "epoch": 1.999720013439355, "grad_norm": 1.2141108512878418, "learning_rate": 9.586947368421053e-05, "loss": 0.4072, "step": 35711 }, { "epoch": 1.999776010751484, "grad_norm": 1.2814353704452515, "learning_rate": 9.58692105263158e-05, "loss": 0.2963, "step": 35712 }, { "epoch": 1.999832008063613, "grad_norm": 1.3092714548110962, "learning_rate": 9.586894736842105e-05, "loss": 0.5815, "step": 35713 }, { "epoch": 1.999888005375742, "grad_norm": 1.191420316696167, "learning_rate": 9.586868421052631e-05, "loss": 0.5152, "step": 35714 }, { "epoch": 1.999944002687871, "grad_norm": 3.37546968460083, "learning_rate": 9.586842105263159e-05, "loss": 0.4066, "step": 35715 }, { "epoch": 2.0, "grad_norm": 2.749148368835449, "learning_rate": 9.586815789473685e-05, "loss": 0.3993, "step": 35716 }, { "epoch": 2.000055997312129, "grad_norm": 1.182232141494751, "learning_rate": 9.586789473684211e-05, "loss": 0.3358, "step": 35717 }, { "epoch": 2.000111994624258, "grad_norm": 1.2029112577438354, "learning_rate": 9.586763157894737e-05, "loss": 0.4234, "step": 35718 }, { "epoch": 2.000167991936387, "grad_norm": 1.4242031574249268, "learning_rate": 9.586736842105264e-05, "loss": 0.4894, "step": 35719 }, { "epoch": 2.000223989248516, "grad_norm": 4.750683307647705, "learning_rate": 9.58671052631579e-05, "loss": 0.5317, "step": 35720 }, { "epoch": 2.000279986560645, "grad_norm": 1.2182492017745972, "learning_rate": 9.586684210526316e-05, "loss": 0.3997, "step": 35721 }, { "epoch": 2.000335983872774, "grad_norm": 1.1296474933624268, "learning_rate": 9.586657894736842e-05, "loss": 0.3267, "step": 35722 }, { "epoch": 2.000391981184903, "grad_norm": 1.637326717376709, "learning_rate": 9.586631578947369e-05, "loss": 0.363, "step": 35723 }, { "epoch": 2.000447978497032, "grad_norm": 1.2048490047454834, "learning_rate": 9.586605263157895e-05, 
"loss": 0.3873, "step": 35724 }, { "epoch": 2.000503975809161, "grad_norm": 1.240618109703064, "learning_rate": 9.586578947368423e-05, "loss": 0.3913, "step": 35725 }, { "epoch": 2.00055997312129, "grad_norm": 1.374437928199768, "learning_rate": 9.586552631578947e-05, "loss": 0.4084, "step": 35726 }, { "epoch": 2.000615970433419, "grad_norm": 1.311750054359436, "learning_rate": 9.586526315789474e-05, "loss": 0.3994, "step": 35727 }, { "epoch": 2.0006719677455482, "grad_norm": 1.074459195137024, "learning_rate": 9.5865e-05, "loss": 0.3234, "step": 35728 }, { "epoch": 2.0007279650576772, "grad_norm": 1.2244187593460083, "learning_rate": 9.586473684210526e-05, "loss": 0.4227, "step": 35729 }, { "epoch": 2.0007839623698063, "grad_norm": 1.313676357269287, "learning_rate": 9.586447368421054e-05, "loss": 0.3667, "step": 35730 }, { "epoch": 2.0008399596819353, "grad_norm": 1.1889480352401733, "learning_rate": 9.586421052631578e-05, "loss": 0.3467, "step": 35731 }, { "epoch": 2.0008959569940643, "grad_norm": 1.0301470756530762, "learning_rate": 9.586394736842106e-05, "loss": 0.3365, "step": 35732 }, { "epoch": 2.0009519543061933, "grad_norm": 2.0698623657226562, "learning_rate": 9.586368421052632e-05, "loss": 0.5078, "step": 35733 }, { "epoch": 2.0010079516183223, "grad_norm": 1.75558340549469, "learning_rate": 9.586342105263159e-05, "loss": 0.489, "step": 35734 }, { "epoch": 2.0010639489304514, "grad_norm": 1.4140416383743286, "learning_rate": 9.586315789473685e-05, "loss": 0.6721, "step": 35735 }, { "epoch": 2.0011199462425804, "grad_norm": 1.1331034898757935, "learning_rate": 9.586289473684211e-05, "loss": 0.471, "step": 35736 }, { "epoch": 2.0011759435547094, "grad_norm": 1.2293968200683594, "learning_rate": 9.586263157894737e-05, "loss": 0.4646, "step": 35737 }, { "epoch": 2.0012319408668384, "grad_norm": 1.138527750968933, "learning_rate": 9.586236842105264e-05, "loss": 0.3917, "step": 35738 }, { "epoch": 2.0012879381789674, "grad_norm": 1.1666724681854248, 
"learning_rate": 9.58621052631579e-05, "loss": 0.3377, "step": 35739 }, { "epoch": 2.0013439354910965, "grad_norm": 1.2568024396896362, "learning_rate": 9.586184210526316e-05, "loss": 0.4457, "step": 35740 }, { "epoch": 2.0013999328032255, "grad_norm": 1.2787812948226929, "learning_rate": 9.586157894736842e-05, "loss": 0.5511, "step": 35741 }, { "epoch": 2.0014559301153545, "grad_norm": 1.2110960483551025, "learning_rate": 9.58613157894737e-05, "loss": 0.423, "step": 35742 }, { "epoch": 2.0015119274274835, "grad_norm": 1.179447054862976, "learning_rate": 9.586105263157895e-05, "loss": 0.4133, "step": 35743 }, { "epoch": 2.0015679247396125, "grad_norm": 0.9979178309440613, "learning_rate": 9.586078947368421e-05, "loss": 0.397, "step": 35744 }, { "epoch": 2.0016239220517416, "grad_norm": 1.2861733436584473, "learning_rate": 9.586052631578947e-05, "loss": 0.3606, "step": 35745 }, { "epoch": 2.0016799193638706, "grad_norm": 1.187597393989563, "learning_rate": 9.586026315789473e-05, "loss": 0.4863, "step": 35746 }, { "epoch": 2.0017359166759996, "grad_norm": 1.4008166790008545, "learning_rate": 9.586000000000001e-05, "loss": 0.4478, "step": 35747 }, { "epoch": 2.0017919139881286, "grad_norm": 1.7283551692962646, "learning_rate": 9.585973684210527e-05, "loss": 0.475, "step": 35748 }, { "epoch": 2.0018479113002576, "grad_norm": 0.9884861707687378, "learning_rate": 9.585947368421053e-05, "loss": 0.3559, "step": 35749 }, { "epoch": 2.0019039086123867, "grad_norm": 1.0272537469863892, "learning_rate": 9.585921052631579e-05, "loss": 0.3473, "step": 35750 }, { "epoch": 2.0019599059245157, "grad_norm": 1.2969883680343628, "learning_rate": 9.585894736842106e-05, "loss": 0.4908, "step": 35751 }, { "epoch": 2.0020159032366447, "grad_norm": 1.3099888563156128, "learning_rate": 9.585868421052632e-05, "loss": 0.5012, "step": 35752 }, { "epoch": 2.0020719005487737, "grad_norm": 1.1673057079315186, "learning_rate": 9.585842105263159e-05, "loss": 0.3225, "step": 35753 }, { "epoch": 
2.0021278978609027, "grad_norm": 1.207336187362671, "learning_rate": 9.585815789473684e-05, "loss": 0.388, "step": 35754 }, { "epoch": 2.0021838951730317, "grad_norm": 1.1550965309143066, "learning_rate": 9.585789473684211e-05, "loss": 0.3822, "step": 35755 }, { "epoch": 2.0022398924851608, "grad_norm": 1.2960766553878784, "learning_rate": 9.585763157894737e-05, "loss": 0.4392, "step": 35756 }, { "epoch": 2.00229588979729, "grad_norm": 1.2397187948226929, "learning_rate": 9.585736842105265e-05, "loss": 0.4618, "step": 35757 }, { "epoch": 2.002351887109419, "grad_norm": 1.3082901239395142, "learning_rate": 9.58571052631579e-05, "loss": 0.3647, "step": 35758 }, { "epoch": 2.002407884421548, "grad_norm": 1.0328633785247803, "learning_rate": 9.585684210526316e-05, "loss": 0.3228, "step": 35759 }, { "epoch": 2.002463881733677, "grad_norm": 1.1539751291275024, "learning_rate": 9.585657894736842e-05, "loss": 0.4277, "step": 35760 }, { "epoch": 2.002519879045806, "grad_norm": 1.3577837944030762, "learning_rate": 9.58563157894737e-05, "loss": 0.3331, "step": 35761 }, { "epoch": 2.002575876357935, "grad_norm": 1.2352749109268188, "learning_rate": 9.585605263157896e-05, "loss": 0.3721, "step": 35762 }, { "epoch": 2.002631873670064, "grad_norm": 1.2029948234558105, "learning_rate": 9.58557894736842e-05, "loss": 0.4044, "step": 35763 }, { "epoch": 2.002687870982193, "grad_norm": 1.164831280708313, "learning_rate": 9.585552631578948e-05, "loss": 0.4242, "step": 35764 }, { "epoch": 2.002743868294322, "grad_norm": 1.022634744644165, "learning_rate": 9.585526315789474e-05, "loss": 0.3323, "step": 35765 }, { "epoch": 2.002799865606451, "grad_norm": 1.1480488777160645, "learning_rate": 9.585500000000001e-05, "loss": 0.4344, "step": 35766 }, { "epoch": 2.00285586291858, "grad_norm": 1.2631078958511353, "learning_rate": 9.585473684210527e-05, "loss": 0.3399, "step": 35767 }, { "epoch": 2.002911860230709, "grad_norm": 1.3309322595596313, "learning_rate": 9.585447368421053e-05, "loss": 
0.3777, "step": 35768 }, { "epoch": 2.002967857542838, "grad_norm": 1.3151357173919678, "learning_rate": 9.585421052631579e-05, "loss": 0.4689, "step": 35769 }, { "epoch": 2.003023854854967, "grad_norm": 1.2066999673843384, "learning_rate": 9.585394736842106e-05, "loss": 0.4101, "step": 35770 }, { "epoch": 2.003079852167096, "grad_norm": 1.628219485282898, "learning_rate": 9.585368421052632e-05, "loss": 0.4464, "step": 35771 }, { "epoch": 2.003135849479225, "grad_norm": 1.8368597030639648, "learning_rate": 9.585342105263158e-05, "loss": 0.376, "step": 35772 }, { "epoch": 2.003191846791354, "grad_norm": 1.2289546728134155, "learning_rate": 9.585315789473684e-05, "loss": 0.3091, "step": 35773 }, { "epoch": 2.003247844103483, "grad_norm": 1.1499255895614624, "learning_rate": 9.585289473684211e-05, "loss": 0.3235, "step": 35774 }, { "epoch": 2.003303841415612, "grad_norm": 1.0664421319961548, "learning_rate": 9.585263157894737e-05, "loss": 0.3417, "step": 35775 }, { "epoch": 2.003359838727741, "grad_norm": 1.402164340019226, "learning_rate": 9.585236842105263e-05, "loss": 0.3967, "step": 35776 }, { "epoch": 2.00341583603987, "grad_norm": 1.247530460357666, "learning_rate": 9.58521052631579e-05, "loss": 0.4341, "step": 35777 }, { "epoch": 2.003471833351999, "grad_norm": 1.1795834302902222, "learning_rate": 9.585184210526317e-05, "loss": 0.3836, "step": 35778 }, { "epoch": 2.003527830664128, "grad_norm": 1.086114764213562, "learning_rate": 9.585157894736843e-05, "loss": 0.41, "step": 35779 }, { "epoch": 2.0035838279762572, "grad_norm": 1.1867990493774414, "learning_rate": 9.585131578947369e-05, "loss": 0.3922, "step": 35780 }, { "epoch": 2.0036398252883862, "grad_norm": 1.5759308338165283, "learning_rate": 9.585105263157895e-05, "loss": 0.4954, "step": 35781 }, { "epoch": 2.0036958226005153, "grad_norm": 1.1702313423156738, "learning_rate": 9.58507894736842e-05, "loss": 0.4298, "step": 35782 }, { "epoch": 2.0037518199126443, "grad_norm": 1.2212941646575928, 
"learning_rate": 9.585052631578948e-05, "loss": 0.4765, "step": 35783 }, { "epoch": 2.0038078172247733, "grad_norm": 1.2862499952316284, "learning_rate": 9.585026315789474e-05, "loss": 0.3819, "step": 35784 }, { "epoch": 2.0038638145369023, "grad_norm": 1.1117898225784302, "learning_rate": 9.585000000000001e-05, "loss": 0.4198, "step": 35785 }, { "epoch": 2.0039198118490313, "grad_norm": 1.0650211572647095, "learning_rate": 9.584973684210526e-05, "loss": 0.2853, "step": 35786 }, { "epoch": 2.0039758091611604, "grad_norm": 1.1335151195526123, "learning_rate": 9.584947368421053e-05, "loss": 0.3703, "step": 35787 }, { "epoch": 2.0040318064732894, "grad_norm": 1.1390008926391602, "learning_rate": 9.584921052631579e-05, "loss": 0.4315, "step": 35788 }, { "epoch": 2.0040878037854184, "grad_norm": 1.2367141246795654, "learning_rate": 9.584894736842106e-05, "loss": 0.3785, "step": 35789 }, { "epoch": 2.0041438010975474, "grad_norm": 1.0569877624511719, "learning_rate": 9.584868421052632e-05, "loss": 0.3036, "step": 35790 }, { "epoch": 2.0041997984096764, "grad_norm": 1.3080319166183472, "learning_rate": 9.584842105263158e-05, "loss": 0.3466, "step": 35791 }, { "epoch": 2.0042557957218055, "grad_norm": 1.15290367603302, "learning_rate": 9.584815789473684e-05, "loss": 0.3132, "step": 35792 }, { "epoch": 2.0043117930339345, "grad_norm": 1.5112502574920654, "learning_rate": 9.584789473684212e-05, "loss": 0.2645, "step": 35793 }, { "epoch": 2.0043677903460635, "grad_norm": 1.4089183807373047, "learning_rate": 9.584763157894738e-05, "loss": 0.4406, "step": 35794 }, { "epoch": 2.0044237876581925, "grad_norm": 1.319968581199646, "learning_rate": 9.584736842105264e-05, "loss": 0.4964, "step": 35795 }, { "epoch": 2.0044797849703215, "grad_norm": 1.488022804260254, "learning_rate": 9.58471052631579e-05, "loss": 0.4173, "step": 35796 }, { "epoch": 2.0045357822824506, "grad_norm": 0.944221019744873, "learning_rate": 9.584684210526316e-05, "loss": 0.3124, "step": 35797 }, { "epoch": 
2.0045917795945796, "grad_norm": 1.1327488422393799, "learning_rate": 9.584657894736843e-05, "loss": 0.4095, "step": 35798 }, { "epoch": 2.0046477769067086, "grad_norm": 1.058314323425293, "learning_rate": 9.584631578947369e-05, "loss": 0.3432, "step": 35799 }, { "epoch": 2.0047037742188376, "grad_norm": 1.3376262187957764, "learning_rate": 9.584605263157895e-05, "loss": 0.4135, "step": 35800 }, { "epoch": 2.0047597715309666, "grad_norm": 1.2323520183563232, "learning_rate": 9.584578947368421e-05, "loss": 0.3806, "step": 35801 }, { "epoch": 2.0048157688430956, "grad_norm": 1.4351063966751099, "learning_rate": 9.584552631578948e-05, "loss": 0.4225, "step": 35802 }, { "epoch": 2.0048717661552247, "grad_norm": 1.1267834901809692, "learning_rate": 9.584526315789474e-05, "loss": 0.4466, "step": 35803 }, { "epoch": 2.0049277634673537, "grad_norm": 1.6285756826400757, "learning_rate": 9.5845e-05, "loss": 0.3928, "step": 35804 }, { "epoch": 2.0049837607794827, "grad_norm": 1.2738834619522095, "learning_rate": 9.584473684210526e-05, "loss": 0.4749, "step": 35805 }, { "epoch": 2.0050397580916117, "grad_norm": 1.0531673431396484, "learning_rate": 9.584447368421053e-05, "loss": 0.2921, "step": 35806 }, { "epoch": 2.0050957554037407, "grad_norm": 1.0331834554672241, "learning_rate": 9.58442105263158e-05, "loss": 0.3885, "step": 35807 }, { "epoch": 2.0051517527158698, "grad_norm": 1.1604411602020264, "learning_rate": 9.584394736842107e-05, "loss": 0.4515, "step": 35808 }, { "epoch": 2.005207750027999, "grad_norm": 1.1311315298080444, "learning_rate": 9.584368421052631e-05, "loss": 0.4762, "step": 35809 }, { "epoch": 2.005263747340128, "grad_norm": 1.404622197151184, "learning_rate": 9.584342105263159e-05, "loss": 0.4433, "step": 35810 }, { "epoch": 2.005319744652257, "grad_norm": 1.1629540920257568, "learning_rate": 9.584315789473685e-05, "loss": 0.4171, "step": 35811 }, { "epoch": 2.005375741964386, "grad_norm": 1.1784030199050903, "learning_rate": 9.584289473684212e-05, 
"loss": 0.3564, "step": 35812 }, { "epoch": 2.005431739276515, "grad_norm": 1.2254221439361572, "learning_rate": 9.584263157894737e-05, "loss": 0.5103, "step": 35813 }, { "epoch": 2.005487736588644, "grad_norm": 1.1892682313919067, "learning_rate": 9.584236842105263e-05, "loss": 0.3648, "step": 35814 }, { "epoch": 2.005543733900773, "grad_norm": 1.1096426248550415, "learning_rate": 9.58421052631579e-05, "loss": 0.3227, "step": 35815 }, { "epoch": 2.005599731212902, "grad_norm": 1.273844599723816, "learning_rate": 9.584184210526316e-05, "loss": 0.4548, "step": 35816 }, { "epoch": 2.005655728525031, "grad_norm": 1.563796877861023, "learning_rate": 9.584157894736843e-05, "loss": 0.5533, "step": 35817 }, { "epoch": 2.00571172583716, "grad_norm": 1.0604662895202637, "learning_rate": 9.584131578947368e-05, "loss": 0.3694, "step": 35818 }, { "epoch": 2.005767723149289, "grad_norm": 1.0590765476226807, "learning_rate": 9.584105263157895e-05, "loss": 0.3599, "step": 35819 }, { "epoch": 2.005823720461418, "grad_norm": 1.3057653903961182, "learning_rate": 9.584078947368421e-05, "loss": 0.3673, "step": 35820 }, { "epoch": 2.005879717773547, "grad_norm": 1.3801923990249634, "learning_rate": 9.584052631578948e-05, "loss": 0.4259, "step": 35821 }, { "epoch": 2.005935715085676, "grad_norm": 1.0347604751586914, "learning_rate": 9.584026315789474e-05, "loss": 0.4183, "step": 35822 }, { "epoch": 2.005991712397805, "grad_norm": 1.06027352809906, "learning_rate": 9.584e-05, "loss": 0.4672, "step": 35823 }, { "epoch": 2.006047709709934, "grad_norm": 1.2466121912002563, "learning_rate": 9.583973684210526e-05, "loss": 0.3761, "step": 35824 }, { "epoch": 2.006103707022063, "grad_norm": 1.213773250579834, "learning_rate": 9.583947368421054e-05, "loss": 0.3395, "step": 35825 }, { "epoch": 2.006159704334192, "grad_norm": 1.1831530332565308, "learning_rate": 9.58392105263158e-05, "loss": 0.4747, "step": 35826 }, { "epoch": 2.006215701646321, "grad_norm": 1.307273507118225, "learning_rate": 
9.583894736842106e-05, "loss": 0.4007, "step": 35827 }, { "epoch": 2.00627169895845, "grad_norm": 1.3315942287445068, "learning_rate": 9.583868421052632e-05, "loss": 0.4129, "step": 35828 }, { "epoch": 2.006327696270579, "grad_norm": 1.1211820840835571, "learning_rate": 9.583842105263159e-05, "loss": 0.4427, "step": 35829 }, { "epoch": 2.006383693582708, "grad_norm": 1.229960560798645, "learning_rate": 9.583815789473685e-05, "loss": 0.3709, "step": 35830 }, { "epoch": 2.006439690894837, "grad_norm": 1.319773554801941, "learning_rate": 9.583789473684211e-05, "loss": 0.3947, "step": 35831 }, { "epoch": 2.006495688206966, "grad_norm": 1.1925987005233765, "learning_rate": 9.583763157894737e-05, "loss": 0.4663, "step": 35832 }, { "epoch": 2.0065516855190952, "grad_norm": 1.1169219017028809, "learning_rate": 9.583736842105263e-05, "loss": 0.5099, "step": 35833 }, { "epoch": 2.0066076828312243, "grad_norm": 1.2888444662094116, "learning_rate": 9.58371052631579e-05, "loss": 0.4546, "step": 35834 }, { "epoch": 2.0066636801433533, "grad_norm": 0.917158305644989, "learning_rate": 9.583684210526316e-05, "loss": 0.4137, "step": 35835 }, { "epoch": 2.0067196774554823, "grad_norm": 1.3366646766662598, "learning_rate": 9.583657894736842e-05, "loss": 0.5159, "step": 35836 }, { "epoch": 2.0067756747676113, "grad_norm": 1.1978360414505005, "learning_rate": 9.583631578947368e-05, "loss": 0.3692, "step": 35837 }, { "epoch": 2.0068316720797403, "grad_norm": 1.040445327758789, "learning_rate": 9.583605263157895e-05, "loss": 0.3494, "step": 35838 }, { "epoch": 2.0068876693918694, "grad_norm": 1.335199236869812, "learning_rate": 9.583578947368421e-05, "loss": 0.4833, "step": 35839 }, { "epoch": 2.0069436667039984, "grad_norm": 1.7000359296798706, "learning_rate": 9.583552631578949e-05, "loss": 0.5331, "step": 35840 }, { "epoch": 2.0069996640161274, "grad_norm": 1.0351409912109375, "learning_rate": 9.583526315789473e-05, "loss": 0.3562, "step": 35841 }, { "epoch": 2.0070556613282564, 
"grad_norm": 1.117228627204895, "learning_rate": 9.5835e-05, "loss": 0.3918, "step": 35842 }, { "epoch": 2.0071116586403854, "grad_norm": 1.4779503345489502, "learning_rate": 9.583473684210527e-05, "loss": 0.3577, "step": 35843 }, { "epoch": 2.0071676559525145, "grad_norm": 1.2578717470169067, "learning_rate": 9.583447368421054e-05, "loss": 0.4472, "step": 35844 }, { "epoch": 2.0072236532646435, "grad_norm": 1.2031910419464111, "learning_rate": 9.58342105263158e-05, "loss": 0.5332, "step": 35845 }, { "epoch": 2.0072796505767725, "grad_norm": 2.2340481281280518, "learning_rate": 9.583394736842106e-05, "loss": 0.4782, "step": 35846 }, { "epoch": 2.0073356478889015, "grad_norm": 1.074960708618164, "learning_rate": 9.583368421052632e-05, "loss": 0.4155, "step": 35847 }, { "epoch": 2.0073916452010305, "grad_norm": 1.1378623247146606, "learning_rate": 9.583342105263159e-05, "loss": 0.4665, "step": 35848 }, { "epoch": 2.0074476425131595, "grad_norm": 1.3077210187911987, "learning_rate": 9.583315789473685e-05, "loss": 0.386, "step": 35849 }, { "epoch": 2.0075036398252886, "grad_norm": 1.2918747663497925, "learning_rate": 9.583289473684211e-05, "loss": 0.3627, "step": 35850 }, { "epoch": 2.0075596371374176, "grad_norm": 1.1277105808258057, "learning_rate": 9.583263157894737e-05, "loss": 0.3799, "step": 35851 }, { "epoch": 2.0076156344495466, "grad_norm": 1.3728315830230713, "learning_rate": 9.583236842105263e-05, "loss": 0.5168, "step": 35852 }, { "epoch": 2.0076716317616756, "grad_norm": 1.2692402601242065, "learning_rate": 9.58321052631579e-05, "loss": 0.3778, "step": 35853 }, { "epoch": 2.0077276290738046, "grad_norm": 1.251347303390503, "learning_rate": 9.583184210526316e-05, "loss": 0.4026, "step": 35854 }, { "epoch": 2.0077836263859337, "grad_norm": 1.8793201446533203, "learning_rate": 9.583157894736842e-05, "loss": 0.3749, "step": 35855 }, { "epoch": 2.0078396236980627, "grad_norm": 1.4593137502670288, "learning_rate": 9.583131578947368e-05, "loss": 0.4253, "step": 
35856 }, { "epoch": 2.0078956210101917, "grad_norm": 1.2291958332061768, "learning_rate": 9.583105263157896e-05, "loss": 0.4263, "step": 35857 }, { "epoch": 2.0079516183223207, "grad_norm": 1.1300537586212158, "learning_rate": 9.583078947368422e-05, "loss": 0.442, "step": 35858 }, { "epoch": 2.0080076156344497, "grad_norm": 2.294628381729126, "learning_rate": 9.583052631578948e-05, "loss": 0.5886, "step": 35859 }, { "epoch": 2.0080636129465788, "grad_norm": 1.2356338500976562, "learning_rate": 9.583026315789474e-05, "loss": 0.3758, "step": 35860 }, { "epoch": 2.0081196102587078, "grad_norm": 1.3109859228134155, "learning_rate": 9.583000000000001e-05, "loss": 0.437, "step": 35861 }, { "epoch": 2.008175607570837, "grad_norm": 1.2050403356552124, "learning_rate": 9.582973684210527e-05, "loss": 0.4448, "step": 35862 }, { "epoch": 2.008231604882966, "grad_norm": 1.7827585935592651, "learning_rate": 9.582947368421054e-05, "loss": 0.4063, "step": 35863 }, { "epoch": 2.008287602195095, "grad_norm": 1.5390233993530273, "learning_rate": 9.582921052631579e-05, "loss": 0.5479, "step": 35864 }, { "epoch": 2.008343599507224, "grad_norm": 1.1264935731887817, "learning_rate": 9.582894736842106e-05, "loss": 0.3283, "step": 35865 }, { "epoch": 2.008399596819353, "grad_norm": 1.103168249130249, "learning_rate": 9.582868421052632e-05, "loss": 0.4165, "step": 35866 }, { "epoch": 2.008455594131482, "grad_norm": 1.0895768404006958, "learning_rate": 9.582842105263158e-05, "loss": 0.3414, "step": 35867 }, { "epoch": 2.008511591443611, "grad_norm": 1.1353875398635864, "learning_rate": 9.582815789473684e-05, "loss": 0.4349, "step": 35868 }, { "epoch": 2.00856758875574, "grad_norm": 1.701114535331726, "learning_rate": 9.58278947368421e-05, "loss": 0.4884, "step": 35869 }, { "epoch": 2.008623586067869, "grad_norm": 1.1399396657943726, "learning_rate": 9.582763157894737e-05, "loss": 0.3099, "step": 35870 }, { "epoch": 2.008679583379998, "grad_norm": 1.2674838304519653, "learning_rate": 
9.582736842105263e-05, "loss": 0.3562, "step": 35871 }, { "epoch": 2.008735580692127, "grad_norm": 1.2159181833267212, "learning_rate": 9.582710526315791e-05, "loss": 0.3508, "step": 35872 }, { "epoch": 2.008791578004256, "grad_norm": 1.099257469177246, "learning_rate": 9.582684210526315e-05, "loss": 0.3758, "step": 35873 }, { "epoch": 2.008847575316385, "grad_norm": 1.1834862232208252, "learning_rate": 9.582657894736843e-05, "loss": 0.3256, "step": 35874 }, { "epoch": 2.008903572628514, "grad_norm": 1.1165449619293213, "learning_rate": 9.582631578947369e-05, "loss": 0.3158, "step": 35875 }, { "epoch": 2.008959569940643, "grad_norm": 1.0332053899765015, "learning_rate": 9.582605263157896e-05, "loss": 0.3302, "step": 35876 }, { "epoch": 2.009015567252772, "grad_norm": 1.2526644468307495, "learning_rate": 9.582578947368422e-05, "loss": 0.3243, "step": 35877 }, { "epoch": 2.009071564564901, "grad_norm": 1.3846008777618408, "learning_rate": 9.582552631578948e-05, "loss": 0.5322, "step": 35878 }, { "epoch": 2.00912756187703, "grad_norm": 1.2860394716262817, "learning_rate": 9.582526315789474e-05, "loss": 0.4885, "step": 35879 }, { "epoch": 2.009183559189159, "grad_norm": 1.0912970304489136, "learning_rate": 9.582500000000001e-05, "loss": 0.3656, "step": 35880 }, { "epoch": 2.0092395565012877, "grad_norm": 1.3248655796051025, "learning_rate": 9.582473684210527e-05, "loss": 0.5045, "step": 35881 }, { "epoch": 2.0092955538134167, "grad_norm": 8.629794120788574, "learning_rate": 9.582447368421053e-05, "loss": 0.5859, "step": 35882 }, { "epoch": 2.0093515511255458, "grad_norm": 1.2444722652435303, "learning_rate": 9.582421052631579e-05, "loss": 0.3668, "step": 35883 }, { "epoch": 2.0094075484376748, "grad_norm": 1.117751121520996, "learning_rate": 9.582394736842105e-05, "loss": 0.3771, "step": 35884 }, { "epoch": 2.009463545749804, "grad_norm": 1.234378457069397, "learning_rate": 9.582368421052632e-05, "loss": 0.316, "step": 35885 }, { "epoch": 2.009519543061933, 
"grad_norm": 1.1099599599838257, "learning_rate": 9.582342105263158e-05, "loss": 0.3597, "step": 35886 }, { "epoch": 2.009575540374062, "grad_norm": 1.1704127788543701, "learning_rate": 9.582315789473684e-05, "loss": 0.6218, "step": 35887 }, { "epoch": 2.009631537686191, "grad_norm": 1.6880762577056885, "learning_rate": 9.58228947368421e-05, "loss": 0.5584, "step": 35888 }, { "epoch": 2.00968753499832, "grad_norm": 1.1258209943771362, "learning_rate": 9.582263157894738e-05, "loss": 0.3726, "step": 35889 }, { "epoch": 2.009743532310449, "grad_norm": 1.1790224313735962, "learning_rate": 9.582236842105264e-05, "loss": 0.3754, "step": 35890 }, { "epoch": 2.009799529622578, "grad_norm": 1.0744763612747192, "learning_rate": 9.58221052631579e-05, "loss": 0.4505, "step": 35891 }, { "epoch": 2.009855526934707, "grad_norm": 1.2784233093261719, "learning_rate": 9.582184210526316e-05, "loss": 0.3827, "step": 35892 }, { "epoch": 2.009911524246836, "grad_norm": 1.1017351150512695, "learning_rate": 9.582157894736843e-05, "loss": 0.3326, "step": 35893 }, { "epoch": 2.009967521558965, "grad_norm": 1.0899889469146729, "learning_rate": 9.582131578947369e-05, "loss": 0.3688, "step": 35894 }, { "epoch": 2.010023518871094, "grad_norm": 1.0779321193695068, "learning_rate": 9.582105263157896e-05, "loss": 0.456, "step": 35895 }, { "epoch": 2.010079516183223, "grad_norm": 1.6367486715316772, "learning_rate": 9.582078947368421e-05, "loss": 0.41, "step": 35896 }, { "epoch": 2.010135513495352, "grad_norm": 1.3015451431274414, "learning_rate": 9.582052631578948e-05, "loss": 0.5063, "step": 35897 }, { "epoch": 2.010191510807481, "grad_norm": 1.2744941711425781, "learning_rate": 9.582026315789474e-05, "loss": 0.3712, "step": 35898 }, { "epoch": 2.01024750811961, "grad_norm": 1.3015434741973877, "learning_rate": 9.582000000000001e-05, "loss": 0.4476, "step": 35899 }, { "epoch": 2.010303505431739, "grad_norm": 1.184593915939331, "learning_rate": 9.581973684210527e-05, "loss": 0.3752, "step": 35900 
}, { "epoch": 2.010359502743868, "grad_norm": 1.2621227502822876, "learning_rate": 9.581947368421052e-05, "loss": 0.5791, "step": 35901 }, { "epoch": 2.010415500055997, "grad_norm": 1.1744240522384644, "learning_rate": 9.58192105263158e-05, "loss": 0.5119, "step": 35902 }, { "epoch": 2.010471497368126, "grad_norm": 1.249359130859375, "learning_rate": 9.581894736842105e-05, "loss": 0.4097, "step": 35903 }, { "epoch": 2.010527494680255, "grad_norm": 1.1291635036468506, "learning_rate": 9.581868421052633e-05, "loss": 0.5405, "step": 35904 }, { "epoch": 2.010583491992384, "grad_norm": 1.0455520153045654, "learning_rate": 9.581842105263159e-05, "loss": 0.2966, "step": 35905 }, { "epoch": 2.010639489304513, "grad_norm": 1.5298330783843994, "learning_rate": 9.581815789473685e-05, "loss": 0.3256, "step": 35906 }, { "epoch": 2.010695486616642, "grad_norm": 1.1614567041397095, "learning_rate": 9.58178947368421e-05, "loss": 0.3451, "step": 35907 }, { "epoch": 2.0107514839287712, "grad_norm": 1.3921276330947876, "learning_rate": 9.581763157894738e-05, "loss": 0.5074, "step": 35908 }, { "epoch": 2.0108074812409003, "grad_norm": 1.4734488725662231, "learning_rate": 9.581736842105264e-05, "loss": 0.4119, "step": 35909 }, { "epoch": 2.0108634785530293, "grad_norm": 1.422045350074768, "learning_rate": 9.58171052631579e-05, "loss": 0.4492, "step": 35910 }, { "epoch": 2.0109194758651583, "grad_norm": 0.9959626197814941, "learning_rate": 9.581684210526316e-05, "loss": 0.3575, "step": 35911 }, { "epoch": 2.0109754731772873, "grad_norm": 1.044052004814148, "learning_rate": 9.581657894736843e-05, "loss": 0.3031, "step": 35912 }, { "epoch": 2.0110314704894163, "grad_norm": 1.3160734176635742, "learning_rate": 9.581631578947369e-05, "loss": 0.3027, "step": 35913 }, { "epoch": 2.0110874678015453, "grad_norm": 1.310926079750061, "learning_rate": 9.581605263157895e-05, "loss": 0.5398, "step": 35914 }, { "epoch": 2.0111434651136744, "grad_norm": 1.1180884838104248, "learning_rate": 
9.581578947368421e-05, "loss": 0.3767, "step": 35915 }, { "epoch": 2.0111994624258034, "grad_norm": 1.181401252746582, "learning_rate": 9.581552631578948e-05, "loss": 0.4468, "step": 35916 }, { "epoch": 2.0112554597379324, "grad_norm": 0.9832403063774109, "learning_rate": 9.581526315789474e-05, "loss": 0.3448, "step": 35917 }, { "epoch": 2.0113114570500614, "grad_norm": 1.194062352180481, "learning_rate": 9.5815e-05, "loss": 0.3539, "step": 35918 }, { "epoch": 2.0113674543621904, "grad_norm": 1.1809194087982178, "learning_rate": 9.581473684210526e-05, "loss": 0.5476, "step": 35919 }, { "epoch": 2.0114234516743195, "grad_norm": 1.3839787244796753, "learning_rate": 9.581447368421052e-05, "loss": 0.3048, "step": 35920 }, { "epoch": 2.0114794489864485, "grad_norm": 1.1443463563919067, "learning_rate": 9.58142105263158e-05, "loss": 0.4274, "step": 35921 }, { "epoch": 2.0115354462985775, "grad_norm": 1.2282607555389404, "learning_rate": 9.581394736842106e-05, "loss": 0.4215, "step": 35922 }, { "epoch": 2.0115914436107065, "grad_norm": 1.3101545572280884, "learning_rate": 9.581368421052632e-05, "loss": 0.4682, "step": 35923 }, { "epoch": 2.0116474409228355, "grad_norm": 1.539891242980957, "learning_rate": 9.581342105263158e-05, "loss": 0.5432, "step": 35924 }, { "epoch": 2.0117034382349646, "grad_norm": 1.0843690633773804, "learning_rate": 9.581315789473685e-05, "loss": 0.3139, "step": 35925 }, { "epoch": 2.0117594355470936, "grad_norm": 1.157828688621521, "learning_rate": 9.581289473684211e-05, "loss": 0.4349, "step": 35926 }, { "epoch": 2.0118154328592226, "grad_norm": 1.4255735874176025, "learning_rate": 9.581263157894738e-05, "loss": 0.4029, "step": 35927 }, { "epoch": 2.0118714301713516, "grad_norm": 1.1901848316192627, "learning_rate": 9.581236842105263e-05, "loss": 0.3723, "step": 35928 }, { "epoch": 2.0119274274834806, "grad_norm": 4.131755828857422, "learning_rate": 9.58121052631579e-05, "loss": 0.5211, "step": 35929 }, { "epoch": 2.0119834247956097, "grad_norm": 
1.1934020519256592, "learning_rate": 9.581184210526316e-05, "loss": 0.3866, "step": 35930 }, { "epoch": 2.0120394221077387, "grad_norm": 1.4586005210876465, "learning_rate": 9.581157894736843e-05, "loss": 0.4046, "step": 35931 }, { "epoch": 2.0120954194198677, "grad_norm": 0.9839408993721008, "learning_rate": 9.58113157894737e-05, "loss": 0.3388, "step": 35932 }, { "epoch": 2.0121514167319967, "grad_norm": 1.388522982597351, "learning_rate": 9.581105263157895e-05, "loss": 0.4598, "step": 35933 }, { "epoch": 2.0122074140441257, "grad_norm": 1.2487350702285767, "learning_rate": 9.581078947368421e-05, "loss": 0.4325, "step": 35934 }, { "epoch": 2.0122634113562547, "grad_norm": 1.2178984880447388, "learning_rate": 9.581052631578947e-05, "loss": 0.4311, "step": 35935 }, { "epoch": 2.0123194086683838, "grad_norm": 1.1398550271987915, "learning_rate": 9.581026315789475e-05, "loss": 0.4934, "step": 35936 }, { "epoch": 2.012375405980513, "grad_norm": 1.0782445669174194, "learning_rate": 9.581e-05, "loss": 0.4676, "step": 35937 }, { "epoch": 2.012431403292642, "grad_norm": 1.114208698272705, "learning_rate": 9.580973684210527e-05, "loss": 0.3577, "step": 35938 }, { "epoch": 2.012487400604771, "grad_norm": 1.2817232608795166, "learning_rate": 9.580947368421053e-05, "loss": 0.4958, "step": 35939 }, { "epoch": 2.0125433979169, "grad_norm": 1.174094796180725, "learning_rate": 9.58092105263158e-05, "loss": 0.5193, "step": 35940 }, { "epoch": 2.012599395229029, "grad_norm": 1.1445204019546509, "learning_rate": 9.580894736842106e-05, "loss": 0.3182, "step": 35941 }, { "epoch": 2.012655392541158, "grad_norm": 1.3254717588424683, "learning_rate": 9.580868421052632e-05, "loss": 0.4159, "step": 35942 }, { "epoch": 2.012711389853287, "grad_norm": 3.3294131755828857, "learning_rate": 9.580842105263158e-05, "loss": 0.3783, "step": 35943 }, { "epoch": 2.012767387165416, "grad_norm": 1.3575527667999268, "learning_rate": 9.580815789473685e-05, "loss": 0.4383, "step": 35944 }, { "epoch": 
2.012823384477545, "grad_norm": 1.1147736310958862, "learning_rate": 9.580789473684211e-05, "loss": 0.4005, "step": 35945 }, { "epoch": 2.012879381789674, "grad_norm": 0.9356734752655029, "learning_rate": 9.580763157894737e-05, "loss": 0.3653, "step": 35946 }, { "epoch": 2.012935379101803, "grad_norm": 1.1593421697616577, "learning_rate": 9.580736842105263e-05, "loss": 0.3229, "step": 35947 }, { "epoch": 2.012991376413932, "grad_norm": 1.0800411701202393, "learning_rate": 9.58071052631579e-05, "loss": 0.5072, "step": 35948 }, { "epoch": 2.013047373726061, "grad_norm": 1.129819631576538, "learning_rate": 9.580684210526316e-05, "loss": 0.317, "step": 35949 }, { "epoch": 2.01310337103819, "grad_norm": 1.2670780420303345, "learning_rate": 9.580657894736844e-05, "loss": 0.4502, "step": 35950 }, { "epoch": 2.013159368350319, "grad_norm": 1.0401873588562012, "learning_rate": 9.580631578947368e-05, "loss": 0.2882, "step": 35951 }, { "epoch": 2.013215365662448, "grad_norm": 1.3120824098587036, "learning_rate": 9.580605263157896e-05, "loss": 0.4081, "step": 35952 }, { "epoch": 2.013271362974577, "grad_norm": 1.2080658674240112, "learning_rate": 9.580578947368422e-05, "loss": 0.2895, "step": 35953 }, { "epoch": 2.013327360286706, "grad_norm": 1.1258790493011475, "learning_rate": 9.580552631578948e-05, "loss": 0.3108, "step": 35954 }, { "epoch": 2.013383357598835, "grad_norm": 1.244515061378479, "learning_rate": 9.580526315789475e-05, "loss": 0.4727, "step": 35955 }, { "epoch": 2.013439354910964, "grad_norm": 1.1742671728134155, "learning_rate": 9.5805e-05, "loss": 0.3761, "step": 35956 }, { "epoch": 2.013495352223093, "grad_norm": 1.2236824035644531, "learning_rate": 9.580473684210527e-05, "loss": 0.3268, "step": 35957 }, { "epoch": 2.013551349535222, "grad_norm": 0.9712657928466797, "learning_rate": 9.580447368421053e-05, "loss": 0.3369, "step": 35958 }, { "epoch": 2.013607346847351, "grad_norm": 1.0330934524536133, "learning_rate": 9.58042105263158e-05, "loss": 0.3252, 
"step": 35959 }, { "epoch": 2.0136633441594802, "grad_norm": 1.1708682775497437, "learning_rate": 9.580394736842106e-05, "loss": 0.4575, "step": 35960 }, { "epoch": 2.0137193414716092, "grad_norm": 1.1404727697372437, "learning_rate": 9.580368421052632e-05, "loss": 0.3624, "step": 35961 }, { "epoch": 2.0137753387837383, "grad_norm": 1.5483572483062744, "learning_rate": 9.580342105263158e-05, "loss": 0.3849, "step": 35962 }, { "epoch": 2.0138313360958673, "grad_norm": 1.1538753509521484, "learning_rate": 9.580315789473685e-05, "loss": 0.4729, "step": 35963 }, { "epoch": 2.0138873334079963, "grad_norm": 1.1414966583251953, "learning_rate": 9.580289473684211e-05, "loss": 0.4258, "step": 35964 }, { "epoch": 2.0139433307201253, "grad_norm": 1.4914534091949463, "learning_rate": 9.580263157894737e-05, "loss": 0.3606, "step": 35965 }, { "epoch": 2.0139993280322543, "grad_norm": 1.128311038017273, "learning_rate": 9.580236842105263e-05, "loss": 0.4615, "step": 35966 }, { "epoch": 2.0140553253443834, "grad_norm": 1.1732231378555298, "learning_rate": 9.58021052631579e-05, "loss": 0.4487, "step": 35967 }, { "epoch": 2.0141113226565124, "grad_norm": 1.0852938890457153, "learning_rate": 9.580184210526317e-05, "loss": 0.3207, "step": 35968 }, { "epoch": 2.0141673199686414, "grad_norm": 1.2399272918701172, "learning_rate": 9.580157894736843e-05, "loss": 0.4097, "step": 35969 }, { "epoch": 2.0142233172807704, "grad_norm": 1.2835215330123901, "learning_rate": 9.580131578947369e-05, "loss": 0.4627, "step": 35970 }, { "epoch": 2.0142793145928994, "grad_norm": 1.1350926160812378, "learning_rate": 9.580105263157895e-05, "loss": 0.3804, "step": 35971 }, { "epoch": 2.0143353119050285, "grad_norm": 1.1614155769348145, "learning_rate": 9.580078947368422e-05, "loss": 0.4744, "step": 35972 }, { "epoch": 2.0143913092171575, "grad_norm": 1.3877421617507935, "learning_rate": 9.580052631578948e-05, "loss": 0.5546, "step": 35973 }, { "epoch": 2.0144473065292865, "grad_norm": 1.0699504613876343, 
"learning_rate": 9.580026315789474e-05, "loss": 0.4023, "step": 35974 }, { "epoch": 2.0145033038414155, "grad_norm": 1.6108322143554688, "learning_rate": 9.58e-05, "loss": 0.4144, "step": 35975 }, { "epoch": 2.0145593011535445, "grad_norm": 1.1560313701629639, "learning_rate": 9.579973684210527e-05, "loss": 0.4657, "step": 35976 }, { "epoch": 2.0146152984656736, "grad_norm": 1.2327479124069214, "learning_rate": 9.579947368421053e-05, "loss": 0.3902, "step": 35977 }, { "epoch": 2.0146712957778026, "grad_norm": 1.1427966356277466, "learning_rate": 9.579921052631579e-05, "loss": 0.3219, "step": 35978 }, { "epoch": 2.0147272930899316, "grad_norm": 1.204358458518982, "learning_rate": 9.579894736842105e-05, "loss": 0.3552, "step": 35979 }, { "epoch": 2.0147832904020606, "grad_norm": 1.119761347770691, "learning_rate": 9.579868421052632e-05, "loss": 0.3405, "step": 35980 }, { "epoch": 2.0148392877141896, "grad_norm": 1.1006317138671875, "learning_rate": 9.579842105263158e-05, "loss": 0.3676, "step": 35981 }, { "epoch": 2.0148952850263186, "grad_norm": 1.3845229148864746, "learning_rate": 9.579815789473686e-05, "loss": 0.391, "step": 35982 }, { "epoch": 2.0149512823384477, "grad_norm": 1.2657369375228882, "learning_rate": 9.57978947368421e-05, "loss": 0.3135, "step": 35983 }, { "epoch": 2.0150072796505767, "grad_norm": 1.659509301185608, "learning_rate": 9.579763157894738e-05, "loss": 0.4413, "step": 35984 }, { "epoch": 2.0150632769627057, "grad_norm": 1.4364237785339355, "learning_rate": 9.579736842105264e-05, "loss": 0.3885, "step": 35985 }, { "epoch": 2.0151192742748347, "grad_norm": 1.399691104888916, "learning_rate": 9.579710526315791e-05, "loss": 0.4291, "step": 35986 }, { "epoch": 2.0151752715869637, "grad_norm": 1.1163642406463623, "learning_rate": 9.579684210526317e-05, "loss": 0.3634, "step": 35987 }, { "epoch": 2.0152312688990928, "grad_norm": 1.1183875799179077, "learning_rate": 9.579657894736841e-05, "loss": 0.476, "step": 35988 }, { "epoch": 
2.015287266211222, "grad_norm": 1.3187004327774048, "learning_rate": 9.579631578947369e-05, "loss": 0.4876, "step": 35989 }, { "epoch": 2.015343263523351, "grad_norm": 1.2039744853973389, "learning_rate": 9.579605263157895e-05, "loss": 0.3554, "step": 35990 }, { "epoch": 2.01539926083548, "grad_norm": 1.1927218437194824, "learning_rate": 9.579578947368422e-05, "loss": 0.4803, "step": 35991 }, { "epoch": 2.015455258147609, "grad_norm": 0.9970502257347107, "learning_rate": 9.579552631578948e-05, "loss": 0.3802, "step": 35992 }, { "epoch": 2.015511255459738, "grad_norm": 1.1429189443588257, "learning_rate": 9.579526315789474e-05, "loss": 0.3696, "step": 35993 }, { "epoch": 2.015567252771867, "grad_norm": 1.4234963655471802, "learning_rate": 9.5795e-05, "loss": 0.435, "step": 35994 }, { "epoch": 2.015623250083996, "grad_norm": 1.411230444908142, "learning_rate": 9.579473684210527e-05, "loss": 0.5022, "step": 35995 }, { "epoch": 2.015679247396125, "grad_norm": 1.1422884464263916, "learning_rate": 9.579447368421053e-05, "loss": 0.3783, "step": 35996 }, { "epoch": 2.015735244708254, "grad_norm": 1.1443277597427368, "learning_rate": 9.579421052631579e-05, "loss": 0.3681, "step": 35997 }, { "epoch": 2.015791242020383, "grad_norm": 1.1956120729446411, "learning_rate": 9.579394736842105e-05, "loss": 0.5532, "step": 35998 }, { "epoch": 2.015847239332512, "grad_norm": 1.4882780313491821, "learning_rate": 9.579368421052633e-05, "loss": 0.4715, "step": 35999 }, { "epoch": 2.015903236644641, "grad_norm": 1.3563767671585083, "learning_rate": 9.579342105263159e-05, "loss": 0.3056, "step": 36000 }, { "epoch": 2.01595923395677, "grad_norm": 1.0231021642684937, "learning_rate": 9.579315789473685e-05, "loss": 0.3587, "step": 36001 }, { "epoch": 2.016015231268899, "grad_norm": 1.1333285570144653, "learning_rate": 9.57928947368421e-05, "loss": 0.3389, "step": 36002 }, { "epoch": 2.016071228581028, "grad_norm": 1.1697754859924316, "learning_rate": 9.579263157894738e-05, "loss": 0.4175, 
"step": 36003 }, { "epoch": 2.016127225893157, "grad_norm": 1.0883378982543945, "learning_rate": 9.579236842105264e-05, "loss": 0.3309, "step": 36004 }, { "epoch": 2.016183223205286, "grad_norm": 1.1815637350082397, "learning_rate": 9.57921052631579e-05, "loss": 0.3602, "step": 36005 }, { "epoch": 2.016239220517415, "grad_norm": 1.048217535018921, "learning_rate": 9.579184210526316e-05, "loss": 0.2999, "step": 36006 }, { "epoch": 2.016295217829544, "grad_norm": 1.4316154718399048, "learning_rate": 9.579157894736842e-05, "loss": 0.608, "step": 36007 }, { "epoch": 2.016351215141673, "grad_norm": 1.4802874326705933, "learning_rate": 9.579131578947369e-05, "loss": 0.4365, "step": 36008 }, { "epoch": 2.016407212453802, "grad_norm": 1.0527253150939941, "learning_rate": 9.579105263157895e-05, "loss": 0.3922, "step": 36009 }, { "epoch": 2.016463209765931, "grad_norm": 1.1508678197860718, "learning_rate": 9.579078947368422e-05, "loss": 0.4145, "step": 36010 }, { "epoch": 2.01651920707806, "grad_norm": 1.1785411834716797, "learning_rate": 9.579052631578947e-05, "loss": 0.481, "step": 36011 }, { "epoch": 2.0165752043901892, "grad_norm": 1.0644607543945312, "learning_rate": 9.579026315789474e-05, "loss": 0.423, "step": 36012 }, { "epoch": 2.0166312017023182, "grad_norm": 1.374876856803894, "learning_rate": 9.579e-05, "loss": 0.3902, "step": 36013 }, { "epoch": 2.0166871990144473, "grad_norm": 1.7422996759414673, "learning_rate": 9.578973684210528e-05, "loss": 0.3413, "step": 36014 }, { "epoch": 2.0167431963265763, "grad_norm": 1.0457087755203247, "learning_rate": 9.578947368421052e-05, "loss": 0.337, "step": 36015 }, { "epoch": 2.0167991936387053, "grad_norm": 1.2817370891571045, "learning_rate": 9.57892105263158e-05, "loss": 0.3393, "step": 36016 }, { "epoch": 2.0168551909508343, "grad_norm": 1.215639591217041, "learning_rate": 9.578894736842106e-05, "loss": 0.3777, "step": 36017 }, { "epoch": 2.0169111882629633, "grad_norm": 1.1111069917678833, "learning_rate": 
9.578868421052633e-05, "loss": 0.365, "step": 36018 }, { "epoch": 2.0169671855750924, "grad_norm": 1.325737714767456, "learning_rate": 9.578842105263159e-05, "loss": 0.405, "step": 36019 }, { "epoch": 2.0170231828872214, "grad_norm": 1.307184100151062, "learning_rate": 9.578815789473685e-05, "loss": 0.4497, "step": 36020 }, { "epoch": 2.0170791801993504, "grad_norm": 1.1664663553237915, "learning_rate": 9.578789473684211e-05, "loss": 0.3073, "step": 36021 }, { "epoch": 2.0171351775114794, "grad_norm": 1.1457533836364746, "learning_rate": 9.578763157894737e-05, "loss": 0.4955, "step": 36022 }, { "epoch": 2.0171911748236084, "grad_norm": 1.065338373184204, "learning_rate": 9.578736842105264e-05, "loss": 0.384, "step": 36023 }, { "epoch": 2.0172471721357375, "grad_norm": 1.027599573135376, "learning_rate": 9.57871052631579e-05, "loss": 0.3274, "step": 36024 }, { "epoch": 2.0173031694478665, "grad_norm": 1.402746558189392, "learning_rate": 9.578684210526316e-05, "loss": 0.3838, "step": 36025 }, { "epoch": 2.0173591667599955, "grad_norm": 1.1714805364608765, "learning_rate": 9.578657894736842e-05, "loss": 0.3866, "step": 36026 }, { "epoch": 2.0174151640721245, "grad_norm": 1.1373165845870972, "learning_rate": 9.578631578947369e-05, "loss": 0.3817, "step": 36027 }, { "epoch": 2.0174711613842535, "grad_norm": 1.0959217548370361, "learning_rate": 9.578605263157895e-05, "loss": 0.3602, "step": 36028 }, { "epoch": 2.0175271586963825, "grad_norm": 1.457277536392212, "learning_rate": 9.578578947368421e-05, "loss": 0.5121, "step": 36029 }, { "epoch": 2.0175831560085116, "grad_norm": 1.3429263830184937, "learning_rate": 9.578552631578947e-05, "loss": 0.4155, "step": 36030 }, { "epoch": 2.0176391533206406, "grad_norm": 1.2870543003082275, "learning_rate": 9.578526315789475e-05, "loss": 0.4816, "step": 36031 }, { "epoch": 2.0176951506327696, "grad_norm": 1.4878095388412476, "learning_rate": 9.5785e-05, "loss": 0.5135, "step": 36032 }, { "epoch": 2.0177511479448986, "grad_norm": 
1.2382071018218994, "learning_rate": 9.578473684210527e-05, "loss": 0.3768, "step": 36033 }, { "epoch": 2.0178071452570276, "grad_norm": 1.2249504327774048, "learning_rate": 9.578447368421052e-05, "loss": 0.3401, "step": 36034 }, { "epoch": 2.0178631425691567, "grad_norm": 1.1332892179489136, "learning_rate": 9.57842105263158e-05, "loss": 0.3329, "step": 36035 }, { "epoch": 2.0179191398812857, "grad_norm": 1.102394700050354, "learning_rate": 9.578394736842106e-05, "loss": 0.3353, "step": 36036 }, { "epoch": 2.0179751371934147, "grad_norm": 1.173794150352478, "learning_rate": 9.578368421052633e-05, "loss": 0.3378, "step": 36037 }, { "epoch": 2.0180311345055437, "grad_norm": 1.0940375328063965, "learning_rate": 9.578342105263158e-05, "loss": 0.3304, "step": 36038 }, { "epoch": 2.0180871318176727, "grad_norm": 1.1070879697799683, "learning_rate": 9.578315789473684e-05, "loss": 0.2866, "step": 36039 }, { "epoch": 2.0181431291298018, "grad_norm": 1.2510130405426025, "learning_rate": 9.578289473684211e-05, "loss": 0.3679, "step": 36040 }, { "epoch": 2.018199126441931, "grad_norm": 1.1776942014694214, "learning_rate": 9.578263157894737e-05, "loss": 0.3003, "step": 36041 }, { "epoch": 2.01825512375406, "grad_norm": 1.3806794881820679, "learning_rate": 9.578236842105264e-05, "loss": 0.4139, "step": 36042 }, { "epoch": 2.018311121066189, "grad_norm": 1.2915743589401245, "learning_rate": 9.578210526315789e-05, "loss": 0.3807, "step": 36043 }, { "epoch": 2.018367118378318, "grad_norm": 1.0765838623046875, "learning_rate": 9.578184210526316e-05, "loss": 0.3254, "step": 36044 }, { "epoch": 2.018423115690447, "grad_norm": 1.1900556087493896, "learning_rate": 9.578157894736842e-05, "loss": 0.3332, "step": 36045 }, { "epoch": 2.018479113002576, "grad_norm": 1.3256704807281494, "learning_rate": 9.57813157894737e-05, "loss": 0.3772, "step": 36046 }, { "epoch": 2.018535110314705, "grad_norm": 1.034179925918579, "learning_rate": 9.578105263157896e-05, "loss": 0.335, "step": 36047 }, { 
"epoch": 2.018591107626834, "grad_norm": 1.486627221107483, "learning_rate": 9.578078947368422e-05, "loss": 0.4926, "step": 36048 }, { "epoch": 2.018647104938963, "grad_norm": 1.0806629657745361, "learning_rate": 9.578052631578947e-05, "loss": 0.4818, "step": 36049 }, { "epoch": 2.018703102251092, "grad_norm": 1.1111974716186523, "learning_rate": 9.578026315789475e-05, "loss": 0.3146, "step": 36050 }, { "epoch": 2.018759099563221, "grad_norm": 1.6517853736877441, "learning_rate": 9.578000000000001e-05, "loss": 0.3759, "step": 36051 }, { "epoch": 2.01881509687535, "grad_norm": 1.2819726467132568, "learning_rate": 9.577973684210527e-05, "loss": 0.3637, "step": 36052 }, { "epoch": 2.018871094187479, "grad_norm": 1.3480576276779175, "learning_rate": 9.577947368421053e-05, "loss": 0.4443, "step": 36053 }, { "epoch": 2.018927091499608, "grad_norm": 1.1057201623916626, "learning_rate": 9.57792105263158e-05, "loss": 0.4054, "step": 36054 }, { "epoch": 2.018983088811737, "grad_norm": 1.2916324138641357, "learning_rate": 9.577894736842106e-05, "loss": 0.4222, "step": 36055 }, { "epoch": 2.019039086123866, "grad_norm": 1.1482441425323486, "learning_rate": 9.577868421052632e-05, "loss": 0.3983, "step": 36056 }, { "epoch": 2.019095083435995, "grad_norm": 1.3948460817337036, "learning_rate": 9.577842105263158e-05, "loss": 0.4187, "step": 36057 }, { "epoch": 2.019151080748124, "grad_norm": 1.170255422592163, "learning_rate": 9.577815789473684e-05, "loss": 0.3615, "step": 36058 }, { "epoch": 2.019207078060253, "grad_norm": 1.0794289112091064, "learning_rate": 9.577789473684211e-05, "loss": 0.3487, "step": 36059 }, { "epoch": 2.019263075372382, "grad_norm": 1.2683566808700562, "learning_rate": 9.577763157894737e-05, "loss": 0.3541, "step": 36060 }, { "epoch": 2.019319072684511, "grad_norm": 1.210978627204895, "learning_rate": 9.577736842105263e-05, "loss": 0.359, "step": 36061 }, { "epoch": 2.01937506999664, "grad_norm": 1.1657421588897705, "learning_rate": 9.577710526315789e-05, 
"loss": 0.3847, "step": 36062 }, { "epoch": 2.019431067308769, "grad_norm": 1.3235756158828735, "learning_rate": 9.577684210526317e-05, "loss": 0.369, "step": 36063 }, { "epoch": 2.019487064620898, "grad_norm": 1.171451449394226, "learning_rate": 9.577657894736843e-05, "loss": 0.4012, "step": 36064 }, { "epoch": 2.0195430619330272, "grad_norm": 1.0404460430145264, "learning_rate": 9.57763157894737e-05, "loss": 0.3322, "step": 36065 }, { "epoch": 2.0195990592451563, "grad_norm": 1.0139400959014893, "learning_rate": 9.577605263157894e-05, "loss": 0.3745, "step": 36066 }, { "epoch": 2.0196550565572853, "grad_norm": 1.1442186832427979, "learning_rate": 9.577578947368422e-05, "loss": 0.464, "step": 36067 }, { "epoch": 2.0197110538694143, "grad_norm": 1.219778060913086, "learning_rate": 9.577552631578948e-05, "loss": 0.4836, "step": 36068 }, { "epoch": 2.0197670511815433, "grad_norm": 1.1053717136383057, "learning_rate": 9.577526315789475e-05, "loss": 0.3311, "step": 36069 }, { "epoch": 2.0198230484936723, "grad_norm": 1.0756784677505493, "learning_rate": 9.5775e-05, "loss": 0.4954, "step": 36070 }, { "epoch": 2.0198790458058014, "grad_norm": 1.1235644817352295, "learning_rate": 9.577473684210527e-05, "loss": 0.325, "step": 36071 }, { "epoch": 2.0199350431179304, "grad_norm": 1.427750587463379, "learning_rate": 9.577447368421053e-05, "loss": 0.3647, "step": 36072 }, { "epoch": 2.0199910404300594, "grad_norm": 1.2528737783432007, "learning_rate": 9.57742105263158e-05, "loss": 0.3711, "step": 36073 }, { "epoch": 2.0200470377421884, "grad_norm": 1.7312415838241577, "learning_rate": 9.577394736842106e-05, "loss": 0.4634, "step": 36074 }, { "epoch": 2.0201030350543174, "grad_norm": 1.1633621454238892, "learning_rate": 9.577368421052631e-05, "loss": 0.2956, "step": 36075 }, { "epoch": 2.0201590323664464, "grad_norm": 1.3151518106460571, "learning_rate": 9.577342105263158e-05, "loss": 0.4186, "step": 36076 }, { "epoch": 2.0202150296785755, "grad_norm": 1.3769540786743164, 
"learning_rate": 9.577315789473684e-05, "loss": 0.516, "step": 36077 }, { "epoch": 2.0202710269907045, "grad_norm": 1.6785788536071777, "learning_rate": 9.577289473684212e-05, "loss": 0.4451, "step": 36078 }, { "epoch": 2.0203270243028335, "grad_norm": 1.1740608215332031, "learning_rate": 9.577263157894738e-05, "loss": 0.3494, "step": 36079 }, { "epoch": 2.0203830216149625, "grad_norm": 1.2428419589996338, "learning_rate": 9.577236842105263e-05, "loss": 0.3836, "step": 36080 }, { "epoch": 2.0204390189270915, "grad_norm": 1.0339562892913818, "learning_rate": 9.57721052631579e-05, "loss": 0.3294, "step": 36081 }, { "epoch": 2.0204950162392206, "grad_norm": 0.9753482937812805, "learning_rate": 9.577184210526317e-05, "loss": 0.2998, "step": 36082 }, { "epoch": 2.0205510135513496, "grad_norm": 1.2974295616149902, "learning_rate": 9.577157894736843e-05, "loss": 0.4364, "step": 36083 }, { "epoch": 2.0206070108634786, "grad_norm": 1.1775233745574951, "learning_rate": 9.577131578947369e-05, "loss": 0.3805, "step": 36084 }, { "epoch": 2.0206630081756076, "grad_norm": 1.050278902053833, "learning_rate": 9.577105263157895e-05, "loss": 0.3641, "step": 36085 }, { "epoch": 2.0207190054877366, "grad_norm": 1.3361601829528809, "learning_rate": 9.577078947368422e-05, "loss": 0.4103, "step": 36086 }, { "epoch": 2.0207750027998657, "grad_norm": 1.1317722797393799, "learning_rate": 9.577052631578948e-05, "loss": 0.4682, "step": 36087 }, { "epoch": 2.0208310001119947, "grad_norm": 1.1627200841903687, "learning_rate": 9.577026315789474e-05, "loss": 0.5429, "step": 36088 }, { "epoch": 2.0208869974241237, "grad_norm": 1.1527284383773804, "learning_rate": 9.577e-05, "loss": 0.3745, "step": 36089 }, { "epoch": 2.0209429947362527, "grad_norm": 1.3439584970474243, "learning_rate": 9.576973684210527e-05, "loss": 0.4224, "step": 36090 }, { "epoch": 2.0209989920483817, "grad_norm": 1.7126879692077637, "learning_rate": 9.576947368421053e-05, "loss": 0.4834, "step": 36091 }, { "epoch": 
2.0210549893605108, "grad_norm": 1.350543737411499, "learning_rate": 9.576921052631579e-05, "loss": 0.4606, "step": 36092 }, { "epoch": 2.0211109866726398, "grad_norm": 1.043258786201477, "learning_rate": 9.576894736842105e-05, "loss": 0.3486, "step": 36093 }, { "epoch": 2.021166983984769, "grad_norm": 1.2732752561569214, "learning_rate": 9.576868421052631e-05, "loss": 0.3951, "step": 36094 }, { "epoch": 2.021222981296898, "grad_norm": 1.2452353239059448, "learning_rate": 9.576842105263159e-05, "loss": 0.4, "step": 36095 }, { "epoch": 2.021278978609027, "grad_norm": 1.2646691799163818, "learning_rate": 9.576815789473684e-05, "loss": 0.3664, "step": 36096 }, { "epoch": 2.021334975921156, "grad_norm": 2.23759388923645, "learning_rate": 9.576789473684212e-05, "loss": 0.4669, "step": 36097 }, { "epoch": 2.021390973233285, "grad_norm": 1.27659010887146, "learning_rate": 9.576763157894736e-05, "loss": 0.3555, "step": 36098 }, { "epoch": 2.021446970545414, "grad_norm": 1.1369566917419434, "learning_rate": 9.576736842105264e-05, "loss": 0.4216, "step": 36099 }, { "epoch": 2.021502967857543, "grad_norm": 1.1875971555709839, "learning_rate": 9.57671052631579e-05, "loss": 0.4602, "step": 36100 }, { "epoch": 2.021558965169672, "grad_norm": 1.323009729385376, "learning_rate": 9.576684210526317e-05, "loss": 0.4474, "step": 36101 }, { "epoch": 2.021614962481801, "grad_norm": 1.0964728593826294, "learning_rate": 9.576657894736843e-05, "loss": 0.3779, "step": 36102 }, { "epoch": 2.02167095979393, "grad_norm": 1.629712700843811, "learning_rate": 9.576631578947369e-05, "loss": 0.4766, "step": 36103 }, { "epoch": 2.021726957106059, "grad_norm": 0.9526477456092834, "learning_rate": 9.576605263157895e-05, "loss": 0.347, "step": 36104 }, { "epoch": 2.021782954418188, "grad_norm": 1.4993746280670166, "learning_rate": 9.576578947368422e-05, "loss": 0.4092, "step": 36105 }, { "epoch": 2.021838951730317, "grad_norm": 1.4700255393981934, "learning_rate": 9.576552631578948e-05, "loss": 0.4275, 
"step": 36106 }, { "epoch": 2.021894949042446, "grad_norm": 1.217809796333313, "learning_rate": 9.576526315789474e-05, "loss": 0.4027, "step": 36107 }, { "epoch": 2.021950946354575, "grad_norm": 1.048000693321228, "learning_rate": 9.5765e-05, "loss": 0.3954, "step": 36108 }, { "epoch": 2.022006943666704, "grad_norm": 1.0968892574310303, "learning_rate": 9.576473684210526e-05, "loss": 0.377, "step": 36109 }, { "epoch": 2.022062940978833, "grad_norm": 1.4301968812942505, "learning_rate": 9.576447368421054e-05, "loss": 0.381, "step": 36110 }, { "epoch": 2.022118938290962, "grad_norm": 1.4264206886291504, "learning_rate": 9.57642105263158e-05, "loss": 0.4448, "step": 36111 }, { "epoch": 2.022174935603091, "grad_norm": 1.0612566471099854, "learning_rate": 9.576394736842105e-05, "loss": 0.371, "step": 36112 }, { "epoch": 2.02223093291522, "grad_norm": 1.2199323177337646, "learning_rate": 9.576368421052631e-05, "loss": 0.3894, "step": 36113 }, { "epoch": 2.022286930227349, "grad_norm": 1.1175472736358643, "learning_rate": 9.576342105263159e-05, "loss": 0.3095, "step": 36114 }, { "epoch": 2.022342927539478, "grad_norm": 1.0955042839050293, "learning_rate": 9.576315789473685e-05, "loss": 0.4419, "step": 36115 }, { "epoch": 2.022398924851607, "grad_norm": 1.4427772760391235, "learning_rate": 9.576289473684211e-05, "loss": 0.6101, "step": 36116 }, { "epoch": 2.0224549221637362, "grad_norm": 1.2829735279083252, "learning_rate": 9.576263157894737e-05, "loss": 0.4075, "step": 36117 }, { "epoch": 2.0225109194758653, "grad_norm": 1.0631839036941528, "learning_rate": 9.576236842105264e-05, "loss": 0.3797, "step": 36118 }, { "epoch": 2.0225669167879943, "grad_norm": 0.9853391051292419, "learning_rate": 9.57621052631579e-05, "loss": 0.2773, "step": 36119 }, { "epoch": 2.0226229141001233, "grad_norm": 1.6444292068481445, "learning_rate": 9.576184210526317e-05, "loss": 0.4226, "step": 36120 }, { "epoch": 2.0226789114122523, "grad_norm": 1.163071632385254, "learning_rate": 
9.576157894736842e-05, "loss": 0.4074, "step": 36121 }, { "epoch": 2.0227349087243813, "grad_norm": 1.309867262840271, "learning_rate": 9.576131578947369e-05, "loss": 0.3962, "step": 36122 }, { "epoch": 2.0227909060365103, "grad_norm": 1.4894245862960815, "learning_rate": 9.576105263157895e-05, "loss": 0.5097, "step": 36123 }, { "epoch": 2.0228469033486394, "grad_norm": 1.0927071571350098, "learning_rate": 9.576078947368423e-05, "loss": 0.2955, "step": 36124 }, { "epoch": 2.0229029006607684, "grad_norm": 1.075710415840149, "learning_rate": 9.576052631578947e-05, "loss": 0.3185, "step": 36125 }, { "epoch": 2.0229588979728974, "grad_norm": 1.3109391927719116, "learning_rate": 9.576026315789473e-05, "loss": 0.4119, "step": 36126 }, { "epoch": 2.0230148952850264, "grad_norm": 1.2229167222976685, "learning_rate": 9.576e-05, "loss": 0.4157, "step": 36127 }, { "epoch": 2.0230708925971554, "grad_norm": 1.074326515197754, "learning_rate": 9.575973684210526e-05, "loss": 0.2969, "step": 36128 }, { "epoch": 2.0231268899092845, "grad_norm": 1.3686977624893188, "learning_rate": 9.575947368421054e-05, "loss": 0.42, "step": 36129 }, { "epoch": 2.0231828872214135, "grad_norm": 1.3230191469192505, "learning_rate": 9.575921052631578e-05, "loss": 0.414, "step": 36130 }, { "epoch": 2.0232388845335425, "grad_norm": 1.3710585832595825, "learning_rate": 9.575894736842106e-05, "loss": 0.4667, "step": 36131 }, { "epoch": 2.0232948818456715, "grad_norm": 1.0677279233932495, "learning_rate": 9.575868421052632e-05, "loss": 0.2917, "step": 36132 }, { "epoch": 2.0233508791578005, "grad_norm": 1.244718313217163, "learning_rate": 9.575842105263159e-05, "loss": 0.4061, "step": 36133 }, { "epoch": 2.0234068764699296, "grad_norm": 1.2666012048721313, "learning_rate": 9.575815789473685e-05, "loss": 0.5106, "step": 36134 }, { "epoch": 2.0234628737820586, "grad_norm": 1.3248134851455688, "learning_rate": 9.575789473684211e-05, "loss": 0.4333, "step": 36135 }, { "epoch": 2.0235188710941876, "grad_norm": 
1.3805012702941895, "learning_rate": 9.575763157894737e-05, "loss": 0.4819, "step": 36136 }, { "epoch": 2.0235748684063166, "grad_norm": 1.1341010332107544, "learning_rate": 9.575736842105264e-05, "loss": 0.4437, "step": 36137 }, { "epoch": 2.0236308657184456, "grad_norm": 1.5677810907363892, "learning_rate": 9.57571052631579e-05, "loss": 0.3928, "step": 36138 }, { "epoch": 2.0236868630305747, "grad_norm": 1.3011412620544434, "learning_rate": 9.575684210526316e-05, "loss": 0.4063, "step": 36139 }, { "epoch": 2.0237428603427037, "grad_norm": 1.318723201751709, "learning_rate": 9.575657894736842e-05, "loss": 0.4089, "step": 36140 }, { "epoch": 2.0237988576548327, "grad_norm": 1.2499979734420776, "learning_rate": 9.57563157894737e-05, "loss": 0.3874, "step": 36141 }, { "epoch": 2.0238548549669617, "grad_norm": 1.162208080291748, "learning_rate": 9.575605263157895e-05, "loss": 0.3703, "step": 36142 }, { "epoch": 2.0239108522790907, "grad_norm": 1.1510460376739502, "learning_rate": 9.575578947368421e-05, "loss": 0.4627, "step": 36143 }, { "epoch": 2.0239668495912198, "grad_norm": 1.3512641191482544, "learning_rate": 9.575552631578947e-05, "loss": 0.544, "step": 36144 }, { "epoch": 2.0240228469033488, "grad_norm": 1.150254249572754, "learning_rate": 9.575526315789473e-05, "loss": 0.5287, "step": 36145 }, { "epoch": 2.024078844215478, "grad_norm": 1.109472632408142, "learning_rate": 9.575500000000001e-05, "loss": 0.3934, "step": 36146 }, { "epoch": 2.024134841527607, "grad_norm": 1.3962764739990234, "learning_rate": 9.575473684210527e-05, "loss": 0.3851, "step": 36147 }, { "epoch": 2.024190838839736, "grad_norm": 1.1645554304122925, "learning_rate": 9.575447368421053e-05, "loss": 0.3797, "step": 36148 }, { "epoch": 2.024246836151865, "grad_norm": 1.3559650182724, "learning_rate": 9.575421052631579e-05, "loss": 0.4167, "step": 36149 }, { "epoch": 2.024302833463994, "grad_norm": 1.0167299509048462, "learning_rate": 9.575394736842106e-05, "loss": 0.395, "step": 36150 }, { 
"epoch": 2.024358830776123, "grad_norm": 0.9587291479110718, "learning_rate": 9.575368421052632e-05, "loss": 0.3134, "step": 36151 }, { "epoch": 2.024414828088252, "grad_norm": 1.1310670375823975, "learning_rate": 9.575342105263159e-05, "loss": 0.3498, "step": 36152 }, { "epoch": 2.024470825400381, "grad_norm": 1.0973423719406128, "learning_rate": 9.575315789473684e-05, "loss": 0.3597, "step": 36153 }, { "epoch": 2.02452682271251, "grad_norm": 0.9995084404945374, "learning_rate": 9.575289473684211e-05, "loss": 0.352, "step": 36154 }, { "epoch": 2.024582820024639, "grad_norm": 1.0361990928649902, "learning_rate": 9.575263157894737e-05, "loss": 0.3934, "step": 36155 }, { "epoch": 2.024638817336768, "grad_norm": 1.2091307640075684, "learning_rate": 9.575236842105265e-05, "loss": 0.5091, "step": 36156 }, { "epoch": 2.024694814648897, "grad_norm": 1.361951231956482, "learning_rate": 9.57521052631579e-05, "loss": 0.6277, "step": 36157 }, { "epoch": 2.024750811961026, "grad_norm": 1.2040526866912842, "learning_rate": 9.575184210526316e-05, "loss": 0.3802, "step": 36158 }, { "epoch": 2.024806809273155, "grad_norm": 1.1696475744247437, "learning_rate": 9.575157894736842e-05, "loss": 0.3907, "step": 36159 }, { "epoch": 2.024862806585284, "grad_norm": 1.2047109603881836, "learning_rate": 9.575131578947368e-05, "loss": 0.3811, "step": 36160 }, { "epoch": 2.024918803897413, "grad_norm": 1.2403868436813354, "learning_rate": 9.575105263157896e-05, "loss": 0.4177, "step": 36161 }, { "epoch": 2.024974801209542, "grad_norm": 1.7188231945037842, "learning_rate": 9.57507894736842e-05, "loss": 0.4631, "step": 36162 }, { "epoch": 2.025030798521671, "grad_norm": 1.296467661857605, "learning_rate": 9.575052631578948e-05, "loss": 0.4341, "step": 36163 }, { "epoch": 2.0250867958338, "grad_norm": 1.1680549383163452, "learning_rate": 9.575026315789474e-05, "loss": 0.4633, "step": 36164 }, { "epoch": 2.025142793145929, "grad_norm": 1.1548320055007935, "learning_rate": 9.575000000000001e-05, 
"loss": 0.3898, "step": 36165 }, { "epoch": 2.025198790458058, "grad_norm": 1.3048478364944458, "learning_rate": 9.574973684210527e-05, "loss": 0.3727, "step": 36166 }, { "epoch": 2.025254787770187, "grad_norm": 0.9240673780441284, "learning_rate": 9.574947368421053e-05, "loss": 0.2686, "step": 36167 }, { "epoch": 2.025310785082316, "grad_norm": 1.121976613998413, "learning_rate": 9.574921052631579e-05, "loss": 0.3149, "step": 36168 }, { "epoch": 2.0253667823944452, "grad_norm": 1.2317181825637817, "learning_rate": 9.574894736842106e-05, "loss": 0.3576, "step": 36169 }, { "epoch": 2.0254227797065743, "grad_norm": 1.183701515197754, "learning_rate": 9.574868421052632e-05, "loss": 0.4173, "step": 36170 }, { "epoch": 2.0254787770187033, "grad_norm": 1.2518954277038574, "learning_rate": 9.574842105263158e-05, "loss": 0.3794, "step": 36171 }, { "epoch": 2.0255347743308323, "grad_norm": 1.0745656490325928, "learning_rate": 9.574815789473684e-05, "loss": 0.3689, "step": 36172 }, { "epoch": 2.0255907716429613, "grad_norm": 1.2014179229736328, "learning_rate": 9.574789473684211e-05, "loss": 0.4485, "step": 36173 }, { "epoch": 2.0256467689550903, "grad_norm": 1.394370675086975, "learning_rate": 9.574763157894737e-05, "loss": 0.5683, "step": 36174 }, { "epoch": 2.0257027662672193, "grad_norm": 1.0620108842849731, "learning_rate": 9.574736842105265e-05, "loss": 0.306, "step": 36175 }, { "epoch": 2.0257587635793484, "grad_norm": 0.99882572889328, "learning_rate": 9.57471052631579e-05, "loss": 0.3377, "step": 36176 }, { "epoch": 2.0258147608914774, "grad_norm": 1.4434428215026855, "learning_rate": 9.574684210526315e-05, "loss": 0.3261, "step": 36177 }, { "epoch": 2.0258707582036064, "grad_norm": 1.2017641067504883, "learning_rate": 9.574657894736843e-05, "loss": 0.3859, "step": 36178 }, { "epoch": 2.0259267555157354, "grad_norm": 1.434136986732483, "learning_rate": 9.574631578947369e-05, "loss": 0.3284, "step": 36179 }, { "epoch": 2.0259827528278644, "grad_norm": 
1.265540361404419, "learning_rate": 9.574605263157895e-05, "loss": 0.4311, "step": 36180 }, { "epoch": 2.0260387501399935, "grad_norm": 1.2686388492584229, "learning_rate": 9.57457894736842e-05, "loss": 0.3066, "step": 36181 }, { "epoch": 2.0260947474521225, "grad_norm": 1.158517837524414, "learning_rate": 9.574552631578948e-05, "loss": 0.3474, "step": 36182 }, { "epoch": 2.0261507447642515, "grad_norm": 1.011753797531128, "learning_rate": 9.574526315789474e-05, "loss": 0.328, "step": 36183 }, { "epoch": 2.0262067420763805, "grad_norm": 0.9872798323631287, "learning_rate": 9.574500000000001e-05, "loss": 0.4186, "step": 36184 }, { "epoch": 2.0262627393885095, "grad_norm": 1.162466287612915, "learning_rate": 9.574473684210526e-05, "loss": 0.4061, "step": 36185 }, { "epoch": 2.0263187367006386, "grad_norm": 1.0803300142288208, "learning_rate": 9.574447368421053e-05, "loss": 0.3805, "step": 36186 }, { "epoch": 2.0263747340127676, "grad_norm": 1.0123308897018433, "learning_rate": 9.574421052631579e-05, "loss": 0.3352, "step": 36187 }, { "epoch": 2.0264307313248966, "grad_norm": 1.1157196760177612, "learning_rate": 9.574394736842106e-05, "loss": 0.4009, "step": 36188 }, { "epoch": 2.0264867286370256, "grad_norm": 1.3922655582427979, "learning_rate": 9.574368421052632e-05, "loss": 0.588, "step": 36189 }, { "epoch": 2.0265427259491546, "grad_norm": 2.8528687953948975, "learning_rate": 9.574342105263158e-05, "loss": 0.4321, "step": 36190 }, { "epoch": 2.0265987232612837, "grad_norm": 1.2426711320877075, "learning_rate": 9.574315789473684e-05, "loss": 0.3454, "step": 36191 }, { "epoch": 2.0266547205734127, "grad_norm": 1.2248778343200684, "learning_rate": 9.574289473684212e-05, "loss": 0.4249, "step": 36192 }, { "epoch": 2.0267107178855417, "grad_norm": 1.218500018119812, "learning_rate": 9.574263157894738e-05, "loss": 0.4198, "step": 36193 }, { "epoch": 2.0267667151976707, "grad_norm": 1.039161205291748, "learning_rate": 9.574236842105264e-05, "loss": 0.327, "step": 36194 
}, { "epoch": 2.0268227125097997, "grad_norm": 1.3586393594741821, "learning_rate": 9.57421052631579e-05, "loss": 0.4643, "step": 36195 }, { "epoch": 2.0268787098219287, "grad_norm": 1.3972834348678589, "learning_rate": 9.574184210526316e-05, "loss": 0.5018, "step": 36196 }, { "epoch": 2.0269347071340578, "grad_norm": 1.2016923427581787, "learning_rate": 9.574157894736843e-05, "loss": 0.4195, "step": 36197 }, { "epoch": 2.026990704446187, "grad_norm": 1.1543623208999634, "learning_rate": 9.574131578947369e-05, "loss": 0.5065, "step": 36198 }, { "epoch": 2.027046701758316, "grad_norm": 1.2000309228897095, "learning_rate": 9.574105263157895e-05, "loss": 0.4289, "step": 36199 }, { "epoch": 2.027102699070445, "grad_norm": 1.1484936475753784, "learning_rate": 9.574078947368421e-05, "loss": 0.3308, "step": 36200 }, { "epoch": 2.027158696382574, "grad_norm": 1.168464183807373, "learning_rate": 9.574052631578948e-05, "loss": 0.4177, "step": 36201 }, { "epoch": 2.027214693694703, "grad_norm": 1.098473072052002, "learning_rate": 9.574026315789474e-05, "loss": 0.3013, "step": 36202 }, { "epoch": 2.027270691006832, "grad_norm": 1.334673285484314, "learning_rate": 9.574e-05, "loss": 0.4392, "step": 36203 }, { "epoch": 2.027326688318961, "grad_norm": 1.0094263553619385, "learning_rate": 9.573973684210526e-05, "loss": 0.2937, "step": 36204 }, { "epoch": 2.02738268563109, "grad_norm": 1.306365728378296, "learning_rate": 9.573947368421053e-05, "loss": 0.3798, "step": 36205 }, { "epoch": 2.027438682943219, "grad_norm": 1.7061251401901245, "learning_rate": 9.57392105263158e-05, "loss": 0.4361, "step": 36206 }, { "epoch": 2.027494680255348, "grad_norm": 1.1141397953033447, "learning_rate": 9.573894736842107e-05, "loss": 0.4646, "step": 36207 }, { "epoch": 2.027550677567477, "grad_norm": 1.0769239664077759, "learning_rate": 9.573868421052631e-05, "loss": 0.4355, "step": 36208 }, { "epoch": 2.027606674879606, "grad_norm": 1.003715991973877, "learning_rate": 9.573842105263159e-05, 
"loss": 0.4171, "step": 36209 }, { "epoch": 2.027662672191735, "grad_norm": 1.2618032693862915, "learning_rate": 9.573815789473685e-05, "loss": 0.4016, "step": 36210 }, { "epoch": 2.027718669503864, "grad_norm": 1.1518477201461792, "learning_rate": 9.573789473684212e-05, "loss": 0.2621, "step": 36211 }, { "epoch": 2.0277746668159926, "grad_norm": 1.785696029663086, "learning_rate": 9.573763157894738e-05, "loss": 0.4088, "step": 36212 }, { "epoch": 2.027830664128122, "grad_norm": 1.5694187879562378, "learning_rate": 9.573736842105263e-05, "loss": 0.3665, "step": 36213 }, { "epoch": 2.0278866614402506, "grad_norm": 1.0264112949371338, "learning_rate": 9.57371052631579e-05, "loss": 0.3302, "step": 36214 }, { "epoch": 2.0279426587523797, "grad_norm": 1.345012903213501, "learning_rate": 9.573684210526316e-05, "loss": 0.4916, "step": 36215 }, { "epoch": 2.0279986560645087, "grad_norm": 1.4593931436538696, "learning_rate": 9.573657894736843e-05, "loss": 0.6602, "step": 36216 }, { "epoch": 2.0280546533766377, "grad_norm": 1.0988857746124268, "learning_rate": 9.573631578947368e-05, "loss": 0.4485, "step": 36217 }, { "epoch": 2.0281106506887667, "grad_norm": 1.1506752967834473, "learning_rate": 9.573605263157895e-05, "loss": 0.3753, "step": 36218 }, { "epoch": 2.0281666480008957, "grad_norm": 1.1675409078598022, "learning_rate": 9.573578947368421e-05, "loss": 0.3075, "step": 36219 }, { "epoch": 2.0282226453130248, "grad_norm": 1.2139896154403687, "learning_rate": 9.573552631578948e-05, "loss": 0.376, "step": 36220 }, { "epoch": 2.028278642625154, "grad_norm": 1.1897326707839966, "learning_rate": 9.573526315789474e-05, "loss": 0.5029, "step": 36221 }, { "epoch": 2.028334639937283, "grad_norm": 1.5655843019485474, "learning_rate": 9.5735e-05, "loss": 0.3787, "step": 36222 }, { "epoch": 2.028390637249412, "grad_norm": 1.1911202669143677, "learning_rate": 9.573473684210526e-05, "loss": 0.3281, "step": 36223 }, { "epoch": 2.028446634561541, "grad_norm": 1.3864094018936157, 
"learning_rate": 9.573447368421054e-05, "loss": 0.4893, "step": 36224 }, { "epoch": 2.02850263187367, "grad_norm": 1.345996379852295, "learning_rate": 9.57342105263158e-05, "loss": 0.4782, "step": 36225 }, { "epoch": 2.028558629185799, "grad_norm": 1.1793789863586426, "learning_rate": 9.573394736842106e-05, "loss": 0.5286, "step": 36226 }, { "epoch": 2.028614626497928, "grad_norm": 1.0208584070205688, "learning_rate": 9.573368421052632e-05, "loss": 0.4353, "step": 36227 }, { "epoch": 2.028670623810057, "grad_norm": 1.3628425598144531, "learning_rate": 9.573342105263159e-05, "loss": 0.4194, "step": 36228 }, { "epoch": 2.028726621122186, "grad_norm": 1.5805126428604126, "learning_rate": 9.573315789473685e-05, "loss": 0.469, "step": 36229 }, { "epoch": 2.028782618434315, "grad_norm": 0.9696323275566101, "learning_rate": 9.573289473684211e-05, "loss": 0.2908, "step": 36230 }, { "epoch": 2.028838615746444, "grad_norm": 1.1465381383895874, "learning_rate": 9.573263157894737e-05, "loss": 0.3607, "step": 36231 }, { "epoch": 2.028894613058573, "grad_norm": 1.2499195337295532, "learning_rate": 9.573236842105263e-05, "loss": 0.6442, "step": 36232 }, { "epoch": 2.028950610370702, "grad_norm": 1.4390406608581543, "learning_rate": 9.57321052631579e-05, "loss": 0.4236, "step": 36233 }, { "epoch": 2.029006607682831, "grad_norm": 1.2855228185653687, "learning_rate": 9.573184210526316e-05, "loss": 0.4247, "step": 36234 }, { "epoch": 2.02906260499496, "grad_norm": 1.1419548988342285, "learning_rate": 9.573157894736842e-05, "loss": 0.4055, "step": 36235 }, { "epoch": 2.029118602307089, "grad_norm": 0.9791093468666077, "learning_rate": 9.573131578947368e-05, "loss": 0.3513, "step": 36236 }, { "epoch": 2.029174599619218, "grad_norm": 1.3781180381774902, "learning_rate": 9.573105263157895e-05, "loss": 0.4697, "step": 36237 }, { "epoch": 2.029230596931347, "grad_norm": 1.1535961627960205, "learning_rate": 9.573078947368421e-05, "loss": 0.3634, "step": 36238 }, { "epoch": 
2.029286594243476, "grad_norm": 3.7169346809387207, "learning_rate": 9.573052631578949e-05, "loss": 0.3134, "step": 36239 }, { "epoch": 2.029342591555605, "grad_norm": 1.3666712045669556, "learning_rate": 9.573026315789473e-05, "loss": 0.3581, "step": 36240 }, { "epoch": 2.029398588867734, "grad_norm": 1.0228607654571533, "learning_rate": 9.573e-05, "loss": 0.3921, "step": 36241 }, { "epoch": 2.029454586179863, "grad_norm": 1.2561143636703491, "learning_rate": 9.572973684210527e-05, "loss": 0.3713, "step": 36242 }, { "epoch": 2.029510583491992, "grad_norm": 1.2982616424560547, "learning_rate": 9.572947368421054e-05, "loss": 0.4248, "step": 36243 }, { "epoch": 2.0295665808041212, "grad_norm": 1.1033931970596313, "learning_rate": 9.57292105263158e-05, "loss": 0.4484, "step": 36244 }, { "epoch": 2.0296225781162502, "grad_norm": 1.2534352540969849, "learning_rate": 9.572894736842106e-05, "loss": 0.3661, "step": 36245 }, { "epoch": 2.0296785754283793, "grad_norm": 1.1837644577026367, "learning_rate": 9.572868421052632e-05, "loss": 0.4713, "step": 36246 }, { "epoch": 2.0297345727405083, "grad_norm": 1.0372940301895142, "learning_rate": 9.572842105263158e-05, "loss": 0.4233, "step": 36247 }, { "epoch": 2.0297905700526373, "grad_norm": 1.0914605855941772, "learning_rate": 9.572815789473685e-05, "loss": 0.4265, "step": 36248 }, { "epoch": 2.0298465673647663, "grad_norm": 0.9688382744789124, "learning_rate": 9.572789473684211e-05, "loss": 0.3353, "step": 36249 }, { "epoch": 2.0299025646768953, "grad_norm": 1.4337506294250488, "learning_rate": 9.572763157894737e-05, "loss": 0.3795, "step": 36250 }, { "epoch": 2.0299585619890244, "grad_norm": 1.0688773393630981, "learning_rate": 9.572736842105263e-05, "loss": 0.3368, "step": 36251 }, { "epoch": 2.0300145593011534, "grad_norm": 1.1917976140975952, "learning_rate": 9.57271052631579e-05, "loss": 0.3923, "step": 36252 }, { "epoch": 2.0300705566132824, "grad_norm": 1.224181056022644, "learning_rate": 9.572684210526316e-05, "loss": 
0.3594, "step": 36253 }, { "epoch": 2.0301265539254114, "grad_norm": 1.2333035469055176, "learning_rate": 9.572657894736842e-05, "loss": 0.4228, "step": 36254 }, { "epoch": 2.0301825512375404, "grad_norm": 1.189501404762268, "learning_rate": 9.572631578947368e-05, "loss": 0.496, "step": 36255 }, { "epoch": 2.0302385485496695, "grad_norm": 1.214761734008789, "learning_rate": 9.572605263157896e-05, "loss": 0.4171, "step": 36256 }, { "epoch": 2.0302945458617985, "grad_norm": 1.0537689924240112, "learning_rate": 9.572578947368422e-05, "loss": 0.3346, "step": 36257 }, { "epoch": 2.0303505431739275, "grad_norm": 1.2068097591400146, "learning_rate": 9.572552631578948e-05, "loss": 0.3823, "step": 36258 }, { "epoch": 2.0304065404860565, "grad_norm": 1.2687188386917114, "learning_rate": 9.572526315789474e-05, "loss": 0.446, "step": 36259 }, { "epoch": 2.0304625377981855, "grad_norm": 1.2734394073486328, "learning_rate": 9.572500000000001e-05, "loss": 0.3045, "step": 36260 }, { "epoch": 2.0305185351103145, "grad_norm": 1.3959215879440308, "learning_rate": 9.572473684210527e-05, "loss": 0.3452, "step": 36261 }, { "epoch": 2.0305745324224436, "grad_norm": 1.2616201639175415, "learning_rate": 9.572447368421054e-05, "loss": 0.3113, "step": 36262 }, { "epoch": 2.0306305297345726, "grad_norm": 1.3425836563110352, "learning_rate": 9.572421052631579e-05, "loss": 0.3892, "step": 36263 }, { "epoch": 2.0306865270467016, "grad_norm": 0.9696201086044312, "learning_rate": 9.572394736842105e-05, "loss": 0.3914, "step": 36264 }, { "epoch": 2.0307425243588306, "grad_norm": 1.2023886442184448, "learning_rate": 9.572368421052632e-05, "loss": 0.3857, "step": 36265 }, { "epoch": 2.0307985216709596, "grad_norm": 1.0438746213912964, "learning_rate": 9.572342105263158e-05, "loss": 0.4836, "step": 36266 }, { "epoch": 2.0308545189830887, "grad_norm": 1.060734510421753, "learning_rate": 9.572315789473685e-05, "loss": 0.3443, "step": 36267 }, { "epoch": 2.0309105162952177, "grad_norm": 
1.1171808242797852, "learning_rate": 9.57228947368421e-05, "loss": 0.5194, "step": 36268 }, { "epoch": 2.0309665136073467, "grad_norm": 1.2432234287261963, "learning_rate": 9.572263157894737e-05, "loss": 0.3651, "step": 36269 }, { "epoch": 2.0310225109194757, "grad_norm": 1.3481745719909668, "learning_rate": 9.572236842105263e-05, "loss": 0.4087, "step": 36270 }, { "epoch": 2.0310785082316047, "grad_norm": 1.2913042306900024, "learning_rate": 9.572210526315791e-05, "loss": 0.3931, "step": 36271 }, { "epoch": 2.0311345055437338, "grad_norm": 1.2349194288253784, "learning_rate": 9.572184210526315e-05, "loss": 0.4094, "step": 36272 }, { "epoch": 2.031190502855863, "grad_norm": 1.427011489868164, "learning_rate": 9.572157894736843e-05, "loss": 0.5283, "step": 36273 }, { "epoch": 2.031246500167992, "grad_norm": 1.7662633657455444, "learning_rate": 9.572131578947369e-05, "loss": 0.5626, "step": 36274 }, { "epoch": 2.031302497480121, "grad_norm": 1.3289101123809814, "learning_rate": 9.572105263157896e-05, "loss": 0.459, "step": 36275 }, { "epoch": 2.03135849479225, "grad_norm": 1.0823477506637573, "learning_rate": 9.572078947368422e-05, "loss": 0.3981, "step": 36276 }, { "epoch": 2.031414492104379, "grad_norm": 1.23638916015625, "learning_rate": 9.572052631578948e-05, "loss": 0.4047, "step": 36277 }, { "epoch": 2.031470489416508, "grad_norm": 1.3539667129516602, "learning_rate": 9.572026315789474e-05, "loss": 0.4088, "step": 36278 }, { "epoch": 2.031526486728637, "grad_norm": 1.067913293838501, "learning_rate": 9.572000000000001e-05, "loss": 0.4061, "step": 36279 }, { "epoch": 2.031582484040766, "grad_norm": 1.0562572479248047, "learning_rate": 9.571973684210527e-05, "loss": 0.4266, "step": 36280 }, { "epoch": 2.031638481352895, "grad_norm": 1.4439635276794434, "learning_rate": 9.571947368421053e-05, "loss": 0.3692, "step": 36281 }, { "epoch": 2.031694478665024, "grad_norm": 1.1870864629745483, "learning_rate": 9.571921052631579e-05, "loss": 0.4067, "step": 36282 }, { 
"epoch": 2.031750475977153, "grad_norm": 1.0863558053970337, "learning_rate": 9.571894736842105e-05, "loss": 0.3113, "step": 36283 }, { "epoch": 2.031806473289282, "grad_norm": 1.1814287900924683, "learning_rate": 9.571868421052632e-05, "loss": 0.4776, "step": 36284 }, { "epoch": 2.031862470601411, "grad_norm": 1.1033085584640503, "learning_rate": 9.571842105263158e-05, "loss": 0.36, "step": 36285 }, { "epoch": 2.03191846791354, "grad_norm": 1.1536273956298828, "learning_rate": 9.571815789473684e-05, "loss": 0.4738, "step": 36286 }, { "epoch": 2.031974465225669, "grad_norm": 1.1841963529586792, "learning_rate": 9.57178947368421e-05, "loss": 0.4056, "step": 36287 }, { "epoch": 2.032030462537798, "grad_norm": 0.9759486317634583, "learning_rate": 9.571763157894738e-05, "loss": 0.3167, "step": 36288 }, { "epoch": 2.032086459849927, "grad_norm": 1.9350852966308594, "learning_rate": 9.571736842105264e-05, "loss": 0.6437, "step": 36289 }, { "epoch": 2.032142457162056, "grad_norm": 1.4433387517929077, "learning_rate": 9.57171052631579e-05, "loss": 0.4003, "step": 36290 }, { "epoch": 2.032198454474185, "grad_norm": 1.247506856918335, "learning_rate": 9.571684210526316e-05, "loss": 0.3645, "step": 36291 }, { "epoch": 2.032254451786314, "grad_norm": 1.0808452367782593, "learning_rate": 9.571657894736843e-05, "loss": 0.3193, "step": 36292 }, { "epoch": 2.032310449098443, "grad_norm": 1.2305854558944702, "learning_rate": 9.571631578947369e-05, "loss": 0.348, "step": 36293 }, { "epoch": 2.032366446410572, "grad_norm": 1.2643438577651978, "learning_rate": 9.571605263157896e-05, "loss": 0.5225, "step": 36294 }, { "epoch": 2.032422443722701, "grad_norm": 1.1478748321533203, "learning_rate": 9.571578947368421e-05, "loss": 0.3745, "step": 36295 }, { "epoch": 2.03247844103483, "grad_norm": 1.0920768976211548, "learning_rate": 9.571552631578948e-05, "loss": 0.3905, "step": 36296 }, { "epoch": 2.0325344383469592, "grad_norm": 1.1317269802093506, "learning_rate": 9.571526315789474e-05, 
"loss": 0.458, "step": 36297 }, { "epoch": 2.0325904356590883, "grad_norm": 0.9771817922592163, "learning_rate": 9.5715e-05, "loss": 0.3913, "step": 36298 }, { "epoch": 2.0326464329712173, "grad_norm": 1.0951465368270874, "learning_rate": 9.571473684210527e-05, "loss": 0.3489, "step": 36299 }, { "epoch": 2.0327024302833463, "grad_norm": 1.08945631980896, "learning_rate": 9.571447368421052e-05, "loss": 0.4503, "step": 36300 }, { "epoch": 2.0327584275954753, "grad_norm": 1.3406882286071777, "learning_rate": 9.57142105263158e-05, "loss": 0.3598, "step": 36301 }, { "epoch": 2.0328144249076043, "grad_norm": 1.3288211822509766, "learning_rate": 9.571394736842105e-05, "loss": 0.3867, "step": 36302 }, { "epoch": 2.0328704222197334, "grad_norm": 1.317568302154541, "learning_rate": 9.571368421052633e-05, "loss": 0.4894, "step": 36303 }, { "epoch": 2.0329264195318624, "grad_norm": 1.071881651878357, "learning_rate": 9.571342105263159e-05, "loss": 0.4839, "step": 36304 }, { "epoch": 2.0329824168439914, "grad_norm": 1.4124839305877686, "learning_rate": 9.571315789473685e-05, "loss": 0.4885, "step": 36305 }, { "epoch": 2.0330384141561204, "grad_norm": 1.169803261756897, "learning_rate": 9.57128947368421e-05, "loss": 0.4676, "step": 36306 }, { "epoch": 2.0330944114682494, "grad_norm": 1.0720146894454956, "learning_rate": 9.571263157894738e-05, "loss": 0.2855, "step": 36307 }, { "epoch": 2.0331504087803784, "grad_norm": 1.2608129978179932, "learning_rate": 9.571236842105264e-05, "loss": 0.3826, "step": 36308 }, { "epoch": 2.0332064060925075, "grad_norm": 1.0856701135635376, "learning_rate": 9.57121052631579e-05, "loss": 0.322, "step": 36309 }, { "epoch": 2.0332624034046365, "grad_norm": 1.2446951866149902, "learning_rate": 9.571184210526316e-05, "loss": 0.317, "step": 36310 }, { "epoch": 2.0333184007167655, "grad_norm": 1.3527703285217285, "learning_rate": 9.571157894736843e-05, "loss": 0.3809, "step": 36311 }, { "epoch": 2.0333743980288945, "grad_norm": 1.168303370475769, 
"learning_rate": 9.571131578947369e-05, "loss": 0.3102, "step": 36312 }, { "epoch": 2.0334303953410235, "grad_norm": 1.1110092401504517, "learning_rate": 9.571105263157895e-05, "loss": 0.3578, "step": 36313 }, { "epoch": 2.0334863926531526, "grad_norm": 1.3893972635269165, "learning_rate": 9.571078947368421e-05, "loss": 0.3737, "step": 36314 }, { "epoch": 2.0335423899652816, "grad_norm": 1.1075295209884644, "learning_rate": 9.571052631578948e-05, "loss": 0.3095, "step": 36315 }, { "epoch": 2.0335983872774106, "grad_norm": 1.4017988443374634, "learning_rate": 9.571026315789474e-05, "loss": 0.4303, "step": 36316 }, { "epoch": 2.0336543845895396, "grad_norm": 1.1054296493530273, "learning_rate": 9.571e-05, "loss": 0.4027, "step": 36317 }, { "epoch": 2.0337103819016686, "grad_norm": 1.3854578733444214, "learning_rate": 9.570973684210526e-05, "loss": 0.4309, "step": 36318 }, { "epoch": 2.0337663792137977, "grad_norm": 1.179198980331421, "learning_rate": 9.570947368421052e-05, "loss": 0.3861, "step": 36319 }, { "epoch": 2.0338223765259267, "grad_norm": 1.0702497959136963, "learning_rate": 9.57092105263158e-05, "loss": 0.4061, "step": 36320 }, { "epoch": 2.0338783738380557, "grad_norm": 1.2873876094818115, "learning_rate": 9.570894736842106e-05, "loss": 0.3976, "step": 36321 }, { "epoch": 2.0339343711501847, "grad_norm": 1.2305032014846802, "learning_rate": 9.570868421052633e-05, "loss": 0.427, "step": 36322 }, { "epoch": 2.0339903684623137, "grad_norm": 1.0279797315597534, "learning_rate": 9.570842105263158e-05, "loss": 0.2978, "step": 36323 }, { "epoch": 2.0340463657744428, "grad_norm": 1.109320878982544, "learning_rate": 9.570815789473685e-05, "loss": 0.417, "step": 36324 }, { "epoch": 2.0341023630865718, "grad_norm": 1.2711796760559082, "learning_rate": 9.570789473684211e-05, "loss": 0.5205, "step": 36325 }, { "epoch": 2.034158360398701, "grad_norm": 1.1478742361068726, "learning_rate": 9.570763157894738e-05, "loss": 0.3033, "step": 36326 }, { "epoch": 
2.03421435771083, "grad_norm": 1.1631360054016113, "learning_rate": 9.570736842105263e-05, "loss": 0.3176, "step": 36327 }, { "epoch": 2.034270355022959, "grad_norm": 1.0805273056030273, "learning_rate": 9.57071052631579e-05, "loss": 0.4554, "step": 36328 }, { "epoch": 2.034326352335088, "grad_norm": 1.1540974378585815, "learning_rate": 9.570684210526316e-05, "loss": 0.3262, "step": 36329 }, { "epoch": 2.034382349647217, "grad_norm": 1.108595609664917, "learning_rate": 9.570657894736843e-05, "loss": 0.3023, "step": 36330 }, { "epoch": 2.034438346959346, "grad_norm": 1.475795865058899, "learning_rate": 9.57063157894737e-05, "loss": 0.383, "step": 36331 }, { "epoch": 2.034494344271475, "grad_norm": 1.4235014915466309, "learning_rate": 9.570605263157895e-05, "loss": 0.4574, "step": 36332 }, { "epoch": 2.034550341583604, "grad_norm": 1.1427555084228516, "learning_rate": 9.570578947368421e-05, "loss": 0.3371, "step": 36333 }, { "epoch": 2.034606338895733, "grad_norm": 1.497515082359314, "learning_rate": 9.570552631578947e-05, "loss": 0.5595, "step": 36334 }, { "epoch": 2.034662336207862, "grad_norm": 1.7947185039520264, "learning_rate": 9.570526315789475e-05, "loss": 0.4902, "step": 36335 }, { "epoch": 2.034718333519991, "grad_norm": 1.197310447692871, "learning_rate": 9.5705e-05, "loss": 0.3562, "step": 36336 }, { "epoch": 2.03477433083212, "grad_norm": 1.3201531171798706, "learning_rate": 9.570473684210527e-05, "loss": 0.358, "step": 36337 }, { "epoch": 2.034830328144249, "grad_norm": 1.0975604057312012, "learning_rate": 9.570447368421053e-05, "loss": 0.4022, "step": 36338 }, { "epoch": 2.034886325456378, "grad_norm": 1.3169515132904053, "learning_rate": 9.57042105263158e-05, "loss": 0.4562, "step": 36339 }, { "epoch": 2.034942322768507, "grad_norm": 1.1284713745117188, "learning_rate": 9.570394736842106e-05, "loss": 0.3666, "step": 36340 }, { "epoch": 2.034998320080636, "grad_norm": 1.9148318767547607, "learning_rate": 9.570368421052632e-05, "loss": 0.5468, "step": 
36341 }, { "epoch": 2.035054317392765, "grad_norm": 1.2590042352676392, "learning_rate": 9.570342105263158e-05, "loss": 0.3115, "step": 36342 }, { "epoch": 2.035110314704894, "grad_norm": 1.3185571432113647, "learning_rate": 9.570315789473685e-05, "loss": 0.45, "step": 36343 }, { "epoch": 2.035166312017023, "grad_norm": 1.2616349458694458, "learning_rate": 9.570289473684211e-05, "loss": 0.4616, "step": 36344 }, { "epoch": 2.035222309329152, "grad_norm": 1.0101739168167114, "learning_rate": 9.570263157894737e-05, "loss": 0.3045, "step": 36345 }, { "epoch": 2.035278306641281, "grad_norm": 1.2091948986053467, "learning_rate": 9.570236842105263e-05, "loss": 0.4284, "step": 36346 }, { "epoch": 2.03533430395341, "grad_norm": 1.174495816230774, "learning_rate": 9.57021052631579e-05, "loss": 0.4017, "step": 36347 }, { "epoch": 2.035390301265539, "grad_norm": 1.139573335647583, "learning_rate": 9.570184210526316e-05, "loss": 0.5467, "step": 36348 }, { "epoch": 2.0354462985776682, "grad_norm": 1.0842411518096924, "learning_rate": 9.570157894736844e-05, "loss": 0.4468, "step": 36349 }, { "epoch": 2.0355022958897973, "grad_norm": 0.9883029460906982, "learning_rate": 9.570131578947368e-05, "loss": 0.2937, "step": 36350 }, { "epoch": 2.0355582932019263, "grad_norm": 1.8533591032028198, "learning_rate": 9.570105263157894e-05, "loss": 0.4119, "step": 36351 }, { "epoch": 2.0356142905140553, "grad_norm": 1.1652733087539673, "learning_rate": 9.570078947368422e-05, "loss": 0.3923, "step": 36352 }, { "epoch": 2.0356702878261843, "grad_norm": 1.3475103378295898, "learning_rate": 9.570052631578948e-05, "loss": 0.4403, "step": 36353 }, { "epoch": 2.0357262851383133, "grad_norm": 1.1191695928573608, "learning_rate": 9.570026315789475e-05, "loss": 0.4404, "step": 36354 }, { "epoch": 2.0357822824504423, "grad_norm": 1.1079308986663818, "learning_rate": 9.57e-05, "loss": 0.2964, "step": 36355 }, { "epoch": 2.0358382797625714, "grad_norm": 1.2974822521209717, "learning_rate": 
9.569973684210527e-05, "loss": 0.4669, "step": 36356 }, { "epoch": 2.0358942770747004, "grad_norm": 1.1397373676300049, "learning_rate": 9.569947368421053e-05, "loss": 0.4338, "step": 36357 }, { "epoch": 2.0359502743868294, "grad_norm": 1.115784764289856, "learning_rate": 9.56992105263158e-05, "loss": 0.37, "step": 36358 }, { "epoch": 2.0360062716989584, "grad_norm": 1.3924771547317505, "learning_rate": 9.569894736842106e-05, "loss": 0.5537, "step": 36359 }, { "epoch": 2.0360622690110874, "grad_norm": 1.061295986175537, "learning_rate": 9.569868421052632e-05, "loss": 0.3646, "step": 36360 }, { "epoch": 2.0361182663232165, "grad_norm": 1.1816364526748657, "learning_rate": 9.569842105263158e-05, "loss": 0.4101, "step": 36361 }, { "epoch": 2.0361742636353455, "grad_norm": 1.3380966186523438, "learning_rate": 9.569815789473685e-05, "loss": 0.4725, "step": 36362 }, { "epoch": 2.0362302609474745, "grad_norm": 1.329868197441101, "learning_rate": 9.569789473684211e-05, "loss": 0.4685, "step": 36363 }, { "epoch": 2.0362862582596035, "grad_norm": 2.372368097305298, "learning_rate": 9.569763157894737e-05, "loss": 0.355, "step": 36364 }, { "epoch": 2.0363422555717325, "grad_norm": 1.1149588823318481, "learning_rate": 9.569736842105263e-05, "loss": 0.397, "step": 36365 }, { "epoch": 2.0363982528838616, "grad_norm": 1.4281861782073975, "learning_rate": 9.56971052631579e-05, "loss": 0.4194, "step": 36366 }, { "epoch": 2.0364542501959906, "grad_norm": 1.0964142084121704, "learning_rate": 9.569684210526317e-05, "loss": 0.464, "step": 36367 }, { "epoch": 2.0365102475081196, "grad_norm": 1.3055789470672607, "learning_rate": 9.569657894736843e-05, "loss": 0.4803, "step": 36368 }, { "epoch": 2.0365662448202486, "grad_norm": 1.1180106401443481, "learning_rate": 9.569631578947369e-05, "loss": 0.3562, "step": 36369 }, { "epoch": 2.0366222421323776, "grad_norm": 1.1111477613449097, "learning_rate": 9.569605263157895e-05, "loss": 0.3256, "step": 36370 }, { "epoch": 2.0366782394445067, 
"grad_norm": 1.2732056379318237, "learning_rate": 9.569578947368422e-05, "loss": 0.5059, "step": 36371 }, { "epoch": 2.0367342367566357, "grad_norm": 0.9448058009147644, "learning_rate": 9.569552631578948e-05, "loss": 0.3496, "step": 36372 }, { "epoch": 2.0367902340687647, "grad_norm": 1.2061527967453003, "learning_rate": 9.569526315789474e-05, "loss": 0.4017, "step": 36373 }, { "epoch": 2.0368462313808937, "grad_norm": 1.293936014175415, "learning_rate": 9.5695e-05, "loss": 0.3569, "step": 36374 }, { "epoch": 2.0369022286930227, "grad_norm": 1.329754114151001, "learning_rate": 9.569473684210527e-05, "loss": 0.4799, "step": 36375 }, { "epoch": 2.0369582260051518, "grad_norm": 1.2398200035095215, "learning_rate": 9.569447368421053e-05, "loss": 0.4471, "step": 36376 }, { "epoch": 2.0370142233172808, "grad_norm": 1.3133492469787598, "learning_rate": 9.56942105263158e-05, "loss": 0.3627, "step": 36377 }, { "epoch": 2.03707022062941, "grad_norm": 1.4421110153198242, "learning_rate": 9.569394736842105e-05, "loss": 0.5083, "step": 36378 }, { "epoch": 2.037126217941539, "grad_norm": 15.928050994873047, "learning_rate": 9.569368421052632e-05, "loss": 0.3977, "step": 36379 }, { "epoch": 2.037182215253668, "grad_norm": 1.1906312704086304, "learning_rate": 9.569342105263158e-05, "loss": 0.3724, "step": 36380 }, { "epoch": 2.037238212565797, "grad_norm": 1.338821291923523, "learning_rate": 9.569315789473686e-05, "loss": 0.3934, "step": 36381 }, { "epoch": 2.037294209877926, "grad_norm": 1.6535534858703613, "learning_rate": 9.56928947368421e-05, "loss": 0.4688, "step": 36382 }, { "epoch": 2.037350207190055, "grad_norm": 1.2338567972183228, "learning_rate": 9.569263157894738e-05, "loss": 0.363, "step": 36383 }, { "epoch": 2.037406204502184, "grad_norm": 1.1587836742401123, "learning_rate": 9.569236842105264e-05, "loss": 0.3548, "step": 36384 }, { "epoch": 2.037462201814313, "grad_norm": 1.517890214920044, "learning_rate": 9.56921052631579e-05, "loss": 0.5015, "step": 36385 }, { 
"epoch": 2.037518199126442, "grad_norm": 1.1395021677017212, "learning_rate": 9.569184210526317e-05, "loss": 0.4391, "step": 36386 }, { "epoch": 2.037574196438571, "grad_norm": 2.258161783218384, "learning_rate": 9.569157894736841e-05, "loss": 0.4614, "step": 36387 }, { "epoch": 2.0376301937507, "grad_norm": 1.120851755142212, "learning_rate": 9.569131578947369e-05, "loss": 0.3919, "step": 36388 }, { "epoch": 2.037686191062829, "grad_norm": 1.4719966650009155, "learning_rate": 9.569105263157895e-05, "loss": 0.4211, "step": 36389 }, { "epoch": 2.037742188374958, "grad_norm": 1.056656837463379, "learning_rate": 9.569078947368422e-05, "loss": 0.3292, "step": 36390 }, { "epoch": 2.037798185687087, "grad_norm": 1.1554722785949707, "learning_rate": 9.569052631578948e-05, "loss": 0.3393, "step": 36391 }, { "epoch": 2.037854182999216, "grad_norm": 1.3846722841262817, "learning_rate": 9.569026315789474e-05, "loss": 0.4035, "step": 36392 }, { "epoch": 2.037910180311345, "grad_norm": 1.2527151107788086, "learning_rate": 9.569e-05, "loss": 0.3506, "step": 36393 }, { "epoch": 2.037966177623474, "grad_norm": 1.1692259311676025, "learning_rate": 9.568973684210527e-05, "loss": 0.3963, "step": 36394 }, { "epoch": 2.038022174935603, "grad_norm": 1.0423853397369385, "learning_rate": 9.568947368421053e-05, "loss": 0.3444, "step": 36395 }, { "epoch": 2.038078172247732, "grad_norm": 1.27071213722229, "learning_rate": 9.568921052631579e-05, "loss": 0.3218, "step": 36396 }, { "epoch": 2.038134169559861, "grad_norm": 1.1114684343338013, "learning_rate": 9.568894736842105e-05, "loss": 0.3699, "step": 36397 }, { "epoch": 2.03819016687199, "grad_norm": 1.2327271699905396, "learning_rate": 9.568868421052633e-05, "loss": 0.3797, "step": 36398 }, { "epoch": 2.038246164184119, "grad_norm": 1.254450798034668, "learning_rate": 9.568842105263159e-05, "loss": 0.5084, "step": 36399 }, { "epoch": 2.038302161496248, "grad_norm": 1.3284070491790771, "learning_rate": 9.568815789473685e-05, "loss": 0.3726, 
"step": 36400 }, { "epoch": 2.0383581588083772, "grad_norm": 1.1129344701766968, "learning_rate": 9.56878947368421e-05, "loss": 0.4085, "step": 36401 }, { "epoch": 2.0384141561205062, "grad_norm": 1.1655981540679932, "learning_rate": 9.568763157894736e-05, "loss": 0.3665, "step": 36402 }, { "epoch": 2.0384701534326353, "grad_norm": 1.2138686180114746, "learning_rate": 9.568736842105264e-05, "loss": 0.421, "step": 36403 }, { "epoch": 2.0385261507447643, "grad_norm": 1.2618060111999512, "learning_rate": 9.56871052631579e-05, "loss": 0.5025, "step": 36404 }, { "epoch": 2.0385821480568933, "grad_norm": 1.064089298248291, "learning_rate": 9.568684210526316e-05, "loss": 0.4081, "step": 36405 }, { "epoch": 2.0386381453690223, "grad_norm": 1.2715030908584595, "learning_rate": 9.568657894736842e-05, "loss": 0.4756, "step": 36406 }, { "epoch": 2.0386941426811513, "grad_norm": 1.3624147176742554, "learning_rate": 9.568631578947369e-05, "loss": 0.4342, "step": 36407 }, { "epoch": 2.0387501399932804, "grad_norm": 1.3513011932373047, "learning_rate": 9.568605263157895e-05, "loss": 0.3672, "step": 36408 }, { "epoch": 2.0388061373054094, "grad_norm": 1.0731122493743896, "learning_rate": 9.568578947368422e-05, "loss": 0.3693, "step": 36409 }, { "epoch": 2.0388621346175384, "grad_norm": 1.1919761896133423, "learning_rate": 9.568552631578947e-05, "loss": 0.3531, "step": 36410 }, { "epoch": 2.0389181319296674, "grad_norm": 1.0832617282867432, "learning_rate": 9.568526315789474e-05, "loss": 0.3769, "step": 36411 }, { "epoch": 2.0389741292417964, "grad_norm": 1.413251280784607, "learning_rate": 9.5685e-05, "loss": 0.5257, "step": 36412 }, { "epoch": 2.0390301265539255, "grad_norm": 1.0677169561386108, "learning_rate": 9.568473684210528e-05, "loss": 0.4086, "step": 36413 }, { "epoch": 2.0390861238660545, "grad_norm": 1.272536039352417, "learning_rate": 9.568447368421054e-05, "loss": 0.3315, "step": 36414 }, { "epoch": 2.0391421211781835, "grad_norm": 1.1808593273162842, "learning_rate": 
9.56842105263158e-05, "loss": 0.3294, "step": 36415 }, { "epoch": 2.0391981184903125, "grad_norm": 1.25516939163208, "learning_rate": 9.568394736842106e-05, "loss": 0.3966, "step": 36416 }, { "epoch": 2.0392541158024415, "grad_norm": 1.088417410850525, "learning_rate": 9.568368421052633e-05, "loss": 0.2943, "step": 36417 }, { "epoch": 2.0393101131145706, "grad_norm": 1.3179112672805786, "learning_rate": 9.568342105263159e-05, "loss": 0.4835, "step": 36418 }, { "epoch": 2.0393661104266996, "grad_norm": 1.8994653224945068, "learning_rate": 9.568315789473683e-05, "loss": 0.546, "step": 36419 }, { "epoch": 2.0394221077388286, "grad_norm": 1.1465600728988647, "learning_rate": 9.568289473684211e-05, "loss": 0.3404, "step": 36420 }, { "epoch": 2.0394781050509576, "grad_norm": 1.4661141633987427, "learning_rate": 9.568263157894737e-05, "loss": 0.4156, "step": 36421 }, { "epoch": 2.0395341023630866, "grad_norm": 1.3896393775939941, "learning_rate": 9.568236842105264e-05, "loss": 0.4228, "step": 36422 }, { "epoch": 2.0395900996752157, "grad_norm": 1.2230881452560425, "learning_rate": 9.56821052631579e-05, "loss": 0.3906, "step": 36423 }, { "epoch": 2.0396460969873447, "grad_norm": 1.1439485549926758, "learning_rate": 9.568184210526316e-05, "loss": 0.3726, "step": 36424 }, { "epoch": 2.0397020942994737, "grad_norm": 1.0547151565551758, "learning_rate": 9.568157894736842e-05, "loss": 0.3595, "step": 36425 }, { "epoch": 2.0397580916116027, "grad_norm": 1.3836079835891724, "learning_rate": 9.56813157894737e-05, "loss": 0.5307, "step": 36426 }, { "epoch": 2.0398140889237317, "grad_norm": 1.029758095741272, "learning_rate": 9.568105263157895e-05, "loss": 0.377, "step": 36427 }, { "epoch": 2.0398700862358607, "grad_norm": 0.9970435500144958, "learning_rate": 9.568078947368421e-05, "loss": 0.3412, "step": 36428 }, { "epoch": 2.0399260835479898, "grad_norm": 1.1629329919815063, "learning_rate": 9.568052631578947e-05, "loss": 0.3815, "step": 36429 }, { "epoch": 2.039982080860119, 
"grad_norm": 1.4850358963012695, "learning_rate": 9.568026315789475e-05, "loss": 0.4581, "step": 36430 }, { "epoch": 2.040038078172248, "grad_norm": 1.164542317390442, "learning_rate": 9.568e-05, "loss": 0.3491, "step": 36431 }, { "epoch": 2.040094075484377, "grad_norm": 1.3895856142044067, "learning_rate": 9.567973684210527e-05, "loss": 0.3756, "step": 36432 }, { "epoch": 2.040150072796506, "grad_norm": 1.2311501502990723, "learning_rate": 9.567947368421052e-05, "loss": 0.5004, "step": 36433 }, { "epoch": 2.040206070108635, "grad_norm": 1.1164169311523438, "learning_rate": 9.56792105263158e-05, "loss": 0.311, "step": 36434 }, { "epoch": 2.040262067420764, "grad_norm": 1.1234296560287476, "learning_rate": 9.567894736842106e-05, "loss": 0.3489, "step": 36435 }, { "epoch": 2.040318064732893, "grad_norm": 1.1429530382156372, "learning_rate": 9.567868421052633e-05, "loss": 0.4242, "step": 36436 }, { "epoch": 2.040374062045022, "grad_norm": 1.1247172355651855, "learning_rate": 9.567842105263158e-05, "loss": 0.4346, "step": 36437 }, { "epoch": 2.040430059357151, "grad_norm": 1.36590576171875, "learning_rate": 9.567815789473684e-05, "loss": 0.4237, "step": 36438 }, { "epoch": 2.04048605666928, "grad_norm": 1.1380928754806519, "learning_rate": 9.567789473684211e-05, "loss": 0.4228, "step": 36439 }, { "epoch": 2.040542053981409, "grad_norm": 1.4909753799438477, "learning_rate": 9.567763157894737e-05, "loss": 0.4079, "step": 36440 }, { "epoch": 2.040598051293538, "grad_norm": 1.1781961917877197, "learning_rate": 9.567736842105264e-05, "loss": 0.3907, "step": 36441 }, { "epoch": 2.040654048605667, "grad_norm": 1.4378325939178467, "learning_rate": 9.567710526315789e-05, "loss": 0.4051, "step": 36442 }, { "epoch": 2.040710045917796, "grad_norm": 1.249224305152893, "learning_rate": 9.567684210526316e-05, "loss": 0.4587, "step": 36443 }, { "epoch": 2.040766043229925, "grad_norm": 1.0237922668457031, "learning_rate": 9.567657894736842e-05, "loss": 0.3823, "step": 36444 }, { 
"epoch": 2.040822040542054, "grad_norm": 0.989255428314209, "learning_rate": 9.56763157894737e-05, "loss": 0.3499, "step": 36445 }, { "epoch": 2.040878037854183, "grad_norm": 1.6551977396011353, "learning_rate": 9.567605263157896e-05, "loss": 0.3919, "step": 36446 }, { "epoch": 2.040934035166312, "grad_norm": 1.178074836730957, "learning_rate": 9.567578947368422e-05, "loss": 0.3257, "step": 36447 }, { "epoch": 2.040990032478441, "grad_norm": 1.409286618232727, "learning_rate": 9.567552631578948e-05, "loss": 0.5432, "step": 36448 }, { "epoch": 2.04104602979057, "grad_norm": 1.3017104864120483, "learning_rate": 9.567526315789475e-05, "loss": 0.3588, "step": 36449 }, { "epoch": 2.041102027102699, "grad_norm": 1.182559847831726, "learning_rate": 9.567500000000001e-05, "loss": 0.3466, "step": 36450 }, { "epoch": 2.041158024414828, "grad_norm": 1.0540881156921387, "learning_rate": 9.567473684210527e-05, "loss": 0.377, "step": 36451 }, { "epoch": 2.041214021726957, "grad_norm": 1.5845394134521484, "learning_rate": 9.567447368421053e-05, "loss": 0.3711, "step": 36452 }, { "epoch": 2.0412700190390862, "grad_norm": 1.032497763633728, "learning_rate": 9.56742105263158e-05, "loss": 0.3749, "step": 36453 }, { "epoch": 2.0413260163512152, "grad_norm": 1.2808810472488403, "learning_rate": 9.567394736842106e-05, "loss": 0.388, "step": 36454 }, { "epoch": 2.0413820136633443, "grad_norm": 1.5016989707946777, "learning_rate": 9.567368421052632e-05, "loss": 0.3864, "step": 36455 }, { "epoch": 2.0414380109754733, "grad_norm": 1.0901206731796265, "learning_rate": 9.567342105263158e-05, "loss": 0.3969, "step": 36456 }, { "epoch": 2.0414940082876023, "grad_norm": 1.341513752937317, "learning_rate": 9.567315789473684e-05, "loss": 0.4181, "step": 36457 }, { "epoch": 2.0415500055997313, "grad_norm": 1.3783624172210693, "learning_rate": 9.567289473684211e-05, "loss": 0.5157, "step": 36458 }, { "epoch": 2.0416060029118603, "grad_norm": 1.3884556293487549, "learning_rate": 
9.567263157894737e-05, "loss": 0.4305, "step": 36459 }, { "epoch": 2.0416620002239894, "grad_norm": 1.177894949913025, "learning_rate": 9.567236842105263e-05, "loss": 0.5656, "step": 36460 }, { "epoch": 2.0417179975361184, "grad_norm": 1.1024378538131714, "learning_rate": 9.567210526315789e-05, "loss": 0.4329, "step": 36461 }, { "epoch": 2.0417739948482474, "grad_norm": 1.2017886638641357, "learning_rate": 9.567184210526317e-05, "loss": 0.3496, "step": 36462 }, { "epoch": 2.0418299921603764, "grad_norm": 1.2188318967819214, "learning_rate": 9.567157894736843e-05, "loss": 0.376, "step": 36463 }, { "epoch": 2.0418859894725054, "grad_norm": 1.315497875213623, "learning_rate": 9.56713157894737e-05, "loss": 0.4797, "step": 36464 }, { "epoch": 2.0419419867846345, "grad_norm": 1.1133599281311035, "learning_rate": 9.567105263157894e-05, "loss": 0.374, "step": 36465 }, { "epoch": 2.0419979840967635, "grad_norm": 1.440004587173462, "learning_rate": 9.567078947368422e-05, "loss": 0.4988, "step": 36466 }, { "epoch": 2.0420539814088925, "grad_norm": 1.364035725593567, "learning_rate": 9.567052631578948e-05, "loss": 0.5888, "step": 36467 }, { "epoch": 2.0421099787210215, "grad_norm": 1.1423653364181519, "learning_rate": 9.567026315789475e-05, "loss": 0.3866, "step": 36468 }, { "epoch": 2.0421659760331505, "grad_norm": 1.2399284839630127, "learning_rate": 9.567000000000001e-05, "loss": 0.4427, "step": 36469 }, { "epoch": 2.0422219733452796, "grad_norm": 2.5457022190093994, "learning_rate": 9.566973684210527e-05, "loss": 0.3276, "step": 36470 }, { "epoch": 2.0422779706574086, "grad_norm": 1.1251628398895264, "learning_rate": 9.566947368421053e-05, "loss": 0.3402, "step": 36471 }, { "epoch": 2.0423339679695376, "grad_norm": 1.359660029411316, "learning_rate": 9.566921052631579e-05, "loss": 0.4478, "step": 36472 }, { "epoch": 2.0423899652816666, "grad_norm": 1.1996190547943115, "learning_rate": 9.566894736842106e-05, "loss": 0.3956, "step": 36473 }, { "epoch": 2.0424459625937956, 
"grad_norm": 1.0779095888137817, "learning_rate": 9.566868421052631e-05, "loss": 0.4106, "step": 36474 }, { "epoch": 2.0425019599059246, "grad_norm": 1.1836365461349487, "learning_rate": 9.566842105263158e-05, "loss": 0.4242, "step": 36475 }, { "epoch": 2.0425579572180537, "grad_norm": 1.069923758506775, "learning_rate": 9.566815789473684e-05, "loss": 0.3652, "step": 36476 }, { "epoch": 2.0426139545301827, "grad_norm": 1.194624423980713, "learning_rate": 9.566789473684212e-05, "loss": 0.4332, "step": 36477 }, { "epoch": 2.0426699518423117, "grad_norm": 1.1308766603469849, "learning_rate": 9.566763157894738e-05, "loss": 0.3135, "step": 36478 }, { "epoch": 2.0427259491544407, "grad_norm": 1.3823795318603516, "learning_rate": 9.566736842105264e-05, "loss": 0.2934, "step": 36479 }, { "epoch": 2.0427819464665697, "grad_norm": 1.156907081604004, "learning_rate": 9.56671052631579e-05, "loss": 0.3417, "step": 36480 }, { "epoch": 2.0428379437786988, "grad_norm": 2.377918243408203, "learning_rate": 9.566684210526317e-05, "loss": 0.4117, "step": 36481 }, { "epoch": 2.042893941090828, "grad_norm": 1.466359257698059, "learning_rate": 9.566657894736843e-05, "loss": 0.402, "step": 36482 }, { "epoch": 2.042949938402957, "grad_norm": 1.3489794731140137, "learning_rate": 9.566631578947369e-05, "loss": 0.4012, "step": 36483 }, { "epoch": 2.043005935715086, "grad_norm": 1.6134717464447021, "learning_rate": 9.566605263157895e-05, "loss": 0.4087, "step": 36484 }, { "epoch": 2.043061933027215, "grad_norm": 1.2730505466461182, "learning_rate": 9.566578947368422e-05, "loss": 0.3468, "step": 36485 }, { "epoch": 2.043117930339344, "grad_norm": 1.3391703367233276, "learning_rate": 9.566552631578948e-05, "loss": 0.5541, "step": 36486 }, { "epoch": 2.043173927651473, "grad_norm": 1.3690104484558105, "learning_rate": 9.566526315789474e-05, "loss": 0.455, "step": 36487 }, { "epoch": 2.043229924963602, "grad_norm": 1.2672092914581299, "learning_rate": 9.5665e-05, "loss": 0.3785, "step": 36488 }, { 
"epoch": 2.043285922275731, "grad_norm": 1.551269292831421, "learning_rate": 9.566473684210526e-05, "loss": 0.4424, "step": 36489 }, { "epoch": 2.04334191958786, "grad_norm": 1.2057708501815796, "learning_rate": 9.566447368421053e-05, "loss": 0.3383, "step": 36490 }, { "epoch": 2.043397916899989, "grad_norm": 1.1033844947814941, "learning_rate": 9.566421052631579e-05, "loss": 0.3056, "step": 36491 }, { "epoch": 2.043453914212118, "grad_norm": 1.648995280265808, "learning_rate": 9.566394736842105e-05, "loss": 0.3907, "step": 36492 }, { "epoch": 2.043509911524247, "grad_norm": 1.2089917659759521, "learning_rate": 9.566368421052631e-05, "loss": 0.346, "step": 36493 }, { "epoch": 2.043565908836376, "grad_norm": 1.0456515550613403, "learning_rate": 9.566342105263159e-05, "loss": 0.426, "step": 36494 }, { "epoch": 2.043621906148505, "grad_norm": 1.1843595504760742, "learning_rate": 9.566315789473684e-05, "loss": 0.3329, "step": 36495 }, { "epoch": 2.043677903460634, "grad_norm": 1.4157453775405884, "learning_rate": 9.566289473684212e-05, "loss": 0.3362, "step": 36496 }, { "epoch": 2.043733900772763, "grad_norm": 1.4117177724838257, "learning_rate": 9.566263157894736e-05, "loss": 0.3274, "step": 36497 }, { "epoch": 2.043789898084892, "grad_norm": 1.9223222732543945, "learning_rate": 9.566236842105264e-05, "loss": 0.4338, "step": 36498 }, { "epoch": 2.043845895397021, "grad_norm": 1.2246278524398804, "learning_rate": 9.56621052631579e-05, "loss": 0.5161, "step": 36499 }, { "epoch": 2.04390189270915, "grad_norm": 1.1343804597854614, "learning_rate": 9.566184210526317e-05, "loss": 0.3901, "step": 36500 }, { "epoch": 2.043957890021279, "grad_norm": 1.1578418016433716, "learning_rate": 9.566157894736843e-05, "loss": 0.3797, "step": 36501 }, { "epoch": 2.044013887333408, "grad_norm": 1.0334910154342651, "learning_rate": 9.566131578947369e-05, "loss": 0.3632, "step": 36502 }, { "epoch": 2.044069884645537, "grad_norm": 1.2720609903335571, "learning_rate": 9.566105263157895e-05, 
"loss": 0.4558, "step": 36503 }, { "epoch": 2.044125881957666, "grad_norm": 1.2591471672058105, "learning_rate": 9.566078947368422e-05, "loss": 0.5534, "step": 36504 }, { "epoch": 2.044181879269795, "grad_norm": 1.1023025512695312, "learning_rate": 9.566052631578948e-05, "loss": 0.4034, "step": 36505 }, { "epoch": 2.0442378765819242, "grad_norm": 1.1416971683502197, "learning_rate": 9.566026315789474e-05, "loss": 0.4341, "step": 36506 }, { "epoch": 2.0442938738940533, "grad_norm": 1.21024489402771, "learning_rate": 9.566e-05, "loss": 0.4344, "step": 36507 }, { "epoch": 2.0443498712061823, "grad_norm": 1.142454743385315, "learning_rate": 9.565973684210526e-05, "loss": 0.4636, "step": 36508 }, { "epoch": 2.0444058685183113, "grad_norm": 1.2204978466033936, "learning_rate": 9.565947368421054e-05, "loss": 0.6929, "step": 36509 }, { "epoch": 2.0444618658304403, "grad_norm": 1.3453527688980103, "learning_rate": 9.56592105263158e-05, "loss": 0.4727, "step": 36510 }, { "epoch": 2.0445178631425693, "grad_norm": 1.1923243999481201, "learning_rate": 9.565894736842105e-05, "loss": 0.4869, "step": 36511 }, { "epoch": 2.0445738604546984, "grad_norm": 1.0655434131622314, "learning_rate": 9.565868421052631e-05, "loss": 0.3825, "step": 36512 }, { "epoch": 2.0446298577668274, "grad_norm": 0.9945422410964966, "learning_rate": 9.565842105263159e-05, "loss": 0.325, "step": 36513 }, { "epoch": 2.0446858550789564, "grad_norm": 1.1645029783248901, "learning_rate": 9.565815789473685e-05, "loss": 0.3556, "step": 36514 }, { "epoch": 2.0447418523910854, "grad_norm": 1.2809237241744995, "learning_rate": 9.565789473684211e-05, "loss": 0.4214, "step": 36515 }, { "epoch": 2.0447978497032144, "grad_norm": 1.6623132228851318, "learning_rate": 9.565763157894737e-05, "loss": 0.3907, "step": 36516 }, { "epoch": 2.0448538470153435, "grad_norm": 1.1635187864303589, "learning_rate": 9.565736842105264e-05, "loss": 0.3445, "step": 36517 }, { "epoch": 2.0449098443274725, "grad_norm": 1.2451426982879639, 
"learning_rate": 9.56571052631579e-05, "loss": 0.3665, "step": 36518 }, { "epoch": 2.0449658416396015, "grad_norm": 1.2910524606704712, "learning_rate": 9.565684210526317e-05, "loss": 0.4116, "step": 36519 }, { "epoch": 2.0450218389517305, "grad_norm": 1.2206307649612427, "learning_rate": 9.565657894736842e-05, "loss": 0.4924, "step": 36520 }, { "epoch": 2.0450778362638595, "grad_norm": 1.2187786102294922, "learning_rate": 9.565631578947369e-05, "loss": 0.3896, "step": 36521 }, { "epoch": 2.0451338335759885, "grad_norm": 1.055055856704712, "learning_rate": 9.565605263157895e-05, "loss": 0.3306, "step": 36522 }, { "epoch": 2.0451898308881176, "grad_norm": 1.1805987358093262, "learning_rate": 9.565578947368421e-05, "loss": 0.3498, "step": 36523 }, { "epoch": 2.0452458282002466, "grad_norm": 1.7351652383804321, "learning_rate": 9.565552631578949e-05, "loss": 0.3427, "step": 36524 }, { "epoch": 2.0453018255123756, "grad_norm": 1.3048440217971802, "learning_rate": 9.565526315789473e-05, "loss": 0.3714, "step": 36525 }, { "epoch": 2.0453578228245046, "grad_norm": 1.1941004991531372, "learning_rate": 9.5655e-05, "loss": 0.3627, "step": 36526 }, { "epoch": 2.0454138201366336, "grad_norm": 1.2364619970321655, "learning_rate": 9.565473684210526e-05, "loss": 0.4008, "step": 36527 }, { "epoch": 2.0454698174487627, "grad_norm": 1.2263951301574707, "learning_rate": 9.565447368421054e-05, "loss": 0.3941, "step": 36528 }, { "epoch": 2.0455258147608917, "grad_norm": 1.3391637802124023, "learning_rate": 9.565421052631578e-05, "loss": 0.3986, "step": 36529 }, { "epoch": 2.0455818120730207, "grad_norm": 1.071968674659729, "learning_rate": 9.565394736842106e-05, "loss": 0.3317, "step": 36530 }, { "epoch": 2.0456378093851497, "grad_norm": 1.5218069553375244, "learning_rate": 9.565368421052632e-05, "loss": 0.485, "step": 36531 }, { "epoch": 2.0456938066972787, "grad_norm": 1.1150939464569092, "learning_rate": 9.565342105263159e-05, "loss": 0.3682, "step": 36532 }, { "epoch": 
2.0457498040094078, "grad_norm": 1.6196460723876953, "learning_rate": 9.565315789473685e-05, "loss": 0.4308, "step": 36533 }, { "epoch": 2.0458058013215368, "grad_norm": 1.3216304779052734, "learning_rate": 9.565289473684211e-05, "loss": 0.3898, "step": 36534 }, { "epoch": 2.045861798633666, "grad_norm": 1.3647665977478027, "learning_rate": 9.565263157894737e-05, "loss": 0.368, "step": 36535 }, { "epoch": 2.045917795945795, "grad_norm": 1.0381462574005127, "learning_rate": 9.565236842105264e-05, "loss": 0.258, "step": 36536 }, { "epoch": 2.045973793257924, "grad_norm": 1.904181957244873, "learning_rate": 9.56521052631579e-05, "loss": 0.4845, "step": 36537 }, { "epoch": 2.046029790570053, "grad_norm": 1.441455364227295, "learning_rate": 9.565184210526316e-05, "loss": 0.6119, "step": 36538 }, { "epoch": 2.046085787882182, "grad_norm": 1.2802162170410156, "learning_rate": 9.565157894736842e-05, "loss": 0.411, "step": 36539 }, { "epoch": 2.046141785194311, "grad_norm": 1.4507910013198853, "learning_rate": 9.565131578947368e-05, "loss": 0.4691, "step": 36540 }, { "epoch": 2.0461977825064395, "grad_norm": 1.3461358547210693, "learning_rate": 9.565105263157896e-05, "loss": 0.304, "step": 36541 }, { "epoch": 2.046253779818569, "grad_norm": 1.265123724937439, "learning_rate": 9.565078947368421e-05, "loss": 0.4243, "step": 36542 }, { "epoch": 2.0463097771306975, "grad_norm": 1.1044375896453857, "learning_rate": 9.565052631578947e-05, "loss": 0.4004, "step": 36543 }, { "epoch": 2.046365774442827, "grad_norm": 1.3630045652389526, "learning_rate": 9.565026315789473e-05, "loss": 0.4935, "step": 36544 }, { "epoch": 2.0464217717549555, "grad_norm": 1.0712352991104126, "learning_rate": 9.565000000000001e-05, "loss": 0.3826, "step": 36545 }, { "epoch": 2.0464777690670846, "grad_norm": 1.3000266551971436, "learning_rate": 9.564973684210527e-05, "loss": 0.469, "step": 36546 }, { "epoch": 2.0465337663792136, "grad_norm": 1.2789708375930786, "learning_rate": 9.564947368421053e-05, 
"loss": 0.4323, "step": 36547 }, { "epoch": 2.0465897636913426, "grad_norm": 1.2918531894683838, "learning_rate": 9.564921052631579e-05, "loss": 0.4387, "step": 36548 }, { "epoch": 2.0466457610034716, "grad_norm": 0.9122598767280579, "learning_rate": 9.564894736842106e-05, "loss": 0.297, "step": 36549 }, { "epoch": 2.0467017583156006, "grad_norm": 1.3538532257080078, "learning_rate": 9.564868421052632e-05, "loss": 0.4952, "step": 36550 }, { "epoch": 2.0467577556277297, "grad_norm": 1.0562924146652222, "learning_rate": 9.564842105263159e-05, "loss": 0.4113, "step": 36551 }, { "epoch": 2.0468137529398587, "grad_norm": 1.6232126951217651, "learning_rate": 9.564815789473684e-05, "loss": 0.5564, "step": 36552 }, { "epoch": 2.0468697502519877, "grad_norm": 1.0307356119155884, "learning_rate": 9.564789473684211e-05, "loss": 0.3868, "step": 36553 }, { "epoch": 2.0469257475641167, "grad_norm": 1.0779378414154053, "learning_rate": 9.564763157894737e-05, "loss": 0.357, "step": 36554 }, { "epoch": 2.0469817448762457, "grad_norm": 1.2754429578781128, "learning_rate": 9.564736842105265e-05, "loss": 0.3737, "step": 36555 }, { "epoch": 2.0470377421883748, "grad_norm": 1.0556129217147827, "learning_rate": 9.56471052631579e-05, "loss": 0.4356, "step": 36556 }, { "epoch": 2.0470937395005038, "grad_norm": 1.3854846954345703, "learning_rate": 9.564684210526316e-05, "loss": 0.3546, "step": 36557 }, { "epoch": 2.047149736812633, "grad_norm": 1.2450940608978271, "learning_rate": 9.564657894736842e-05, "loss": 0.4489, "step": 36558 }, { "epoch": 2.047205734124762, "grad_norm": 1.2619494199752808, "learning_rate": 9.564631578947368e-05, "loss": 0.396, "step": 36559 }, { "epoch": 2.047261731436891, "grad_norm": 1.088112711906433, "learning_rate": 9.564605263157896e-05, "loss": 0.338, "step": 36560 }, { "epoch": 2.04731772874902, "grad_norm": 1.1738293170928955, "learning_rate": 9.564578947368422e-05, "loss": 0.3759, "step": 36561 }, { "epoch": 2.047373726061149, "grad_norm": 
1.100028157234192, "learning_rate": 9.564552631578948e-05, "loss": 0.4259, "step": 36562 }, { "epoch": 2.047429723373278, "grad_norm": 1.1526410579681396, "learning_rate": 9.564526315789474e-05, "loss": 0.3414, "step": 36563 }, { "epoch": 2.047485720685407, "grad_norm": 1.3450732231140137, "learning_rate": 9.564500000000001e-05, "loss": 0.482, "step": 36564 }, { "epoch": 2.047541717997536, "grad_norm": 1.2071354389190674, "learning_rate": 9.564473684210527e-05, "loss": 0.3974, "step": 36565 }, { "epoch": 2.047597715309665, "grad_norm": 1.3167060613632202, "learning_rate": 9.564447368421053e-05, "loss": 0.4767, "step": 36566 }, { "epoch": 2.047653712621794, "grad_norm": 1.1307607889175415, "learning_rate": 9.564421052631579e-05, "loss": 0.3596, "step": 36567 }, { "epoch": 2.047709709933923, "grad_norm": 1.1424685716629028, "learning_rate": 9.564394736842106e-05, "loss": 0.4913, "step": 36568 }, { "epoch": 2.047765707246052, "grad_norm": 1.3867213726043701, "learning_rate": 9.564368421052632e-05, "loss": 0.468, "step": 36569 }, { "epoch": 2.047821704558181, "grad_norm": 0.9291280508041382, "learning_rate": 9.564342105263158e-05, "loss": 0.311, "step": 36570 }, { "epoch": 2.04787770187031, "grad_norm": 1.4469846487045288, "learning_rate": 9.564315789473684e-05, "loss": 0.4706, "step": 36571 }, { "epoch": 2.047933699182439, "grad_norm": 1.6718295812606812, "learning_rate": 9.564289473684211e-05, "loss": 0.3678, "step": 36572 }, { "epoch": 2.047989696494568, "grad_norm": 1.3264329433441162, "learning_rate": 9.564263157894737e-05, "loss": 0.4753, "step": 36573 }, { "epoch": 2.048045693806697, "grad_norm": 1.3022792339324951, "learning_rate": 9.564236842105265e-05, "loss": 0.3413, "step": 36574 }, { "epoch": 2.048101691118826, "grad_norm": 1.0563435554504395, "learning_rate": 9.56421052631579e-05, "loss": 0.4405, "step": 36575 }, { "epoch": 2.048157688430955, "grad_norm": 1.2141474485397339, "learning_rate": 9.564184210526315e-05, "loss": 0.3618, "step": 36576 }, { 
"epoch": 2.048213685743084, "grad_norm": 1.396917700767517, "learning_rate": 9.564157894736843e-05, "loss": 0.4116, "step": 36577 }, { "epoch": 2.048269683055213, "grad_norm": 1.1717616319656372, "learning_rate": 9.564131578947369e-05, "loss": 0.3369, "step": 36578 }, { "epoch": 2.048325680367342, "grad_norm": 1.2501972913742065, "learning_rate": 9.564105263157896e-05, "loss": 0.3603, "step": 36579 }, { "epoch": 2.048381677679471, "grad_norm": 1.0641067028045654, "learning_rate": 9.56407894736842e-05, "loss": 0.317, "step": 36580 }, { "epoch": 2.0484376749916002, "grad_norm": 1.1840697526931763, "learning_rate": 9.564052631578948e-05, "loss": 0.3738, "step": 36581 }, { "epoch": 2.0484936723037293, "grad_norm": 1.3396750688552856, "learning_rate": 9.564026315789474e-05, "loss": 0.4202, "step": 36582 }, { "epoch": 2.0485496696158583, "grad_norm": 1.1548391580581665, "learning_rate": 9.564000000000001e-05, "loss": 0.4973, "step": 36583 }, { "epoch": 2.0486056669279873, "grad_norm": 0.9568310379981995, "learning_rate": 9.563973684210526e-05, "loss": 0.2957, "step": 36584 }, { "epoch": 2.0486616642401163, "grad_norm": 1.3702946901321411, "learning_rate": 9.563947368421053e-05, "loss": 0.4046, "step": 36585 }, { "epoch": 2.0487176615522453, "grad_norm": 1.2944976091384888, "learning_rate": 9.563921052631579e-05, "loss": 0.3728, "step": 36586 }, { "epoch": 2.0487736588643743, "grad_norm": 6.153049468994141, "learning_rate": 9.563894736842107e-05, "loss": 0.4144, "step": 36587 }, { "epoch": 2.0488296561765034, "grad_norm": 1.2084838151931763, "learning_rate": 9.563868421052632e-05, "loss": 0.4585, "step": 36588 }, { "epoch": 2.0488856534886324, "grad_norm": 1.2019833326339722, "learning_rate": 9.563842105263158e-05, "loss": 0.4591, "step": 36589 }, { "epoch": 2.0489416508007614, "grad_norm": 1.2909711599349976, "learning_rate": 9.563815789473684e-05, "loss": 0.5021, "step": 36590 }, { "epoch": 2.0489976481128904, "grad_norm": 1.2167983055114746, "learning_rate": 
9.563789473684212e-05, "loss": 0.301, "step": 36591 }, { "epoch": 2.0490536454250194, "grad_norm": 1.307668685913086, "learning_rate": 9.563763157894738e-05, "loss": 0.4496, "step": 36592 }, { "epoch": 2.0491096427371485, "grad_norm": 1.1638180017471313, "learning_rate": 9.563736842105264e-05, "loss": 0.3485, "step": 36593 }, { "epoch": 2.0491656400492775, "grad_norm": 1.4204975366592407, "learning_rate": 9.56371052631579e-05, "loss": 0.5176, "step": 36594 }, { "epoch": 2.0492216373614065, "grad_norm": 2.032364845275879, "learning_rate": 9.563684210526316e-05, "loss": 0.3821, "step": 36595 }, { "epoch": 2.0492776346735355, "grad_norm": 1.2145661115646362, "learning_rate": 9.563657894736843e-05, "loss": 0.3932, "step": 36596 }, { "epoch": 2.0493336319856645, "grad_norm": 1.2356343269348145, "learning_rate": 9.563631578947369e-05, "loss": 0.3657, "step": 36597 }, { "epoch": 2.0493896292977936, "grad_norm": 1.2230480909347534, "learning_rate": 9.563605263157895e-05, "loss": 0.3893, "step": 36598 }, { "epoch": 2.0494456266099226, "grad_norm": 1.3757137060165405, "learning_rate": 9.563578947368421e-05, "loss": 0.4167, "step": 36599 }, { "epoch": 2.0495016239220516, "grad_norm": 1.1271175146102905, "learning_rate": 9.563552631578948e-05, "loss": 0.3841, "step": 36600 }, { "epoch": 2.0495576212341806, "grad_norm": 1.030759334564209, "learning_rate": 9.563526315789474e-05, "loss": 0.3127, "step": 36601 }, { "epoch": 2.0496136185463096, "grad_norm": 1.291495442390442, "learning_rate": 9.5635e-05, "loss": 0.51, "step": 36602 }, { "epoch": 2.0496696158584387, "grad_norm": 0.9779292941093445, "learning_rate": 9.563473684210526e-05, "loss": 0.3252, "step": 36603 }, { "epoch": 2.0497256131705677, "grad_norm": 1.4492605924606323, "learning_rate": 9.563447368421053e-05, "loss": 0.5175, "step": 36604 }, { "epoch": 2.0497816104826967, "grad_norm": 1.5373119115829468, "learning_rate": 9.56342105263158e-05, "loss": 0.3834, "step": 36605 }, { "epoch": 2.0498376077948257, "grad_norm": 
1.162558913230896, "learning_rate": 9.563394736842107e-05, "loss": 0.3834, "step": 36606 }, { "epoch": 2.0498936051069547, "grad_norm": 1.3218050003051758, "learning_rate": 9.563368421052631e-05, "loss": 0.3625, "step": 36607 }, { "epoch": 2.0499496024190837, "grad_norm": 1.0814683437347412, "learning_rate": 9.563342105263159e-05, "loss": 0.3973, "step": 36608 }, { "epoch": 2.0500055997312128, "grad_norm": 1.1715924739837646, "learning_rate": 9.563315789473685e-05, "loss": 0.3398, "step": 36609 }, { "epoch": 2.050061597043342, "grad_norm": 1.4802566766738892, "learning_rate": 9.56328947368421e-05, "loss": 0.4495, "step": 36610 }, { "epoch": 2.050117594355471, "grad_norm": 1.1524264812469482, "learning_rate": 9.563263157894738e-05, "loss": 0.3766, "step": 36611 }, { "epoch": 2.0501735916676, "grad_norm": 1.2625375986099243, "learning_rate": 9.563236842105263e-05, "loss": 0.3898, "step": 36612 }, { "epoch": 2.050229588979729, "grad_norm": 1.2026548385620117, "learning_rate": 9.56321052631579e-05, "loss": 0.4932, "step": 36613 }, { "epoch": 2.050285586291858, "grad_norm": 1.5375016927719116, "learning_rate": 9.563184210526316e-05, "loss": 0.5922, "step": 36614 }, { "epoch": 2.050341583603987, "grad_norm": 1.1345345973968506, "learning_rate": 9.563157894736843e-05, "loss": 0.2752, "step": 36615 }, { "epoch": 2.050397580916116, "grad_norm": 1.2007031440734863, "learning_rate": 9.563131578947369e-05, "loss": 0.4468, "step": 36616 }, { "epoch": 2.050453578228245, "grad_norm": 1.315387487411499, "learning_rate": 9.563105263157895e-05, "loss": 0.5099, "step": 36617 }, { "epoch": 2.050509575540374, "grad_norm": 1.5523185729980469, "learning_rate": 9.563078947368421e-05, "loss": 0.4931, "step": 36618 }, { "epoch": 2.050565572852503, "grad_norm": 1.0699571371078491, "learning_rate": 9.563052631578948e-05, "loss": 0.3211, "step": 36619 }, { "epoch": 2.050621570164632, "grad_norm": 1.1341267824172974, "learning_rate": 9.563026315789474e-05, "loss": 0.287, "step": 36620 }, { 
"epoch": 2.050677567476761, "grad_norm": 1.1612727642059326, "learning_rate": 9.563e-05, "loss": 0.3456, "step": 36621 }, { "epoch": 2.05073356478889, "grad_norm": 1.3249485492706299, "learning_rate": 9.562973684210526e-05, "loss": 0.4748, "step": 36622 }, { "epoch": 2.050789562101019, "grad_norm": 1.222671389579773, "learning_rate": 9.562947368421054e-05, "loss": 0.4131, "step": 36623 }, { "epoch": 2.050845559413148, "grad_norm": 1.3996583223342896, "learning_rate": 9.56292105263158e-05, "loss": 0.3747, "step": 36624 }, { "epoch": 2.050901556725277, "grad_norm": 1.175726056098938, "learning_rate": 9.562894736842106e-05, "loss": 0.356, "step": 36625 }, { "epoch": 2.050957554037406, "grad_norm": 1.2267580032348633, "learning_rate": 9.562868421052632e-05, "loss": 0.5676, "step": 36626 }, { "epoch": 2.051013551349535, "grad_norm": 1.312058925628662, "learning_rate": 9.562842105263158e-05, "loss": 0.4798, "step": 36627 }, { "epoch": 2.051069548661664, "grad_norm": 1.1690709590911865, "learning_rate": 9.562815789473685e-05, "loss": 0.4803, "step": 36628 }, { "epoch": 2.051125545973793, "grad_norm": 1.144781231880188, "learning_rate": 9.562789473684211e-05, "loss": 0.3738, "step": 36629 }, { "epoch": 2.051181543285922, "grad_norm": 1.0730394124984741, "learning_rate": 9.562763157894737e-05, "loss": 0.3401, "step": 36630 }, { "epoch": 2.051237540598051, "grad_norm": 1.4585849046707153, "learning_rate": 9.562736842105263e-05, "loss": 0.5125, "step": 36631 }, { "epoch": 2.05129353791018, "grad_norm": 1.3115953207015991, "learning_rate": 9.56271052631579e-05, "loss": 0.4958, "step": 36632 }, { "epoch": 2.0513495352223092, "grad_norm": 1.5912495851516724, "learning_rate": 9.562684210526316e-05, "loss": 0.3862, "step": 36633 }, { "epoch": 2.0514055325344382, "grad_norm": 1.532239317893982, "learning_rate": 9.562657894736842e-05, "loss": 0.3606, "step": 36634 }, { "epoch": 2.0514615298465673, "grad_norm": 1.2204035520553589, "learning_rate": 9.562631578947368e-05, "loss": 
0.4069, "step": 36635 }, { "epoch": 2.0515175271586963, "grad_norm": 1.1472162008285522, "learning_rate": 9.562605263157895e-05, "loss": 0.295, "step": 36636 }, { "epoch": 2.0515735244708253, "grad_norm": 1.5692294836044312, "learning_rate": 9.562578947368421e-05, "loss": 0.3696, "step": 36637 }, { "epoch": 2.0516295217829543, "grad_norm": 1.2900413274765015, "learning_rate": 9.562552631578949e-05, "loss": 0.3143, "step": 36638 }, { "epoch": 2.0516855190950833, "grad_norm": 1.001652479171753, "learning_rate": 9.562526315789473e-05, "loss": 0.2978, "step": 36639 }, { "epoch": 2.0517415164072124, "grad_norm": 1.2048441171646118, "learning_rate": 9.562500000000001e-05, "loss": 0.4021, "step": 36640 }, { "epoch": 2.0517975137193414, "grad_norm": 1.0572541952133179, "learning_rate": 9.562473684210527e-05, "loss": 0.4368, "step": 36641 }, { "epoch": 2.0518535110314704, "grad_norm": 1.110068678855896, "learning_rate": 9.562447368421054e-05, "loss": 0.3574, "step": 36642 }, { "epoch": 2.0519095083435994, "grad_norm": 1.2250903844833374, "learning_rate": 9.56242105263158e-05, "loss": 0.3853, "step": 36643 }, { "epoch": 2.0519655056557284, "grad_norm": 1.0479674339294434, "learning_rate": 9.562394736842105e-05, "loss": 0.2793, "step": 36644 }, { "epoch": 2.0520215029678575, "grad_norm": 1.0499855279922485, "learning_rate": 9.562368421052632e-05, "loss": 0.362, "step": 36645 }, { "epoch": 2.0520775002799865, "grad_norm": 1.1197271347045898, "learning_rate": 9.562342105263158e-05, "loss": 0.389, "step": 36646 }, { "epoch": 2.0521334975921155, "grad_norm": 1.198747158050537, "learning_rate": 9.562315789473685e-05, "loss": 0.365, "step": 36647 }, { "epoch": 2.0521894949042445, "grad_norm": 2.027738094329834, "learning_rate": 9.562289473684211e-05, "loss": 0.4487, "step": 36648 }, { "epoch": 2.0522454922163735, "grad_norm": 1.1502078771591187, "learning_rate": 9.562263157894737e-05, "loss": 0.4557, "step": 36649 }, { "epoch": 2.0523014895285026, "grad_norm": 1.2851338386535645, 
"learning_rate": 9.562236842105263e-05, "loss": 0.5174, "step": 36650 }, { "epoch": 2.0523574868406316, "grad_norm": 1.2322546243667603, "learning_rate": 9.56221052631579e-05, "loss": 0.4128, "step": 36651 }, { "epoch": 2.0524134841527606, "grad_norm": 1.146069884300232, "learning_rate": 9.562184210526316e-05, "loss": 0.376, "step": 36652 }, { "epoch": 2.0524694814648896, "grad_norm": 1.13925039768219, "learning_rate": 9.562157894736842e-05, "loss": 0.3688, "step": 36653 }, { "epoch": 2.0525254787770186, "grad_norm": 1.842946171760559, "learning_rate": 9.562131578947368e-05, "loss": 0.4901, "step": 36654 }, { "epoch": 2.0525814760891476, "grad_norm": 1.4088022708892822, "learning_rate": 9.562105263157896e-05, "loss": 0.5086, "step": 36655 }, { "epoch": 2.0526374734012767, "grad_norm": 1.1978005170822144, "learning_rate": 9.562078947368422e-05, "loss": 0.4408, "step": 36656 }, { "epoch": 2.0526934707134057, "grad_norm": 1.1670106649398804, "learning_rate": 9.562052631578948e-05, "loss": 0.3121, "step": 36657 }, { "epoch": 2.0527494680255347, "grad_norm": 1.2237215042114258, "learning_rate": 9.562026315789474e-05, "loss": 0.3633, "step": 36658 }, { "epoch": 2.0528054653376637, "grad_norm": 1.2898637056350708, "learning_rate": 9.562000000000001e-05, "loss": 0.2586, "step": 36659 }, { "epoch": 2.0528614626497927, "grad_norm": 1.11089289188385, "learning_rate": 9.561973684210527e-05, "loss": 0.3754, "step": 36660 }, { "epoch": 2.0529174599619218, "grad_norm": 1.5122790336608887, "learning_rate": 9.561947368421053e-05, "loss": 0.3777, "step": 36661 }, { "epoch": 2.052973457274051, "grad_norm": 1.288523554801941, "learning_rate": 9.561921052631579e-05, "loss": 0.3686, "step": 36662 }, { "epoch": 2.05302945458618, "grad_norm": 1.0326403379440308, "learning_rate": 9.561894736842105e-05, "loss": 0.3671, "step": 36663 }, { "epoch": 2.053085451898309, "grad_norm": 1.1743468046188354, "learning_rate": 9.561868421052632e-05, "loss": 0.3457, "step": 36664 }, { "epoch": 
2.053141449210438, "grad_norm": 1.2513277530670166, "learning_rate": 9.561842105263158e-05, "loss": 0.3704, "step": 36665 }, { "epoch": 2.053197446522567, "grad_norm": 1.179674506187439, "learning_rate": 9.561815789473685e-05, "loss": 0.3202, "step": 36666 }, { "epoch": 2.053253443834696, "grad_norm": 1.2520854473114014, "learning_rate": 9.56178947368421e-05, "loss": 0.3949, "step": 36667 }, { "epoch": 2.053309441146825, "grad_norm": 1.4296391010284424, "learning_rate": 9.561763157894737e-05, "loss": 0.3317, "step": 36668 }, { "epoch": 2.053365438458954, "grad_norm": 1.0658658742904663, "learning_rate": 9.561736842105263e-05, "loss": 0.3226, "step": 36669 }, { "epoch": 2.053421435771083, "grad_norm": 1.2129640579223633, "learning_rate": 9.561710526315791e-05, "loss": 0.3908, "step": 36670 }, { "epoch": 2.053477433083212, "grad_norm": 1.2850233316421509, "learning_rate": 9.561684210526317e-05, "loss": 0.4048, "step": 36671 }, { "epoch": 2.053533430395341, "grad_norm": 1.4278819561004639, "learning_rate": 9.561657894736843e-05, "loss": 0.4404, "step": 36672 }, { "epoch": 2.05358942770747, "grad_norm": 1.3692359924316406, "learning_rate": 9.561631578947369e-05, "loss": 0.3954, "step": 36673 }, { "epoch": 2.053645425019599, "grad_norm": 1.4084349870681763, "learning_rate": 9.561605263157896e-05, "loss": 0.6925, "step": 36674 }, { "epoch": 2.053701422331728, "grad_norm": 1.0658395290374756, "learning_rate": 9.561578947368422e-05, "loss": 0.3643, "step": 36675 }, { "epoch": 2.053757419643857, "grad_norm": 1.1298015117645264, "learning_rate": 9.561552631578948e-05, "loss": 0.3538, "step": 36676 }, { "epoch": 2.053813416955986, "grad_norm": 1.3170356750488281, "learning_rate": 9.561526315789474e-05, "loss": 0.5105, "step": 36677 }, { "epoch": 2.053869414268115, "grad_norm": 1.5675780773162842, "learning_rate": 9.561500000000001e-05, "loss": 0.3579, "step": 36678 }, { "epoch": 2.053925411580244, "grad_norm": 1.1146208047866821, "learning_rate": 9.561473684210527e-05, 
"loss": 0.3776, "step": 36679 }, { "epoch": 2.053981408892373, "grad_norm": 1.0981762409210205, "learning_rate": 9.561447368421053e-05, "loss": 0.3284, "step": 36680 }, { "epoch": 2.054037406204502, "grad_norm": 1.0860785245895386, "learning_rate": 9.561421052631579e-05, "loss": 0.4252, "step": 36681 }, { "epoch": 2.054093403516631, "grad_norm": 1.0068347454071045, "learning_rate": 9.561394736842105e-05, "loss": 0.3325, "step": 36682 }, { "epoch": 2.05414940082876, "grad_norm": 1.085824728012085, "learning_rate": 9.561368421052632e-05, "loss": 0.4278, "step": 36683 }, { "epoch": 2.054205398140889, "grad_norm": 1.3052728176116943, "learning_rate": 9.561342105263158e-05, "loss": 0.3933, "step": 36684 }, { "epoch": 2.0542613954530182, "grad_norm": 1.1264413595199585, "learning_rate": 9.561315789473684e-05, "loss": 0.4286, "step": 36685 }, { "epoch": 2.0543173927651472, "grad_norm": 1.3334736824035645, "learning_rate": 9.56128947368421e-05, "loss": 0.3909, "step": 36686 }, { "epoch": 2.0543733900772763, "grad_norm": 1.1422628164291382, "learning_rate": 9.561263157894738e-05, "loss": 0.3922, "step": 36687 }, { "epoch": 2.0544293873894053, "grad_norm": 1.257146954536438, "learning_rate": 9.561236842105264e-05, "loss": 0.3271, "step": 36688 }, { "epoch": 2.0544853847015343, "grad_norm": 1.1288529634475708, "learning_rate": 9.56121052631579e-05, "loss": 0.3966, "step": 36689 }, { "epoch": 2.0545413820136633, "grad_norm": 1.121405005455017, "learning_rate": 9.561184210526316e-05, "loss": 0.3233, "step": 36690 }, { "epoch": 2.0545973793257923, "grad_norm": 1.1121511459350586, "learning_rate": 9.561157894736843e-05, "loss": 0.3844, "step": 36691 }, { "epoch": 2.0546533766379214, "grad_norm": 1.195992112159729, "learning_rate": 9.561131578947369e-05, "loss": 0.3577, "step": 36692 }, { "epoch": 2.0547093739500504, "grad_norm": 1.3259330987930298, "learning_rate": 9.561105263157896e-05, "loss": 0.372, "step": 36693 }, { "epoch": 2.0547653712621794, "grad_norm": 
1.043801188468933, "learning_rate": 9.561078947368421e-05, "loss": 0.3794, "step": 36694 }, { "epoch": 2.0548213685743084, "grad_norm": 1.0895954370498657, "learning_rate": 9.561052631578948e-05, "loss": 0.4274, "step": 36695 }, { "epoch": 2.0548773658864374, "grad_norm": 1.207619309425354, "learning_rate": 9.561026315789474e-05, "loss": 0.4047, "step": 36696 }, { "epoch": 2.0549333631985665, "grad_norm": 1.4847220182418823, "learning_rate": 9.561e-05, "loss": 0.5094, "step": 36697 }, { "epoch": 2.0549893605106955, "grad_norm": 1.1867951154708862, "learning_rate": 9.560973684210527e-05, "loss": 0.2936, "step": 36698 }, { "epoch": 2.0550453578228245, "grad_norm": 1.5140495300292969, "learning_rate": 9.560947368421052e-05, "loss": 0.507, "step": 36699 }, { "epoch": 2.0551013551349535, "grad_norm": 1.1378545761108398, "learning_rate": 9.56092105263158e-05, "loss": 0.371, "step": 36700 }, { "epoch": 2.0551573524470825, "grad_norm": 1.1760331392288208, "learning_rate": 9.560894736842105e-05, "loss": 0.395, "step": 36701 }, { "epoch": 2.0552133497592115, "grad_norm": 1.1640903949737549, "learning_rate": 9.560868421052633e-05, "loss": 0.4455, "step": 36702 }, { "epoch": 2.0552693470713406, "grad_norm": 1.2893195152282715, "learning_rate": 9.560842105263159e-05, "loss": 0.5465, "step": 36703 }, { "epoch": 2.0553253443834696, "grad_norm": 1.3953769207000732, "learning_rate": 9.560815789473685e-05, "loss": 0.4445, "step": 36704 }, { "epoch": 2.0553813416955986, "grad_norm": 1.0773518085479736, "learning_rate": 9.56078947368421e-05, "loss": 0.3723, "step": 36705 }, { "epoch": 2.0554373390077276, "grad_norm": 1.1542632579803467, "learning_rate": 9.560763157894738e-05, "loss": 0.4239, "step": 36706 }, { "epoch": 2.0554933363198566, "grad_norm": 1.3278210163116455, "learning_rate": 9.560736842105264e-05, "loss": 0.4774, "step": 36707 }, { "epoch": 2.0555493336319857, "grad_norm": 1.103522777557373, "learning_rate": 9.56071052631579e-05, "loss": 0.392, "step": 36708 }, { "epoch": 
2.0556053309441147, "grad_norm": 1.2399300336837769, "learning_rate": 9.560684210526316e-05, "loss": 0.5026, "step": 36709 }, { "epoch": 2.0556613282562437, "grad_norm": 1.3378663063049316, "learning_rate": 9.560657894736843e-05, "loss": 0.4297, "step": 36710 }, { "epoch": 2.0557173255683727, "grad_norm": 1.0820890665054321, "learning_rate": 9.560631578947369e-05, "loss": 0.37, "step": 36711 }, { "epoch": 2.0557733228805017, "grad_norm": 1.1408289670944214, "learning_rate": 9.560605263157895e-05, "loss": 0.4069, "step": 36712 }, { "epoch": 2.0558293201926308, "grad_norm": 1.0384246110916138, "learning_rate": 9.560578947368421e-05, "loss": 0.349, "step": 36713 }, { "epoch": 2.05588531750476, "grad_norm": 1.3504265546798706, "learning_rate": 9.560552631578947e-05, "loss": 0.5331, "step": 36714 }, { "epoch": 2.055941314816889, "grad_norm": 1.1632843017578125, "learning_rate": 9.560526315789474e-05, "loss": 0.3979, "step": 36715 }, { "epoch": 2.055997312129018, "grad_norm": 1.5368894338607788, "learning_rate": 9.5605e-05, "loss": 0.5003, "step": 36716 }, { "epoch": 2.056053309441147, "grad_norm": 1.1824567317962646, "learning_rate": 9.560473684210526e-05, "loss": 0.4659, "step": 36717 }, { "epoch": 2.056109306753276, "grad_norm": 1.1657074689865112, "learning_rate": 9.560447368421052e-05, "loss": 0.4018, "step": 36718 }, { "epoch": 2.056165304065405, "grad_norm": 1.632127285003662, "learning_rate": 9.56042105263158e-05, "loss": 0.3002, "step": 36719 }, { "epoch": 2.056221301377534, "grad_norm": 1.1505695581436157, "learning_rate": 9.560394736842106e-05, "loss": 0.3992, "step": 36720 }, { "epoch": 2.056277298689663, "grad_norm": 1.2248297929763794, "learning_rate": 9.560368421052633e-05, "loss": 0.42, "step": 36721 }, { "epoch": 2.056333296001792, "grad_norm": 1.1423290967941284, "learning_rate": 9.560342105263158e-05, "loss": 0.3792, "step": 36722 }, { "epoch": 2.056389293313921, "grad_norm": 1.2099378108978271, "learning_rate": 9.560315789473685e-05, "loss": 0.3225, 
"step": 36723 }, { "epoch": 2.05644529062605, "grad_norm": 1.0739911794662476, "learning_rate": 9.560289473684211e-05, "loss": 0.4086, "step": 36724 }, { "epoch": 2.056501287938179, "grad_norm": 1.005111813545227, "learning_rate": 9.560263157894738e-05, "loss": 0.3213, "step": 36725 }, { "epoch": 2.056557285250308, "grad_norm": 1.2234336137771606, "learning_rate": 9.560236842105264e-05, "loss": 0.487, "step": 36726 }, { "epoch": 2.056613282562437, "grad_norm": 1.187186360359192, "learning_rate": 9.56021052631579e-05, "loss": 0.3481, "step": 36727 }, { "epoch": 2.056669279874566, "grad_norm": 1.2229480743408203, "learning_rate": 9.560184210526316e-05, "loss": 0.3494, "step": 36728 }, { "epoch": 2.056725277186695, "grad_norm": 1.3513422012329102, "learning_rate": 9.560157894736843e-05, "loss": 0.4719, "step": 36729 }, { "epoch": 2.056781274498824, "grad_norm": 1.0294100046157837, "learning_rate": 9.56013157894737e-05, "loss": 0.3285, "step": 36730 }, { "epoch": 2.056837271810953, "grad_norm": 1.206562876701355, "learning_rate": 9.560105263157894e-05, "loss": 0.3717, "step": 36731 }, { "epoch": 2.056893269123082, "grad_norm": 1.3168164491653442, "learning_rate": 9.560078947368421e-05, "loss": 0.3847, "step": 36732 }, { "epoch": 2.056949266435211, "grad_norm": 1.4654356241226196, "learning_rate": 9.560052631578947e-05, "loss": 0.443, "step": 36733 }, { "epoch": 2.05700526374734, "grad_norm": 1.2013304233551025, "learning_rate": 9.560026315789475e-05, "loss": 0.4463, "step": 36734 }, { "epoch": 2.057061261059469, "grad_norm": 1.1533199548721313, "learning_rate": 9.56e-05, "loss": 0.3376, "step": 36735 }, { "epoch": 2.057117258371598, "grad_norm": 1.1251952648162842, "learning_rate": 9.559973684210527e-05, "loss": 0.3469, "step": 36736 }, { "epoch": 2.057173255683727, "grad_norm": 1.171920657157898, "learning_rate": 9.559947368421053e-05, "loss": 0.3549, "step": 36737 }, { "epoch": 2.0572292529958562, "grad_norm": 1.260909914970398, "learning_rate": 9.55992105263158e-05, 
"loss": 0.435, "step": 36738 }, { "epoch": 2.0572852503079853, "grad_norm": 1.2944713830947876, "learning_rate": 9.559894736842106e-05, "loss": 0.5194, "step": 36739 }, { "epoch": 2.0573412476201143, "grad_norm": 1.2429436445236206, "learning_rate": 9.559868421052632e-05, "loss": 0.3908, "step": 36740 }, { "epoch": 2.0573972449322433, "grad_norm": 1.3853048086166382, "learning_rate": 9.559842105263158e-05, "loss": 0.5268, "step": 36741 }, { "epoch": 2.0574532422443723, "grad_norm": 1.4531614780426025, "learning_rate": 9.559815789473685e-05, "loss": 0.4978, "step": 36742 }, { "epoch": 2.0575092395565013, "grad_norm": 1.424088478088379, "learning_rate": 9.559789473684211e-05, "loss": 0.5105, "step": 36743 }, { "epoch": 2.0575652368686304, "grad_norm": 1.2815285921096802, "learning_rate": 9.559763157894737e-05, "loss": 0.4422, "step": 36744 }, { "epoch": 2.0576212341807594, "grad_norm": 1.1937334537506104, "learning_rate": 9.559736842105263e-05, "loss": 0.338, "step": 36745 }, { "epoch": 2.0576772314928884, "grad_norm": 1.1087217330932617, "learning_rate": 9.55971052631579e-05, "loss": 0.3856, "step": 36746 }, { "epoch": 2.0577332288050174, "grad_norm": 1.173322319984436, "learning_rate": 9.559684210526316e-05, "loss": 0.3706, "step": 36747 }, { "epoch": 2.0577892261171464, "grad_norm": 1.1126890182495117, "learning_rate": 9.559657894736842e-05, "loss": 0.3902, "step": 36748 }, { "epoch": 2.0578452234292754, "grad_norm": 1.0003615617752075, "learning_rate": 9.559631578947368e-05, "loss": 0.3936, "step": 36749 }, { "epoch": 2.0579012207414045, "grad_norm": 1.1534682512283325, "learning_rate": 9.559605263157894e-05, "loss": 0.404, "step": 36750 }, { "epoch": 2.0579572180535335, "grad_norm": 1.2277191877365112, "learning_rate": 9.559578947368422e-05, "loss": 0.3616, "step": 36751 }, { "epoch": 2.0580132153656625, "grad_norm": 1.795446753501892, "learning_rate": 9.559552631578948e-05, "loss": 0.4194, "step": 36752 }, { "epoch": 2.0580692126777915, "grad_norm": 
1.590470790863037, "learning_rate": 9.559526315789475e-05, "loss": 0.583, "step": 36753 }, { "epoch": 2.0581252099899205, "grad_norm": 1.2584571838378906, "learning_rate": 9.5595e-05, "loss": 0.413, "step": 36754 }, { "epoch": 2.0581812073020496, "grad_norm": 1.623884677886963, "learning_rate": 9.559473684210527e-05, "loss": 0.5619, "step": 36755 }, { "epoch": 2.0582372046141786, "grad_norm": 1.3221566677093506, "learning_rate": 9.559447368421053e-05, "loss": 0.3693, "step": 36756 }, { "epoch": 2.0582932019263076, "grad_norm": 1.0448098182678223, "learning_rate": 9.55942105263158e-05, "loss": 0.5021, "step": 36757 }, { "epoch": 2.0583491992384366, "grad_norm": 1.2021024227142334, "learning_rate": 9.559394736842106e-05, "loss": 0.4689, "step": 36758 }, { "epoch": 2.0584051965505656, "grad_norm": 1.207700490951538, "learning_rate": 9.559368421052632e-05, "loss": 0.4446, "step": 36759 }, { "epoch": 2.0584611938626947, "grad_norm": 1.25325345993042, "learning_rate": 9.559342105263158e-05, "loss": 0.4801, "step": 36760 }, { "epoch": 2.0585171911748237, "grad_norm": 1.231303334236145, "learning_rate": 9.559315789473685e-05, "loss": 0.3779, "step": 36761 }, { "epoch": 2.0585731884869527, "grad_norm": 1.3286206722259521, "learning_rate": 9.559289473684211e-05, "loss": 0.7109, "step": 36762 }, { "epoch": 2.0586291857990817, "grad_norm": 1.3455702066421509, "learning_rate": 9.559263157894737e-05, "loss": 0.4081, "step": 36763 }, { "epoch": 2.0586851831112107, "grad_norm": 1.2441279888153076, "learning_rate": 9.559236842105263e-05, "loss": 0.4919, "step": 36764 }, { "epoch": 2.0587411804233398, "grad_norm": 1.1973718404769897, "learning_rate": 9.559210526315789e-05, "loss": 0.5086, "step": 36765 }, { "epoch": 2.0587971777354688, "grad_norm": 1.0812766551971436, "learning_rate": 9.559184210526317e-05, "loss": 0.2827, "step": 36766 }, { "epoch": 2.058853175047598, "grad_norm": 1.2745712995529175, "learning_rate": 9.559157894736843e-05, "loss": 0.5086, "step": 36767 }, { 
"epoch": 2.058909172359727, "grad_norm": 1.1130268573760986, "learning_rate": 9.559131578947369e-05, "loss": 0.4475, "step": 36768 }, { "epoch": 2.058965169671856, "grad_norm": 1.0718028545379639, "learning_rate": 9.559105263157895e-05, "loss": 0.3888, "step": 36769 }, { "epoch": 2.059021166983985, "grad_norm": 1.0981756448745728, "learning_rate": 9.559078947368422e-05, "loss": 0.4471, "step": 36770 }, { "epoch": 2.059077164296114, "grad_norm": 1.0213072299957275, "learning_rate": 9.559052631578948e-05, "loss": 0.3512, "step": 36771 }, { "epoch": 2.059133161608243, "grad_norm": 1.099674105644226, "learning_rate": 9.559026315789474e-05, "loss": 0.3833, "step": 36772 }, { "epoch": 2.059189158920372, "grad_norm": 1.295213222503662, "learning_rate": 9.559e-05, "loss": 0.3492, "step": 36773 }, { "epoch": 2.059245156232501, "grad_norm": 1.330477237701416, "learning_rate": 9.558973684210527e-05, "loss": 0.5201, "step": 36774 }, { "epoch": 2.05930115354463, "grad_norm": 1.1716090440750122, "learning_rate": 9.558947368421053e-05, "loss": 0.367, "step": 36775 }, { "epoch": 2.059357150856759, "grad_norm": 1.090935468673706, "learning_rate": 9.55892105263158e-05, "loss": 0.312, "step": 36776 }, { "epoch": 2.059413148168888, "grad_norm": 1.1700232028961182, "learning_rate": 9.558894736842105e-05, "loss": 0.3055, "step": 36777 }, { "epoch": 2.059469145481017, "grad_norm": 1.268692135810852, "learning_rate": 9.558868421052632e-05, "loss": 0.3626, "step": 36778 }, { "epoch": 2.059525142793146, "grad_norm": 0.9433198571205139, "learning_rate": 9.558842105263158e-05, "loss": 0.3012, "step": 36779 }, { "epoch": 2.059581140105275, "grad_norm": 1.2524492740631104, "learning_rate": 9.558815789473686e-05, "loss": 0.429, "step": 36780 }, { "epoch": 2.059637137417404, "grad_norm": 1.4334501028060913, "learning_rate": 9.558789473684212e-05, "loss": 0.4208, "step": 36781 }, { "epoch": 2.059693134729533, "grad_norm": 1.0466660261154175, "learning_rate": 9.558763157894736e-05, "loss": 0.3498, 
"step": 36782 }, { "epoch": 2.059749132041662, "grad_norm": 1.1548635959625244, "learning_rate": 9.558736842105264e-05, "loss": 0.3169, "step": 36783 }, { "epoch": 2.059805129353791, "grad_norm": 1.2209688425064087, "learning_rate": 9.55871052631579e-05, "loss": 0.4727, "step": 36784 }, { "epoch": 2.05986112666592, "grad_norm": 1.0632095336914062, "learning_rate": 9.558684210526317e-05, "loss": 0.3624, "step": 36785 }, { "epoch": 2.059917123978049, "grad_norm": 1.1065341234207153, "learning_rate": 9.558657894736842e-05, "loss": 0.3032, "step": 36786 }, { "epoch": 2.059973121290178, "grad_norm": 1.1085914373397827, "learning_rate": 9.558631578947369e-05, "loss": 0.3335, "step": 36787 }, { "epoch": 2.060029118602307, "grad_norm": 1.2254257202148438, "learning_rate": 9.558605263157895e-05, "loss": 0.4998, "step": 36788 }, { "epoch": 2.060085115914436, "grad_norm": 1.5903806686401367, "learning_rate": 9.558578947368422e-05, "loss": 0.3586, "step": 36789 }, { "epoch": 2.0601411132265652, "grad_norm": 1.1134637594223022, "learning_rate": 9.558552631578948e-05, "loss": 0.3995, "step": 36790 }, { "epoch": 2.0601971105386943, "grad_norm": 1.1553540229797363, "learning_rate": 9.558526315789474e-05, "loss": 0.44, "step": 36791 }, { "epoch": 2.0602531078508233, "grad_norm": 1.0799517631530762, "learning_rate": 9.5585e-05, "loss": 0.3405, "step": 36792 }, { "epoch": 2.0603091051629523, "grad_norm": 1.3857663869857788, "learning_rate": 9.558473684210527e-05, "loss": 0.475, "step": 36793 }, { "epoch": 2.0603651024750813, "grad_norm": 1.163667917251587, "learning_rate": 9.558447368421053e-05, "loss": 0.4205, "step": 36794 }, { "epoch": 2.0604210997872103, "grad_norm": 1.2941070795059204, "learning_rate": 9.558421052631579e-05, "loss": 0.4197, "step": 36795 }, { "epoch": 2.0604770970993393, "grad_norm": 1.1041325330734253, "learning_rate": 9.558394736842105e-05, "loss": 0.5431, "step": 36796 }, { "epoch": 2.0605330944114684, "grad_norm": 1.222534418106079, "learning_rate": 
9.558368421052633e-05, "loss": 0.424, "step": 36797 }, { "epoch": 2.0605890917235974, "grad_norm": 1.4718118906021118, "learning_rate": 9.558342105263159e-05, "loss": 0.4878, "step": 36798 }, { "epoch": 2.0606450890357264, "grad_norm": 1.2191308736801147, "learning_rate": 9.558315789473685e-05, "loss": 0.3631, "step": 36799 }, { "epoch": 2.0607010863478554, "grad_norm": 1.1393415927886963, "learning_rate": 9.55828947368421e-05, "loss": 0.3554, "step": 36800 }, { "epoch": 2.0607570836599844, "grad_norm": 1.2141400575637817, "learning_rate": 9.558263157894737e-05, "loss": 0.4108, "step": 36801 }, { "epoch": 2.0608130809721135, "grad_norm": 1.2095710039138794, "learning_rate": 9.558236842105264e-05, "loss": 0.4936, "step": 36802 }, { "epoch": 2.0608690782842425, "grad_norm": 1.177917718887329, "learning_rate": 9.55821052631579e-05, "loss": 0.4903, "step": 36803 }, { "epoch": 2.0609250755963715, "grad_norm": 1.1616653203964233, "learning_rate": 9.558184210526316e-05, "loss": 0.3547, "step": 36804 }, { "epoch": 2.0609810729085005, "grad_norm": 1.171865463256836, "learning_rate": 9.558157894736842e-05, "loss": 0.3489, "step": 36805 }, { "epoch": 2.0610370702206295, "grad_norm": 1.3349839448928833, "learning_rate": 9.558131578947369e-05, "loss": 0.3907, "step": 36806 }, { "epoch": 2.0610930675327586, "grad_norm": 1.1776931285858154, "learning_rate": 9.558105263157895e-05, "loss": 0.3885, "step": 36807 }, { "epoch": 2.0611490648448876, "grad_norm": 1.3368803262710571, "learning_rate": 9.558078947368422e-05, "loss": 0.565, "step": 36808 }, { "epoch": 2.0612050621570166, "grad_norm": 1.3812310695648193, "learning_rate": 9.558052631578947e-05, "loss": 0.4339, "step": 36809 }, { "epoch": 2.0612610594691456, "grad_norm": 1.1804356575012207, "learning_rate": 9.558026315789474e-05, "loss": 0.3922, "step": 36810 }, { "epoch": 2.0613170567812746, "grad_norm": 1.4295260906219482, "learning_rate": 9.558e-05, "loss": 0.3222, "step": 36811 }, { "epoch": 2.0613730540934037, "grad_norm": 
1.125641107559204, "learning_rate": 9.557973684210528e-05, "loss": 0.3687, "step": 36812 }, { "epoch": 2.0614290514055327, "grad_norm": 1.2327799797058105, "learning_rate": 9.557947368421054e-05, "loss": 0.3693, "step": 36813 }, { "epoch": 2.0614850487176617, "grad_norm": 1.180492639541626, "learning_rate": 9.55792105263158e-05, "loss": 0.4699, "step": 36814 }, { "epoch": 2.0615410460297907, "grad_norm": 1.3446556329727173, "learning_rate": 9.557894736842106e-05, "loss": 0.4611, "step": 36815 }, { "epoch": 2.0615970433419197, "grad_norm": 1.1068192720413208, "learning_rate": 9.557868421052633e-05, "loss": 0.335, "step": 36816 }, { "epoch": 2.0616530406540488, "grad_norm": 1.1895300149917603, "learning_rate": 9.557842105263159e-05, "loss": 0.3293, "step": 36817 }, { "epoch": 2.0617090379661778, "grad_norm": 1.337397813796997, "learning_rate": 9.557815789473685e-05, "loss": 0.4127, "step": 36818 }, { "epoch": 2.061765035278307, "grad_norm": 1.1548151969909668, "learning_rate": 9.557789473684211e-05, "loss": 0.3771, "step": 36819 }, { "epoch": 2.061821032590436, "grad_norm": 1.3712074756622314, "learning_rate": 9.557763157894737e-05, "loss": 0.47, "step": 36820 }, { "epoch": 2.061877029902565, "grad_norm": 1.128764033317566, "learning_rate": 9.557736842105264e-05, "loss": 0.4657, "step": 36821 }, { "epoch": 2.061933027214694, "grad_norm": 1.2151983976364136, "learning_rate": 9.55771052631579e-05, "loss": 0.5197, "step": 36822 }, { "epoch": 2.061989024526823, "grad_norm": 1.4019395112991333, "learning_rate": 9.557684210526316e-05, "loss": 0.4865, "step": 36823 }, { "epoch": 2.062045021838952, "grad_norm": 1.0516685247421265, "learning_rate": 9.557657894736842e-05, "loss": 0.3587, "step": 36824 }, { "epoch": 2.062101019151081, "grad_norm": 1.0749855041503906, "learning_rate": 9.55763157894737e-05, "loss": 0.4393, "step": 36825 }, { "epoch": 2.06215701646321, "grad_norm": 1.1283698081970215, "learning_rate": 9.557605263157895e-05, "loss": 0.3505, "step": 36826 }, { 
"epoch": 2.062213013775339, "grad_norm": 1.0680946111679077, "learning_rate": 9.557578947368421e-05, "loss": 0.3366, "step": 36827 }, { "epoch": 2.062269011087468, "grad_norm": 1.1984539031982422, "learning_rate": 9.557552631578947e-05, "loss": 0.3697, "step": 36828 }, { "epoch": 2.062325008399597, "grad_norm": 0.9972808957099915, "learning_rate": 9.557526315789475e-05, "loss": 0.3212, "step": 36829 }, { "epoch": 2.062381005711726, "grad_norm": 1.148760437965393, "learning_rate": 9.5575e-05, "loss": 0.4308, "step": 36830 }, { "epoch": 2.062437003023855, "grad_norm": 1.0589383840560913, "learning_rate": 9.557473684210528e-05, "loss": 0.3277, "step": 36831 }, { "epoch": 2.062493000335984, "grad_norm": 1.4211186170578003, "learning_rate": 9.557447368421053e-05, "loss": 0.4505, "step": 36832 }, { "epoch": 2.062548997648113, "grad_norm": 1.2813228368759155, "learning_rate": 9.55742105263158e-05, "loss": 0.528, "step": 36833 }, { "epoch": 2.062604994960242, "grad_norm": 1.42642343044281, "learning_rate": 9.557394736842106e-05, "loss": 0.3632, "step": 36834 }, { "epoch": 2.062660992272371, "grad_norm": 1.537554144859314, "learning_rate": 9.557368421052632e-05, "loss": 0.4565, "step": 36835 }, { "epoch": 2.0627169895845, "grad_norm": 1.445787787437439, "learning_rate": 9.557342105263158e-05, "loss": 0.4884, "step": 36836 }, { "epoch": 2.062772986896629, "grad_norm": 1.2845577001571655, "learning_rate": 9.557315789473684e-05, "loss": 0.3517, "step": 36837 }, { "epoch": 2.062828984208758, "grad_norm": 1.3901147842407227, "learning_rate": 9.557289473684211e-05, "loss": 0.4411, "step": 36838 }, { "epoch": 2.062884981520887, "grad_norm": 1.1573408842086792, "learning_rate": 9.557263157894737e-05, "loss": 0.3688, "step": 36839 }, { "epoch": 2.062940978833016, "grad_norm": 1.2008999586105347, "learning_rate": 9.557236842105264e-05, "loss": 0.3334, "step": 36840 }, { "epoch": 2.062996976145145, "grad_norm": 1.3475545644760132, "learning_rate": 9.557210526315789e-05, "loss": 
0.3896, "step": 36841 }, { "epoch": 2.0630529734572742, "grad_norm": 0.9694061279296875, "learning_rate": 9.557184210526316e-05, "loss": 0.3302, "step": 36842 }, { "epoch": 2.0631089707694032, "grad_norm": 1.5760122537612915, "learning_rate": 9.557157894736842e-05, "loss": 0.3356, "step": 36843 }, { "epoch": 2.0631649680815323, "grad_norm": 1.191697359085083, "learning_rate": 9.55713157894737e-05, "loss": 0.3838, "step": 36844 }, { "epoch": 2.0632209653936613, "grad_norm": 1.1822952032089233, "learning_rate": 9.557105263157896e-05, "loss": 0.3592, "step": 36845 }, { "epoch": 2.0632769627057903, "grad_norm": 1.1636078357696533, "learning_rate": 9.557078947368422e-05, "loss": 0.396, "step": 36846 }, { "epoch": 2.0633329600179193, "grad_norm": 1.4038398265838623, "learning_rate": 9.557052631578948e-05, "loss": 0.459, "step": 36847 }, { "epoch": 2.0633889573300483, "grad_norm": 1.333531141281128, "learning_rate": 9.557026315789475e-05, "loss": 0.4123, "step": 36848 }, { "epoch": 2.0634449546421774, "grad_norm": 1.1316839456558228, "learning_rate": 9.557000000000001e-05, "loss": 0.3526, "step": 36849 }, { "epoch": 2.0635009519543064, "grad_norm": 1.2709190845489502, "learning_rate": 9.556973684210527e-05, "loss": 0.4048, "step": 36850 }, { "epoch": 2.0635569492664354, "grad_norm": 1.1169583797454834, "learning_rate": 9.556947368421053e-05, "loss": 0.4244, "step": 36851 }, { "epoch": 2.0636129465785644, "grad_norm": 1.2875707149505615, "learning_rate": 9.556921052631579e-05, "loss": 0.4093, "step": 36852 }, { "epoch": 2.0636689438906934, "grad_norm": 1.0595837831497192, "learning_rate": 9.556894736842106e-05, "loss": 0.3113, "step": 36853 }, { "epoch": 2.0637249412028225, "grad_norm": 1.3138606548309326, "learning_rate": 9.556868421052632e-05, "loss": 0.5959, "step": 36854 }, { "epoch": 2.0637809385149515, "grad_norm": 1.1631929874420166, "learning_rate": 9.556842105263158e-05, "loss": 0.4212, "step": 36855 }, { "epoch": 2.0638369358270805, "grad_norm": 
1.0259475708007812, "learning_rate": 9.556815789473684e-05, "loss": 0.3314, "step": 36856 }, { "epoch": 2.0638929331392095, "grad_norm": 1.2289806604385376, "learning_rate": 9.556789473684211e-05, "loss": 0.4292, "step": 36857 }, { "epoch": 2.0639489304513385, "grad_norm": 1.1908369064331055, "learning_rate": 9.556763157894737e-05, "loss": 0.3178, "step": 36858 }, { "epoch": 2.0640049277634676, "grad_norm": 1.1122509241104126, "learning_rate": 9.556736842105263e-05, "loss": 0.3663, "step": 36859 }, { "epoch": 2.0640609250755966, "grad_norm": 1.2240458726882935, "learning_rate": 9.556710526315789e-05, "loss": 0.4061, "step": 36860 }, { "epoch": 2.0641169223877256, "grad_norm": 1.0180732011795044, "learning_rate": 9.556684210526317e-05, "loss": 0.4708, "step": 36861 }, { "epoch": 2.0641729196998546, "grad_norm": 1.2493585348129272, "learning_rate": 9.556657894736843e-05, "loss": 0.4976, "step": 36862 }, { "epoch": 2.0642289170119836, "grad_norm": 1.1532797813415527, "learning_rate": 9.55663157894737e-05, "loss": 0.4071, "step": 36863 }, { "epoch": 2.0642849143241127, "grad_norm": 1.1726077795028687, "learning_rate": 9.556605263157894e-05, "loss": 0.4969, "step": 36864 }, { "epoch": 2.0643409116362417, "grad_norm": 1.117709755897522, "learning_rate": 9.556578947368422e-05, "loss": 0.4236, "step": 36865 }, { "epoch": 2.0643969089483707, "grad_norm": 1.331722378730774, "learning_rate": 9.556552631578948e-05, "loss": 0.4288, "step": 36866 }, { "epoch": 2.0644529062604997, "grad_norm": 1.3825278282165527, "learning_rate": 9.556526315789475e-05, "loss": 0.4228, "step": 36867 }, { "epoch": 2.0645089035726287, "grad_norm": 1.1908832788467407, "learning_rate": 9.556500000000001e-05, "loss": 0.3601, "step": 36868 }, { "epoch": 2.0645649008847577, "grad_norm": 1.097708821296692, "learning_rate": 9.556473684210526e-05, "loss": 0.4395, "step": 36869 }, { "epoch": 2.0646208981968863, "grad_norm": 1.173099398612976, "learning_rate": 9.556447368421053e-05, "loss": 0.4075, "step": 
36870 }, { "epoch": 2.064676895509016, "grad_norm": 1.0582363605499268, "learning_rate": 9.556421052631579e-05, "loss": 0.4063, "step": 36871 }, { "epoch": 2.0647328928211444, "grad_norm": 1.4180046319961548, "learning_rate": 9.556394736842106e-05, "loss": 0.3628, "step": 36872 }, { "epoch": 2.064788890133274, "grad_norm": 1.1703236103057861, "learning_rate": 9.556368421052632e-05, "loss": 0.4244, "step": 36873 }, { "epoch": 2.0648448874454024, "grad_norm": 1.1510074138641357, "learning_rate": 9.556342105263158e-05, "loss": 0.469, "step": 36874 }, { "epoch": 2.064900884757532, "grad_norm": 1.5087875127792358, "learning_rate": 9.556315789473684e-05, "loss": 0.4926, "step": 36875 }, { "epoch": 2.0649568820696604, "grad_norm": 1.0679322481155396, "learning_rate": 9.556289473684212e-05, "loss": 0.4444, "step": 36876 }, { "epoch": 2.06501287938179, "grad_norm": 1.0772632360458374, "learning_rate": 9.556263157894738e-05, "loss": 0.2651, "step": 36877 }, { "epoch": 2.0650688766939185, "grad_norm": 1.3106554746627808, "learning_rate": 9.556236842105264e-05, "loss": 0.4676, "step": 36878 }, { "epoch": 2.0651248740060475, "grad_norm": 1.3430856466293335, "learning_rate": 9.55621052631579e-05, "loss": 0.5481, "step": 36879 }, { "epoch": 2.0651808713181765, "grad_norm": 1.429925560951233, "learning_rate": 9.556184210526317e-05, "loss": 0.5065, "step": 36880 }, { "epoch": 2.0652368686303055, "grad_norm": 1.942503809928894, "learning_rate": 9.556157894736843e-05, "loss": 0.5501, "step": 36881 }, { "epoch": 2.0652928659424346, "grad_norm": 1.3585002422332764, "learning_rate": 9.556131578947369e-05, "loss": 0.3248, "step": 36882 }, { "epoch": 2.0653488632545636, "grad_norm": 1.0788979530334473, "learning_rate": 9.556105263157895e-05, "loss": 0.3938, "step": 36883 }, { "epoch": 2.0654048605666926, "grad_norm": 1.2493810653686523, "learning_rate": 9.556078947368422e-05, "loss": 0.4479, "step": 36884 }, { "epoch": 2.0654608578788216, "grad_norm": 1.247441053390503, "learning_rate": 
9.556052631578948e-05, "loss": 0.3806, "step": 36885 }, { "epoch": 2.0655168551909506, "grad_norm": 1.31350576877594, "learning_rate": 9.556026315789474e-05, "loss": 0.4712, "step": 36886 }, { "epoch": 2.0655728525030796, "grad_norm": 1.229966640472412, "learning_rate": 9.556e-05, "loss": 0.3978, "step": 36887 }, { "epoch": 2.0656288498152087, "grad_norm": 1.129414439201355, "learning_rate": 9.555973684210526e-05, "loss": 0.4401, "step": 36888 }, { "epoch": 2.0656848471273377, "grad_norm": 1.2380365133285522, "learning_rate": 9.555947368421053e-05, "loss": 0.5639, "step": 36889 }, { "epoch": 2.0657408444394667, "grad_norm": 1.3425729274749756, "learning_rate": 9.555921052631579e-05, "loss": 0.467, "step": 36890 }, { "epoch": 2.0657968417515957, "grad_norm": 1.3067071437835693, "learning_rate": 9.555894736842105e-05, "loss": 0.5442, "step": 36891 }, { "epoch": 2.0658528390637247, "grad_norm": 0.9647210836410522, "learning_rate": 9.555868421052631e-05, "loss": 0.3612, "step": 36892 }, { "epoch": 2.0659088363758538, "grad_norm": 1.132678508758545, "learning_rate": 9.555842105263159e-05, "loss": 0.459, "step": 36893 }, { "epoch": 2.065964833687983, "grad_norm": 1.348985195159912, "learning_rate": 9.555815789473685e-05, "loss": 0.3985, "step": 36894 }, { "epoch": 2.066020831000112, "grad_norm": 1.496087670326233, "learning_rate": 9.555789473684212e-05, "loss": 0.4345, "step": 36895 }, { "epoch": 2.066076828312241, "grad_norm": 1.0980764627456665, "learning_rate": 9.555763157894736e-05, "loss": 0.3042, "step": 36896 }, { "epoch": 2.06613282562437, "grad_norm": 1.1323881149291992, "learning_rate": 9.555736842105264e-05, "loss": 0.3571, "step": 36897 }, { "epoch": 2.066188822936499, "grad_norm": 1.0366615056991577, "learning_rate": 9.55571052631579e-05, "loss": 0.4156, "step": 36898 }, { "epoch": 2.066244820248628, "grad_norm": 1.3057920932769775, "learning_rate": 9.555684210526317e-05, "loss": 0.3661, "step": 36899 }, { "epoch": 2.066300817560757, "grad_norm": 
1.364570140838623, "learning_rate": 9.555657894736843e-05, "loss": 0.4026, "step": 36900 }, { "epoch": 2.066356814872886, "grad_norm": 1.1887586116790771, "learning_rate": 9.555631578947369e-05, "loss": 0.4218, "step": 36901 }, { "epoch": 2.066412812185015, "grad_norm": 1.0178182125091553, "learning_rate": 9.555605263157895e-05, "loss": 0.4016, "step": 36902 }, { "epoch": 2.066468809497144, "grad_norm": 1.231207013130188, "learning_rate": 9.555578947368421e-05, "loss": 0.4729, "step": 36903 }, { "epoch": 2.066524806809273, "grad_norm": 1.0676156282424927, "learning_rate": 9.555552631578948e-05, "loss": 0.3729, "step": 36904 }, { "epoch": 2.066580804121402, "grad_norm": 1.0702403783798218, "learning_rate": 9.555526315789474e-05, "loss": 0.3773, "step": 36905 }, { "epoch": 2.066636801433531, "grad_norm": 1.214680790901184, "learning_rate": 9.5555e-05, "loss": 0.3497, "step": 36906 }, { "epoch": 2.06669279874566, "grad_norm": 1.245222568511963, "learning_rate": 9.555473684210526e-05, "loss": 0.4053, "step": 36907 }, { "epoch": 2.066748796057789, "grad_norm": 0.9513360261917114, "learning_rate": 9.555447368421054e-05, "loss": 0.2561, "step": 36908 }, { "epoch": 2.066804793369918, "grad_norm": 1.1846604347229004, "learning_rate": 9.55542105263158e-05, "loss": 0.3632, "step": 36909 }, { "epoch": 2.066860790682047, "grad_norm": 1.0984668731689453, "learning_rate": 9.555394736842105e-05, "loss": 0.5049, "step": 36910 }, { "epoch": 2.066916787994176, "grad_norm": 1.2024413347244263, "learning_rate": 9.555368421052631e-05, "loss": 0.4477, "step": 36911 }, { "epoch": 2.066972785306305, "grad_norm": 1.6307432651519775, "learning_rate": 9.555342105263159e-05, "loss": 0.5437, "step": 36912 }, { "epoch": 2.067028782618434, "grad_norm": 1.186331868171692, "learning_rate": 9.555315789473685e-05, "loss": 0.3038, "step": 36913 }, { "epoch": 2.067084779930563, "grad_norm": 1.291262149810791, "learning_rate": 9.555289473684211e-05, "loss": 0.3901, "step": 36914 }, { "epoch": 
2.067140777242692, "grad_norm": 1.1227818727493286, "learning_rate": 9.555263157894737e-05, "loss": 0.3818, "step": 36915 }, { "epoch": 2.067196774554821, "grad_norm": 1.1458094120025635, "learning_rate": 9.555236842105264e-05, "loss": 0.5112, "step": 36916 }, { "epoch": 2.0672527718669502, "grad_norm": 1.233750343322754, "learning_rate": 9.55521052631579e-05, "loss": 0.347, "step": 36917 }, { "epoch": 2.0673087691790792, "grad_norm": 1.522252082824707, "learning_rate": 9.555184210526317e-05, "loss": 0.459, "step": 36918 }, { "epoch": 2.0673647664912083, "grad_norm": 1.088221549987793, "learning_rate": 9.555157894736842e-05, "loss": 0.3287, "step": 36919 }, { "epoch": 2.0674207638033373, "grad_norm": 1.0218133926391602, "learning_rate": 9.555131578947369e-05, "loss": 0.3374, "step": 36920 }, { "epoch": 2.0674767611154663, "grad_norm": 1.201037049293518, "learning_rate": 9.555105263157895e-05, "loss": 0.4101, "step": 36921 }, { "epoch": 2.0675327584275953, "grad_norm": 1.1600815057754517, "learning_rate": 9.555078947368421e-05, "loss": 0.365, "step": 36922 }, { "epoch": 2.0675887557397243, "grad_norm": 1.1735756397247314, "learning_rate": 9.555052631578949e-05, "loss": 0.4629, "step": 36923 }, { "epoch": 2.0676447530518534, "grad_norm": 1.163258671760559, "learning_rate": 9.555026315789473e-05, "loss": 0.3683, "step": 36924 }, { "epoch": 2.0677007503639824, "grad_norm": 1.4746806621551514, "learning_rate": 9.555e-05, "loss": 0.4052, "step": 36925 }, { "epoch": 2.0677567476761114, "grad_norm": 1.3272709846496582, "learning_rate": 9.554973684210526e-05, "loss": 0.4694, "step": 36926 }, { "epoch": 2.0678127449882404, "grad_norm": 1.1801903247833252, "learning_rate": 9.554947368421054e-05, "loss": 0.3871, "step": 36927 }, { "epoch": 2.0678687423003694, "grad_norm": 1.1899926662445068, "learning_rate": 9.55492105263158e-05, "loss": 0.4908, "step": 36928 }, { "epoch": 2.0679247396124985, "grad_norm": 1.074407935142517, "learning_rate": 9.554894736842106e-05, "loss": 
0.3928, "step": 36929 }, { "epoch": 2.0679807369246275, "grad_norm": 1.132913589477539, "learning_rate": 9.554868421052632e-05, "loss": 0.5435, "step": 36930 }, { "epoch": 2.0680367342367565, "grad_norm": 1.0559227466583252, "learning_rate": 9.554842105263159e-05, "loss": 0.3458, "step": 36931 }, { "epoch": 2.0680927315488855, "grad_norm": 1.2586069107055664, "learning_rate": 9.554815789473685e-05, "loss": 0.3804, "step": 36932 }, { "epoch": 2.0681487288610145, "grad_norm": 1.3245611190795898, "learning_rate": 9.554789473684211e-05, "loss": 0.5137, "step": 36933 }, { "epoch": 2.0682047261731435, "grad_norm": 1.4646902084350586, "learning_rate": 9.554763157894737e-05, "loss": 0.4418, "step": 36934 }, { "epoch": 2.0682607234852726, "grad_norm": 1.1135940551757812, "learning_rate": 9.554736842105264e-05, "loss": 0.3913, "step": 36935 }, { "epoch": 2.0683167207974016, "grad_norm": 1.0342915058135986, "learning_rate": 9.55471052631579e-05, "loss": 0.3362, "step": 36936 }, { "epoch": 2.0683727181095306, "grad_norm": 1.3088523149490356, "learning_rate": 9.554684210526316e-05, "loss": 0.4989, "step": 36937 }, { "epoch": 2.0684287154216596, "grad_norm": 1.4924994707107544, "learning_rate": 9.554657894736842e-05, "loss": 0.4135, "step": 36938 }, { "epoch": 2.0684847127337886, "grad_norm": 1.2817860841751099, "learning_rate": 9.554631578947368e-05, "loss": 0.467, "step": 36939 }, { "epoch": 2.0685407100459177, "grad_norm": 1.0914523601531982, "learning_rate": 9.554605263157896e-05, "loss": 0.3482, "step": 36940 }, { "epoch": 2.0685967073580467, "grad_norm": 1.3932172060012817, "learning_rate": 9.554578947368421e-05, "loss": 0.3755, "step": 36941 }, { "epoch": 2.0686527046701757, "grad_norm": 1.1626859903335571, "learning_rate": 9.554552631578947e-05, "loss": 0.356, "step": 36942 }, { "epoch": 2.0687087019823047, "grad_norm": 1.333052158355713, "learning_rate": 9.554526315789473e-05, "loss": 0.4533, "step": 36943 }, { "epoch": 2.0687646992944337, "grad_norm": 
1.1898797750473022, "learning_rate": 9.554500000000001e-05, "loss": 0.3425, "step": 36944 }, { "epoch": 2.0688206966065628, "grad_norm": 1.055832862854004, "learning_rate": 9.554473684210527e-05, "loss": 0.3349, "step": 36945 }, { "epoch": 2.068876693918692, "grad_norm": 1.3232004642486572, "learning_rate": 9.554447368421053e-05, "loss": 0.3759, "step": 36946 }, { "epoch": 2.068932691230821, "grad_norm": 1.226897120475769, "learning_rate": 9.554421052631579e-05, "loss": 0.3898, "step": 36947 }, { "epoch": 2.06898868854295, "grad_norm": 1.1153355836868286, "learning_rate": 9.554394736842106e-05, "loss": 0.3346, "step": 36948 }, { "epoch": 2.069044685855079, "grad_norm": 1.2123972177505493, "learning_rate": 9.554368421052632e-05, "loss": 0.4014, "step": 36949 }, { "epoch": 2.069100683167208, "grad_norm": 1.141378402709961, "learning_rate": 9.554342105263159e-05, "loss": 0.4785, "step": 36950 }, { "epoch": 2.069156680479337, "grad_norm": 1.3106846809387207, "learning_rate": 9.554315789473684e-05, "loss": 0.3313, "step": 36951 }, { "epoch": 2.069212677791466, "grad_norm": 1.13906991481781, "learning_rate": 9.554289473684211e-05, "loss": 0.3155, "step": 36952 }, { "epoch": 2.069268675103595, "grad_norm": 1.2419027090072632, "learning_rate": 9.554263157894737e-05, "loss": 0.3622, "step": 36953 }, { "epoch": 2.069324672415724, "grad_norm": 1.2661066055297852, "learning_rate": 9.554236842105265e-05, "loss": 0.4009, "step": 36954 }, { "epoch": 2.069380669727853, "grad_norm": 1.1473249197006226, "learning_rate": 9.55421052631579e-05, "loss": 0.3986, "step": 36955 }, { "epoch": 2.069436667039982, "grad_norm": 1.9443079233169556, "learning_rate": 9.554184210526315e-05, "loss": 0.3873, "step": 36956 }, { "epoch": 2.069492664352111, "grad_norm": 1.073199987411499, "learning_rate": 9.554157894736842e-05, "loss": 0.4132, "step": 36957 }, { "epoch": 2.06954866166424, "grad_norm": 1.2984119653701782, "learning_rate": 9.554131578947368e-05, "loss": 0.362, "step": 36958 }, { "epoch": 
2.069604658976369, "grad_norm": 1.2506521940231323, "learning_rate": 9.554105263157896e-05, "loss": 0.6003, "step": 36959 }, { "epoch": 2.069660656288498, "grad_norm": 1.4506969451904297, "learning_rate": 9.554078947368422e-05, "loss": 0.3888, "step": 36960 }, { "epoch": 2.069716653600627, "grad_norm": 1.0450642108917236, "learning_rate": 9.554052631578948e-05, "loss": 0.3111, "step": 36961 }, { "epoch": 2.069772650912756, "grad_norm": 1.128321886062622, "learning_rate": 9.554026315789474e-05, "loss": 0.4417, "step": 36962 }, { "epoch": 2.069828648224885, "grad_norm": 1.2313975095748901, "learning_rate": 9.554000000000001e-05, "loss": 0.4691, "step": 36963 }, { "epoch": 2.069884645537014, "grad_norm": 0.8868594169616699, "learning_rate": 9.553973684210527e-05, "loss": 0.2942, "step": 36964 }, { "epoch": 2.069940642849143, "grad_norm": 1.2188931703567505, "learning_rate": 9.553947368421053e-05, "loss": 0.4984, "step": 36965 }, { "epoch": 2.069996640161272, "grad_norm": 1.3257720470428467, "learning_rate": 9.553921052631579e-05, "loss": 0.442, "step": 36966 }, { "epoch": 2.070052637473401, "grad_norm": 0.9913079738616943, "learning_rate": 9.553894736842106e-05, "loss": 0.3307, "step": 36967 }, { "epoch": 2.07010863478553, "grad_norm": 1.6526544094085693, "learning_rate": 9.553868421052632e-05, "loss": 0.3479, "step": 36968 }, { "epoch": 2.070164632097659, "grad_norm": 1.077796220779419, "learning_rate": 9.553842105263158e-05, "loss": 0.4324, "step": 36969 }, { "epoch": 2.0702206294097882, "grad_norm": 1.4535760879516602, "learning_rate": 9.553815789473684e-05, "loss": 0.4425, "step": 36970 }, { "epoch": 2.0702766267219173, "grad_norm": 1.2667109966278076, "learning_rate": 9.553789473684212e-05, "loss": 0.4282, "step": 36971 }, { "epoch": 2.0703326240340463, "grad_norm": 1.4694347381591797, "learning_rate": 9.553763157894737e-05, "loss": 0.3872, "step": 36972 }, { "epoch": 2.0703886213461753, "grad_norm": 1.3678876161575317, "learning_rate": 9.553736842105263e-05, 
"loss": 0.395, "step": 36973 }, { "epoch": 2.0704446186583043, "grad_norm": 1.1957201957702637, "learning_rate": 9.55371052631579e-05, "loss": 0.4489, "step": 36974 }, { "epoch": 2.0705006159704333, "grad_norm": 1.1299457550048828, "learning_rate": 9.553684210526315e-05, "loss": 0.3781, "step": 36975 }, { "epoch": 2.0705566132825624, "grad_norm": 1.2196074724197388, "learning_rate": 9.553657894736843e-05, "loss": 0.4459, "step": 36976 }, { "epoch": 2.0706126105946914, "grad_norm": 1.035150408744812, "learning_rate": 9.553631578947369e-05, "loss": 0.4042, "step": 36977 }, { "epoch": 2.0706686079068204, "grad_norm": 1.177608847618103, "learning_rate": 9.553605263157896e-05, "loss": 0.3662, "step": 36978 }, { "epoch": 2.0707246052189494, "grad_norm": 1.466491460800171, "learning_rate": 9.55357894736842e-05, "loss": 0.4265, "step": 36979 }, { "epoch": 2.0707806025310784, "grad_norm": 1.8743799924850464, "learning_rate": 9.553552631578948e-05, "loss": 0.4758, "step": 36980 }, { "epoch": 2.0708365998432074, "grad_norm": 1.1449819803237915, "learning_rate": 9.553526315789474e-05, "loss": 0.3368, "step": 36981 }, { "epoch": 2.0708925971553365, "grad_norm": 1.0853910446166992, "learning_rate": 9.553500000000001e-05, "loss": 0.3319, "step": 36982 }, { "epoch": 2.0709485944674655, "grad_norm": 0.9453714489936829, "learning_rate": 9.553473684210526e-05, "loss": 0.308, "step": 36983 }, { "epoch": 2.0710045917795945, "grad_norm": 1.0927577018737793, "learning_rate": 9.553447368421053e-05, "loss": 0.4955, "step": 36984 }, { "epoch": 2.0710605890917235, "grad_norm": 1.1222269535064697, "learning_rate": 9.553421052631579e-05, "loss": 0.3911, "step": 36985 }, { "epoch": 2.0711165864038525, "grad_norm": 1.1565266847610474, "learning_rate": 9.553394736842107e-05, "loss": 0.4005, "step": 36986 }, { "epoch": 2.0711725837159816, "grad_norm": 1.2788268327713013, "learning_rate": 9.553368421052633e-05, "loss": 0.3073, "step": 36987 }, { "epoch": 2.0712285810281106, "grad_norm": 
1.2404124736785889, "learning_rate": 9.553342105263158e-05, "loss": 0.3663, "step": 36988 }, { "epoch": 2.0712845783402396, "grad_norm": 1.246645212173462, "learning_rate": 9.553315789473684e-05, "loss": 0.4515, "step": 36989 }, { "epoch": 2.0713405756523686, "grad_norm": 1.140640139579773, "learning_rate": 9.55328947368421e-05, "loss": 0.3704, "step": 36990 }, { "epoch": 2.0713965729644976, "grad_norm": 1.2302244901657104, "learning_rate": 9.553263157894738e-05, "loss": 0.451, "step": 36991 }, { "epoch": 2.0714525702766267, "grad_norm": 1.0759280920028687, "learning_rate": 9.553236842105264e-05, "loss": 0.4187, "step": 36992 }, { "epoch": 2.0715085675887557, "grad_norm": 0.9919374585151672, "learning_rate": 9.55321052631579e-05, "loss": 0.3183, "step": 36993 }, { "epoch": 2.0715645649008847, "grad_norm": 1.2650642395019531, "learning_rate": 9.553184210526316e-05, "loss": 0.4595, "step": 36994 }, { "epoch": 2.0716205622130137, "grad_norm": 1.0574581623077393, "learning_rate": 9.553157894736843e-05, "loss": 0.3302, "step": 36995 }, { "epoch": 2.0716765595251427, "grad_norm": 1.3501040935516357, "learning_rate": 9.553131578947369e-05, "loss": 0.522, "step": 36996 }, { "epoch": 2.0717325568372718, "grad_norm": 1.5992681980133057, "learning_rate": 9.553105263157895e-05, "loss": 0.3718, "step": 36997 }, { "epoch": 2.0717885541494008, "grad_norm": 1.3846632242202759, "learning_rate": 9.553078947368421e-05, "loss": 0.4699, "step": 36998 }, { "epoch": 2.07184455146153, "grad_norm": 1.1945549249649048, "learning_rate": 9.553052631578948e-05, "loss": 0.3624, "step": 36999 }, { "epoch": 2.071900548773659, "grad_norm": 1.1957957744598389, "learning_rate": 9.553026315789474e-05, "loss": 0.3635, "step": 37000 }, { "epoch": 2.071956546085788, "grad_norm": 1.0470725297927856, "learning_rate": 9.553e-05, "loss": 0.3627, "step": 37001 }, { "epoch": 2.072012543397917, "grad_norm": 1.5645742416381836, "learning_rate": 9.552973684210526e-05, "loss": 0.5567, "step": 37002 }, { "epoch": 
2.072068540710046, "grad_norm": 1.2169232368469238, "learning_rate": 9.552947368421053e-05, "loss": 0.5565, "step": 37003 }, { "epoch": 2.072124538022175, "grad_norm": 1.127480387687683, "learning_rate": 9.55292105263158e-05, "loss": 0.3291, "step": 37004 }, { "epoch": 2.072180535334304, "grad_norm": 1.265182375907898, "learning_rate": 9.552894736842107e-05, "loss": 0.4051, "step": 37005 }, { "epoch": 2.072236532646433, "grad_norm": 1.5591259002685547, "learning_rate": 9.552868421052631e-05, "loss": 0.3732, "step": 37006 }, { "epoch": 2.072292529958562, "grad_norm": 1.1799579858779907, "learning_rate": 9.552842105263157e-05, "loss": 0.4394, "step": 37007 }, { "epoch": 2.072348527270691, "grad_norm": 1.1281867027282715, "learning_rate": 9.552815789473685e-05, "loss": 0.3402, "step": 37008 }, { "epoch": 2.07240452458282, "grad_norm": 1.11223304271698, "learning_rate": 9.552789473684211e-05, "loss": 0.3626, "step": 37009 }, { "epoch": 2.072460521894949, "grad_norm": 1.16005277633667, "learning_rate": 9.552763157894738e-05, "loss": 0.3698, "step": 37010 }, { "epoch": 2.072516519207078, "grad_norm": 1.0963557958602905, "learning_rate": 9.552736842105263e-05, "loss": 0.333, "step": 37011 }, { "epoch": 2.072572516519207, "grad_norm": 1.276484489440918, "learning_rate": 9.55271052631579e-05, "loss": 0.4115, "step": 37012 }, { "epoch": 2.072628513831336, "grad_norm": 1.0017668008804321, "learning_rate": 9.552684210526316e-05, "loss": 0.2998, "step": 37013 }, { "epoch": 2.072684511143465, "grad_norm": 1.1222561597824097, "learning_rate": 9.552657894736843e-05, "loss": 0.4406, "step": 37014 }, { "epoch": 2.072740508455594, "grad_norm": 1.30161452293396, "learning_rate": 9.552631578947369e-05, "loss": 0.3866, "step": 37015 }, { "epoch": 2.072796505767723, "grad_norm": 1.3920040130615234, "learning_rate": 9.552605263157895e-05, "loss": 0.4152, "step": 37016 }, { "epoch": 2.072852503079852, "grad_norm": 1.2607569694519043, "learning_rate": 9.552578947368421e-05, "loss": 0.3916, 
"step": 37017 }, { "epoch": 2.072908500391981, "grad_norm": 1.0879734754562378, "learning_rate": 9.552552631578948e-05, "loss": 0.4235, "step": 37018 }, { "epoch": 2.07296449770411, "grad_norm": 1.5044060945510864, "learning_rate": 9.552526315789474e-05, "loss": 0.3682, "step": 37019 }, { "epoch": 2.073020495016239, "grad_norm": 1.2094651460647583, "learning_rate": 9.5525e-05, "loss": 0.3546, "step": 37020 }, { "epoch": 2.073076492328368, "grad_norm": 1.2888457775115967, "learning_rate": 9.552473684210526e-05, "loss": 0.3719, "step": 37021 }, { "epoch": 2.0731324896404972, "grad_norm": 1.185799479484558, "learning_rate": 9.552447368421054e-05, "loss": 0.6407, "step": 37022 }, { "epoch": 2.0731884869526263, "grad_norm": 1.4544696807861328, "learning_rate": 9.55242105263158e-05, "loss": 0.5527, "step": 37023 }, { "epoch": 2.0732444842647553, "grad_norm": 1.117397665977478, "learning_rate": 9.552394736842106e-05, "loss": 0.3854, "step": 37024 }, { "epoch": 2.0733004815768843, "grad_norm": 1.1623637676239014, "learning_rate": 9.552368421052632e-05, "loss": 0.368, "step": 37025 }, { "epoch": 2.0733564788890133, "grad_norm": 1.2776652574539185, "learning_rate": 9.552342105263158e-05, "loss": 0.3656, "step": 37026 }, { "epoch": 2.0734124762011423, "grad_norm": 1.075451135635376, "learning_rate": 9.552315789473685e-05, "loss": 0.4179, "step": 37027 }, { "epoch": 2.0734684735132713, "grad_norm": 1.8438770771026611, "learning_rate": 9.552289473684211e-05, "loss": 0.442, "step": 37028 }, { "epoch": 2.0735244708254004, "grad_norm": 1.3850990533828735, "learning_rate": 9.552263157894737e-05, "loss": 0.3629, "step": 37029 }, { "epoch": 2.0735804681375294, "grad_norm": 2.116248369216919, "learning_rate": 9.552236842105263e-05, "loss": 0.3262, "step": 37030 }, { "epoch": 2.0736364654496584, "grad_norm": 1.1774441003799438, "learning_rate": 9.55221052631579e-05, "loss": 0.3812, "step": 37031 }, { "epoch": 2.0736924627617874, "grad_norm": 1.1282743215560913, "learning_rate": 
9.552184210526316e-05, "loss": 0.449, "step": 37032 }, { "epoch": 2.0737484600739164, "grad_norm": 1.1806365251541138, "learning_rate": 9.552157894736844e-05, "loss": 0.4776, "step": 37033 }, { "epoch": 2.0738044573860455, "grad_norm": 1.3845775127410889, "learning_rate": 9.552131578947368e-05, "loss": 0.39, "step": 37034 }, { "epoch": 2.0738604546981745, "grad_norm": 1.1486878395080566, "learning_rate": 9.552105263157895e-05, "loss": 0.372, "step": 37035 }, { "epoch": 2.0739164520103035, "grad_norm": 1.1615314483642578, "learning_rate": 9.552078947368421e-05, "loss": 0.4529, "step": 37036 }, { "epoch": 2.0739724493224325, "grad_norm": 1.048534631729126, "learning_rate": 9.552052631578949e-05, "loss": 0.3942, "step": 37037 }, { "epoch": 2.0740284466345615, "grad_norm": 1.3325378894805908, "learning_rate": 9.552026315789473e-05, "loss": 0.4338, "step": 37038 }, { "epoch": 2.0740844439466906, "grad_norm": 1.5767453908920288, "learning_rate": 9.552000000000001e-05, "loss": 0.4125, "step": 37039 }, { "epoch": 2.0741404412588196, "grad_norm": 1.3049664497375488, "learning_rate": 9.551973684210527e-05, "loss": 0.4525, "step": 37040 }, { "epoch": 2.0741964385709486, "grad_norm": 1.1605595350265503, "learning_rate": 9.551947368421054e-05, "loss": 0.3554, "step": 37041 }, { "epoch": 2.0742524358830776, "grad_norm": 1.7675814628601074, "learning_rate": 9.55192105263158e-05, "loss": 0.4183, "step": 37042 }, { "epoch": 2.0743084331952066, "grad_norm": 2.703983783721924, "learning_rate": 9.551894736842105e-05, "loss": 0.4311, "step": 37043 }, { "epoch": 2.0743644305073357, "grad_norm": 1.2000855207443237, "learning_rate": 9.551868421052632e-05, "loss": 0.4255, "step": 37044 }, { "epoch": 2.0744204278194647, "grad_norm": 1.2760854959487915, "learning_rate": 9.551842105263158e-05, "loss": 0.5025, "step": 37045 }, { "epoch": 2.0744764251315937, "grad_norm": 1.0725868940353394, "learning_rate": 9.551815789473685e-05, "loss": 0.2992, "step": 37046 }, { "epoch": 2.0745324224437227, 
"grad_norm": 1.1692692041397095, "learning_rate": 9.551789473684211e-05, "loss": 0.5226, "step": 37047 }, { "epoch": 2.0745884197558517, "grad_norm": 1.4657577276229858, "learning_rate": 9.551763157894737e-05, "loss": 0.3711, "step": 37048 }, { "epoch": 2.0746444170679808, "grad_norm": 2.478576421737671, "learning_rate": 9.551736842105263e-05, "loss": 0.3464, "step": 37049 }, { "epoch": 2.0747004143801098, "grad_norm": 1.212652325630188, "learning_rate": 9.55171052631579e-05, "loss": 0.3771, "step": 37050 }, { "epoch": 2.074756411692239, "grad_norm": 2.1155765056610107, "learning_rate": 9.551684210526316e-05, "loss": 0.4901, "step": 37051 }, { "epoch": 2.074812409004368, "grad_norm": 1.8258616924285889, "learning_rate": 9.551657894736842e-05, "loss": 0.7175, "step": 37052 }, { "epoch": 2.074868406316497, "grad_norm": 1.197744607925415, "learning_rate": 9.551631578947368e-05, "loss": 0.3696, "step": 37053 }, { "epoch": 2.074924403628626, "grad_norm": 1.0415812730789185, "learning_rate": 9.551605263157896e-05, "loss": 0.3591, "step": 37054 }, { "epoch": 2.074980400940755, "grad_norm": 1.1505001783370972, "learning_rate": 9.551578947368422e-05, "loss": 0.3208, "step": 37055 }, { "epoch": 2.075036398252884, "grad_norm": 1.2407912015914917, "learning_rate": 9.551552631578948e-05, "loss": 0.4863, "step": 37056 }, { "epoch": 2.075092395565013, "grad_norm": 1.2999475002288818, "learning_rate": 9.551526315789474e-05, "loss": 0.3376, "step": 37057 }, { "epoch": 2.075148392877142, "grad_norm": 1.7863974571228027, "learning_rate": 9.551500000000001e-05, "loss": 0.3528, "step": 37058 }, { "epoch": 2.075204390189271, "grad_norm": 1.103898525238037, "learning_rate": 9.551473684210527e-05, "loss": 0.4201, "step": 37059 }, { "epoch": 2.0752603875014, "grad_norm": 1.8107049465179443, "learning_rate": 9.551447368421053e-05, "loss": 0.3832, "step": 37060 }, { "epoch": 2.075316384813529, "grad_norm": 1.3128491640090942, "learning_rate": 9.551421052631579e-05, "loss": 0.4369, "step": 
37061 }, { "epoch": 2.075372382125658, "grad_norm": 1.1766630411148071, "learning_rate": 9.551394736842105e-05, "loss": 0.3435, "step": 37062 }, { "epoch": 2.075428379437787, "grad_norm": 1.3892816305160522, "learning_rate": 9.551368421052632e-05, "loss": 0.4505, "step": 37063 }, { "epoch": 2.075484376749916, "grad_norm": 1.3339816331863403, "learning_rate": 9.551342105263158e-05, "loss": 0.3003, "step": 37064 }, { "epoch": 2.075540374062045, "grad_norm": 1.2571492195129395, "learning_rate": 9.551315789473685e-05, "loss": 0.3149, "step": 37065 }, { "epoch": 2.075596371374174, "grad_norm": 1.3235915899276733, "learning_rate": 9.55128947368421e-05, "loss": 0.437, "step": 37066 }, { "epoch": 2.075652368686303, "grad_norm": 1.3600482940673828, "learning_rate": 9.551263157894737e-05, "loss": 0.3727, "step": 37067 }, { "epoch": 2.075708365998432, "grad_norm": 1.1924700736999512, "learning_rate": 9.551236842105263e-05, "loss": 0.4372, "step": 37068 }, { "epoch": 2.075764363310561, "grad_norm": 1.0132958889007568, "learning_rate": 9.551210526315791e-05, "loss": 0.3187, "step": 37069 }, { "epoch": 2.07582036062269, "grad_norm": 1.1740894317626953, "learning_rate": 9.551184210526317e-05, "loss": 0.3479, "step": 37070 }, { "epoch": 2.075876357934819, "grad_norm": 1.0848509073257446, "learning_rate": 9.551157894736843e-05, "loss": 0.3972, "step": 37071 }, { "epoch": 2.075932355246948, "grad_norm": 1.0278829336166382, "learning_rate": 9.551131578947369e-05, "loss": 0.3416, "step": 37072 }, { "epoch": 2.075988352559077, "grad_norm": 1.0650432109832764, "learning_rate": 9.551105263157896e-05, "loss": 0.321, "step": 37073 }, { "epoch": 2.0760443498712062, "grad_norm": 1.2951693534851074, "learning_rate": 9.551078947368422e-05, "loss": 0.4047, "step": 37074 }, { "epoch": 2.0761003471833352, "grad_norm": 1.3263237476348877, "learning_rate": 9.551052631578948e-05, "loss": 0.5355, "step": 37075 }, { "epoch": 2.0761563444954643, "grad_norm": 1.0165356397628784, "learning_rate": 
9.551026315789474e-05, "loss": 0.35, "step": 37076 }, { "epoch": 2.0762123418075933, "grad_norm": 1.2454731464385986, "learning_rate": 9.551e-05, "loss": 0.3496, "step": 37077 }, { "epoch": 2.0762683391197223, "grad_norm": 1.1699351072311401, "learning_rate": 9.550973684210527e-05, "loss": 0.4812, "step": 37078 }, { "epoch": 2.0763243364318513, "grad_norm": 1.3235853910446167, "learning_rate": 9.550947368421053e-05, "loss": 0.3165, "step": 37079 }, { "epoch": 2.0763803337439803, "grad_norm": 1.433682918548584, "learning_rate": 9.550921052631579e-05, "loss": 0.4355, "step": 37080 }, { "epoch": 2.0764363310561094, "grad_norm": 1.1682240962982178, "learning_rate": 9.550894736842105e-05, "loss": 0.4433, "step": 37081 }, { "epoch": 2.0764923283682384, "grad_norm": 1.4480180740356445, "learning_rate": 9.550868421052632e-05, "loss": 0.268, "step": 37082 }, { "epoch": 2.0765483256803674, "grad_norm": 1.1202014684677124, "learning_rate": 9.550842105263158e-05, "loss": 0.3283, "step": 37083 }, { "epoch": 2.0766043229924964, "grad_norm": 1.251604676246643, "learning_rate": 9.550815789473684e-05, "loss": 0.4132, "step": 37084 }, { "epoch": 2.0766603203046254, "grad_norm": 1.2931939363479614, "learning_rate": 9.55078947368421e-05, "loss": 0.3654, "step": 37085 }, { "epoch": 2.0767163176167545, "grad_norm": 1.807619571685791, "learning_rate": 9.550763157894738e-05, "loss": 0.6579, "step": 37086 }, { "epoch": 2.0767723149288835, "grad_norm": 0.9698479771614075, "learning_rate": 9.550736842105264e-05, "loss": 0.3679, "step": 37087 }, { "epoch": 2.0768283122410125, "grad_norm": 1.1213072538375854, "learning_rate": 9.550710526315791e-05, "loss": 0.4671, "step": 37088 }, { "epoch": 2.0768843095531415, "grad_norm": 1.1348891258239746, "learning_rate": 9.550684210526316e-05, "loss": 0.3589, "step": 37089 }, { "epoch": 2.0769403068652705, "grad_norm": 1.299492359161377, "learning_rate": 9.550657894736843e-05, "loss": 0.4255, "step": 37090 }, { "epoch": 2.0769963041773996, "grad_norm": 
1.1695098876953125, "learning_rate": 9.550631578947369e-05, "loss": 0.4946, "step": 37091 }, { "epoch": 2.0770523014895286, "grad_norm": 1.1541613340377808, "learning_rate": 9.550605263157896e-05, "loss": 0.3691, "step": 37092 }, { "epoch": 2.0771082988016576, "grad_norm": 1.1366599798202515, "learning_rate": 9.550578947368421e-05, "loss": 0.3645, "step": 37093 }, { "epoch": 2.0771642961137866, "grad_norm": 1.00677490234375, "learning_rate": 9.550552631578947e-05, "loss": 0.3854, "step": 37094 }, { "epoch": 2.0772202934259156, "grad_norm": 1.0176050662994385, "learning_rate": 9.550526315789474e-05, "loss": 0.3488, "step": 37095 }, { "epoch": 2.0772762907380447, "grad_norm": 1.3522952795028687, "learning_rate": 9.5505e-05, "loss": 0.4183, "step": 37096 }, { "epoch": 2.0773322880501737, "grad_norm": 1.144392490386963, "learning_rate": 9.550473684210527e-05, "loss": 0.4108, "step": 37097 }, { "epoch": 2.0773882853623027, "grad_norm": 1.8258731365203857, "learning_rate": 9.550447368421052e-05, "loss": 0.4787, "step": 37098 }, { "epoch": 2.0774442826744317, "grad_norm": 1.1547350883483887, "learning_rate": 9.55042105263158e-05, "loss": 0.3333, "step": 37099 }, { "epoch": 2.0775002799865607, "grad_norm": 1.4513922929763794, "learning_rate": 9.550394736842105e-05, "loss": 0.5042, "step": 37100 }, { "epoch": 2.0775562772986897, "grad_norm": 1.0561192035675049, "learning_rate": 9.550368421052633e-05, "loss": 0.3526, "step": 37101 }, { "epoch": 2.0776122746108188, "grad_norm": 1.2201570272445679, "learning_rate": 9.550342105263159e-05, "loss": 0.5021, "step": 37102 }, { "epoch": 2.077668271922948, "grad_norm": 1.092822790145874, "learning_rate": 9.550315789473685e-05, "loss": 0.3379, "step": 37103 }, { "epoch": 2.077724269235077, "grad_norm": 1.3406414985656738, "learning_rate": 9.55028947368421e-05, "loss": 0.4214, "step": 37104 }, { "epoch": 2.077780266547206, "grad_norm": 1.097962498664856, "learning_rate": 9.550263157894738e-05, "loss": 0.4013, "step": 37105 }, { 
"epoch": 2.077836263859335, "grad_norm": 1.019471526145935, "learning_rate": 9.550236842105264e-05, "loss": 0.2978, "step": 37106 }, { "epoch": 2.077892261171464, "grad_norm": 1.233687400817871, "learning_rate": 9.55021052631579e-05, "loss": 0.4046, "step": 37107 }, { "epoch": 2.077948258483593, "grad_norm": 1.3127537965774536, "learning_rate": 9.550184210526316e-05, "loss": 0.3596, "step": 37108 }, { "epoch": 2.078004255795722, "grad_norm": 1.0794765949249268, "learning_rate": 9.550157894736843e-05, "loss": 0.3634, "step": 37109 }, { "epoch": 2.078060253107851, "grad_norm": 1.2233608961105347, "learning_rate": 9.550131578947369e-05, "loss": 0.4409, "step": 37110 }, { "epoch": 2.07811625041998, "grad_norm": 1.390821099281311, "learning_rate": 9.550105263157895e-05, "loss": 0.4429, "step": 37111 }, { "epoch": 2.078172247732109, "grad_norm": 1.3868762254714966, "learning_rate": 9.550078947368421e-05, "loss": 0.4688, "step": 37112 }, { "epoch": 2.078228245044238, "grad_norm": 1.585021734237671, "learning_rate": 9.550052631578947e-05, "loss": 0.4357, "step": 37113 }, { "epoch": 2.078284242356367, "grad_norm": 1.3313313722610474, "learning_rate": 9.550026315789474e-05, "loss": 0.4094, "step": 37114 }, { "epoch": 2.078340239668496, "grad_norm": 1.19365394115448, "learning_rate": 9.55e-05, "loss": 0.4417, "step": 37115 }, { "epoch": 2.078396236980625, "grad_norm": 1.2019215822219849, "learning_rate": 9.549973684210526e-05, "loss": 0.3783, "step": 37116 }, { "epoch": 2.078452234292754, "grad_norm": 1.3380273580551147, "learning_rate": 9.549947368421052e-05, "loss": 0.4253, "step": 37117 }, { "epoch": 2.078508231604883, "grad_norm": 1.0407902002334595, "learning_rate": 9.54992105263158e-05, "loss": 0.3846, "step": 37118 }, { "epoch": 2.078564228917012, "grad_norm": 1.1597486734390259, "learning_rate": 9.549894736842106e-05, "loss": 0.3388, "step": 37119 }, { "epoch": 2.078620226229141, "grad_norm": 1.4331289529800415, "learning_rate": 9.549868421052633e-05, "loss": 0.3774, 
"step": 37120 }, { "epoch": 2.07867622354127, "grad_norm": 1.128772497177124, "learning_rate": 9.549842105263158e-05, "loss": 0.4266, "step": 37121 }, { "epoch": 2.078732220853399, "grad_norm": 2.545440435409546, "learning_rate": 9.549815789473685e-05, "loss": 0.4797, "step": 37122 }, { "epoch": 2.078788218165528, "grad_norm": 1.110619306564331, "learning_rate": 9.549789473684211e-05, "loss": 0.3519, "step": 37123 }, { "epoch": 2.078844215477657, "grad_norm": 1.1616355180740356, "learning_rate": 9.549763157894738e-05, "loss": 0.3368, "step": 37124 }, { "epoch": 2.078900212789786, "grad_norm": 1.4003827571868896, "learning_rate": 9.549736842105264e-05, "loss": 0.5483, "step": 37125 }, { "epoch": 2.0789562101019152, "grad_norm": 1.0419765710830688, "learning_rate": 9.54971052631579e-05, "loss": 0.3862, "step": 37126 }, { "epoch": 2.0790122074140442, "grad_norm": 1.1480308771133423, "learning_rate": 9.549684210526316e-05, "loss": 0.393, "step": 37127 }, { "epoch": 2.0790682047261733, "grad_norm": 1.9641711711883545, "learning_rate": 9.549657894736842e-05, "loss": 0.5263, "step": 37128 }, { "epoch": 2.0791242020383023, "grad_norm": 1.1566872596740723, "learning_rate": 9.54963157894737e-05, "loss": 0.3837, "step": 37129 }, { "epoch": 2.0791801993504313, "grad_norm": 1.0126267671585083, "learning_rate": 9.549605263157895e-05, "loss": 0.3531, "step": 37130 }, { "epoch": 2.0792361966625603, "grad_norm": 1.4017198085784912, "learning_rate": 9.549578947368421e-05, "loss": 0.4071, "step": 37131 }, { "epoch": 2.0792921939746893, "grad_norm": 1.0598618984222412, "learning_rate": 9.549552631578947e-05, "loss": 0.3409, "step": 37132 }, { "epoch": 2.0793481912868184, "grad_norm": 1.2616815567016602, "learning_rate": 9.549526315789475e-05, "loss": 0.3222, "step": 37133 }, { "epoch": 2.0794041885989474, "grad_norm": 1.5376297235488892, "learning_rate": 9.5495e-05, "loss": 0.3813, "step": 37134 }, { "epoch": 2.0794601859110764, "grad_norm": 1.044512152671814, "learning_rate": 
9.549473684210527e-05, "loss": 0.4686, "step": 37135 }, { "epoch": 2.0795161832232054, "grad_norm": 1.1966419219970703, "learning_rate": 9.549447368421053e-05, "loss": 0.3618, "step": 37136 }, { "epoch": 2.0795721805353344, "grad_norm": 1.2195829153060913, "learning_rate": 9.54942105263158e-05, "loss": 0.361, "step": 37137 }, { "epoch": 2.0796281778474635, "grad_norm": 1.0981051921844482, "learning_rate": 9.549394736842106e-05, "loss": 0.3469, "step": 37138 }, { "epoch": 2.0796841751595925, "grad_norm": 1.322159767150879, "learning_rate": 9.549368421052632e-05, "loss": 0.4314, "step": 37139 }, { "epoch": 2.0797401724717215, "grad_norm": 1.1160557270050049, "learning_rate": 9.549342105263158e-05, "loss": 0.3706, "step": 37140 }, { "epoch": 2.0797961697838505, "grad_norm": 0.995877742767334, "learning_rate": 9.549315789473685e-05, "loss": 0.355, "step": 37141 }, { "epoch": 2.0798521670959795, "grad_norm": 1.0276455879211426, "learning_rate": 9.549289473684211e-05, "loss": 0.408, "step": 37142 }, { "epoch": 2.0799081644081086, "grad_norm": 1.3109102249145508, "learning_rate": 9.549263157894738e-05, "loss": 0.5016, "step": 37143 }, { "epoch": 2.0799641617202376, "grad_norm": 1.0983550548553467, "learning_rate": 9.549236842105263e-05, "loss": 0.4917, "step": 37144 }, { "epoch": 2.0800201590323666, "grad_norm": 1.2872225046157837, "learning_rate": 9.549210526315789e-05, "loss": 0.3916, "step": 37145 }, { "epoch": 2.0800761563444956, "grad_norm": 1.0223556756973267, "learning_rate": 9.549184210526316e-05, "loss": 0.3483, "step": 37146 }, { "epoch": 2.0801321536566246, "grad_norm": 1.2347514629364014, "learning_rate": 9.549157894736842e-05, "loss": 0.3918, "step": 37147 }, { "epoch": 2.0801881509687536, "grad_norm": 1.2691410779953003, "learning_rate": 9.549131578947368e-05, "loss": 0.4163, "step": 37148 }, { "epoch": 2.0802441482808827, "grad_norm": 1.1786844730377197, "learning_rate": 9.549105263157894e-05, "loss": 0.3879, "step": 37149 }, { "epoch": 2.0803001455930117, 
"grad_norm": 1.1877529621124268, "learning_rate": 9.549078947368422e-05, "loss": 0.4794, "step": 37150 }, { "epoch": 2.0803561429051407, "grad_norm": 1.334516167640686, "learning_rate": 9.549052631578948e-05, "loss": 0.4243, "step": 37151 }, { "epoch": 2.0804121402172697, "grad_norm": 1.169430136680603, "learning_rate": 9.549026315789475e-05, "loss": 0.3405, "step": 37152 }, { "epoch": 2.0804681375293987, "grad_norm": 1.3155646324157715, "learning_rate": 9.549e-05, "loss": 0.3505, "step": 37153 }, { "epoch": 2.0805241348415278, "grad_norm": 1.1126888990402222, "learning_rate": 9.548973684210527e-05, "loss": 0.3629, "step": 37154 }, { "epoch": 2.080580132153657, "grad_norm": 1.2429786920547485, "learning_rate": 9.548947368421053e-05, "loss": 0.3864, "step": 37155 }, { "epoch": 2.080636129465786, "grad_norm": 1.2071207761764526, "learning_rate": 9.54892105263158e-05, "loss": 0.4656, "step": 37156 }, { "epoch": 2.080692126777915, "grad_norm": 1.2748379707336426, "learning_rate": 9.548894736842106e-05, "loss": 0.4055, "step": 37157 }, { "epoch": 2.080748124090044, "grad_norm": 1.146135687828064, "learning_rate": 9.548868421052632e-05, "loss": 0.3264, "step": 37158 }, { "epoch": 2.080804121402173, "grad_norm": 1.2361005544662476, "learning_rate": 9.548842105263158e-05, "loss": 0.3694, "step": 37159 }, { "epoch": 2.080860118714302, "grad_norm": 1.1005116701126099, "learning_rate": 9.548815789473685e-05, "loss": 0.3446, "step": 37160 }, { "epoch": 2.080916116026431, "grad_norm": 1.1418181657791138, "learning_rate": 9.548789473684211e-05, "loss": 0.3424, "step": 37161 }, { "epoch": 2.08097211333856, "grad_norm": 1.3484691381454468, "learning_rate": 9.548763157894737e-05, "loss": 0.4909, "step": 37162 }, { "epoch": 2.081028110650689, "grad_norm": 1.1424063444137573, "learning_rate": 9.548736842105263e-05, "loss": 0.4426, "step": 37163 }, { "epoch": 2.081084107962818, "grad_norm": 1.1802825927734375, "learning_rate": 9.548710526315789e-05, "loss": 0.4285, "step": 37164 }, { 
"epoch": 2.081140105274947, "grad_norm": 1.1364519596099854, "learning_rate": 9.548684210526317e-05, "loss": 0.4595, "step": 37165 }, { "epoch": 2.081196102587076, "grad_norm": 1.2519359588623047, "learning_rate": 9.548657894736843e-05, "loss": 0.3414, "step": 37166 }, { "epoch": 2.081252099899205, "grad_norm": 1.1235076189041138, "learning_rate": 9.548631578947369e-05, "loss": 0.4133, "step": 37167 }, { "epoch": 2.081308097211334, "grad_norm": 1.2762763500213623, "learning_rate": 9.548605263157895e-05, "loss": 0.4115, "step": 37168 }, { "epoch": 2.081364094523463, "grad_norm": 1.2176369428634644, "learning_rate": 9.548578947368422e-05, "loss": 0.4538, "step": 37169 }, { "epoch": 2.081420091835592, "grad_norm": 1.1449079513549805, "learning_rate": 9.548552631578948e-05, "loss": 0.4054, "step": 37170 }, { "epoch": 2.081476089147721, "grad_norm": 1.3267099857330322, "learning_rate": 9.548526315789474e-05, "loss": 0.4364, "step": 37171 }, { "epoch": 2.08153208645985, "grad_norm": 1.049601674079895, "learning_rate": 9.5485e-05, "loss": 0.3338, "step": 37172 }, { "epoch": 2.081588083771979, "grad_norm": 1.4149084091186523, "learning_rate": 9.548473684210527e-05, "loss": 0.4344, "step": 37173 }, { "epoch": 2.081644081084108, "grad_norm": 1.3202974796295166, "learning_rate": 9.548447368421053e-05, "loss": 0.3549, "step": 37174 }, { "epoch": 2.081700078396237, "grad_norm": 1.240950345993042, "learning_rate": 9.54842105263158e-05, "loss": 0.3334, "step": 37175 }, { "epoch": 2.081756075708366, "grad_norm": 1.1495739221572876, "learning_rate": 9.548394736842105e-05, "loss": 0.3302, "step": 37176 }, { "epoch": 2.081812073020495, "grad_norm": 1.6975127458572388, "learning_rate": 9.548368421052632e-05, "loss": 0.4443, "step": 37177 }, { "epoch": 2.081868070332624, "grad_norm": 0.9897533059120178, "learning_rate": 9.548342105263158e-05, "loss": 0.2651, "step": 37178 }, { "epoch": 2.0819240676447532, "grad_norm": 1.1337271928787231, "learning_rate": 9.548315789473686e-05, "loss": 
0.3466, "step": 37179 }, { "epoch": 2.0819800649568823, "grad_norm": 1.257933497428894, "learning_rate": 9.548289473684212e-05, "loss": 0.3768, "step": 37180 }, { "epoch": 2.0820360622690113, "grad_norm": 1.4609490633010864, "learning_rate": 9.548263157894736e-05, "loss": 0.4275, "step": 37181 }, { "epoch": 2.0820920595811403, "grad_norm": 1.231135606765747, "learning_rate": 9.548236842105264e-05, "loss": 0.5232, "step": 37182 }, { "epoch": 2.0821480568932693, "grad_norm": 1.0626839399337769, "learning_rate": 9.54821052631579e-05, "loss": 0.4598, "step": 37183 }, { "epoch": 2.0822040542053983, "grad_norm": 1.3281725645065308, "learning_rate": 9.548184210526317e-05, "loss": 0.4482, "step": 37184 }, { "epoch": 2.0822600515175274, "grad_norm": 1.34035062789917, "learning_rate": 9.548157894736842e-05, "loss": 0.4053, "step": 37185 }, { "epoch": 2.0823160488296564, "grad_norm": 1.2800393104553223, "learning_rate": 9.548131578947369e-05, "loss": 0.4128, "step": 37186 }, { "epoch": 2.0823720461417854, "grad_norm": 1.0975431203842163, "learning_rate": 9.548105263157895e-05, "loss": 0.3698, "step": 37187 }, { "epoch": 2.0824280434539144, "grad_norm": 1.3764777183532715, "learning_rate": 9.548078947368422e-05, "loss": 0.3715, "step": 37188 }, { "epoch": 2.0824840407660434, "grad_norm": 1.9758543968200684, "learning_rate": 9.548052631578948e-05, "loss": 0.3885, "step": 37189 }, { "epoch": 2.0825400380781725, "grad_norm": 1.273565411567688, "learning_rate": 9.548026315789474e-05, "loss": 0.4156, "step": 37190 }, { "epoch": 2.0825960353903015, "grad_norm": 1.8587754964828491, "learning_rate": 9.548e-05, "loss": 0.3303, "step": 37191 }, { "epoch": 2.0826520327024305, "grad_norm": 1.2714428901672363, "learning_rate": 9.547973684210527e-05, "loss": 0.3596, "step": 37192 }, { "epoch": 2.0827080300145595, "grad_norm": 1.3159571886062622, "learning_rate": 9.547947368421053e-05, "loss": 0.393, "step": 37193 }, { "epoch": 2.0827640273266885, "grad_norm": 1.1187297105789185, 
"learning_rate": 9.54792105263158e-05, "loss": 0.3307, "step": 37194 }, { "epoch": 2.0828200246388175, "grad_norm": 1.2388211488723755, "learning_rate": 9.547894736842105e-05, "loss": 0.4144, "step": 37195 }, { "epoch": 2.0828760219509466, "grad_norm": 1.1398829221725464, "learning_rate": 9.547868421052633e-05, "loss": 0.44, "step": 37196 }, { "epoch": 2.0829320192630756, "grad_norm": 1.4079337120056152, "learning_rate": 9.547842105263159e-05, "loss": 0.4342, "step": 37197 }, { "epoch": 2.0829880165752046, "grad_norm": 1.1257526874542236, "learning_rate": 9.547815789473685e-05, "loss": 0.3424, "step": 37198 }, { "epoch": 2.083044013887333, "grad_norm": 1.1795446872711182, "learning_rate": 9.54778947368421e-05, "loss": 0.4446, "step": 37199 }, { "epoch": 2.0831000111994626, "grad_norm": 1.1247566938400269, "learning_rate": 9.547763157894737e-05, "loss": 0.3227, "step": 37200 }, { "epoch": 2.083156008511591, "grad_norm": 1.0809983015060425, "learning_rate": 9.547736842105264e-05, "loss": 0.3475, "step": 37201 }, { "epoch": 2.0832120058237207, "grad_norm": 1.2458832263946533, "learning_rate": 9.54771052631579e-05, "loss": 0.3239, "step": 37202 }, { "epoch": 2.0832680031358493, "grad_norm": 1.3078027963638306, "learning_rate": 9.547684210526316e-05, "loss": 0.4875, "step": 37203 }, { "epoch": 2.0833240004479787, "grad_norm": 1.0511339902877808, "learning_rate": 9.547657894736842e-05, "loss": 0.3621, "step": 37204 }, { "epoch": 2.0833799977601073, "grad_norm": 1.3642507791519165, "learning_rate": 9.547631578947369e-05, "loss": 0.3654, "step": 37205 }, { "epoch": 2.0834359950722368, "grad_norm": 1.306526780128479, "learning_rate": 9.547605263157895e-05, "loss": 0.3201, "step": 37206 }, { "epoch": 2.0834919923843653, "grad_norm": 1.1872780323028564, "learning_rate": 9.547578947368422e-05, "loss": 0.3796, "step": 37207 }, { "epoch": 2.083547989696495, "grad_norm": 1.3992371559143066, "learning_rate": 9.547552631578947e-05, "loss": 0.6462, "step": 37208 }, { "epoch": 
2.0836039870086234, "grad_norm": 1.2817180156707764, "learning_rate": 9.547526315789474e-05, "loss": 0.4204, "step": 37209 }, { "epoch": 2.0836599843207524, "grad_norm": 1.1339430809020996, "learning_rate": 9.5475e-05, "loss": 0.3755, "step": 37210 }, { "epoch": 2.0837159816328814, "grad_norm": 1.2353686094284058, "learning_rate": 9.547473684210528e-05, "loss": 0.3471, "step": 37211 }, { "epoch": 2.0837719789450104, "grad_norm": 1.0560917854309082, "learning_rate": 9.547447368421054e-05, "loss": 0.3901, "step": 37212 }, { "epoch": 2.0838279762571394, "grad_norm": 1.170471429824829, "learning_rate": 9.54742105263158e-05, "loss": 0.3709, "step": 37213 }, { "epoch": 2.0838839735692685, "grad_norm": 1.3219298124313354, "learning_rate": 9.547394736842106e-05, "loss": 0.4167, "step": 37214 }, { "epoch": 2.0839399708813975, "grad_norm": 3.4093730449676514, "learning_rate": 9.547368421052632e-05, "loss": 0.4378, "step": 37215 }, { "epoch": 2.0839959681935265, "grad_norm": 1.3299362659454346, "learning_rate": 9.547342105263159e-05, "loss": 0.3874, "step": 37216 }, { "epoch": 2.0840519655056555, "grad_norm": 1.4559849500656128, "learning_rate": 9.547315789473685e-05, "loss": 0.5026, "step": 37217 }, { "epoch": 2.0841079628177845, "grad_norm": 1.4420535564422607, "learning_rate": 9.547289473684211e-05, "loss": 0.4487, "step": 37218 }, { "epoch": 2.0841639601299136, "grad_norm": 1.241303563117981, "learning_rate": 9.547263157894737e-05, "loss": 0.4259, "step": 37219 }, { "epoch": 2.0842199574420426, "grad_norm": 1.065914511680603, "learning_rate": 9.547236842105264e-05, "loss": 0.2619, "step": 37220 }, { "epoch": 2.0842759547541716, "grad_norm": 1.2404170036315918, "learning_rate": 9.54721052631579e-05, "loss": 0.3328, "step": 37221 }, { "epoch": 2.0843319520663006, "grad_norm": 1.225048303604126, "learning_rate": 9.547184210526316e-05, "loss": 0.3751, "step": 37222 }, { "epoch": 2.0843879493784296, "grad_norm": 1.1892751455307007, "learning_rate": 9.547157894736842e-05, 
"loss": 0.3604, "step": 37223 }, { "epoch": 2.0844439466905587, "grad_norm": 1.1867549419403076, "learning_rate": 9.54713157894737e-05, "loss": 0.3135, "step": 37224 }, { "epoch": 2.0844999440026877, "grad_norm": 1.1025311946868896, "learning_rate": 9.547105263157895e-05, "loss": 0.441, "step": 37225 }, { "epoch": 2.0845559413148167, "grad_norm": 1.0714370012283325, "learning_rate": 9.547078947368421e-05, "loss": 0.3109, "step": 37226 }, { "epoch": 2.0846119386269457, "grad_norm": 1.4138555526733398, "learning_rate": 9.547052631578947e-05, "loss": 0.3932, "step": 37227 }, { "epoch": 2.0846679359390747, "grad_norm": 1.2056083679199219, "learning_rate": 9.547026315789475e-05, "loss": 0.35, "step": 37228 }, { "epoch": 2.0847239332512038, "grad_norm": 2.1281239986419678, "learning_rate": 9.547e-05, "loss": 0.5038, "step": 37229 }, { "epoch": 2.0847799305633328, "grad_norm": 0.9300613403320312, "learning_rate": 9.546973684210528e-05, "loss": 0.2665, "step": 37230 }, { "epoch": 2.084835927875462, "grad_norm": 1.1529935598373413, "learning_rate": 9.546947368421053e-05, "loss": 0.3517, "step": 37231 }, { "epoch": 2.084891925187591, "grad_norm": 1.3257426023483276, "learning_rate": 9.546921052631578e-05, "loss": 0.4568, "step": 37232 }, { "epoch": 2.08494792249972, "grad_norm": Infinity, "learning_rate": 9.546921052631578e-05, "loss": 0.3722, "step": 37233 }, { "epoch": 2.085003919811849, "grad_norm": 1.205477237701416, "learning_rate": 9.546894736842106e-05, "loss": 0.4136, "step": 37234 }, { "epoch": 2.085059917123978, "grad_norm": 1.3416367769241333, "learning_rate": 9.546868421052632e-05, "loss": 0.4135, "step": 37235 }, { "epoch": 2.085115914436107, "grad_norm": 1.068905234336853, "learning_rate": 9.546842105263159e-05, "loss": 0.3123, "step": 37236 }, { "epoch": 2.085171911748236, "grad_norm": 1.1558846235275269, "learning_rate": 9.546815789473684e-05, "loss": 0.3197, "step": 37237 }, { "epoch": 2.085227909060365, "grad_norm": 1.882606863975525, "learning_rate": 
9.546789473684211e-05, "loss": 0.4793, "step": 37238 }, { "epoch": 2.085283906372494, "grad_norm": 1.1726343631744385, "learning_rate": 9.546763157894737e-05, "loss": 0.4171, "step": 37239 }, { "epoch": 2.085339903684623, "grad_norm": 3.1281239986419678, "learning_rate": 9.546736842105264e-05, "loss": 0.472, "step": 37240 }, { "epoch": 2.085395900996752, "grad_norm": 1.4461760520935059, "learning_rate": 9.546710526315789e-05, "loss": 0.5411, "step": 37241 }, { "epoch": 2.085451898308881, "grad_norm": 1.129280686378479, "learning_rate": 9.546684210526316e-05, "loss": 0.3339, "step": 37242 }, { "epoch": 2.08550789562101, "grad_norm": 1.1016062498092651, "learning_rate": 9.546657894736842e-05, "loss": 0.4184, "step": 37243 }, { "epoch": 2.085563892933139, "grad_norm": 1.110470175743103, "learning_rate": 9.54663157894737e-05, "loss": 0.512, "step": 37244 }, { "epoch": 2.085619890245268, "grad_norm": 1.012777328491211, "learning_rate": 9.546605263157896e-05, "loss": 0.2953, "step": 37245 }, { "epoch": 2.085675887557397, "grad_norm": 1.058820366859436, "learning_rate": 9.546578947368422e-05, "loss": 0.3438, "step": 37246 }, { "epoch": 2.085731884869526, "grad_norm": 1.2653052806854248, "learning_rate": 9.546552631578948e-05, "loss": 0.3219, "step": 37247 }, { "epoch": 2.085787882181655, "grad_norm": 1.0162802934646606, "learning_rate": 9.546526315789475e-05, "loss": 0.3695, "step": 37248 }, { "epoch": 2.085843879493784, "grad_norm": 1.2537152767181396, "learning_rate": 9.546500000000001e-05, "loss": 0.4265, "step": 37249 }, { "epoch": 2.085899876805913, "grad_norm": 1.3454452753067017, "learning_rate": 9.546473684210527e-05, "loss": 0.383, "step": 37250 }, { "epoch": 2.085955874118042, "grad_norm": 1.2169373035430908, "learning_rate": 9.546447368421053e-05, "loss": 0.3409, "step": 37251 }, { "epoch": 2.086011871430171, "grad_norm": 1.344805359840393, "learning_rate": 9.546421052631579e-05, "loss": 0.4391, "step": 37252 }, { "epoch": 2.0860678687423, "grad_norm": 
1.2403202056884766, "learning_rate": 9.546394736842106e-05, "loss": 0.3225, "step": 37253 }, { "epoch": 2.0861238660544292, "grad_norm": 1.2060853242874146, "learning_rate": 9.546368421052632e-05, "loss": 0.4632, "step": 37254 }, { "epoch": 2.0861798633665583, "grad_norm": 1.178431749343872, "learning_rate": 9.546342105263158e-05, "loss": 0.3967, "step": 37255 }, { "epoch": 2.0862358606786873, "grad_norm": 1.0614399909973145, "learning_rate": 9.546315789473684e-05, "loss": 0.3317, "step": 37256 }, { "epoch": 2.0862918579908163, "grad_norm": 1.1768767833709717, "learning_rate": 9.546289473684211e-05, "loss": 0.4176, "step": 37257 }, { "epoch": 2.0863478553029453, "grad_norm": 1.1512436866760254, "learning_rate": 9.546263157894737e-05, "loss": 0.3549, "step": 37258 }, { "epoch": 2.0864038526150743, "grad_norm": 2.1010122299194336, "learning_rate": 9.546236842105263e-05, "loss": 0.496, "step": 37259 }, { "epoch": 2.0864598499272033, "grad_norm": 1.272742748260498, "learning_rate": 9.546210526315789e-05, "loss": 0.3605, "step": 37260 }, { "epoch": 2.0865158472393324, "grad_norm": 1.2747138738632202, "learning_rate": 9.546184210526317e-05, "loss": 0.4403, "step": 37261 }, { "epoch": 2.0865718445514614, "grad_norm": 1.161557912826538, "learning_rate": 9.546157894736843e-05, "loss": 0.378, "step": 37262 }, { "epoch": 2.0866278418635904, "grad_norm": 1.02803635597229, "learning_rate": 9.54613157894737e-05, "loss": 0.3952, "step": 37263 }, { "epoch": 2.0866838391757194, "grad_norm": 1.4038516283035278, "learning_rate": 9.546105263157894e-05, "loss": 0.3973, "step": 37264 }, { "epoch": 2.0867398364878484, "grad_norm": 0.9738485217094421, "learning_rate": 9.546078947368422e-05, "loss": 0.3993, "step": 37265 }, { "epoch": 2.0867958337999775, "grad_norm": 1.2438774108886719, "learning_rate": 9.546052631578948e-05, "loss": 0.3888, "step": 37266 }, { "epoch": 2.0868518311121065, "grad_norm": 1.4068541526794434, "learning_rate": 9.546026315789474e-05, "loss": 0.4481, "step": 37267 
}, { "epoch": 2.0869078284242355, "grad_norm": 1.2054994106292725, "learning_rate": 9.546000000000001e-05, "loss": 0.4418, "step": 37268 }, { "epoch": 2.0869638257363645, "grad_norm": 1.1550027132034302, "learning_rate": 9.545973684210526e-05, "loss": 0.3843, "step": 37269 }, { "epoch": 2.0870198230484935, "grad_norm": 1.2914761304855347, "learning_rate": 9.545947368421053e-05, "loss": 0.4585, "step": 37270 }, { "epoch": 2.0870758203606226, "grad_norm": 1.358154535293579, "learning_rate": 9.545921052631579e-05, "loss": 0.4164, "step": 37271 }, { "epoch": 2.0871318176727516, "grad_norm": 1.304789662361145, "learning_rate": 9.545894736842106e-05, "loss": 0.5312, "step": 37272 }, { "epoch": 2.0871878149848806, "grad_norm": 1.2800456285476685, "learning_rate": 9.545868421052632e-05, "loss": 0.4901, "step": 37273 }, { "epoch": 2.0872438122970096, "grad_norm": 1.2947059869766235, "learning_rate": 9.545842105263158e-05, "loss": 0.3777, "step": 37274 }, { "epoch": 2.0872998096091386, "grad_norm": 1.2609716653823853, "learning_rate": 9.545815789473684e-05, "loss": 0.3797, "step": 37275 }, { "epoch": 2.0873558069212677, "grad_norm": 1.1620147228240967, "learning_rate": 9.545789473684212e-05, "loss": 0.4261, "step": 37276 }, { "epoch": 2.0874118042333967, "grad_norm": 1.3531605005264282, "learning_rate": 9.545763157894738e-05, "loss": 0.3702, "step": 37277 }, { "epoch": 2.0874678015455257, "grad_norm": 1.0374218225479126, "learning_rate": 9.545736842105264e-05, "loss": 0.3551, "step": 37278 }, { "epoch": 2.0875237988576547, "grad_norm": 1.289520263671875, "learning_rate": 9.54571052631579e-05, "loss": 0.3349, "step": 37279 }, { "epoch": 2.0875797961697837, "grad_norm": 1.2119457721710205, "learning_rate": 9.545684210526317e-05, "loss": 0.4018, "step": 37280 }, { "epoch": 2.0876357934819127, "grad_norm": 1.100595474243164, "learning_rate": 9.545657894736843e-05, "loss": 0.4861, "step": 37281 }, { "epoch": 2.0876917907940418, "grad_norm": 1.183851957321167, "learning_rate": 
9.545631578947369e-05, "loss": 0.5076, "step": 37282 }, { "epoch": 2.087747788106171, "grad_norm": 1.2981088161468506, "learning_rate": 9.545605263157895e-05, "loss": 0.506, "step": 37283 }, { "epoch": 2.0878037854183, "grad_norm": 1.307874321937561, "learning_rate": 9.545578947368422e-05, "loss": 0.4809, "step": 37284 }, { "epoch": 2.087859782730429, "grad_norm": 1.1047242879867554, "learning_rate": 9.545552631578948e-05, "loss": 0.368, "step": 37285 }, { "epoch": 2.087915780042558, "grad_norm": 1.3026601076126099, "learning_rate": 9.545526315789474e-05, "loss": 0.4654, "step": 37286 }, { "epoch": 2.087971777354687, "grad_norm": 0.9914889931678772, "learning_rate": 9.5455e-05, "loss": 0.3641, "step": 37287 }, { "epoch": 2.088027774666816, "grad_norm": 1.3385239839553833, "learning_rate": 9.545473684210526e-05, "loss": 0.3314, "step": 37288 }, { "epoch": 2.088083771978945, "grad_norm": 1.364428997039795, "learning_rate": 9.545447368421053e-05, "loss": 0.6244, "step": 37289 }, { "epoch": 2.088139769291074, "grad_norm": 1.5114381313323975, "learning_rate": 9.545421052631579e-05, "loss": 0.3504, "step": 37290 }, { "epoch": 2.088195766603203, "grad_norm": 0.9901379346847534, "learning_rate": 9.545394736842107e-05, "loss": 0.302, "step": 37291 }, { "epoch": 2.088251763915332, "grad_norm": 1.1798889636993408, "learning_rate": 9.545368421052631e-05, "loss": 0.4512, "step": 37292 }, { "epoch": 2.088307761227461, "grad_norm": 1.1171339750289917, "learning_rate": 9.545342105263159e-05, "loss": 0.4173, "step": 37293 }, { "epoch": 2.08836375853959, "grad_norm": 1.1986720561981201, "learning_rate": 9.545315789473685e-05, "loss": 0.4038, "step": 37294 }, { "epoch": 2.088419755851719, "grad_norm": 1.1060482263565063, "learning_rate": 9.545289473684212e-05, "loss": 0.3582, "step": 37295 }, { "epoch": 2.088475753163848, "grad_norm": 1.1861315965652466, "learning_rate": 9.545263157894736e-05, "loss": 0.4357, "step": 37296 }, { "epoch": 2.088531750475977, "grad_norm": 
1.3403488397598267, "learning_rate": 9.545236842105264e-05, "loss": 0.3187, "step": 37297 }, { "epoch": 2.088587747788106, "grad_norm": 1.2128984928131104, "learning_rate": 9.54521052631579e-05, "loss": 0.4774, "step": 37298 }, { "epoch": 2.088643745100235, "grad_norm": 1.0585711002349854, "learning_rate": 9.545184210526317e-05, "loss": 0.3591, "step": 37299 }, { "epoch": 2.088699742412364, "grad_norm": 1.1522780656814575, "learning_rate": 9.545157894736843e-05, "loss": 0.4074, "step": 37300 }, { "epoch": 2.088755739724493, "grad_norm": 1.0165148973464966, "learning_rate": 9.545131578947369e-05, "loss": 0.2194, "step": 37301 }, { "epoch": 2.088811737036622, "grad_norm": 1.1626001596450806, "learning_rate": 9.545105263157895e-05, "loss": 0.3202, "step": 37302 }, { "epoch": 2.088867734348751, "grad_norm": 1.2696199417114258, "learning_rate": 9.545078947368421e-05, "loss": 0.353, "step": 37303 }, { "epoch": 2.08892373166088, "grad_norm": 1.1939014196395874, "learning_rate": 9.545052631578948e-05, "loss": 0.4128, "step": 37304 }, { "epoch": 2.088979728973009, "grad_norm": 1.1139200925827026, "learning_rate": 9.545026315789474e-05, "loss": 0.3994, "step": 37305 }, { "epoch": 2.0890357262851382, "grad_norm": 1.2029638290405273, "learning_rate": 9.545e-05, "loss": 0.6494, "step": 37306 }, { "epoch": 2.0890917235972672, "grad_norm": 1.1315141916275024, "learning_rate": 9.544973684210526e-05, "loss": 0.3814, "step": 37307 }, { "epoch": 2.0891477209093963, "grad_norm": 1.1952275037765503, "learning_rate": 9.544947368421054e-05, "loss": 0.5266, "step": 37308 }, { "epoch": 2.0892037182215253, "grad_norm": 1.12501859664917, "learning_rate": 9.54492105263158e-05, "loss": 0.4222, "step": 37309 }, { "epoch": 2.0892597155336543, "grad_norm": 1.1186892986297607, "learning_rate": 9.544894736842106e-05, "loss": 0.406, "step": 37310 }, { "epoch": 2.0893157128457833, "grad_norm": 1.1692185401916504, "learning_rate": 9.544868421052631e-05, "loss": 0.3359, "step": 37311 }, { "epoch": 
2.0893717101579123, "grad_norm": 1.2075005769729614, "learning_rate": 9.544842105263159e-05, "loss": 0.4156, "step": 37312 }, { "epoch": 2.0894277074700414, "grad_norm": 1.2053308486938477, "learning_rate": 9.544815789473685e-05, "loss": 0.3962, "step": 37313 }, { "epoch": 2.0894837047821704, "grad_norm": 1.2531945705413818, "learning_rate": 9.544789473684211e-05, "loss": 0.3937, "step": 37314 }, { "epoch": 2.0895397020942994, "grad_norm": 1.2690480947494507, "learning_rate": 9.544763157894737e-05, "loss": 0.4204, "step": 37315 }, { "epoch": 2.0895956994064284, "grad_norm": 1.3338758945465088, "learning_rate": 9.544736842105264e-05, "loss": 0.4211, "step": 37316 }, { "epoch": 2.0896516967185574, "grad_norm": 1.301088809967041, "learning_rate": 9.54471052631579e-05, "loss": 0.3492, "step": 37317 }, { "epoch": 2.0897076940306865, "grad_norm": 1.335763931274414, "learning_rate": 9.544684210526317e-05, "loss": 0.345, "step": 37318 }, { "epoch": 2.0897636913428155, "grad_norm": 1.5093432664871216, "learning_rate": 9.544657894736842e-05, "loss": 0.3776, "step": 37319 }, { "epoch": 2.0898196886549445, "grad_norm": 1.188510537147522, "learning_rate": 9.544631578947368e-05, "loss": 0.4294, "step": 37320 }, { "epoch": 2.0898756859670735, "grad_norm": 0.995898425579071, "learning_rate": 9.544605263157895e-05, "loss": 0.2933, "step": 37321 }, { "epoch": 2.0899316832792025, "grad_norm": 1.7647291421890259, "learning_rate": 9.544578947368421e-05, "loss": 0.5691, "step": 37322 }, { "epoch": 2.0899876805913316, "grad_norm": 1.2489018440246582, "learning_rate": 9.544552631578949e-05, "loss": 0.4336, "step": 37323 }, { "epoch": 2.0900436779034606, "grad_norm": 1.1707091331481934, "learning_rate": 9.544526315789473e-05, "loss": 0.4447, "step": 37324 }, { "epoch": 2.0900996752155896, "grad_norm": 1.239990234375, "learning_rate": 9.5445e-05, "loss": 0.4153, "step": 37325 }, { "epoch": 2.0901556725277186, "grad_norm": 1.0833947658538818, "learning_rate": 9.544473684210526e-05, "loss": 
0.3389, "step": 37326 }, { "epoch": 2.0902116698398476, "grad_norm": 1.1745514869689941, "learning_rate": 9.544447368421054e-05, "loss": 0.4703, "step": 37327 }, { "epoch": 2.0902676671519766, "grad_norm": 1.8172285556793213, "learning_rate": 9.54442105263158e-05, "loss": 0.4035, "step": 37328 }, { "epoch": 2.0903236644641057, "grad_norm": 1.1282519102096558, "learning_rate": 9.544394736842106e-05, "loss": 0.332, "step": 37329 }, { "epoch": 2.0903796617762347, "grad_norm": 1.2324405908584595, "learning_rate": 9.544368421052632e-05, "loss": 0.4185, "step": 37330 }, { "epoch": 2.0904356590883637, "grad_norm": 1.5262449979782104, "learning_rate": 9.544342105263159e-05, "loss": 0.3879, "step": 37331 }, { "epoch": 2.0904916564004927, "grad_norm": 1.236594557762146, "learning_rate": 9.544315789473685e-05, "loss": 0.4264, "step": 37332 }, { "epoch": 2.0905476537126217, "grad_norm": 1.2415767908096313, "learning_rate": 9.544289473684211e-05, "loss": 0.4047, "step": 37333 }, { "epoch": 2.0906036510247508, "grad_norm": 1.2237541675567627, "learning_rate": 9.544263157894737e-05, "loss": 0.4503, "step": 37334 }, { "epoch": 2.09065964833688, "grad_norm": 1.3254679441452026, "learning_rate": 9.544236842105264e-05, "loss": 0.332, "step": 37335 }, { "epoch": 2.090715645649009, "grad_norm": 1.1052825450897217, "learning_rate": 9.54421052631579e-05, "loss": 0.3597, "step": 37336 }, { "epoch": 2.090771642961138, "grad_norm": 1.513141393661499, "learning_rate": 9.544184210526316e-05, "loss": 0.5085, "step": 37337 }, { "epoch": 2.090827640273267, "grad_norm": 1.5670636892318726, "learning_rate": 9.544157894736842e-05, "loss": 0.3812, "step": 37338 }, { "epoch": 2.090883637585396, "grad_norm": 1.4884428977966309, "learning_rate": 9.544131578947368e-05, "loss": 0.3865, "step": 37339 }, { "epoch": 2.090939634897525, "grad_norm": 1.1959185600280762, "learning_rate": 9.544105263157896e-05, "loss": 0.4192, "step": 37340 }, { "epoch": 2.090995632209654, "grad_norm": 1.0133477449417114, 
"learning_rate": 9.544078947368422e-05, "loss": 0.4587, "step": 37341 }, { "epoch": 2.091051629521783, "grad_norm": 1.1428687572479248, "learning_rate": 9.544052631578947e-05, "loss": 0.4325, "step": 37342 }, { "epoch": 2.091107626833912, "grad_norm": 1.1978991031646729, "learning_rate": 9.544026315789473e-05, "loss": 0.4032, "step": 37343 }, { "epoch": 2.091163624146041, "grad_norm": 1.2987525463104248, "learning_rate": 9.544000000000001e-05, "loss": 0.3846, "step": 37344 }, { "epoch": 2.09121962145817, "grad_norm": 1.6236987113952637, "learning_rate": 9.543973684210527e-05, "loss": 0.4488, "step": 37345 }, { "epoch": 2.091275618770299, "grad_norm": 1.1632187366485596, "learning_rate": 9.543947368421054e-05, "loss": 0.3504, "step": 37346 }, { "epoch": 2.091331616082428, "grad_norm": 1.1244224309921265, "learning_rate": 9.543921052631579e-05, "loss": 0.4028, "step": 37347 }, { "epoch": 2.091387613394557, "grad_norm": 1.3994784355163574, "learning_rate": 9.543894736842106e-05, "loss": 0.4271, "step": 37348 }, { "epoch": 2.091443610706686, "grad_norm": 1.2536182403564453, "learning_rate": 9.543868421052632e-05, "loss": 0.4585, "step": 37349 }, { "epoch": 2.091499608018815, "grad_norm": 4.12711238861084, "learning_rate": 9.54384210526316e-05, "loss": 0.4133, "step": 37350 }, { "epoch": 2.091555605330944, "grad_norm": 1.0105020999908447, "learning_rate": 9.543815789473684e-05, "loss": 0.3117, "step": 37351 }, { "epoch": 2.091611602643073, "grad_norm": 1.1619453430175781, "learning_rate": 9.543789473684211e-05, "loss": 0.3117, "step": 37352 }, { "epoch": 2.091667599955202, "grad_norm": 1.6516146659851074, "learning_rate": 9.543763157894737e-05, "loss": 0.409, "step": 37353 }, { "epoch": 2.091723597267331, "grad_norm": 1.3395391702651978, "learning_rate": 9.543736842105263e-05, "loss": 0.4325, "step": 37354 }, { "epoch": 2.09177959457946, "grad_norm": 1.1015377044677734, "learning_rate": 9.54371052631579e-05, "loss": 0.3454, "step": 37355 }, { "epoch": 2.091835591891589, 
"grad_norm": 1.5405291318893433, "learning_rate": 9.543684210526315e-05, "loss": 0.5277, "step": 37356 }, { "epoch": 2.091891589203718, "grad_norm": 1.5420892238616943, "learning_rate": 9.543657894736842e-05, "loss": 0.4734, "step": 37357 }, { "epoch": 2.0919475865158472, "grad_norm": 1.1422734260559082, "learning_rate": 9.543631578947368e-05, "loss": 0.4361, "step": 37358 }, { "epoch": 2.0920035838279762, "grad_norm": 1.1816012859344482, "learning_rate": 9.543605263157896e-05, "loss": 0.39, "step": 37359 }, { "epoch": 2.0920595811401053, "grad_norm": 1.4430456161499023, "learning_rate": 9.543578947368422e-05, "loss": 0.4375, "step": 37360 }, { "epoch": 2.0921155784522343, "grad_norm": 1.3723641633987427, "learning_rate": 9.543552631578948e-05, "loss": 0.618, "step": 37361 }, { "epoch": 2.0921715757643633, "grad_norm": 1.2190183401107788, "learning_rate": 9.543526315789474e-05, "loss": 0.4008, "step": 37362 }, { "epoch": 2.0922275730764923, "grad_norm": 1.0896131992340088, "learning_rate": 9.543500000000001e-05, "loss": 0.3852, "step": 37363 }, { "epoch": 2.0922835703886213, "grad_norm": 3.0846312046051025, "learning_rate": 9.543473684210527e-05, "loss": 0.3818, "step": 37364 }, { "epoch": 2.0923395677007504, "grad_norm": 1.1485261917114258, "learning_rate": 9.543447368421053e-05, "loss": 0.4209, "step": 37365 }, { "epoch": 2.0923955650128794, "grad_norm": 1.19721257686615, "learning_rate": 9.543421052631579e-05, "loss": 0.3192, "step": 37366 }, { "epoch": 2.0924515623250084, "grad_norm": 1.2039201259613037, "learning_rate": 9.543394736842106e-05, "loss": 0.369, "step": 37367 }, { "epoch": 2.0925075596371374, "grad_norm": 1.1867198944091797, "learning_rate": 9.543368421052632e-05, "loss": 0.4234, "step": 37368 }, { "epoch": 2.0925635569492664, "grad_norm": 1.2044740915298462, "learning_rate": 9.543342105263158e-05, "loss": 0.3206, "step": 37369 }, { "epoch": 2.0926195542613955, "grad_norm": 2.129072427749634, "learning_rate": 9.543315789473684e-05, "loss": 0.3831, 
"step": 37370 }, { "epoch": 2.0926755515735245, "grad_norm": 1.0805439949035645, "learning_rate": 9.54328947368421e-05, "loss": 0.3292, "step": 37371 }, { "epoch": 2.0927315488856535, "grad_norm": 1.4952677488327026, "learning_rate": 9.543263157894738e-05, "loss": 0.4247, "step": 37372 }, { "epoch": 2.0927875461977825, "grad_norm": 1.153788447380066, "learning_rate": 9.543236842105263e-05, "loss": 0.3438, "step": 37373 }, { "epoch": 2.0928435435099115, "grad_norm": 1.24036705493927, "learning_rate": 9.54321052631579e-05, "loss": 0.4539, "step": 37374 }, { "epoch": 2.0928995408220405, "grad_norm": 1.5538272857666016, "learning_rate": 9.543184210526315e-05, "loss": 0.6366, "step": 37375 }, { "epoch": 2.0929555381341696, "grad_norm": 1.5553739070892334, "learning_rate": 9.543157894736843e-05, "loss": 0.4747, "step": 37376 }, { "epoch": 2.0930115354462986, "grad_norm": 1.358181357383728, "learning_rate": 9.543131578947369e-05, "loss": 0.5924, "step": 37377 }, { "epoch": 2.0930675327584276, "grad_norm": 1.3236382007598877, "learning_rate": 9.543105263157896e-05, "loss": 0.3966, "step": 37378 }, { "epoch": 2.0931235300705566, "grad_norm": 1.1184443235397339, "learning_rate": 9.54307894736842e-05, "loss": 0.3685, "step": 37379 }, { "epoch": 2.0931795273826856, "grad_norm": 1.0814545154571533, "learning_rate": 9.543052631578948e-05, "loss": 0.3307, "step": 37380 }, { "epoch": 2.0932355246948147, "grad_norm": 1.1240146160125732, "learning_rate": 9.543026315789474e-05, "loss": 0.3621, "step": 37381 }, { "epoch": 2.0932915220069437, "grad_norm": 1.158893346786499, "learning_rate": 9.543000000000001e-05, "loss": 0.3994, "step": 37382 }, { "epoch": 2.0933475193190727, "grad_norm": 1.1639890670776367, "learning_rate": 9.542973684210527e-05, "loss": 0.4934, "step": 37383 }, { "epoch": 2.0934035166312017, "grad_norm": 1.4643397331237793, "learning_rate": 9.542947368421053e-05, "loss": 0.4854, "step": 37384 }, { "epoch": 2.0934595139433307, "grad_norm": 1.1275420188903809, 
"learning_rate": 9.542921052631579e-05, "loss": 0.3062, "step": 37385 }, { "epoch": 2.0935155112554598, "grad_norm": 1.1237006187438965, "learning_rate": 9.542894736842107e-05, "loss": 0.5781, "step": 37386 }, { "epoch": 2.093571508567589, "grad_norm": 1.3043904304504395, "learning_rate": 9.542868421052633e-05, "loss": 0.493, "step": 37387 }, { "epoch": 2.093627505879718, "grad_norm": 1.6650962829589844, "learning_rate": 9.542842105263157e-05, "loss": 0.5845, "step": 37388 }, { "epoch": 2.093683503191847, "grad_norm": 1.3205933570861816, "learning_rate": 9.542815789473684e-05, "loss": 0.3947, "step": 37389 }, { "epoch": 2.093739500503976, "grad_norm": 1.439923644065857, "learning_rate": 9.54278947368421e-05, "loss": 0.5636, "step": 37390 }, { "epoch": 2.093795497816105, "grad_norm": 1.2145929336547852, "learning_rate": 9.542763157894738e-05, "loss": 0.38, "step": 37391 }, { "epoch": 2.093851495128234, "grad_norm": 1.3981770277023315, "learning_rate": 9.542736842105264e-05, "loss": 0.3531, "step": 37392 }, { "epoch": 2.093907492440363, "grad_norm": 1.2881981134414673, "learning_rate": 9.54271052631579e-05, "loss": 0.4195, "step": 37393 }, { "epoch": 2.093963489752492, "grad_norm": 1.1832435131072998, "learning_rate": 9.542684210526316e-05, "loss": 0.439, "step": 37394 }, { "epoch": 2.094019487064621, "grad_norm": 1.1978384256362915, "learning_rate": 9.542657894736843e-05, "loss": 0.4055, "step": 37395 }, { "epoch": 2.09407548437675, "grad_norm": 1.3463934659957886, "learning_rate": 9.542631578947369e-05, "loss": 0.3306, "step": 37396 }, { "epoch": 2.094131481688879, "grad_norm": 1.0849336385726929, "learning_rate": 9.542605263157895e-05, "loss": 0.3562, "step": 37397 }, { "epoch": 2.094187479001008, "grad_norm": 1.3308988809585571, "learning_rate": 9.542578947368421e-05, "loss": 0.3903, "step": 37398 }, { "epoch": 2.094243476313137, "grad_norm": 1.2387291193008423, "learning_rate": 9.542552631578948e-05, "loss": 0.4327, "step": 37399 }, { "epoch": 2.094299473625266, 
"grad_norm": 1.2718130350112915, "learning_rate": 9.542526315789474e-05, "loss": 0.5289, "step": 37400 }, { "epoch": 2.094355470937395, "grad_norm": 1.0964674949645996, "learning_rate": 9.542500000000002e-05, "loss": 0.2977, "step": 37401 }, { "epoch": 2.094411468249524, "grad_norm": 1.032365322113037, "learning_rate": 9.542473684210526e-05, "loss": 0.3181, "step": 37402 }, { "epoch": 2.094467465561653, "grad_norm": 1.5739465951919556, "learning_rate": 9.542447368421054e-05, "loss": 0.4535, "step": 37403 }, { "epoch": 2.094523462873782, "grad_norm": 1.0292078256607056, "learning_rate": 9.54242105263158e-05, "loss": 0.4825, "step": 37404 }, { "epoch": 2.094579460185911, "grad_norm": 1.3064666986465454, "learning_rate": 9.542394736842107e-05, "loss": 0.4716, "step": 37405 }, { "epoch": 2.09463545749804, "grad_norm": 1.0690101385116577, "learning_rate": 9.542368421052631e-05, "loss": 0.5079, "step": 37406 }, { "epoch": 2.094691454810169, "grad_norm": 1.1750473976135254, "learning_rate": 9.542342105263157e-05, "loss": 0.4538, "step": 37407 }, { "epoch": 2.094747452122298, "grad_norm": 1.2519437074661255, "learning_rate": 9.542315789473685e-05, "loss": 0.4664, "step": 37408 }, { "epoch": 2.094803449434427, "grad_norm": 1.2530477046966553, "learning_rate": 9.542289473684211e-05, "loss": 0.4613, "step": 37409 }, { "epoch": 2.094859446746556, "grad_norm": 1.133747935295105, "learning_rate": 9.542263157894738e-05, "loss": 0.4216, "step": 37410 }, { "epoch": 2.0949154440586852, "grad_norm": 1.041358232498169, "learning_rate": 9.542236842105263e-05, "loss": 0.3232, "step": 37411 }, { "epoch": 2.0949714413708143, "grad_norm": 1.2870550155639648, "learning_rate": 9.54221052631579e-05, "loss": 0.4394, "step": 37412 }, { "epoch": 2.0950274386829433, "grad_norm": 1.2573827505111694, "learning_rate": 9.542184210526316e-05, "loss": 0.3643, "step": 37413 }, { "epoch": 2.0950834359950723, "grad_norm": 1.0182030200958252, "learning_rate": 9.542157894736843e-05, "loss": 0.413, "step": 
37414 }, { "epoch": 2.0951394333072013, "grad_norm": 1.5206785202026367, "learning_rate": 9.542131578947369e-05, "loss": 0.3778, "step": 37415 }, { "epoch": 2.0951954306193303, "grad_norm": 1.2366689443588257, "learning_rate": 9.542105263157895e-05, "loss": 0.3302, "step": 37416 }, { "epoch": 2.0952514279314594, "grad_norm": 1.2743369340896606, "learning_rate": 9.542078947368421e-05, "loss": 0.3246, "step": 37417 }, { "epoch": 2.0953074252435884, "grad_norm": 1.2836215496063232, "learning_rate": 9.542052631578949e-05, "loss": 0.5246, "step": 37418 }, { "epoch": 2.0953634225557174, "grad_norm": 1.1655752658843994, "learning_rate": 9.542026315789474e-05, "loss": 0.407, "step": 37419 }, { "epoch": 2.0954194198678464, "grad_norm": 1.2362353801727295, "learning_rate": 9.542e-05, "loss": 0.3755, "step": 37420 }, { "epoch": 2.0954754171799754, "grad_norm": 1.2940351963043213, "learning_rate": 9.541973684210526e-05, "loss": 0.4432, "step": 37421 }, { "epoch": 2.0955314144921044, "grad_norm": 1.0072232484817505, "learning_rate": 9.541947368421054e-05, "loss": 0.3858, "step": 37422 }, { "epoch": 2.0955874118042335, "grad_norm": 1.354242205619812, "learning_rate": 9.54192105263158e-05, "loss": 0.4157, "step": 37423 }, { "epoch": 2.0956434091163625, "grad_norm": 1.4414793252944946, "learning_rate": 9.541894736842106e-05, "loss": 0.4629, "step": 37424 }, { "epoch": 2.0956994064284915, "grad_norm": 1.1099823713302612, "learning_rate": 9.541868421052632e-05, "loss": 0.3359, "step": 37425 }, { "epoch": 2.0957554037406205, "grad_norm": 1.3464715480804443, "learning_rate": 9.541842105263158e-05, "loss": 0.3824, "step": 37426 }, { "epoch": 2.0958114010527495, "grad_norm": 1.2769676446914673, "learning_rate": 9.541815789473685e-05, "loss": 0.4031, "step": 37427 }, { "epoch": 2.0958673983648786, "grad_norm": 1.2056983709335327, "learning_rate": 9.541789473684211e-05, "loss": 0.3576, "step": 37428 }, { "epoch": 2.0959233956770076, "grad_norm": 1.1970973014831543, "learning_rate": 
9.541763157894737e-05, "loss": 0.3541, "step": 37429 }, { "epoch": 2.0959793929891366, "grad_norm": 1.2029637098312378, "learning_rate": 9.541736842105263e-05, "loss": 0.4307, "step": 37430 }, { "epoch": 2.0960353903012656, "grad_norm": 1.3212459087371826, "learning_rate": 9.54171052631579e-05, "loss": 0.3502, "step": 37431 }, { "epoch": 2.0960913876133946, "grad_norm": 1.5098742246627808, "learning_rate": 9.541684210526316e-05, "loss": 0.3903, "step": 37432 }, { "epoch": 2.0961473849255237, "grad_norm": 1.0971882343292236, "learning_rate": 9.541657894736844e-05, "loss": 0.3606, "step": 37433 }, { "epoch": 2.0962033822376527, "grad_norm": 1.0959628820419312, "learning_rate": 9.541631578947368e-05, "loss": 0.3228, "step": 37434 }, { "epoch": 2.0962593795497817, "grad_norm": 1.2527858018875122, "learning_rate": 9.541605263157895e-05, "loss": 0.3596, "step": 37435 }, { "epoch": 2.0963153768619107, "grad_norm": 1.2567248344421387, "learning_rate": 9.541578947368421e-05, "loss": 0.4379, "step": 37436 }, { "epoch": 2.0963713741740397, "grad_norm": 1.4920306205749512, "learning_rate": 9.541552631578949e-05, "loss": 0.5387, "step": 37437 }, { "epoch": 2.0964273714861688, "grad_norm": 1.2100061178207397, "learning_rate": 9.541526315789475e-05, "loss": 0.3701, "step": 37438 }, { "epoch": 2.0964833687982978, "grad_norm": 1.143375039100647, "learning_rate": 9.541500000000001e-05, "loss": 0.4088, "step": 37439 }, { "epoch": 2.096539366110427, "grad_norm": 1.1372005939483643, "learning_rate": 9.541473684210527e-05, "loss": 0.3769, "step": 37440 }, { "epoch": 2.096595363422556, "grad_norm": 1.1026577949523926, "learning_rate": 9.541447368421053e-05, "loss": 0.4592, "step": 37441 }, { "epoch": 2.096651360734685, "grad_norm": 1.075384259223938, "learning_rate": 9.54142105263158e-05, "loss": 0.4273, "step": 37442 }, { "epoch": 2.096707358046814, "grad_norm": 1.0644259452819824, "learning_rate": 9.541394736842105e-05, "loss": 0.3473, "step": 37443 }, { "epoch": 2.096763355358943, 
"grad_norm": 1.162556529045105, "learning_rate": 9.541368421052632e-05, "loss": 0.3998, "step": 37444 }, { "epoch": 2.096819352671072, "grad_norm": 1.1349822282791138, "learning_rate": 9.541342105263158e-05, "loss": 0.315, "step": 37445 }, { "epoch": 2.096875349983201, "grad_norm": 0.964417040348053, "learning_rate": 9.541315789473685e-05, "loss": 0.3121, "step": 37446 }, { "epoch": 2.09693134729533, "grad_norm": 1.3284751176834106, "learning_rate": 9.541289473684211e-05, "loss": 0.4745, "step": 37447 }, { "epoch": 2.096987344607459, "grad_norm": 1.365309238433838, "learning_rate": 9.541263157894737e-05, "loss": 0.4909, "step": 37448 }, { "epoch": 2.097043341919588, "grad_norm": 1.2811161279678345, "learning_rate": 9.541236842105263e-05, "loss": 0.4636, "step": 37449 }, { "epoch": 2.097099339231717, "grad_norm": 1.3605387210845947, "learning_rate": 9.54121052631579e-05, "loss": 0.4437, "step": 37450 }, { "epoch": 2.097155336543846, "grad_norm": 1.6411168575286865, "learning_rate": 9.541184210526316e-05, "loss": 0.3933, "step": 37451 }, { "epoch": 2.097211333855975, "grad_norm": 1.4493459463119507, "learning_rate": 9.541157894736842e-05, "loss": 0.4003, "step": 37452 }, { "epoch": 2.097267331168104, "grad_norm": 1.049507737159729, "learning_rate": 9.541131578947368e-05, "loss": 0.4041, "step": 37453 }, { "epoch": 2.097323328480233, "grad_norm": 1.447320818901062, "learning_rate": 9.541105263157896e-05, "loss": 0.5007, "step": 37454 }, { "epoch": 2.097379325792362, "grad_norm": 1.0521488189697266, "learning_rate": 9.541078947368422e-05, "loss": 0.4576, "step": 37455 }, { "epoch": 2.097435323104491, "grad_norm": 1.0134435892105103, "learning_rate": 9.541052631578948e-05, "loss": 0.365, "step": 37456 }, { "epoch": 2.09749132041662, "grad_norm": 1.0665197372436523, "learning_rate": 9.541026315789474e-05, "loss": 0.3322, "step": 37457 }, { "epoch": 2.097547317728749, "grad_norm": 1.5423494577407837, "learning_rate": 9.541e-05, "loss": 0.5235, "step": 37458 }, { "epoch": 
2.097603315040878, "grad_norm": 1.1237189769744873, "learning_rate": 9.540973684210527e-05, "loss": 0.3981, "step": 37459 }, { "epoch": 2.097659312353007, "grad_norm": 1.0664355754852295, "learning_rate": 9.540947368421053e-05, "loss": 0.3673, "step": 37460 }, { "epoch": 2.097715309665136, "grad_norm": 1.0892367362976074, "learning_rate": 9.540921052631579e-05, "loss": 0.433, "step": 37461 }, { "epoch": 2.097771306977265, "grad_norm": 1.2048622369766235, "learning_rate": 9.540894736842105e-05, "loss": 0.4602, "step": 37462 }, { "epoch": 2.0978273042893942, "grad_norm": 1.3197017908096313, "learning_rate": 9.540868421052632e-05, "loss": 0.5158, "step": 37463 }, { "epoch": 2.0978833016015233, "grad_norm": 1.4500712156295776, "learning_rate": 9.540842105263158e-05, "loss": 0.4806, "step": 37464 }, { "epoch": 2.0979392989136523, "grad_norm": 1.0754746198654175, "learning_rate": 9.540815789473685e-05, "loss": 0.3819, "step": 37465 }, { "epoch": 2.0979952962257813, "grad_norm": 1.3551568984985352, "learning_rate": 9.54078947368421e-05, "loss": 0.4861, "step": 37466 }, { "epoch": 2.0980512935379103, "grad_norm": 1.2907880544662476, "learning_rate": 9.540763157894737e-05, "loss": 0.4613, "step": 37467 }, { "epoch": 2.0981072908500393, "grad_norm": 1.043502926826477, "learning_rate": 9.540736842105263e-05, "loss": 0.4111, "step": 37468 }, { "epoch": 2.0981632881621683, "grad_norm": 1.3776546716690063, "learning_rate": 9.540710526315791e-05, "loss": 0.4498, "step": 37469 }, { "epoch": 2.0982192854742974, "grad_norm": 1.6057136058807373, "learning_rate": 9.540684210526317e-05, "loss": 0.4522, "step": 37470 }, { "epoch": 2.0982752827864264, "grad_norm": 1.2646669149398804, "learning_rate": 9.540657894736843e-05, "loss": 0.4493, "step": 37471 }, { "epoch": 2.0983312800985554, "grad_norm": 1.35685133934021, "learning_rate": 9.540631578947369e-05, "loss": 0.4374, "step": 37472 }, { "epoch": 2.0983872774106844, "grad_norm": 1.2141730785369873, "learning_rate": 
9.540605263157896e-05, "loss": 0.336, "step": 37473 }, { "epoch": 2.0984432747228134, "grad_norm": 0.959006130695343, "learning_rate": 9.540578947368422e-05, "loss": 0.3191, "step": 37474 }, { "epoch": 2.0984992720349425, "grad_norm": 1.154911756515503, "learning_rate": 9.540552631578948e-05, "loss": 0.4295, "step": 37475 }, { "epoch": 2.0985552693470715, "grad_norm": 1.1139824390411377, "learning_rate": 9.540526315789474e-05, "loss": 0.3457, "step": 37476 }, { "epoch": 2.0986112666592005, "grad_norm": 1.1385635137557983, "learning_rate": 9.5405e-05, "loss": 0.4435, "step": 37477 }, { "epoch": 2.0986672639713295, "grad_norm": 1.2122125625610352, "learning_rate": 9.540473684210527e-05, "loss": 0.4658, "step": 37478 }, { "epoch": 2.0987232612834585, "grad_norm": 1.038793921470642, "learning_rate": 9.540447368421053e-05, "loss": 0.3161, "step": 37479 }, { "epoch": 2.0987792585955876, "grad_norm": 1.1674559116363525, "learning_rate": 9.540421052631579e-05, "loss": 0.3696, "step": 37480 }, { "epoch": 2.0988352559077166, "grad_norm": 1.4023323059082031, "learning_rate": 9.540394736842105e-05, "loss": 0.5009, "step": 37481 }, { "epoch": 2.0988912532198456, "grad_norm": 1.4067128896713257, "learning_rate": 9.540368421052632e-05, "loss": 0.3737, "step": 37482 }, { "epoch": 2.0989472505319746, "grad_norm": 1.057881474494934, "learning_rate": 9.540342105263158e-05, "loss": 0.4268, "step": 37483 }, { "epoch": 2.0990032478441036, "grad_norm": null, "learning_rate": 9.540342105263158e-05, "loss": 0.5112, "step": 37484 }, { "epoch": 2.0990592451562327, "grad_norm": 1.0927484035491943, "learning_rate": 9.540315789473684e-05, "loss": 0.3716, "step": 37485 }, { "epoch": 2.0991152424683617, "grad_norm": 1.2988111972808838, "learning_rate": 9.54028947368421e-05, "loss": 0.4031, "step": 37486 }, { "epoch": 2.0991712397804907, "grad_norm": 1.22554349899292, "learning_rate": 9.540263157894738e-05, "loss": 0.3964, "step": 37487 }, { "epoch": 2.0992272370926197, "grad_norm": 
1.038845181465149, "learning_rate": 9.540236842105264e-05, "loss": 0.4681, "step": 37488 }, { "epoch": 2.0992832344047487, "grad_norm": 1.2106190919876099, "learning_rate": 9.540210526315791e-05, "loss": 0.3466, "step": 37489 }, { "epoch": 2.0993392317168778, "grad_norm": 1.1918106079101562, "learning_rate": 9.540184210526316e-05, "loss": 0.3871, "step": 37490 }, { "epoch": 2.0993952290290068, "grad_norm": 1.5124584436416626, "learning_rate": 9.540157894736843e-05, "loss": 0.5388, "step": 37491 }, { "epoch": 2.099451226341136, "grad_norm": 1.2367541790008545, "learning_rate": 9.540131578947369e-05, "loss": 0.4239, "step": 37492 }, { "epoch": 2.099507223653265, "grad_norm": 1.241233468055725, "learning_rate": 9.540105263157895e-05, "loss": 0.4121, "step": 37493 }, { "epoch": 2.099563220965394, "grad_norm": 1.0106979608535767, "learning_rate": 9.540078947368422e-05, "loss": 0.3556, "step": 37494 }, { "epoch": 2.099619218277523, "grad_norm": 1.0835684537887573, "learning_rate": 9.540052631578947e-05, "loss": 0.371, "step": 37495 }, { "epoch": 2.099675215589652, "grad_norm": 1.2277915477752686, "learning_rate": 9.540026315789474e-05, "loss": 0.3772, "step": 37496 }, { "epoch": 2.099731212901781, "grad_norm": 1.3299797773361206, "learning_rate": 9.54e-05, "loss": 0.3928, "step": 37497 }, { "epoch": 2.09978721021391, "grad_norm": 1.2439411878585815, "learning_rate": 9.539973684210527e-05, "loss": 0.3833, "step": 37498 }, { "epoch": 2.099843207526039, "grad_norm": 1.1844677925109863, "learning_rate": 9.539947368421052e-05, "loss": 0.4501, "step": 37499 }, { "epoch": 2.099899204838168, "grad_norm": 1.5768829584121704, "learning_rate": 9.53992105263158e-05, "loss": 0.3532, "step": 37500 }, { "epoch": 2.099955202150297, "grad_norm": 1.0400227308273315, "learning_rate": 9.539894736842105e-05, "loss": 0.3932, "step": 37501 }, { "epoch": 2.100011199462426, "grad_norm": 1.2879549264907837, "learning_rate": 9.539868421052633e-05, "loss": 0.3829, "step": 37502 }, { "epoch": 
2.100067196774555, "grad_norm": 1.179744005203247, "learning_rate": 9.539842105263159e-05, "loss": 0.3992, "step": 37503 }, { "epoch": 2.100123194086684, "grad_norm": 1.1425442695617676, "learning_rate": 9.539815789473685e-05, "loss": 0.4249, "step": 37504 }, { "epoch": 2.100179191398813, "grad_norm": 1.0603622198104858, "learning_rate": 9.53978947368421e-05, "loss": 0.311, "step": 37505 }, { "epoch": 2.100235188710942, "grad_norm": 1.3630765676498413, "learning_rate": 9.539763157894738e-05, "loss": 0.3658, "step": 37506 }, { "epoch": 2.100291186023071, "grad_norm": 1.2215807437896729, "learning_rate": 9.539736842105264e-05, "loss": 0.5665, "step": 37507 }, { "epoch": 2.1003471833352, "grad_norm": 1.2204254865646362, "learning_rate": 9.53971052631579e-05, "loss": 0.365, "step": 37508 }, { "epoch": 2.100403180647329, "grad_norm": 1.1640019416809082, "learning_rate": 9.539684210526316e-05, "loss": 0.5645, "step": 37509 }, { "epoch": 2.100459177959458, "grad_norm": 1.2654796838760376, "learning_rate": 9.539657894736842e-05, "loss": 0.4639, "step": 37510 }, { "epoch": 2.100515175271587, "grad_norm": 1.2631561756134033, "learning_rate": 9.539631578947369e-05, "loss": 0.4163, "step": 37511 }, { "epoch": 2.100571172583716, "grad_norm": 1.1944363117218018, "learning_rate": 9.539605263157895e-05, "loss": 0.4792, "step": 37512 }, { "epoch": 2.100627169895845, "grad_norm": 1.024147629737854, "learning_rate": 9.539578947368421e-05, "loss": 0.3747, "step": 37513 }, { "epoch": 2.100683167207974, "grad_norm": 1.2537106275558472, "learning_rate": 9.539552631578947e-05, "loss": 0.427, "step": 37514 }, { "epoch": 2.1007391645201032, "grad_norm": 1.1115913391113281, "learning_rate": 9.539526315789474e-05, "loss": 0.416, "step": 37515 }, { "epoch": 2.1007951618322322, "grad_norm": 1.4281648397445679, "learning_rate": 9.5395e-05, "loss": 0.4033, "step": 37516 }, { "epoch": 2.1008511591443613, "grad_norm": 1.257993459701538, "learning_rate": 9.539473684210526e-05, "loss": 0.3923, 
"step": 37517 }, { "epoch": 2.1009071564564903, "grad_norm": 1.2031153440475464, "learning_rate": 9.539447368421052e-05, "loss": 0.3821, "step": 37518 }, { "epoch": 2.1009631537686193, "grad_norm": 1.0563081502914429, "learning_rate": 9.53942105263158e-05, "loss": 0.4296, "step": 37519 }, { "epoch": 2.1010191510807483, "grad_norm": 1.3949379920959473, "learning_rate": 9.539394736842106e-05, "loss": 0.4826, "step": 37520 }, { "epoch": 2.1010751483928773, "grad_norm": 1.1932613849639893, "learning_rate": 9.539368421052633e-05, "loss": 0.3783, "step": 37521 }, { "epoch": 2.1011311457050064, "grad_norm": 1.34486985206604, "learning_rate": 9.539342105263158e-05, "loss": 0.4214, "step": 37522 }, { "epoch": 2.1011871430171354, "grad_norm": 1.2657222747802734, "learning_rate": 9.539315789473685e-05, "loss": 0.4101, "step": 37523 }, { "epoch": 2.1012431403292644, "grad_norm": 1.351082444190979, "learning_rate": 9.539289473684211e-05, "loss": 0.3538, "step": 37524 }, { "epoch": 2.1012991376413934, "grad_norm": 1.369026780128479, "learning_rate": 9.539263157894738e-05, "loss": 0.4197, "step": 37525 }, { "epoch": 2.1013551349535224, "grad_norm": 1.1988039016723633, "learning_rate": 9.539236842105264e-05, "loss": 0.4993, "step": 37526 }, { "epoch": 2.1014111322656515, "grad_norm": 1.323513388633728, "learning_rate": 9.53921052631579e-05, "loss": 0.4617, "step": 37527 }, { "epoch": 2.1014671295777805, "grad_norm": 1.4121928215026855, "learning_rate": 9.539184210526316e-05, "loss": 0.3061, "step": 37528 }, { "epoch": 2.1015231268899095, "grad_norm": 1.47354257106781, "learning_rate": 9.539157894736842e-05, "loss": 0.5402, "step": 37529 }, { "epoch": 2.101579124202038, "grad_norm": 1.6160423755645752, "learning_rate": 9.53913157894737e-05, "loss": 0.3643, "step": 37530 }, { "epoch": 2.1016351215141675, "grad_norm": 1.4825937747955322, "learning_rate": 9.539105263157895e-05, "loss": 0.5207, "step": 37531 }, { "epoch": 2.101691118826296, "grad_norm": 1.3338812589645386, 
"learning_rate": 9.539078947368421e-05, "loss": 0.3811, "step": 37532 }, { "epoch": 2.1017471161384256, "grad_norm": 1.099802851676941, "learning_rate": 9.539052631578947e-05, "loss": 0.4325, "step": 37533 }, { "epoch": 2.101803113450554, "grad_norm": 1.2754861116409302, "learning_rate": 9.539026315789475e-05, "loss": 0.4251, "step": 37534 }, { "epoch": 2.1018591107626836, "grad_norm": 1.2157913446426392, "learning_rate": 9.539e-05, "loss": 0.3712, "step": 37535 }, { "epoch": 2.101915108074812, "grad_norm": 2.3159897327423096, "learning_rate": 9.538973684210527e-05, "loss": 0.3095, "step": 37536 }, { "epoch": 2.1019711053869417, "grad_norm": 1.369210124015808, "learning_rate": 9.538947368421053e-05, "loss": 0.3814, "step": 37537 }, { "epoch": 2.1020271026990702, "grad_norm": 1.0498032569885254, "learning_rate": 9.53892105263158e-05, "loss": 0.2957, "step": 37538 }, { "epoch": 2.1020831000111997, "grad_norm": 1.3048136234283447, "learning_rate": 9.538894736842106e-05, "loss": 0.4662, "step": 37539 }, { "epoch": 2.1021390973233283, "grad_norm": 1.214530348777771, "learning_rate": 9.538868421052632e-05, "loss": 0.4265, "step": 37540 }, { "epoch": 2.1021950946354573, "grad_norm": 1.177441120147705, "learning_rate": 9.538842105263158e-05, "loss": 0.3527, "step": 37541 }, { "epoch": 2.1022510919475863, "grad_norm": 1.0436906814575195, "learning_rate": 9.538815789473685e-05, "loss": 0.3506, "step": 37542 }, { "epoch": 2.1023070892597153, "grad_norm": 1.4231051206588745, "learning_rate": 9.538789473684211e-05, "loss": 0.6043, "step": 37543 }, { "epoch": 2.1023630865718443, "grad_norm": 1.1546419858932495, "learning_rate": 9.538763157894738e-05, "loss": 0.3679, "step": 37544 }, { "epoch": 2.1024190838839734, "grad_norm": 1.2269538640975952, "learning_rate": 9.538736842105263e-05, "loss": 0.4475, "step": 37545 }, { "epoch": 2.1024750811961024, "grad_norm": 1.3043169975280762, "learning_rate": 9.538710526315789e-05, "loss": 0.4066, "step": 37546 }, { "epoch": 
2.1025310785082314, "grad_norm": 1.1699858903884888, "learning_rate": 9.538684210526316e-05, "loss": 0.4026, "step": 37547 }, { "epoch": 2.1025870758203604, "grad_norm": 1.221286654472351, "learning_rate": 9.538657894736842e-05, "loss": 0.4903, "step": 37548 }, { "epoch": 2.1026430731324894, "grad_norm": 1.325402021408081, "learning_rate": 9.53863157894737e-05, "loss": 0.4644, "step": 37549 }, { "epoch": 2.1026990704446185, "grad_norm": 1.1518927812576294, "learning_rate": 9.538605263157894e-05, "loss": 0.3624, "step": 37550 }, { "epoch": 2.1027550677567475, "grad_norm": 1.0352146625518799, "learning_rate": 9.538578947368422e-05, "loss": 0.3069, "step": 37551 }, { "epoch": 2.1028110650688765, "grad_norm": 1.0786614418029785, "learning_rate": 9.538552631578948e-05, "loss": 0.368, "step": 37552 }, { "epoch": 2.1028670623810055, "grad_norm": 1.1645514965057373, "learning_rate": 9.538526315789475e-05, "loss": 0.4692, "step": 37553 }, { "epoch": 2.1029230596931345, "grad_norm": 1.149208903312683, "learning_rate": 9.5385e-05, "loss": 0.474, "step": 37554 }, { "epoch": 2.1029790570052636, "grad_norm": 1.4023059606552124, "learning_rate": 9.538473684210527e-05, "loss": 0.4279, "step": 37555 }, { "epoch": 2.1030350543173926, "grad_norm": 1.1420704126358032, "learning_rate": 9.538447368421053e-05, "loss": 0.4171, "step": 37556 }, { "epoch": 2.1030910516295216, "grad_norm": 1.4176591634750366, "learning_rate": 9.53842105263158e-05, "loss": 0.4781, "step": 37557 }, { "epoch": 2.1031470489416506, "grad_norm": 1.1842955350875854, "learning_rate": 9.538394736842106e-05, "loss": 0.3584, "step": 37558 }, { "epoch": 2.1032030462537796, "grad_norm": 1.2433788776397705, "learning_rate": 9.538368421052632e-05, "loss": 0.383, "step": 37559 }, { "epoch": 2.1032590435659086, "grad_norm": 1.3103694915771484, "learning_rate": 9.538342105263158e-05, "loss": 0.4348, "step": 37560 }, { "epoch": 2.1033150408780377, "grad_norm": 1.1505239009857178, "learning_rate": 9.538315789473685e-05, "loss": 
0.3615, "step": 37561 }, { "epoch": 2.1033710381901667, "grad_norm": 1.1136572360992432, "learning_rate": 9.538289473684211e-05, "loss": 0.3285, "step": 37562 }, { "epoch": 2.1034270355022957, "grad_norm": 1.0530961751937866, "learning_rate": 9.538263157894737e-05, "loss": 0.3297, "step": 37563 }, { "epoch": 2.1034830328144247, "grad_norm": 1.1275643110275269, "learning_rate": 9.538236842105263e-05, "loss": 0.3673, "step": 37564 }, { "epoch": 2.1035390301265537, "grad_norm": 1.1657040119171143, "learning_rate": 9.53821052631579e-05, "loss": 0.4207, "step": 37565 }, { "epoch": 2.1035950274386828, "grad_norm": 1.2637416124343872, "learning_rate": 9.538184210526317e-05, "loss": 0.3207, "step": 37566 }, { "epoch": 2.103651024750812, "grad_norm": 1.205443263053894, "learning_rate": 9.538157894736843e-05, "loss": 0.3905, "step": 37567 }, { "epoch": 2.103707022062941, "grad_norm": 1.214951992034912, "learning_rate": 9.538131578947369e-05, "loss": 0.4477, "step": 37568 }, { "epoch": 2.10376301937507, "grad_norm": 1.2780406475067139, "learning_rate": 9.538105263157895e-05, "loss": 0.3984, "step": 37569 }, { "epoch": 2.103819016687199, "grad_norm": 1.2695943117141724, "learning_rate": 9.538078947368422e-05, "loss": 0.3483, "step": 37570 }, { "epoch": 2.103875013999328, "grad_norm": 1.2124972343444824, "learning_rate": 9.538052631578948e-05, "loss": 0.3902, "step": 37571 }, { "epoch": 2.103931011311457, "grad_norm": 1.0899230241775513, "learning_rate": 9.538026315789474e-05, "loss": 0.3797, "step": 37572 }, { "epoch": 2.103987008623586, "grad_norm": 1.3242824077606201, "learning_rate": 9.538e-05, "loss": 0.3386, "step": 37573 }, { "epoch": 2.104043005935715, "grad_norm": 1.321236491203308, "learning_rate": 9.537973684210527e-05, "loss": 0.3262, "step": 37574 }, { "epoch": 2.104099003247844, "grad_norm": 1.4211359024047852, "learning_rate": 9.537947368421053e-05, "loss": 0.3566, "step": 37575 }, { "epoch": 2.104155000559973, "grad_norm": 1.4502272605895996, "learning_rate": 
9.53792105263158e-05, "loss": 0.4676, "step": 37576 }, { "epoch": 2.104210997872102, "grad_norm": 1.1795663833618164, "learning_rate": 9.537894736842105e-05, "loss": 0.4295, "step": 37577 }, { "epoch": 2.104266995184231, "grad_norm": 1.2600855827331543, "learning_rate": 9.537868421052632e-05, "loss": 0.4081, "step": 37578 }, { "epoch": 2.10432299249636, "grad_norm": 1.0279121398925781, "learning_rate": 9.537842105263158e-05, "loss": 0.4388, "step": 37579 }, { "epoch": 2.104378989808489, "grad_norm": 1.7014554738998413, "learning_rate": 9.537815789473684e-05, "loss": 0.4917, "step": 37580 }, { "epoch": 2.104434987120618, "grad_norm": 1.2564527988433838, "learning_rate": 9.537789473684212e-05, "loss": 0.4139, "step": 37581 }, { "epoch": 2.104490984432747, "grad_norm": 1.1442245244979858, "learning_rate": 9.537763157894736e-05, "loss": 0.4013, "step": 37582 }, { "epoch": 2.104546981744876, "grad_norm": 1.162278652191162, "learning_rate": 9.537736842105264e-05, "loss": 0.5206, "step": 37583 }, { "epoch": 2.104602979057005, "grad_norm": 2.273815155029297, "learning_rate": 9.53771052631579e-05, "loss": 0.439, "step": 37584 }, { "epoch": 2.104658976369134, "grad_norm": 1.1859922409057617, "learning_rate": 9.537684210526317e-05, "loss": 0.4164, "step": 37585 }, { "epoch": 2.104714973681263, "grad_norm": 1.170497179031372, "learning_rate": 9.537657894736843e-05, "loss": 0.3093, "step": 37586 }, { "epoch": 2.104770970993392, "grad_norm": 1.0040929317474365, "learning_rate": 9.537631578947369e-05, "loss": 0.4037, "step": 37587 }, { "epoch": 2.104826968305521, "grad_norm": 1.472621202468872, "learning_rate": 9.537605263157895e-05, "loss": 0.3692, "step": 37588 }, { "epoch": 2.10488296561765, "grad_norm": 1.2446300983428955, "learning_rate": 9.537578947368422e-05, "loss": 0.5026, "step": 37589 }, { "epoch": 2.1049389629297792, "grad_norm": 1.1514365673065186, "learning_rate": 9.537552631578948e-05, "loss": 0.4952, "step": 37590 }, { "epoch": 2.1049949602419082, "grad_norm": 
1.3575093746185303, "learning_rate": 9.537526315789474e-05, "loss": 0.4979, "step": 37591 }, { "epoch": 2.1050509575540373, "grad_norm": 1.142269253730774, "learning_rate": 9.5375e-05, "loss": 0.3675, "step": 37592 }, { "epoch": 2.1051069548661663, "grad_norm": 1.1615089178085327, "learning_rate": 9.537473684210527e-05, "loss": 0.4012, "step": 37593 }, { "epoch": 2.1051629521782953, "grad_norm": 1.3611550331115723, "learning_rate": 9.537447368421053e-05, "loss": 0.3028, "step": 37594 }, { "epoch": 2.1052189494904243, "grad_norm": 1.0073506832122803, "learning_rate": 9.53742105263158e-05, "loss": 0.3443, "step": 37595 }, { "epoch": 2.1052749468025533, "grad_norm": 1.3089855909347534, "learning_rate": 9.537394736842105e-05, "loss": 0.4662, "step": 37596 }, { "epoch": 2.1053309441146824, "grad_norm": 1.0892695188522339, "learning_rate": 9.537368421052631e-05, "loss": 0.3493, "step": 37597 }, { "epoch": 2.1053869414268114, "grad_norm": 1.1969753503799438, "learning_rate": 9.537342105263159e-05, "loss": 0.4227, "step": 37598 }, { "epoch": 2.1054429387389404, "grad_norm": 1.1530171632766724, "learning_rate": 9.537315789473685e-05, "loss": 0.3752, "step": 37599 }, { "epoch": 2.1054989360510694, "grad_norm": 1.081833004951477, "learning_rate": 9.53728947368421e-05, "loss": 0.4024, "step": 37600 }, { "epoch": 2.1055549333631984, "grad_norm": 1.1977484226226807, "learning_rate": 9.537263157894737e-05, "loss": 0.5045, "step": 37601 }, { "epoch": 2.1056109306753275, "grad_norm": 1.2235177755355835, "learning_rate": 9.537236842105264e-05, "loss": 0.4183, "step": 37602 }, { "epoch": 2.1056669279874565, "grad_norm": 1.6584830284118652, "learning_rate": 9.53721052631579e-05, "loss": 0.492, "step": 37603 }, { "epoch": 2.1057229252995855, "grad_norm": 1.2708359956741333, "learning_rate": 9.537184210526317e-05, "loss": 0.5033, "step": 37604 }, { "epoch": 2.1057789226117145, "grad_norm": 1.1725993156433105, "learning_rate": 9.537157894736842e-05, "loss": 0.423, "step": 37605 }, { 
"epoch": 2.1058349199238435, "grad_norm": 1.2848418951034546, "learning_rate": 9.537131578947369e-05, "loss": 0.4571, "step": 37606 }, { "epoch": 2.1058909172359725, "grad_norm": 1.1580767631530762, "learning_rate": 9.537105263157895e-05, "loss": 0.3126, "step": 37607 }, { "epoch": 2.1059469145481016, "grad_norm": 1.1102182865142822, "learning_rate": 9.537078947368422e-05, "loss": 0.3116, "step": 37608 }, { "epoch": 2.1060029118602306, "grad_norm": 1.4429696798324585, "learning_rate": 9.537052631578947e-05, "loss": 0.4199, "step": 37609 }, { "epoch": 2.1060589091723596, "grad_norm": 1.2626240253448486, "learning_rate": 9.537026315789474e-05, "loss": 0.4992, "step": 37610 }, { "epoch": 2.1061149064844886, "grad_norm": 1.210782527923584, "learning_rate": 9.537e-05, "loss": 0.3782, "step": 37611 }, { "epoch": 2.1061709037966176, "grad_norm": 1.3715052604675293, "learning_rate": 9.536973684210528e-05, "loss": 0.4497, "step": 37612 }, { "epoch": 2.1062269011087467, "grad_norm": 1.0635639429092407, "learning_rate": 9.536947368421054e-05, "loss": 0.4196, "step": 37613 }, { "epoch": 2.1062828984208757, "grad_norm": 1.2242220640182495, "learning_rate": 9.536921052631578e-05, "loss": 0.4658, "step": 37614 }, { "epoch": 2.1063388957330047, "grad_norm": 1.2983136177062988, "learning_rate": 9.536894736842106e-05, "loss": 0.3303, "step": 37615 }, { "epoch": 2.1063948930451337, "grad_norm": 1.2525765895843506, "learning_rate": 9.536868421052632e-05, "loss": 0.4423, "step": 37616 }, { "epoch": 2.1064508903572627, "grad_norm": 1.5390067100524902, "learning_rate": 9.536842105263159e-05, "loss": 0.4236, "step": 37617 }, { "epoch": 2.1065068876693918, "grad_norm": 1.3755855560302734, "learning_rate": 9.536815789473685e-05, "loss": 0.4819, "step": 37618 }, { "epoch": 2.1065628849815208, "grad_norm": 1.3707313537597656, "learning_rate": 9.536789473684211e-05, "loss": 0.4312, "step": 37619 }, { "epoch": 2.10661888229365, "grad_norm": 1.4791666269302368, "learning_rate": 
9.536763157894737e-05, "loss": 0.4804, "step": 37620 }, { "epoch": 2.106674879605779, "grad_norm": 1.3952860832214355, "learning_rate": 9.536736842105264e-05, "loss": 0.3225, "step": 37621 }, { "epoch": 2.106730876917908, "grad_norm": 1.162414312362671, "learning_rate": 9.53671052631579e-05, "loss": 0.3696, "step": 37622 }, { "epoch": 2.106786874230037, "grad_norm": 1.1685415506362915, "learning_rate": 9.536684210526316e-05, "loss": 0.4196, "step": 37623 }, { "epoch": 2.106842871542166, "grad_norm": 1.2862279415130615, "learning_rate": 9.536657894736842e-05, "loss": 0.355, "step": 37624 }, { "epoch": 2.106898868854295, "grad_norm": 1.0106096267700195, "learning_rate": 9.53663157894737e-05, "loss": 0.2846, "step": 37625 }, { "epoch": 2.106954866166424, "grad_norm": 1.1744362115859985, "learning_rate": 9.536605263157895e-05, "loss": 0.3198, "step": 37626 }, { "epoch": 2.107010863478553, "grad_norm": 1.1389715671539307, "learning_rate": 9.536578947368421e-05, "loss": 0.4217, "step": 37627 }, { "epoch": 2.107066860790682, "grad_norm": 1.3454861640930176, "learning_rate": 9.536552631578947e-05, "loss": 0.4608, "step": 37628 }, { "epoch": 2.107122858102811, "grad_norm": 1.1700462102890015, "learning_rate": 9.536526315789475e-05, "loss": 0.3859, "step": 37629 }, { "epoch": 2.10717885541494, "grad_norm": 1.2105263471603394, "learning_rate": 9.5365e-05, "loss": 0.4089, "step": 37630 }, { "epoch": 2.107234852727069, "grad_norm": 1.2925617694854736, "learning_rate": 9.536473684210527e-05, "loss": 0.47, "step": 37631 }, { "epoch": 2.107290850039198, "grad_norm": 1.2375229597091675, "learning_rate": 9.536447368421053e-05, "loss": 0.4665, "step": 37632 }, { "epoch": 2.107346847351327, "grad_norm": 1.166219711303711, "learning_rate": 9.536421052631579e-05, "loss": 0.3837, "step": 37633 }, { "epoch": 2.107402844663456, "grad_norm": 1.2813639640808105, "learning_rate": 9.536394736842106e-05, "loss": 0.4506, "step": 37634 }, { "epoch": 2.107458841975585, "grad_norm": 
1.2452691793441772, "learning_rate": 9.536368421052632e-05, "loss": 0.4071, "step": 37635 }, { "epoch": 2.107514839287714, "grad_norm": 1.2973533868789673, "learning_rate": 9.536342105263159e-05, "loss": 0.3378, "step": 37636 }, { "epoch": 2.107570836599843, "grad_norm": 1.2390633821487427, "learning_rate": 9.536315789473684e-05, "loss": 0.3924, "step": 37637 }, { "epoch": 2.107626833911972, "grad_norm": 1.1294937133789062, "learning_rate": 9.536289473684211e-05, "loss": 0.3611, "step": 37638 }, { "epoch": 2.107682831224101, "grad_norm": 1.0345213413238525, "learning_rate": 9.536263157894737e-05, "loss": 0.3974, "step": 37639 }, { "epoch": 2.10773882853623, "grad_norm": 1.1751327514648438, "learning_rate": 9.536236842105264e-05, "loss": 0.4803, "step": 37640 }, { "epoch": 2.107794825848359, "grad_norm": 1.4237899780273438, "learning_rate": 9.53621052631579e-05, "loss": 0.4054, "step": 37641 }, { "epoch": 2.107850823160488, "grad_norm": 1.1355594396591187, "learning_rate": 9.536184210526316e-05, "loss": 0.3758, "step": 37642 }, { "epoch": 2.1079068204726172, "grad_norm": 1.2845560312271118, "learning_rate": 9.536157894736842e-05, "loss": 0.4636, "step": 37643 }, { "epoch": 2.1079628177847463, "grad_norm": 1.09945809841156, "learning_rate": 9.53613157894737e-05, "loss": 0.2978, "step": 37644 }, { "epoch": 2.1080188150968753, "grad_norm": 1.336234211921692, "learning_rate": 9.536105263157896e-05, "loss": 0.3787, "step": 37645 }, { "epoch": 2.1080748124090043, "grad_norm": 1.6262624263763428, "learning_rate": 9.536078947368422e-05, "loss": 0.4172, "step": 37646 }, { "epoch": 2.1081308097211333, "grad_norm": 1.1292541027069092, "learning_rate": 9.536052631578948e-05, "loss": 0.3687, "step": 37647 }, { "epoch": 2.1081868070332623, "grad_norm": 1.4260233640670776, "learning_rate": 9.536026315789475e-05, "loss": 0.3782, "step": 37648 }, { "epoch": 2.1082428043453914, "grad_norm": 1.0964329242706299, "learning_rate": 9.536000000000001e-05, "loss": 0.3028, "step": 37649 }, { 
"epoch": 2.1082988016575204, "grad_norm": 1.1549230813980103, "learning_rate": 9.535973684210527e-05, "loss": 0.3356, "step": 37650 }, { "epoch": 2.1083547989696494, "grad_norm": 1.5794603824615479, "learning_rate": 9.535947368421053e-05, "loss": 0.6498, "step": 37651 }, { "epoch": 2.1084107962817784, "grad_norm": 1.0998942852020264, "learning_rate": 9.535921052631579e-05, "loss": 0.2676, "step": 37652 }, { "epoch": 2.1084667935939074, "grad_norm": 1.2723844051361084, "learning_rate": 9.535894736842106e-05, "loss": 0.3411, "step": 37653 }, { "epoch": 2.1085227909060364, "grad_norm": 1.1733119487762451, "learning_rate": 9.535868421052632e-05, "loss": 0.3587, "step": 37654 }, { "epoch": 2.1085787882181655, "grad_norm": 1.2792655229568481, "learning_rate": 9.535842105263158e-05, "loss": 0.4295, "step": 37655 }, { "epoch": 2.1086347855302945, "grad_norm": 1.0961569547653198, "learning_rate": 9.535815789473684e-05, "loss": 0.3743, "step": 37656 }, { "epoch": 2.1086907828424235, "grad_norm": 1.2600672245025635, "learning_rate": 9.535789473684211e-05, "loss": 0.4055, "step": 37657 }, { "epoch": 2.1087467801545525, "grad_norm": 1.284856915473938, "learning_rate": 9.535763157894737e-05, "loss": 0.4472, "step": 37658 }, { "epoch": 2.1088027774666815, "grad_norm": 1.4620368480682373, "learning_rate": 9.535736842105263e-05, "loss": 0.4192, "step": 37659 }, { "epoch": 2.1088587747788106, "grad_norm": 1.1823731660842896, "learning_rate": 9.535710526315789e-05, "loss": 0.3756, "step": 37660 }, { "epoch": 2.1089147720909396, "grad_norm": 1.25752592086792, "learning_rate": 9.535684210526317e-05, "loss": 0.4007, "step": 37661 }, { "epoch": 2.1089707694030686, "grad_norm": 1.2487444877624512, "learning_rate": 9.535657894736843e-05, "loss": 0.4453, "step": 37662 }, { "epoch": 2.1090267667151976, "grad_norm": 1.2192504405975342, "learning_rate": 9.53563157894737e-05, "loss": 0.3499, "step": 37663 }, { "epoch": 2.1090827640273266, "grad_norm": 1.2093487977981567, "learning_rate": 
9.535605263157895e-05, "loss": 0.3824, "step": 37664 }, { "epoch": 2.1091387613394557, "grad_norm": 1.2120347023010254, "learning_rate": 9.535578947368422e-05, "loss": 0.4127, "step": 37665 }, { "epoch": 2.1091947586515847, "grad_norm": 1.254859209060669, "learning_rate": 9.535552631578948e-05, "loss": 0.4518, "step": 37666 }, { "epoch": 2.1092507559637137, "grad_norm": 1.2412467002868652, "learning_rate": 9.535526315789474e-05, "loss": 0.4724, "step": 37667 }, { "epoch": 2.1093067532758427, "grad_norm": 1.1160590648651123, "learning_rate": 9.535500000000001e-05, "loss": 0.3399, "step": 37668 }, { "epoch": 2.1093627505879717, "grad_norm": 1.1172974109649658, "learning_rate": 9.535473684210526e-05, "loss": 0.3388, "step": 37669 }, { "epoch": 2.1094187479001008, "grad_norm": 1.2220187187194824, "learning_rate": 9.535447368421053e-05, "loss": 0.4007, "step": 37670 }, { "epoch": 2.1094747452122298, "grad_norm": 1.247684359550476, "learning_rate": 9.535421052631579e-05, "loss": 0.3795, "step": 37671 }, { "epoch": 2.109530742524359, "grad_norm": 1.02691650390625, "learning_rate": 9.535394736842106e-05, "loss": 0.296, "step": 37672 }, { "epoch": 2.109586739836488, "grad_norm": 1.1002277135849, "learning_rate": 9.535368421052632e-05, "loss": 0.3334, "step": 37673 }, { "epoch": 2.109642737148617, "grad_norm": 1.1732851266860962, "learning_rate": 9.535342105263158e-05, "loss": 0.3932, "step": 37674 }, { "epoch": 2.109698734460746, "grad_norm": 1.5924240350723267, "learning_rate": 9.535315789473684e-05, "loss": 0.4279, "step": 37675 }, { "epoch": 2.109754731772875, "grad_norm": 1.443625569343567, "learning_rate": 9.535289473684212e-05, "loss": 0.5602, "step": 37676 }, { "epoch": 2.109810729085004, "grad_norm": 1.2515921592712402, "learning_rate": 9.535263157894738e-05, "loss": 0.419, "step": 37677 }, { "epoch": 2.109866726397133, "grad_norm": 1.3528836965560913, "learning_rate": 9.535236842105264e-05, "loss": 0.663, "step": 37678 }, { "epoch": 2.109922723709262, "grad_norm": 
1.3246128559112549, "learning_rate": 9.53521052631579e-05, "loss": 0.3789, "step": 37679 }, { "epoch": 2.109978721021391, "grad_norm": 1.1721516847610474, "learning_rate": 9.535184210526317e-05, "loss": 0.3756, "step": 37680 }, { "epoch": 2.11003471833352, "grad_norm": 1.4207085371017456, "learning_rate": 9.535157894736843e-05, "loss": 0.4669, "step": 37681 }, { "epoch": 2.110090715645649, "grad_norm": 1.2498027086257935, "learning_rate": 9.535131578947369e-05, "loss": 0.3731, "step": 37682 }, { "epoch": 2.110146712957778, "grad_norm": 1.4895986318588257, "learning_rate": 9.535105263157895e-05, "loss": 0.4104, "step": 37683 }, { "epoch": 2.110202710269907, "grad_norm": 1.1882609128952026, "learning_rate": 9.535078947368421e-05, "loss": 0.4324, "step": 37684 }, { "epoch": 2.110258707582036, "grad_norm": 1.3308801651000977, "learning_rate": 9.535052631578948e-05, "loss": 0.3848, "step": 37685 }, { "epoch": 2.110314704894165, "grad_norm": 1.0326311588287354, "learning_rate": 9.535026315789474e-05, "loss": 0.2912, "step": 37686 }, { "epoch": 2.110370702206294, "grad_norm": 1.1226708889007568, "learning_rate": 9.535e-05, "loss": 0.3299, "step": 37687 }, { "epoch": 2.110426699518423, "grad_norm": 1.4871490001678467, "learning_rate": 9.534973684210526e-05, "loss": 0.4005, "step": 37688 }, { "epoch": 2.110482696830552, "grad_norm": 1.2377749681472778, "learning_rate": 9.534947368421053e-05, "loss": 0.422, "step": 37689 }, { "epoch": 2.110538694142681, "grad_norm": 1.1320468187332153, "learning_rate": 9.534921052631579e-05, "loss": 0.3589, "step": 37690 }, { "epoch": 2.11059469145481, "grad_norm": 1.4200750589370728, "learning_rate": 9.534894736842107e-05, "loss": 0.4861, "step": 37691 }, { "epoch": 2.110650688766939, "grad_norm": 1.0389201641082764, "learning_rate": 9.534868421052631e-05, "loss": 0.5698, "step": 37692 }, { "epoch": 2.110706686079068, "grad_norm": 0.9634940028190613, "learning_rate": 9.534842105263159e-05, "loss": 0.3202, "step": 37693 }, { "epoch": 
2.110762683391197, "grad_norm": 1.0997002124786377, "learning_rate": 9.534815789473685e-05, "loss": 0.333, "step": 37694 }, { "epoch": 2.1108186807033262, "grad_norm": 1.1376430988311768, "learning_rate": 9.534789473684212e-05, "loss": 0.3694, "step": 37695 }, { "epoch": 2.1108746780154553, "grad_norm": 1.2030510902404785, "learning_rate": 9.534763157894738e-05, "loss": 0.4139, "step": 37696 }, { "epoch": 2.1109306753275843, "grad_norm": 1.1908349990844727, "learning_rate": 9.534736842105264e-05, "loss": 0.3961, "step": 37697 }, { "epoch": 2.1109866726397133, "grad_norm": 1.1226392984390259, "learning_rate": 9.53471052631579e-05, "loss": 0.4358, "step": 37698 }, { "epoch": 2.1110426699518423, "grad_norm": 1.2626878023147583, "learning_rate": 9.534684210526317e-05, "loss": 0.4907, "step": 37699 }, { "epoch": 2.1110986672639713, "grad_norm": 1.3042765855789185, "learning_rate": 9.534657894736843e-05, "loss": 0.4645, "step": 37700 }, { "epoch": 2.1111546645761003, "grad_norm": 1.2931530475616455, "learning_rate": 9.534631578947368e-05, "loss": 0.4219, "step": 37701 }, { "epoch": 2.1112106618882294, "grad_norm": 1.3562021255493164, "learning_rate": 9.534605263157895e-05, "loss": 0.4596, "step": 37702 }, { "epoch": 2.1112666592003584, "grad_norm": 1.2898374795913696, "learning_rate": 9.534578947368421e-05, "loss": 0.3581, "step": 37703 }, { "epoch": 2.1113226565124874, "grad_norm": 1.2061854600906372, "learning_rate": 9.534552631578948e-05, "loss": 0.3977, "step": 37704 }, { "epoch": 2.1113786538246164, "grad_norm": 1.3235297203063965, "learning_rate": 9.534526315789474e-05, "loss": 0.3571, "step": 37705 }, { "epoch": 2.1114346511367454, "grad_norm": 1.2746213674545288, "learning_rate": 9.5345e-05, "loss": 0.5094, "step": 37706 }, { "epoch": 2.1114906484488745, "grad_norm": 1.3411320447921753, "learning_rate": 9.534473684210526e-05, "loss": 0.4216, "step": 37707 }, { "epoch": 2.1115466457610035, "grad_norm": 1.3013726472854614, "learning_rate": 9.534447368421054e-05, 
"loss": 0.3305, "step": 37708 }, { "epoch": 2.1116026430731325, "grad_norm": 1.1931748390197754, "learning_rate": 9.53442105263158e-05, "loss": 0.4516, "step": 37709 }, { "epoch": 2.1116586403852615, "grad_norm": 0.997417688369751, "learning_rate": 9.534394736842106e-05, "loss": 0.3268, "step": 37710 }, { "epoch": 2.1117146376973905, "grad_norm": 1.3128045797348022, "learning_rate": 9.534368421052631e-05, "loss": 0.5035, "step": 37711 }, { "epoch": 2.1117706350095196, "grad_norm": 1.5369590520858765, "learning_rate": 9.534342105263159e-05, "loss": 0.3783, "step": 37712 }, { "epoch": 2.1118266323216486, "grad_norm": 0.994907557964325, "learning_rate": 9.534315789473685e-05, "loss": 0.3859, "step": 37713 }, { "epoch": 2.1118826296337776, "grad_norm": 1.630784273147583, "learning_rate": 9.534289473684211e-05, "loss": 0.5884, "step": 37714 }, { "epoch": 2.1119386269459066, "grad_norm": 1.5292789936065674, "learning_rate": 9.534263157894737e-05, "loss": 0.4662, "step": 37715 }, { "epoch": 2.1119946242580356, "grad_norm": 1.374040961265564, "learning_rate": 9.534236842105264e-05, "loss": 0.4858, "step": 37716 }, { "epoch": 2.1120506215701647, "grad_norm": 1.1735118627548218, "learning_rate": 9.53421052631579e-05, "loss": 0.366, "step": 37717 }, { "epoch": 2.1121066188822937, "grad_norm": 1.1391663551330566, "learning_rate": 9.534184210526316e-05, "loss": 0.4214, "step": 37718 }, { "epoch": 2.1121626161944227, "grad_norm": 1.172099232673645, "learning_rate": 9.534157894736842e-05, "loss": 0.3972, "step": 37719 }, { "epoch": 2.1122186135065517, "grad_norm": 1.3714842796325684, "learning_rate": 9.534131578947368e-05, "loss": 0.5416, "step": 37720 }, { "epoch": 2.1122746108186807, "grad_norm": 1.390973687171936, "learning_rate": 9.534105263157895e-05, "loss": 0.4445, "step": 37721 }, { "epoch": 2.1123306081308098, "grad_norm": 1.1501284837722778, "learning_rate": 9.534078947368421e-05, "loss": 0.4735, "step": 37722 }, { "epoch": 2.1123866054429388, "grad_norm": 
1.2320268154144287, "learning_rate": 9.534052631578949e-05, "loss": 0.4413, "step": 37723 }, { "epoch": 2.112442602755068, "grad_norm": 1.2600257396697998, "learning_rate": 9.534026315789473e-05, "loss": 0.3476, "step": 37724 }, { "epoch": 2.112498600067197, "grad_norm": 1.1833323240280151, "learning_rate": 9.534e-05, "loss": 0.3807, "step": 37725 }, { "epoch": 2.112554597379326, "grad_norm": 1.3143657445907593, "learning_rate": 9.533973684210527e-05, "loss": 0.4744, "step": 37726 }, { "epoch": 2.112610594691455, "grad_norm": 1.4492616653442383, "learning_rate": 9.533947368421054e-05, "loss": 0.4555, "step": 37727 }, { "epoch": 2.112666592003584, "grad_norm": 1.362603783607483, "learning_rate": 9.53392105263158e-05, "loss": 0.4049, "step": 37728 }, { "epoch": 2.112722589315713, "grad_norm": 1.4482049942016602, "learning_rate": 9.533894736842106e-05, "loss": 0.4499, "step": 37729 }, { "epoch": 2.112778586627842, "grad_norm": 1.1095775365829468, "learning_rate": 9.533868421052632e-05, "loss": 0.3733, "step": 37730 }, { "epoch": 2.112834583939971, "grad_norm": 1.288974404335022, "learning_rate": 9.533842105263159e-05, "loss": 0.3501, "step": 37731 }, { "epoch": 2.1128905812521, "grad_norm": 1.22319757938385, "learning_rate": 9.533815789473685e-05, "loss": 0.3843, "step": 37732 }, { "epoch": 2.112946578564229, "grad_norm": 1.2581007480621338, "learning_rate": 9.533789473684211e-05, "loss": 0.4432, "step": 37733 }, { "epoch": 2.113002575876358, "grad_norm": 1.4334641695022583, "learning_rate": 9.533763157894737e-05, "loss": 0.3485, "step": 37734 }, { "epoch": 2.113058573188487, "grad_norm": 1.1329319477081299, "learning_rate": 9.533736842105263e-05, "loss": 0.4525, "step": 37735 }, { "epoch": 2.113114570500616, "grad_norm": 1.234907627105713, "learning_rate": 9.53371052631579e-05, "loss": 0.386, "step": 37736 }, { "epoch": 2.113170567812745, "grad_norm": 10.424367904663086, "learning_rate": 9.533684210526316e-05, "loss": 0.4617, "step": 37737 }, { "epoch": 
2.113226565124874, "grad_norm": 0.9995739459991455, "learning_rate": 9.533657894736842e-05, "loss": 0.2939, "step": 37738 }, { "epoch": 2.113282562437003, "grad_norm": 1.154493808746338, "learning_rate": 9.533631578947368e-05, "loss": 0.3866, "step": 37739 }, { "epoch": 2.113338559749132, "grad_norm": 1.1501070261001587, "learning_rate": 9.533605263157896e-05, "loss": 0.4242, "step": 37740 }, { "epoch": 2.113394557061261, "grad_norm": 1.2766205072402954, "learning_rate": 9.533578947368422e-05, "loss": 0.3907, "step": 37741 }, { "epoch": 2.11345055437339, "grad_norm": 1.2891227006912231, "learning_rate": 9.533552631578947e-05, "loss": 0.4394, "step": 37742 }, { "epoch": 2.113506551685519, "grad_norm": 1.2441725730895996, "learning_rate": 9.533526315789473e-05, "loss": 0.3491, "step": 37743 }, { "epoch": 2.113562548997648, "grad_norm": 1.2892502546310425, "learning_rate": 9.533500000000001e-05, "loss": 0.4216, "step": 37744 }, { "epoch": 2.113618546309777, "grad_norm": 1.5214523077011108, "learning_rate": 9.533473684210527e-05, "loss": 0.3599, "step": 37745 }, { "epoch": 2.113674543621906, "grad_norm": 1.1385174989700317, "learning_rate": 9.533447368421054e-05, "loss": 0.4494, "step": 37746 }, { "epoch": 2.1137305409340352, "grad_norm": 1.390559196472168, "learning_rate": 9.533421052631579e-05, "loss": 0.3402, "step": 37747 }, { "epoch": 2.1137865382461642, "grad_norm": 1.2365553379058838, "learning_rate": 9.533394736842106e-05, "loss": 0.4832, "step": 37748 }, { "epoch": 2.1138425355582933, "grad_norm": 1.30582857131958, "learning_rate": 9.533368421052632e-05, "loss": 0.4236, "step": 37749 }, { "epoch": 2.1138985328704223, "grad_norm": 1.213720440864563, "learning_rate": 9.53334210526316e-05, "loss": 0.4795, "step": 37750 }, { "epoch": 2.1139545301825513, "grad_norm": 1.1774375438690186, "learning_rate": 9.533315789473685e-05, "loss": 0.3157, "step": 37751 }, { "epoch": 2.1140105274946803, "grad_norm": 1.2122899293899536, "learning_rate": 9.53328947368421e-05, 
"loss": 0.3471, "step": 37752 }, { "epoch": 2.1140665248068093, "grad_norm": 1.4899814128875732, "learning_rate": 9.533263157894737e-05, "loss": 0.3924, "step": 37753 }, { "epoch": 2.1141225221189384, "grad_norm": 1.2417616844177246, "learning_rate": 9.533236842105263e-05, "loss": 0.355, "step": 37754 }, { "epoch": 2.1141785194310674, "grad_norm": 1.3443083763122559, "learning_rate": 9.53321052631579e-05, "loss": 0.4508, "step": 37755 }, { "epoch": 2.1142345167431964, "grad_norm": 1.0989341735839844, "learning_rate": 9.533184210526315e-05, "loss": 0.6193, "step": 37756 }, { "epoch": 2.1142905140553254, "grad_norm": 1.1604912281036377, "learning_rate": 9.533157894736843e-05, "loss": 0.5548, "step": 37757 }, { "epoch": 2.1143465113674544, "grad_norm": 1.5853008031845093, "learning_rate": 9.533131578947368e-05, "loss": 0.4636, "step": 37758 }, { "epoch": 2.1144025086795835, "grad_norm": 1.2316597700119019, "learning_rate": 9.533105263157896e-05, "loss": 0.3978, "step": 37759 }, { "epoch": 2.1144585059917125, "grad_norm": 1.0920389890670776, "learning_rate": 9.533078947368422e-05, "loss": 0.3358, "step": 37760 }, { "epoch": 2.1145145033038415, "grad_norm": 1.0193519592285156, "learning_rate": 9.533052631578948e-05, "loss": 0.3429, "step": 37761 }, { "epoch": 2.1145705006159705, "grad_norm": 1.4410526752471924, "learning_rate": 9.533026315789474e-05, "loss": 0.4416, "step": 37762 }, { "epoch": 2.1146264979280995, "grad_norm": 1.339468002319336, "learning_rate": 9.533000000000001e-05, "loss": 0.5131, "step": 37763 }, { "epoch": 2.1146824952402286, "grad_norm": 1.0867359638214111, "learning_rate": 9.532973684210527e-05, "loss": 0.3936, "step": 37764 }, { "epoch": 2.1147384925523576, "grad_norm": 1.1157872676849365, "learning_rate": 9.532947368421053e-05, "loss": 0.2791, "step": 37765 }, { "epoch": 2.1147944898644866, "grad_norm": 1.4042932987213135, "learning_rate": 9.532921052631579e-05, "loss": 0.4413, "step": 37766 }, { "epoch": 2.1148504871766156, "grad_norm": 
1.2471221685409546, "learning_rate": 9.532894736842106e-05, "loss": 0.5011, "step": 37767 }, { "epoch": 2.1149064844887446, "grad_norm": 1.0515024662017822, "learning_rate": 9.532868421052632e-05, "loss": 0.3093, "step": 37768 }, { "epoch": 2.1149624818008737, "grad_norm": 1.1320897340774536, "learning_rate": 9.532842105263158e-05, "loss": 0.3541, "step": 37769 }, { "epoch": 2.1150184791130027, "grad_norm": 1.1313374042510986, "learning_rate": 9.532815789473684e-05, "loss": 0.4539, "step": 37770 }, { "epoch": 2.1150744764251317, "grad_norm": 1.2079766988754272, "learning_rate": 9.53278947368421e-05, "loss": 0.4676, "step": 37771 }, { "epoch": 2.1151304737372607, "grad_norm": 1.1099145412445068, "learning_rate": 9.532763157894738e-05, "loss": 0.3352, "step": 37772 }, { "epoch": 2.1151864710493897, "grad_norm": 1.165467619895935, "learning_rate": 9.532736842105263e-05, "loss": 0.374, "step": 37773 }, { "epoch": 2.1152424683615187, "grad_norm": 1.6281195878982544, "learning_rate": 9.53271052631579e-05, "loss": 0.489, "step": 37774 }, { "epoch": 2.1152984656736478, "grad_norm": 1.1673880815505981, "learning_rate": 9.532684210526315e-05, "loss": 0.5044, "step": 37775 }, { "epoch": 2.115354462985777, "grad_norm": 1.1003457307815552, "learning_rate": 9.532657894736843e-05, "loss": 0.3416, "step": 37776 }, { "epoch": 2.115410460297906, "grad_norm": 1.307185411453247, "learning_rate": 9.532631578947369e-05, "loss": 0.4734, "step": 37777 }, { "epoch": 2.115466457610035, "grad_norm": 1.1056904792785645, "learning_rate": 9.532605263157896e-05, "loss": 0.3534, "step": 37778 }, { "epoch": 2.115522454922164, "grad_norm": 1.336822509765625, "learning_rate": 9.532578947368421e-05, "loss": 0.432, "step": 37779 }, { "epoch": 2.115578452234293, "grad_norm": 1.2675280570983887, "learning_rate": 9.532552631578948e-05, "loss": 0.5916, "step": 37780 }, { "epoch": 2.115634449546422, "grad_norm": 1.0618001222610474, "learning_rate": 9.532526315789474e-05, "loss": 0.3309, "step": 37781 }, { 
"epoch": 2.115690446858551, "grad_norm": 1.125723958015442, "learning_rate": 9.532500000000001e-05, "loss": 0.2804, "step": 37782 }, { "epoch": 2.11574644417068, "grad_norm": 1.0606441497802734, "learning_rate": 9.532473684210527e-05, "loss": 0.3503, "step": 37783 }, { "epoch": 2.115802441482809, "grad_norm": 1.1353930234909058, "learning_rate": 9.532447368421053e-05, "loss": 0.4429, "step": 37784 }, { "epoch": 2.115858438794938, "grad_norm": 1.134667158126831, "learning_rate": 9.532421052631579e-05, "loss": 0.3666, "step": 37785 }, { "epoch": 2.115914436107067, "grad_norm": 1.098807454109192, "learning_rate": 9.532394736842107e-05, "loss": 0.4427, "step": 37786 }, { "epoch": 2.115970433419196, "grad_norm": 1.4602035284042358, "learning_rate": 9.532368421052633e-05, "loss": 0.3726, "step": 37787 }, { "epoch": 2.116026430731325, "grad_norm": 1.1076409816741943, "learning_rate": 9.532342105263159e-05, "loss": 0.289, "step": 37788 }, { "epoch": 2.116082428043454, "grad_norm": 1.1125380992889404, "learning_rate": 9.532315789473684e-05, "loss": 0.3272, "step": 37789 }, { "epoch": 2.116138425355583, "grad_norm": 1.3662583827972412, "learning_rate": 9.53228947368421e-05, "loss": 0.388, "step": 37790 }, { "epoch": 2.116194422667712, "grad_norm": 1.3298346996307373, "learning_rate": 9.532263157894738e-05, "loss": 0.4258, "step": 37791 }, { "epoch": 2.116250419979841, "grad_norm": 1.2065690755844116, "learning_rate": 9.532236842105264e-05, "loss": 0.432, "step": 37792 }, { "epoch": 2.11630641729197, "grad_norm": 1.2760372161865234, "learning_rate": 9.53221052631579e-05, "loss": 0.3999, "step": 37793 }, { "epoch": 2.116362414604099, "grad_norm": 1.139719009399414, "learning_rate": 9.532184210526316e-05, "loss": 0.3156, "step": 37794 }, { "epoch": 2.116418411916228, "grad_norm": 1.0459928512573242, "learning_rate": 9.532157894736843e-05, "loss": 0.3737, "step": 37795 }, { "epoch": 2.116474409228357, "grad_norm": 1.007080316543579, "learning_rate": 9.532131578947369e-05, 
"loss": 0.3101, "step": 37796 }, { "epoch": 2.116530406540486, "grad_norm": 1.2657349109649658, "learning_rate": 9.532105263157895e-05, "loss": 0.3466, "step": 37797 }, { "epoch": 2.116586403852615, "grad_norm": 1.2727843523025513, "learning_rate": 9.532078947368421e-05, "loss": 0.3888, "step": 37798 }, { "epoch": 2.1166424011647442, "grad_norm": 1.3665987253189087, "learning_rate": 9.532052631578948e-05, "loss": 0.5027, "step": 37799 }, { "epoch": 2.1166983984768732, "grad_norm": 1.14633047580719, "learning_rate": 9.532026315789474e-05, "loss": 0.4561, "step": 37800 }, { "epoch": 2.1167543957890023, "grad_norm": 1.150771141052246, "learning_rate": 9.532000000000002e-05, "loss": 0.429, "step": 37801 }, { "epoch": 2.1168103931011313, "grad_norm": 1.1088114976882935, "learning_rate": 9.531973684210526e-05, "loss": 0.3064, "step": 37802 }, { "epoch": 2.1168663904132603, "grad_norm": 1.6099441051483154, "learning_rate": 9.531947368421054e-05, "loss": 0.5734, "step": 37803 }, { "epoch": 2.1169223877253893, "grad_norm": 1.4852136373519897, "learning_rate": 9.53192105263158e-05, "loss": 0.6458, "step": 37804 }, { "epoch": 2.1169783850375183, "grad_norm": 1.2405089139938354, "learning_rate": 9.531894736842105e-05, "loss": 0.3487, "step": 37805 }, { "epoch": 2.1170343823496474, "grad_norm": 0.9049539566040039, "learning_rate": 9.531868421052633e-05, "loss": 0.2875, "step": 37806 }, { "epoch": 2.1170903796617764, "grad_norm": 1.2981488704681396, "learning_rate": 9.531842105263157e-05, "loss": 0.4661, "step": 37807 }, { "epoch": 2.1171463769739054, "grad_norm": 3.335503339767456, "learning_rate": 9.531815789473685e-05, "loss": 0.5825, "step": 37808 }, { "epoch": 2.1172023742860344, "grad_norm": 1.1003698110580444, "learning_rate": 9.531789473684211e-05, "loss": 0.4532, "step": 37809 }, { "epoch": 2.1172583715981634, "grad_norm": 1.1118097305297852, "learning_rate": 9.531763157894738e-05, "loss": 0.3212, "step": 37810 }, { "epoch": 2.1173143689102925, "grad_norm": 
1.362941026687622, "learning_rate": 9.531736842105263e-05, "loss": 0.3852, "step": 37811 }, { "epoch": 2.1173703662224215, "grad_norm": 1.21826171875, "learning_rate": 9.53171052631579e-05, "loss": 0.4628, "step": 37812 }, { "epoch": 2.1174263635345505, "grad_norm": 1.2498557567596436, "learning_rate": 9.531684210526316e-05, "loss": 0.6538, "step": 37813 }, { "epoch": 2.1174823608466795, "grad_norm": 1.3448197841644287, "learning_rate": 9.531657894736843e-05, "loss": 0.4049, "step": 37814 }, { "epoch": 2.1175383581588085, "grad_norm": 1.0381081104278564, "learning_rate": 9.531631578947369e-05, "loss": 0.3324, "step": 37815 }, { "epoch": 2.1175943554709376, "grad_norm": 1.150907278060913, "learning_rate": 9.531605263157895e-05, "loss": 0.3745, "step": 37816 }, { "epoch": 2.1176503527830666, "grad_norm": 1.1191520690917969, "learning_rate": 9.531578947368421e-05, "loss": 0.317, "step": 37817 }, { "epoch": 2.1177063500951956, "grad_norm": 1.0300110578536987, "learning_rate": 9.531552631578949e-05, "loss": 0.4101, "step": 37818 }, { "epoch": 2.1177623474073246, "grad_norm": 1.178125262260437, "learning_rate": 9.531526315789475e-05, "loss": 0.3649, "step": 37819 }, { "epoch": 2.1178183447194536, "grad_norm": 1.2245209217071533, "learning_rate": 9.5315e-05, "loss": 0.387, "step": 37820 }, { "epoch": 2.1178743420315826, "grad_norm": 1.3217214345932007, "learning_rate": 9.531473684210526e-05, "loss": 0.4777, "step": 37821 }, { "epoch": 2.1179303393437117, "grad_norm": 1.4592339992523193, "learning_rate": 9.531447368421052e-05, "loss": 0.3873, "step": 37822 }, { "epoch": 2.1179863366558407, "grad_norm": 1.208885908126831, "learning_rate": 9.53142105263158e-05, "loss": 0.3682, "step": 37823 }, { "epoch": 2.1180423339679697, "grad_norm": 1.3035715818405151, "learning_rate": 9.531394736842106e-05, "loss": 0.4042, "step": 37824 }, { "epoch": 2.1180983312800987, "grad_norm": 1.3901745080947876, "learning_rate": 9.531368421052632e-05, "loss": 0.4977, "step": 37825 }, { "epoch": 
2.1181543285922277, "grad_norm": 1.1876839399337769, "learning_rate": 9.531342105263158e-05, "loss": 0.4223, "step": 37826 }, { "epoch": 2.1182103259043568, "grad_norm": 1.1811094284057617, "learning_rate": 9.531315789473685e-05, "loss": 0.4048, "step": 37827 }, { "epoch": 2.118266323216486, "grad_norm": 1.0523566007614136, "learning_rate": 9.531289473684211e-05, "loss": 0.3743, "step": 37828 }, { "epoch": 2.118322320528615, "grad_norm": 1.228696346282959, "learning_rate": 9.531263157894737e-05, "loss": 0.3872, "step": 37829 }, { "epoch": 2.118378317840744, "grad_norm": 1.5651582479476929, "learning_rate": 9.531236842105263e-05, "loss": 0.4446, "step": 37830 }, { "epoch": 2.118434315152873, "grad_norm": 1.2424721717834473, "learning_rate": 9.53121052631579e-05, "loss": 0.3981, "step": 37831 }, { "epoch": 2.118490312465002, "grad_norm": 1.9074064493179321, "learning_rate": 9.531184210526316e-05, "loss": 0.4753, "step": 37832 }, { "epoch": 2.118546309777131, "grad_norm": 1.414995789527893, "learning_rate": 9.531157894736844e-05, "loss": 0.4431, "step": 37833 }, { "epoch": 2.11860230708926, "grad_norm": 1.2317641973495483, "learning_rate": 9.531131578947368e-05, "loss": 0.5211, "step": 37834 }, { "epoch": 2.118658304401389, "grad_norm": 1.1901524066925049, "learning_rate": 9.531105263157895e-05, "loss": 0.3705, "step": 37835 }, { "epoch": 2.118714301713518, "grad_norm": 1.1597578525543213, "learning_rate": 9.531078947368421e-05, "loss": 0.4003, "step": 37836 }, { "epoch": 2.118770299025647, "grad_norm": 1.245458960533142, "learning_rate": 9.531052631578949e-05, "loss": 0.3511, "step": 37837 }, { "epoch": 2.118826296337776, "grad_norm": 1.221401572227478, "learning_rate": 9.531026315789475e-05, "loss": 0.3562, "step": 37838 }, { "epoch": 2.118882293649905, "grad_norm": 1.2704401016235352, "learning_rate": 9.531e-05, "loss": 0.408, "step": 37839 }, { "epoch": 2.118938290962034, "grad_norm": 1.1648112535476685, "learning_rate": 9.530973684210527e-05, "loss": 0.3679, 
"step": 37840 }, { "epoch": 2.118994288274163, "grad_norm": 1.776847004890442, "learning_rate": 9.530947368421053e-05, "loss": 0.474, "step": 37841 }, { "epoch": 2.119050285586292, "grad_norm": 1.1837369203567505, "learning_rate": 9.53092105263158e-05, "loss": 0.4325, "step": 37842 }, { "epoch": 2.119106282898421, "grad_norm": 1.2718595266342163, "learning_rate": 9.530894736842106e-05, "loss": 0.3621, "step": 37843 }, { "epoch": 2.11916228021055, "grad_norm": 1.0920614004135132, "learning_rate": 9.530868421052632e-05, "loss": 0.4358, "step": 37844 }, { "epoch": 2.119218277522679, "grad_norm": 1.2388941049575806, "learning_rate": 9.530842105263158e-05, "loss": 0.4012, "step": 37845 }, { "epoch": 2.119274274834808, "grad_norm": 1.3358289003372192, "learning_rate": 9.530815789473685e-05, "loss": 0.4192, "step": 37846 }, { "epoch": 2.119330272146937, "grad_norm": 1.178553581237793, "learning_rate": 9.530789473684211e-05, "loss": 0.3461, "step": 37847 }, { "epoch": 2.119386269459066, "grad_norm": 1.004017949104309, "learning_rate": 9.530763157894737e-05, "loss": 0.3324, "step": 37848 }, { "epoch": 2.119442266771195, "grad_norm": 1.5627021789550781, "learning_rate": 9.530736842105263e-05, "loss": 0.5497, "step": 37849 }, { "epoch": 2.119498264083324, "grad_norm": 1.0687642097473145, "learning_rate": 9.53071052631579e-05, "loss": 0.3485, "step": 37850 }, { "epoch": 2.119554261395453, "grad_norm": 1.380363941192627, "learning_rate": 9.530684210526316e-05, "loss": 0.3637, "step": 37851 }, { "epoch": 2.1196102587075822, "grad_norm": 1.372911810874939, "learning_rate": 9.530657894736842e-05, "loss": 0.3865, "step": 37852 }, { "epoch": 2.1196662560197113, "grad_norm": 1.0048118829727173, "learning_rate": 9.530631578947368e-05, "loss": 0.3525, "step": 37853 }, { "epoch": 2.1197222533318403, "grad_norm": 1.1297627687454224, "learning_rate": 9.530605263157896e-05, "loss": 0.3891, "step": 37854 }, { "epoch": 2.1197782506439693, "grad_norm": 1.726850986480713, "learning_rate": 
9.530578947368422e-05, "loss": 0.5057, "step": 37855 }, { "epoch": 2.1198342479560983, "grad_norm": 1.1801815032958984, "learning_rate": 9.530552631578948e-05, "loss": 0.3669, "step": 37856 }, { "epoch": 2.1198902452682273, "grad_norm": 1.9252707958221436, "learning_rate": 9.530526315789474e-05, "loss": 0.4337, "step": 37857 }, { "epoch": 2.1199462425803564, "grad_norm": 1.2330302000045776, "learning_rate": 9.5305e-05, "loss": 0.3446, "step": 37858 }, { "epoch": 2.1200022398924854, "grad_norm": 1.2027246952056885, "learning_rate": 9.530473684210527e-05, "loss": 0.4215, "step": 37859 }, { "epoch": 2.1200582372046144, "grad_norm": 11.59250259399414, "learning_rate": 9.530447368421053e-05, "loss": 0.3774, "step": 37860 }, { "epoch": 2.120114234516743, "grad_norm": 1.3247222900390625, "learning_rate": 9.530421052631579e-05, "loss": 0.3576, "step": 37861 }, { "epoch": 2.1201702318288724, "grad_norm": 1.2677847146987915, "learning_rate": 9.530394736842105e-05, "loss": 0.3704, "step": 37862 }, { "epoch": 2.120226229141001, "grad_norm": 1.1607929468154907, "learning_rate": 9.530368421052632e-05, "loss": 0.3964, "step": 37863 }, { "epoch": 2.1202822264531305, "grad_norm": 1.180167555809021, "learning_rate": 9.530342105263158e-05, "loss": 0.3506, "step": 37864 }, { "epoch": 2.120338223765259, "grad_norm": 1.253099799156189, "learning_rate": 9.530315789473686e-05, "loss": 0.368, "step": 37865 }, { "epoch": 2.1203942210773885, "grad_norm": 1.6238375902175903, "learning_rate": 9.53028947368421e-05, "loss": 0.5655, "step": 37866 }, { "epoch": 2.120450218389517, "grad_norm": 0.9868596792221069, "learning_rate": 9.530263157894737e-05, "loss": 0.3393, "step": 37867 }, { "epoch": 2.1205062157016465, "grad_norm": 0.935724139213562, "learning_rate": 9.530236842105263e-05, "loss": 0.2918, "step": 37868 }, { "epoch": 2.120562213013775, "grad_norm": 1.0049982070922852, "learning_rate": 9.530210526315791e-05, "loss": 0.34, "step": 37869 }, { "epoch": 2.1206182103259046, "grad_norm": 
2.395608425140381, "learning_rate": 9.530184210526317e-05, "loss": 0.5362, "step": 37870 }, { "epoch": 2.120674207638033, "grad_norm": 1.216915249824524, "learning_rate": 9.530157894736843e-05, "loss": 0.3044, "step": 37871 }, { "epoch": 2.120730204950162, "grad_norm": 1.6804417371749878, "learning_rate": 9.530131578947369e-05, "loss": 0.5489, "step": 37872 }, { "epoch": 2.120786202262291, "grad_norm": 1.2425605058670044, "learning_rate": 9.530105263157895e-05, "loss": 0.5188, "step": 37873 }, { "epoch": 2.12084219957442, "grad_norm": 1.4513862133026123, "learning_rate": 9.530078947368422e-05, "loss": 0.3784, "step": 37874 }, { "epoch": 2.1208981968865492, "grad_norm": 1.2104915380477905, "learning_rate": 9.530052631578948e-05, "loss": 0.4571, "step": 37875 }, { "epoch": 2.1209541941986783, "grad_norm": 1.4345799684524536, "learning_rate": 9.530026315789474e-05, "loss": 0.409, "step": 37876 }, { "epoch": 2.1210101915108073, "grad_norm": 1.3612395524978638, "learning_rate": 9.53e-05, "loss": 0.3945, "step": 37877 }, { "epoch": 2.1210661888229363, "grad_norm": 1.274493932723999, "learning_rate": 9.529973684210527e-05, "loss": 0.4298, "step": 37878 }, { "epoch": 2.1211221861350653, "grad_norm": 1.0498849153518677, "learning_rate": 9.529947368421053e-05, "loss": 0.3003, "step": 37879 }, { "epoch": 2.1211781834471943, "grad_norm": 1.6037408113479614, "learning_rate": 9.529921052631579e-05, "loss": 0.4317, "step": 37880 }, { "epoch": 2.1212341807593234, "grad_norm": 1.1873399019241333, "learning_rate": 9.529894736842105e-05, "loss": 0.3375, "step": 37881 }, { "epoch": 2.1212901780714524, "grad_norm": 2.2799975872039795, "learning_rate": 9.529868421052632e-05, "loss": 0.4741, "step": 37882 }, { "epoch": 2.1213461753835814, "grad_norm": 1.4707082509994507, "learning_rate": 9.529842105263158e-05, "loss": 0.4066, "step": 37883 }, { "epoch": 2.1214021726957104, "grad_norm": 1.120848536491394, "learning_rate": 9.529815789473684e-05, "loss": 0.3951, "step": 37884 }, { "epoch": 
2.1214581700078394, "grad_norm": 2.239795207977295, "learning_rate": 9.52978947368421e-05, "loss": 0.4015, "step": 37885 }, { "epoch": 2.1215141673199684, "grad_norm": 1.1888052225112915, "learning_rate": 9.529763157894738e-05, "loss": 0.3552, "step": 37886 }, { "epoch": 2.1215701646320975, "grad_norm": 1.222002625465393, "learning_rate": 9.529736842105264e-05, "loss": 0.3734, "step": 37887 }, { "epoch": 2.1216261619442265, "grad_norm": 1.1781657934188843, "learning_rate": 9.529710526315791e-05, "loss": 0.4107, "step": 37888 }, { "epoch": 2.1216821592563555, "grad_norm": 1.4246891736984253, "learning_rate": 9.529684210526316e-05, "loss": 0.4274, "step": 37889 }, { "epoch": 2.1217381565684845, "grad_norm": 1.1937236785888672, "learning_rate": 9.529657894736842e-05, "loss": 0.4264, "step": 37890 }, { "epoch": 2.1217941538806135, "grad_norm": 1.1374826431274414, "learning_rate": 9.529631578947369e-05, "loss": 0.4189, "step": 37891 }, { "epoch": 2.1218501511927426, "grad_norm": 1.240546464920044, "learning_rate": 9.529605263157895e-05, "loss": 0.384, "step": 37892 }, { "epoch": 2.1219061485048716, "grad_norm": 1.2603422403335571, "learning_rate": 9.529578947368422e-05, "loss": 0.4497, "step": 37893 }, { "epoch": 2.1219621458170006, "grad_norm": 1.3641451597213745, "learning_rate": 9.529552631578947e-05, "loss": 0.3536, "step": 37894 }, { "epoch": 2.1220181431291296, "grad_norm": 1.1944833993911743, "learning_rate": 9.529526315789474e-05, "loss": 0.4059, "step": 37895 }, { "epoch": 2.1220741404412586, "grad_norm": 1.2889608144760132, "learning_rate": 9.5295e-05, "loss": 0.3696, "step": 37896 }, { "epoch": 2.1221301377533877, "grad_norm": 1.3130391836166382, "learning_rate": 9.529473684210527e-05, "loss": 0.4156, "step": 37897 }, { "epoch": 2.1221861350655167, "grad_norm": 1.4661166667938232, "learning_rate": 9.529447368421053e-05, "loss": 0.3641, "step": 37898 }, { "epoch": 2.1222421323776457, "grad_norm": 1.3656140565872192, "learning_rate": 9.52942105263158e-05, 
"loss": 0.5186, "step": 37899 }, { "epoch": 2.1222981296897747, "grad_norm": 1.2916266918182373, "learning_rate": 9.529394736842105e-05, "loss": 0.4432, "step": 37900 }, { "epoch": 2.1223541270019037, "grad_norm": 1.222621202468872, "learning_rate": 9.529368421052633e-05, "loss": 0.576, "step": 37901 }, { "epoch": 2.1224101243140328, "grad_norm": 1.6173008680343628, "learning_rate": 9.529342105263159e-05, "loss": 0.4105, "step": 37902 }, { "epoch": 2.1224661216261618, "grad_norm": 1.3266522884368896, "learning_rate": 9.529315789473685e-05, "loss": 0.5658, "step": 37903 }, { "epoch": 2.122522118938291, "grad_norm": 1.2293651103973389, "learning_rate": 9.52928947368421e-05, "loss": 0.4712, "step": 37904 }, { "epoch": 2.12257811625042, "grad_norm": 1.1439555883407593, "learning_rate": 9.529263157894738e-05, "loss": 0.3701, "step": 37905 }, { "epoch": 2.122634113562549, "grad_norm": 1.230742335319519, "learning_rate": 9.529236842105264e-05, "loss": 0.449, "step": 37906 }, { "epoch": 2.122690110874678, "grad_norm": 1.2715702056884766, "learning_rate": 9.52921052631579e-05, "loss": 0.4344, "step": 37907 }, { "epoch": 2.122746108186807, "grad_norm": 1.171204924583435, "learning_rate": 9.529184210526316e-05, "loss": 0.4105, "step": 37908 }, { "epoch": 2.122802105498936, "grad_norm": 0.9493831396102905, "learning_rate": 9.529157894736842e-05, "loss": 0.3483, "step": 37909 }, { "epoch": 2.122858102811065, "grad_norm": 1.241207480430603, "learning_rate": 9.529131578947369e-05, "loss": 0.398, "step": 37910 }, { "epoch": 2.122914100123194, "grad_norm": 1.1960015296936035, "learning_rate": 9.529105263157895e-05, "loss": 0.3897, "step": 37911 }, { "epoch": 2.122970097435323, "grad_norm": 1.1621278524398804, "learning_rate": 9.529078947368421e-05, "loss": 0.3482, "step": 37912 }, { "epoch": 2.123026094747452, "grad_norm": 0.9802743792533875, "learning_rate": 9.529052631578947e-05, "loss": 0.3122, "step": 37913 }, { "epoch": 2.123082092059581, "grad_norm": 1.3529890775680542, 
"learning_rate": 9.529026315789474e-05, "loss": 0.4806, "step": 37914 }, { "epoch": 2.12313808937171, "grad_norm": 1.2725598812103271, "learning_rate": 9.529e-05, "loss": 0.3694, "step": 37915 }, { "epoch": 2.123194086683839, "grad_norm": 1.2651251554489136, "learning_rate": 9.528973684210526e-05, "loss": 0.5421, "step": 37916 }, { "epoch": 2.123250083995968, "grad_norm": 1.0582131147384644, "learning_rate": 9.528947368421052e-05, "loss": 0.3326, "step": 37917 }, { "epoch": 2.123306081308097, "grad_norm": 2.0830466747283936, "learning_rate": 9.52892105263158e-05, "loss": 0.542, "step": 37918 }, { "epoch": 2.123362078620226, "grad_norm": 1.1611149311065674, "learning_rate": 9.528894736842106e-05, "loss": 0.4467, "step": 37919 }, { "epoch": 2.123418075932355, "grad_norm": 1.0782891511917114, "learning_rate": 9.528868421052633e-05, "loss": 0.289, "step": 37920 }, { "epoch": 2.123474073244484, "grad_norm": 1.3379461765289307, "learning_rate": 9.528842105263158e-05, "loss": 0.5336, "step": 37921 }, { "epoch": 2.123530070556613, "grad_norm": 1.0055097341537476, "learning_rate": 9.528815789473685e-05, "loss": 0.3341, "step": 37922 }, { "epoch": 2.123586067868742, "grad_norm": 1.2222527265548706, "learning_rate": 9.528789473684211e-05, "loss": 0.4326, "step": 37923 }, { "epoch": 2.123642065180871, "grad_norm": 1.134134292602539, "learning_rate": 9.528763157894738e-05, "loss": 0.35, "step": 37924 }, { "epoch": 2.123698062493, "grad_norm": 1.1304410696029663, "learning_rate": 9.528736842105264e-05, "loss": 0.381, "step": 37925 }, { "epoch": 2.123754059805129, "grad_norm": 1.1868478059768677, "learning_rate": 9.528710526315789e-05, "loss": 0.4347, "step": 37926 }, { "epoch": 2.1238100571172582, "grad_norm": 1.1191680431365967, "learning_rate": 9.528684210526316e-05, "loss": 0.4864, "step": 37927 }, { "epoch": 2.1238660544293873, "grad_norm": 1.53757643699646, "learning_rate": 9.528657894736842e-05, "loss": 0.4159, "step": 37928 }, { "epoch": 2.1239220517415163, "grad_norm": 
1.367113471031189, "learning_rate": 9.52863157894737e-05, "loss": 0.3963, "step": 37929 }, { "epoch": 2.1239780490536453, "grad_norm": 1.32086980342865, "learning_rate": 9.528605263157895e-05, "loss": 0.3874, "step": 37930 }, { "epoch": 2.1240340463657743, "grad_norm": 1.1955931186676025, "learning_rate": 9.528578947368421e-05, "loss": 0.3927, "step": 37931 }, { "epoch": 2.1240900436779033, "grad_norm": 1.0497366189956665, "learning_rate": 9.528552631578947e-05, "loss": 0.4363, "step": 37932 }, { "epoch": 2.1241460409900323, "grad_norm": 1.3721774816513062, "learning_rate": 9.528526315789475e-05, "loss": 0.3975, "step": 37933 }, { "epoch": 2.1242020383021614, "grad_norm": 2.4244110584259033, "learning_rate": 9.5285e-05, "loss": 0.4301, "step": 37934 }, { "epoch": 2.1242580356142904, "grad_norm": 1.2817292213439941, "learning_rate": 9.528473684210527e-05, "loss": 0.38, "step": 37935 }, { "epoch": 2.1243140329264194, "grad_norm": 1.9610052108764648, "learning_rate": 9.528447368421053e-05, "loss": 0.4149, "step": 37936 }, { "epoch": 2.1243700302385484, "grad_norm": 1.0222722291946411, "learning_rate": 9.52842105263158e-05, "loss": 0.2925, "step": 37937 }, { "epoch": 2.1244260275506774, "grad_norm": 1.5136748552322388, "learning_rate": 9.528394736842106e-05, "loss": 0.5081, "step": 37938 }, { "epoch": 2.1244820248628065, "grad_norm": 1.4547631740570068, "learning_rate": 9.528368421052632e-05, "loss": 0.3788, "step": 37939 }, { "epoch": 2.1245380221749355, "grad_norm": 1.1482230424880981, "learning_rate": 9.528342105263158e-05, "loss": 0.3097, "step": 37940 }, { "epoch": 2.1245940194870645, "grad_norm": 1.0308619737625122, "learning_rate": 9.528315789473685e-05, "loss": 0.3182, "step": 37941 }, { "epoch": 2.1246500167991935, "grad_norm": 1.0513734817504883, "learning_rate": 9.528289473684211e-05, "loss": 0.3506, "step": 37942 }, { "epoch": 2.1247060141113225, "grad_norm": 0.9860881567001343, "learning_rate": 9.528263157894737e-05, "loss": 0.2575, "step": 37943 }, { 
"epoch": 2.1247620114234516, "grad_norm": 1.2484816312789917, "learning_rate": 9.528236842105263e-05, "loss": 0.3139, "step": 37944 }, { "epoch": 2.1248180087355806, "grad_norm": 1.031117558479309, "learning_rate": 9.528210526315789e-05, "loss": 0.2977, "step": 37945 }, { "epoch": 2.1248740060477096, "grad_norm": 1.2670975923538208, "learning_rate": 9.528184210526316e-05, "loss": 0.4885, "step": 37946 }, { "epoch": 2.1249300033598386, "grad_norm": 1.2101235389709473, "learning_rate": 9.528157894736842e-05, "loss": 0.4491, "step": 37947 }, { "epoch": 2.1249860006719676, "grad_norm": 1.1838452816009521, "learning_rate": 9.52813157894737e-05, "loss": 0.4123, "step": 37948 }, { "epoch": 2.1250419979840967, "grad_norm": 1.2386916875839233, "learning_rate": 9.528105263157894e-05, "loss": 0.461, "step": 37949 }, { "epoch": 2.1250979952962257, "grad_norm": 1.1315258741378784, "learning_rate": 9.528078947368422e-05, "loss": 0.3907, "step": 37950 }, { "epoch": 2.1251539926083547, "grad_norm": 1.2928507328033447, "learning_rate": 9.528052631578948e-05, "loss": 0.4226, "step": 37951 }, { "epoch": 2.1252099899204837, "grad_norm": 1.2939456701278687, "learning_rate": 9.528026315789475e-05, "loss": 0.5222, "step": 37952 }, { "epoch": 2.1252659872326127, "grad_norm": 1.379740834236145, "learning_rate": 9.528000000000001e-05, "loss": 0.3743, "step": 37953 }, { "epoch": 2.1253219845447417, "grad_norm": 0.9714986681938171, "learning_rate": 9.527973684210527e-05, "loss": 0.3069, "step": 37954 }, { "epoch": 2.1253779818568708, "grad_norm": 1.1989487409591675, "learning_rate": 9.527947368421053e-05, "loss": 0.3927, "step": 37955 }, { "epoch": 2.125433979169, "grad_norm": 1.2335395812988281, "learning_rate": 9.52792105263158e-05, "loss": 0.4218, "step": 37956 }, { "epoch": 2.125489976481129, "grad_norm": 1.1995121240615845, "learning_rate": 9.527894736842106e-05, "loss": 0.3731, "step": 37957 }, { "epoch": 2.125545973793258, "grad_norm": 1.0925146341323853, "learning_rate": 
9.527868421052632e-05, "loss": 0.4277, "step": 37958 }, { "epoch": 2.125601971105387, "grad_norm": 1.2077703475952148, "learning_rate": 9.527842105263158e-05, "loss": 0.4163, "step": 37959 }, { "epoch": 2.125657968417516, "grad_norm": 1.1684659719467163, "learning_rate": 9.527815789473684e-05, "loss": 0.4115, "step": 37960 }, { "epoch": 2.125713965729645, "grad_norm": 2.821614980697632, "learning_rate": 9.527789473684211e-05, "loss": 0.4261, "step": 37961 }, { "epoch": 2.125769963041774, "grad_norm": 1.2369601726531982, "learning_rate": 9.527763157894737e-05, "loss": 0.4649, "step": 37962 }, { "epoch": 2.125825960353903, "grad_norm": 1.170612096786499, "learning_rate": 9.527736842105263e-05, "loss": 0.4416, "step": 37963 }, { "epoch": 2.125881957666032, "grad_norm": 1.1098328828811646, "learning_rate": 9.52771052631579e-05, "loss": 0.405, "step": 37964 }, { "epoch": 2.125937954978161, "grad_norm": 1.4585012197494507, "learning_rate": 9.527684210526317e-05, "loss": 0.423, "step": 37965 }, { "epoch": 2.12599395229029, "grad_norm": 1.250841498374939, "learning_rate": 9.527657894736843e-05, "loss": 0.3501, "step": 37966 }, { "epoch": 2.126049949602419, "grad_norm": 1.2169735431671143, "learning_rate": 9.527631578947369e-05, "loss": 0.3859, "step": 37967 }, { "epoch": 2.126105946914548, "grad_norm": 1.583406925201416, "learning_rate": 9.527605263157895e-05, "loss": 0.3602, "step": 37968 }, { "epoch": 2.126161944226677, "grad_norm": 1.2548919916152954, "learning_rate": 9.527578947368422e-05, "loss": 0.378, "step": 37969 }, { "epoch": 2.126217941538806, "grad_norm": 1.0360541343688965, "learning_rate": 9.527552631578948e-05, "loss": 0.3592, "step": 37970 }, { "epoch": 2.126273938850935, "grad_norm": 1.138854742050171, "learning_rate": 9.527526315789474e-05, "loss": 0.3268, "step": 37971 }, { "epoch": 2.126329936163064, "grad_norm": 1.2766937017440796, "learning_rate": 9.5275e-05, "loss": 0.4271, "step": 37972 }, { "epoch": 2.126385933475193, "grad_norm": 
1.1715785264968872, "learning_rate": 9.527473684210527e-05, "loss": 0.3522, "step": 37973 }, { "epoch": 2.126441930787322, "grad_norm": 1.1503130197525024, "learning_rate": 9.527447368421053e-05, "loss": 0.3519, "step": 37974 }, { "epoch": 2.126497928099451, "grad_norm": 1.5244576930999756, "learning_rate": 9.52742105263158e-05, "loss": 0.4394, "step": 37975 }, { "epoch": 2.12655392541158, "grad_norm": 1.2267115116119385, "learning_rate": 9.527394736842105e-05, "loss": 0.5937, "step": 37976 }, { "epoch": 2.126609922723709, "grad_norm": 1.0399476289749146, "learning_rate": 9.527368421052631e-05, "loss": 0.4108, "step": 37977 }, { "epoch": 2.126665920035838, "grad_norm": 3.3570339679718018, "learning_rate": 9.527342105263158e-05, "loss": 0.351, "step": 37978 }, { "epoch": 2.1267219173479672, "grad_norm": 1.0552146434783936, "learning_rate": 9.527315789473684e-05, "loss": 0.3609, "step": 37979 }, { "epoch": 2.1267779146600962, "grad_norm": 1.0554651021957397, "learning_rate": 9.527289473684212e-05, "loss": 0.2997, "step": 37980 }, { "epoch": 2.1268339119722253, "grad_norm": 1.498835563659668, "learning_rate": 9.527263157894736e-05, "loss": 0.4488, "step": 37981 }, { "epoch": 2.1268899092843543, "grad_norm": 0.9943491220474243, "learning_rate": 9.527236842105264e-05, "loss": 0.2958, "step": 37982 }, { "epoch": 2.1269459065964833, "grad_norm": 1.1603772640228271, "learning_rate": 9.52721052631579e-05, "loss": 0.3737, "step": 37983 }, { "epoch": 2.1270019039086123, "grad_norm": 1.308395504951477, "learning_rate": 9.527184210526317e-05, "loss": 0.5178, "step": 37984 }, { "epoch": 2.1270579012207413, "grad_norm": 1.1913459300994873, "learning_rate": 9.527157894736843e-05, "loss": 0.4921, "step": 37985 }, { "epoch": 2.1271138985328704, "grad_norm": 1.0885347127914429, "learning_rate": 9.527131578947369e-05, "loss": 0.4256, "step": 37986 }, { "epoch": 2.1271698958449994, "grad_norm": 1.210465908050537, "learning_rate": 9.527105263157895e-05, "loss": 0.4189, "step": 37987 }, 
{ "epoch": 2.1272258931571284, "grad_norm": 1.1525578498840332, "learning_rate": 9.527078947368422e-05, "loss": 0.392, "step": 37988 }, { "epoch": 2.1272818904692574, "grad_norm": 1.4139964580535889, "learning_rate": 9.527052631578948e-05, "loss": 0.4049, "step": 37989 }, { "epoch": 2.1273378877813864, "grad_norm": 1.1509031057357788, "learning_rate": 9.527026315789474e-05, "loss": 0.396, "step": 37990 }, { "epoch": 2.1273938850935155, "grad_norm": 1.540934443473816, "learning_rate": 9.527e-05, "loss": 0.5453, "step": 37991 }, { "epoch": 2.1274498824056445, "grad_norm": 1.2494606971740723, "learning_rate": 9.526973684210527e-05, "loss": 0.5135, "step": 37992 }, { "epoch": 2.1275058797177735, "grad_norm": 1.2312878370285034, "learning_rate": 9.526947368421053e-05, "loss": 0.4202, "step": 37993 }, { "epoch": 2.1275618770299025, "grad_norm": 1.0873737335205078, "learning_rate": 9.52692105263158e-05, "loss": 0.4458, "step": 37994 }, { "epoch": 2.1276178743420315, "grad_norm": 1.1822694540023804, "learning_rate": 9.526894736842105e-05, "loss": 0.4514, "step": 37995 }, { "epoch": 2.1276738716541606, "grad_norm": 1.933834433555603, "learning_rate": 9.526868421052631e-05, "loss": 0.4042, "step": 37996 }, { "epoch": 2.1277298689662896, "grad_norm": 1.212084412574768, "learning_rate": 9.526842105263159e-05, "loss": 0.4588, "step": 37997 }, { "epoch": 2.1277858662784186, "grad_norm": 1.2162895202636719, "learning_rate": 9.526815789473685e-05, "loss": 0.3609, "step": 37998 }, { "epoch": 2.1278418635905476, "grad_norm": 1.2796589136123657, "learning_rate": 9.52678947368421e-05, "loss": 0.4637, "step": 37999 }, { "epoch": 2.1278978609026766, "grad_norm": 1.2076795101165771, "learning_rate": 9.526763157894737e-05, "loss": 0.3873, "step": 38000 }, { "epoch": 2.1279538582148056, "grad_norm": 1.3360549211502075, "learning_rate": 9.526736842105264e-05, "loss": 0.4939, "step": 38001 }, { "epoch": 2.1280098555269347, "grad_norm": 1.0555404424667358, "learning_rate": 
9.52671052631579e-05, "loss": 0.326, "step": 38002 }, { "epoch": 2.1280658528390637, "grad_norm": 1.303219199180603, "learning_rate": 9.526684210526317e-05, "loss": 0.4291, "step": 38003 }, { "epoch": 2.1281218501511927, "grad_norm": 1.0729445219039917, "learning_rate": 9.526657894736842e-05, "loss": 0.3746, "step": 38004 }, { "epoch": 2.1281778474633217, "grad_norm": 1.295015811920166, "learning_rate": 9.526631578947369e-05, "loss": 0.3456, "step": 38005 }, { "epoch": 2.1282338447754507, "grad_norm": 1.159035563468933, "learning_rate": 9.526605263157895e-05, "loss": 0.4296, "step": 38006 }, { "epoch": 2.1282898420875798, "grad_norm": 1.1793303489685059, "learning_rate": 9.526578947368422e-05, "loss": 0.3101, "step": 38007 }, { "epoch": 2.128345839399709, "grad_norm": 1.3309861421585083, "learning_rate": 9.526552631578947e-05, "loss": 0.472, "step": 38008 }, { "epoch": 2.128401836711838, "grad_norm": 1.6163116693496704, "learning_rate": 9.526526315789474e-05, "loss": 0.3426, "step": 38009 }, { "epoch": 2.128457834023967, "grad_norm": 1.2657898664474487, "learning_rate": 9.5265e-05, "loss": 0.4352, "step": 38010 }, { "epoch": 2.128513831336096, "grad_norm": 1.3555500507354736, "learning_rate": 9.526473684210526e-05, "loss": 0.5425, "step": 38011 }, { "epoch": 2.128569828648225, "grad_norm": 1.3209218978881836, "learning_rate": 9.526447368421054e-05, "loss": 0.5877, "step": 38012 }, { "epoch": 2.128625825960354, "grad_norm": 1.2382484674453735, "learning_rate": 9.526421052631578e-05, "loss": 0.4607, "step": 38013 }, { "epoch": 2.128681823272483, "grad_norm": 0.9914510846138, "learning_rate": 9.526394736842106e-05, "loss": 0.2801, "step": 38014 }, { "epoch": 2.128737820584612, "grad_norm": 1.3365525007247925, "learning_rate": 9.526368421052632e-05, "loss": 0.5038, "step": 38015 }, { "epoch": 2.128793817896741, "grad_norm": 1.103435754776001, "learning_rate": 9.526342105263159e-05, "loss": 0.2661, "step": 38016 }, { "epoch": 2.12884981520887, "grad_norm": 
1.072560429573059, "learning_rate": 9.526315789473685e-05, "loss": 0.3636, "step": 38017 }, { "epoch": 2.128905812520999, "grad_norm": 1.1811704635620117, "learning_rate": 9.526289473684211e-05, "loss": 0.4714, "step": 38018 }, { "epoch": 2.128961809833128, "grad_norm": 1.3292332887649536, "learning_rate": 9.526263157894737e-05, "loss": 0.3906, "step": 38019 }, { "epoch": 2.129017807145257, "grad_norm": 1.183525800704956, "learning_rate": 9.526236842105264e-05, "loss": 0.3852, "step": 38020 }, { "epoch": 2.129073804457386, "grad_norm": 1.075858235359192, "learning_rate": 9.52621052631579e-05, "loss": 0.3572, "step": 38021 }, { "epoch": 2.129129801769515, "grad_norm": 1.056778073310852, "learning_rate": 9.526184210526316e-05, "loss": 0.3609, "step": 38022 }, { "epoch": 2.129185799081644, "grad_norm": 1.314979076385498, "learning_rate": 9.526157894736842e-05, "loss": 0.4256, "step": 38023 }, { "epoch": 2.129241796393773, "grad_norm": 1.4851086139678955, "learning_rate": 9.52613157894737e-05, "loss": 0.569, "step": 38024 }, { "epoch": 2.129297793705902, "grad_norm": 1.003501534461975, "learning_rate": 9.526105263157895e-05, "loss": 0.3412, "step": 38025 }, { "epoch": 2.129353791018031, "grad_norm": 1.280484676361084, "learning_rate": 9.526078947368421e-05, "loss": 0.3972, "step": 38026 }, { "epoch": 2.12940978833016, "grad_norm": 1.2305500507354736, "learning_rate": 9.526052631578947e-05, "loss": 0.4467, "step": 38027 }, { "epoch": 2.129465785642289, "grad_norm": 1.06526780128479, "learning_rate": 9.526026315789475e-05, "loss": 0.4148, "step": 38028 }, { "epoch": 2.129521782954418, "grad_norm": 1.4010645151138306, "learning_rate": 9.526e-05, "loss": 0.6029, "step": 38029 }, { "epoch": 2.129577780266547, "grad_norm": 1.3162074089050293, "learning_rate": 9.525973684210527e-05, "loss": 0.4352, "step": 38030 }, { "epoch": 2.1296337775786762, "grad_norm": 1.3628954887390137, "learning_rate": 9.525947368421053e-05, "loss": 0.4788, "step": 38031 }, { "epoch": 
2.1296897748908052, "grad_norm": 1.086837887763977, "learning_rate": 9.525921052631579e-05, "loss": 0.4691, "step": 38032 }, { "epoch": 2.1297457722029343, "grad_norm": 1.3622840642929077, "learning_rate": 9.525894736842106e-05, "loss": 0.379, "step": 38033 }, { "epoch": 2.1298017695150633, "grad_norm": 1.2818411588668823, "learning_rate": 9.525868421052632e-05, "loss": 0.3972, "step": 38034 }, { "epoch": 2.1298577668271923, "grad_norm": 1.116562843322754, "learning_rate": 9.525842105263159e-05, "loss": 0.3853, "step": 38035 }, { "epoch": 2.1299137641393213, "grad_norm": 1.5651719570159912, "learning_rate": 9.525815789473684e-05, "loss": 0.5475, "step": 38036 }, { "epoch": 2.1299697614514503, "grad_norm": 1.1923152208328247, "learning_rate": 9.525789473684211e-05, "loss": 0.3647, "step": 38037 }, { "epoch": 2.1300257587635794, "grad_norm": 1.0621285438537598, "learning_rate": 9.525763157894737e-05, "loss": 0.3732, "step": 38038 }, { "epoch": 2.1300817560757084, "grad_norm": 1.2186707258224487, "learning_rate": 9.525736842105264e-05, "loss": 0.3878, "step": 38039 }, { "epoch": 2.1301377533878374, "grad_norm": 1.339374303817749, "learning_rate": 9.52571052631579e-05, "loss": 0.4424, "step": 38040 }, { "epoch": 2.1301937506999664, "grad_norm": 1.1642976999282837, "learning_rate": 9.525684210526316e-05, "loss": 0.374, "step": 38041 }, { "epoch": 2.1302497480120954, "grad_norm": 1.3978774547576904, "learning_rate": 9.525657894736842e-05, "loss": 0.4784, "step": 38042 }, { "epoch": 2.1303057453242245, "grad_norm": 1.1733441352844238, "learning_rate": 9.52563157894737e-05, "loss": 0.3921, "step": 38043 }, { "epoch": 2.1303617426363535, "grad_norm": 1.6524600982666016, "learning_rate": 9.525605263157896e-05, "loss": 0.6389, "step": 38044 }, { "epoch": 2.1304177399484825, "grad_norm": 1.2763721942901611, "learning_rate": 9.525578947368422e-05, "loss": 0.4434, "step": 38045 }, { "epoch": 2.1304737372606115, "grad_norm": 1.1687251329421997, "learning_rate": 
9.525552631578948e-05, "loss": 0.3839, "step": 38046 }, { "epoch": 2.1305297345727405, "grad_norm": 1.3363008499145508, "learning_rate": 9.525526315789474e-05, "loss": 0.5429, "step": 38047 }, { "epoch": 2.1305857318848695, "grad_norm": 1.1057367324829102, "learning_rate": 9.525500000000001e-05, "loss": 0.4435, "step": 38048 }, { "epoch": 2.1306417291969986, "grad_norm": 1.234102725982666, "learning_rate": 9.525473684210527e-05, "loss": 0.3411, "step": 38049 }, { "epoch": 2.1306977265091276, "grad_norm": 1.1591323614120483, "learning_rate": 9.525447368421053e-05, "loss": 0.3871, "step": 38050 }, { "epoch": 2.1307537238212566, "grad_norm": 1.2242653369903564, "learning_rate": 9.525421052631579e-05, "loss": 0.4891, "step": 38051 }, { "epoch": 2.1308097211333856, "grad_norm": 1.1745156049728394, "learning_rate": 9.525394736842106e-05, "loss": 0.3484, "step": 38052 }, { "epoch": 2.1308657184455146, "grad_norm": 1.4954283237457275, "learning_rate": 9.525368421052632e-05, "loss": 0.5606, "step": 38053 }, { "epoch": 2.1309217157576437, "grad_norm": 1.1327989101409912, "learning_rate": 9.525342105263158e-05, "loss": 0.3961, "step": 38054 }, { "epoch": 2.1309777130697727, "grad_norm": 1.109371542930603, "learning_rate": 9.525315789473684e-05, "loss": 0.3639, "step": 38055 }, { "epoch": 2.1310337103819017, "grad_norm": 1.0768954753875732, "learning_rate": 9.525289473684211e-05, "loss": 0.3222, "step": 38056 }, { "epoch": 2.1310897076940307, "grad_norm": 1.4685007333755493, "learning_rate": 9.525263157894737e-05, "loss": 0.3647, "step": 38057 }, { "epoch": 2.1311457050061597, "grad_norm": 1.146191120147705, "learning_rate": 9.525236842105265e-05, "loss": 0.3827, "step": 38058 }, { "epoch": 2.1312017023182888, "grad_norm": 1.1066203117370605, "learning_rate": 9.525210526315789e-05, "loss": 0.3453, "step": 38059 }, { "epoch": 2.131257699630418, "grad_norm": 1.2790555953979492, "learning_rate": 9.525184210526317e-05, "loss": 0.3675, "step": 38060 }, { "epoch": 2.131313696942547, 
"grad_norm": 1.3151731491088867, "learning_rate": 9.525157894736843e-05, "loss": 0.3577, "step": 38061 }, { "epoch": 2.131369694254676, "grad_norm": 1.1565687656402588, "learning_rate": 9.52513157894737e-05, "loss": 0.5799, "step": 38062 }, { "epoch": 2.131425691566805, "grad_norm": 1.2114107608795166, "learning_rate": 9.525105263157895e-05, "loss": 0.3779, "step": 38063 }, { "epoch": 2.131481688878934, "grad_norm": 1.0612064599990845, "learning_rate": 9.52507894736842e-05, "loss": 0.399, "step": 38064 }, { "epoch": 2.131537686191063, "grad_norm": 1.068852424621582, "learning_rate": 9.525052631578948e-05, "loss": 0.3575, "step": 38065 }, { "epoch": 2.131593683503192, "grad_norm": 0.9930023550987244, "learning_rate": 9.525026315789474e-05, "loss": 0.3041, "step": 38066 }, { "epoch": 2.131649680815321, "grad_norm": 1.4951878786087036, "learning_rate": 9.525000000000001e-05, "loss": 0.2975, "step": 38067 }, { "epoch": 2.13170567812745, "grad_norm": 1.0131851434707642, "learning_rate": 9.524973684210526e-05, "loss": 0.3439, "step": 38068 }, { "epoch": 2.131761675439579, "grad_norm": 1.3838732242584229, "learning_rate": 9.524947368421053e-05, "loss": 0.4531, "step": 38069 }, { "epoch": 2.131817672751708, "grad_norm": 1.0750999450683594, "learning_rate": 9.524921052631579e-05, "loss": 0.4624, "step": 38070 }, { "epoch": 2.131873670063837, "grad_norm": 1.3043855428695679, "learning_rate": 9.524894736842106e-05, "loss": 0.3655, "step": 38071 }, { "epoch": 2.131929667375966, "grad_norm": 1.5056402683258057, "learning_rate": 9.524868421052632e-05, "loss": 0.593, "step": 38072 }, { "epoch": 2.131985664688095, "grad_norm": 1.312848687171936, "learning_rate": 9.524842105263158e-05, "loss": 0.3436, "step": 38073 }, { "epoch": 2.132041662000224, "grad_norm": 3.372086763381958, "learning_rate": 9.524815789473684e-05, "loss": 0.4041, "step": 38074 }, { "epoch": 2.132097659312353, "grad_norm": 1.2546002864837646, "learning_rate": 9.524789473684212e-05, "loss": 0.4795, "step": 38075 
}, { "epoch": 2.132153656624482, "grad_norm": 1.248281717300415, "learning_rate": 9.524763157894738e-05, "loss": 0.3266, "step": 38076 }, { "epoch": 2.132209653936611, "grad_norm": 1.1348991394042969, "learning_rate": 9.524736842105264e-05, "loss": 0.4241, "step": 38077 }, { "epoch": 2.13226565124874, "grad_norm": 1.1503900289535522, "learning_rate": 9.52471052631579e-05, "loss": 0.4737, "step": 38078 }, { "epoch": 2.132321648560869, "grad_norm": 1.0026459693908691, "learning_rate": 9.524684210526317e-05, "loss": 0.2794, "step": 38079 }, { "epoch": 2.132377645872998, "grad_norm": 1.325727939605713, "learning_rate": 9.524657894736843e-05, "loss": 0.4621, "step": 38080 }, { "epoch": 2.132433643185127, "grad_norm": 1.221501350402832, "learning_rate": 9.524631578947369e-05, "loss": 0.385, "step": 38081 }, { "epoch": 2.132489640497256, "grad_norm": 1.2808952331542969, "learning_rate": 9.524605263157895e-05, "loss": 0.5001, "step": 38082 }, { "epoch": 2.132545637809385, "grad_norm": 1.339376449584961, "learning_rate": 9.524578947368421e-05, "loss": 0.3182, "step": 38083 }, { "epoch": 2.1326016351215142, "grad_norm": 1.396331548690796, "learning_rate": 9.524552631578948e-05, "loss": 0.6019, "step": 38084 }, { "epoch": 2.1326576324336433, "grad_norm": 1.290798306465149, "learning_rate": 9.524526315789474e-05, "loss": 0.4234, "step": 38085 }, { "epoch": 2.1327136297457723, "grad_norm": 1.3265392780303955, "learning_rate": 9.5245e-05, "loss": 0.471, "step": 38086 }, { "epoch": 2.1327696270579013, "grad_norm": 1.3487203121185303, "learning_rate": 9.524473684210526e-05, "loss": 0.3755, "step": 38087 }, { "epoch": 2.1328256243700303, "grad_norm": 1.2713758945465088, "learning_rate": 9.524447368421053e-05, "loss": 0.3466, "step": 38088 }, { "epoch": 2.1328816216821593, "grad_norm": 1.2795480489730835, "learning_rate": 9.524421052631579e-05, "loss": 0.4042, "step": 38089 }, { "epoch": 2.1329376189942884, "grad_norm": 1.3855855464935303, "learning_rate": 9.524394736842107e-05, 
"loss": 0.3244, "step": 38090 }, { "epoch": 2.1329936163064174, "grad_norm": 1.3090237379074097, "learning_rate": 9.524368421052631e-05, "loss": 0.4619, "step": 38091 }, { "epoch": 2.1330496136185464, "grad_norm": 1.1355621814727783, "learning_rate": 9.524342105263159e-05, "loss": 0.3734, "step": 38092 }, { "epoch": 2.1331056109306754, "grad_norm": 1.2517218589782715, "learning_rate": 9.524315789473685e-05, "loss": 0.4236, "step": 38093 }, { "epoch": 2.1331616082428044, "grad_norm": 1.0171056985855103, "learning_rate": 9.524289473684212e-05, "loss": 0.3167, "step": 38094 }, { "epoch": 2.1332176055549334, "grad_norm": 1.0247516632080078, "learning_rate": 9.524263157894738e-05, "loss": 0.3004, "step": 38095 }, { "epoch": 2.1332736028670625, "grad_norm": 1.3456711769104004, "learning_rate": 9.524236842105264e-05, "loss": 0.4945, "step": 38096 }, { "epoch": 2.1333296001791915, "grad_norm": 1.6759661436080933, "learning_rate": 9.52421052631579e-05, "loss": 0.5181, "step": 38097 }, { "epoch": 2.1333855974913205, "grad_norm": 1.3364152908325195, "learning_rate": 9.524184210526316e-05, "loss": 0.5071, "step": 38098 }, { "epoch": 2.1334415948034495, "grad_norm": 1.2659976482391357, "learning_rate": 9.524157894736843e-05, "loss": 0.3458, "step": 38099 }, { "epoch": 2.1334975921155785, "grad_norm": 1.069238543510437, "learning_rate": 9.524131578947369e-05, "loss": 0.4014, "step": 38100 }, { "epoch": 2.1335535894277076, "grad_norm": 1.1336287260055542, "learning_rate": 9.524105263157895e-05, "loss": 0.3361, "step": 38101 }, { "epoch": 2.1336095867398366, "grad_norm": 1.1548731327056885, "learning_rate": 9.524078947368421e-05, "loss": 0.3298, "step": 38102 }, { "epoch": 2.1336655840519656, "grad_norm": 1.1080031394958496, "learning_rate": 9.524052631578948e-05, "loss": 0.3494, "step": 38103 }, { "epoch": 2.1337215813640946, "grad_norm": 1.2724939584732056, "learning_rate": 9.524026315789474e-05, "loss": 0.4123, "step": 38104 }, { "epoch": 2.1337775786762236, "grad_norm": 
1.1961098909378052, "learning_rate": 9.524e-05, "loss": 0.4559, "step": 38105 }, { "epoch": 2.1338335759883527, "grad_norm": 1.2123501300811768, "learning_rate": 9.523973684210526e-05, "loss": 0.4127, "step": 38106 }, { "epoch": 2.1338895733004817, "grad_norm": 1.1664334535598755, "learning_rate": 9.523947368421054e-05, "loss": 0.3587, "step": 38107 }, { "epoch": 2.1339455706126107, "grad_norm": 1.2061718702316284, "learning_rate": 9.52392105263158e-05, "loss": 0.4268, "step": 38108 }, { "epoch": 2.1340015679247397, "grad_norm": 1.4267489910125732, "learning_rate": 9.523894736842106e-05, "loss": 0.4335, "step": 38109 }, { "epoch": 2.1340575652368687, "grad_norm": 1.1007779836654663, "learning_rate": 9.523868421052632e-05, "loss": 0.3474, "step": 38110 }, { "epoch": 2.1341135625489978, "grad_norm": 1.1919065713882446, "learning_rate": 9.523842105263159e-05, "loss": 0.4011, "step": 38111 }, { "epoch": 2.1341695598611268, "grad_norm": 1.30142343044281, "learning_rate": 9.523815789473685e-05, "loss": 0.321, "step": 38112 }, { "epoch": 2.134225557173256, "grad_norm": 1.7247234582901, "learning_rate": 9.523789473684212e-05, "loss": 0.5547, "step": 38113 }, { "epoch": 2.134281554485385, "grad_norm": 17.529237747192383, "learning_rate": 9.523763157894737e-05, "loss": 0.5024, "step": 38114 }, { "epoch": 2.134337551797514, "grad_norm": 1.0507961511611938, "learning_rate": 9.523736842105263e-05, "loss": 0.4108, "step": 38115 }, { "epoch": 2.134393549109643, "grad_norm": 1.1761435270309448, "learning_rate": 9.52371052631579e-05, "loss": 0.3861, "step": 38116 }, { "epoch": 2.134449546421772, "grad_norm": 1.165257215499878, "learning_rate": 9.523684210526316e-05, "loss": 0.3681, "step": 38117 }, { "epoch": 2.134505543733901, "grad_norm": 1.3846526145935059, "learning_rate": 9.523657894736842e-05, "loss": 0.4921, "step": 38118 }, { "epoch": 2.13456154104603, "grad_norm": 1.1170088052749634, "learning_rate": 9.523631578947368e-05, "loss": 0.3884, "step": 38119 }, { "epoch": 
2.134617538358159, "grad_norm": 1.1309449672698975, "learning_rate": 9.523605263157895e-05, "loss": 0.4525, "step": 38120 }, { "epoch": 2.134673535670288, "grad_norm": 7.3944091796875, "learning_rate": 9.523578947368421e-05, "loss": 0.3676, "step": 38121 }, { "epoch": 2.134729532982417, "grad_norm": 1.280097484588623, "learning_rate": 9.523552631578949e-05, "loss": 0.4587, "step": 38122 }, { "epoch": 2.134785530294546, "grad_norm": 1.298851490020752, "learning_rate": 9.523526315789473e-05, "loss": 0.5293, "step": 38123 }, { "epoch": 2.134841527606675, "grad_norm": 1.1458475589752197, "learning_rate": 9.5235e-05, "loss": 0.4269, "step": 38124 }, { "epoch": 2.134897524918804, "grad_norm": 1.4874486923217773, "learning_rate": 9.523473684210527e-05, "loss": 0.4289, "step": 38125 }, { "epoch": 2.134953522230933, "grad_norm": 1.2012462615966797, "learning_rate": 9.523447368421054e-05, "loss": 0.4538, "step": 38126 }, { "epoch": 2.135009519543062, "grad_norm": 0.940038800239563, "learning_rate": 9.52342105263158e-05, "loss": 0.2816, "step": 38127 }, { "epoch": 2.135065516855191, "grad_norm": 1.2139818668365479, "learning_rate": 9.523394736842106e-05, "loss": 0.4267, "step": 38128 }, { "epoch": 2.13512151416732, "grad_norm": 1.2821401357650757, "learning_rate": 9.523368421052632e-05, "loss": 0.4666, "step": 38129 }, { "epoch": 2.135177511479449, "grad_norm": 1.4604532718658447, "learning_rate": 9.523342105263159e-05, "loss": 0.4266, "step": 38130 }, { "epoch": 2.135233508791578, "grad_norm": 1.1600923538208008, "learning_rate": 9.523315789473685e-05, "loss": 0.3544, "step": 38131 }, { "epoch": 2.135289506103707, "grad_norm": 1.4497381448745728, "learning_rate": 9.523289473684211e-05, "loss": 0.6255, "step": 38132 }, { "epoch": 2.135345503415836, "grad_norm": 1.1776341199874878, "learning_rate": 9.523263157894737e-05, "loss": 0.4341, "step": 38133 }, { "epoch": 2.135401500727965, "grad_norm": 1.1329070329666138, "learning_rate": 9.523236842105263e-05, "loss": 0.3535, 
"step": 38134 }, { "epoch": 2.135457498040094, "grad_norm": 1.3169541358947754, "learning_rate": 9.52321052631579e-05, "loss": 0.4009, "step": 38135 }, { "epoch": 2.1355134953522232, "grad_norm": 1.129478096961975, "learning_rate": 9.523184210526316e-05, "loss": 0.3754, "step": 38136 }, { "epoch": 2.1355694926643523, "grad_norm": 1.1229019165039062, "learning_rate": 9.523157894736842e-05, "loss": 0.3577, "step": 38137 }, { "epoch": 2.1356254899764813, "grad_norm": 1.3719444274902344, "learning_rate": 9.523131578947368e-05, "loss": 0.4032, "step": 38138 }, { "epoch": 2.1356814872886103, "grad_norm": 1.0451343059539795, "learning_rate": 9.523105263157896e-05, "loss": 0.3173, "step": 38139 }, { "epoch": 2.1357374846007393, "grad_norm": 1.2154014110565186, "learning_rate": 9.523078947368422e-05, "loss": 0.3402, "step": 38140 }, { "epoch": 2.1357934819128683, "grad_norm": 1.2046551704406738, "learning_rate": 9.523052631578948e-05, "loss": 0.2937, "step": 38141 }, { "epoch": 2.1358494792249973, "grad_norm": 1.037703275680542, "learning_rate": 9.523026315789473e-05, "loss": 0.4117, "step": 38142 }, { "epoch": 2.1359054765371264, "grad_norm": 1.2204055786132812, "learning_rate": 9.523000000000001e-05, "loss": 0.3595, "step": 38143 }, { "epoch": 2.1359614738492554, "grad_norm": 1.3470975160598755, "learning_rate": 9.522973684210527e-05, "loss": 0.3916, "step": 38144 }, { "epoch": 2.1360174711613844, "grad_norm": 1.1628057956695557, "learning_rate": 9.522947368421054e-05, "loss": 0.4497, "step": 38145 }, { "epoch": 2.1360734684735134, "grad_norm": 1.1477094888687134, "learning_rate": 9.522921052631579e-05, "loss": 0.3363, "step": 38146 }, { "epoch": 2.1361294657856424, "grad_norm": 0.9284495115280151, "learning_rate": 9.522894736842106e-05, "loss": 0.3003, "step": 38147 }, { "epoch": 2.1361854630977715, "grad_norm": 1.283875823020935, "learning_rate": 9.522868421052632e-05, "loss": 0.3802, "step": 38148 }, { "epoch": 2.1362414604099005, "grad_norm": 1.2610158920288086, 
"learning_rate": 9.52284210526316e-05, "loss": 0.4136, "step": 38149 }, { "epoch": 2.1362974577220295, "grad_norm": 1.7163478136062622, "learning_rate": 9.522815789473685e-05, "loss": 0.391, "step": 38150 }, { "epoch": 2.1363534550341585, "grad_norm": 1.3285573720932007, "learning_rate": 9.52278947368421e-05, "loss": 0.583, "step": 38151 }, { "epoch": 2.1364094523462875, "grad_norm": 1.1929993629455566, "learning_rate": 9.522763157894737e-05, "loss": 0.4423, "step": 38152 }, { "epoch": 2.1364654496584166, "grad_norm": 1.0414402484893799, "learning_rate": 9.522736842105263e-05, "loss": 0.3851, "step": 38153 }, { "epoch": 2.1365214469705456, "grad_norm": 1.5661691427230835, "learning_rate": 9.52271052631579e-05, "loss": 0.4844, "step": 38154 }, { "epoch": 2.1365774442826746, "grad_norm": 1.4953093528747559, "learning_rate": 9.522684210526317e-05, "loss": 0.4092, "step": 38155 }, { "epoch": 2.1366334415948036, "grad_norm": 1.1428807973861694, "learning_rate": 9.522657894736843e-05, "loss": 0.4427, "step": 38156 }, { "epoch": 2.1366894389069326, "grad_norm": 1.4431251287460327, "learning_rate": 9.522631578947368e-05, "loss": 0.3928, "step": 38157 }, { "epoch": 2.1367454362190617, "grad_norm": 1.0334142446517944, "learning_rate": 9.522605263157896e-05, "loss": 0.3657, "step": 38158 }, { "epoch": 2.1368014335311907, "grad_norm": 1.386918544769287, "learning_rate": 9.522578947368422e-05, "loss": 0.5782, "step": 38159 }, { "epoch": 2.1368574308433197, "grad_norm": 1.8670121431350708, "learning_rate": 9.522552631578948e-05, "loss": 0.5832, "step": 38160 }, { "epoch": 2.1369134281554487, "grad_norm": 1.3642046451568604, "learning_rate": 9.522526315789474e-05, "loss": 0.4891, "step": 38161 }, { "epoch": 2.1369694254675777, "grad_norm": 1.1905229091644287, "learning_rate": 9.522500000000001e-05, "loss": 0.394, "step": 38162 }, { "epoch": 2.1370254227797068, "grad_norm": 1.1040364503860474, "learning_rate": 9.522473684210527e-05, "loss": 0.4327, "step": 38163 }, { "epoch": 
2.1370814200918358, "grad_norm": 1.4536675214767456, "learning_rate": 9.522447368421053e-05, "loss": 0.3779, "step": 38164 }, { "epoch": 2.137137417403965, "grad_norm": 1.1703200340270996, "learning_rate": 9.522421052631579e-05, "loss": 0.5, "step": 38165 }, { "epoch": 2.137193414716094, "grad_norm": 1.2313188314437866, "learning_rate": 9.522394736842106e-05, "loss": 0.4218, "step": 38166 }, { "epoch": 2.137249412028223, "grad_norm": 1.3737343549728394, "learning_rate": 9.522368421052632e-05, "loss": 0.4801, "step": 38167 }, { "epoch": 2.137305409340352, "grad_norm": 1.1587690114974976, "learning_rate": 9.522342105263158e-05, "loss": 0.449, "step": 38168 }, { "epoch": 2.137361406652481, "grad_norm": 1.213584065437317, "learning_rate": 9.522315789473684e-05, "loss": 0.6608, "step": 38169 }, { "epoch": 2.13741740396461, "grad_norm": 1.144053339958191, "learning_rate": 9.52228947368421e-05, "loss": 0.3027, "step": 38170 }, { "epoch": 2.137473401276739, "grad_norm": 1.5866066217422485, "learning_rate": 9.522263157894738e-05, "loss": 0.3931, "step": 38171 }, { "epoch": 2.137529398588868, "grad_norm": 1.2629061937332153, "learning_rate": 9.522236842105264e-05, "loss": 0.3728, "step": 38172 }, { "epoch": 2.137585395900997, "grad_norm": 1.3687466382980347, "learning_rate": 9.52221052631579e-05, "loss": 0.5041, "step": 38173 }, { "epoch": 2.137641393213126, "grad_norm": 1.656934142112732, "learning_rate": 9.522184210526315e-05, "loss": 0.405, "step": 38174 }, { "epoch": 2.137697390525255, "grad_norm": 1.3004800081253052, "learning_rate": 9.522157894736843e-05, "loss": 0.4654, "step": 38175 }, { "epoch": 2.137753387837384, "grad_norm": 1.2191650867462158, "learning_rate": 9.522131578947369e-05, "loss": 0.4331, "step": 38176 }, { "epoch": 2.137809385149513, "grad_norm": 1.1870242357254028, "learning_rate": 9.522105263157896e-05, "loss": 0.4856, "step": 38177 }, { "epoch": 2.137865382461642, "grad_norm": 1.1643965244293213, "learning_rate": 9.522078947368421e-05, "loss": 
0.5596, "step": 38178 }, { "epoch": 2.137921379773771, "grad_norm": 1.1035288572311401, "learning_rate": 9.522052631578948e-05, "loss": 0.3734, "step": 38179 }, { "epoch": 2.1379773770859, "grad_norm": 1.2940701246261597, "learning_rate": 9.522026315789474e-05, "loss": 0.4103, "step": 38180 }, { "epoch": 2.138033374398029, "grad_norm": 1.2635301351547241, "learning_rate": 9.522000000000001e-05, "loss": 0.3733, "step": 38181 }, { "epoch": 2.138089371710158, "grad_norm": 1.1572139263153076, "learning_rate": 9.521973684210527e-05, "loss": 0.3266, "step": 38182 }, { "epoch": 2.138145369022287, "grad_norm": 1.041929841041565, "learning_rate": 9.521947368421053e-05, "loss": 0.3002, "step": 38183 }, { "epoch": 2.138201366334416, "grad_norm": 0.955590546131134, "learning_rate": 9.521921052631579e-05, "loss": 0.2736, "step": 38184 }, { "epoch": 2.138257363646545, "grad_norm": 1.9749939441680908, "learning_rate": 9.521894736842105e-05, "loss": 0.3939, "step": 38185 }, { "epoch": 2.1383133609586737, "grad_norm": 1.3457142114639282, "learning_rate": 9.521868421052633e-05, "loss": 0.422, "step": 38186 }, { "epoch": 2.138369358270803, "grad_norm": 1.1366219520568848, "learning_rate": 9.521842105263159e-05, "loss": 0.3412, "step": 38187 }, { "epoch": 2.138425355582932, "grad_norm": 1.5393035411834717, "learning_rate": 9.521815789473684e-05, "loss": 0.4015, "step": 38188 }, { "epoch": 2.1384813528950612, "grad_norm": 1.14662766456604, "learning_rate": 9.52178947368421e-05, "loss": 0.329, "step": 38189 }, { "epoch": 2.13853735020719, "grad_norm": 1.3876137733459473, "learning_rate": 9.521763157894738e-05, "loss": 0.4296, "step": 38190 }, { "epoch": 2.1385933475193193, "grad_norm": 1.0128370523452759, "learning_rate": 9.521736842105264e-05, "loss": 0.3615, "step": 38191 }, { "epoch": 2.138649344831448, "grad_norm": 1.2008684873580933, "learning_rate": 9.52171052631579e-05, "loss": 0.3374, "step": 38192 }, { "epoch": 2.1387053421435773, "grad_norm": 1.228963017463684, 
"learning_rate": 9.521684210526316e-05, "loss": 0.4207, "step": 38193 }, { "epoch": 2.138761339455706, "grad_norm": 1.266869068145752, "learning_rate": 9.521657894736843e-05, "loss": 0.4245, "step": 38194 }, { "epoch": 2.1388173367678354, "grad_norm": 1.4772392511367798, "learning_rate": 9.521631578947369e-05, "loss": 0.4317, "step": 38195 }, { "epoch": 2.138873334079964, "grad_norm": 1.2737996578216553, "learning_rate": 9.521605263157895e-05, "loss": 0.5218, "step": 38196 }, { "epoch": 2.1389293313920934, "grad_norm": 1.0991106033325195, "learning_rate": 9.521578947368421e-05, "loss": 0.3129, "step": 38197 }, { "epoch": 2.138985328704222, "grad_norm": 1.2733256816864014, "learning_rate": 9.521552631578948e-05, "loss": 0.3836, "step": 38198 }, { "epoch": 2.1390413260163514, "grad_norm": 1.081042766571045, "learning_rate": 9.521526315789474e-05, "loss": 0.2377, "step": 38199 }, { "epoch": 2.13909732332848, "grad_norm": 1.481172800064087, "learning_rate": 9.521500000000002e-05, "loss": 0.4217, "step": 38200 }, { "epoch": 2.1391533206406095, "grad_norm": 1.7473317384719849, "learning_rate": 9.521473684210526e-05, "loss": 0.3945, "step": 38201 }, { "epoch": 2.139209317952738, "grad_norm": 1.254936933517456, "learning_rate": 9.521447368421052e-05, "loss": 0.3956, "step": 38202 }, { "epoch": 2.1392653152648675, "grad_norm": 1.0650924444198608, "learning_rate": 9.52142105263158e-05, "loss": 0.3619, "step": 38203 }, { "epoch": 2.139321312576996, "grad_norm": 1.7617086172103882, "learning_rate": 9.521394736842105e-05, "loss": 0.4086, "step": 38204 }, { "epoch": 2.1393773098891256, "grad_norm": 1.22715163230896, "learning_rate": 9.521368421052633e-05, "loss": 0.3465, "step": 38205 }, { "epoch": 2.139433307201254, "grad_norm": 1.155155062675476, "learning_rate": 9.521342105263157e-05, "loss": 0.3816, "step": 38206 }, { "epoch": 2.139489304513383, "grad_norm": 1.2315603494644165, "learning_rate": 9.521315789473685e-05, "loss": 0.3732, "step": 38207 }, { "epoch": 
2.139545301825512, "grad_norm": 1.3409041166305542, "learning_rate": 9.521289473684211e-05, "loss": 0.3569, "step": 38208 }, { "epoch": 2.139601299137641, "grad_norm": 1.539539098739624, "learning_rate": 9.521263157894738e-05, "loss": 0.4029, "step": 38209 }, { "epoch": 2.13965729644977, "grad_norm": 1.4609636068344116, "learning_rate": 9.521236842105263e-05, "loss": 0.473, "step": 38210 }, { "epoch": 2.1397132937618992, "grad_norm": 1.2497127056121826, "learning_rate": 9.52121052631579e-05, "loss": 0.4548, "step": 38211 }, { "epoch": 2.1397692910740282, "grad_norm": 1.1959890127182007, "learning_rate": 9.521184210526316e-05, "loss": 0.4805, "step": 38212 }, { "epoch": 2.1398252883861573, "grad_norm": 1.2456157207489014, "learning_rate": 9.521157894736843e-05, "loss": 0.6036, "step": 38213 }, { "epoch": 2.1398812856982863, "grad_norm": 1.2958616018295288, "learning_rate": 9.521131578947369e-05, "loss": 0.4659, "step": 38214 }, { "epoch": 2.1399372830104153, "grad_norm": 1.251543402671814, "learning_rate": 9.521105263157895e-05, "loss": 0.3911, "step": 38215 }, { "epoch": 2.1399932803225443, "grad_norm": 1.1401814222335815, "learning_rate": 9.521078947368421e-05, "loss": 0.4113, "step": 38216 }, { "epoch": 2.1400492776346733, "grad_norm": 1.1797345876693726, "learning_rate": 9.521052631578949e-05, "loss": 0.3466, "step": 38217 }, { "epoch": 2.1401052749468024, "grad_norm": 1.1843957901000977, "learning_rate": 9.521026315789475e-05, "loss": 0.3757, "step": 38218 }, { "epoch": 2.1401612722589314, "grad_norm": 1.1159995794296265, "learning_rate": 9.521e-05, "loss": 0.3955, "step": 38219 }, { "epoch": 2.1402172695710604, "grad_norm": 2.6713151931762695, "learning_rate": 9.520973684210526e-05, "loss": 0.5151, "step": 38220 }, { "epoch": 2.1402732668831894, "grad_norm": 1.3884177207946777, "learning_rate": 9.520947368421052e-05, "loss": 0.4047, "step": 38221 }, { "epoch": 2.1403292641953184, "grad_norm": 1.573036551475525, "learning_rate": 9.52092105263158e-05, "loss": 
0.4764, "step": 38222 }, { "epoch": 2.1403852615074475, "grad_norm": 2.4388699531555176, "learning_rate": 9.520894736842106e-05, "loss": 0.3302, "step": 38223 }, { "epoch": 2.1404412588195765, "grad_norm": 1.01044499874115, "learning_rate": 9.520868421052632e-05, "loss": 0.4048, "step": 38224 }, { "epoch": 2.1404972561317055, "grad_norm": 1.3563491106033325, "learning_rate": 9.520842105263158e-05, "loss": 0.3775, "step": 38225 }, { "epoch": 2.1405532534438345, "grad_norm": 1.245753288269043, "learning_rate": 9.520815789473685e-05, "loss": 0.3903, "step": 38226 }, { "epoch": 2.1406092507559635, "grad_norm": 1.3333702087402344, "learning_rate": 9.520789473684211e-05, "loss": 0.3899, "step": 38227 }, { "epoch": 2.1406652480680926, "grad_norm": 1.3277714252471924, "learning_rate": 9.520763157894737e-05, "loss": 0.3534, "step": 38228 }, { "epoch": 2.1407212453802216, "grad_norm": 1.1180626153945923, "learning_rate": 9.520736842105263e-05, "loss": 0.3965, "step": 38229 }, { "epoch": 2.1407772426923506, "grad_norm": 1.2243865728378296, "learning_rate": 9.52071052631579e-05, "loss": 0.416, "step": 38230 }, { "epoch": 2.1408332400044796, "grad_norm": 1.4420181512832642, "learning_rate": 9.520684210526316e-05, "loss": 0.4152, "step": 38231 }, { "epoch": 2.1408892373166086, "grad_norm": 1.1621487140655518, "learning_rate": 9.520657894736844e-05, "loss": 0.3386, "step": 38232 }, { "epoch": 2.1409452346287376, "grad_norm": 1.1099042892456055, "learning_rate": 9.520631578947368e-05, "loss": 0.356, "step": 38233 }, { "epoch": 2.1410012319408667, "grad_norm": 1.3326021432876587, "learning_rate": 9.520605263157896e-05, "loss": 0.549, "step": 38234 }, { "epoch": 2.1410572292529957, "grad_norm": 1.1555290222167969, "learning_rate": 9.520578947368421e-05, "loss": 0.3943, "step": 38235 }, { "epoch": 2.1411132265651247, "grad_norm": 1.0788766145706177, "learning_rate": 9.520552631578947e-05, "loss": 0.4022, "step": 38236 }, { "epoch": 2.1411692238772537, "grad_norm": 1.3333680629730225, 
"learning_rate": 9.520526315789475e-05, "loss": 0.4609, "step": 38237 }, { "epoch": 2.1412252211893827, "grad_norm": 1.1839755773544312, "learning_rate": 9.5205e-05, "loss": 0.4888, "step": 38238 }, { "epoch": 2.1412812185015118, "grad_norm": 1.1685596704483032, "learning_rate": 9.520473684210527e-05, "loss": 0.3532, "step": 38239 }, { "epoch": 2.141337215813641, "grad_norm": 1.1523849964141846, "learning_rate": 9.520447368421053e-05, "loss": 0.4416, "step": 38240 }, { "epoch": 2.14139321312577, "grad_norm": 1.5175058841705322, "learning_rate": 9.52042105263158e-05, "loss": 0.3381, "step": 38241 }, { "epoch": 2.141449210437899, "grad_norm": 1.2360601425170898, "learning_rate": 9.520394736842106e-05, "loss": 0.3368, "step": 38242 }, { "epoch": 2.141505207750028, "grad_norm": 1.612764596939087, "learning_rate": 9.520368421052632e-05, "loss": 0.4384, "step": 38243 }, { "epoch": 2.141561205062157, "grad_norm": 1.1987849473953247, "learning_rate": 9.520342105263158e-05, "loss": 0.4229, "step": 38244 }, { "epoch": 2.141617202374286, "grad_norm": 1.4699829816818237, "learning_rate": 9.520315789473685e-05, "loss": 0.3507, "step": 38245 }, { "epoch": 2.141673199686415, "grad_norm": 1.0830490589141846, "learning_rate": 9.520289473684211e-05, "loss": 0.3972, "step": 38246 }, { "epoch": 2.141729196998544, "grad_norm": 1.5250779390335083, "learning_rate": 9.520263157894737e-05, "loss": 0.3755, "step": 38247 }, { "epoch": 2.141785194310673, "grad_norm": 1.159103512763977, "learning_rate": 9.520236842105263e-05, "loss": 0.4339, "step": 38248 }, { "epoch": 2.141841191622802, "grad_norm": 1.3004153966903687, "learning_rate": 9.52021052631579e-05, "loss": 0.5246, "step": 38249 }, { "epoch": 2.141897188934931, "grad_norm": 1.6057721376419067, "learning_rate": 9.520184210526316e-05, "loss": 0.6392, "step": 38250 }, { "epoch": 2.14195318624706, "grad_norm": 1.3801565170288086, "learning_rate": 9.520157894736842e-05, "loss": 0.4938, "step": 38251 }, { "epoch": 2.142009183559189, 
"grad_norm": 1.3770190477371216, "learning_rate": 9.520131578947368e-05, "loss": 0.4209, "step": 38252 }, { "epoch": 2.142065180871318, "grad_norm": 1.5340890884399414, "learning_rate": 9.520105263157894e-05, "loss": 0.3501, "step": 38253 }, { "epoch": 2.142121178183447, "grad_norm": 1.3469096422195435, "learning_rate": 9.520078947368422e-05, "loss": 0.3959, "step": 38254 }, { "epoch": 2.142177175495576, "grad_norm": 2.013986349105835, "learning_rate": 9.520052631578948e-05, "loss": 0.3397, "step": 38255 }, { "epoch": 2.142233172807705, "grad_norm": 1.247554063796997, "learning_rate": 9.520026315789474e-05, "loss": 0.4052, "step": 38256 }, { "epoch": 2.142289170119834, "grad_norm": 1.3199414014816284, "learning_rate": 9.52e-05, "loss": 0.4163, "step": 38257 }, { "epoch": 2.142345167431963, "grad_norm": 1.2954035997390747, "learning_rate": 9.519973684210527e-05, "loss": 0.3952, "step": 38258 }, { "epoch": 2.142401164744092, "grad_norm": 3.7618207931518555, "learning_rate": 9.519947368421053e-05, "loss": 0.3623, "step": 38259 }, { "epoch": 2.142457162056221, "grad_norm": 1.317212462425232, "learning_rate": 9.51992105263158e-05, "loss": 0.3576, "step": 38260 }, { "epoch": 2.14251315936835, "grad_norm": 1.2333412170410156, "learning_rate": 9.519894736842105e-05, "loss": 0.3835, "step": 38261 }, { "epoch": 2.142569156680479, "grad_norm": 1.303763747215271, "learning_rate": 9.519868421052632e-05, "loss": 0.4328, "step": 38262 }, { "epoch": 2.1426251539926082, "grad_norm": 1.2166461944580078, "learning_rate": 9.519842105263158e-05, "loss": 0.4558, "step": 38263 }, { "epoch": 2.1426811513047372, "grad_norm": 1.468721628189087, "learning_rate": 9.519815789473686e-05, "loss": 0.3667, "step": 38264 }, { "epoch": 2.1427371486168663, "grad_norm": 1.43351411819458, "learning_rate": 9.51978947368421e-05, "loss": 0.4482, "step": 38265 }, { "epoch": 2.1427931459289953, "grad_norm": 1.1382423639297485, "learning_rate": 9.519763157894737e-05, "loss": 0.3623, "step": 38266 }, { 
"epoch": 2.1428491432411243, "grad_norm": 1.515245795249939, "learning_rate": 9.519736842105263e-05, "loss": 0.4972, "step": 38267 }, { "epoch": 2.1429051405532533, "grad_norm": 1.1439354419708252, "learning_rate": 9.519710526315791e-05, "loss": 0.2823, "step": 38268 }, { "epoch": 2.1429611378653823, "grad_norm": 1.1343514919281006, "learning_rate": 9.519684210526317e-05, "loss": 0.5263, "step": 38269 }, { "epoch": 2.1430171351775114, "grad_norm": 1.220656394958496, "learning_rate": 9.519657894736843e-05, "loss": 0.4349, "step": 38270 }, { "epoch": 2.1430731324896404, "grad_norm": 1.3684910535812378, "learning_rate": 9.519631578947369e-05, "loss": 0.4427, "step": 38271 }, { "epoch": 2.1431291298017694, "grad_norm": 1.428341269493103, "learning_rate": 9.519605263157895e-05, "loss": 0.317, "step": 38272 }, { "epoch": 2.1431851271138984, "grad_norm": 1.359214186668396, "learning_rate": 9.519578947368422e-05, "loss": 0.5043, "step": 38273 }, { "epoch": 2.1432411244260274, "grad_norm": 1.393699288368225, "learning_rate": 9.519552631578948e-05, "loss": 0.5634, "step": 38274 }, { "epoch": 2.1432971217381565, "grad_norm": 1.0113399028778076, "learning_rate": 9.519526315789474e-05, "loss": 0.3596, "step": 38275 }, { "epoch": 2.1433531190502855, "grad_norm": 1.1781200170516968, "learning_rate": 9.5195e-05, "loss": 0.515, "step": 38276 }, { "epoch": 2.1434091163624145, "grad_norm": 1.2815715074539185, "learning_rate": 9.519473684210527e-05, "loss": 0.7271, "step": 38277 }, { "epoch": 2.1434651136745435, "grad_norm": 0.9573467373847961, "learning_rate": 9.519447368421053e-05, "loss": 0.3249, "step": 38278 }, { "epoch": 2.1435211109866725, "grad_norm": 1.0952003002166748, "learning_rate": 9.519421052631579e-05, "loss": 0.3179, "step": 38279 }, { "epoch": 2.1435771082988015, "grad_norm": 1.7654109001159668, "learning_rate": 9.519394736842105e-05, "loss": 0.5171, "step": 38280 }, { "epoch": 2.1436331056109306, "grad_norm": 1.0280646085739136, "learning_rate": 
9.519368421052632e-05, "loss": 0.347, "step": 38281 }, { "epoch": 2.1436891029230596, "grad_norm": 1.328781008720398, "learning_rate": 9.519342105263158e-05, "loss": 0.4942, "step": 38282 }, { "epoch": 2.1437451002351886, "grad_norm": 1.1049416065216064, "learning_rate": 9.519315789473684e-05, "loss": 0.4046, "step": 38283 }, { "epoch": 2.1438010975473176, "grad_norm": 1.1152710914611816, "learning_rate": 9.51928947368421e-05, "loss": 0.3752, "step": 38284 }, { "epoch": 2.1438570948594466, "grad_norm": 1.3638591766357422, "learning_rate": 9.519263157894738e-05, "loss": 0.4311, "step": 38285 }, { "epoch": 2.1439130921715757, "grad_norm": 1.081524133682251, "learning_rate": 9.519236842105264e-05, "loss": 0.3546, "step": 38286 }, { "epoch": 2.1439690894837047, "grad_norm": 1.1396889686584473, "learning_rate": 9.519210526315791e-05, "loss": 0.3938, "step": 38287 }, { "epoch": 2.1440250867958337, "grad_norm": 1.2552344799041748, "learning_rate": 9.519184210526316e-05, "loss": 0.3956, "step": 38288 }, { "epoch": 2.1440810841079627, "grad_norm": 1.2583619356155396, "learning_rate": 9.519157894736842e-05, "loss": 0.3633, "step": 38289 }, { "epoch": 2.1441370814200917, "grad_norm": 1.4059014320373535, "learning_rate": 9.519131578947369e-05, "loss": 0.3834, "step": 38290 }, { "epoch": 2.1441930787322208, "grad_norm": 22.061979293823242, "learning_rate": 9.519105263157895e-05, "loss": 0.482, "step": 38291 }, { "epoch": 2.1442490760443498, "grad_norm": 1.2164620161056519, "learning_rate": 9.519078947368422e-05, "loss": 0.4478, "step": 38292 }, { "epoch": 2.144305073356479, "grad_norm": 1.2906829118728638, "learning_rate": 9.519052631578947e-05, "loss": 0.4366, "step": 38293 }, { "epoch": 2.144361070668608, "grad_norm": 1.21834397315979, "learning_rate": 9.519026315789474e-05, "loss": 0.3554, "step": 38294 }, { "epoch": 2.144417067980737, "grad_norm": 1.115110158920288, "learning_rate": 9.519e-05, "loss": 0.3624, "step": 38295 }, { "epoch": 2.144473065292866, "grad_norm": 
2.6654458045959473, "learning_rate": 9.518973684210527e-05, "loss": 0.4215, "step": 38296 }, { "epoch": 2.144529062604995, "grad_norm": 1.1112185716629028, "learning_rate": 9.518947368421053e-05, "loss": 0.3235, "step": 38297 }, { "epoch": 2.144585059917124, "grad_norm": 1.0276960134506226, "learning_rate": 9.51892105263158e-05, "loss": 0.3116, "step": 38298 }, { "epoch": 2.144641057229253, "grad_norm": 1.3983666896820068, "learning_rate": 9.518894736842105e-05, "loss": 0.4774, "step": 38299 }, { "epoch": 2.144697054541382, "grad_norm": 1.3449548482894897, "learning_rate": 9.518868421052633e-05, "loss": 0.4011, "step": 38300 }, { "epoch": 2.144753051853511, "grad_norm": 1.0634759664535522, "learning_rate": 9.518842105263159e-05, "loss": 0.3559, "step": 38301 }, { "epoch": 2.14480904916564, "grad_norm": 1.1814496517181396, "learning_rate": 9.518815789473685e-05, "loss": 0.4522, "step": 38302 }, { "epoch": 2.144865046477769, "grad_norm": 1.2169724702835083, "learning_rate": 9.51878947368421e-05, "loss": 0.3815, "step": 38303 }, { "epoch": 2.144921043789898, "grad_norm": 1.457496166229248, "learning_rate": 9.518763157894738e-05, "loss": 0.4407, "step": 38304 }, { "epoch": 2.144977041102027, "grad_norm": 1.0918076038360596, "learning_rate": 9.518736842105264e-05, "loss": 0.5094, "step": 38305 }, { "epoch": 2.145033038414156, "grad_norm": 1.5331761837005615, "learning_rate": 9.51871052631579e-05, "loss": 0.4693, "step": 38306 }, { "epoch": 2.145089035726285, "grad_norm": 1.241992712020874, "learning_rate": 9.518684210526316e-05, "loss": 0.3811, "step": 38307 }, { "epoch": 2.145145033038414, "grad_norm": 1.1199181079864502, "learning_rate": 9.518657894736842e-05, "loss": 0.355, "step": 38308 }, { "epoch": 2.145201030350543, "grad_norm": 1.2314311265945435, "learning_rate": 9.518631578947369e-05, "loss": 0.3225, "step": 38309 }, { "epoch": 2.145257027662672, "grad_norm": 1.224465250968933, "learning_rate": 9.518605263157895e-05, "loss": 0.4511, "step": 38310 }, { "epoch": 
2.145313024974801, "grad_norm": 1.66157865524292, "learning_rate": 9.518578947368421e-05, "loss": 0.3905, "step": 38311 }, { "epoch": 2.14536902228693, "grad_norm": 1.0555822849273682, "learning_rate": 9.518552631578947e-05, "loss": 0.4319, "step": 38312 }, { "epoch": 2.145425019599059, "grad_norm": 1.0612894296646118, "learning_rate": 9.518526315789474e-05, "loss": 0.3882, "step": 38313 }, { "epoch": 2.145481016911188, "grad_norm": 1.2183176279067993, "learning_rate": 9.5185e-05, "loss": 0.3918, "step": 38314 }, { "epoch": 2.145537014223317, "grad_norm": 1.1744695901870728, "learning_rate": 9.518473684210528e-05, "loss": 0.4009, "step": 38315 }, { "epoch": 2.1455930115354462, "grad_norm": 1.217829704284668, "learning_rate": 9.518447368421052e-05, "loss": 0.4671, "step": 38316 }, { "epoch": 2.1456490088475753, "grad_norm": 1.1557682752609253, "learning_rate": 9.51842105263158e-05, "loss": 0.3153, "step": 38317 }, { "epoch": 2.1457050061597043, "grad_norm": 1.12213134765625, "learning_rate": 9.518394736842106e-05, "loss": 0.3185, "step": 38318 }, { "epoch": 2.1457610034718333, "grad_norm": 1.2991222143173218, "learning_rate": 9.518368421052633e-05, "loss": 0.3617, "step": 38319 }, { "epoch": 2.1458170007839623, "grad_norm": 1.6040012836456299, "learning_rate": 9.518342105263158e-05, "loss": 0.4461, "step": 38320 }, { "epoch": 2.1458729980960913, "grad_norm": 1.1934152841567993, "learning_rate": 9.518315789473685e-05, "loss": 0.3457, "step": 38321 }, { "epoch": 2.1459289954082204, "grad_norm": 1.2900394201278687, "learning_rate": 9.518289473684211e-05, "loss": 0.4278, "step": 38322 }, { "epoch": 2.1459849927203494, "grad_norm": 1.6887977123260498, "learning_rate": 9.518263157894737e-05, "loss": 0.4584, "step": 38323 }, { "epoch": 2.1460409900324784, "grad_norm": 1.2036949396133423, "learning_rate": 9.518236842105264e-05, "loss": 0.4191, "step": 38324 }, { "epoch": 2.1460969873446074, "grad_norm": 1.3448704481124878, "learning_rate": 9.518210526315789e-05, "loss": 
0.3949, "step": 38325 }, { "epoch": 2.1461529846567364, "grad_norm": 1.206503987312317, "learning_rate": 9.518184210526316e-05, "loss": 0.4296, "step": 38326 }, { "epoch": 2.1462089819688654, "grad_norm": 1.0800708532333374, "learning_rate": 9.518157894736842e-05, "loss": 0.3174, "step": 38327 }, { "epoch": 2.1462649792809945, "grad_norm": 1.0248748064041138, "learning_rate": 9.51813157894737e-05, "loss": 0.2715, "step": 38328 }, { "epoch": 2.1463209765931235, "grad_norm": 1.1427592039108276, "learning_rate": 9.518105263157895e-05, "loss": 0.3067, "step": 38329 }, { "epoch": 2.1463769739052525, "grad_norm": 1.1758934259414673, "learning_rate": 9.518078947368421e-05, "loss": 0.3684, "step": 38330 }, { "epoch": 2.1464329712173815, "grad_norm": 1.4410475492477417, "learning_rate": 9.518052631578947e-05, "loss": 0.4653, "step": 38331 }, { "epoch": 2.1464889685295105, "grad_norm": 1.2631841897964478, "learning_rate": 9.518026315789475e-05, "loss": 0.4607, "step": 38332 }, { "epoch": 2.1465449658416396, "grad_norm": 1.0833930969238281, "learning_rate": 9.518000000000001e-05, "loss": 0.3683, "step": 38333 }, { "epoch": 2.1466009631537686, "grad_norm": 1.2091177701950073, "learning_rate": 9.517973684210527e-05, "loss": 0.3553, "step": 38334 }, { "epoch": 2.1466569604658976, "grad_norm": 1.1710312366485596, "learning_rate": 9.517947368421053e-05, "loss": 0.3622, "step": 38335 }, { "epoch": 2.1467129577780266, "grad_norm": 1.8413041830062866, "learning_rate": 9.51792105263158e-05, "loss": 0.4118, "step": 38336 }, { "epoch": 2.1467689550901556, "grad_norm": 1.2330278158187866, "learning_rate": 9.517894736842106e-05, "loss": 0.3719, "step": 38337 }, { "epoch": 2.1468249524022847, "grad_norm": 1.2583216428756714, "learning_rate": 9.517868421052632e-05, "loss": 0.371, "step": 38338 }, { "epoch": 2.1468809497144137, "grad_norm": 1.4155641794204712, "learning_rate": 9.517842105263158e-05, "loss": 0.3507, "step": 38339 }, { "epoch": 2.1469369470265427, "grad_norm": 
1.2163387537002563, "learning_rate": 9.517815789473684e-05, "loss": 0.4214, "step": 38340 }, { "epoch": 2.1469929443386717, "grad_norm": 1.347472071647644, "learning_rate": 9.517789473684211e-05, "loss": 0.511, "step": 38341 }, { "epoch": 2.1470489416508007, "grad_norm": 1.351393222808838, "learning_rate": 9.517763157894737e-05, "loss": 0.5125, "step": 38342 }, { "epoch": 2.1471049389629298, "grad_norm": 1.3511576652526855, "learning_rate": 9.517736842105263e-05, "loss": 0.4792, "step": 38343 }, { "epoch": 2.1471609362750588, "grad_norm": 1.2691364288330078, "learning_rate": 9.517710526315789e-05, "loss": 0.3979, "step": 38344 }, { "epoch": 2.147216933587188, "grad_norm": 1.173933506011963, "learning_rate": 9.517684210526316e-05, "loss": 0.4042, "step": 38345 }, { "epoch": 2.147272930899317, "grad_norm": 1.3882774114608765, "learning_rate": 9.517657894736842e-05, "loss": 0.3967, "step": 38346 }, { "epoch": 2.147328928211446, "grad_norm": 1.3339409828186035, "learning_rate": 9.51763157894737e-05, "loss": 0.3959, "step": 38347 }, { "epoch": 2.147384925523575, "grad_norm": 1.1768335103988647, "learning_rate": 9.517605263157894e-05, "loss": 0.3841, "step": 38348 }, { "epoch": 2.147440922835704, "grad_norm": 1.2612472772598267, "learning_rate": 9.517578947368422e-05, "loss": 0.4125, "step": 38349 }, { "epoch": 2.147496920147833, "grad_norm": 1.4095532894134521, "learning_rate": 9.517552631578948e-05, "loss": 0.4211, "step": 38350 }, { "epoch": 2.147552917459962, "grad_norm": 1.1696006059646606, "learning_rate": 9.517526315789475e-05, "loss": 0.4731, "step": 38351 }, { "epoch": 2.147608914772091, "grad_norm": 1.128891110420227, "learning_rate": 9.517500000000001e-05, "loss": 0.3521, "step": 38352 }, { "epoch": 2.14766491208422, "grad_norm": 1.2698031663894653, "learning_rate": 9.517473684210527e-05, "loss": 0.4201, "step": 38353 }, { "epoch": 2.147720909396349, "grad_norm": 1.259680986404419, "learning_rate": 9.517447368421053e-05, "loss": 0.4834, "step": 38354 }, { 
"epoch": 2.147776906708478, "grad_norm": 1.3092985153198242, "learning_rate": 9.51742105263158e-05, "loss": 0.4251, "step": 38355 }, { "epoch": 2.147832904020607, "grad_norm": 1.0928853750228882, "learning_rate": 9.517394736842106e-05, "loss": 0.3274, "step": 38356 }, { "epoch": 2.147888901332736, "grad_norm": 1.5518240928649902, "learning_rate": 9.517368421052631e-05, "loss": 0.4858, "step": 38357 }, { "epoch": 2.147944898644865, "grad_norm": 1.2851063013076782, "learning_rate": 9.517342105263158e-05, "loss": 0.6377, "step": 38358 }, { "epoch": 2.148000895956994, "grad_norm": 1.3254694938659668, "learning_rate": 9.517315789473684e-05, "loss": 0.3525, "step": 38359 }, { "epoch": 2.148056893269123, "grad_norm": 1.1813671588897705, "learning_rate": 9.517289473684211e-05, "loss": 0.3759, "step": 38360 }, { "epoch": 2.148112890581252, "grad_norm": 1.207115888595581, "learning_rate": 9.517263157894737e-05, "loss": 0.308, "step": 38361 }, { "epoch": 2.148168887893381, "grad_norm": 1.1749515533447266, "learning_rate": 9.517236842105263e-05, "loss": 0.3349, "step": 38362 }, { "epoch": 2.14822488520551, "grad_norm": 1.2420547008514404, "learning_rate": 9.51721052631579e-05, "loss": 0.3061, "step": 38363 }, { "epoch": 2.148280882517639, "grad_norm": 1.067697286605835, "learning_rate": 9.517184210526317e-05, "loss": 0.335, "step": 38364 }, { "epoch": 2.148336879829768, "grad_norm": 1.0185019969940186, "learning_rate": 9.517157894736843e-05, "loss": 0.3493, "step": 38365 }, { "epoch": 2.148392877141897, "grad_norm": 1.1054452657699585, "learning_rate": 9.517131578947369e-05, "loss": 0.3525, "step": 38366 }, { "epoch": 2.148448874454026, "grad_norm": 1.2548161745071411, "learning_rate": 9.517105263157895e-05, "loss": 0.4089, "step": 38367 }, { "epoch": 2.1485048717661552, "grad_norm": 0.9476507306098938, "learning_rate": 9.517078947368422e-05, "loss": 0.2851, "step": 38368 }, { "epoch": 2.1485608690782843, "grad_norm": 1.0133048295974731, "learning_rate": 9.517052631578948e-05, 
"loss": 0.3709, "step": 38369 }, { "epoch": 2.1486168663904133, "grad_norm": 1.2617453336715698, "learning_rate": 9.517026315789475e-05, "loss": 0.4156, "step": 38370 }, { "epoch": 2.1486728637025423, "grad_norm": 1.101865291595459, "learning_rate": 9.517e-05, "loss": 0.3994, "step": 38371 }, { "epoch": 2.1487288610146713, "grad_norm": 1.0678901672363281, "learning_rate": 9.516973684210527e-05, "loss": 0.3432, "step": 38372 }, { "epoch": 2.1487848583268003, "grad_norm": 1.4221538305282593, "learning_rate": 9.516947368421053e-05, "loss": 0.4337, "step": 38373 }, { "epoch": 2.1488408556389293, "grad_norm": 1.2667040824890137, "learning_rate": 9.516921052631579e-05, "loss": 0.4032, "step": 38374 }, { "epoch": 2.1488968529510584, "grad_norm": 1.187917709350586, "learning_rate": 9.516894736842105e-05, "loss": 0.4213, "step": 38375 }, { "epoch": 2.1489528502631874, "grad_norm": 1.1725702285766602, "learning_rate": 9.516868421052631e-05, "loss": 0.5062, "step": 38376 }, { "epoch": 2.1490088475753164, "grad_norm": 1.0603768825531006, "learning_rate": 9.516842105263158e-05, "loss": 0.3374, "step": 38377 }, { "epoch": 2.1490648448874454, "grad_norm": 1.3903053998947144, "learning_rate": 9.516815789473684e-05, "loss": 0.5, "step": 38378 }, { "epoch": 2.1491208421995744, "grad_norm": 1.1216073036193848, "learning_rate": 9.516789473684212e-05, "loss": 0.3625, "step": 38379 }, { "epoch": 2.1491768395117035, "grad_norm": 1.4432493448257446, "learning_rate": 9.516763157894736e-05, "loss": 0.5352, "step": 38380 }, { "epoch": 2.1492328368238325, "grad_norm": 1.377672553062439, "learning_rate": 9.516736842105264e-05, "loss": 0.3987, "step": 38381 }, { "epoch": 2.1492888341359615, "grad_norm": 1.30591881275177, "learning_rate": 9.51671052631579e-05, "loss": 0.5589, "step": 38382 }, { "epoch": 2.1493448314480905, "grad_norm": 1.307314395904541, "learning_rate": 9.516684210526317e-05, "loss": 0.4652, "step": 38383 }, { "epoch": 2.1494008287602195, "grad_norm": 1.6501095294952393, 
"learning_rate": 9.516657894736843e-05, "loss": 0.4011, "step": 38384 }, { "epoch": 2.1494568260723486, "grad_norm": 1.1767069101333618, "learning_rate": 9.516631578947369e-05, "loss": 0.3429, "step": 38385 }, { "epoch": 2.1495128233844776, "grad_norm": 1.1041499376296997, "learning_rate": 9.516605263157895e-05, "loss": 0.5331, "step": 38386 }, { "epoch": 2.1495688206966066, "grad_norm": 1.075477957725525, "learning_rate": 9.516578947368422e-05, "loss": 0.3662, "step": 38387 }, { "epoch": 2.1496248180087356, "grad_norm": 1.2789161205291748, "learning_rate": 9.516552631578948e-05, "loss": 0.5727, "step": 38388 }, { "epoch": 2.1496808153208646, "grad_norm": 1.06875479221344, "learning_rate": 9.516526315789474e-05, "loss": 0.2913, "step": 38389 }, { "epoch": 2.1497368126329937, "grad_norm": 1.1192445755004883, "learning_rate": 9.5165e-05, "loss": 0.3445, "step": 38390 }, { "epoch": 2.1497928099451227, "grad_norm": 1.203905701637268, "learning_rate": 9.516473684210527e-05, "loss": 0.3686, "step": 38391 }, { "epoch": 2.1498488072572517, "grad_norm": 1.228430151939392, "learning_rate": 9.516447368421053e-05, "loss": 0.4566, "step": 38392 }, { "epoch": 2.1499048045693807, "grad_norm": 0.8909831643104553, "learning_rate": 9.51642105263158e-05, "loss": 0.2665, "step": 38393 }, { "epoch": 2.1499608018815097, "grad_norm": 1.204802393913269, "learning_rate": 9.516394736842105e-05, "loss": 0.416, "step": 38394 }, { "epoch": 2.1500167991936388, "grad_norm": 1.3512656688690186, "learning_rate": 9.516368421052631e-05, "loss": 0.41, "step": 38395 }, { "epoch": 2.1500727965057678, "grad_norm": 1.1512117385864258, "learning_rate": 9.516342105263159e-05, "loss": 0.3702, "step": 38396 }, { "epoch": 2.150128793817897, "grad_norm": 1.8756872415542603, "learning_rate": 9.516315789473685e-05, "loss": 0.4072, "step": 38397 }, { "epoch": 2.150184791130026, "grad_norm": 1.3012136220932007, "learning_rate": 9.51628947368421e-05, "loss": 0.3271, "step": 38398 }, { "epoch": 2.150240788442155, 
"grad_norm": 1.1595178842544556, "learning_rate": 9.516263157894737e-05, "loss": 0.3225, "step": 38399 }, { "epoch": 2.150296785754284, "grad_norm": 2.2717444896698, "learning_rate": 9.516236842105264e-05, "loss": 0.3537, "step": 38400 }, { "epoch": 2.150352783066413, "grad_norm": 1.4404935836791992, "learning_rate": 9.51621052631579e-05, "loss": 0.4387, "step": 38401 }, { "epoch": 2.150408780378542, "grad_norm": 1.1425600051879883, "learning_rate": 9.516184210526317e-05, "loss": 0.3943, "step": 38402 }, { "epoch": 2.150464777690671, "grad_norm": 1.2277195453643799, "learning_rate": 9.516157894736842e-05, "loss": 0.4301, "step": 38403 }, { "epoch": 2.1505207750028, "grad_norm": 1.088707685470581, "learning_rate": 9.516131578947369e-05, "loss": 0.377, "step": 38404 }, { "epoch": 2.150576772314929, "grad_norm": 1.5279866456985474, "learning_rate": 9.516105263157895e-05, "loss": 0.4505, "step": 38405 }, { "epoch": 2.150632769627058, "grad_norm": 1.4106463193893433, "learning_rate": 9.516078947368422e-05, "loss": 0.3173, "step": 38406 }, { "epoch": 2.150688766939187, "grad_norm": 1.3112070560455322, "learning_rate": 9.516052631578948e-05, "loss": 0.4362, "step": 38407 }, { "epoch": 2.150744764251316, "grad_norm": 1.2518410682678223, "learning_rate": 9.516026315789474e-05, "loss": 0.3829, "step": 38408 }, { "epoch": 2.150800761563445, "grad_norm": 1.3488497734069824, "learning_rate": 9.516e-05, "loss": 0.3634, "step": 38409 }, { "epoch": 2.150856758875574, "grad_norm": 1.2787147760391235, "learning_rate": 9.515973684210526e-05, "loss": 0.4027, "step": 38410 }, { "epoch": 2.150912756187703, "grad_norm": 1.1850714683532715, "learning_rate": 9.515947368421054e-05, "loss": 0.4072, "step": 38411 }, { "epoch": 2.150968753499832, "grad_norm": 1.5273966789245605, "learning_rate": 9.515921052631578e-05, "loss": 0.3271, "step": 38412 }, { "epoch": 2.151024750811961, "grad_norm": 1.3151838779449463, "learning_rate": 9.515894736842106e-05, "loss": 0.5206, "step": 38413 }, { 
"epoch": 2.15108074812409, "grad_norm": 1.1229515075683594, "learning_rate": 9.515868421052632e-05, "loss": 0.3385, "step": 38414 }, { "epoch": 2.151136745436219, "grad_norm": 2.3135693073272705, "learning_rate": 9.515842105263159e-05, "loss": 0.4188, "step": 38415 }, { "epoch": 2.151192742748348, "grad_norm": 1.2588485479354858, "learning_rate": 9.515815789473685e-05, "loss": 0.3691, "step": 38416 }, { "epoch": 2.151248740060477, "grad_norm": 0.9635387659072876, "learning_rate": 9.515789473684211e-05, "loss": 0.2888, "step": 38417 }, { "epoch": 2.151304737372606, "grad_norm": 1.3263167142868042, "learning_rate": 9.515763157894737e-05, "loss": 0.5061, "step": 38418 }, { "epoch": 2.151360734684735, "grad_norm": 1.085422158241272, "learning_rate": 9.515736842105264e-05, "loss": 0.4358, "step": 38419 }, { "epoch": 2.1514167319968642, "grad_norm": 1.3631871938705444, "learning_rate": 9.51571052631579e-05, "loss": 0.3652, "step": 38420 }, { "epoch": 2.1514727293089932, "grad_norm": 1.1401987075805664, "learning_rate": 9.515684210526316e-05, "loss": 0.4013, "step": 38421 }, { "epoch": 2.1515287266211223, "grad_norm": 1.1191740036010742, "learning_rate": 9.515657894736842e-05, "loss": 0.3712, "step": 38422 }, { "epoch": 2.1515847239332513, "grad_norm": 1.2889227867126465, "learning_rate": 9.51563157894737e-05, "loss": 0.4646, "step": 38423 }, { "epoch": 2.1516407212453803, "grad_norm": 1.3274955749511719, "learning_rate": 9.515605263157895e-05, "loss": 0.4719, "step": 38424 }, { "epoch": 2.1516967185575093, "grad_norm": 5.06099271774292, "learning_rate": 9.515578947368423e-05, "loss": 0.3651, "step": 38425 }, { "epoch": 2.1517527158696383, "grad_norm": 0.9237107634544373, "learning_rate": 9.515552631578947e-05, "loss": 0.3798, "step": 38426 }, { "epoch": 2.1518087131817674, "grad_norm": 1.1915366649627686, "learning_rate": 9.515526315789473e-05, "loss": 0.4381, "step": 38427 }, { "epoch": 2.1518647104938964, "grad_norm": 1.8770909309387207, "learning_rate": 9.5155e-05, 
"loss": 0.4434, "step": 38428 }, { "epoch": 2.1519207078060254, "grad_norm": 1.025797724723816, "learning_rate": 9.515473684210527e-05, "loss": 0.3957, "step": 38429 }, { "epoch": 2.1519767051181544, "grad_norm": 1.0619704723358154, "learning_rate": 9.515447368421053e-05, "loss": 0.3415, "step": 38430 }, { "epoch": 2.1520327024302834, "grad_norm": 1.0002723932266235, "learning_rate": 9.515421052631579e-05, "loss": 0.2805, "step": 38431 }, { "epoch": 2.1520886997424125, "grad_norm": 1.3483352661132812, "learning_rate": 9.515394736842106e-05, "loss": 0.3768, "step": 38432 }, { "epoch": 2.1521446970545415, "grad_norm": 1.174026370048523, "learning_rate": 9.515368421052632e-05, "loss": 0.3947, "step": 38433 }, { "epoch": 2.1522006943666705, "grad_norm": 1.195227026939392, "learning_rate": 9.515342105263159e-05, "loss": 0.4498, "step": 38434 }, { "epoch": 2.1522566916787995, "grad_norm": 1.5190393924713135, "learning_rate": 9.515315789473684e-05, "loss": 0.569, "step": 38435 }, { "epoch": 2.1523126889909285, "grad_norm": 1.0727328062057495, "learning_rate": 9.515289473684211e-05, "loss": 0.3135, "step": 38436 }, { "epoch": 2.1523686863030576, "grad_norm": 1.1796677112579346, "learning_rate": 9.515263157894737e-05, "loss": 0.3885, "step": 38437 }, { "epoch": 2.1524246836151866, "grad_norm": 1.220680832862854, "learning_rate": 9.515236842105264e-05, "loss": 0.4243, "step": 38438 }, { "epoch": 2.1524806809273156, "grad_norm": 1.3221995830535889, "learning_rate": 9.51521052631579e-05, "loss": 0.6364, "step": 38439 }, { "epoch": 2.1525366782394446, "grad_norm": 1.0980709791183472, "learning_rate": 9.515184210526316e-05, "loss": 0.413, "step": 38440 }, { "epoch": 2.1525926755515736, "grad_norm": 1.1699408292770386, "learning_rate": 9.515157894736842e-05, "loss": 0.3476, "step": 38441 }, { "epoch": 2.1526486728637027, "grad_norm": 1.2593698501586914, "learning_rate": 9.51513157894737e-05, "loss": 0.3814, "step": 38442 }, { "epoch": 2.1527046701758317, "grad_norm": 
1.216633915901184, "learning_rate": 9.515105263157896e-05, "loss": 0.4571, "step": 38443 }, { "epoch": 2.1527606674879607, "grad_norm": 1.1833136081695557, "learning_rate": 9.515078947368422e-05, "loss": 0.3987, "step": 38444 }, { "epoch": 2.1528166648000897, "grad_norm": 1.7841136455535889, "learning_rate": 9.515052631578948e-05, "loss": 0.3522, "step": 38445 }, { "epoch": 2.1528726621122187, "grad_norm": 1.0907915830612183, "learning_rate": 9.515026315789474e-05, "loss": 0.3427, "step": 38446 }, { "epoch": 2.1529286594243477, "grad_norm": 1.0720961093902588, "learning_rate": 9.515000000000001e-05, "loss": 0.3173, "step": 38447 }, { "epoch": 2.1529846567364768, "grad_norm": 1.3579915761947632, "learning_rate": 9.514973684210527e-05, "loss": 0.3721, "step": 38448 }, { "epoch": 2.153040654048606, "grad_norm": 1.539716362953186, "learning_rate": 9.514947368421053e-05, "loss": 0.4071, "step": 38449 }, { "epoch": 2.153096651360735, "grad_norm": 0.9702361822128296, "learning_rate": 9.514921052631579e-05, "loss": 0.365, "step": 38450 }, { "epoch": 2.153152648672864, "grad_norm": 1.2700371742248535, "learning_rate": 9.514894736842106e-05, "loss": 0.5815, "step": 38451 }, { "epoch": 2.153208645984993, "grad_norm": 1.013090968132019, "learning_rate": 9.514868421052632e-05, "loss": 0.2962, "step": 38452 }, { "epoch": 2.153264643297122, "grad_norm": 1.250405192375183, "learning_rate": 9.514842105263158e-05, "loss": 0.4018, "step": 38453 }, { "epoch": 2.153320640609251, "grad_norm": 1.1727354526519775, "learning_rate": 9.514815789473684e-05, "loss": 0.4397, "step": 38454 }, { "epoch": 2.15337663792138, "grad_norm": 1.2441362142562866, "learning_rate": 9.514789473684211e-05, "loss": 0.3624, "step": 38455 }, { "epoch": 2.153432635233509, "grad_norm": 2.902956247329712, "learning_rate": 9.514763157894737e-05, "loss": 0.383, "step": 38456 }, { "epoch": 2.153488632545638, "grad_norm": 1.148582935333252, "learning_rate": 9.514736842105265e-05, "loss": 0.4701, "step": 38457 }, { 
"epoch": 2.153544629857767, "grad_norm": 1.849380373954773, "learning_rate": 9.514710526315789e-05, "loss": 0.256, "step": 38458 }, { "epoch": 2.153600627169896, "grad_norm": 1.0875487327575684, "learning_rate": 9.514684210526317e-05, "loss": 0.3613, "step": 38459 }, { "epoch": 2.153656624482025, "grad_norm": 1.0818275213241577, "learning_rate": 9.514657894736843e-05, "loss": 0.4268, "step": 38460 }, { "epoch": 2.153712621794154, "grad_norm": 1.1120792627334595, "learning_rate": 9.514631578947369e-05, "loss": 0.3878, "step": 38461 }, { "epoch": 2.153768619106283, "grad_norm": 1.3657582998275757, "learning_rate": 9.514605263157896e-05, "loss": 0.4561, "step": 38462 }, { "epoch": 2.153824616418412, "grad_norm": 1.220436453819275, "learning_rate": 9.51457894736842e-05, "loss": 0.364, "step": 38463 }, { "epoch": 2.153880613730541, "grad_norm": 1.2224228382110596, "learning_rate": 9.514552631578948e-05, "loss": 0.4847, "step": 38464 }, { "epoch": 2.15393661104267, "grad_norm": 1.2699791193008423, "learning_rate": 9.514526315789474e-05, "loss": 0.3903, "step": 38465 }, { "epoch": 2.153992608354799, "grad_norm": 1.104504942893982, "learning_rate": 9.514500000000001e-05, "loss": 0.3714, "step": 38466 }, { "epoch": 2.154048605666928, "grad_norm": 1.4629234075546265, "learning_rate": 9.514473684210526e-05, "loss": 0.4596, "step": 38467 }, { "epoch": 2.154104602979057, "grad_norm": 1.3545204401016235, "learning_rate": 9.514447368421053e-05, "loss": 0.3256, "step": 38468 }, { "epoch": 2.154160600291186, "grad_norm": 1.1996327638626099, "learning_rate": 9.514421052631579e-05, "loss": 0.504, "step": 38469 }, { "epoch": 2.154216597603315, "grad_norm": 1.231961727142334, "learning_rate": 9.514394736842106e-05, "loss": 0.3281, "step": 38470 }, { "epoch": 2.154272594915444, "grad_norm": 1.2423095703125, "learning_rate": 9.514368421052632e-05, "loss": 0.315, "step": 38471 }, { "epoch": 2.1543285922275732, "grad_norm": 1.1325401067733765, "learning_rate": 9.514342105263158e-05, 
"loss": 0.3833, "step": 38472 }, { "epoch": 2.1543845895397022, "grad_norm": 1.2642079591751099, "learning_rate": 9.514315789473684e-05, "loss": 0.3332, "step": 38473 }, { "epoch": 2.1544405868518313, "grad_norm": 1.121393084526062, "learning_rate": 9.514289473684212e-05, "loss": 0.3456, "step": 38474 }, { "epoch": 2.1544965841639603, "grad_norm": 1.3007762432098389, "learning_rate": 9.514263157894738e-05, "loss": 0.5197, "step": 38475 }, { "epoch": 2.1545525814760893, "grad_norm": 1.2785131931304932, "learning_rate": 9.514236842105264e-05, "loss": 0.7217, "step": 38476 }, { "epoch": 2.1546085787882183, "grad_norm": 1.2964714765548706, "learning_rate": 9.51421052631579e-05, "loss": 0.3526, "step": 38477 }, { "epoch": 2.1546645761003473, "grad_norm": 1.0315666198730469, "learning_rate": 9.514184210526316e-05, "loss": 0.4063, "step": 38478 }, { "epoch": 2.1547205734124764, "grad_norm": 1.1242539882659912, "learning_rate": 9.514157894736843e-05, "loss": 0.3544, "step": 38479 }, { "epoch": 2.1547765707246054, "grad_norm": 1.4843276739120483, "learning_rate": 9.514131578947369e-05, "loss": 0.3062, "step": 38480 }, { "epoch": 2.1548325680367344, "grad_norm": 1.1322059631347656, "learning_rate": 9.514105263157895e-05, "loss": 0.3404, "step": 38481 }, { "epoch": 2.1548885653488634, "grad_norm": 1.428338885307312, "learning_rate": 9.514078947368421e-05, "loss": 0.3516, "step": 38482 }, { "epoch": 2.1549445626609924, "grad_norm": 1.294930100440979, "learning_rate": 9.514052631578948e-05, "loss": 0.4035, "step": 38483 }, { "epoch": 2.1550005599731215, "grad_norm": 1.3391445875167847, "learning_rate": 9.514026315789474e-05, "loss": 0.3593, "step": 38484 }, { "epoch": 2.1550565572852505, "grad_norm": 1.3055952787399292, "learning_rate": 9.514e-05, "loss": 0.4217, "step": 38485 }, { "epoch": 2.1551125545973795, "grad_norm": 1.3343820571899414, "learning_rate": 9.513973684210526e-05, "loss": 0.4283, "step": 38486 }, { "epoch": 2.1551685519095085, "grad_norm": 1.2045443058013916, 
"learning_rate": 9.513947368421053e-05, "loss": 0.3519, "step": 38487 }, { "epoch": 2.1552245492216375, "grad_norm": 1.249302864074707, "learning_rate": 9.51392105263158e-05, "loss": 0.3885, "step": 38488 }, { "epoch": 2.1552805465337666, "grad_norm": 1.2215322256088257, "learning_rate": 9.513894736842107e-05, "loss": 0.3797, "step": 38489 }, { "epoch": 2.1553365438458956, "grad_norm": 1.0655783414840698, "learning_rate": 9.513868421052631e-05, "loss": 0.298, "step": 38490 }, { "epoch": 2.1553925411580246, "grad_norm": 1.4894440174102783, "learning_rate": 9.513842105263159e-05, "loss": 0.3956, "step": 38491 }, { "epoch": 2.1554485384701536, "grad_norm": 1.0665535926818848, "learning_rate": 9.513815789473685e-05, "loss": 0.4009, "step": 38492 }, { "epoch": 2.1555045357822826, "grad_norm": 1.4605998992919922, "learning_rate": 9.513789473684212e-05, "loss": 0.4195, "step": 38493 }, { "epoch": 2.1555605330944116, "grad_norm": 1.0981621742248535, "learning_rate": 9.513763157894738e-05, "loss": 0.3732, "step": 38494 }, { "epoch": 2.1556165304065407, "grad_norm": 1.1166752576828003, "learning_rate": 9.513736842105262e-05, "loss": 0.4115, "step": 38495 }, { "epoch": 2.1556725277186697, "grad_norm": 1.3041073083877563, "learning_rate": 9.51371052631579e-05, "loss": 0.4104, "step": 38496 }, { "epoch": 2.1557285250307987, "grad_norm": 1.2176666259765625, "learning_rate": 9.513684210526316e-05, "loss": 0.3595, "step": 38497 }, { "epoch": 2.1557845223429277, "grad_norm": 1.1245518922805786, "learning_rate": 9.513657894736843e-05, "loss": 0.4238, "step": 38498 }, { "epoch": 2.1558405196550567, "grad_norm": 1.2398622035980225, "learning_rate": 9.513631578947369e-05, "loss": 0.4483, "step": 38499 }, { "epoch": 2.1558965169671858, "grad_norm": 1.4633852243423462, "learning_rate": 9.513605263157895e-05, "loss": 0.3133, "step": 38500 }, { "epoch": 2.155952514279315, "grad_norm": 1.0684674978256226, "learning_rate": 9.513578947368421e-05, "loss": 0.3676, "step": 38501 }, { "epoch": 
2.156008511591444, "grad_norm": 1.0375701189041138, "learning_rate": 9.513552631578948e-05, "loss": 0.3438, "step": 38502 }, { "epoch": 2.156064508903573, "grad_norm": 1.0976521968841553, "learning_rate": 9.513526315789474e-05, "loss": 0.341, "step": 38503 }, { "epoch": 2.156120506215702, "grad_norm": 1.1272724866867065, "learning_rate": 9.5135e-05, "loss": 0.297, "step": 38504 }, { "epoch": 2.156176503527831, "grad_norm": 1.0749926567077637, "learning_rate": 9.513473684210526e-05, "loss": 0.3416, "step": 38505 }, { "epoch": 2.15623250083996, "grad_norm": 1.091835618019104, "learning_rate": 9.513447368421054e-05, "loss": 0.3801, "step": 38506 }, { "epoch": 2.156288498152089, "grad_norm": 1.025221347808838, "learning_rate": 9.51342105263158e-05, "loss": 0.3643, "step": 38507 }, { "epoch": 2.156344495464218, "grad_norm": 1.303847074508667, "learning_rate": 9.513394736842106e-05, "loss": 0.4585, "step": 38508 }, { "epoch": 2.156400492776347, "grad_norm": 1.105273962020874, "learning_rate": 9.513368421052632e-05, "loss": 0.3048, "step": 38509 }, { "epoch": 2.156456490088476, "grad_norm": 1.1365665197372437, "learning_rate": 9.513342105263159e-05, "loss": 0.421, "step": 38510 }, { "epoch": 2.156512487400605, "grad_norm": 1.1094828844070435, "learning_rate": 9.513315789473685e-05, "loss": 0.3764, "step": 38511 }, { "epoch": 2.156568484712734, "grad_norm": 1.1896651983261108, "learning_rate": 9.513289473684212e-05, "loss": 0.4012, "step": 38512 }, { "epoch": 2.156624482024863, "grad_norm": 1.4077340364456177, "learning_rate": 9.513263157894737e-05, "loss": 0.3851, "step": 38513 }, { "epoch": 2.156680479336992, "grad_norm": 1.1899935007095337, "learning_rate": 9.513236842105263e-05, "loss": 0.3729, "step": 38514 }, { "epoch": 2.156736476649121, "grad_norm": 1.1590992212295532, "learning_rate": 9.51321052631579e-05, "loss": 0.4018, "step": 38515 }, { "epoch": 2.15679247396125, "grad_norm": 1.2674232721328735, "learning_rate": 9.513184210526316e-05, "loss": 0.5133, "step": 
38516 }, { "epoch": 2.1568484712733786, "grad_norm": 1.2413398027420044, "learning_rate": 9.513157894736843e-05, "loss": 0.508, "step": 38517 }, { "epoch": 2.156904468585508, "grad_norm": 1.1949427127838135, "learning_rate": 9.513131578947368e-05, "loss": 0.4393, "step": 38518 }, { "epoch": 2.1569604658976367, "grad_norm": 1.2321666479110718, "learning_rate": 9.513105263157895e-05, "loss": 0.3572, "step": 38519 }, { "epoch": 2.157016463209766, "grad_norm": 1.2356003522872925, "learning_rate": 9.513078947368421e-05, "loss": 0.4017, "step": 38520 }, { "epoch": 2.1570724605218947, "grad_norm": 1.1150670051574707, "learning_rate": 9.513052631578949e-05, "loss": 0.3228, "step": 38521 }, { "epoch": 2.157128457834024, "grad_norm": 1.595812439918518, "learning_rate": 9.513026315789473e-05, "loss": 0.4999, "step": 38522 }, { "epoch": 2.1571844551461528, "grad_norm": 1.9882651567459106, "learning_rate": 9.513e-05, "loss": 0.3013, "step": 38523 }, { "epoch": 2.157240452458282, "grad_norm": 1.2250685691833496, "learning_rate": 9.512973684210527e-05, "loss": 0.362, "step": 38524 }, { "epoch": 2.157296449770411, "grad_norm": 1.7353954315185547, "learning_rate": 9.512947368421054e-05, "loss": 0.5075, "step": 38525 }, { "epoch": 2.1573524470825403, "grad_norm": 1.0507529973983765, "learning_rate": 9.51292105263158e-05, "loss": 0.3354, "step": 38526 }, { "epoch": 2.157408444394669, "grad_norm": 1.3162833452224731, "learning_rate": 9.512894736842106e-05, "loss": 0.4163, "step": 38527 }, { "epoch": 2.1574644417067983, "grad_norm": 1.0403687953948975, "learning_rate": 9.512868421052632e-05, "loss": 0.4029, "step": 38528 }, { "epoch": 2.157520439018927, "grad_norm": 1.1419215202331543, "learning_rate": 9.512842105263159e-05, "loss": 0.3681, "step": 38529 }, { "epoch": 2.1575764363310563, "grad_norm": 1.597672462463379, "learning_rate": 9.512815789473685e-05, "loss": 0.3261, "step": 38530 }, { "epoch": 2.157632433643185, "grad_norm": 2.1670682430267334, "learning_rate": 
9.512789473684211e-05, "loss": 0.3101, "step": 38531 }, { "epoch": 2.1576884309553144, "grad_norm": 1.0850896835327148, "learning_rate": 9.512763157894737e-05, "loss": 0.4121, "step": 38532 }, { "epoch": 2.157744428267443, "grad_norm": 1.089418649673462, "learning_rate": 9.512736842105263e-05, "loss": 0.3356, "step": 38533 }, { "epoch": 2.1578004255795724, "grad_norm": 1.0577048063278198, "learning_rate": 9.51271052631579e-05, "loss": 0.3781, "step": 38534 }, { "epoch": 2.157856422891701, "grad_norm": 1.6464054584503174, "learning_rate": 9.512684210526316e-05, "loss": 0.4049, "step": 38535 }, { "epoch": 2.1579124202038305, "grad_norm": 1.7340925931930542, "learning_rate": 9.512657894736842e-05, "loss": 0.5827, "step": 38536 }, { "epoch": 2.157968417515959, "grad_norm": 1.3729428052902222, "learning_rate": 9.512631578947368e-05, "loss": 0.4285, "step": 38537 }, { "epoch": 2.158024414828088, "grad_norm": 1.1906861066818237, "learning_rate": 9.512605263157896e-05, "loss": 0.2826, "step": 38538 }, { "epoch": 2.158080412140217, "grad_norm": 0.9225759506225586, "learning_rate": 9.512578947368422e-05, "loss": 0.2453, "step": 38539 }, { "epoch": 2.158136409452346, "grad_norm": 1.1134223937988281, "learning_rate": 9.512552631578948e-05, "loss": 0.3685, "step": 38540 }, { "epoch": 2.158192406764475, "grad_norm": 1.5738328695297241, "learning_rate": 9.512526315789473e-05, "loss": 0.5515, "step": 38541 }, { "epoch": 2.158248404076604, "grad_norm": 1.5049524307250977, "learning_rate": 9.512500000000001e-05, "loss": 0.3536, "step": 38542 }, { "epoch": 2.158304401388733, "grad_norm": 1.2111343145370483, "learning_rate": 9.512473684210527e-05, "loss": 0.5285, "step": 38543 }, { "epoch": 2.158360398700862, "grad_norm": 1.3672007322311401, "learning_rate": 9.512447368421054e-05, "loss": 0.5118, "step": 38544 }, { "epoch": 2.158416396012991, "grad_norm": 1.1374894380569458, "learning_rate": 9.512421052631579e-05, "loss": 0.3471, "step": 38545 }, { "epoch": 2.15847239332512, 
"grad_norm": 1.3705662488937378, "learning_rate": 9.512394736842106e-05, "loss": 0.4479, "step": 38546 }, { "epoch": 2.158528390637249, "grad_norm": 1.0276386737823486, "learning_rate": 9.512368421052632e-05, "loss": 0.4133, "step": 38547 }, { "epoch": 2.1585843879493782, "grad_norm": 1.2824528217315674, "learning_rate": 9.512342105263158e-05, "loss": 0.4397, "step": 38548 }, { "epoch": 2.1586403852615073, "grad_norm": 1.2494326829910278, "learning_rate": 9.512315789473685e-05, "loss": 0.3303, "step": 38549 }, { "epoch": 2.1586963825736363, "grad_norm": 1.5131142139434814, "learning_rate": 9.51228947368421e-05, "loss": 0.702, "step": 38550 }, { "epoch": 2.1587523798857653, "grad_norm": 1.3123862743377686, "learning_rate": 9.512263157894737e-05, "loss": 0.3819, "step": 38551 }, { "epoch": 2.1588083771978943, "grad_norm": 1.3677548170089722, "learning_rate": 9.512236842105263e-05, "loss": 0.449, "step": 38552 }, { "epoch": 2.1588643745100233, "grad_norm": 1.2566975355148315, "learning_rate": 9.51221052631579e-05, "loss": 0.434, "step": 38553 }, { "epoch": 2.1589203718221524, "grad_norm": 1.159308910369873, "learning_rate": 9.512184210526317e-05, "loss": 0.5404, "step": 38554 }, { "epoch": 2.1589763691342814, "grad_norm": 1.2370284795761108, "learning_rate": 9.512157894736843e-05, "loss": 0.5489, "step": 38555 }, { "epoch": 2.1590323664464104, "grad_norm": 1.1077632904052734, "learning_rate": 9.512131578947369e-05, "loss": 0.3503, "step": 38556 }, { "epoch": 2.1590883637585394, "grad_norm": 1.2936064004898071, "learning_rate": 9.512105263157896e-05, "loss": 0.4271, "step": 38557 }, { "epoch": 2.1591443610706684, "grad_norm": 0.9969408512115479, "learning_rate": 9.512078947368422e-05, "loss": 0.4391, "step": 38558 }, { "epoch": 2.1592003583827974, "grad_norm": 1.1851403713226318, "learning_rate": 9.512052631578948e-05, "loss": 0.3519, "step": 38559 }, { "epoch": 2.1592563556949265, "grad_norm": 1.053171157836914, "learning_rate": 9.512026315789474e-05, "loss": 0.342, 
"step": 38560 }, { "epoch": 2.1593123530070555, "grad_norm": 1.3716280460357666, "learning_rate": 9.512000000000001e-05, "loss": 0.2969, "step": 38561 }, { "epoch": 2.1593683503191845, "grad_norm": 1.2639951705932617, "learning_rate": 9.511973684210527e-05, "loss": 0.3877, "step": 38562 }, { "epoch": 2.1594243476313135, "grad_norm": 1.4132440090179443, "learning_rate": 9.511947368421053e-05, "loss": 0.3946, "step": 38563 }, { "epoch": 2.1594803449434425, "grad_norm": 1.3084564208984375, "learning_rate": 9.511921052631579e-05, "loss": 0.4713, "step": 38564 }, { "epoch": 2.1595363422555716, "grad_norm": 1.294042706489563, "learning_rate": 9.511894736842105e-05, "loss": 0.3917, "step": 38565 }, { "epoch": 2.1595923395677006, "grad_norm": 1.09099280834198, "learning_rate": 9.511868421052632e-05, "loss": 0.2852, "step": 38566 }, { "epoch": 2.1596483368798296, "grad_norm": 1.2776057720184326, "learning_rate": 9.511842105263158e-05, "loss": 0.3396, "step": 38567 }, { "epoch": 2.1597043341919586, "grad_norm": 1.1086061000823975, "learning_rate": 9.511815789473684e-05, "loss": 0.3762, "step": 38568 }, { "epoch": 2.1597603315040876, "grad_norm": 1.0837737321853638, "learning_rate": 9.51178947368421e-05, "loss": 0.2789, "step": 38569 }, { "epoch": 2.1598163288162167, "grad_norm": 1.5426263809204102, "learning_rate": 9.511763157894738e-05, "loss": 0.5148, "step": 38570 }, { "epoch": 2.1598723261283457, "grad_norm": 0.9703961610794067, "learning_rate": 9.511736842105264e-05, "loss": 0.4398, "step": 38571 }, { "epoch": 2.1599283234404747, "grad_norm": 1.5045948028564453, "learning_rate": 9.511710526315791e-05, "loss": 0.7126, "step": 38572 }, { "epoch": 2.1599843207526037, "grad_norm": 1.4359759092330933, "learning_rate": 9.511684210526315e-05, "loss": 0.3646, "step": 38573 }, { "epoch": 2.1600403180647327, "grad_norm": 1.2386369705200195, "learning_rate": 9.511657894736843e-05, "loss": 0.4214, "step": 38574 }, { "epoch": 2.1600963153768618, "grad_norm": 1.1075921058654785, 
"learning_rate": 9.511631578947369e-05, "loss": 0.3562, "step": 38575 }, { "epoch": 2.1601523126889908, "grad_norm": 1.3298624753952026, "learning_rate": 9.511605263157896e-05, "loss": 0.447, "step": 38576 }, { "epoch": 2.16020831000112, "grad_norm": 1.3276902437210083, "learning_rate": 9.511578947368421e-05, "loss": 0.3599, "step": 38577 }, { "epoch": 2.160264307313249, "grad_norm": 1.250604271888733, "learning_rate": 9.511552631578948e-05, "loss": 0.4126, "step": 38578 }, { "epoch": 2.160320304625378, "grad_norm": 1.123768925666809, "learning_rate": 9.511526315789474e-05, "loss": 0.3555, "step": 38579 }, { "epoch": 2.160376301937507, "grad_norm": 1.360512137413025, "learning_rate": 9.511500000000001e-05, "loss": 0.4484, "step": 38580 }, { "epoch": 2.160432299249636, "grad_norm": 1.6691123247146606, "learning_rate": 9.511473684210527e-05, "loss": 0.3395, "step": 38581 }, { "epoch": 2.160488296561765, "grad_norm": 1.8766708374023438, "learning_rate": 9.511447368421052e-05, "loss": 0.5455, "step": 38582 }, { "epoch": 2.160544293873894, "grad_norm": 1.3959908485412598, "learning_rate": 9.511421052631579e-05, "loss": 0.5452, "step": 38583 }, { "epoch": 2.160600291186023, "grad_norm": 1.4481756687164307, "learning_rate": 9.511394736842105e-05, "loss": 0.4623, "step": 38584 }, { "epoch": 2.160656288498152, "grad_norm": 1.4850200414657593, "learning_rate": 9.511368421052633e-05, "loss": 0.4645, "step": 38585 }, { "epoch": 2.160712285810281, "grad_norm": 1.1922173500061035, "learning_rate": 9.511342105263159e-05, "loss": 0.3469, "step": 38586 }, { "epoch": 2.16076828312241, "grad_norm": 1.4706612825393677, "learning_rate": 9.511315789473685e-05, "loss": 0.4546, "step": 38587 }, { "epoch": 2.160824280434539, "grad_norm": 1.2319934368133545, "learning_rate": 9.51128947368421e-05, "loss": 0.4516, "step": 38588 }, { "epoch": 2.160880277746668, "grad_norm": 1.1282672882080078, "learning_rate": 9.511263157894738e-05, "loss": 0.345, "step": 38589 }, { "epoch": 2.160936275058797, 
"grad_norm": 3.1275134086608887, "learning_rate": 9.511236842105264e-05, "loss": 0.5066, "step": 38590 }, { "epoch": 2.160992272370926, "grad_norm": 1.3236494064331055, "learning_rate": 9.51121052631579e-05, "loss": 0.4097, "step": 38591 }, { "epoch": 2.161048269683055, "grad_norm": 1.289269208908081, "learning_rate": 9.511184210526316e-05, "loss": 0.4153, "step": 38592 }, { "epoch": 2.161104266995184, "grad_norm": 1.4659175872802734, "learning_rate": 9.511157894736843e-05, "loss": 0.5203, "step": 38593 }, { "epoch": 2.161160264307313, "grad_norm": 1.179755449295044, "learning_rate": 9.511131578947369e-05, "loss": 0.4724, "step": 38594 }, { "epoch": 2.161216261619442, "grad_norm": 1.3480043411254883, "learning_rate": 9.511105263157895e-05, "loss": 0.393, "step": 38595 }, { "epoch": 2.161272258931571, "grad_norm": 1.3042832612991333, "learning_rate": 9.511078947368421e-05, "loss": 0.5268, "step": 38596 }, { "epoch": 2.1613282562437, "grad_norm": 1.2216871976852417, "learning_rate": 9.511052631578948e-05, "loss": 0.4966, "step": 38597 }, { "epoch": 2.161384253555829, "grad_norm": 1.2597949504852295, "learning_rate": 9.511026315789474e-05, "loss": 0.39, "step": 38598 }, { "epoch": 2.161440250867958, "grad_norm": 1.4922704696655273, "learning_rate": 9.511e-05, "loss": 0.4078, "step": 38599 }, { "epoch": 2.1614962481800872, "grad_norm": 1.1868919134140015, "learning_rate": 9.510973684210526e-05, "loss": 0.4372, "step": 38600 }, { "epoch": 2.1615522454922163, "grad_norm": 1.1306382417678833, "learning_rate": 9.510947368421052e-05, "loss": 0.3356, "step": 38601 }, { "epoch": 2.1616082428043453, "grad_norm": 1.0911821126937866, "learning_rate": 9.51092105263158e-05, "loss": 0.3762, "step": 38602 }, { "epoch": 2.1616642401164743, "grad_norm": 1.5354933738708496, "learning_rate": 9.510894736842105e-05, "loss": 0.3249, "step": 38603 }, { "epoch": 2.1617202374286033, "grad_norm": 1.2692798376083374, "learning_rate": 9.510868421052633e-05, "loss": 0.3939, "step": 38604 }, { 
"epoch": 2.1617762347407323, "grad_norm": 1.6741389036178589, "learning_rate": 9.510842105263157e-05, "loss": 0.3377, "step": 38605 }, { "epoch": 2.1618322320528613, "grad_norm": 1.1014587879180908, "learning_rate": 9.510815789473685e-05, "loss": 0.4165, "step": 38606 }, { "epoch": 2.1618882293649904, "grad_norm": 1.2921602725982666, "learning_rate": 9.510789473684211e-05, "loss": 0.3577, "step": 38607 }, { "epoch": 2.1619442266771194, "grad_norm": 2.498472213745117, "learning_rate": 9.510763157894738e-05, "loss": 0.3721, "step": 38608 }, { "epoch": 2.1620002239892484, "grad_norm": 1.0836174488067627, "learning_rate": 9.510736842105264e-05, "loss": 0.3331, "step": 38609 }, { "epoch": 2.1620562213013774, "grad_norm": 1.6303850412368774, "learning_rate": 9.51071052631579e-05, "loss": 0.4466, "step": 38610 }, { "epoch": 2.1621122186135064, "grad_norm": 1.271372675895691, "learning_rate": 9.510684210526316e-05, "loss": 0.4084, "step": 38611 }, { "epoch": 2.1621682159256355, "grad_norm": 1.1850817203521729, "learning_rate": 9.510657894736843e-05, "loss": 0.2733, "step": 38612 }, { "epoch": 2.1622242132377645, "grad_norm": 1.1912884712219238, "learning_rate": 9.510631578947369e-05, "loss": 0.4199, "step": 38613 }, { "epoch": 2.1622802105498935, "grad_norm": 1.3928399085998535, "learning_rate": 9.510605263157895e-05, "loss": 0.352, "step": 38614 }, { "epoch": 2.1623362078620225, "grad_norm": 4.831754684448242, "learning_rate": 9.510578947368421e-05, "loss": 0.3855, "step": 38615 }, { "epoch": 2.1623922051741515, "grad_norm": 1.2795354127883911, "learning_rate": 9.510552631578947e-05, "loss": 0.3873, "step": 38616 }, { "epoch": 2.1624482024862806, "grad_norm": 1.6733497381210327, "learning_rate": 9.510526315789475e-05, "loss": 0.4981, "step": 38617 }, { "epoch": 2.1625041997984096, "grad_norm": 1.0682317018508911, "learning_rate": 9.5105e-05, "loss": 0.3443, "step": 38618 }, { "epoch": 2.1625601971105386, "grad_norm": 6.7518310546875, "learning_rate": 
9.510473684210526e-05, "loss": 0.358, "step": 38619 }, { "epoch": 2.1626161944226676, "grad_norm": 1.6293147802352905, "learning_rate": 9.510447368421052e-05, "loss": 0.4795, "step": 38620 }, { "epoch": 2.1626721917347966, "grad_norm": 1.2021028995513916, "learning_rate": 9.51042105263158e-05, "loss": 0.3671, "step": 38621 }, { "epoch": 2.1627281890469257, "grad_norm": 1.1868031024932861, "learning_rate": 9.510394736842106e-05, "loss": 0.4678, "step": 38622 }, { "epoch": 2.1627841863590547, "grad_norm": 1.2829930782318115, "learning_rate": 9.510368421052632e-05, "loss": 0.5021, "step": 38623 }, { "epoch": 2.1628401836711837, "grad_norm": 2.4438936710357666, "learning_rate": 9.510342105263158e-05, "loss": 0.426, "step": 38624 }, { "epoch": 2.1628961809833127, "grad_norm": 1.3038809299468994, "learning_rate": 9.510315789473685e-05, "loss": 0.3525, "step": 38625 }, { "epoch": 2.1629521782954417, "grad_norm": 1.0626068115234375, "learning_rate": 9.510289473684211e-05, "loss": 0.267, "step": 38626 }, { "epoch": 2.1630081756075707, "grad_norm": 0.9845471978187561, "learning_rate": 9.510263157894738e-05, "loss": 0.313, "step": 38627 }, { "epoch": 2.1630641729196998, "grad_norm": 1.1851896047592163, "learning_rate": 9.510236842105263e-05, "loss": 0.4291, "step": 38628 }, { "epoch": 2.163120170231829, "grad_norm": 1.3013083934783936, "learning_rate": 9.51021052631579e-05, "loss": 0.378, "step": 38629 }, { "epoch": 2.163176167543958, "grad_norm": 1.3607717752456665, "learning_rate": 9.510184210526316e-05, "loss": 0.3257, "step": 38630 }, { "epoch": 2.163232164856087, "grad_norm": 1.3301739692687988, "learning_rate": 9.510157894736844e-05, "loss": 0.4685, "step": 38631 }, { "epoch": 2.163288162168216, "grad_norm": 1.148032307624817, "learning_rate": 9.510131578947368e-05, "loss": 0.3056, "step": 38632 }, { "epoch": 2.163344159480345, "grad_norm": 1.2843828201293945, "learning_rate": 9.510105263157896e-05, "loss": 0.3811, "step": 38633 }, { "epoch": 2.163400156792474, 
"grad_norm": 1.2604568004608154, "learning_rate": 9.510078947368421e-05, "loss": 0.3152, "step": 38634 }, { "epoch": 2.163456154104603, "grad_norm": 1.4230263233184814, "learning_rate": 9.510052631578947e-05, "loss": 0.4336, "step": 38635 }, { "epoch": 2.163512151416732, "grad_norm": 1.424559473991394, "learning_rate": 9.510026315789475e-05, "loss": 0.401, "step": 38636 }, { "epoch": 2.163568148728861, "grad_norm": 1.4415538311004639, "learning_rate": 9.51e-05, "loss": 0.5129, "step": 38637 }, { "epoch": 2.16362414604099, "grad_norm": 1.2011644840240479, "learning_rate": 9.509973684210527e-05, "loss": 0.493, "step": 38638 }, { "epoch": 2.163680143353119, "grad_norm": 1.1000211238861084, "learning_rate": 9.509947368421053e-05, "loss": 0.4089, "step": 38639 }, { "epoch": 2.163736140665248, "grad_norm": 1.1017917394638062, "learning_rate": 9.50992105263158e-05, "loss": 0.3115, "step": 38640 }, { "epoch": 2.163792137977377, "grad_norm": 2.239315986633301, "learning_rate": 9.509894736842106e-05, "loss": 0.3906, "step": 38641 }, { "epoch": 2.163848135289506, "grad_norm": 1.5324336290359497, "learning_rate": 9.509868421052632e-05, "loss": 0.3539, "step": 38642 }, { "epoch": 2.163904132601635, "grad_norm": 1.2346194982528687, "learning_rate": 9.509842105263158e-05, "loss": 0.3852, "step": 38643 }, { "epoch": 2.163960129913764, "grad_norm": 1.1673130989074707, "learning_rate": 9.509815789473685e-05, "loss": 0.3684, "step": 38644 }, { "epoch": 2.164016127225893, "grad_norm": 1.2488288879394531, "learning_rate": 9.509789473684211e-05, "loss": 0.4818, "step": 38645 }, { "epoch": 2.164072124538022, "grad_norm": 1.3335537910461426, "learning_rate": 9.509763157894737e-05, "loss": 0.3676, "step": 38646 }, { "epoch": 2.164128121850151, "grad_norm": 1.3128639459609985, "learning_rate": 9.509736842105263e-05, "loss": 0.3896, "step": 38647 }, { "epoch": 2.16418411916228, "grad_norm": 1.1093014478683472, "learning_rate": 9.50971052631579e-05, "loss": 0.4251, "step": 38648 }, { "epoch": 
2.164240116474409, "grad_norm": 1.3227241039276123, "learning_rate": 9.509684210526317e-05, "loss": 0.5072, "step": 38649 }, { "epoch": 2.164296113786538, "grad_norm": 1.337886929512024, "learning_rate": 9.509657894736842e-05, "loss": 0.4624, "step": 38650 }, { "epoch": 2.164352111098667, "grad_norm": 1.1644375324249268, "learning_rate": 9.509631578947368e-05, "loss": 0.3698, "step": 38651 }, { "epoch": 2.1644081084107962, "grad_norm": 1.3222434520721436, "learning_rate": 9.509605263157894e-05, "loss": 0.441, "step": 38652 }, { "epoch": 2.1644641057229252, "grad_norm": 1.2937666177749634, "learning_rate": 9.509578947368422e-05, "loss": 0.356, "step": 38653 }, { "epoch": 2.1645201030350543, "grad_norm": 1.115323543548584, "learning_rate": 9.509552631578948e-05, "loss": 0.3832, "step": 38654 }, { "epoch": 2.1645761003471833, "grad_norm": 1.278096318244934, "learning_rate": 9.509526315789474e-05, "loss": 0.3858, "step": 38655 }, { "epoch": 2.1646320976593123, "grad_norm": 1.301234245300293, "learning_rate": 9.5095e-05, "loss": 0.5348, "step": 38656 }, { "epoch": 2.1646880949714413, "grad_norm": 1.2598164081573486, "learning_rate": 9.509473684210527e-05, "loss": 0.4293, "step": 38657 }, { "epoch": 2.1647440922835703, "grad_norm": 1.2415803670883179, "learning_rate": 9.509447368421053e-05, "loss": 0.3764, "step": 38658 }, { "epoch": 2.1648000895956994, "grad_norm": 1.280566930770874, "learning_rate": 9.50942105263158e-05, "loss": 0.4864, "step": 38659 }, { "epoch": 2.1648560869078284, "grad_norm": 1.072081446647644, "learning_rate": 9.509394736842105e-05, "loss": 0.3543, "step": 38660 }, { "epoch": 2.1649120842199574, "grad_norm": 1.2830604314804077, "learning_rate": 9.509368421052632e-05, "loss": 0.5242, "step": 38661 }, { "epoch": 2.1649680815320864, "grad_norm": 1.5358259677886963, "learning_rate": 9.509342105263158e-05, "loss": 0.3182, "step": 38662 }, { "epoch": 2.1650240788442154, "grad_norm": 1.0890122652053833, "learning_rate": 9.509315789473686e-05, "loss": 
0.3241, "step": 38663 }, { "epoch": 2.1650800761563445, "grad_norm": 1.2559322118759155, "learning_rate": 9.509289473684212e-05, "loss": 0.3738, "step": 38664 }, { "epoch": 2.1651360734684735, "grad_norm": 1.1010996103286743, "learning_rate": 9.509263157894737e-05, "loss": 0.3821, "step": 38665 }, { "epoch": 2.1651920707806025, "grad_norm": 1.3612381219863892, "learning_rate": 9.509236842105263e-05, "loss": 0.4368, "step": 38666 }, { "epoch": 2.1652480680927315, "grad_norm": 1.3079760074615479, "learning_rate": 9.509210526315791e-05, "loss": 0.321, "step": 38667 }, { "epoch": 2.1653040654048605, "grad_norm": 1.1332366466522217, "learning_rate": 9.509184210526317e-05, "loss": 0.4093, "step": 38668 }, { "epoch": 2.1653600627169896, "grad_norm": 1.2772388458251953, "learning_rate": 9.509157894736841e-05, "loss": 0.5936, "step": 38669 }, { "epoch": 2.1654160600291186, "grad_norm": 1.3397071361541748, "learning_rate": 9.509131578947369e-05, "loss": 0.5131, "step": 38670 }, { "epoch": 2.1654720573412476, "grad_norm": 1.2153857946395874, "learning_rate": 9.509105263157895e-05, "loss": 0.3978, "step": 38671 }, { "epoch": 2.1655280546533766, "grad_norm": 1.2067170143127441, "learning_rate": 9.509078947368422e-05, "loss": 0.4489, "step": 38672 }, { "epoch": 2.1655840519655056, "grad_norm": 1.1188828945159912, "learning_rate": 9.509052631578948e-05, "loss": 0.3822, "step": 38673 }, { "epoch": 2.1656400492776346, "grad_norm": 1.2397791147232056, "learning_rate": 9.509026315789474e-05, "loss": 0.4053, "step": 38674 }, { "epoch": 2.1656960465897637, "grad_norm": 1.6326227188110352, "learning_rate": 9.509e-05, "loss": 0.5112, "step": 38675 }, { "epoch": 2.1657520439018927, "grad_norm": 2.9639601707458496, "learning_rate": 9.508973684210527e-05, "loss": 0.6638, "step": 38676 }, { "epoch": 2.1658080412140217, "grad_norm": 1.2753801345825195, "learning_rate": 9.508947368421053e-05, "loss": 0.411, "step": 38677 }, { "epoch": 2.1658640385261507, "grad_norm": 1.3385000228881836, 
"learning_rate": 9.508921052631579e-05, "loss": 0.3314, "step": 38678 }, { "epoch": 2.1659200358382797, "grad_norm": 1.5822936296463013, "learning_rate": 9.508894736842105e-05, "loss": 0.4313, "step": 38679 }, { "epoch": 2.1659760331504088, "grad_norm": 1.1356408596038818, "learning_rate": 9.508868421052633e-05, "loss": 0.2771, "step": 38680 }, { "epoch": 2.166032030462538, "grad_norm": 1.179540991783142, "learning_rate": 9.508842105263158e-05, "loss": 0.2924, "step": 38681 }, { "epoch": 2.166088027774667, "grad_norm": 1.1818360090255737, "learning_rate": 9.508815789473684e-05, "loss": 0.386, "step": 38682 }, { "epoch": 2.166144025086796, "grad_norm": 1.357463002204895, "learning_rate": 9.50878947368421e-05, "loss": 0.4024, "step": 38683 }, { "epoch": 2.166200022398925, "grad_norm": 1.4847887754440308, "learning_rate": 9.508763157894738e-05, "loss": 0.3718, "step": 38684 }, { "epoch": 2.166256019711054, "grad_norm": 1.1857059001922607, "learning_rate": 9.508736842105264e-05, "loss": 0.4537, "step": 38685 }, { "epoch": 2.166312017023183, "grad_norm": 1.2659543752670288, "learning_rate": 9.50871052631579e-05, "loss": 0.459, "step": 38686 }, { "epoch": 2.166368014335312, "grad_norm": 1.1082799434661865, "learning_rate": 9.508684210526316e-05, "loss": 0.3, "step": 38687 }, { "epoch": 2.166424011647441, "grad_norm": 1.6420667171478271, "learning_rate": 9.508657894736842e-05, "loss": 0.4268, "step": 38688 }, { "epoch": 2.16648000895957, "grad_norm": 1.2505706548690796, "learning_rate": 9.508631578947369e-05, "loss": 0.3536, "step": 38689 }, { "epoch": 2.166536006271699, "grad_norm": 1.760909914970398, "learning_rate": 9.508605263157895e-05, "loss": 0.4157, "step": 38690 }, { "epoch": 2.166592003583828, "grad_norm": 1.2517143487930298, "learning_rate": 9.508578947368422e-05, "loss": 0.4143, "step": 38691 }, { "epoch": 2.166648000895957, "grad_norm": 1.3516215085983276, "learning_rate": 9.508552631578947e-05, "loss": 0.439, "step": 38692 }, { "epoch": 2.166703998208086, 
"grad_norm": 1.533014178276062, "learning_rate": 9.508526315789474e-05, "loss": 0.4483, "step": 38693 }, { "epoch": 2.166759995520215, "grad_norm": 1.5135496854782104, "learning_rate": 9.5085e-05, "loss": 0.4472, "step": 38694 }, { "epoch": 2.166815992832344, "grad_norm": 1.148119330406189, "learning_rate": 9.508473684210528e-05, "loss": 0.4208, "step": 38695 }, { "epoch": 2.166871990144473, "grad_norm": 2.8705575466156006, "learning_rate": 9.508447368421053e-05, "loss": 0.3584, "step": 38696 }, { "epoch": 2.166927987456602, "grad_norm": 1.128533959388733, "learning_rate": 9.50842105263158e-05, "loss": 0.2863, "step": 38697 }, { "epoch": 2.166983984768731, "grad_norm": 1.2889177799224854, "learning_rate": 9.508394736842105e-05, "loss": 0.5247, "step": 38698 }, { "epoch": 2.16703998208086, "grad_norm": 1.1939932107925415, "learning_rate": 9.508368421052633e-05, "loss": 0.3645, "step": 38699 }, { "epoch": 2.167095979392989, "grad_norm": 1.2405543327331543, "learning_rate": 9.508342105263159e-05, "loss": 0.3339, "step": 38700 }, { "epoch": 2.167151976705118, "grad_norm": 1.2746635675430298, "learning_rate": 9.508315789473685e-05, "loss": 0.4621, "step": 38701 }, { "epoch": 2.167207974017247, "grad_norm": 1.770918607711792, "learning_rate": 9.508289473684211e-05, "loss": 0.4877, "step": 38702 }, { "epoch": 2.167263971329376, "grad_norm": 1.111429214477539, "learning_rate": 9.508263157894737e-05, "loss": 0.318, "step": 38703 }, { "epoch": 2.1673199686415052, "grad_norm": 0.9885323643684387, "learning_rate": 9.508236842105264e-05, "loss": 0.3386, "step": 38704 }, { "epoch": 2.1673759659536342, "grad_norm": 1.0358213186264038, "learning_rate": 9.50821052631579e-05, "loss": 0.4759, "step": 38705 }, { "epoch": 2.1674319632657633, "grad_norm": 1.1039458513259888, "learning_rate": 9.508184210526316e-05, "loss": 0.3714, "step": 38706 }, { "epoch": 2.1674879605778923, "grad_norm": 1.2194938659667969, "learning_rate": 9.508157894736842e-05, "loss": 0.5097, "step": 38707 }, { 
"epoch": 2.1675439578900213, "grad_norm": 1.103987693786621, "learning_rate": 9.508131578947369e-05, "loss": 0.3322, "step": 38708 }, { "epoch": 2.1675999552021503, "grad_norm": 1.4788658618927002, "learning_rate": 9.508105263157895e-05, "loss": 0.517, "step": 38709 }, { "epoch": 2.1676559525142793, "grad_norm": 1.2633532285690308, "learning_rate": 9.508078947368421e-05, "loss": 0.366, "step": 38710 }, { "epoch": 2.1677119498264084, "grad_norm": 1.207823395729065, "learning_rate": 9.508052631578947e-05, "loss": 0.3953, "step": 38711 }, { "epoch": 2.1677679471385374, "grad_norm": 1.4169831275939941, "learning_rate": 9.508026315789474e-05, "loss": 0.3455, "step": 38712 }, { "epoch": 2.1678239444506664, "grad_norm": 1.0815608501434326, "learning_rate": 9.508e-05, "loss": 0.3824, "step": 38713 }, { "epoch": 2.1678799417627954, "grad_norm": 11.572774887084961, "learning_rate": 9.507973684210528e-05, "loss": 0.3728, "step": 38714 }, { "epoch": 2.1679359390749244, "grad_norm": 1.1930299997329712, "learning_rate": 9.507947368421052e-05, "loss": 0.3353, "step": 38715 }, { "epoch": 2.1679919363870535, "grad_norm": 1.3133292198181152, "learning_rate": 9.50792105263158e-05, "loss": 0.5097, "step": 38716 }, { "epoch": 2.1680479336991825, "grad_norm": 1.9562718868255615, "learning_rate": 9.507894736842106e-05, "loss": 0.3721, "step": 38717 }, { "epoch": 2.1681039310113115, "grad_norm": 1.2233657836914062, "learning_rate": 9.507868421052633e-05, "loss": 0.4567, "step": 38718 }, { "epoch": 2.1681599283234405, "grad_norm": 1.4319254159927368, "learning_rate": 9.507842105263159e-05, "loss": 0.5813, "step": 38719 }, { "epoch": 2.1682159256355695, "grad_norm": 1.3189743757247925, "learning_rate": 9.507815789473684e-05, "loss": 0.4681, "step": 38720 }, { "epoch": 2.1682719229476985, "grad_norm": 1.2775558233261108, "learning_rate": 9.507789473684211e-05, "loss": 0.4193, "step": 38721 }, { "epoch": 2.1683279202598276, "grad_norm": 1.1394903659820557, "learning_rate": 
9.507763157894737e-05, "loss": 0.3815, "step": 38722 }, { "epoch": 2.1683839175719566, "grad_norm": 0.9606124758720398, "learning_rate": 9.507736842105264e-05, "loss": 0.3349, "step": 38723 }, { "epoch": 2.1684399148840856, "grad_norm": 1.2792342901229858, "learning_rate": 9.507710526315789e-05, "loss": 0.4129, "step": 38724 }, { "epoch": 2.1684959121962146, "grad_norm": 1.188530683517456, "learning_rate": 9.507684210526316e-05, "loss": 0.4346, "step": 38725 }, { "epoch": 2.1685519095083436, "grad_norm": 1.5447030067443848, "learning_rate": 9.507657894736842e-05, "loss": 0.4115, "step": 38726 }, { "epoch": 2.1686079068204727, "grad_norm": 1.632058024406433, "learning_rate": 9.50763157894737e-05, "loss": 0.3921, "step": 38727 }, { "epoch": 2.1686639041326017, "grad_norm": 1.1843558549880981, "learning_rate": 9.507605263157895e-05, "loss": 0.5779, "step": 38728 }, { "epoch": 2.1687199014447307, "grad_norm": 1.3355997800827026, "learning_rate": 9.507578947368421e-05, "loss": 0.3578, "step": 38729 }, { "epoch": 2.1687758987568597, "grad_norm": 1.1126041412353516, "learning_rate": 9.507552631578947e-05, "loss": 0.377, "step": 38730 }, { "epoch": 2.1688318960689887, "grad_norm": 1.345935344696045, "learning_rate": 9.507526315789475e-05, "loss": 0.5817, "step": 38731 }, { "epoch": 2.1688878933811178, "grad_norm": 1.5539087057113647, "learning_rate": 9.507500000000001e-05, "loss": 0.4198, "step": 38732 }, { "epoch": 2.168943890693247, "grad_norm": 1.0643341541290283, "learning_rate": 9.507473684210527e-05, "loss": 0.3843, "step": 38733 }, { "epoch": 2.168999888005376, "grad_norm": 1.3023912906646729, "learning_rate": 9.507447368421053e-05, "loss": 0.4587, "step": 38734 }, { "epoch": 2.169055885317505, "grad_norm": 1.0790605545043945, "learning_rate": 9.50742105263158e-05, "loss": 0.5219, "step": 38735 }, { "epoch": 2.169111882629634, "grad_norm": 1.103473424911499, "learning_rate": 9.507394736842106e-05, "loss": 0.3692, "step": 38736 }, { "epoch": 2.169167879941763, 
"grad_norm": 1.1826369762420654, "learning_rate": 9.507368421052632e-05, "loss": 0.3282, "step": 38737 }, { "epoch": 2.169223877253892, "grad_norm": 1.1878516674041748, "learning_rate": 9.507342105263158e-05, "loss": 0.3549, "step": 38738 }, { "epoch": 2.169279874566021, "grad_norm": 1.3450536727905273, "learning_rate": 9.507315789473684e-05, "loss": 0.3837, "step": 38739 }, { "epoch": 2.16933587187815, "grad_norm": 18.298437118530273, "learning_rate": 9.507289473684211e-05, "loss": 0.448, "step": 38740 }, { "epoch": 2.169391869190279, "grad_norm": 1.3776415586471558, "learning_rate": 9.507263157894737e-05, "loss": 0.3615, "step": 38741 }, { "epoch": 2.169447866502408, "grad_norm": 1.2406986951828003, "learning_rate": 9.507236842105263e-05, "loss": 0.5144, "step": 38742 }, { "epoch": 2.169503863814537, "grad_norm": 1.4912259578704834, "learning_rate": 9.507210526315789e-05, "loss": 0.4983, "step": 38743 }, { "epoch": 2.169559861126666, "grad_norm": 1.1874314546585083, "learning_rate": 9.507184210526316e-05, "loss": 0.4494, "step": 38744 }, { "epoch": 2.169615858438795, "grad_norm": 1.2723397016525269, "learning_rate": 9.507157894736842e-05, "loss": 0.307, "step": 38745 }, { "epoch": 2.169671855750924, "grad_norm": 1.0741443634033203, "learning_rate": 9.50713157894737e-05, "loss": 0.3702, "step": 38746 }, { "epoch": 2.169727853063053, "grad_norm": 1.28151273727417, "learning_rate": 9.507105263157894e-05, "loss": 0.4373, "step": 38747 }, { "epoch": 2.169783850375182, "grad_norm": 1.0656479597091675, "learning_rate": 9.507078947368422e-05, "loss": 0.3111, "step": 38748 }, { "epoch": 2.169839847687311, "grad_norm": 1.0278630256652832, "learning_rate": 9.507052631578948e-05, "loss": 0.3361, "step": 38749 }, { "epoch": 2.16989584499944, "grad_norm": 1.1096421480178833, "learning_rate": 9.507026315789475e-05, "loss": 0.2895, "step": 38750 }, { "epoch": 2.169951842311569, "grad_norm": 1.2937290668487549, "learning_rate": 9.507000000000001e-05, "loss": 0.3763, "step": 38751 
}, { "epoch": 2.170007839623698, "grad_norm": 1.3802119493484497, "learning_rate": 9.506973684210527e-05, "loss": 0.3737, "step": 38752 }, { "epoch": 2.170063836935827, "grad_norm": 1.3635809421539307, "learning_rate": 9.506947368421053e-05, "loss": 0.3415, "step": 38753 }, { "epoch": 2.170119834247956, "grad_norm": 1.760245680809021, "learning_rate": 9.50692105263158e-05, "loss": 0.3595, "step": 38754 }, { "epoch": 2.170175831560085, "grad_norm": 7.00363826751709, "learning_rate": 9.506894736842106e-05, "loss": 0.4028, "step": 38755 }, { "epoch": 2.170231828872214, "grad_norm": 1.265249252319336, "learning_rate": 9.506868421052632e-05, "loss": 0.4439, "step": 38756 }, { "epoch": 2.1702878261843432, "grad_norm": 1.1187540292739868, "learning_rate": 9.506842105263158e-05, "loss": 0.3813, "step": 38757 }, { "epoch": 2.1703438234964723, "grad_norm": 1.2081091403961182, "learning_rate": 9.506815789473684e-05, "loss": 0.3272, "step": 38758 }, { "epoch": 2.1703998208086013, "grad_norm": 1.191619634628296, "learning_rate": 9.506789473684211e-05, "loss": 0.3484, "step": 38759 }, { "epoch": 2.1704558181207303, "grad_norm": 1.1724276542663574, "learning_rate": 9.506763157894737e-05, "loss": 0.4558, "step": 38760 }, { "epoch": 2.1705118154328593, "grad_norm": 1.1554993391036987, "learning_rate": 9.506736842105263e-05, "loss": 0.385, "step": 38761 }, { "epoch": 2.1705678127449883, "grad_norm": 1.2074999809265137, "learning_rate": 9.50671052631579e-05, "loss": 0.337, "step": 38762 }, { "epoch": 2.1706238100571174, "grad_norm": 1.16044282913208, "learning_rate": 9.506684210526317e-05, "loss": 0.445, "step": 38763 }, { "epoch": 2.1706798073692464, "grad_norm": 1.5974719524383545, "learning_rate": 9.506657894736843e-05, "loss": 0.6338, "step": 38764 }, { "epoch": 2.1707358046813754, "grad_norm": 1.3173381090164185, "learning_rate": 9.506631578947369e-05, "loss": 0.4665, "step": 38765 }, { "epoch": 2.1707918019935044, "grad_norm": 1.090227484703064, "learning_rate": 
9.506605263157895e-05, "loss": 0.5187, "step": 38766 }, { "epoch": 2.1708477993056334, "grad_norm": 1.4474667310714722, "learning_rate": 9.506578947368422e-05, "loss": 0.4419, "step": 38767 }, { "epoch": 2.1709037966177624, "grad_norm": 1.0742278099060059, "learning_rate": 9.506552631578948e-05, "loss": 0.357, "step": 38768 }, { "epoch": 2.1709597939298915, "grad_norm": 1.0377870798110962, "learning_rate": 9.506526315789475e-05, "loss": 0.3934, "step": 38769 }, { "epoch": 2.1710157912420205, "grad_norm": 1.4398410320281982, "learning_rate": 9.5065e-05, "loss": 0.3485, "step": 38770 }, { "epoch": 2.1710717885541495, "grad_norm": 1.1837376356124878, "learning_rate": 9.506473684210527e-05, "loss": 0.3261, "step": 38771 }, { "epoch": 2.1711277858662785, "grad_norm": 1.4360090494155884, "learning_rate": 9.506447368421053e-05, "loss": 0.4374, "step": 38772 }, { "epoch": 2.1711837831784075, "grad_norm": 1.3159102201461792, "learning_rate": 9.506421052631579e-05, "loss": 0.4219, "step": 38773 }, { "epoch": 2.1712397804905366, "grad_norm": 1.34866201877594, "learning_rate": 9.506394736842106e-05, "loss": 0.5321, "step": 38774 }, { "epoch": 2.1712957778026656, "grad_norm": 1.5206176042556763, "learning_rate": 9.506368421052631e-05, "loss": 0.4511, "step": 38775 }, { "epoch": 2.1713517751147946, "grad_norm": 1.2116981744766235, "learning_rate": 9.506342105263158e-05, "loss": 0.3733, "step": 38776 }, { "epoch": 2.1714077724269236, "grad_norm": 1.347051978111267, "learning_rate": 9.506315789473684e-05, "loss": 0.3345, "step": 38777 }, { "epoch": 2.1714637697390526, "grad_norm": 1.2784743309020996, "learning_rate": 9.506289473684212e-05, "loss": 0.4808, "step": 38778 }, { "epoch": 2.1715197670511817, "grad_norm": 2.8104066848754883, "learning_rate": 9.506263157894736e-05, "loss": 0.4559, "step": 38779 }, { "epoch": 2.1715757643633107, "grad_norm": 1.1485052108764648, "learning_rate": 9.506236842105264e-05, "loss": 0.3383, "step": 38780 }, { "epoch": 2.1716317616754397, 
"grad_norm": 1.295506477355957, "learning_rate": 9.50621052631579e-05, "loss": 0.3663, "step": 38781 }, { "epoch": 2.1716877589875687, "grad_norm": 1.2281622886657715, "learning_rate": 9.506184210526317e-05, "loss": 0.4737, "step": 38782 }, { "epoch": 2.1717437562996977, "grad_norm": 1.2863173484802246, "learning_rate": 9.506157894736843e-05, "loss": 0.4569, "step": 38783 }, { "epoch": 2.1717997536118268, "grad_norm": 0.9083560109138489, "learning_rate": 9.506131578947369e-05, "loss": 0.3385, "step": 38784 }, { "epoch": 2.1718557509239558, "grad_norm": 1.363257646560669, "learning_rate": 9.506105263157895e-05, "loss": 0.4225, "step": 38785 }, { "epoch": 2.171911748236085, "grad_norm": 1.5215632915496826, "learning_rate": 9.506078947368422e-05, "loss": 0.4476, "step": 38786 }, { "epoch": 2.171967745548214, "grad_norm": 1.3151311874389648, "learning_rate": 9.506052631578948e-05, "loss": 0.4814, "step": 38787 }, { "epoch": 2.172023742860343, "grad_norm": 1.5748416185379028, "learning_rate": 9.506026315789474e-05, "loss": 0.5159, "step": 38788 }, { "epoch": 2.172079740172472, "grad_norm": 1.0687793493270874, "learning_rate": 9.506e-05, "loss": 0.3712, "step": 38789 }, { "epoch": 2.172135737484601, "grad_norm": 1.0927104949951172, "learning_rate": 9.505973684210526e-05, "loss": 0.3703, "step": 38790 }, { "epoch": 2.17219173479673, "grad_norm": 1.2010008096694946, "learning_rate": 9.505947368421053e-05, "loss": 0.4677, "step": 38791 }, { "epoch": 2.172247732108859, "grad_norm": 1.1475380659103394, "learning_rate": 9.50592105263158e-05, "loss": 0.4434, "step": 38792 }, { "epoch": 2.172303729420988, "grad_norm": 1.2391051054000854, "learning_rate": 9.505894736842105e-05, "loss": 0.4522, "step": 38793 }, { "epoch": 2.172359726733117, "grad_norm": 1.1092345714569092, "learning_rate": 9.505868421052631e-05, "loss": 0.3942, "step": 38794 }, { "epoch": 2.172415724045246, "grad_norm": 1.4943710565567017, "learning_rate": 9.505842105263159e-05, "loss": 0.3755, "step": 38795 }, { 
"epoch": 2.172471721357375, "grad_norm": 1.1359182596206665, "learning_rate": 9.505815789473685e-05, "loss": 0.3567, "step": 38796 }, { "epoch": 2.172527718669504, "grad_norm": 0.9820088744163513, "learning_rate": 9.50578947368421e-05, "loss": 0.3297, "step": 38797 }, { "epoch": 2.172583715981633, "grad_norm": 1.015222191810608, "learning_rate": 9.505763157894737e-05, "loss": 0.379, "step": 38798 }, { "epoch": 2.172639713293762, "grad_norm": 1.2182691097259521, "learning_rate": 9.505736842105264e-05, "loss": 0.3359, "step": 38799 }, { "epoch": 2.172695710605891, "grad_norm": 1.1907962560653687, "learning_rate": 9.50571052631579e-05, "loss": 0.3002, "step": 38800 }, { "epoch": 2.17275170791802, "grad_norm": 1.1289174556732178, "learning_rate": 9.505684210526317e-05, "loss": 0.3434, "step": 38801 }, { "epoch": 2.172807705230149, "grad_norm": 1.3047112226486206, "learning_rate": 9.505657894736842e-05, "loss": 0.4215, "step": 38802 }, { "epoch": 2.172863702542278, "grad_norm": 1.1560428142547607, "learning_rate": 9.505631578947369e-05, "loss": 0.3619, "step": 38803 }, { "epoch": 2.172919699854407, "grad_norm": 1.3775564432144165, "learning_rate": 9.505605263157895e-05, "loss": 0.5222, "step": 38804 }, { "epoch": 2.172975697166536, "grad_norm": 1.239606261253357, "learning_rate": 9.505578947368422e-05, "loss": 0.3483, "step": 38805 }, { "epoch": 2.173031694478665, "grad_norm": 1.0560238361358643, "learning_rate": 9.505552631578948e-05, "loss": 0.4161, "step": 38806 }, { "epoch": 2.173087691790794, "grad_norm": 10.787339210510254, "learning_rate": 9.505526315789473e-05, "loss": 0.4249, "step": 38807 }, { "epoch": 2.173143689102923, "grad_norm": 1.4692264795303345, "learning_rate": 9.5055e-05, "loss": 0.4207, "step": 38808 }, { "epoch": 2.1731996864150522, "grad_norm": 1.3276922702789307, "learning_rate": 9.505473684210526e-05, "loss": 0.5401, "step": 38809 }, { "epoch": 2.1732556837271813, "grad_norm": 1.1649796962738037, "learning_rate": 9.505447368421054e-05, "loss": 
0.3488, "step": 38810 }, { "epoch": 2.1733116810393103, "grad_norm": 1.348006010055542, "learning_rate": 9.50542105263158e-05, "loss": 0.3728, "step": 38811 }, { "epoch": 2.1733676783514393, "grad_norm": 1.331128716468811, "learning_rate": 9.505394736842106e-05, "loss": 0.5421, "step": 38812 }, { "epoch": 2.1734236756635683, "grad_norm": 1.311055302619934, "learning_rate": 9.505368421052632e-05, "loss": 0.4138, "step": 38813 }, { "epoch": 2.1734796729756973, "grad_norm": 1.1107230186462402, "learning_rate": 9.505342105263159e-05, "loss": 0.3522, "step": 38814 }, { "epoch": 2.1735356702878263, "grad_norm": 1.0440689325332642, "learning_rate": 9.505315789473685e-05, "loss": 0.3763, "step": 38815 }, { "epoch": 2.1735916675999554, "grad_norm": 1.1436115503311157, "learning_rate": 9.505289473684211e-05, "loss": 0.4915, "step": 38816 }, { "epoch": 2.1736476649120844, "grad_norm": 1.4141136407852173, "learning_rate": 9.505263157894737e-05, "loss": 0.5187, "step": 38817 }, { "epoch": 2.1737036622242134, "grad_norm": 1.1843149662017822, "learning_rate": 9.505236842105264e-05, "loss": 0.4248, "step": 38818 }, { "epoch": 2.1737596595363424, "grad_norm": 1.2413588762283325, "learning_rate": 9.50521052631579e-05, "loss": 0.3571, "step": 38819 }, { "epoch": 2.1738156568484714, "grad_norm": 1.5059640407562256, "learning_rate": 9.505184210526316e-05, "loss": 0.4477, "step": 38820 }, { "epoch": 2.1738716541606005, "grad_norm": 1.0821102857589722, "learning_rate": 9.505157894736842e-05, "loss": 0.3638, "step": 38821 }, { "epoch": 2.1739276514727295, "grad_norm": 1.2391413450241089, "learning_rate": 9.50513157894737e-05, "loss": 0.3356, "step": 38822 }, { "epoch": 2.1739836487848585, "grad_norm": 2.526907444000244, "learning_rate": 9.505105263157895e-05, "loss": 0.3537, "step": 38823 }, { "epoch": 2.1740396460969875, "grad_norm": 1.299836277961731, "learning_rate": 9.505078947368421e-05, "loss": 0.5154, "step": 38824 }, { "epoch": 2.1740956434091165, "grad_norm": 1.2905185222625732, 
"learning_rate": 9.505052631578947e-05, "loss": 0.4221, "step": 38825 }, { "epoch": 2.1741516407212456, "grad_norm": 1.4165515899658203, "learning_rate": 9.505026315789473e-05, "loss": 0.4411, "step": 38826 }, { "epoch": 2.1742076380333746, "grad_norm": 1.1649396419525146, "learning_rate": 9.505e-05, "loss": 0.3733, "step": 38827 }, { "epoch": 2.1742636353455036, "grad_norm": 1.7880675792694092, "learning_rate": 9.504973684210527e-05, "loss": 0.5652, "step": 38828 }, { "epoch": 2.1743196326576326, "grad_norm": 1.4706186056137085, "learning_rate": 9.504947368421053e-05, "loss": 0.5711, "step": 38829 }, { "epoch": 2.1743756299697616, "grad_norm": 1.3184335231781006, "learning_rate": 9.504921052631579e-05, "loss": 0.6148, "step": 38830 }, { "epoch": 2.1744316272818907, "grad_norm": 1.4265294075012207, "learning_rate": 9.504894736842106e-05, "loss": 0.5209, "step": 38831 }, { "epoch": 2.1744876245940197, "grad_norm": 1.400057315826416, "learning_rate": 9.504868421052632e-05, "loss": 0.4364, "step": 38832 }, { "epoch": 2.1745436219061487, "grad_norm": 1.2918931245803833, "learning_rate": 9.504842105263159e-05, "loss": 0.4272, "step": 38833 }, { "epoch": 2.1745996192182777, "grad_norm": 1.2086308002471924, "learning_rate": 9.504815789473684e-05, "loss": 0.4111, "step": 38834 }, { "epoch": 2.1746556165304067, "grad_norm": 1.6088145971298218, "learning_rate": 9.504789473684211e-05, "loss": 0.4501, "step": 38835 }, { "epoch": 2.1747116138425358, "grad_norm": 1.1741492748260498, "learning_rate": 9.504763157894737e-05, "loss": 0.4131, "step": 38836 }, { "epoch": 2.1747676111546648, "grad_norm": 1.002426028251648, "learning_rate": 9.504736842105264e-05, "loss": 0.3974, "step": 38837 }, { "epoch": 2.174823608466794, "grad_norm": 1.4331179857254028, "learning_rate": 9.50471052631579e-05, "loss": 0.4252, "step": 38838 }, { "epoch": 2.174879605778923, "grad_norm": 1.3485172986984253, "learning_rate": 9.504684210526316e-05, "loss": 0.3749, "step": 38839 }, { "epoch": 
2.174935603091052, "grad_norm": 1.1548750400543213, "learning_rate": 9.504657894736842e-05, "loss": 0.4604, "step": 38840 }, { "epoch": 2.174991600403181, "grad_norm": 1.2099618911743164, "learning_rate": 9.504631578947368e-05, "loss": 0.3838, "step": 38841 }, { "epoch": 2.17504759771531, "grad_norm": 1.1643341779708862, "learning_rate": 9.504605263157896e-05, "loss": 0.3674, "step": 38842 }, { "epoch": 2.175103595027439, "grad_norm": 1.2454111576080322, "learning_rate": 9.504578947368422e-05, "loss": 0.4027, "step": 38843 }, { "epoch": 2.175159592339568, "grad_norm": 1.327427625656128, "learning_rate": 9.504552631578948e-05, "loss": 0.4797, "step": 38844 }, { "epoch": 2.175215589651697, "grad_norm": 1.336377739906311, "learning_rate": 9.504526315789474e-05, "loss": 0.333, "step": 38845 }, { "epoch": 2.175271586963826, "grad_norm": 1.1384215354919434, "learning_rate": 9.504500000000001e-05, "loss": 0.364, "step": 38846 }, { "epoch": 2.175327584275955, "grad_norm": 1.369988203048706, "learning_rate": 9.504473684210527e-05, "loss": 0.5326, "step": 38847 }, { "epoch": 2.1753835815880835, "grad_norm": 1.1162896156311035, "learning_rate": 9.504447368421053e-05, "loss": 0.356, "step": 38848 }, { "epoch": 2.175439578900213, "grad_norm": 1.1583393812179565, "learning_rate": 9.504421052631579e-05, "loss": 0.4827, "step": 38849 }, { "epoch": 2.1754955762123416, "grad_norm": 0.9568096399307251, "learning_rate": 9.504394736842106e-05, "loss": 0.2738, "step": 38850 }, { "epoch": 2.175551573524471, "grad_norm": 1.0295175313949585, "learning_rate": 9.504368421052632e-05, "loss": 0.3141, "step": 38851 }, { "epoch": 2.1756075708365996, "grad_norm": 1.140512228012085, "learning_rate": 9.504342105263158e-05, "loss": 0.4043, "step": 38852 }, { "epoch": 2.175663568148729, "grad_norm": 1.1284157037734985, "learning_rate": 9.504315789473684e-05, "loss": 0.4028, "step": 38853 }, { "epoch": 2.1757195654608577, "grad_norm": 1.0963139533996582, "learning_rate": 9.504289473684211e-05, "loss": 
0.397, "step": 38854 }, { "epoch": 2.175775562772987, "grad_norm": 1.193366289138794, "learning_rate": 9.504263157894737e-05, "loss": 0.4722, "step": 38855 }, { "epoch": 2.1758315600851157, "grad_norm": 1.4941381216049194, "learning_rate": 9.504236842105265e-05, "loss": 0.4345, "step": 38856 }, { "epoch": 2.175887557397245, "grad_norm": 1.2314658164978027, "learning_rate": 9.504210526315789e-05, "loss": 0.4036, "step": 38857 }, { "epoch": 2.1759435547093737, "grad_norm": 1.2378042936325073, "learning_rate": 9.504184210526315e-05, "loss": 0.4267, "step": 38858 }, { "epoch": 2.175999552021503, "grad_norm": 1.2242639064788818, "learning_rate": 9.504157894736843e-05, "loss": 0.3896, "step": 38859 }, { "epoch": 2.1760555493336318, "grad_norm": 1.1625510454177856, "learning_rate": 9.504131578947369e-05, "loss": 0.4591, "step": 38860 }, { "epoch": 2.1761115466457612, "grad_norm": 1.1546874046325684, "learning_rate": 9.504105263157896e-05, "loss": 0.324, "step": 38861 }, { "epoch": 2.17616754395789, "grad_norm": 1.0996016263961792, "learning_rate": 9.50407894736842e-05, "loss": 0.3849, "step": 38862 }, { "epoch": 2.1762235412700193, "grad_norm": 1.4061026573181152, "learning_rate": 9.504052631578948e-05, "loss": 0.63, "step": 38863 }, { "epoch": 2.176279538582148, "grad_norm": 1.5557388067245483, "learning_rate": 9.504026315789474e-05, "loss": 0.511, "step": 38864 }, { "epoch": 2.1763355358942773, "grad_norm": 1.2355982065200806, "learning_rate": 9.504000000000001e-05, "loss": 0.4664, "step": 38865 }, { "epoch": 2.176391533206406, "grad_norm": 1.4220056533813477, "learning_rate": 9.503973684210527e-05, "loss": 0.3973, "step": 38866 }, { "epoch": 2.1764475305185353, "grad_norm": 1.2815178632736206, "learning_rate": 9.503947368421053e-05, "loss": 0.3512, "step": 38867 }, { "epoch": 2.176503527830664, "grad_norm": 1.3415242433547974, "learning_rate": 9.503921052631579e-05, "loss": 0.4323, "step": 38868 }, { "epoch": 2.176559525142793, "grad_norm": 0.9945501685142517, 
"learning_rate": 9.503894736842106e-05, "loss": 0.3058, "step": 38869 }, { "epoch": 2.176615522454922, "grad_norm": 1.1742113828659058, "learning_rate": 9.503868421052632e-05, "loss": 0.42, "step": 38870 }, { "epoch": 2.176671519767051, "grad_norm": 1.1134363412857056, "learning_rate": 9.503842105263158e-05, "loss": 0.3647, "step": 38871 }, { "epoch": 2.17672751707918, "grad_norm": 1.309715986251831, "learning_rate": 9.503815789473684e-05, "loss": 0.393, "step": 38872 }, { "epoch": 2.176783514391309, "grad_norm": 1.3498668670654297, "learning_rate": 9.503789473684212e-05, "loss": 0.3947, "step": 38873 }, { "epoch": 2.176839511703438, "grad_norm": 1.7171504497528076, "learning_rate": 9.503763157894738e-05, "loss": 0.401, "step": 38874 }, { "epoch": 2.176895509015567, "grad_norm": 1.3124399185180664, "learning_rate": 9.503736842105264e-05, "loss": 0.4468, "step": 38875 }, { "epoch": 2.176951506327696, "grad_norm": 1.4594861268997192, "learning_rate": 9.50371052631579e-05, "loss": 0.6204, "step": 38876 }, { "epoch": 2.177007503639825, "grad_norm": 1.3782179355621338, "learning_rate": 9.503684210526316e-05, "loss": 0.413, "step": 38877 }, { "epoch": 2.177063500951954, "grad_norm": 1.1675093173980713, "learning_rate": 9.503657894736843e-05, "loss": 0.3121, "step": 38878 }, { "epoch": 2.177119498264083, "grad_norm": 1.2666668891906738, "learning_rate": 9.503631578947369e-05, "loss": 0.5612, "step": 38879 }, { "epoch": 2.177175495576212, "grad_norm": 1.3125929832458496, "learning_rate": 9.503605263157895e-05, "loss": 0.3641, "step": 38880 }, { "epoch": 2.177231492888341, "grad_norm": 1.1350667476654053, "learning_rate": 9.503578947368421e-05, "loss": 0.4192, "step": 38881 }, { "epoch": 2.17728749020047, "grad_norm": 1.1488652229309082, "learning_rate": 9.503552631578948e-05, "loss": 0.4399, "step": 38882 }, { "epoch": 2.177343487512599, "grad_norm": 1.3907078504562378, "learning_rate": 9.503526315789474e-05, "loss": 0.4261, "step": 38883 }, { "epoch": 2.1773994848247282, 
"grad_norm": 1.2135865688323975, "learning_rate": 9.5035e-05, "loss": 0.4621, "step": 38884 }, { "epoch": 2.1774554821368572, "grad_norm": 1.1741116046905518, "learning_rate": 9.503473684210526e-05, "loss": 0.3871, "step": 38885 }, { "epoch": 2.1775114794489863, "grad_norm": 1.2427276372909546, "learning_rate": 9.503447368421053e-05, "loss": 0.4548, "step": 38886 }, { "epoch": 2.1775674767611153, "grad_norm": 1.0792986154556274, "learning_rate": 9.50342105263158e-05, "loss": 0.4223, "step": 38887 }, { "epoch": 2.1776234740732443, "grad_norm": 1.2899301052093506, "learning_rate": 9.503394736842107e-05, "loss": 0.4178, "step": 38888 }, { "epoch": 2.1776794713853733, "grad_norm": 1.2221159934997559, "learning_rate": 9.503368421052631e-05, "loss": 0.3752, "step": 38889 }, { "epoch": 2.1777354686975023, "grad_norm": 1.1396558284759521, "learning_rate": 9.503342105263159e-05, "loss": 0.3783, "step": 38890 }, { "epoch": 2.1777914660096314, "grad_norm": 1.1767154932022095, "learning_rate": 9.503315789473685e-05, "loss": 0.4357, "step": 38891 }, { "epoch": 2.1778474633217604, "grad_norm": 1.1137216091156006, "learning_rate": 9.503289473684212e-05, "loss": 0.3027, "step": 38892 }, { "epoch": 2.1779034606338894, "grad_norm": 1.1826591491699219, "learning_rate": 9.503263157894738e-05, "loss": 0.3199, "step": 38893 }, { "epoch": 2.1779594579460184, "grad_norm": 1.4285383224487305, "learning_rate": 9.503236842105263e-05, "loss": 0.3135, "step": 38894 }, { "epoch": 2.1780154552581474, "grad_norm": 1.256919264793396, "learning_rate": 9.50321052631579e-05, "loss": 0.3256, "step": 38895 }, { "epoch": 2.1780714525702765, "grad_norm": 1.0414938926696777, "learning_rate": 9.503184210526316e-05, "loss": 0.348, "step": 38896 }, { "epoch": 2.1781274498824055, "grad_norm": 1.1992238759994507, "learning_rate": 9.503157894736843e-05, "loss": 0.3693, "step": 38897 }, { "epoch": 2.1781834471945345, "grad_norm": 1.8195973634719849, "learning_rate": 9.503131578947369e-05, "loss": 0.4111, "step": 
38898 }, { "epoch": 2.1782394445066635, "grad_norm": 1.1915786266326904, "learning_rate": 9.503105263157895e-05, "loss": 0.4478, "step": 38899 }, { "epoch": 2.1782954418187925, "grad_norm": 1.3864026069641113, "learning_rate": 9.503078947368421e-05, "loss": 0.4103, "step": 38900 }, { "epoch": 2.1783514391309216, "grad_norm": 1.1396385431289673, "learning_rate": 9.503052631578948e-05, "loss": 0.3759, "step": 38901 }, { "epoch": 2.1784074364430506, "grad_norm": 1.4047677516937256, "learning_rate": 9.503026315789474e-05, "loss": 0.305, "step": 38902 }, { "epoch": 2.1784634337551796, "grad_norm": 1.1481488943099976, "learning_rate": 9.503e-05, "loss": 0.3261, "step": 38903 }, { "epoch": 2.1785194310673086, "grad_norm": 0.9933944344520569, "learning_rate": 9.502973684210526e-05, "loss": 0.4122, "step": 38904 }, { "epoch": 2.1785754283794376, "grad_norm": 1.276698350906372, "learning_rate": 9.502947368421054e-05, "loss": 0.4318, "step": 38905 }, { "epoch": 2.1786314256915666, "grad_norm": 1.16350257396698, "learning_rate": 9.50292105263158e-05, "loss": 0.3173, "step": 38906 }, { "epoch": 2.1786874230036957, "grad_norm": 1.2647807598114014, "learning_rate": 9.502894736842106e-05, "loss": 0.5815, "step": 38907 }, { "epoch": 2.1787434203158247, "grad_norm": 1.2193032503128052, "learning_rate": 9.502868421052632e-05, "loss": 0.3209, "step": 38908 }, { "epoch": 2.1787994176279537, "grad_norm": 1.2528175115585327, "learning_rate": 9.502842105263159e-05, "loss": 0.33, "step": 38909 }, { "epoch": 2.1788554149400827, "grad_norm": 1.1707937717437744, "learning_rate": 9.502815789473685e-05, "loss": 0.3684, "step": 38910 }, { "epoch": 2.1789114122522117, "grad_norm": 1.7491612434387207, "learning_rate": 9.502789473684211e-05, "loss": 0.4934, "step": 38911 }, { "epoch": 2.1789674095643408, "grad_norm": 1.155828833580017, "learning_rate": 9.502763157894737e-05, "loss": 0.3752, "step": 38912 }, { "epoch": 2.17902340687647, "grad_norm": 1.2812103033065796, "learning_rate": 
9.502736842105263e-05, "loss": 0.4206, "step": 38913 }, { "epoch": 2.179079404188599, "grad_norm": 1.065891146659851, "learning_rate": 9.50271052631579e-05, "loss": 0.3271, "step": 38914 }, { "epoch": 2.179135401500728, "grad_norm": 1.0294924974441528, "learning_rate": 9.502684210526316e-05, "loss": 0.3177, "step": 38915 }, { "epoch": 2.179191398812857, "grad_norm": 1.785454511642456, "learning_rate": 9.502657894736843e-05, "loss": 0.394, "step": 38916 }, { "epoch": 2.179247396124986, "grad_norm": 1.1948015689849854, "learning_rate": 9.502631578947368e-05, "loss": 0.4498, "step": 38917 }, { "epoch": 2.179303393437115, "grad_norm": 1.1637802124023438, "learning_rate": 9.502605263157895e-05, "loss": 0.4534, "step": 38918 }, { "epoch": 2.179359390749244, "grad_norm": 1.2530790567398071, "learning_rate": 9.502578947368421e-05, "loss": 0.3578, "step": 38919 }, { "epoch": 2.179415388061373, "grad_norm": 1.146479845046997, "learning_rate": 9.502552631578949e-05, "loss": 0.4398, "step": 38920 }, { "epoch": 2.179471385373502, "grad_norm": 1.2815388441085815, "learning_rate": 9.502526315789475e-05, "loss": 0.5147, "step": 38921 }, { "epoch": 2.179527382685631, "grad_norm": 1.3130314350128174, "learning_rate": 9.5025e-05, "loss": 0.3835, "step": 38922 }, { "epoch": 2.17958337999776, "grad_norm": 1.3097599744796753, "learning_rate": 9.502473684210527e-05, "loss": 0.6164, "step": 38923 }, { "epoch": 2.179639377309889, "grad_norm": 1.446223258972168, "learning_rate": 9.502447368421054e-05, "loss": 0.5908, "step": 38924 }, { "epoch": 2.179695374622018, "grad_norm": 3.3467094898223877, "learning_rate": 9.50242105263158e-05, "loss": 0.4512, "step": 38925 }, { "epoch": 2.179751371934147, "grad_norm": 1.2375922203063965, "learning_rate": 9.502394736842106e-05, "loss": 0.3563, "step": 38926 }, { "epoch": 2.179807369246276, "grad_norm": 1.1299866437911987, "learning_rate": 9.502368421052632e-05, "loss": 0.3449, "step": 38927 }, { "epoch": 2.179863366558405, "grad_norm": 
1.254575252532959, "learning_rate": 9.502342105263158e-05, "loss": 0.3399, "step": 38928 }, { "epoch": 2.179919363870534, "grad_norm": 1.0637726783752441, "learning_rate": 9.502315789473685e-05, "loss": 0.3475, "step": 38929 }, { "epoch": 2.179975361182663, "grad_norm": 1.1697044372558594, "learning_rate": 9.502289473684211e-05, "loss": 0.3728, "step": 38930 }, { "epoch": 2.180031358494792, "grad_norm": 1.3098459243774414, "learning_rate": 9.502263157894737e-05, "loss": 0.4322, "step": 38931 }, { "epoch": 2.180087355806921, "grad_norm": 2.233530282974243, "learning_rate": 9.502236842105263e-05, "loss": 0.3817, "step": 38932 }, { "epoch": 2.18014335311905, "grad_norm": 1.1003053188323975, "learning_rate": 9.50221052631579e-05, "loss": 0.4172, "step": 38933 }, { "epoch": 2.180199350431179, "grad_norm": 1.119368314743042, "learning_rate": 9.502184210526316e-05, "loss": 0.5839, "step": 38934 }, { "epoch": 2.180255347743308, "grad_norm": 1.164452075958252, "learning_rate": 9.502157894736842e-05, "loss": 0.4346, "step": 38935 }, { "epoch": 2.1803113450554372, "grad_norm": 1.368444561958313, "learning_rate": 9.502131578947368e-05, "loss": 0.4748, "step": 38936 }, { "epoch": 2.1803673423675662, "grad_norm": 1.2676095962524414, "learning_rate": 9.502105263157896e-05, "loss": 0.3016, "step": 38937 }, { "epoch": 2.1804233396796953, "grad_norm": 1.023878812789917, "learning_rate": 9.502078947368422e-05, "loss": 0.3663, "step": 38938 }, { "epoch": 2.1804793369918243, "grad_norm": 1.0366082191467285, "learning_rate": 9.502052631578948e-05, "loss": 0.4657, "step": 38939 }, { "epoch": 2.1805353343039533, "grad_norm": 1.2221672534942627, "learning_rate": 9.502026315789474e-05, "loss": 0.3695, "step": 38940 }, { "epoch": 2.1805913316160823, "grad_norm": 1.192507028579712, "learning_rate": 9.502000000000001e-05, "loss": 0.4121, "step": 38941 }, { "epoch": 2.1806473289282113, "grad_norm": 1.4165290594100952, "learning_rate": 9.501973684210527e-05, "loss": 0.4447, "step": 38942 }, { 
"epoch": 2.1807033262403404, "grad_norm": 1.3653814792633057, "learning_rate": 9.501947368421054e-05, "loss": 0.4148, "step": 38943 }, { "epoch": 2.1807593235524694, "grad_norm": 1.22435462474823, "learning_rate": 9.501921052631579e-05, "loss": 0.3519, "step": 38944 }, { "epoch": 2.1808153208645984, "grad_norm": 1.050353765487671, "learning_rate": 9.501894736842105e-05, "loss": 0.352, "step": 38945 }, { "epoch": 2.1808713181767274, "grad_norm": 1.0864853858947754, "learning_rate": 9.501868421052632e-05, "loss": 0.4096, "step": 38946 }, { "epoch": 2.1809273154888564, "grad_norm": 1.4147504568099976, "learning_rate": 9.501842105263158e-05, "loss": 0.4296, "step": 38947 }, { "epoch": 2.1809833128009855, "grad_norm": 1.0325373411178589, "learning_rate": 9.501815789473685e-05, "loss": 0.3209, "step": 38948 }, { "epoch": 2.1810393101131145, "grad_norm": 1.2250773906707764, "learning_rate": 9.50178947368421e-05, "loss": 0.4107, "step": 38949 }, { "epoch": 2.1810953074252435, "grad_norm": 1.2440638542175293, "learning_rate": 9.501763157894737e-05, "loss": 0.4684, "step": 38950 }, { "epoch": 2.1811513047373725, "grad_norm": 1.2981528043746948, "learning_rate": 9.501736842105263e-05, "loss": 0.6772, "step": 38951 }, { "epoch": 2.1812073020495015, "grad_norm": 1.3794368505477905, "learning_rate": 9.50171052631579e-05, "loss": 0.5427, "step": 38952 }, { "epoch": 2.1812632993616305, "grad_norm": 1.3692600727081299, "learning_rate": 9.501684210526317e-05, "loss": 0.4986, "step": 38953 }, { "epoch": 2.1813192966737596, "grad_norm": 0.9568555951118469, "learning_rate": 9.501657894736843e-05, "loss": 0.2829, "step": 38954 }, { "epoch": 2.1813752939858886, "grad_norm": 1.1541242599487305, "learning_rate": 9.501631578947369e-05, "loss": 0.3728, "step": 38955 }, { "epoch": 2.1814312912980176, "grad_norm": 1.4491171836853027, "learning_rate": 9.501605263157896e-05, "loss": 0.5582, "step": 38956 }, { "epoch": 2.1814872886101466, "grad_norm": 1.1712186336517334, "learning_rate": 
9.501578947368422e-05, "loss": 0.4367, "step": 38957 }, { "epoch": 2.1815432859222756, "grad_norm": 1.528090476989746, "learning_rate": 9.501552631578948e-05, "loss": 0.4122, "step": 38958 }, { "epoch": 2.1815992832344047, "grad_norm": 1.26664400100708, "learning_rate": 9.501526315789474e-05, "loss": 0.4434, "step": 38959 }, { "epoch": 2.1816552805465337, "grad_norm": 1.0228183269500732, "learning_rate": 9.501500000000001e-05, "loss": 0.3562, "step": 38960 }, { "epoch": 2.1817112778586627, "grad_norm": 1.4269508123397827, "learning_rate": 9.501473684210527e-05, "loss": 0.5148, "step": 38961 }, { "epoch": 2.1817672751707917, "grad_norm": 1.1177386045455933, "learning_rate": 9.501447368421053e-05, "loss": 0.4435, "step": 38962 }, { "epoch": 2.1818232724829207, "grad_norm": 1.1529814004898071, "learning_rate": 9.501421052631579e-05, "loss": 0.4385, "step": 38963 }, { "epoch": 2.1818792697950498, "grad_norm": 1.5116426944732666, "learning_rate": 9.501394736842105e-05, "loss": 0.3738, "step": 38964 }, { "epoch": 2.1819352671071788, "grad_norm": 1.2246263027191162, "learning_rate": 9.501368421052632e-05, "loss": 0.4642, "step": 38965 }, { "epoch": 2.181991264419308, "grad_norm": 1.2727391719818115, "learning_rate": 9.501342105263158e-05, "loss": 0.3506, "step": 38966 }, { "epoch": 2.182047261731437, "grad_norm": 1.125381588935852, "learning_rate": 9.501315789473684e-05, "loss": 0.4078, "step": 38967 }, { "epoch": 2.182103259043566, "grad_norm": 1.204925775527954, "learning_rate": 9.50128947368421e-05, "loss": 0.4381, "step": 38968 }, { "epoch": 2.182159256355695, "grad_norm": 1.3298226594924927, "learning_rate": 9.501263157894738e-05, "loss": 0.5204, "step": 38969 }, { "epoch": 2.182215253667824, "grad_norm": 1.2462958097457886, "learning_rate": 9.501236842105264e-05, "loss": 0.3601, "step": 38970 }, { "epoch": 2.182271250979953, "grad_norm": 1.0171583890914917, "learning_rate": 9.501210526315791e-05, "loss": 0.2963, "step": 38971 }, { "epoch": 2.182327248292082, 
"grad_norm": 1.1733615398406982, "learning_rate": 9.501184210526315e-05, "loss": 0.3303, "step": 38972 }, { "epoch": 2.182383245604211, "grad_norm": 1.3030155897140503, "learning_rate": 9.501157894736843e-05, "loss": 0.5021, "step": 38973 }, { "epoch": 2.18243924291634, "grad_norm": 1.178606390953064, "learning_rate": 9.501131578947369e-05, "loss": 0.444, "step": 38974 }, { "epoch": 2.182495240228469, "grad_norm": 1.173682689666748, "learning_rate": 9.501105263157896e-05, "loss": 0.3879, "step": 38975 }, { "epoch": 2.182551237540598, "grad_norm": 1.5737130641937256, "learning_rate": 9.501078947368422e-05, "loss": 0.5326, "step": 38976 }, { "epoch": 2.182607234852727, "grad_norm": 1.249324083328247, "learning_rate": 9.501052631578948e-05, "loss": 0.547, "step": 38977 }, { "epoch": 2.182663232164856, "grad_norm": 1.126025676727295, "learning_rate": 9.501026315789474e-05, "loss": 0.3829, "step": 38978 }, { "epoch": 2.182719229476985, "grad_norm": 1.2087044715881348, "learning_rate": 9.501e-05, "loss": 0.3842, "step": 38979 }, { "epoch": 2.182775226789114, "grad_norm": 1.2227627038955688, "learning_rate": 9.500973684210527e-05, "loss": 0.3454, "step": 38980 }, { "epoch": 2.182831224101243, "grad_norm": 1.2327896356582642, "learning_rate": 9.500947368421052e-05, "loss": 0.5793, "step": 38981 }, { "epoch": 2.182887221413372, "grad_norm": 1.4385664463043213, "learning_rate": 9.500921052631579e-05, "loss": 0.4619, "step": 38982 }, { "epoch": 2.182943218725501, "grad_norm": 1.091143012046814, "learning_rate": 9.500894736842105e-05, "loss": 0.3438, "step": 38983 }, { "epoch": 2.18299921603763, "grad_norm": 1.3226444721221924, "learning_rate": 9.500868421052633e-05, "loss": 0.3826, "step": 38984 }, { "epoch": 2.183055213349759, "grad_norm": 2.773717164993286, "learning_rate": 9.500842105263159e-05, "loss": 0.5174, "step": 38985 }, { "epoch": 2.183111210661888, "grad_norm": 1.127301812171936, "learning_rate": 9.500815789473685e-05, "loss": 0.3555, "step": 38986 }, { "epoch": 
2.183167207974017, "grad_norm": 1.1886684894561768, "learning_rate": 9.50078947368421e-05, "loss": 0.4, "step": 38987 }, { "epoch": 2.183223205286146, "grad_norm": 1.497908592224121, "learning_rate": 9.500763157894738e-05, "loss": 0.4662, "step": 38988 }, { "epoch": 2.1832792025982752, "grad_norm": 0.9962058067321777, "learning_rate": 9.500736842105264e-05, "loss": 0.3578, "step": 38989 }, { "epoch": 2.1833351999104043, "grad_norm": 1.0867133140563965, "learning_rate": 9.50071052631579e-05, "loss": 0.3901, "step": 38990 }, { "epoch": 2.1833911972225333, "grad_norm": 1.0843846797943115, "learning_rate": 9.500684210526316e-05, "loss": 0.3555, "step": 38991 }, { "epoch": 2.1834471945346623, "grad_norm": 1.1588462591171265, "learning_rate": 9.500657894736843e-05, "loss": 0.3438, "step": 38992 }, { "epoch": 2.1835031918467913, "grad_norm": 1.1109051704406738, "learning_rate": 9.500631578947369e-05, "loss": 0.3453, "step": 38993 }, { "epoch": 2.1835591891589203, "grad_norm": 1.0532950162887573, "learning_rate": 9.500605263157895e-05, "loss": 0.3334, "step": 38994 }, { "epoch": 2.1836151864710494, "grad_norm": 1.0967183113098145, "learning_rate": 9.500578947368421e-05, "loss": 0.4175, "step": 38995 }, { "epoch": 2.1836711837831784, "grad_norm": 1.1317375898361206, "learning_rate": 9.500552631578948e-05, "loss": 0.404, "step": 38996 }, { "epoch": 2.1837271810953074, "grad_norm": 1.2214254140853882, "learning_rate": 9.500526315789474e-05, "loss": 0.3777, "step": 38997 }, { "epoch": 2.1837831784074364, "grad_norm": 1.44541335105896, "learning_rate": 9.5005e-05, "loss": 0.5532, "step": 38998 }, { "epoch": 2.1838391757195654, "grad_norm": 1.2402639389038086, "learning_rate": 9.500473684210526e-05, "loss": 0.3779, "step": 38999 }, { "epoch": 2.1838951730316944, "grad_norm": 1.216313123703003, "learning_rate": 9.500447368421052e-05, "loss": 0.3283, "step": 39000 }, { "epoch": 2.1839511703438235, "grad_norm": 1.1168580055236816, "learning_rate": 9.50042105263158e-05, "loss": 
0.3606, "step": 39001 }, { "epoch": 2.1840071676559525, "grad_norm": 1.0124750137329102, "learning_rate": 9.500394736842106e-05, "loss": 0.2679, "step": 39002 }, { "epoch": 2.1840631649680815, "grad_norm": 1.1533573865890503, "learning_rate": 9.500368421052633e-05, "loss": 0.4544, "step": 39003 }, { "epoch": 2.1841191622802105, "grad_norm": 1.3082711696624756, "learning_rate": 9.500342105263157e-05, "loss": 0.4118, "step": 39004 }, { "epoch": 2.1841751595923395, "grad_norm": 1.6087048053741455, "learning_rate": 9.500315789473685e-05, "loss": 0.3556, "step": 39005 }, { "epoch": 2.1842311569044686, "grad_norm": 1.2887152433395386, "learning_rate": 9.500289473684211e-05, "loss": 0.3823, "step": 39006 }, { "epoch": 2.1842871542165976, "grad_norm": 1.2029297351837158, "learning_rate": 9.500263157894738e-05, "loss": 0.4293, "step": 39007 }, { "epoch": 2.1843431515287266, "grad_norm": 0.9717657566070557, "learning_rate": 9.500236842105264e-05, "loss": 0.3112, "step": 39008 }, { "epoch": 2.1843991488408556, "grad_norm": 1.5726242065429688, "learning_rate": 9.50021052631579e-05, "loss": 0.5459, "step": 39009 }, { "epoch": 2.1844551461529846, "grad_norm": 1.1057627201080322, "learning_rate": 9.500184210526316e-05, "loss": 0.4304, "step": 39010 }, { "epoch": 2.1845111434651137, "grad_norm": 1.2038341760635376, "learning_rate": 9.500157894736843e-05, "loss": 0.4084, "step": 39011 }, { "epoch": 2.1845671407772427, "grad_norm": 1.1715941429138184, "learning_rate": 9.500131578947369e-05, "loss": 0.4198, "step": 39012 }, { "epoch": 2.1846231380893717, "grad_norm": 1.3885571956634521, "learning_rate": 9.500105263157895e-05, "loss": 0.4682, "step": 39013 }, { "epoch": 2.1846791354015007, "grad_norm": 1.2786542177200317, "learning_rate": 9.500078947368421e-05, "loss": 0.4038, "step": 39014 }, { "epoch": 2.1847351327136297, "grad_norm": 1.22564697265625, "learning_rate": 9.500052631578947e-05, "loss": 0.3938, "step": 39015 }, { "epoch": 2.1847911300257588, "grad_norm": 
1.1310079097747803, "learning_rate": 9.500026315789475e-05, "loss": 0.4345, "step": 39016 }, { "epoch": 2.1848471273378878, "grad_norm": 1.3271491527557373, "learning_rate": 9.5e-05, "loss": 0.4416, "step": 39017 }, { "epoch": 2.184903124650017, "grad_norm": 1.1095949411392212, "learning_rate": 9.499973684210526e-05, "loss": 0.3353, "step": 39018 }, { "epoch": 2.184959121962146, "grad_norm": 2.6231560707092285, "learning_rate": 9.499947368421052e-05, "loss": 0.4896, "step": 39019 }, { "epoch": 2.185015119274275, "grad_norm": 1.1061480045318604, "learning_rate": 9.49992105263158e-05, "loss": 0.5028, "step": 39020 }, { "epoch": 2.185071116586404, "grad_norm": 1.3535009622573853, "learning_rate": 9.499894736842106e-05, "loss": 0.4763, "step": 39021 }, { "epoch": 2.185127113898533, "grad_norm": 3.0489447116851807, "learning_rate": 9.499868421052632e-05, "loss": 0.38, "step": 39022 }, { "epoch": 2.185183111210662, "grad_norm": 1.338590383529663, "learning_rate": 9.499842105263158e-05, "loss": 0.3659, "step": 39023 }, { "epoch": 2.185239108522791, "grad_norm": 1.6380319595336914, "learning_rate": 9.499815789473685e-05, "loss": 0.3535, "step": 39024 }, { "epoch": 2.18529510583492, "grad_norm": 1.4939478635787964, "learning_rate": 9.499789473684211e-05, "loss": 0.3089, "step": 39025 }, { "epoch": 2.185351103147049, "grad_norm": 1.0415600538253784, "learning_rate": 9.499763157894738e-05, "loss": 0.3007, "step": 39026 }, { "epoch": 2.185407100459178, "grad_norm": 1.3063600063323975, "learning_rate": 9.499736842105263e-05, "loss": 0.3285, "step": 39027 }, { "epoch": 2.185463097771307, "grad_norm": 1.5945566892623901, "learning_rate": 9.49971052631579e-05, "loss": 0.5552, "step": 39028 }, { "epoch": 2.185519095083436, "grad_norm": 1.6054874658584595, "learning_rate": 9.499684210526316e-05, "loss": 0.4378, "step": 39029 }, { "epoch": 2.185575092395565, "grad_norm": 1.6225864887237549, "learning_rate": 9.499657894736844e-05, "loss": 0.4031, "step": 39030 }, { "epoch": 
2.185631089707694, "grad_norm": 1.2703971862792969, "learning_rate": 9.499631578947368e-05, "loss": 0.413, "step": 39031 }, { "epoch": 2.185687087019823, "grad_norm": 0.9448586702346802, "learning_rate": 9.499605263157894e-05, "loss": 0.3488, "step": 39032 }, { "epoch": 2.185743084331952, "grad_norm": 1.1336658000946045, "learning_rate": 9.499578947368422e-05, "loss": 0.3389, "step": 39033 }, { "epoch": 2.185799081644081, "grad_norm": 1.075467586517334, "learning_rate": 9.499552631578947e-05, "loss": 0.3926, "step": 39034 }, { "epoch": 2.18585507895621, "grad_norm": 1.138867974281311, "learning_rate": 9.499526315789475e-05, "loss": 0.3904, "step": 39035 }, { "epoch": 2.185911076268339, "grad_norm": 1.709425687789917, "learning_rate": 9.4995e-05, "loss": 0.5505, "step": 39036 }, { "epoch": 2.185967073580468, "grad_norm": 1.1988377571105957, "learning_rate": 9.499473684210527e-05, "loss": 0.3754, "step": 39037 }, { "epoch": 2.186023070892597, "grad_norm": 1.3486446142196655, "learning_rate": 9.499447368421053e-05, "loss": 0.4484, "step": 39038 }, { "epoch": 2.186079068204726, "grad_norm": 1.1207929849624634, "learning_rate": 9.49942105263158e-05, "loss": 0.35, "step": 39039 }, { "epoch": 2.186135065516855, "grad_norm": 1.3796926736831665, "learning_rate": 9.499394736842106e-05, "loss": 0.4969, "step": 39040 }, { "epoch": 2.1861910628289842, "grad_norm": 1.2409188747406006, "learning_rate": 9.499368421052632e-05, "loss": 0.5177, "step": 39041 }, { "epoch": 2.1862470601411133, "grad_norm": 1.2011913061141968, "learning_rate": 9.499342105263158e-05, "loss": 0.3566, "step": 39042 }, { "epoch": 2.1863030574532423, "grad_norm": 1.0961623191833496, "learning_rate": 9.499315789473685e-05, "loss": 0.4385, "step": 39043 }, { "epoch": 2.1863590547653713, "grad_norm": 1.2823792695999146, "learning_rate": 9.499289473684211e-05, "loss": 0.3847, "step": 39044 }, { "epoch": 2.1864150520775003, "grad_norm": 1.1282930374145508, "learning_rate": 9.499263157894737e-05, "loss": 0.3827, 
"step": 39045 }, { "epoch": 2.1864710493896293, "grad_norm": 1.1703003644943237, "learning_rate": 9.499236842105263e-05, "loss": 0.4588, "step": 39046 }, { "epoch": 2.1865270467017583, "grad_norm": 1.125441551208496, "learning_rate": 9.49921052631579e-05, "loss": 0.3284, "step": 39047 }, { "epoch": 2.1865830440138874, "grad_norm": 1.1358660459518433, "learning_rate": 9.499184210526317e-05, "loss": 0.4166, "step": 39048 }, { "epoch": 2.1866390413260164, "grad_norm": 1.1321810483932495, "learning_rate": 9.499157894736842e-05, "loss": 0.3167, "step": 39049 }, { "epoch": 2.1866950386381454, "grad_norm": 1.1149832010269165, "learning_rate": 9.499131578947368e-05, "loss": 0.3864, "step": 39050 }, { "epoch": 2.1867510359502744, "grad_norm": 1.2042311429977417, "learning_rate": 9.499105263157894e-05, "loss": 0.4259, "step": 39051 }, { "epoch": 2.1868070332624034, "grad_norm": 1.3041541576385498, "learning_rate": 9.499078947368422e-05, "loss": 0.3633, "step": 39052 }, { "epoch": 2.1868630305745325, "grad_norm": 1.0885785818099976, "learning_rate": 9.499052631578948e-05, "loss": 0.2723, "step": 39053 }, { "epoch": 2.1869190278866615, "grad_norm": 1.215593695640564, "learning_rate": 9.499026315789474e-05, "loss": 0.3927, "step": 39054 }, { "epoch": 2.1869750251987905, "grad_norm": 1.653286099433899, "learning_rate": 9.499e-05, "loss": 0.5376, "step": 39055 }, { "epoch": 2.1870310225109195, "grad_norm": 1.3529754877090454, "learning_rate": 9.498973684210527e-05, "loss": 0.4994, "step": 39056 }, { "epoch": 2.1870870198230485, "grad_norm": 1.3690624237060547, "learning_rate": 9.498947368421053e-05, "loss": 0.4035, "step": 39057 }, { "epoch": 2.1871430171351776, "grad_norm": 1.157879114151001, "learning_rate": 9.49892105263158e-05, "loss": 0.3911, "step": 39058 }, { "epoch": 2.1871990144473066, "grad_norm": 1.4271876811981201, "learning_rate": 9.498894736842105e-05, "loss": 0.4178, "step": 39059 }, { "epoch": 2.1872550117594356, "grad_norm": 1.2080905437469482, "learning_rate": 
9.498868421052632e-05, "loss": 0.356, "step": 39060 }, { "epoch": 2.1873110090715646, "grad_norm": 1.517770767211914, "learning_rate": 9.498842105263158e-05, "loss": 0.5933, "step": 39061 }, { "epoch": 2.1873670063836936, "grad_norm": 1.3313344717025757, "learning_rate": 9.498815789473686e-05, "loss": 0.3613, "step": 39062 }, { "epoch": 2.1874230036958227, "grad_norm": 1.301638126373291, "learning_rate": 9.498789473684212e-05, "loss": 0.5225, "step": 39063 }, { "epoch": 2.1874790010079517, "grad_norm": 1.6095978021621704, "learning_rate": 9.498763157894738e-05, "loss": 0.5183, "step": 39064 }, { "epoch": 2.1875349983200807, "grad_norm": 1.1433076858520508, "learning_rate": 9.498736842105263e-05, "loss": 0.348, "step": 39065 }, { "epoch": 2.1875909956322097, "grad_norm": 1.3681985139846802, "learning_rate": 9.49871052631579e-05, "loss": 0.4488, "step": 39066 }, { "epoch": 2.1876469929443387, "grad_norm": 1.0436224937438965, "learning_rate": 9.498684210526317e-05, "loss": 0.3897, "step": 39067 }, { "epoch": 2.1877029902564677, "grad_norm": 1.19947350025177, "learning_rate": 9.498657894736843e-05, "loss": 0.5267, "step": 39068 }, { "epoch": 2.1877589875685968, "grad_norm": 1.2829921245574951, "learning_rate": 9.498631578947369e-05, "loss": 0.4798, "step": 39069 }, { "epoch": 2.187814984880726, "grad_norm": 1.0748436450958252, "learning_rate": 9.498605263157895e-05, "loss": 0.3202, "step": 39070 }, { "epoch": 2.187870982192855, "grad_norm": 1.782092809677124, "learning_rate": 9.498578947368422e-05, "loss": 0.4477, "step": 39071 }, { "epoch": 2.187926979504984, "grad_norm": 1.437738299369812, "learning_rate": 9.498552631578948e-05, "loss": 0.5512, "step": 39072 }, { "epoch": 2.187982976817113, "grad_norm": 1.4653900861740112, "learning_rate": 9.498526315789474e-05, "loss": 0.5765, "step": 39073 }, { "epoch": 2.188038974129242, "grad_norm": 1.160523772239685, "learning_rate": 9.4985e-05, "loss": 0.4265, "step": 39074 }, { "epoch": 2.188094971441371, "grad_norm": 
1.1257492303848267, "learning_rate": 9.498473684210527e-05, "loss": 0.3653, "step": 39075 }, { "epoch": 2.1881509687535, "grad_norm": 1.2313061952590942, "learning_rate": 9.498447368421053e-05, "loss": 0.4876, "step": 39076 }, { "epoch": 2.188206966065629, "grad_norm": 1.2742465734481812, "learning_rate": 9.498421052631579e-05, "loss": 0.4261, "step": 39077 }, { "epoch": 2.188262963377758, "grad_norm": 1.4065688848495483, "learning_rate": 9.498394736842105e-05, "loss": 0.5457, "step": 39078 }, { "epoch": 2.188318960689887, "grad_norm": 1.0316917896270752, "learning_rate": 9.498368421052633e-05, "loss": 0.387, "step": 39079 }, { "epoch": 2.188374958002016, "grad_norm": 1.0288453102111816, "learning_rate": 9.498342105263158e-05, "loss": 0.3717, "step": 39080 }, { "epoch": 2.188430955314145, "grad_norm": 1.3102045059204102, "learning_rate": 9.498315789473686e-05, "loss": 0.4779, "step": 39081 }, { "epoch": 2.188486952626274, "grad_norm": 1.369468092918396, "learning_rate": 9.49828947368421e-05, "loss": 0.45, "step": 39082 }, { "epoch": 2.188542949938403, "grad_norm": 1.1277834177017212, "learning_rate": 9.498263157894736e-05, "loss": 0.2852, "step": 39083 }, { "epoch": 2.188598947250532, "grad_norm": 1.4799185991287231, "learning_rate": 9.498236842105264e-05, "loss": 0.4513, "step": 39084 }, { "epoch": 2.188654944562661, "grad_norm": 1.1144607067108154, "learning_rate": 9.49821052631579e-05, "loss": 0.3621, "step": 39085 }, { "epoch": 2.18871094187479, "grad_norm": 1.2686020135879517, "learning_rate": 9.498184210526316e-05, "loss": 0.4895, "step": 39086 }, { "epoch": 2.188766939186919, "grad_norm": 0.9396287202835083, "learning_rate": 9.498157894736842e-05, "loss": 0.3632, "step": 39087 }, { "epoch": 2.188822936499048, "grad_norm": 1.519822120666504, "learning_rate": 9.498131578947369e-05, "loss": 0.4151, "step": 39088 }, { "epoch": 2.188878933811177, "grad_norm": 1.1211975812911987, "learning_rate": 9.498105263157895e-05, "loss": 0.3343, "step": 39089 }, { "epoch": 
2.188934931123306, "grad_norm": 1.1856366395950317, "learning_rate": 9.498078947368422e-05, "loss": 0.3753, "step": 39090 }, { "epoch": 2.188990928435435, "grad_norm": 1.2592588663101196, "learning_rate": 9.498052631578947e-05, "loss": 0.575, "step": 39091 }, { "epoch": 2.189046925747564, "grad_norm": 1.1531543731689453, "learning_rate": 9.498026315789474e-05, "loss": 0.3017, "step": 39092 }, { "epoch": 2.1891029230596932, "grad_norm": 1.2448511123657227, "learning_rate": 9.498e-05, "loss": 0.4248, "step": 39093 }, { "epoch": 2.1891589203718222, "grad_norm": 1.5831552743911743, "learning_rate": 9.497973684210528e-05, "loss": 0.5818, "step": 39094 }, { "epoch": 2.1892149176839513, "grad_norm": 1.2447991371154785, "learning_rate": 9.497947368421054e-05, "loss": 0.3859, "step": 39095 }, { "epoch": 2.1892709149960803, "grad_norm": 1.3265899419784546, "learning_rate": 9.49792105263158e-05, "loss": 0.4058, "step": 39096 }, { "epoch": 2.1893269123082093, "grad_norm": 1.16269850730896, "learning_rate": 9.497894736842105e-05, "loss": 0.4267, "step": 39097 }, { "epoch": 2.1893829096203383, "grad_norm": 1.2243558168411255, "learning_rate": 9.497868421052633e-05, "loss": 0.3839, "step": 39098 }, { "epoch": 2.1894389069324673, "grad_norm": 1.1689585447311401, "learning_rate": 9.497842105263159e-05, "loss": 0.4997, "step": 39099 }, { "epoch": 2.1894949042445964, "grad_norm": 1.2064203023910522, "learning_rate": 9.497815789473685e-05, "loss": 0.3026, "step": 39100 }, { "epoch": 2.1895509015567254, "grad_norm": 1.153178334236145, "learning_rate": 9.497789473684211e-05, "loss": 0.4539, "step": 39101 }, { "epoch": 2.1896068988688544, "grad_norm": 1.1924258470535278, "learning_rate": 9.497763157894737e-05, "loss": 0.4493, "step": 39102 }, { "epoch": 2.1896628961809834, "grad_norm": 1.712356448173523, "learning_rate": 9.497736842105264e-05, "loss": 0.389, "step": 39103 }, { "epoch": 2.1897188934931124, "grad_norm": 1.312074065208435, "learning_rate": 9.49771052631579e-05, "loss": 
0.3904, "step": 39104 }, { "epoch": 2.1897748908052415, "grad_norm": 1.186565637588501, "learning_rate": 9.497684210526316e-05, "loss": 0.466, "step": 39105 }, { "epoch": 2.1898308881173705, "grad_norm": 1.7509232759475708, "learning_rate": 9.497657894736842e-05, "loss": 0.5128, "step": 39106 }, { "epoch": 2.1898868854294995, "grad_norm": 1.4431231021881104, "learning_rate": 9.497631578947369e-05, "loss": 0.5513, "step": 39107 }, { "epoch": 2.1899428827416285, "grad_norm": 1.2687747478485107, "learning_rate": 9.497605263157895e-05, "loss": 0.3674, "step": 39108 }, { "epoch": 2.1899988800537575, "grad_norm": 1.2940678596496582, "learning_rate": 9.497578947368421e-05, "loss": 0.4532, "step": 39109 }, { "epoch": 2.1900548773658866, "grad_norm": 1.352541208267212, "learning_rate": 9.497552631578947e-05, "loss": 0.4748, "step": 39110 }, { "epoch": 2.1901108746780156, "grad_norm": 1.1367441415786743, "learning_rate": 9.497526315789474e-05, "loss": 0.3074, "step": 39111 }, { "epoch": 2.1901668719901446, "grad_norm": 1.5073899030685425, "learning_rate": 9.4975e-05, "loss": 0.5365, "step": 39112 }, { "epoch": 2.1902228693022736, "grad_norm": 1.1930011510849, "learning_rate": 9.497473684210528e-05, "loss": 0.3782, "step": 39113 }, { "epoch": 2.1902788666144026, "grad_norm": 1.1974886655807495, "learning_rate": 9.497447368421052e-05, "loss": 0.519, "step": 39114 }, { "epoch": 2.1903348639265316, "grad_norm": 1.2613402605056763, "learning_rate": 9.49742105263158e-05, "loss": 0.3358, "step": 39115 }, { "epoch": 2.1903908612386607, "grad_norm": 1.0909866094589233, "learning_rate": 9.497394736842106e-05, "loss": 0.3243, "step": 39116 }, { "epoch": 2.1904468585507897, "grad_norm": 1.2143921852111816, "learning_rate": 9.497368421052633e-05, "loss": 0.5245, "step": 39117 }, { "epoch": 2.1905028558629187, "grad_norm": 1.3832098245620728, "learning_rate": 9.497342105263159e-05, "loss": 0.4174, "step": 39118 }, { "epoch": 2.1905588531750477, "grad_norm": 1.1252905130386353, 
"learning_rate": 9.497315789473684e-05, "loss": 0.3326, "step": 39119 }, { "epoch": 2.1906148504871767, "grad_norm": 1.20613431930542, "learning_rate": 9.497289473684211e-05, "loss": 0.4339, "step": 39120 }, { "epoch": 2.1906708477993058, "grad_norm": 1.369518756866455, "learning_rate": 9.497263157894737e-05, "loss": 0.4737, "step": 39121 }, { "epoch": 2.190726845111435, "grad_norm": 1.3958098888397217, "learning_rate": 9.497236842105264e-05, "loss": 0.3378, "step": 39122 }, { "epoch": 2.190782842423564, "grad_norm": 1.2123152017593384, "learning_rate": 9.49721052631579e-05, "loss": 0.3848, "step": 39123 }, { "epoch": 2.190838839735693, "grad_norm": 1.1625896692276, "learning_rate": 9.497184210526316e-05, "loss": 0.405, "step": 39124 }, { "epoch": 2.190894837047822, "grad_norm": 1.1821825504302979, "learning_rate": 9.497157894736842e-05, "loss": 0.4239, "step": 39125 }, { "epoch": 2.190950834359951, "grad_norm": 1.431579828262329, "learning_rate": 9.49713157894737e-05, "loss": 0.3814, "step": 39126 }, { "epoch": 2.19100683167208, "grad_norm": 1.3663723468780518, "learning_rate": 9.497105263157895e-05, "loss": 0.5754, "step": 39127 }, { "epoch": 2.191062828984209, "grad_norm": 1.3225445747375488, "learning_rate": 9.497078947368421e-05, "loss": 0.4511, "step": 39128 }, { "epoch": 2.191118826296338, "grad_norm": 1.0950003862380981, "learning_rate": 9.497052631578947e-05, "loss": 0.3255, "step": 39129 }, { "epoch": 2.191174823608467, "grad_norm": 1.2019085884094238, "learning_rate": 9.497026315789475e-05, "loss": 0.4056, "step": 39130 }, { "epoch": 2.191230820920596, "grad_norm": 1.0218220949172974, "learning_rate": 9.497000000000001e-05, "loss": 0.2836, "step": 39131 }, { "epoch": 2.191286818232725, "grad_norm": 1.5577071905136108, "learning_rate": 9.496973684210527e-05, "loss": 0.3909, "step": 39132 }, { "epoch": 2.191342815544854, "grad_norm": 1.2853047847747803, "learning_rate": 9.496947368421053e-05, "loss": 0.3458, "step": 39133 }, { "epoch": 2.191398812856983, 
"grad_norm": 1.121080756187439, "learning_rate": 9.49692105263158e-05, "loss": 0.394, "step": 39134 }, { "epoch": 2.191454810169112, "grad_norm": 1.3960609436035156, "learning_rate": 9.496894736842106e-05, "loss": 0.433, "step": 39135 }, { "epoch": 2.191510807481241, "grad_norm": 1.69021737575531, "learning_rate": 9.496868421052632e-05, "loss": 0.3527, "step": 39136 }, { "epoch": 2.19156680479337, "grad_norm": 1.6987029314041138, "learning_rate": 9.496842105263158e-05, "loss": 0.4539, "step": 39137 }, { "epoch": 2.191622802105499, "grad_norm": 1.0741510391235352, "learning_rate": 9.496815789473684e-05, "loss": 0.3644, "step": 39138 }, { "epoch": 2.191678799417628, "grad_norm": 1.169575810432434, "learning_rate": 9.496789473684211e-05, "loss": 0.3835, "step": 39139 }, { "epoch": 2.191734796729757, "grad_norm": 1.0368984937667847, "learning_rate": 9.496763157894737e-05, "loss": 0.3543, "step": 39140 }, { "epoch": 2.191790794041886, "grad_norm": 1.200516939163208, "learning_rate": 9.496736842105263e-05, "loss": 0.3347, "step": 39141 }, { "epoch": 2.191846791354015, "grad_norm": 1.1778619289398193, "learning_rate": 9.496710526315789e-05, "loss": 0.5514, "step": 39142 }, { "epoch": 2.191902788666144, "grad_norm": 1.0610679388046265, "learning_rate": 9.496684210526316e-05, "loss": 0.4898, "step": 39143 }, { "epoch": 2.191958785978273, "grad_norm": 1.0308139324188232, "learning_rate": 9.496657894736842e-05, "loss": 0.3861, "step": 39144 }, { "epoch": 2.1920147832904022, "grad_norm": 1.3206017017364502, "learning_rate": 9.49663157894737e-05, "loss": 0.4364, "step": 39145 }, { "epoch": 2.1920707806025312, "grad_norm": 1.1329728364944458, "learning_rate": 9.496605263157894e-05, "loss": 0.4449, "step": 39146 }, { "epoch": 2.1921267779146603, "grad_norm": 1.07108736038208, "learning_rate": 9.496578947368422e-05, "loss": 0.4644, "step": 39147 }, { "epoch": 2.1921827752267893, "grad_norm": 5.464520454406738, "learning_rate": 9.496552631578948e-05, "loss": 0.3794, "step": 39148 
}, { "epoch": 2.1922387725389183, "grad_norm": 1.1187776327133179, "learning_rate": 9.496526315789475e-05, "loss": 0.3259, "step": 39149 }, { "epoch": 2.1922947698510473, "grad_norm": 1.1216533184051514, "learning_rate": 9.496500000000001e-05, "loss": 0.3556, "step": 39150 }, { "epoch": 2.1923507671631763, "grad_norm": 1.1532158851623535, "learning_rate": 9.496473684210527e-05, "loss": 0.4255, "step": 39151 }, { "epoch": 2.1924067644753054, "grad_norm": 0.9703332185745239, "learning_rate": 9.496447368421053e-05, "loss": 0.3977, "step": 39152 }, { "epoch": 2.1924627617874344, "grad_norm": 1.062602162361145, "learning_rate": 9.496421052631579e-05, "loss": 0.3796, "step": 39153 }, { "epoch": 2.1925187590995634, "grad_norm": 1.2606024742126465, "learning_rate": 9.496394736842106e-05, "loss": 0.4109, "step": 39154 }, { "epoch": 2.1925747564116924, "grad_norm": 1.25972580909729, "learning_rate": 9.496368421052632e-05, "loss": 0.4907, "step": 39155 }, { "epoch": 2.1926307537238214, "grad_norm": 1.2315218448638916, "learning_rate": 9.496342105263158e-05, "loss": 0.3272, "step": 39156 }, { "epoch": 2.1926867510359505, "grad_norm": 1.318179965019226, "learning_rate": 9.496315789473684e-05, "loss": 0.3284, "step": 39157 }, { "epoch": 2.1927427483480795, "grad_norm": 1.233273983001709, "learning_rate": 9.496289473684211e-05, "loss": 0.3327, "step": 39158 }, { "epoch": 2.1927987456602085, "grad_norm": 1.1989058256149292, "learning_rate": 9.496263157894737e-05, "loss": 0.3619, "step": 39159 }, { "epoch": 2.1928547429723375, "grad_norm": 1.437318205833435, "learning_rate": 9.496236842105263e-05, "loss": 0.3528, "step": 39160 }, { "epoch": 2.1929107402844665, "grad_norm": 1.3835985660552979, "learning_rate": 9.49621052631579e-05, "loss": 0.5303, "step": 39161 }, { "epoch": 2.1929667375965956, "grad_norm": 1.2616150379180908, "learning_rate": 9.496184210526317e-05, "loss": 0.3978, "step": 39162 }, { "epoch": 2.1930227349087246, "grad_norm": 1.1376595497131348, "learning_rate": 
9.496157894736843e-05, "loss": 0.3847, "step": 39163 }, { "epoch": 2.1930787322208536, "grad_norm": 1.135696530342102, "learning_rate": 9.496131578947369e-05, "loss": 0.4026, "step": 39164 }, { "epoch": 2.1931347295329826, "grad_norm": 1.1316195726394653, "learning_rate": 9.496105263157895e-05, "loss": 0.3641, "step": 39165 }, { "epoch": 2.1931907268451116, "grad_norm": 1.1594762802124023, "learning_rate": 9.496078947368422e-05, "loss": 0.5395, "step": 39166 }, { "epoch": 2.1932467241572406, "grad_norm": 1.3209244012832642, "learning_rate": 9.496052631578948e-05, "loss": 0.4982, "step": 39167 }, { "epoch": 2.1933027214693697, "grad_norm": 1.1222604513168335, "learning_rate": 9.496026315789475e-05, "loss": 0.4387, "step": 39168 }, { "epoch": 2.1933587187814987, "grad_norm": 1.0970358848571777, "learning_rate": 9.496e-05, "loss": 0.4285, "step": 39169 }, { "epoch": 2.1934147160936277, "grad_norm": 0.9503394961357117, "learning_rate": 9.495973684210526e-05, "loss": 0.3276, "step": 39170 }, { "epoch": 2.1934707134057567, "grad_norm": 1.6275936365127563, "learning_rate": 9.495947368421053e-05, "loss": 0.401, "step": 39171 }, { "epoch": 2.1935267107178857, "grad_norm": 1.111403226852417, "learning_rate": 9.495921052631579e-05, "loss": 0.3822, "step": 39172 }, { "epoch": 2.1935827080300148, "grad_norm": 1.0329989194869995, "learning_rate": 9.495894736842106e-05, "loss": 0.4374, "step": 39173 }, { "epoch": 2.193638705342144, "grad_norm": 1.0841001272201538, "learning_rate": 9.495868421052631e-05, "loss": 0.4015, "step": 39174 }, { "epoch": 2.193694702654273, "grad_norm": 1.3559916019439697, "learning_rate": 9.495842105263158e-05, "loss": 0.5634, "step": 39175 }, { "epoch": 2.193750699966402, "grad_norm": 1.3796883821487427, "learning_rate": 9.495815789473684e-05, "loss": 0.4402, "step": 39176 }, { "epoch": 2.193806697278531, "grad_norm": 1.0877370834350586, "learning_rate": 9.495789473684212e-05, "loss": 0.3181, "step": 39177 }, { "epoch": 2.19386269459066, "grad_norm": 
1.078378677368164, "learning_rate": 9.495763157894738e-05, "loss": 0.3726, "step": 39178 }, { "epoch": 2.1939186919027884, "grad_norm": 1.0643137693405151, "learning_rate": 9.495736842105264e-05, "loss": 0.3799, "step": 39179 }, { "epoch": 2.193974689214918, "grad_norm": 1.2806888818740845, "learning_rate": 9.49571052631579e-05, "loss": 0.39, "step": 39180 }, { "epoch": 2.1940306865270465, "grad_norm": 1.2856566905975342, "learning_rate": 9.495684210526317e-05, "loss": 0.3795, "step": 39181 }, { "epoch": 2.194086683839176, "grad_norm": 1.1614986658096313, "learning_rate": 9.495657894736843e-05, "loss": 0.3582, "step": 39182 }, { "epoch": 2.1941426811513045, "grad_norm": 1.077746868133545, "learning_rate": 9.495631578947369e-05, "loss": 0.3703, "step": 39183 }, { "epoch": 2.194198678463434, "grad_norm": 1.4261763095855713, "learning_rate": 9.495605263157895e-05, "loss": 0.4189, "step": 39184 }, { "epoch": 2.1942546757755625, "grad_norm": 1.3532723188400269, "learning_rate": 9.495578947368422e-05, "loss": 0.4273, "step": 39185 }, { "epoch": 2.194310673087692, "grad_norm": 1.1887974739074707, "learning_rate": 9.495552631578948e-05, "loss": 0.3335, "step": 39186 }, { "epoch": 2.1943666703998206, "grad_norm": 1.2685467004776, "learning_rate": 9.495526315789474e-05, "loss": 0.4662, "step": 39187 }, { "epoch": 2.19442266771195, "grad_norm": 0.9770505428314209, "learning_rate": 9.4955e-05, "loss": 0.3361, "step": 39188 }, { "epoch": 2.1944786650240786, "grad_norm": 1.0768907070159912, "learning_rate": 9.495473684210526e-05, "loss": 0.4428, "step": 39189 }, { "epoch": 2.194534662336208, "grad_norm": 1.177499771118164, "learning_rate": 9.495447368421053e-05, "loss": 0.5098, "step": 39190 }, { "epoch": 2.1945906596483367, "grad_norm": 1.2610516548156738, "learning_rate": 9.49542105263158e-05, "loss": 0.4874, "step": 39191 }, { "epoch": 2.194646656960466, "grad_norm": 1.0482573509216309, "learning_rate": 9.495394736842105e-05, "loss": 0.4516, "step": 39192 }, { "epoch": 
2.1947026542725947, "grad_norm": 1.1462467908859253, "learning_rate": 9.495368421052631e-05, "loss": 0.3628, "step": 39193 }, { "epoch": 2.194758651584724, "grad_norm": 1.2358667850494385, "learning_rate": 9.495342105263159e-05, "loss": 0.4096, "step": 39194 }, { "epoch": 2.1948146488968527, "grad_norm": 1.7542204856872559, "learning_rate": 9.495315789473685e-05, "loss": 0.5005, "step": 39195 }, { "epoch": 2.194870646208982, "grad_norm": 1.1441692113876343, "learning_rate": 9.49528947368421e-05, "loss": 0.3844, "step": 39196 }, { "epoch": 2.1949266435211108, "grad_norm": 1.5919365882873535, "learning_rate": 9.495263157894737e-05, "loss": 0.4196, "step": 39197 }, { "epoch": 2.1949826408332402, "grad_norm": 1.2509034872055054, "learning_rate": 9.495236842105264e-05, "loss": 0.4728, "step": 39198 }, { "epoch": 2.195038638145369, "grad_norm": 1.468296766281128, "learning_rate": 9.49521052631579e-05, "loss": 0.5532, "step": 39199 }, { "epoch": 2.195094635457498, "grad_norm": 1.3390840291976929, "learning_rate": 9.495184210526317e-05, "loss": 0.403, "step": 39200 }, { "epoch": 2.195150632769627, "grad_norm": 1.035071611404419, "learning_rate": 9.495157894736842e-05, "loss": 0.4697, "step": 39201 }, { "epoch": 2.195206630081756, "grad_norm": 1.354354739189148, "learning_rate": 9.495131578947369e-05, "loss": 0.4857, "step": 39202 }, { "epoch": 2.195262627393885, "grad_norm": 1.4106533527374268, "learning_rate": 9.495105263157895e-05, "loss": 0.4241, "step": 39203 }, { "epoch": 2.195318624706014, "grad_norm": 1.2615022659301758, "learning_rate": 9.495078947368421e-05, "loss": 0.5512, "step": 39204 }, { "epoch": 2.195374622018143, "grad_norm": 1.1615214347839355, "learning_rate": 9.495052631578948e-05, "loss": 0.2905, "step": 39205 }, { "epoch": 2.195430619330272, "grad_norm": 1.1964410543441772, "learning_rate": 9.495026315789473e-05, "loss": 0.2503, "step": 39206 }, { "epoch": 2.195486616642401, "grad_norm": 0.9163525700569153, "learning_rate": 9.495e-05, "loss": 0.305, 
"step": 39207 }, { "epoch": 2.19554261395453, "grad_norm": 1.2203083038330078, "learning_rate": 9.494973684210526e-05, "loss": 0.3645, "step": 39208 }, { "epoch": 2.195598611266659, "grad_norm": 0.9435203671455383, "learning_rate": 9.494947368421054e-05, "loss": 0.3135, "step": 39209 }, { "epoch": 2.195654608578788, "grad_norm": 1.3386194705963135, "learning_rate": 9.49492105263158e-05, "loss": 0.5953, "step": 39210 }, { "epoch": 2.195710605890917, "grad_norm": 1.1914485692977905, "learning_rate": 9.494894736842106e-05, "loss": 0.5013, "step": 39211 }, { "epoch": 2.195766603203046, "grad_norm": 1.083156943321228, "learning_rate": 9.494868421052632e-05, "loss": 0.2811, "step": 39212 }, { "epoch": 2.195822600515175, "grad_norm": 1.305131435394287, "learning_rate": 9.494842105263159e-05, "loss": 0.4749, "step": 39213 }, { "epoch": 2.195878597827304, "grad_norm": 1.2930506467819214, "learning_rate": 9.494815789473685e-05, "loss": 0.3796, "step": 39214 }, { "epoch": 2.195934595139433, "grad_norm": 1.2020272016525269, "learning_rate": 9.494789473684211e-05, "loss": 0.4032, "step": 39215 }, { "epoch": 2.195990592451562, "grad_norm": 1.2056829929351807, "learning_rate": 9.494763157894737e-05, "loss": 0.4452, "step": 39216 }, { "epoch": 2.196046589763691, "grad_norm": 1.2149534225463867, "learning_rate": 9.494736842105264e-05, "loss": 0.3319, "step": 39217 }, { "epoch": 2.19610258707582, "grad_norm": 1.3055657148361206, "learning_rate": 9.49471052631579e-05, "loss": 0.5154, "step": 39218 }, { "epoch": 2.196158584387949, "grad_norm": 1.0483262538909912, "learning_rate": 9.494684210526316e-05, "loss": 0.3424, "step": 39219 }, { "epoch": 2.196214581700078, "grad_norm": 1.2173939943313599, "learning_rate": 9.494657894736842e-05, "loss": 0.3585, "step": 39220 }, { "epoch": 2.1962705790122072, "grad_norm": 1.94076406955719, "learning_rate": 9.494631578947368e-05, "loss": 0.4395, "step": 39221 }, { "epoch": 2.1963265763243363, "grad_norm": 1.204633355140686, "learning_rate": 
9.494605263157895e-05, "loss": 0.318, "step": 39222 }, { "epoch": 2.1963825736364653, "grad_norm": 1.2658069133758545, "learning_rate": 9.494578947368421e-05, "loss": 0.428, "step": 39223 }, { "epoch": 2.1964385709485943, "grad_norm": 0.978947103023529, "learning_rate": 9.494552631578947e-05, "loss": 0.3905, "step": 39224 }, { "epoch": 2.1964945682607233, "grad_norm": 1.4354674816131592, "learning_rate": 9.494526315789473e-05, "loss": 0.6673, "step": 39225 }, { "epoch": 2.1965505655728523, "grad_norm": 1.1528240442276, "learning_rate": 9.4945e-05, "loss": 0.4348, "step": 39226 }, { "epoch": 2.1966065628849814, "grad_norm": 1.365470290184021, "learning_rate": 9.494473684210527e-05, "loss": 0.4295, "step": 39227 }, { "epoch": 2.1966625601971104, "grad_norm": 1.1184728145599365, "learning_rate": 9.494447368421054e-05, "loss": 0.3364, "step": 39228 }, { "epoch": 2.1967185575092394, "grad_norm": 1.0508556365966797, "learning_rate": 9.494421052631579e-05, "loss": 0.3661, "step": 39229 }, { "epoch": 2.1967745548213684, "grad_norm": 1.1564157009124756, "learning_rate": 9.494394736842106e-05, "loss": 0.3553, "step": 39230 }, { "epoch": 2.1968305521334974, "grad_norm": 1.2126152515411377, "learning_rate": 9.494368421052632e-05, "loss": 0.3258, "step": 39231 }, { "epoch": 2.1968865494456264, "grad_norm": 1.1503323316574097, "learning_rate": 9.494342105263159e-05, "loss": 0.3937, "step": 39232 }, { "epoch": 2.1969425467577555, "grad_norm": 1.0257138013839722, "learning_rate": 9.494315789473684e-05, "loss": 0.5778, "step": 39233 }, { "epoch": 2.1969985440698845, "grad_norm": 1.0747004747390747, "learning_rate": 9.494289473684211e-05, "loss": 0.38, "step": 39234 }, { "epoch": 2.1970545413820135, "grad_norm": 1.3167614936828613, "learning_rate": 9.494263157894737e-05, "loss": 0.3817, "step": 39235 }, { "epoch": 2.1971105386941425, "grad_norm": 1.3896008729934692, "learning_rate": 9.494236842105264e-05, "loss": 0.3847, "step": 39236 }, { "epoch": 2.1971665360062715, "grad_norm": 
1.2476284503936768, "learning_rate": 9.49421052631579e-05, "loss": 0.3669, "step": 39237 }, { "epoch": 2.1972225333184006, "grad_norm": 1.0637397766113281, "learning_rate": 9.494184210526316e-05, "loss": 0.3385, "step": 39238 }, { "epoch": 2.1972785306305296, "grad_norm": 1.0601918697357178, "learning_rate": 9.494157894736842e-05, "loss": 0.3928, "step": 39239 }, { "epoch": 2.1973345279426586, "grad_norm": 1.411753535270691, "learning_rate": 9.494131578947368e-05, "loss": 0.6168, "step": 39240 }, { "epoch": 2.1973905252547876, "grad_norm": 1.4000637531280518, "learning_rate": 9.494105263157896e-05, "loss": 0.4251, "step": 39241 }, { "epoch": 2.1974465225669166, "grad_norm": 1.333798885345459, "learning_rate": 9.494078947368422e-05, "loss": 0.4538, "step": 39242 }, { "epoch": 2.1975025198790457, "grad_norm": 1.1884304285049438, "learning_rate": 9.494052631578948e-05, "loss": 0.4104, "step": 39243 }, { "epoch": 2.1975585171911747, "grad_norm": 1.0855166912078857, "learning_rate": 9.494026315789474e-05, "loss": 0.3587, "step": 39244 }, { "epoch": 2.1976145145033037, "grad_norm": 1.1423125267028809, "learning_rate": 9.494000000000001e-05, "loss": 0.3943, "step": 39245 }, { "epoch": 2.1976705118154327, "grad_norm": 1.1682051420211792, "learning_rate": 9.493973684210527e-05, "loss": 0.4156, "step": 39246 }, { "epoch": 2.1977265091275617, "grad_norm": 1.141829252243042, "learning_rate": 9.493947368421053e-05, "loss": 0.3297, "step": 39247 }, { "epoch": 2.1977825064396908, "grad_norm": 1.6670130491256714, "learning_rate": 9.493921052631579e-05, "loss": 0.3846, "step": 39248 }, { "epoch": 2.1978385037518198, "grad_norm": 2.317133903503418, "learning_rate": 9.493894736842106e-05, "loss": 0.363, "step": 39249 }, { "epoch": 2.197894501063949, "grad_norm": null, "learning_rate": 9.493894736842106e-05, "loss": 0.4071, "step": 39250 }, { "epoch": 2.197950498376078, "grad_norm": 1.3584693670272827, "learning_rate": 9.493868421052632e-05, "loss": 0.4368, "step": 39251 }, { 
"epoch": 2.198006495688207, "grad_norm": 1.3900644779205322, "learning_rate": 9.493842105263158e-05, "loss": 0.4187, "step": 39252 }, { "epoch": 2.198062493000336, "grad_norm": 1.392337441444397, "learning_rate": 9.493815789473684e-05, "loss": 0.3972, "step": 39253 }, { "epoch": 2.198118490312465, "grad_norm": 1.0016899108886719, "learning_rate": 9.493789473684211e-05, "loss": 0.3765, "step": 39254 }, { "epoch": 2.198174487624594, "grad_norm": 1.286842942237854, "learning_rate": 9.493763157894737e-05, "loss": 0.4873, "step": 39255 }, { "epoch": 2.198230484936723, "grad_norm": 1.3937572240829468, "learning_rate": 9.493736842105265e-05, "loss": 0.4057, "step": 39256 }, { "epoch": 2.198286482248852, "grad_norm": 1.6088981628417969, "learning_rate": 9.49371052631579e-05, "loss": 0.6128, "step": 39257 }, { "epoch": 2.198342479560981, "grad_norm": 1.049758791923523, "learning_rate": 9.493684210526315e-05, "loss": 0.4872, "step": 39258 }, { "epoch": 2.19839847687311, "grad_norm": 2.756300687789917, "learning_rate": 9.493657894736843e-05, "loss": 0.3316, "step": 39259 }, { "epoch": 2.198454474185239, "grad_norm": 0.9867526888847351, "learning_rate": 9.493631578947369e-05, "loss": 0.3947, "step": 39260 }, { "epoch": 2.198510471497368, "grad_norm": 1.358424186706543, "learning_rate": 9.493605263157896e-05, "loss": 0.3495, "step": 39261 }, { "epoch": 2.198566468809497, "grad_norm": 1.1801587343215942, "learning_rate": 9.49357894736842e-05, "loss": 0.3617, "step": 39262 }, { "epoch": 2.198622466121626, "grad_norm": 1.095976710319519, "learning_rate": 9.493552631578948e-05, "loss": 0.4444, "step": 39263 }, { "epoch": 2.198678463433755, "grad_norm": 1.1274425983428955, "learning_rate": 9.493526315789474e-05, "loss": 0.4946, "step": 39264 }, { "epoch": 2.198734460745884, "grad_norm": 1.2692400217056274, "learning_rate": 9.493500000000001e-05, "loss": 0.3954, "step": 39265 }, { "epoch": 2.198790458058013, "grad_norm": 1.3247352838516235, "learning_rate": 9.493473684210527e-05, 
"loss": 0.3894, "step": 39266 }, { "epoch": 2.198846455370142, "grad_norm": 1.5664595365524292, "learning_rate": 9.493447368421053e-05, "loss": 0.4534, "step": 39267 }, { "epoch": 2.198902452682271, "grad_norm": 1.2559038400650024, "learning_rate": 9.493421052631579e-05, "loss": 0.3604, "step": 39268 }, { "epoch": 2.1989584499944, "grad_norm": 1.5000864267349243, "learning_rate": 9.493394736842106e-05, "loss": 0.4337, "step": 39269 }, { "epoch": 2.199014447306529, "grad_norm": 1.262047529220581, "learning_rate": 9.493368421052632e-05, "loss": 0.3786, "step": 39270 }, { "epoch": 2.199070444618658, "grad_norm": 1.5206862688064575, "learning_rate": 9.493342105263158e-05, "loss": 0.4685, "step": 39271 }, { "epoch": 2.199126441930787, "grad_norm": 1.3647706508636475, "learning_rate": 9.493315789473684e-05, "loss": 0.4278, "step": 39272 }, { "epoch": 2.1991824392429162, "grad_norm": 1.1124950647354126, "learning_rate": 9.493289473684212e-05, "loss": 0.3485, "step": 39273 }, { "epoch": 2.1992384365550453, "grad_norm": 1.4288742542266846, "learning_rate": 9.493263157894738e-05, "loss": 0.4678, "step": 39274 }, { "epoch": 2.1992944338671743, "grad_norm": 1.0784953832626343, "learning_rate": 9.493236842105264e-05, "loss": 0.2871, "step": 39275 }, { "epoch": 2.1993504311793033, "grad_norm": 1.1378778219223022, "learning_rate": 9.49321052631579e-05, "loss": 0.4651, "step": 39276 }, { "epoch": 2.1994064284914323, "grad_norm": 1.417097806930542, "learning_rate": 9.493184210526316e-05, "loss": 0.4562, "step": 39277 }, { "epoch": 2.1994624258035613, "grad_norm": 1.0440655946731567, "learning_rate": 9.493157894736843e-05, "loss": 0.3712, "step": 39278 }, { "epoch": 2.1995184231156903, "grad_norm": 1.160477638244629, "learning_rate": 9.493131578947369e-05, "loss": 0.3591, "step": 39279 }, { "epoch": 2.1995744204278194, "grad_norm": 1.1765644550323486, "learning_rate": 9.493105263157895e-05, "loss": 0.4118, "step": 39280 }, { "epoch": 2.1996304177399484, "grad_norm": 
1.2263824939727783, "learning_rate": 9.493078947368421e-05, "loss": 0.4364, "step": 39281 }, { "epoch": 2.1996864150520774, "grad_norm": 1.0986815690994263, "learning_rate": 9.493052631578948e-05, "loss": 0.4, "step": 39282 }, { "epoch": 2.1997424123642064, "grad_norm": 1.3523582220077515, "learning_rate": 9.493026315789474e-05, "loss": 0.4107, "step": 39283 }, { "epoch": 2.1997984096763354, "grad_norm": 1.6746745109558105, "learning_rate": 9.493000000000001e-05, "loss": 0.5522, "step": 39284 }, { "epoch": 2.1998544069884645, "grad_norm": 1.158724308013916, "learning_rate": 9.492973684210526e-05, "loss": 0.3857, "step": 39285 }, { "epoch": 2.1999104043005935, "grad_norm": 1.1654574871063232, "learning_rate": 9.492947368421053e-05, "loss": 0.46, "step": 39286 }, { "epoch": 2.1999664016127225, "grad_norm": 1.2558428049087524, "learning_rate": 9.49292105263158e-05, "loss": 0.4747, "step": 39287 }, { "epoch": 2.2000223989248515, "grad_norm": 1.0895527601242065, "learning_rate": 9.492894736842107e-05, "loss": 0.303, "step": 39288 }, { "epoch": 2.2000783962369805, "grad_norm": 1.1655685901641846, "learning_rate": 9.492868421052631e-05, "loss": 0.3701, "step": 39289 }, { "epoch": 2.2001343935491096, "grad_norm": 1.3279938697814941, "learning_rate": 9.492842105263159e-05, "loss": 0.3684, "step": 39290 }, { "epoch": 2.2001903908612386, "grad_norm": 1.2167469263076782, "learning_rate": 9.492815789473685e-05, "loss": 0.5036, "step": 39291 }, { "epoch": 2.2002463881733676, "grad_norm": 1.102217435836792, "learning_rate": 9.49278947368421e-05, "loss": 0.4345, "step": 39292 }, { "epoch": 2.2003023854854966, "grad_norm": 1.2142664194107056, "learning_rate": 9.492763157894738e-05, "loss": 0.3749, "step": 39293 }, { "epoch": 2.2003583827976256, "grad_norm": 1.1024116277694702, "learning_rate": 9.492736842105263e-05, "loss": 0.2944, "step": 39294 }, { "epoch": 2.2004143801097547, "grad_norm": 1.0679608583450317, "learning_rate": 9.49271052631579e-05, "loss": 0.309, "step": 39295 }, 
{ "epoch": 2.2004703774218837, "grad_norm": 1.1432099342346191, "learning_rate": 9.492684210526316e-05, "loss": 0.47, "step": 39296 }, { "epoch": 2.2005263747340127, "grad_norm": 1.4059308767318726, "learning_rate": 9.492657894736843e-05, "loss": 0.4945, "step": 39297 }, { "epoch": 2.2005823720461417, "grad_norm": 1.4727104902267456, "learning_rate": 9.492631578947369e-05, "loss": 0.498, "step": 39298 }, { "epoch": 2.2006383693582707, "grad_norm": 1.1360934972763062, "learning_rate": 9.492605263157895e-05, "loss": 0.43, "step": 39299 }, { "epoch": 2.2006943666703997, "grad_norm": 1.139660120010376, "learning_rate": 9.492578947368421e-05, "loss": 0.4029, "step": 39300 }, { "epoch": 2.2007503639825288, "grad_norm": 1.252203106880188, "learning_rate": 9.492552631578948e-05, "loss": 0.3405, "step": 39301 }, { "epoch": 2.200806361294658, "grad_norm": 1.2618763446807861, "learning_rate": 9.492526315789474e-05, "loss": 0.4606, "step": 39302 }, { "epoch": 2.200862358606787, "grad_norm": 1.0292749404907227, "learning_rate": 9.4925e-05, "loss": 0.4432, "step": 39303 }, { "epoch": 2.200918355918916, "grad_norm": 1.155129313468933, "learning_rate": 9.492473684210526e-05, "loss": 0.3724, "step": 39304 }, { "epoch": 2.200974353231045, "grad_norm": 1.9602291584014893, "learning_rate": 9.492447368421054e-05, "loss": 0.6007, "step": 39305 }, { "epoch": 2.201030350543174, "grad_norm": 1.5157901048660278, "learning_rate": 9.49242105263158e-05, "loss": 0.5295, "step": 39306 }, { "epoch": 2.201086347855303, "grad_norm": 1.308485746383667, "learning_rate": 9.492394736842106e-05, "loss": 0.2816, "step": 39307 }, { "epoch": 2.201142345167432, "grad_norm": 1.3615050315856934, "learning_rate": 9.492368421052632e-05, "loss": 0.4409, "step": 39308 }, { "epoch": 2.201198342479561, "grad_norm": 1.2001307010650635, "learning_rate": 9.492342105263158e-05, "loss": 0.3931, "step": 39309 }, { "epoch": 2.20125433979169, "grad_norm": 1.1810969114303589, "learning_rate": 9.492315789473685e-05, "loss": 
0.3792, "step": 39310 }, { "epoch": 2.201310337103819, "grad_norm": 1.0671933889389038, "learning_rate": 9.492289473684211e-05, "loss": 0.3758, "step": 39311 }, { "epoch": 2.201366334415948, "grad_norm": 1.090793490409851, "learning_rate": 9.492263157894737e-05, "loss": 0.3441, "step": 39312 }, { "epoch": 2.201422331728077, "grad_norm": 1.2792291641235352, "learning_rate": 9.492236842105263e-05, "loss": 0.3195, "step": 39313 }, { "epoch": 2.201478329040206, "grad_norm": 1.1196097135543823, "learning_rate": 9.49221052631579e-05, "loss": 0.3854, "step": 39314 }, { "epoch": 2.201534326352335, "grad_norm": 1.177736759185791, "learning_rate": 9.492184210526316e-05, "loss": 0.3877, "step": 39315 }, { "epoch": 2.201590323664464, "grad_norm": 1.6207741498947144, "learning_rate": 9.492157894736843e-05, "loss": 0.4691, "step": 39316 }, { "epoch": 2.201646320976593, "grad_norm": 1.186977744102478, "learning_rate": 9.492131578947368e-05, "loss": 0.3499, "step": 39317 }, { "epoch": 2.201702318288722, "grad_norm": 1.4378749132156372, "learning_rate": 9.492105263157895e-05, "loss": 0.4954, "step": 39318 }, { "epoch": 2.201758315600851, "grad_norm": 1.0880298614501953, "learning_rate": 9.492078947368421e-05, "loss": 0.3905, "step": 39319 }, { "epoch": 2.20181431291298, "grad_norm": 1.228933334350586, "learning_rate": 9.492052631578949e-05, "loss": 0.4962, "step": 39320 }, { "epoch": 2.201870310225109, "grad_norm": 1.3729842901229858, "learning_rate": 9.492026315789475e-05, "loss": 0.3732, "step": 39321 }, { "epoch": 2.201926307537238, "grad_norm": 1.1341028213500977, "learning_rate": 9.492e-05, "loss": 0.2977, "step": 39322 }, { "epoch": 2.201982304849367, "grad_norm": 1.1909477710723877, "learning_rate": 9.491973684210527e-05, "loss": 0.4542, "step": 39323 }, { "epoch": 2.202038302161496, "grad_norm": 1.1851598024368286, "learning_rate": 9.491947368421054e-05, "loss": 0.3342, "step": 39324 }, { "epoch": 2.2020942994736252, "grad_norm": 1.3212474584579468, "learning_rate": 
9.49192105263158e-05, "loss": 0.4274, "step": 39325 }, { "epoch": 2.2021502967857542, "grad_norm": 1.1927757263183594, "learning_rate": 9.491894736842106e-05, "loss": 0.3609, "step": 39326 }, { "epoch": 2.2022062940978833, "grad_norm": 1.2361246347427368, "learning_rate": 9.491868421052632e-05, "loss": 0.3993, "step": 39327 }, { "epoch": 2.2022622914100123, "grad_norm": 1.221638798713684, "learning_rate": 9.491842105263158e-05, "loss": 0.2948, "step": 39328 }, { "epoch": 2.2023182887221413, "grad_norm": 2.991151809692383, "learning_rate": 9.491815789473685e-05, "loss": 0.3586, "step": 39329 }, { "epoch": 2.2023742860342703, "grad_norm": 1.0577765703201294, "learning_rate": 9.491789473684211e-05, "loss": 0.357, "step": 39330 }, { "epoch": 2.2024302833463993, "grad_norm": 1.4361063241958618, "learning_rate": 9.491763157894737e-05, "loss": 0.5449, "step": 39331 }, { "epoch": 2.2024862806585284, "grad_norm": 1.175093412399292, "learning_rate": 9.491736842105263e-05, "loss": 0.4037, "step": 39332 }, { "epoch": 2.2025422779706574, "grad_norm": 1.527953028678894, "learning_rate": 9.49171052631579e-05, "loss": 0.4733, "step": 39333 }, { "epoch": 2.2025982752827864, "grad_norm": 1.089242696762085, "learning_rate": 9.491684210526316e-05, "loss": 0.3371, "step": 39334 }, { "epoch": 2.2026542725949154, "grad_norm": 1.4622660875320435, "learning_rate": 9.491657894736842e-05, "loss": 0.5044, "step": 39335 }, { "epoch": 2.2027102699070444, "grad_norm": 1.4181054830551147, "learning_rate": 9.491631578947368e-05, "loss": 0.3694, "step": 39336 }, { "epoch": 2.2027662672191735, "grad_norm": 1.3265615701675415, "learning_rate": 9.491605263157896e-05, "loss": 0.4154, "step": 39337 }, { "epoch": 2.2028222645313025, "grad_norm": 1.2674179077148438, "learning_rate": 9.491578947368422e-05, "loss": 0.3843, "step": 39338 }, { "epoch": 2.2028782618434315, "grad_norm": 1.1680123805999756, "learning_rate": 9.491552631578949e-05, "loss": 0.4554, "step": 39339 }, { "epoch": 2.2029342591555605, 
"grad_norm": 1.3256821632385254, "learning_rate": 9.491526315789474e-05, "loss": 0.4979, "step": 39340 }, { "epoch": 2.2029902564676895, "grad_norm": 1.8294093608856201, "learning_rate": 9.491500000000001e-05, "loss": 0.4146, "step": 39341 }, { "epoch": 2.2030462537798186, "grad_norm": 1.1637344360351562, "learning_rate": 9.491473684210527e-05, "loss": 0.5407, "step": 39342 }, { "epoch": 2.2031022510919476, "grad_norm": 1.3018330335617065, "learning_rate": 9.491447368421053e-05, "loss": 0.3945, "step": 39343 }, { "epoch": 2.2031582484040766, "grad_norm": 1.7104288339614868, "learning_rate": 9.491421052631579e-05, "loss": 0.4961, "step": 39344 }, { "epoch": 2.2032142457162056, "grad_norm": 1.2187089920043945, "learning_rate": 9.491394736842105e-05, "loss": 0.391, "step": 39345 }, { "epoch": 2.2032702430283346, "grad_norm": 1.0079604387283325, "learning_rate": 9.491368421052632e-05, "loss": 0.2951, "step": 39346 }, { "epoch": 2.2033262403404636, "grad_norm": 0.9097388386726379, "learning_rate": 9.491342105263158e-05, "loss": 0.3989, "step": 39347 }, { "epoch": 2.2033822376525927, "grad_norm": 1.2129552364349365, "learning_rate": 9.491315789473685e-05, "loss": 0.4267, "step": 39348 }, { "epoch": 2.2034382349647217, "grad_norm": 1.247456669807434, "learning_rate": 9.49128947368421e-05, "loss": 0.3425, "step": 39349 }, { "epoch": 2.2034942322768507, "grad_norm": 1.3718011379241943, "learning_rate": 9.491263157894737e-05, "loss": 0.3578, "step": 39350 }, { "epoch": 2.2035502295889797, "grad_norm": 1.1155140399932861, "learning_rate": 9.491236842105263e-05, "loss": 0.346, "step": 39351 }, { "epoch": 2.2036062269011087, "grad_norm": 1.2574106454849243, "learning_rate": 9.49121052631579e-05, "loss": 0.4189, "step": 39352 }, { "epoch": 2.2036622242132378, "grad_norm": 1.05961012840271, "learning_rate": 9.491184210526317e-05, "loss": 0.3504, "step": 39353 }, { "epoch": 2.203718221525367, "grad_norm": 1.1112011671066284, "learning_rate": 9.491157894736843e-05, "loss": 0.3679, 
"step": 39354 }, { "epoch": 2.203774218837496, "grad_norm": 1.2634233236312866, "learning_rate": 9.491131578947369e-05, "loss": 0.5298, "step": 39355 }, { "epoch": 2.203830216149625, "grad_norm": 1.2223896980285645, "learning_rate": 9.491105263157896e-05, "loss": 0.493, "step": 39356 }, { "epoch": 2.203886213461754, "grad_norm": 1.292396068572998, "learning_rate": 9.491078947368422e-05, "loss": 0.4107, "step": 39357 }, { "epoch": 2.203942210773883, "grad_norm": 1.210006833076477, "learning_rate": 9.491052631578948e-05, "loss": 0.3662, "step": 39358 }, { "epoch": 2.203998208086012, "grad_norm": 1.2033412456512451, "learning_rate": 9.491026315789474e-05, "loss": 0.3618, "step": 39359 }, { "epoch": 2.204054205398141, "grad_norm": 1.2836880683898926, "learning_rate": 9.491000000000001e-05, "loss": 0.2981, "step": 39360 }, { "epoch": 2.20411020271027, "grad_norm": 0.9685962796211243, "learning_rate": 9.490973684210527e-05, "loss": 0.2838, "step": 39361 }, { "epoch": 2.204166200022399, "grad_norm": 1.1783638000488281, "learning_rate": 9.490947368421053e-05, "loss": 0.4193, "step": 39362 }, { "epoch": 2.204222197334528, "grad_norm": 1.2935866117477417, "learning_rate": 9.490921052631579e-05, "loss": 0.3465, "step": 39363 }, { "epoch": 2.204278194646657, "grad_norm": 1.152005910873413, "learning_rate": 9.490894736842105e-05, "loss": 0.403, "step": 39364 }, { "epoch": 2.204334191958786, "grad_norm": 1.0610591173171997, "learning_rate": 9.490868421052632e-05, "loss": 0.3927, "step": 39365 }, { "epoch": 2.204390189270915, "grad_norm": 1.2709193229675293, "learning_rate": 9.490842105263158e-05, "loss": 0.3874, "step": 39366 }, { "epoch": 2.204446186583044, "grad_norm": 1.2130619287490845, "learning_rate": 9.490815789473684e-05, "loss": 0.482, "step": 39367 }, { "epoch": 2.204502183895173, "grad_norm": 1.4525854587554932, "learning_rate": 9.49078947368421e-05, "loss": 0.4421, "step": 39368 }, { "epoch": 2.204558181207302, "grad_norm": 1.3734872341156006, "learning_rate": 
9.490763157894738e-05, "loss": 0.4058, "step": 39369 }, { "epoch": 2.204614178519431, "grad_norm": 1.0020607709884644, "learning_rate": 9.490736842105264e-05, "loss": 0.3527, "step": 39370 }, { "epoch": 2.20467017583156, "grad_norm": 1.1850841045379639, "learning_rate": 9.490710526315791e-05, "loss": 0.4669, "step": 39371 }, { "epoch": 2.204726173143689, "grad_norm": 1.3022788763046265, "learning_rate": 9.490684210526316e-05, "loss": 0.4011, "step": 39372 }, { "epoch": 2.204782170455818, "grad_norm": 1.42355215549469, "learning_rate": 9.490657894736843e-05, "loss": 0.5983, "step": 39373 }, { "epoch": 2.204838167767947, "grad_norm": 1.6269807815551758, "learning_rate": 9.490631578947369e-05, "loss": 0.4742, "step": 39374 }, { "epoch": 2.204894165080076, "grad_norm": 1.2100167274475098, "learning_rate": 9.490605263157896e-05, "loss": 0.3567, "step": 39375 }, { "epoch": 2.204950162392205, "grad_norm": 1.0779049396514893, "learning_rate": 9.490578947368422e-05, "loss": 0.3919, "step": 39376 }, { "epoch": 2.2050061597043342, "grad_norm": 1.4696465730667114, "learning_rate": 9.490552631578948e-05, "loss": 0.4987, "step": 39377 }, { "epoch": 2.2050621570164632, "grad_norm": 1.1518709659576416, "learning_rate": 9.490526315789474e-05, "loss": 0.4005, "step": 39378 }, { "epoch": 2.2051181543285923, "grad_norm": 1.4353548288345337, "learning_rate": 9.4905e-05, "loss": 0.4269, "step": 39379 }, { "epoch": 2.2051741516407213, "grad_norm": 1.6050125360488892, "learning_rate": 9.490473684210527e-05, "loss": 0.3598, "step": 39380 }, { "epoch": 2.2052301489528503, "grad_norm": 1.313718557357788, "learning_rate": 9.490447368421052e-05, "loss": 0.4787, "step": 39381 }, { "epoch": 2.2052861462649793, "grad_norm": 1.4907491207122803, "learning_rate": 9.490421052631579e-05, "loss": 0.4583, "step": 39382 }, { "epoch": 2.2053421435771083, "grad_norm": 1.0511032342910767, "learning_rate": 9.490394736842105e-05, "loss": 0.428, "step": 39383 }, { "epoch": 2.2053981408892374, "grad_norm": 
1.29319429397583, "learning_rate": 9.490368421052633e-05, "loss": 0.4183, "step": 39384 }, { "epoch": 2.2054541382013664, "grad_norm": 1.3380197286605835, "learning_rate": 9.490342105263159e-05, "loss": 0.4372, "step": 39385 }, { "epoch": 2.2055101355134954, "grad_norm": 1.205701231956482, "learning_rate": 9.490315789473685e-05, "loss": 0.6668, "step": 39386 }, { "epoch": 2.2055661328256244, "grad_norm": 1.1658575534820557, "learning_rate": 9.49028947368421e-05, "loss": 0.3206, "step": 39387 }, { "epoch": 2.2056221301377534, "grad_norm": 1.552689552307129, "learning_rate": 9.490263157894738e-05, "loss": 0.5339, "step": 39388 }, { "epoch": 2.2056781274498825, "grad_norm": 1.019373893737793, "learning_rate": 9.490236842105264e-05, "loss": 0.2995, "step": 39389 }, { "epoch": 2.2057341247620115, "grad_norm": 1.1684759855270386, "learning_rate": 9.49021052631579e-05, "loss": 0.409, "step": 39390 }, { "epoch": 2.2057901220741405, "grad_norm": 1.038171410560608, "learning_rate": 9.490184210526316e-05, "loss": 0.3349, "step": 39391 }, { "epoch": 2.2058461193862695, "grad_norm": 1.7563350200653076, "learning_rate": 9.490157894736843e-05, "loss": 0.5792, "step": 39392 }, { "epoch": 2.2059021166983985, "grad_norm": 1.1089731454849243, "learning_rate": 9.490131578947369e-05, "loss": 0.3798, "step": 39393 }, { "epoch": 2.2059581140105275, "grad_norm": 1.2701400518417358, "learning_rate": 9.490105263157896e-05, "loss": 0.4079, "step": 39394 }, { "epoch": 2.2060141113226566, "grad_norm": 1.291379451751709, "learning_rate": 9.490078947368421e-05, "loss": 0.4018, "step": 39395 }, { "epoch": 2.2060701086347856, "grad_norm": 1.1315112113952637, "learning_rate": 9.490052631578947e-05, "loss": 0.3452, "step": 39396 }, { "epoch": 2.2061261059469146, "grad_norm": 1.3090662956237793, "learning_rate": 9.490026315789474e-05, "loss": 0.3291, "step": 39397 }, { "epoch": 2.2061821032590436, "grad_norm": 1.2487105131149292, "learning_rate": 9.49e-05, "loss": 0.3743, "step": 39398 }, { "epoch": 
2.2062381005711726, "grad_norm": 1.1568964719772339, "learning_rate": 9.489973684210526e-05, "loss": 0.3617, "step": 39399 }, { "epoch": 2.2062940978833017, "grad_norm": 1.7552618980407715, "learning_rate": 9.489947368421052e-05, "loss": 0.5455, "step": 39400 }, { "epoch": 2.2063500951954307, "grad_norm": 0.9996274709701538, "learning_rate": 9.48992105263158e-05, "loss": 0.3291, "step": 39401 }, { "epoch": 2.2064060925075597, "grad_norm": 1.0297887325286865, "learning_rate": 9.489894736842106e-05, "loss": 0.4223, "step": 39402 }, { "epoch": 2.2064620898196887, "grad_norm": 0.9772664308547974, "learning_rate": 9.489868421052633e-05, "loss": 0.2732, "step": 39403 }, { "epoch": 2.2065180871318177, "grad_norm": 1.4791831970214844, "learning_rate": 9.489842105263157e-05, "loss": 0.3542, "step": 39404 }, { "epoch": 2.2065740844439468, "grad_norm": 1.216740369796753, "learning_rate": 9.489815789473685e-05, "loss": 0.4073, "step": 39405 }, { "epoch": 2.206630081756076, "grad_norm": 1.59219491481781, "learning_rate": 9.489789473684211e-05, "loss": 0.4708, "step": 39406 }, { "epoch": 2.206686079068205, "grad_norm": 1.1547629833221436, "learning_rate": 9.489763157894738e-05, "loss": 0.4151, "step": 39407 }, { "epoch": 2.206742076380334, "grad_norm": 1.1807096004486084, "learning_rate": 9.489736842105264e-05, "loss": 0.4442, "step": 39408 }, { "epoch": 2.206798073692463, "grad_norm": 1.4545261859893799, "learning_rate": 9.48971052631579e-05, "loss": 0.3936, "step": 39409 }, { "epoch": 2.206854071004592, "grad_norm": 1.263182520866394, "learning_rate": 9.489684210526316e-05, "loss": 0.3769, "step": 39410 }, { "epoch": 2.206910068316721, "grad_norm": 1.2876176834106445, "learning_rate": 9.489657894736843e-05, "loss": 0.4633, "step": 39411 }, { "epoch": 2.20696606562885, "grad_norm": 1.2935539484024048, "learning_rate": 9.489631578947369e-05, "loss": 0.5254, "step": 39412 }, { "epoch": 2.207022062940979, "grad_norm": 1.2221156358718872, "learning_rate": 9.489605263157895e-05, 
"loss": 0.4282, "step": 39413 }, { "epoch": 2.207078060253108, "grad_norm": 1.3922674655914307, "learning_rate": 9.489578947368421e-05, "loss": 0.3174, "step": 39414 }, { "epoch": 2.207134057565237, "grad_norm": 1.2614103555679321, "learning_rate": 9.489552631578947e-05, "loss": 0.4641, "step": 39415 }, { "epoch": 2.207190054877366, "grad_norm": 1.3582285642623901, "learning_rate": 9.489526315789475e-05, "loss": 0.4834, "step": 39416 }, { "epoch": 2.207246052189495, "grad_norm": 1.2694936990737915, "learning_rate": 9.4895e-05, "loss": 0.3946, "step": 39417 }, { "epoch": 2.207302049501624, "grad_norm": 1.1047664880752563, "learning_rate": 9.489473684210527e-05, "loss": 0.3991, "step": 39418 }, { "epoch": 2.207358046813753, "grad_norm": 1.309397578239441, "learning_rate": 9.489447368421052e-05, "loss": 0.4047, "step": 39419 }, { "epoch": 2.207414044125882, "grad_norm": 0.965138852596283, "learning_rate": 9.48942105263158e-05, "loss": 0.2978, "step": 39420 }, { "epoch": 2.207470041438011, "grad_norm": 1.0652178525924683, "learning_rate": 9.489394736842106e-05, "loss": 0.4868, "step": 39421 }, { "epoch": 2.20752603875014, "grad_norm": 1.2504161596298218, "learning_rate": 9.489368421052632e-05, "loss": 0.5275, "step": 39422 }, { "epoch": 2.207582036062269, "grad_norm": 1.2136930227279663, "learning_rate": 9.489342105263158e-05, "loss": 0.3844, "step": 39423 }, { "epoch": 2.207638033374398, "grad_norm": 1.0742990970611572, "learning_rate": 9.489315789473685e-05, "loss": 0.3827, "step": 39424 }, { "epoch": 2.207694030686527, "grad_norm": 1.3198479413986206, "learning_rate": 9.489289473684211e-05, "loss": 0.5481, "step": 39425 }, { "epoch": 2.207750027998656, "grad_norm": 1.1638554334640503, "learning_rate": 9.489263157894738e-05, "loss": 0.35, "step": 39426 }, { "epoch": 2.207806025310785, "grad_norm": 1.0478335618972778, "learning_rate": 9.489236842105263e-05, "loss": 0.3603, "step": 39427 }, { "epoch": 2.207862022622914, "grad_norm": 1.3544954061508179, "learning_rate": 
9.48921052631579e-05, "loss": 0.444, "step": 39428 }, { "epoch": 2.207918019935043, "grad_norm": 1.2961604595184326, "learning_rate": 9.489184210526316e-05, "loss": 0.459, "step": 39429 }, { "epoch": 2.2079740172471722, "grad_norm": 1.2026969194412231, "learning_rate": 9.489157894736842e-05, "loss": 0.3337, "step": 39430 }, { "epoch": 2.2080300145593013, "grad_norm": 1.0703009366989136, "learning_rate": 9.48913157894737e-05, "loss": 0.3582, "step": 39431 }, { "epoch": 2.2080860118714303, "grad_norm": 1.4483842849731445, "learning_rate": 9.489105263157894e-05, "loss": 0.5261, "step": 39432 }, { "epoch": 2.2081420091835593, "grad_norm": 1.122277021408081, "learning_rate": 9.489078947368422e-05, "loss": 0.3199, "step": 39433 }, { "epoch": 2.2081980064956883, "grad_norm": 1.0623100996017456, "learning_rate": 9.489052631578947e-05, "loss": 0.3993, "step": 39434 }, { "epoch": 2.2082540038078173, "grad_norm": 1.1094510555267334, "learning_rate": 9.489026315789475e-05, "loss": 0.3147, "step": 39435 }, { "epoch": 2.2083100011199464, "grad_norm": 1.2570363283157349, "learning_rate": 9.489e-05, "loss": 0.4363, "step": 39436 }, { "epoch": 2.2083659984320754, "grad_norm": 1.1157420873641968, "learning_rate": 9.488973684210527e-05, "loss": 0.4852, "step": 39437 }, { "epoch": 2.2084219957442044, "grad_norm": 1.2953319549560547, "learning_rate": 9.488947368421053e-05, "loss": 0.5739, "step": 39438 }, { "epoch": 2.2084779930563334, "grad_norm": 1.335352897644043, "learning_rate": 9.48892105263158e-05, "loss": 0.3707, "step": 39439 }, { "epoch": 2.2085339903684624, "grad_norm": 1.5566648244857788, "learning_rate": 9.488894736842106e-05, "loss": 0.3212, "step": 39440 }, { "epoch": 2.2085899876805914, "grad_norm": 1.1545898914337158, "learning_rate": 9.488868421052632e-05, "loss": 0.4212, "step": 39441 }, { "epoch": 2.2086459849927205, "grad_norm": 1.1226252317428589, "learning_rate": 9.488842105263158e-05, "loss": 0.3335, "step": 39442 }, { "epoch": 2.2087019823048495, "grad_norm": 
1.2611626386642456, "learning_rate": 9.488815789473685e-05, "loss": 0.3461, "step": 39443 }, { "epoch": 2.2087579796169785, "grad_norm": 1.2921968698501587, "learning_rate": 9.488789473684211e-05, "loss": 0.482, "step": 39444 }, { "epoch": 2.2088139769291075, "grad_norm": 1.3033865690231323, "learning_rate": 9.488763157894737e-05, "loss": 0.4343, "step": 39445 }, { "epoch": 2.2088699742412365, "grad_norm": 1.3479501008987427, "learning_rate": 9.488736842105263e-05, "loss": 0.3744, "step": 39446 }, { "epoch": 2.2089259715533656, "grad_norm": 1.0953289270401, "learning_rate": 9.488710526315789e-05, "loss": 0.3697, "step": 39447 }, { "epoch": 2.2089819688654946, "grad_norm": 1.4060044288635254, "learning_rate": 9.488684210526317e-05, "loss": 0.3674, "step": 39448 }, { "epoch": 2.2090379661776236, "grad_norm": 1.0578078031539917, "learning_rate": 9.488657894736843e-05, "loss": 0.3018, "step": 39449 }, { "epoch": 2.2090939634897526, "grad_norm": 1.0024700164794922, "learning_rate": 9.488631578947368e-05, "loss": 0.39, "step": 39450 }, { "epoch": 2.2091499608018816, "grad_norm": 1.2217293977737427, "learning_rate": 9.488605263157894e-05, "loss": 0.4692, "step": 39451 }, { "epoch": 2.2092059581140107, "grad_norm": 2.155561685562134, "learning_rate": 9.488578947368422e-05, "loss": 0.4475, "step": 39452 }, { "epoch": 2.2092619554261397, "grad_norm": 1.280143141746521, "learning_rate": 9.488552631578948e-05, "loss": 0.3999, "step": 39453 }, { "epoch": 2.2093179527382687, "grad_norm": 1.1410243511199951, "learning_rate": 9.488526315789474e-05, "loss": 0.4895, "step": 39454 }, { "epoch": 2.2093739500503977, "grad_norm": 1.0631921291351318, "learning_rate": 9.4885e-05, "loss": 0.3316, "step": 39455 }, { "epoch": 2.2094299473625267, "grad_norm": 1.2528951168060303, "learning_rate": 9.488473684210527e-05, "loss": 0.3505, "step": 39456 }, { "epoch": 2.2094859446746558, "grad_norm": 1.5276639461517334, "learning_rate": 9.488447368421053e-05, "loss": 0.4463, "step": 39457 }, { 
"epoch": 2.2095419419867848, "grad_norm": 1.1898967027664185, "learning_rate": 9.48842105263158e-05, "loss": 0.397, "step": 39458 }, { "epoch": 2.209597939298914, "grad_norm": 1.1977025270462036, "learning_rate": 9.488394736842105e-05, "loss": 0.3885, "step": 39459 }, { "epoch": 2.209653936611043, "grad_norm": 1.207180380821228, "learning_rate": 9.488368421052632e-05, "loss": 0.3495, "step": 39460 }, { "epoch": 2.209709933923172, "grad_norm": 2.1690587997436523, "learning_rate": 9.488342105263158e-05, "loss": 0.4943, "step": 39461 }, { "epoch": 2.209765931235301, "grad_norm": 1.2840505838394165, "learning_rate": 9.488315789473686e-05, "loss": 0.3654, "step": 39462 }, { "epoch": 2.20982192854743, "grad_norm": 1.235318899154663, "learning_rate": 9.488289473684212e-05, "loss": 0.4723, "step": 39463 }, { "epoch": 2.209877925859559, "grad_norm": 0.914313793182373, "learning_rate": 9.488263157894736e-05, "loss": 0.3448, "step": 39464 }, { "epoch": 2.209933923171688, "grad_norm": 1.4670906066894531, "learning_rate": 9.488236842105263e-05, "loss": 0.4202, "step": 39465 }, { "epoch": 2.209989920483817, "grad_norm": 1.1392887830734253, "learning_rate": 9.48821052631579e-05, "loss": 0.3634, "step": 39466 }, { "epoch": 2.210045917795946, "grad_norm": 1.3882942199707031, "learning_rate": 9.488184210526317e-05, "loss": 0.4386, "step": 39467 }, { "epoch": 2.210101915108075, "grad_norm": 1.3332775831222534, "learning_rate": 9.488157894736843e-05, "loss": 0.402, "step": 39468 }, { "epoch": 2.210157912420204, "grad_norm": 1.3088740110397339, "learning_rate": 9.488131578947369e-05, "loss": 0.4818, "step": 39469 }, { "epoch": 2.210213909732333, "grad_norm": 1.0668730735778809, "learning_rate": 9.488105263157895e-05, "loss": 0.3802, "step": 39470 }, { "epoch": 2.210269907044462, "grad_norm": 1.3302572965621948, "learning_rate": 9.488078947368422e-05, "loss": 0.3559, "step": 39471 }, { "epoch": 2.210325904356591, "grad_norm": 1.135267734527588, "learning_rate": 9.488052631578948e-05, 
"loss": 0.3098, "step": 39472 }, { "epoch": 2.21038190166872, "grad_norm": 1.3553977012634277, "learning_rate": 9.488026315789474e-05, "loss": 0.3405, "step": 39473 }, { "epoch": 2.210437898980849, "grad_norm": 1.0660375356674194, "learning_rate": 9.488e-05, "loss": 0.3221, "step": 39474 }, { "epoch": 2.210493896292978, "grad_norm": 1.2229137420654297, "learning_rate": 9.487973684210527e-05, "loss": 0.3875, "step": 39475 }, { "epoch": 2.210549893605107, "grad_norm": 1.1356780529022217, "learning_rate": 9.487947368421053e-05, "loss": 0.4331, "step": 39476 }, { "epoch": 2.210605890917236, "grad_norm": 1.1612269878387451, "learning_rate": 9.487921052631579e-05, "loss": 0.3993, "step": 39477 }, { "epoch": 2.210661888229365, "grad_norm": 1.132050633430481, "learning_rate": 9.487894736842105e-05, "loss": 0.3471, "step": 39478 }, { "epoch": 2.210717885541494, "grad_norm": 1.2439926862716675, "learning_rate": 9.487868421052633e-05, "loss": 0.5064, "step": 39479 }, { "epoch": 2.210773882853623, "grad_norm": 1.0388453006744385, "learning_rate": 9.487842105263159e-05, "loss": 0.3963, "step": 39480 }, { "epoch": 2.210829880165752, "grad_norm": 1.0763019323349, "learning_rate": 9.487815789473686e-05, "loss": 0.3492, "step": 39481 }, { "epoch": 2.2108858774778812, "grad_norm": 1.2014552354812622, "learning_rate": 9.48778947368421e-05, "loss": 0.3836, "step": 39482 }, { "epoch": 2.2109418747900103, "grad_norm": 1.0644665956497192, "learning_rate": 9.487763157894736e-05, "loss": 0.3975, "step": 39483 }, { "epoch": 2.2109978721021393, "grad_norm": 1.3811395168304443, "learning_rate": 9.487736842105264e-05, "loss": 0.3515, "step": 39484 }, { "epoch": 2.2110538694142683, "grad_norm": 1.309718370437622, "learning_rate": 9.48771052631579e-05, "loss": 0.3616, "step": 39485 }, { "epoch": 2.2111098667263973, "grad_norm": 1.0296283960342407, "learning_rate": 9.487684210526317e-05, "loss": 0.3913, "step": 39486 }, { "epoch": 2.2111658640385263, "grad_norm": 1.2111494541168213, 
"learning_rate": 9.487657894736842e-05, "loss": 0.374, "step": 39487 }, { "epoch": 2.2112218613506553, "grad_norm": 1.1305137872695923, "learning_rate": 9.487631578947369e-05, "loss": 0.4628, "step": 39488 }, { "epoch": 2.2112778586627844, "grad_norm": 1.375943899154663, "learning_rate": 9.487605263157895e-05, "loss": 0.4683, "step": 39489 }, { "epoch": 2.2113338559749134, "grad_norm": 1.403503656387329, "learning_rate": 9.487578947368422e-05, "loss": 0.3749, "step": 39490 }, { "epoch": 2.2113898532870424, "grad_norm": 1.1193042993545532, "learning_rate": 9.487552631578947e-05, "loss": 0.3039, "step": 39491 }, { "epoch": 2.2114458505991714, "grad_norm": 1.232711672782898, "learning_rate": 9.487526315789474e-05, "loss": 0.4333, "step": 39492 }, { "epoch": 2.2115018479113004, "grad_norm": 1.1490355730056763, "learning_rate": 9.4875e-05, "loss": 0.4038, "step": 39493 }, { "epoch": 2.2115578452234295, "grad_norm": 1.578195571899414, "learning_rate": 9.487473684210528e-05, "loss": 0.4094, "step": 39494 }, { "epoch": 2.2116138425355585, "grad_norm": 1.1531920433044434, "learning_rate": 9.487447368421054e-05, "loss": 0.386, "step": 39495 }, { "epoch": 2.2116698398476875, "grad_norm": 1.2632719278335571, "learning_rate": 9.48742105263158e-05, "loss": 0.4551, "step": 39496 }, { "epoch": 2.2117258371598165, "grad_norm": 0.9915551543235779, "learning_rate": 9.487394736842105e-05, "loss": 0.335, "step": 39497 }, { "epoch": 2.2117818344719455, "grad_norm": 1.3023353815078735, "learning_rate": 9.487368421052633e-05, "loss": 0.4608, "step": 39498 }, { "epoch": 2.2118378317840746, "grad_norm": 1.3370416164398193, "learning_rate": 9.487342105263159e-05, "loss": 0.3906, "step": 39499 }, { "epoch": 2.2118938290962036, "grad_norm": 1.300503134727478, "learning_rate": 9.487315789473685e-05, "loss": 0.3701, "step": 39500 }, { "epoch": 2.2119498264083326, "grad_norm": 1.0996440649032593, "learning_rate": 9.487289473684211e-05, "loss": 0.4575, "step": 39501 }, { "epoch": 
2.2120058237204616, "grad_norm": 1.1221290826797485, "learning_rate": 9.487263157894737e-05, "loss": 0.3514, "step": 39502 }, { "epoch": 2.2120618210325906, "grad_norm": 1.1745353937149048, "learning_rate": 9.487236842105264e-05, "loss": 0.4257, "step": 39503 }, { "epoch": 2.2121178183447197, "grad_norm": 1.1432949304580688, "learning_rate": 9.48721052631579e-05, "loss": 0.3615, "step": 39504 }, { "epoch": 2.2121738156568487, "grad_norm": 1.1346967220306396, "learning_rate": 9.487184210526316e-05, "loss": 0.4955, "step": 39505 }, { "epoch": 2.2122298129689777, "grad_norm": 1.1889140605926514, "learning_rate": 9.487157894736842e-05, "loss": 0.4462, "step": 39506 }, { "epoch": 2.2122858102811067, "grad_norm": 1.1017791032791138, "learning_rate": 9.487131578947369e-05, "loss": 0.413, "step": 39507 }, { "epoch": 2.2123418075932357, "grad_norm": 1.10360586643219, "learning_rate": 9.487105263157895e-05, "loss": 0.2981, "step": 39508 }, { "epoch": 2.2123978049053648, "grad_norm": 0.9922963380813599, "learning_rate": 9.487078947368421e-05, "loss": 0.3166, "step": 39509 }, { "epoch": 2.2124538022174933, "grad_norm": 1.1659842729568481, "learning_rate": 9.487052631578947e-05, "loss": 0.4249, "step": 39510 }, { "epoch": 2.212509799529623, "grad_norm": 1.0374927520751953, "learning_rate": 9.487026315789475e-05, "loss": 0.3453, "step": 39511 }, { "epoch": 2.2125657968417514, "grad_norm": 1.374549388885498, "learning_rate": 9.487e-05, "loss": 0.3526, "step": 39512 }, { "epoch": 2.212621794153881, "grad_norm": 1.4134262800216675, "learning_rate": 9.486973684210528e-05, "loss": 0.4607, "step": 39513 }, { "epoch": 2.2126777914660094, "grad_norm": 0.9947406053543091, "learning_rate": 9.486947368421052e-05, "loss": 0.4176, "step": 39514 }, { "epoch": 2.212733788778139, "grad_norm": 1.235861897468567, "learning_rate": 9.48692105263158e-05, "loss": 0.3164, "step": 39515 }, { "epoch": 2.2127897860902674, "grad_norm": 1.3603397607803345, "learning_rate": 9.486894736842106e-05, "loss": 
0.2996, "step": 39516 }, { "epoch": 2.212845783402397, "grad_norm": 1.2442145347595215, "learning_rate": 9.486868421052632e-05, "loss": 0.4031, "step": 39517 }, { "epoch": 2.2129017807145255, "grad_norm": 1.1878242492675781, "learning_rate": 9.486842105263159e-05, "loss": 0.3654, "step": 39518 }, { "epoch": 2.212957778026655, "grad_norm": 1.181290626525879, "learning_rate": 9.486815789473684e-05, "loss": 0.4246, "step": 39519 }, { "epoch": 2.2130137753387835, "grad_norm": 1.4528449773788452, "learning_rate": 9.486789473684211e-05, "loss": 0.4401, "step": 39520 }, { "epoch": 2.213069772650913, "grad_norm": 1.2494711875915527, "learning_rate": 9.486763157894737e-05, "loss": 0.4755, "step": 39521 }, { "epoch": 2.2131257699630416, "grad_norm": 1.2395983934402466, "learning_rate": 9.486736842105264e-05, "loss": 0.5522, "step": 39522 }, { "epoch": 2.213181767275171, "grad_norm": 1.205452561378479, "learning_rate": 9.48671052631579e-05, "loss": 0.3622, "step": 39523 }, { "epoch": 2.2132377645872996, "grad_norm": 1.2171417474746704, "learning_rate": 9.486684210526316e-05, "loss": 0.3705, "step": 39524 }, { "epoch": 2.213293761899429, "grad_norm": 1.2575563192367554, "learning_rate": 9.486657894736842e-05, "loss": 0.413, "step": 39525 }, { "epoch": 2.2133497592115576, "grad_norm": 1.1577190160751343, "learning_rate": 9.48663157894737e-05, "loss": 0.36, "step": 39526 }, { "epoch": 2.213405756523687, "grad_norm": 1.4515182971954346, "learning_rate": 9.486605263157895e-05, "loss": 0.5074, "step": 39527 }, { "epoch": 2.2134617538358157, "grad_norm": 1.1130716800689697, "learning_rate": 9.486578947368421e-05, "loss": 0.3852, "step": 39528 }, { "epoch": 2.213517751147945, "grad_norm": 1.2099227905273438, "learning_rate": 9.486552631578947e-05, "loss": 0.3614, "step": 39529 }, { "epoch": 2.2135737484600737, "grad_norm": 1.2388745546340942, "learning_rate": 9.486526315789475e-05, "loss": 0.3864, "step": 39530 }, { "epoch": 2.2136297457722027, "grad_norm": 1.3295307159423828, 
"learning_rate": 9.486500000000001e-05, "loss": 0.3432, "step": 39531 }, { "epoch": 2.2136857430843317, "grad_norm": 1.3419257402420044, "learning_rate": 9.486473684210527e-05, "loss": 0.3558, "step": 39532 }, { "epoch": 2.2137417403964608, "grad_norm": 1.0854109525680542, "learning_rate": 9.486447368421053e-05, "loss": 0.3938, "step": 39533 }, { "epoch": 2.21379773770859, "grad_norm": 1.218134880065918, "learning_rate": 9.486421052631579e-05, "loss": 0.4503, "step": 39534 }, { "epoch": 2.213853735020719, "grad_norm": 1.2031400203704834, "learning_rate": 9.486394736842106e-05, "loss": 0.3679, "step": 39535 }, { "epoch": 2.213909732332848, "grad_norm": 1.1427584886550903, "learning_rate": 9.486368421052632e-05, "loss": 0.3866, "step": 39536 }, { "epoch": 2.213965729644977, "grad_norm": 1.0621387958526611, "learning_rate": 9.486342105263158e-05, "loss": 0.3412, "step": 39537 }, { "epoch": 2.214021726957106, "grad_norm": 1.129280686378479, "learning_rate": 9.486315789473684e-05, "loss": 0.3086, "step": 39538 }, { "epoch": 2.214077724269235, "grad_norm": 1.1368530988693237, "learning_rate": 9.486289473684211e-05, "loss": 0.3482, "step": 39539 }, { "epoch": 2.214133721581364, "grad_norm": 1.1958513259887695, "learning_rate": 9.486263157894737e-05, "loss": 0.3904, "step": 39540 }, { "epoch": 2.214189718893493, "grad_norm": 1.5152448415756226, "learning_rate": 9.486236842105265e-05, "loss": 0.4525, "step": 39541 }, { "epoch": 2.214245716205622, "grad_norm": 1.0344181060791016, "learning_rate": 9.486210526315789e-05, "loss": 0.3189, "step": 39542 }, { "epoch": 2.214301713517751, "grad_norm": 1.2310240268707275, "learning_rate": 9.486184210526316e-05, "loss": 0.4751, "step": 39543 }, { "epoch": 2.21435771082988, "grad_norm": 1.3463168144226074, "learning_rate": 9.486157894736842e-05, "loss": 0.3635, "step": 39544 }, { "epoch": 2.214413708142009, "grad_norm": 1.1757280826568604, "learning_rate": 9.48613157894737e-05, "loss": 0.4357, "step": 39545 }, { "epoch": 
2.214469705454138, "grad_norm": 1.423470377922058, "learning_rate": 9.486105263157894e-05, "loss": 0.4359, "step": 39546 }, { "epoch": 2.214525702766267, "grad_norm": 1.0382089614868164, "learning_rate": 9.486078947368422e-05, "loss": 0.4562, "step": 39547 }, { "epoch": 2.214581700078396, "grad_norm": 1.0313842296600342, "learning_rate": 9.486052631578948e-05, "loss": 0.3448, "step": 39548 }, { "epoch": 2.214637697390525, "grad_norm": 1.2458701133728027, "learning_rate": 9.486026315789475e-05, "loss": 0.3934, "step": 39549 }, { "epoch": 2.214693694702654, "grad_norm": 1.136637568473816, "learning_rate": 9.486000000000001e-05, "loss": 0.3829, "step": 39550 }, { "epoch": 2.214749692014783, "grad_norm": 1.133142113685608, "learning_rate": 9.485973684210526e-05, "loss": 0.4146, "step": 39551 }, { "epoch": 2.214805689326912, "grad_norm": 1.0305284261703491, "learning_rate": 9.485947368421053e-05, "loss": 0.3061, "step": 39552 }, { "epoch": 2.214861686639041, "grad_norm": 1.3573909997940063, "learning_rate": 9.485921052631579e-05, "loss": 0.551, "step": 39553 }, { "epoch": 2.21491768395117, "grad_norm": 2.0785410404205322, "learning_rate": 9.485894736842106e-05, "loss": 0.4419, "step": 39554 }, { "epoch": 2.214973681263299, "grad_norm": 1.4044862985610962, "learning_rate": 9.485868421052632e-05, "loss": 0.48, "step": 39555 }, { "epoch": 2.215029678575428, "grad_norm": 1.051600456237793, "learning_rate": 9.485842105263158e-05, "loss": 0.3426, "step": 39556 }, { "epoch": 2.2150856758875572, "grad_norm": 1.3114019632339478, "learning_rate": 9.485815789473684e-05, "loss": 0.4629, "step": 39557 }, { "epoch": 2.2151416731996862, "grad_norm": 1.1474425792694092, "learning_rate": 9.485789473684211e-05, "loss": 0.3169, "step": 39558 }, { "epoch": 2.2151976705118153, "grad_norm": 1.1465036869049072, "learning_rate": 9.485763157894737e-05, "loss": 0.4715, "step": 39559 }, { "epoch": 2.2152536678239443, "grad_norm": 1.058963656425476, "learning_rate": 9.485736842105263e-05, "loss": 
0.3828, "step": 39560 }, { "epoch": 2.2153096651360733, "grad_norm": 1.0208237171173096, "learning_rate": 9.48571052631579e-05, "loss": 0.3754, "step": 39561 }, { "epoch": 2.2153656624482023, "grad_norm": 1.2445060014724731, "learning_rate": 9.485684210526317e-05, "loss": 0.4185, "step": 39562 }, { "epoch": 2.2154216597603313, "grad_norm": 1.2862858772277832, "learning_rate": 9.485657894736843e-05, "loss": 0.4077, "step": 39563 }, { "epoch": 2.2154776570724604, "grad_norm": 1.5391099452972412, "learning_rate": 9.485631578947369e-05, "loss": 0.4952, "step": 39564 }, { "epoch": 2.2155336543845894, "grad_norm": 1.4818860292434692, "learning_rate": 9.485605263157895e-05, "loss": 0.4438, "step": 39565 }, { "epoch": 2.2155896516967184, "grad_norm": 0.9798838496208191, "learning_rate": 9.485578947368422e-05, "loss": 0.3535, "step": 39566 }, { "epoch": 2.2156456490088474, "grad_norm": 1.1640608310699463, "learning_rate": 9.485552631578948e-05, "loss": 0.3377, "step": 39567 }, { "epoch": 2.2157016463209764, "grad_norm": 0.9555137157440186, "learning_rate": 9.485526315789474e-05, "loss": 0.3356, "step": 39568 }, { "epoch": 2.2157576436331055, "grad_norm": 1.3477306365966797, "learning_rate": 9.4855e-05, "loss": 0.4165, "step": 39569 }, { "epoch": 2.2158136409452345, "grad_norm": 1.3791073560714722, "learning_rate": 9.485473684210526e-05, "loss": 0.3652, "step": 39570 }, { "epoch": 2.2158696382573635, "grad_norm": 1.3857711553573608, "learning_rate": 9.485447368421053e-05, "loss": 0.5188, "step": 39571 }, { "epoch": 2.2159256355694925, "grad_norm": 1.678026556968689, "learning_rate": 9.485421052631579e-05, "loss": 0.4708, "step": 39572 }, { "epoch": 2.2159816328816215, "grad_norm": 1.1528973579406738, "learning_rate": 9.485394736842107e-05, "loss": 0.5218, "step": 39573 }, { "epoch": 2.2160376301937506, "grad_norm": 1.249748706817627, "learning_rate": 9.485368421052631e-05, "loss": 0.37, "step": 39574 }, { "epoch": 2.2160936275058796, "grad_norm": 1.2241184711456299, 
"learning_rate": 9.485342105263158e-05, "loss": 0.47, "step": 39575 }, { "epoch": 2.2161496248180086, "grad_norm": 1.190361738204956, "learning_rate": 9.485315789473684e-05, "loss": 0.5866, "step": 39576 }, { "epoch": 2.2162056221301376, "grad_norm": 1.234858751296997, "learning_rate": 9.485289473684212e-05, "loss": 0.5482, "step": 39577 }, { "epoch": 2.2162616194422666, "grad_norm": 1.2922357320785522, "learning_rate": 9.485263157894738e-05, "loss": 0.355, "step": 39578 }, { "epoch": 2.2163176167543956, "grad_norm": 1.18617844581604, "learning_rate": 9.485236842105264e-05, "loss": 0.4831, "step": 39579 }, { "epoch": 2.2163736140665247, "grad_norm": 1.3128308057785034, "learning_rate": 9.48521052631579e-05, "loss": 0.4147, "step": 39580 }, { "epoch": 2.2164296113786537, "grad_norm": 1.2681304216384888, "learning_rate": 9.485184210526317e-05, "loss": 0.3738, "step": 39581 }, { "epoch": 2.2164856086907827, "grad_norm": 1.4341285228729248, "learning_rate": 9.485157894736843e-05, "loss": 0.5228, "step": 39582 }, { "epoch": 2.2165416060029117, "grad_norm": 1.6567810773849487, "learning_rate": 9.485131578947369e-05, "loss": 0.4377, "step": 39583 }, { "epoch": 2.2165976033150407, "grad_norm": 1.1970773935317993, "learning_rate": 9.485105263157895e-05, "loss": 0.3151, "step": 39584 }, { "epoch": 2.2166536006271698, "grad_norm": 1.059559941291809, "learning_rate": 9.485078947368421e-05, "loss": 0.4379, "step": 39585 }, { "epoch": 2.216709597939299, "grad_norm": 1.123970866203308, "learning_rate": 9.485052631578948e-05, "loss": 0.3411, "step": 39586 }, { "epoch": 2.216765595251428, "grad_norm": 1.0425021648406982, "learning_rate": 9.485026315789474e-05, "loss": 0.353, "step": 39587 }, { "epoch": 2.216821592563557, "grad_norm": 1.0684791803359985, "learning_rate": 9.485e-05, "loss": 0.5801, "step": 39588 }, { "epoch": 2.216877589875686, "grad_norm": 1.1544586420059204, "learning_rate": 9.484973684210526e-05, "loss": 0.343, "step": 39589 }, { "epoch": 2.216933587187815, 
"grad_norm": 1.0893439054489136, "learning_rate": 9.484947368421053e-05, "loss": 0.3112, "step": 39590 }, { "epoch": 2.216989584499944, "grad_norm": 1.119444727897644, "learning_rate": 9.48492105263158e-05, "loss": 0.5263, "step": 39591 }, { "epoch": 2.217045581812073, "grad_norm": 1.2116923332214355, "learning_rate": 9.484894736842105e-05, "loss": 0.3842, "step": 39592 }, { "epoch": 2.217101579124202, "grad_norm": 1.315686821937561, "learning_rate": 9.484868421052631e-05, "loss": 0.4364, "step": 39593 }, { "epoch": 2.217157576436331, "grad_norm": 1.096604824066162, "learning_rate": 9.484842105263159e-05, "loss": 0.3816, "step": 39594 }, { "epoch": 2.21721357374846, "grad_norm": 1.1477854251861572, "learning_rate": 9.484815789473685e-05, "loss": 0.4196, "step": 39595 }, { "epoch": 2.217269571060589, "grad_norm": 1.1825330257415771, "learning_rate": 9.484789473684212e-05, "loss": 0.3366, "step": 39596 }, { "epoch": 2.217325568372718, "grad_norm": 1.2780394554138184, "learning_rate": 9.484763157894737e-05, "loss": 0.376, "step": 39597 }, { "epoch": 2.217381565684847, "grad_norm": 1.1259257793426514, "learning_rate": 9.484736842105264e-05, "loss": 0.3643, "step": 39598 }, { "epoch": 2.217437562996976, "grad_norm": 1.4169061183929443, "learning_rate": 9.48471052631579e-05, "loss": 0.3612, "step": 39599 }, { "epoch": 2.217493560309105, "grad_norm": 1.1657676696777344, "learning_rate": 9.484684210526317e-05, "loss": 0.3814, "step": 39600 }, { "epoch": 2.217549557621234, "grad_norm": 1.1967030763626099, "learning_rate": 9.484657894736842e-05, "loss": 0.3648, "step": 39601 }, { "epoch": 2.217605554933363, "grad_norm": 1.3778002262115479, "learning_rate": 9.484631578947369e-05, "loss": 0.4008, "step": 39602 }, { "epoch": 2.217661552245492, "grad_norm": 1.1689342260360718, "learning_rate": 9.484605263157895e-05, "loss": 0.3479, "step": 39603 }, { "epoch": 2.217717549557621, "grad_norm": 1.2687549591064453, "learning_rate": 9.484578947368421e-05, "loss": 0.3335, "step": 39604 
}, { "epoch": 2.21777354686975, "grad_norm": 1.307100534439087, "learning_rate": 9.484552631578948e-05, "loss": 0.4053, "step": 39605 }, { "epoch": 2.217829544181879, "grad_norm": 1.1839298009872437, "learning_rate": 9.484526315789473e-05, "loss": 0.3905, "step": 39606 }, { "epoch": 2.217885541494008, "grad_norm": 1.2022268772125244, "learning_rate": 9.4845e-05, "loss": 0.3875, "step": 39607 }, { "epoch": 2.217941538806137, "grad_norm": 1.246770977973938, "learning_rate": 9.484473684210526e-05, "loss": 0.5234, "step": 39608 }, { "epoch": 2.217997536118266, "grad_norm": 1.1393014192581177, "learning_rate": 9.484447368421054e-05, "loss": 0.3716, "step": 39609 }, { "epoch": 2.2180535334303952, "grad_norm": 1.2752878665924072, "learning_rate": 9.48442105263158e-05, "loss": 0.3931, "step": 39610 }, { "epoch": 2.2181095307425243, "grad_norm": 1.000220537185669, "learning_rate": 9.484394736842106e-05, "loss": 0.3441, "step": 39611 }, { "epoch": 2.2181655280546533, "grad_norm": 1.2714444398880005, "learning_rate": 9.484368421052632e-05, "loss": 0.3639, "step": 39612 }, { "epoch": 2.2182215253667823, "grad_norm": 1.600055456161499, "learning_rate": 9.484342105263159e-05, "loss": 0.4784, "step": 39613 }, { "epoch": 2.2182775226789113, "grad_norm": 1.0380969047546387, "learning_rate": 9.484315789473685e-05, "loss": 0.3703, "step": 39614 }, { "epoch": 2.2183335199910403, "grad_norm": 1.3356578350067139, "learning_rate": 9.484289473684211e-05, "loss": 0.4546, "step": 39615 }, { "epoch": 2.2183895173031694, "grad_norm": 1.2550917863845825, "learning_rate": 9.484263157894737e-05, "loss": 0.4955, "step": 39616 }, { "epoch": 2.2184455146152984, "grad_norm": 1.0455214977264404, "learning_rate": 9.484236842105264e-05, "loss": 0.3334, "step": 39617 }, { "epoch": 2.2185015119274274, "grad_norm": 1.1249582767486572, "learning_rate": 9.48421052631579e-05, "loss": 0.3429, "step": 39618 }, { "epoch": 2.2185575092395564, "grad_norm": 1.170705795288086, "learning_rate": 
9.484184210526316e-05, "loss": 0.4165, "step": 39619 }, { "epoch": 2.2186135065516854, "grad_norm": 0.9637539386749268, "learning_rate": 9.484157894736842e-05, "loss": 0.3251, "step": 39620 }, { "epoch": 2.2186695038638145, "grad_norm": 1.1349906921386719, "learning_rate": 9.484131578947368e-05, "loss": 0.3594, "step": 39621 }, { "epoch": 2.2187255011759435, "grad_norm": 1.3575472831726074, "learning_rate": 9.484105263157895e-05, "loss": 0.5506, "step": 39622 }, { "epoch": 2.2187814984880725, "grad_norm": 1.261404037475586, "learning_rate": 9.484078947368421e-05, "loss": 0.4343, "step": 39623 }, { "epoch": 2.2188374958002015, "grad_norm": 1.1019033193588257, "learning_rate": 9.484052631578947e-05, "loss": 0.3609, "step": 39624 }, { "epoch": 2.2188934931123305, "grad_norm": 1.121375560760498, "learning_rate": 9.484026315789473e-05, "loss": 0.4969, "step": 39625 }, { "epoch": 2.2189494904244595, "grad_norm": 1.2595089673995972, "learning_rate": 9.484e-05, "loss": 0.4708, "step": 39626 }, { "epoch": 2.2190054877365886, "grad_norm": 1.1131263971328735, "learning_rate": 9.483973684210527e-05, "loss": 0.4694, "step": 39627 }, { "epoch": 2.2190614850487176, "grad_norm": 1.2495172023773193, "learning_rate": 9.483947368421054e-05, "loss": 0.3802, "step": 39628 }, { "epoch": 2.2191174823608466, "grad_norm": 1.1700639724731445, "learning_rate": 9.483921052631579e-05, "loss": 0.3091, "step": 39629 }, { "epoch": 2.2191734796729756, "grad_norm": 1.186990737915039, "learning_rate": 9.483894736842106e-05, "loss": 0.3972, "step": 39630 }, { "epoch": 2.2192294769851046, "grad_norm": 1.2280267477035522, "learning_rate": 9.483868421052632e-05, "loss": 0.2716, "step": 39631 }, { "epoch": 2.2192854742972337, "grad_norm": 1.2524919509887695, "learning_rate": 9.483842105263159e-05, "loss": 0.4729, "step": 39632 }, { "epoch": 2.2193414716093627, "grad_norm": 1.2634778022766113, "learning_rate": 9.483815789473685e-05, "loss": 0.4062, "step": 39633 }, { "epoch": 2.2193974689214917, 
"grad_norm": 1.1456775665283203, "learning_rate": 9.483789473684211e-05, "loss": 0.3915, "step": 39634 }, { "epoch": 2.2194534662336207, "grad_norm": 1.087252140045166, "learning_rate": 9.483763157894737e-05, "loss": 0.416, "step": 39635 }, { "epoch": 2.2195094635457497, "grad_norm": 2.1726810932159424, "learning_rate": 9.483736842105264e-05, "loss": 0.5216, "step": 39636 }, { "epoch": 2.2195654608578788, "grad_norm": 1.1706876754760742, "learning_rate": 9.48371052631579e-05, "loss": 0.4654, "step": 39637 }, { "epoch": 2.2196214581700078, "grad_norm": 1.371441125869751, "learning_rate": 9.483684210526315e-05, "loss": 0.3873, "step": 39638 }, { "epoch": 2.219677455482137, "grad_norm": 1.204808235168457, "learning_rate": 9.483657894736842e-05, "loss": 0.404, "step": 39639 }, { "epoch": 2.219733452794266, "grad_norm": 1.261924147605896, "learning_rate": 9.483631578947368e-05, "loss": 0.4299, "step": 39640 }, { "epoch": 2.219789450106395, "grad_norm": 1.1421394348144531, "learning_rate": 9.483605263157896e-05, "loss": 0.3555, "step": 39641 }, { "epoch": 2.219845447418524, "grad_norm": 1.214897871017456, "learning_rate": 9.483578947368422e-05, "loss": 0.6148, "step": 39642 }, { "epoch": 2.219901444730653, "grad_norm": 1.0573283433914185, "learning_rate": 9.483552631578948e-05, "loss": 0.4294, "step": 39643 }, { "epoch": 2.219957442042782, "grad_norm": 10.791913986206055, "learning_rate": 9.483526315789474e-05, "loss": 0.4417, "step": 39644 }, { "epoch": 2.220013439354911, "grad_norm": 1.390397548675537, "learning_rate": 9.483500000000001e-05, "loss": 0.529, "step": 39645 }, { "epoch": 2.22006943666704, "grad_norm": 1.1331439018249512, "learning_rate": 9.483473684210527e-05, "loss": 0.4047, "step": 39646 }, { "epoch": 2.220125433979169, "grad_norm": 1.2922428846359253, "learning_rate": 9.483447368421053e-05, "loss": 0.418, "step": 39647 }, { "epoch": 2.220181431291298, "grad_norm": 1.1824982166290283, "learning_rate": 9.483421052631579e-05, "loss": 0.3895, "step": 39648 
}, { "epoch": 2.220237428603427, "grad_norm": 1.2164486646652222, "learning_rate": 9.483394736842106e-05, "loss": 0.4417, "step": 39649 }, { "epoch": 2.220293425915556, "grad_norm": 1.3991725444793701, "learning_rate": 9.483368421052632e-05, "loss": 0.4443, "step": 39650 }, { "epoch": 2.220349423227685, "grad_norm": 1.0671916007995605, "learning_rate": 9.48334210526316e-05, "loss": 0.4276, "step": 39651 }, { "epoch": 2.220405420539814, "grad_norm": 1.4109306335449219, "learning_rate": 9.483315789473684e-05, "loss": 0.409, "step": 39652 }, { "epoch": 2.220461417851943, "grad_norm": 1.2509561777114868, "learning_rate": 9.483289473684211e-05, "loss": 0.4706, "step": 39653 }, { "epoch": 2.220517415164072, "grad_norm": 1.0618574619293213, "learning_rate": 9.483263157894737e-05, "loss": 0.4609, "step": 39654 }, { "epoch": 2.220573412476201, "grad_norm": 1.336771845817566, "learning_rate": 9.483236842105263e-05, "loss": 0.5094, "step": 39655 }, { "epoch": 2.22062940978833, "grad_norm": 1.4276797771453857, "learning_rate": 9.48321052631579e-05, "loss": 0.3624, "step": 39656 }, { "epoch": 2.220685407100459, "grad_norm": 1.7243316173553467, "learning_rate": 9.483184210526315e-05, "loss": 0.4573, "step": 39657 }, { "epoch": 2.220741404412588, "grad_norm": 1.1294041872024536, "learning_rate": 9.483157894736843e-05, "loss": 0.4825, "step": 39658 }, { "epoch": 2.220797401724717, "grad_norm": 0.9865116477012634, "learning_rate": 9.483131578947369e-05, "loss": 0.3246, "step": 39659 }, { "epoch": 2.220853399036846, "grad_norm": 1.2761858701705933, "learning_rate": 9.483105263157896e-05, "loss": 0.3819, "step": 39660 }, { "epoch": 2.220909396348975, "grad_norm": 1.1085267066955566, "learning_rate": 9.48307894736842e-05, "loss": 0.3212, "step": 39661 }, { "epoch": 2.2209653936611042, "grad_norm": 1.7972567081451416, "learning_rate": 9.483052631578948e-05, "loss": 0.6801, "step": 39662 }, { "epoch": 2.2210213909732333, "grad_norm": 1.2082934379577637, "learning_rate": 
9.483026315789474e-05, "loss": 0.4397, "step": 39663 }, { "epoch": 2.2210773882853623, "grad_norm": 1.0723037719726562, "learning_rate": 9.483000000000001e-05, "loss": 0.3937, "step": 39664 }, { "epoch": 2.2211333855974913, "grad_norm": 1.058102011680603, "learning_rate": 9.482973684210527e-05, "loss": 0.3651, "step": 39665 }, { "epoch": 2.2211893829096203, "grad_norm": 2.7301034927368164, "learning_rate": 9.482947368421053e-05, "loss": 0.5489, "step": 39666 }, { "epoch": 2.2212453802217493, "grad_norm": 1.2046235799789429, "learning_rate": 9.482921052631579e-05, "loss": 0.4115, "step": 39667 }, { "epoch": 2.2213013775338784, "grad_norm": 1.2329860925674438, "learning_rate": 9.482894736842106e-05, "loss": 0.3013, "step": 39668 }, { "epoch": 2.2213573748460074, "grad_norm": 1.0649844408035278, "learning_rate": 9.482868421052632e-05, "loss": 0.3596, "step": 39669 }, { "epoch": 2.2214133721581364, "grad_norm": 1.4467285871505737, "learning_rate": 9.482842105263158e-05, "loss": 0.3552, "step": 39670 }, { "epoch": 2.2214693694702654, "grad_norm": 1.0206724405288696, "learning_rate": 9.482815789473684e-05, "loss": 0.3166, "step": 39671 }, { "epoch": 2.2215253667823944, "grad_norm": 1.1924445629119873, "learning_rate": 9.48278947368421e-05, "loss": 0.4182, "step": 39672 }, { "epoch": 2.2215813640945234, "grad_norm": 1.177398443222046, "learning_rate": 9.482763157894738e-05, "loss": 0.4786, "step": 39673 }, { "epoch": 2.2216373614066525, "grad_norm": 1.2489194869995117, "learning_rate": 9.482736842105264e-05, "loss": 0.4167, "step": 39674 }, { "epoch": 2.2216933587187815, "grad_norm": 1.434828519821167, "learning_rate": 9.48271052631579e-05, "loss": 0.4523, "step": 39675 }, { "epoch": 2.2217493560309105, "grad_norm": 1.1750794649124146, "learning_rate": 9.482684210526316e-05, "loss": 0.487, "step": 39676 }, { "epoch": 2.2218053533430395, "grad_norm": 1.2070955038070679, "learning_rate": 9.482657894736843e-05, "loss": 0.3655, "step": 39677 }, { "epoch": 2.2218613506551685, 
"grad_norm": 1.1159030199050903, "learning_rate": 9.482631578947369e-05, "loss": 0.3658, "step": 39678 }, { "epoch": 2.2219173479672976, "grad_norm": 1.3631054162979126, "learning_rate": 9.482605263157895e-05, "loss": 0.4561, "step": 39679 }, { "epoch": 2.2219733452794266, "grad_norm": 1.663633942604065, "learning_rate": 9.482578947368421e-05, "loss": 0.4248, "step": 39680 }, { "epoch": 2.2220293425915556, "grad_norm": 1.2630255222320557, "learning_rate": 9.482552631578948e-05, "loss": 0.3751, "step": 39681 }, { "epoch": 2.2220853399036846, "grad_norm": 1.3706482648849487, "learning_rate": 9.482526315789474e-05, "loss": 0.3752, "step": 39682 }, { "epoch": 2.2221413372158136, "grad_norm": 1.5206102132797241, "learning_rate": 9.482500000000001e-05, "loss": 0.4022, "step": 39683 }, { "epoch": 2.2221973345279427, "grad_norm": 1.2914458513259888, "learning_rate": 9.482473684210526e-05, "loss": 0.4704, "step": 39684 }, { "epoch": 2.2222533318400717, "grad_norm": 0.9695793390274048, "learning_rate": 9.482447368421053e-05, "loss": 0.3478, "step": 39685 }, { "epoch": 2.2223093291522007, "grad_norm": 1.1576822996139526, "learning_rate": 9.48242105263158e-05, "loss": 0.384, "step": 39686 }, { "epoch": 2.2223653264643297, "grad_norm": 1.2858967781066895, "learning_rate": 9.482394736842107e-05, "loss": 0.5366, "step": 39687 }, { "epoch": 2.2224213237764587, "grad_norm": 1.0221651792526245, "learning_rate": 9.482368421052633e-05, "loss": 0.376, "step": 39688 }, { "epoch": 2.2224773210885878, "grad_norm": 1.1719318628311157, "learning_rate": 9.482342105263157e-05, "loss": 0.3026, "step": 39689 }, { "epoch": 2.2225333184007168, "grad_norm": 1.4125910997390747, "learning_rate": 9.482315789473685e-05, "loss": 0.338, "step": 39690 }, { "epoch": 2.222589315712846, "grad_norm": 1.1082124710083008, "learning_rate": 9.48228947368421e-05, "loss": 0.3626, "step": 39691 }, { "epoch": 2.222645313024975, "grad_norm": 1.2429324388504028, "learning_rate": 9.482263157894738e-05, "loss": 0.5297, 
"step": 39692 }, { "epoch": 2.222701310337104, "grad_norm": 1.2607945203781128, "learning_rate": 9.482236842105263e-05, "loss": 0.4846, "step": 39693 }, { "epoch": 2.222757307649233, "grad_norm": 1.5364521741867065, "learning_rate": 9.48221052631579e-05, "loss": 0.4344, "step": 39694 }, { "epoch": 2.222813304961362, "grad_norm": 1.3926931619644165, "learning_rate": 9.482184210526316e-05, "loss": 0.3599, "step": 39695 }, { "epoch": 2.222869302273491, "grad_norm": 1.4049726724624634, "learning_rate": 9.482157894736843e-05, "loss": 0.3878, "step": 39696 }, { "epoch": 2.22292529958562, "grad_norm": 1.5774027109146118, "learning_rate": 9.482131578947369e-05, "loss": 0.5274, "step": 39697 }, { "epoch": 2.222981296897749, "grad_norm": 1.7156578302383423, "learning_rate": 9.482105263157895e-05, "loss": 0.4011, "step": 39698 }, { "epoch": 2.223037294209878, "grad_norm": 1.2779486179351807, "learning_rate": 9.482078947368421e-05, "loss": 0.4061, "step": 39699 }, { "epoch": 2.223093291522007, "grad_norm": 1.2724679708480835, "learning_rate": 9.482052631578948e-05, "loss": 0.3558, "step": 39700 }, { "epoch": 2.223149288834136, "grad_norm": 1.2752137184143066, "learning_rate": 9.482026315789474e-05, "loss": 0.4258, "step": 39701 }, { "epoch": 2.223205286146265, "grad_norm": 1.357906699180603, "learning_rate": 9.482e-05, "loss": 0.3723, "step": 39702 }, { "epoch": 2.223261283458394, "grad_norm": 1.6095585823059082, "learning_rate": 9.481973684210526e-05, "loss": 0.4974, "step": 39703 }, { "epoch": 2.223317280770523, "grad_norm": 1.19790518283844, "learning_rate": 9.481947368421054e-05, "loss": 0.4012, "step": 39704 }, { "epoch": 2.223373278082652, "grad_norm": 1.2199758291244507, "learning_rate": 9.48192105263158e-05, "loss": 0.5005, "step": 39705 }, { "epoch": 2.223429275394781, "grad_norm": 1.2912514209747314, "learning_rate": 9.481894736842106e-05, "loss": 0.3701, "step": 39706 }, { "epoch": 2.22348527270691, "grad_norm": 1.1865085363388062, "learning_rate": 
9.481868421052632e-05, "loss": 0.4453, "step": 39707 }, { "epoch": 2.223541270019039, "grad_norm": 2.6361682415008545, "learning_rate": 9.481842105263158e-05, "loss": 0.3508, "step": 39708 }, { "epoch": 2.223597267331168, "grad_norm": 1.3985697031021118, "learning_rate": 9.481815789473685e-05, "loss": 0.4197, "step": 39709 }, { "epoch": 2.223653264643297, "grad_norm": 1.170430302619934, "learning_rate": 9.481789473684211e-05, "loss": 0.4483, "step": 39710 }, { "epoch": 2.223709261955426, "grad_norm": 1.0090118646621704, "learning_rate": 9.481763157894737e-05, "loss": 0.3575, "step": 39711 }, { "epoch": 2.223765259267555, "grad_norm": 1.0705267190933228, "learning_rate": 9.481736842105263e-05, "loss": 0.348, "step": 39712 }, { "epoch": 2.223821256579684, "grad_norm": 1.1835777759552002, "learning_rate": 9.48171052631579e-05, "loss": 0.4068, "step": 39713 }, { "epoch": 2.2238772538918132, "grad_norm": 1.257071852684021, "learning_rate": 9.481684210526316e-05, "loss": 0.4582, "step": 39714 }, { "epoch": 2.2239332512039423, "grad_norm": 2.141129732131958, "learning_rate": 9.481657894736843e-05, "loss": 0.4844, "step": 39715 }, { "epoch": 2.2239892485160713, "grad_norm": 1.0961389541625977, "learning_rate": 9.481631578947368e-05, "loss": 0.4245, "step": 39716 }, { "epoch": 2.2240452458282003, "grad_norm": 1.2191020250320435, "learning_rate": 9.481605263157895e-05, "loss": 0.4326, "step": 39717 }, { "epoch": 2.2241012431403293, "grad_norm": 1.1012532711029053, "learning_rate": 9.481578947368421e-05, "loss": 0.3926, "step": 39718 }, { "epoch": 2.2241572404524583, "grad_norm": 1.0334488153457642, "learning_rate": 9.481552631578949e-05, "loss": 0.4717, "step": 39719 }, { "epoch": 2.2242132377645873, "grad_norm": 1.1681933403015137, "learning_rate": 9.481526315789475e-05, "loss": 0.3487, "step": 39720 }, { "epoch": 2.2242692350767164, "grad_norm": 1.2991100549697876, "learning_rate": 9.4815e-05, "loss": 0.4101, "step": 39721 }, { "epoch": 2.2243252323888454, "grad_norm": 
1.236640214920044, "learning_rate": 9.481473684210527e-05, "loss": 0.5678, "step": 39722 }, { "epoch": 2.2243812297009744, "grad_norm": 1.0643407106399536, "learning_rate": 9.481447368421054e-05, "loss": 0.3461, "step": 39723 }, { "epoch": 2.2244372270131034, "grad_norm": 1.4532872438430786, "learning_rate": 9.48142105263158e-05, "loss": 0.3079, "step": 39724 }, { "epoch": 2.2244932243252324, "grad_norm": 1.170744776725769, "learning_rate": 9.481394736842106e-05, "loss": 0.4741, "step": 39725 }, { "epoch": 2.2245492216373615, "grad_norm": 1.157811164855957, "learning_rate": 9.481368421052632e-05, "loss": 0.3857, "step": 39726 }, { "epoch": 2.2246052189494905, "grad_norm": 1.125324010848999, "learning_rate": 9.481342105263158e-05, "loss": 0.4201, "step": 39727 }, { "epoch": 2.2246612162616195, "grad_norm": 1.4654470682144165, "learning_rate": 9.481315789473685e-05, "loss": 0.3172, "step": 39728 }, { "epoch": 2.2247172135737485, "grad_norm": 1.0262280702590942, "learning_rate": 9.481289473684211e-05, "loss": 0.4554, "step": 39729 }, { "epoch": 2.2247732108858775, "grad_norm": 1.249243974685669, "learning_rate": 9.481263157894737e-05, "loss": 0.3718, "step": 39730 }, { "epoch": 2.2248292081980066, "grad_norm": 1.0469605922698975, "learning_rate": 9.481236842105263e-05, "loss": 0.3856, "step": 39731 }, { "epoch": 2.2248852055101356, "grad_norm": 1.4074395895004272, "learning_rate": 9.48121052631579e-05, "loss": 0.4509, "step": 39732 }, { "epoch": 2.2249412028222646, "grad_norm": 1.3245704174041748, "learning_rate": 9.481184210526316e-05, "loss": 0.478, "step": 39733 }, { "epoch": 2.2249972001343936, "grad_norm": 1.2466191053390503, "learning_rate": 9.481157894736842e-05, "loss": 0.3827, "step": 39734 }, { "epoch": 2.2250531974465226, "grad_norm": 1.398132085800171, "learning_rate": 9.481131578947368e-05, "loss": 0.4098, "step": 39735 }, { "epoch": 2.2251091947586517, "grad_norm": 1.1671637296676636, "learning_rate": 9.481105263157896e-05, "loss": 0.3573, "step": 39736 
}, { "epoch": 2.2251651920707807, "grad_norm": 1.032700777053833, "learning_rate": 9.481078947368422e-05, "loss": 0.4409, "step": 39737 }, { "epoch": 2.2252211893829097, "grad_norm": 1.1913005113601685, "learning_rate": 9.481052631578949e-05, "loss": 0.4002, "step": 39738 }, { "epoch": 2.2252771866950387, "grad_norm": 2.002268075942993, "learning_rate": 9.481026315789474e-05, "loss": 0.4355, "step": 39739 }, { "epoch": 2.2253331840071677, "grad_norm": 1.1960407495498657, "learning_rate": 9.481000000000001e-05, "loss": 0.4236, "step": 39740 }, { "epoch": 2.2253891813192967, "grad_norm": 1.2726662158966064, "learning_rate": 9.480973684210527e-05, "loss": 0.3861, "step": 39741 }, { "epoch": 2.2254451786314258, "grad_norm": 1.1637592315673828, "learning_rate": 9.480947368421053e-05, "loss": 0.4101, "step": 39742 }, { "epoch": 2.225501175943555, "grad_norm": 0.940432608127594, "learning_rate": 9.48092105263158e-05, "loss": 0.3208, "step": 39743 }, { "epoch": 2.225557173255684, "grad_norm": 1.6558113098144531, "learning_rate": 9.480894736842105e-05, "loss": 0.3767, "step": 39744 }, { "epoch": 2.225613170567813, "grad_norm": 1.0391144752502441, "learning_rate": 9.480868421052632e-05, "loss": 0.4304, "step": 39745 }, { "epoch": 2.225669167879942, "grad_norm": 1.2625582218170166, "learning_rate": 9.480842105263158e-05, "loss": 0.5164, "step": 39746 }, { "epoch": 2.225725165192071, "grad_norm": 1.1927005052566528, "learning_rate": 9.480815789473685e-05, "loss": 0.4818, "step": 39747 }, { "epoch": 2.2257811625042, "grad_norm": 1.2076629400253296, "learning_rate": 9.48078947368421e-05, "loss": 0.3365, "step": 39748 }, { "epoch": 2.225837159816329, "grad_norm": 1.2277437448501587, "learning_rate": 9.480763157894737e-05, "loss": 0.3782, "step": 39749 }, { "epoch": 2.225893157128458, "grad_norm": 1.1514946222305298, "learning_rate": 9.480736842105263e-05, "loss": 0.4731, "step": 39750 }, { "epoch": 2.225949154440587, "grad_norm": 1.470760703086853, "learning_rate": 
9.48071052631579e-05, "loss": 0.4129, "step": 39751 }, { "epoch": 2.226005151752716, "grad_norm": 0.8810442686080933, "learning_rate": 9.480684210526317e-05, "loss": 0.2546, "step": 39752 }, { "epoch": 2.226061149064845, "grad_norm": 1.1701087951660156, "learning_rate": 9.480657894736843e-05, "loss": 0.3746, "step": 39753 }, { "epoch": 2.226117146376974, "grad_norm": 1.3707770109176636, "learning_rate": 9.480631578947369e-05, "loss": 0.362, "step": 39754 }, { "epoch": 2.226173143689103, "grad_norm": 1.1347661018371582, "learning_rate": 9.480605263157896e-05, "loss": 0.3165, "step": 39755 }, { "epoch": 2.226229141001232, "grad_norm": 1.1795978546142578, "learning_rate": 9.480578947368422e-05, "loss": 0.4668, "step": 39756 }, { "epoch": 2.226285138313361, "grad_norm": 1.299878478050232, "learning_rate": 9.480552631578948e-05, "loss": 0.4173, "step": 39757 }, { "epoch": 2.22634113562549, "grad_norm": 0.974886417388916, "learning_rate": 9.480526315789474e-05, "loss": 0.318, "step": 39758 }, { "epoch": 2.226397132937619, "grad_norm": 1.0609098672866821, "learning_rate": 9.4805e-05, "loss": 0.4361, "step": 39759 }, { "epoch": 2.226453130249748, "grad_norm": 1.8939884901046753, "learning_rate": 9.480473684210527e-05, "loss": 0.3306, "step": 39760 }, { "epoch": 2.226509127561877, "grad_norm": 1.3681687116622925, "learning_rate": 9.480447368421053e-05, "loss": 0.426, "step": 39761 }, { "epoch": 2.226565124874006, "grad_norm": 1.3573007583618164, "learning_rate": 9.480421052631579e-05, "loss": 0.5548, "step": 39762 }, { "epoch": 2.226621122186135, "grad_norm": 1.0843397378921509, "learning_rate": 9.480394736842105e-05, "loss": 0.3726, "step": 39763 }, { "epoch": 2.226677119498264, "grad_norm": 1.2007956504821777, "learning_rate": 9.480368421052632e-05, "loss": 0.4688, "step": 39764 }, { "epoch": 2.226733116810393, "grad_norm": 1.2222439050674438, "learning_rate": 9.480342105263158e-05, "loss": 0.3997, "step": 39765 }, { "epoch": 2.2267891141225222, "grad_norm": 
1.286411166191101, "learning_rate": 9.480315789473684e-05, "loss": 0.6286, "step": 39766 }, { "epoch": 2.2268451114346512, "grad_norm": 1.085105538368225, "learning_rate": 9.48028947368421e-05, "loss": 0.4206, "step": 39767 }, { "epoch": 2.2269011087467803, "grad_norm": 1.2870198488235474, "learning_rate": 9.480263157894738e-05, "loss": 0.384, "step": 39768 }, { "epoch": 2.2269571060589093, "grad_norm": 1.1900490522384644, "learning_rate": 9.480236842105264e-05, "loss": 0.3915, "step": 39769 }, { "epoch": 2.2270131033710383, "grad_norm": 1.1406409740447998, "learning_rate": 9.480210526315791e-05, "loss": 0.4217, "step": 39770 }, { "epoch": 2.2270691006831673, "grad_norm": 1.1910468339920044, "learning_rate": 9.480184210526316e-05, "loss": 0.3716, "step": 39771 }, { "epoch": 2.2271250979952963, "grad_norm": 1.4051614999771118, "learning_rate": 9.480157894736843e-05, "loss": 0.433, "step": 39772 }, { "epoch": 2.2271810953074254, "grad_norm": 1.2196649312973022, "learning_rate": 9.480131578947369e-05, "loss": 0.3273, "step": 39773 }, { "epoch": 2.2272370926195544, "grad_norm": 1.033158302307129, "learning_rate": 9.480105263157896e-05, "loss": 0.3351, "step": 39774 }, { "epoch": 2.2272930899316834, "grad_norm": 1.2032259702682495, "learning_rate": 9.480078947368422e-05, "loss": 0.3464, "step": 39775 }, { "epoch": 2.2273490872438124, "grad_norm": 1.281961441040039, "learning_rate": 9.480052631578947e-05, "loss": 0.4609, "step": 39776 }, { "epoch": 2.2274050845559414, "grad_norm": 4.507180213928223, "learning_rate": 9.480026315789474e-05, "loss": 0.3777, "step": 39777 }, { "epoch": 2.2274610818680705, "grad_norm": 1.2982254028320312, "learning_rate": 9.48e-05, "loss": 0.4758, "step": 39778 }, { "epoch": 2.2275170791801995, "grad_norm": 1.4358621835708618, "learning_rate": 9.479973684210527e-05, "loss": 0.406, "step": 39779 }, { "epoch": 2.2275730764923285, "grad_norm": 1.2582206726074219, "learning_rate": 9.479947368421053e-05, "loss": 0.4762, "step": 39780 }, { "epoch": 
2.2276290738044575, "grad_norm": 1.2534246444702148, "learning_rate": 9.479921052631579e-05, "loss": 0.4386, "step": 39781 }, { "epoch": 2.2276850711165865, "grad_norm": 2.333282232284546, "learning_rate": 9.479894736842105e-05, "loss": 0.3885, "step": 39782 }, { "epoch": 2.2277410684287156, "grad_norm": 1.3362810611724854, "learning_rate": 9.479868421052633e-05, "loss": 0.464, "step": 39783 }, { "epoch": 2.2277970657408446, "grad_norm": 1.3119702339172363, "learning_rate": 9.479842105263159e-05, "loss": 0.6372, "step": 39784 }, { "epoch": 2.2278530630529736, "grad_norm": 1.0718625783920288, "learning_rate": 9.479815789473685e-05, "loss": 0.3374, "step": 39785 }, { "epoch": 2.2279090603651026, "grad_norm": 1.34573495388031, "learning_rate": 9.47978947368421e-05, "loss": 0.3578, "step": 39786 }, { "epoch": 2.2279650576772316, "grad_norm": 1.118297815322876, "learning_rate": 9.479763157894738e-05, "loss": 0.291, "step": 39787 }, { "epoch": 2.2280210549893606, "grad_norm": 1.1476413011550903, "learning_rate": 9.479736842105264e-05, "loss": 0.3062, "step": 39788 }, { "epoch": 2.2280770523014897, "grad_norm": 1.116910696029663, "learning_rate": 9.47971052631579e-05, "loss": 0.4255, "step": 39789 }, { "epoch": 2.2281330496136187, "grad_norm": 1.252384066581726, "learning_rate": 9.479684210526316e-05, "loss": 0.4015, "step": 39790 }, { "epoch": 2.2281890469257477, "grad_norm": 1.2172892093658447, "learning_rate": 9.479657894736843e-05, "loss": 0.4348, "step": 39791 }, { "epoch": 2.2282450442378767, "grad_norm": 1.406664490699768, "learning_rate": 9.479631578947369e-05, "loss": 0.4189, "step": 39792 }, { "epoch": 2.2283010415500057, "grad_norm": 1.3978335857391357, "learning_rate": 9.479605263157895e-05, "loss": 0.3727, "step": 39793 }, { "epoch": 2.2283570388621348, "grad_norm": 1.137801170349121, "learning_rate": 9.479578947368421e-05, "loss": 0.3744, "step": 39794 }, { "epoch": 2.228413036174264, "grad_norm": 1.4093049764633179, "learning_rate": 9.479552631578947e-05, 
"loss": 0.6216, "step": 39795 }, { "epoch": 2.228469033486393, "grad_norm": 1.2021464109420776, "learning_rate": 9.479526315789474e-05, "loss": 0.3637, "step": 39796 }, { "epoch": 2.228525030798522, "grad_norm": 1.1721134185791016, "learning_rate": 9.4795e-05, "loss": 0.3977, "step": 39797 }, { "epoch": 2.228581028110651, "grad_norm": 0.9753643274307251, "learning_rate": 9.479473684210528e-05, "loss": 0.2699, "step": 39798 }, { "epoch": 2.22863702542278, "grad_norm": 1.1609952449798584, "learning_rate": 9.479447368421052e-05, "loss": 0.4145, "step": 39799 }, { "epoch": 2.228693022734909, "grad_norm": 1.2368007898330688, "learning_rate": 9.47942105263158e-05, "loss": 0.5363, "step": 39800 }, { "epoch": 2.228749020047038, "grad_norm": 0.9663000106811523, "learning_rate": 9.479394736842106e-05, "loss": 0.249, "step": 39801 }, { "epoch": 2.228805017359167, "grad_norm": 1.2016947269439697, "learning_rate": 9.479368421052633e-05, "loss": 0.3795, "step": 39802 }, { "epoch": 2.228861014671296, "grad_norm": 1.0489708185195923, "learning_rate": 9.479342105263157e-05, "loss": 0.3504, "step": 39803 }, { "epoch": 2.228917011983425, "grad_norm": 0.964414656162262, "learning_rate": 9.479315789473685e-05, "loss": 0.3381, "step": 39804 }, { "epoch": 2.228973009295554, "grad_norm": 1.1644946336746216, "learning_rate": 9.479289473684211e-05, "loss": 0.3507, "step": 39805 }, { "epoch": 2.229029006607683, "grad_norm": 1.5401169061660767, "learning_rate": 9.479263157894738e-05, "loss": 0.4503, "step": 39806 }, { "epoch": 2.229085003919812, "grad_norm": 1.1361335515975952, "learning_rate": 9.479236842105264e-05, "loss": 0.4594, "step": 39807 }, { "epoch": 2.229141001231941, "grad_norm": 1.2504023313522339, "learning_rate": 9.47921052631579e-05, "loss": 0.3669, "step": 39808 }, { "epoch": 2.22919699854407, "grad_norm": 1.2397444248199463, "learning_rate": 9.479184210526316e-05, "loss": 0.2995, "step": 39809 }, { "epoch": 2.229252995856199, "grad_norm": 1.14100980758667, "learning_rate": 
9.479157894736842e-05, "loss": 0.3375, "step": 39810 }, { "epoch": 2.229308993168328, "grad_norm": 1.2952340841293335, "learning_rate": 9.47913157894737e-05, "loss": 0.4742, "step": 39811 }, { "epoch": 2.229364990480457, "grad_norm": 1.1374386548995972, "learning_rate": 9.479105263157895e-05, "loss": 0.385, "step": 39812 }, { "epoch": 2.229420987792586, "grad_norm": 1.1735543012619019, "learning_rate": 9.479078947368421e-05, "loss": 0.4437, "step": 39813 }, { "epoch": 2.229476985104715, "grad_norm": 1.3908323049545288, "learning_rate": 9.479052631578947e-05, "loss": 0.4092, "step": 39814 }, { "epoch": 2.229532982416844, "grad_norm": 1.7981433868408203, "learning_rate": 9.479026315789475e-05, "loss": 0.3561, "step": 39815 }, { "epoch": 2.229588979728973, "grad_norm": 1.3346965312957764, "learning_rate": 9.479e-05, "loss": 0.4796, "step": 39816 }, { "epoch": 2.229644977041102, "grad_norm": 1.8139790296554565, "learning_rate": 9.478973684210527e-05, "loss": 0.3339, "step": 39817 }, { "epoch": 2.2297009743532312, "grad_norm": 1.2758008241653442, "learning_rate": 9.478947368421053e-05, "loss": 0.4748, "step": 39818 }, { "epoch": 2.2297569716653602, "grad_norm": 1.1799649000167847, "learning_rate": 9.47892105263158e-05, "loss": 0.3774, "step": 39819 }, { "epoch": 2.2298129689774893, "grad_norm": 1.022952675819397, "learning_rate": 9.478894736842106e-05, "loss": 0.3983, "step": 39820 }, { "epoch": 2.2298689662896183, "grad_norm": 1.3135348558425903, "learning_rate": 9.478868421052632e-05, "loss": 0.3605, "step": 39821 }, { "epoch": 2.2299249636017473, "grad_norm": 1.201823115348816, "learning_rate": 9.478842105263158e-05, "loss": 0.3748, "step": 39822 }, { "epoch": 2.2299809609138763, "grad_norm": 1.1420440673828125, "learning_rate": 9.478815789473685e-05, "loss": 0.4037, "step": 39823 }, { "epoch": 2.2300369582260053, "grad_norm": 1.2136605978012085, "learning_rate": 9.478789473684211e-05, "loss": 0.3921, "step": 39824 }, { "epoch": 2.2300929555381344, "grad_norm": 
1.9451848268508911, "learning_rate": 9.478763157894738e-05, "loss": 0.5273, "step": 39825 }, { "epoch": 2.2301489528502634, "grad_norm": 1.184277892112732, "learning_rate": 9.478736842105263e-05, "loss": 0.31, "step": 39826 }, { "epoch": 2.2302049501623924, "grad_norm": 1.0472304821014404, "learning_rate": 9.478710526315789e-05, "loss": 0.4147, "step": 39827 }, { "epoch": 2.2302609474745214, "grad_norm": 1.1181080341339111, "learning_rate": 9.478684210526316e-05, "loss": 0.398, "step": 39828 }, { "epoch": 2.2303169447866504, "grad_norm": 1.3620513677597046, "learning_rate": 9.478657894736842e-05, "loss": 0.4324, "step": 39829 }, { "epoch": 2.2303729420987795, "grad_norm": 1.1015474796295166, "learning_rate": 9.47863157894737e-05, "loss": 0.4513, "step": 39830 }, { "epoch": 2.2304289394109085, "grad_norm": 1.3489550352096558, "learning_rate": 9.478605263157894e-05, "loss": 0.3804, "step": 39831 }, { "epoch": 2.2304849367230375, "grad_norm": 1.2574396133422852, "learning_rate": 9.478578947368422e-05, "loss": 0.3538, "step": 39832 }, { "epoch": 2.2305409340351665, "grad_norm": 1.5626978874206543, "learning_rate": 9.478552631578948e-05, "loss": 0.3698, "step": 39833 }, { "epoch": 2.2305969313472955, "grad_norm": 1.0399703979492188, "learning_rate": 9.478526315789475e-05, "loss": 0.4223, "step": 39834 }, { "epoch": 2.2306529286594245, "grad_norm": 1.1375476121902466, "learning_rate": 9.478500000000001e-05, "loss": 0.5115, "step": 39835 }, { "epoch": 2.2307089259715536, "grad_norm": 1.6256465911865234, "learning_rate": 9.478473684210527e-05, "loss": 0.4987, "step": 39836 }, { "epoch": 2.2307649232836826, "grad_norm": 1.2412623167037964, "learning_rate": 9.478447368421053e-05, "loss": 0.3802, "step": 39837 }, { "epoch": 2.2308209205958116, "grad_norm": 1.166092038154602, "learning_rate": 9.47842105263158e-05, "loss": 0.4695, "step": 39838 }, { "epoch": 2.2308769179079406, "grad_norm": 1.1605539321899414, "learning_rate": 9.478394736842106e-05, "loss": 0.4198, "step": 
39839 }, { "epoch": 2.2309329152200696, "grad_norm": 1.2228286266326904, "learning_rate": 9.478368421052632e-05, "loss": 0.4252, "step": 39840 }, { "epoch": 2.230988912532198, "grad_norm": 0.9917532801628113, "learning_rate": 9.478342105263158e-05, "loss": 0.3579, "step": 39841 }, { "epoch": 2.2310449098443277, "grad_norm": 1.3003849983215332, "learning_rate": 9.478315789473685e-05, "loss": 0.3199, "step": 39842 }, { "epoch": 2.2311009071564563, "grad_norm": 1.5952361822128296, "learning_rate": 9.478289473684211e-05, "loss": 0.3744, "step": 39843 }, { "epoch": 2.2311569044685857, "grad_norm": 1.2072569131851196, "learning_rate": 9.478263157894737e-05, "loss": 0.5561, "step": 39844 }, { "epoch": 2.2312129017807143, "grad_norm": 1.322372555732727, "learning_rate": 9.478236842105263e-05, "loss": 0.4333, "step": 39845 }, { "epoch": 2.2312688990928438, "grad_norm": 1.1622461080551147, "learning_rate": 9.478210526315789e-05, "loss": 0.4416, "step": 39846 }, { "epoch": 2.2313248964049723, "grad_norm": 20.153928756713867, "learning_rate": 9.478184210526317e-05, "loss": 0.3336, "step": 39847 }, { "epoch": 2.231380893717102, "grad_norm": 1.6961157321929932, "learning_rate": 9.478157894736843e-05, "loss": 0.4616, "step": 39848 }, { "epoch": 2.2314368910292304, "grad_norm": 1.068688988685608, "learning_rate": 9.478131578947368e-05, "loss": 0.4314, "step": 39849 }, { "epoch": 2.23149288834136, "grad_norm": 1.40049147605896, "learning_rate": 9.478105263157894e-05, "loss": 0.3476, "step": 39850 }, { "epoch": 2.2315488856534884, "grad_norm": 1.318068504333496, "learning_rate": 9.478078947368422e-05, "loss": 0.381, "step": 39851 }, { "epoch": 2.231604882965618, "grad_norm": 1.2210620641708374, "learning_rate": 9.478052631578948e-05, "loss": 0.3842, "step": 39852 }, { "epoch": 2.2316608802777465, "grad_norm": 1.0520250797271729, "learning_rate": 9.478026315789474e-05, "loss": 0.3699, "step": 39853 }, { "epoch": 2.231716877589876, "grad_norm": 1.6190332174301147, "learning_rate": 
9.478e-05, "loss": 0.4258, "step": 39854 }, { "epoch": 2.2317728749020045, "grad_norm": 1.3141025304794312, "learning_rate": 9.477973684210527e-05, "loss": 0.3708, "step": 39855 }, { "epoch": 2.231828872214134, "grad_norm": 1.2058528661727905, "learning_rate": 9.477947368421053e-05, "loss": 0.3888, "step": 39856 }, { "epoch": 2.2318848695262625, "grad_norm": 1.110175371170044, "learning_rate": 9.47792105263158e-05, "loss": 0.3429, "step": 39857 }, { "epoch": 2.231940866838392, "grad_norm": 1.2571953535079956, "learning_rate": 9.477894736842105e-05, "loss": 0.2947, "step": 39858 }, { "epoch": 2.2319968641505206, "grad_norm": 1.224549651145935, "learning_rate": 9.477868421052632e-05, "loss": 0.348, "step": 39859 }, { "epoch": 2.23205286146265, "grad_norm": 1.143818736076355, "learning_rate": 9.477842105263158e-05, "loss": 0.4194, "step": 39860 }, { "epoch": 2.2321088587747786, "grad_norm": 1.4943702220916748, "learning_rate": 9.477815789473686e-05, "loss": 0.4468, "step": 39861 }, { "epoch": 2.2321648560869076, "grad_norm": 1.168671727180481, "learning_rate": 9.477789473684212e-05, "loss": 0.4272, "step": 39862 }, { "epoch": 2.2322208533990366, "grad_norm": 1.1281013488769531, "learning_rate": 9.477763157894736e-05, "loss": 0.4344, "step": 39863 }, { "epoch": 2.2322768507111657, "grad_norm": 1.409969687461853, "learning_rate": 9.477736842105264e-05, "loss": 0.4032, "step": 39864 }, { "epoch": 2.2323328480232947, "grad_norm": 1.2034969329833984, "learning_rate": 9.47771052631579e-05, "loss": 0.4199, "step": 39865 }, { "epoch": 2.2323888453354237, "grad_norm": 1.4730056524276733, "learning_rate": 9.477684210526317e-05, "loss": 0.5229, "step": 39866 }, { "epoch": 2.2324448426475527, "grad_norm": 1.0847047567367554, "learning_rate": 9.477657894736843e-05, "loss": 0.4607, "step": 39867 }, { "epoch": 2.2325008399596817, "grad_norm": 1.590936303138733, "learning_rate": 9.477631578947369e-05, "loss": 0.4402, "step": 39868 }, { "epoch": 2.2325568372718108, "grad_norm": 
1.1979938745498657, "learning_rate": 9.477605263157895e-05, "loss": 0.4177, "step": 39869 }, { "epoch": 2.2326128345839398, "grad_norm": 1.2597712278366089, "learning_rate": 9.477578947368422e-05, "loss": 0.5459, "step": 39870 }, { "epoch": 2.232668831896069, "grad_norm": 1.1838299036026, "learning_rate": 9.477552631578948e-05, "loss": 0.3987, "step": 39871 }, { "epoch": 2.232724829208198, "grad_norm": 1.2514230012893677, "learning_rate": 9.477526315789474e-05, "loss": 0.3958, "step": 39872 }, { "epoch": 2.232780826520327, "grad_norm": 1.5621144771575928, "learning_rate": 9.4775e-05, "loss": 0.3974, "step": 39873 }, { "epoch": 2.232836823832456, "grad_norm": 1.7287871837615967, "learning_rate": 9.477473684210527e-05, "loss": 0.5622, "step": 39874 }, { "epoch": 2.232892821144585, "grad_norm": 1.0921645164489746, "learning_rate": 9.477447368421053e-05, "loss": 0.337, "step": 39875 }, { "epoch": 2.232948818456714, "grad_norm": 1.237606406211853, "learning_rate": 9.477421052631579e-05, "loss": 0.4316, "step": 39876 }, { "epoch": 2.233004815768843, "grad_norm": 1.2418818473815918, "learning_rate": 9.477394736842105e-05, "loss": 0.4315, "step": 39877 }, { "epoch": 2.233060813080972, "grad_norm": 1.1087077856063843, "learning_rate": 9.477368421052633e-05, "loss": 0.3977, "step": 39878 }, { "epoch": 2.233116810393101, "grad_norm": 118.37350463867188, "learning_rate": 9.477342105263159e-05, "loss": 0.5059, "step": 39879 }, { "epoch": 2.23317280770523, "grad_norm": 1.2595902681350708, "learning_rate": 9.477315789473684e-05, "loss": 0.5157, "step": 39880 }, { "epoch": 2.233228805017359, "grad_norm": 1.1393039226531982, "learning_rate": 9.47728947368421e-05, "loss": 0.5149, "step": 39881 }, { "epoch": 2.233284802329488, "grad_norm": 1.2310459613800049, "learning_rate": 9.477263157894736e-05, "loss": 0.3994, "step": 39882 }, { "epoch": 2.233340799641617, "grad_norm": 1.1022762060165405, "learning_rate": 9.477236842105264e-05, "loss": 0.348, "step": 39883 }, { "epoch": 
2.233396796953746, "grad_norm": 1.0758891105651855, "learning_rate": 9.47721052631579e-05, "loss": 0.4134, "step": 39884 }, { "epoch": 2.233452794265875, "grad_norm": 1.349063515663147, "learning_rate": 9.477184210526317e-05, "loss": 0.6077, "step": 39885 }, { "epoch": 2.233508791578004, "grad_norm": 1.756942868232727, "learning_rate": 9.477157894736842e-05, "loss": 0.5718, "step": 39886 }, { "epoch": 2.233564788890133, "grad_norm": 1.3244483470916748, "learning_rate": 9.477131578947369e-05, "loss": 0.3469, "step": 39887 }, { "epoch": 2.233620786202262, "grad_norm": 1.1796387434005737, "learning_rate": 9.477105263157895e-05, "loss": 0.457, "step": 39888 }, { "epoch": 2.233676783514391, "grad_norm": 1.3100197315216064, "learning_rate": 9.477078947368422e-05, "loss": 0.4546, "step": 39889 }, { "epoch": 2.23373278082652, "grad_norm": 1.0772711038589478, "learning_rate": 9.477052631578948e-05, "loss": 0.312, "step": 39890 }, { "epoch": 2.233788778138649, "grad_norm": 1.1480755805969238, "learning_rate": 9.477026315789474e-05, "loss": 0.3906, "step": 39891 }, { "epoch": 2.233844775450778, "grad_norm": 1.4124020338058472, "learning_rate": 9.477e-05, "loss": 0.3352, "step": 39892 }, { "epoch": 2.233900772762907, "grad_norm": 1.0873157978057861, "learning_rate": 9.476973684210528e-05, "loss": 0.4241, "step": 39893 }, { "epoch": 2.2339567700750362, "grad_norm": 1.2109360694885254, "learning_rate": 9.476947368421054e-05, "loss": 0.3599, "step": 39894 }, { "epoch": 2.2340127673871653, "grad_norm": 1.368592619895935, "learning_rate": 9.47692105263158e-05, "loss": 0.4567, "step": 39895 }, { "epoch": 2.2340687646992943, "grad_norm": 1.1569875478744507, "learning_rate": 9.476894736842105e-05, "loss": 0.3024, "step": 39896 }, { "epoch": 2.2341247620114233, "grad_norm": 1.054959774017334, "learning_rate": 9.476868421052631e-05, "loss": 0.417, "step": 39897 }, { "epoch": 2.2341807593235523, "grad_norm": 1.1425422430038452, "learning_rate": 9.476842105263159e-05, "loss": 0.3577, 
"step": 39898 }, { "epoch": 2.2342367566356813, "grad_norm": 1.1021727323532104, "learning_rate": 9.476815789473685e-05, "loss": 0.4438, "step": 39899 }, { "epoch": 2.2342927539478104, "grad_norm": 1.489829182624817, "learning_rate": 9.476789473684211e-05, "loss": 0.4864, "step": 39900 }, { "epoch": 2.2343487512599394, "grad_norm": 1.2514277696609497, "learning_rate": 9.476763157894737e-05, "loss": 0.3668, "step": 39901 }, { "epoch": 2.2344047485720684, "grad_norm": 1.1762288808822632, "learning_rate": 9.476736842105264e-05, "loss": 0.47, "step": 39902 }, { "epoch": 2.2344607458841974, "grad_norm": 1.1313693523406982, "learning_rate": 9.47671052631579e-05, "loss": 0.3328, "step": 39903 }, { "epoch": 2.2345167431963264, "grad_norm": 1.2157329320907593, "learning_rate": 9.476684210526316e-05, "loss": 0.5045, "step": 39904 }, { "epoch": 2.2345727405084554, "grad_norm": 1.1704727411270142, "learning_rate": 9.476657894736842e-05, "loss": 0.3563, "step": 39905 }, { "epoch": 2.2346287378205845, "grad_norm": 1.3883517980575562, "learning_rate": 9.476631578947369e-05, "loss": 0.3895, "step": 39906 }, { "epoch": 2.2346847351327135, "grad_norm": 1.3662335872650146, "learning_rate": 9.476605263157895e-05, "loss": 0.5314, "step": 39907 }, { "epoch": 2.2347407324448425, "grad_norm": 1.518214464187622, "learning_rate": 9.476578947368421e-05, "loss": 0.4034, "step": 39908 }, { "epoch": 2.2347967297569715, "grad_norm": 1.1782102584838867, "learning_rate": 9.476552631578947e-05, "loss": 0.3626, "step": 39909 }, { "epoch": 2.2348527270691005, "grad_norm": 1.2837440967559814, "learning_rate": 9.476526315789475e-05, "loss": 0.4021, "step": 39910 }, { "epoch": 2.2349087243812296, "grad_norm": 1.225203037261963, "learning_rate": 9.4765e-05, "loss": 0.4332, "step": 39911 }, { "epoch": 2.2349647216933586, "grad_norm": 0.9944750070571899, "learning_rate": 9.476473684210528e-05, "loss": 0.3155, "step": 39912 }, { "epoch": 2.2350207190054876, "grad_norm": 1.1196249723434448, "learning_rate": 
9.476447368421052e-05, "loss": 0.4272, "step": 39913 }, { "epoch": 2.2350767163176166, "grad_norm": 1.8444879055023193, "learning_rate": 9.476421052631578e-05, "loss": 0.6322, "step": 39914 }, { "epoch": 2.2351327136297456, "grad_norm": 1.1092880964279175, "learning_rate": 9.476394736842106e-05, "loss": 0.3613, "step": 39915 }, { "epoch": 2.2351887109418747, "grad_norm": 1.4292500019073486, "learning_rate": 9.476368421052632e-05, "loss": 0.4643, "step": 39916 }, { "epoch": 2.2352447082540037, "grad_norm": 1.431752324104309, "learning_rate": 9.476342105263159e-05, "loss": 0.4556, "step": 39917 }, { "epoch": 2.2353007055661327, "grad_norm": 1.2649872303009033, "learning_rate": 9.476315789473684e-05, "loss": 0.4774, "step": 39918 }, { "epoch": 2.2353567028782617, "grad_norm": 1.07740318775177, "learning_rate": 9.476289473684211e-05, "loss": 0.4053, "step": 39919 }, { "epoch": 2.2354127001903907, "grad_norm": 1.1882028579711914, "learning_rate": 9.476263157894737e-05, "loss": 0.3765, "step": 39920 }, { "epoch": 2.2354686975025198, "grad_norm": 1.1409432888031006, "learning_rate": 9.476236842105264e-05, "loss": 0.5393, "step": 39921 }, { "epoch": 2.2355246948146488, "grad_norm": 1.3845298290252686, "learning_rate": 9.47621052631579e-05, "loss": 0.3877, "step": 39922 }, { "epoch": 2.235580692126778, "grad_norm": 1.367463231086731, "learning_rate": 9.476184210526316e-05, "loss": 0.331, "step": 39923 }, { "epoch": 2.235636689438907, "grad_norm": 1.0793558359146118, "learning_rate": 9.476157894736842e-05, "loss": 0.3899, "step": 39924 }, { "epoch": 2.235692686751036, "grad_norm": 1.2754768133163452, "learning_rate": 9.47613157894737e-05, "loss": 0.3708, "step": 39925 }, { "epoch": 2.235748684063165, "grad_norm": 1.3791537284851074, "learning_rate": 9.476105263157896e-05, "loss": 0.6432, "step": 39926 }, { "epoch": 2.235804681375294, "grad_norm": 0.948826014995575, "learning_rate": 9.476078947368421e-05, "loss": 0.3876, "step": 39927 }, { "epoch": 2.235860678687423, 
"grad_norm": 1.3538728952407837, "learning_rate": 9.476052631578947e-05, "loss": 0.3793, "step": 39928 }, { "epoch": 2.235916675999552, "grad_norm": 1.072240948677063, "learning_rate": 9.476026315789475e-05, "loss": 0.413, "step": 39929 }, { "epoch": 2.235972673311681, "grad_norm": 1.527756929397583, "learning_rate": 9.476000000000001e-05, "loss": 0.3499, "step": 39930 }, { "epoch": 2.23602867062381, "grad_norm": 1.3422818183898926, "learning_rate": 9.475973684210527e-05, "loss": 0.346, "step": 39931 }, { "epoch": 2.236084667935939, "grad_norm": 1.402057409286499, "learning_rate": 9.475947368421053e-05, "loss": 0.4758, "step": 39932 }, { "epoch": 2.236140665248068, "grad_norm": 1.0964053869247437, "learning_rate": 9.475921052631579e-05, "loss": 0.3429, "step": 39933 }, { "epoch": 2.236196662560197, "grad_norm": 1.1753878593444824, "learning_rate": 9.475894736842106e-05, "loss": 0.4079, "step": 39934 }, { "epoch": 2.236252659872326, "grad_norm": 1.103694200515747, "learning_rate": 9.475868421052632e-05, "loss": 0.3765, "step": 39935 }, { "epoch": 2.236308657184455, "grad_norm": 1.4737013578414917, "learning_rate": 9.475842105263158e-05, "loss": 0.4378, "step": 39936 }, { "epoch": 2.236364654496584, "grad_norm": 1.2695138454437256, "learning_rate": 9.475815789473684e-05, "loss": 0.3272, "step": 39937 }, { "epoch": 2.236420651808713, "grad_norm": 1.065146803855896, "learning_rate": 9.475789473684211e-05, "loss": 0.3768, "step": 39938 }, { "epoch": 2.236476649120842, "grad_norm": 1.373531699180603, "learning_rate": 9.475763157894737e-05, "loss": 0.5587, "step": 39939 }, { "epoch": 2.236532646432971, "grad_norm": 1.2843073606491089, "learning_rate": 9.475736842105265e-05, "loss": 0.5004, "step": 39940 }, { "epoch": 2.2365886437451, "grad_norm": 1.2023910284042358, "learning_rate": 9.475710526315789e-05, "loss": 0.2787, "step": 39941 }, { "epoch": 2.236644641057229, "grad_norm": 1.529686689376831, "learning_rate": 9.475684210526316e-05, "loss": 0.5828, "step": 39942 }, { 
"epoch": 2.236700638369358, "grad_norm": 1.269740104675293, "learning_rate": 9.475657894736842e-05, "loss": 0.4057, "step": 39943 }, { "epoch": 2.236756635681487, "grad_norm": 1.2110581398010254, "learning_rate": 9.47563157894737e-05, "loss": 0.3941, "step": 39944 }, { "epoch": 2.236812632993616, "grad_norm": 1.1980048418045044, "learning_rate": 9.475605263157896e-05, "loss": 0.433, "step": 39945 }, { "epoch": 2.2368686303057452, "grad_norm": 1.1548186540603638, "learning_rate": 9.475578947368422e-05, "loss": 0.3879, "step": 39946 }, { "epoch": 2.2369246276178743, "grad_norm": 1.1750766038894653, "learning_rate": 9.475552631578948e-05, "loss": 0.4583, "step": 39947 }, { "epoch": 2.2369806249300033, "grad_norm": 1.3085960149765015, "learning_rate": 9.475526315789474e-05, "loss": 0.4658, "step": 39948 }, { "epoch": 2.2370366222421323, "grad_norm": null, "learning_rate": 9.475526315789474e-05, "loss": 0.4228, "step": 39949 }, { "epoch": 2.2370926195542613, "grad_norm": 1.4823559522628784, "learning_rate": 9.475500000000001e-05, "loss": 0.2996, "step": 39950 }, { "epoch": 2.2371486168663903, "grad_norm": 1.205166220664978, "learning_rate": 9.475473684210526e-05, "loss": 0.3887, "step": 39951 }, { "epoch": 2.2372046141785193, "grad_norm": 1.1702510118484497, "learning_rate": 9.475447368421053e-05, "loss": 0.351, "step": 39952 }, { "epoch": 2.2372606114906484, "grad_norm": 1.0614465475082397, "learning_rate": 9.475421052631579e-05, "loss": 0.393, "step": 39953 }, { "epoch": 2.2373166088027774, "grad_norm": 1.2083666324615479, "learning_rate": 9.475394736842106e-05, "loss": 0.5644, "step": 39954 }, { "epoch": 2.2373726061149064, "grad_norm": 1.180579662322998, "learning_rate": 9.475368421052632e-05, "loss": 0.3806, "step": 39955 }, { "epoch": 2.2374286034270354, "grad_norm": 1.1403300762176514, "learning_rate": 9.475342105263158e-05, "loss": 0.4456, "step": 39956 }, { "epoch": 2.2374846007391644, "grad_norm": 1.1023598909378052, "learning_rate": 9.475315789473684e-05, 
"loss": 0.4709, "step": 39957 }, { "epoch": 2.2375405980512935, "grad_norm": 1.2078845500946045, "learning_rate": 9.475289473684212e-05, "loss": 0.3839, "step": 39958 }, { "epoch": 2.2375965953634225, "grad_norm": 1.3416873216629028, "learning_rate": 9.475263157894737e-05, "loss": 0.3914, "step": 39959 }, { "epoch": 2.2376525926755515, "grad_norm": 1.1283265352249146, "learning_rate": 9.475236842105263e-05, "loss": 0.5546, "step": 39960 }, { "epoch": 2.2377085899876805, "grad_norm": 1.2295030355453491, "learning_rate": 9.47521052631579e-05, "loss": 0.516, "step": 39961 }, { "epoch": 2.2377645872998095, "grad_norm": 1.1981478929519653, "learning_rate": 9.475184210526317e-05, "loss": 0.4325, "step": 39962 }, { "epoch": 2.2378205846119386, "grad_norm": 0.8648694753646851, "learning_rate": 9.475157894736843e-05, "loss": 0.2625, "step": 39963 }, { "epoch": 2.2378765819240676, "grad_norm": 1.5383596420288086, "learning_rate": 9.475131578947369e-05, "loss": 0.3375, "step": 39964 }, { "epoch": 2.2379325792361966, "grad_norm": 1.240010380744934, "learning_rate": 9.475105263157895e-05, "loss": 0.5168, "step": 39965 }, { "epoch": 2.2379885765483256, "grad_norm": 1.1895915269851685, "learning_rate": 9.475078947368422e-05, "loss": 0.4048, "step": 39966 }, { "epoch": 2.2380445738604546, "grad_norm": 1.3029874563217163, "learning_rate": 9.475052631578948e-05, "loss": 0.4319, "step": 39967 }, { "epoch": 2.2381005711725837, "grad_norm": 1.290453314781189, "learning_rate": 9.475026315789474e-05, "loss": 0.3719, "step": 39968 }, { "epoch": 2.2381565684847127, "grad_norm": 1.1601901054382324, "learning_rate": 9.475e-05, "loss": 0.2659, "step": 39969 }, { "epoch": 2.2382125657968417, "grad_norm": 1.4407151937484741, "learning_rate": 9.474973684210526e-05, "loss": 0.5851, "step": 39970 }, { "epoch": 2.2382685631089707, "grad_norm": 1.149189829826355, "learning_rate": 9.474947368421053e-05, "loss": 0.5252, "step": 39971 }, { "epoch": 2.2383245604210997, "grad_norm": 1.588834524154663, 
"learning_rate": 9.474921052631579e-05, "loss": 0.3904, "step": 39972 }, { "epoch": 2.2383805577332287, "grad_norm": 1.182956576347351, "learning_rate": 9.474894736842107e-05, "loss": 0.3434, "step": 39973 }, { "epoch": 2.2384365550453578, "grad_norm": 1.3318060636520386, "learning_rate": 9.474868421052631e-05, "loss": 0.6055, "step": 39974 }, { "epoch": 2.238492552357487, "grad_norm": 1.1993985176086426, "learning_rate": 9.474842105263158e-05, "loss": 0.4704, "step": 39975 }, { "epoch": 2.238548549669616, "grad_norm": 1.2488253116607666, "learning_rate": 9.474815789473684e-05, "loss": 0.408, "step": 39976 }, { "epoch": 2.238604546981745, "grad_norm": 1.151971697807312, "learning_rate": 9.474789473684212e-05, "loss": 0.3721, "step": 39977 }, { "epoch": 2.238660544293874, "grad_norm": 1.1755130290985107, "learning_rate": 9.474763157894738e-05, "loss": 0.4729, "step": 39978 }, { "epoch": 2.238716541606003, "grad_norm": 1.3706068992614746, "learning_rate": 9.474736842105264e-05, "loss": 0.3167, "step": 39979 }, { "epoch": 2.238772538918132, "grad_norm": 1.2356736660003662, "learning_rate": 9.47471052631579e-05, "loss": 0.3977, "step": 39980 }, { "epoch": 2.238828536230261, "grad_norm": 1.4225574731826782, "learning_rate": 9.474684210526317e-05, "loss": 0.3663, "step": 39981 }, { "epoch": 2.23888453354239, "grad_norm": 1.1724600791931152, "learning_rate": 9.474657894736843e-05, "loss": 0.4472, "step": 39982 }, { "epoch": 2.238940530854519, "grad_norm": 1.2882767915725708, "learning_rate": 9.474631578947369e-05, "loss": 0.4334, "step": 39983 }, { "epoch": 2.238996528166648, "grad_norm": 2.4172704219818115, "learning_rate": 9.474605263157895e-05, "loss": 0.3863, "step": 39984 }, { "epoch": 2.239052525478777, "grad_norm": 1.551350474357605, "learning_rate": 9.474578947368421e-05, "loss": 0.4781, "step": 39985 }, { "epoch": 2.239108522790906, "grad_norm": 1.3170713186264038, "learning_rate": 9.474552631578948e-05, "loss": 0.4631, "step": 39986 }, { "epoch": 
2.239164520103035, "grad_norm": 1.2197309732437134, "learning_rate": 9.474526315789474e-05, "loss": 0.4882, "step": 39987 }, { "epoch": 2.239220517415164, "grad_norm": 1.1837671995162964, "learning_rate": 9.4745e-05, "loss": 0.3603, "step": 39988 }, { "epoch": 2.239276514727293, "grad_norm": 1.223314881324768, "learning_rate": 9.474473684210526e-05, "loss": 0.3685, "step": 39989 }, { "epoch": 2.239332512039422, "grad_norm": 2.763145685195923, "learning_rate": 9.474447368421053e-05, "loss": 0.4145, "step": 39990 }, { "epoch": 2.239388509351551, "grad_norm": 1.1128594875335693, "learning_rate": 9.47442105263158e-05, "loss": 0.4004, "step": 39991 }, { "epoch": 2.23944450666368, "grad_norm": 1.231052279472351, "learning_rate": 9.474394736842105e-05, "loss": 0.474, "step": 39992 }, { "epoch": 2.239500503975809, "grad_norm": 1.0675373077392578, "learning_rate": 9.474368421052631e-05, "loss": 0.4333, "step": 39993 }, { "epoch": 2.239556501287938, "grad_norm": 1.4970506429672241, "learning_rate": 9.474342105263159e-05, "loss": 0.5533, "step": 39994 }, { "epoch": 2.239612498600067, "grad_norm": 1.4095773696899414, "learning_rate": 9.474315789473685e-05, "loss": 0.3557, "step": 39995 }, { "epoch": 2.239668495912196, "grad_norm": 1.0520011186599731, "learning_rate": 9.474289473684212e-05, "loss": 0.2956, "step": 39996 }, { "epoch": 2.239724493224325, "grad_norm": 1.2317395210266113, "learning_rate": 9.474263157894737e-05, "loss": 0.3938, "step": 39997 }, { "epoch": 2.2397804905364542, "grad_norm": 2.2235002517700195, "learning_rate": 9.474236842105264e-05, "loss": 0.4064, "step": 39998 }, { "epoch": 2.2398364878485832, "grad_norm": 1.1575438976287842, "learning_rate": 9.47421052631579e-05, "loss": 0.4056, "step": 39999 }, { "epoch": 2.2398924851607123, "grad_norm": 1.2392680644989014, "learning_rate": 9.474184210526317e-05, "loss": 0.4986, "step": 40000 }, { "epoch": 2.2399484824728413, "grad_norm": 0.9851133823394775, "learning_rate": 9.474157894736843e-05, "loss": 0.3659, 
"step": 40001 }, { "epoch": 2.2400044797849703, "grad_norm": 1.1650434732437134, "learning_rate": 9.474131578947368e-05, "loss": 0.3928, "step": 40002 }, { "epoch": 2.2400604770970993, "grad_norm": 1.099534511566162, "learning_rate": 9.474105263157895e-05, "loss": 0.3755, "step": 40003 }, { "epoch": 2.2401164744092283, "grad_norm": 1.2335864305496216, "learning_rate": 9.474078947368421e-05, "loss": 0.5132, "step": 40004 }, { "epoch": 2.2401724717213574, "grad_norm": 1.2972909212112427, "learning_rate": 9.474052631578948e-05, "loss": 0.3832, "step": 40005 }, { "epoch": 2.2402284690334864, "grad_norm": 1.3466465473175049, "learning_rate": 9.474026315789473e-05, "loss": 0.3817, "step": 40006 }, { "epoch": 2.2402844663456154, "grad_norm": 1.375908613204956, "learning_rate": 9.474e-05, "loss": 0.3668, "step": 40007 }, { "epoch": 2.2403404636577444, "grad_norm": 1.1419041156768799, "learning_rate": 9.473973684210526e-05, "loss": 0.3017, "step": 40008 }, { "epoch": 2.2403964609698734, "grad_norm": 1.1640428304672241, "learning_rate": 9.473947368421054e-05, "loss": 0.3712, "step": 40009 }, { "epoch": 2.2404524582820025, "grad_norm": 1.5585756301879883, "learning_rate": 9.47392105263158e-05, "loss": 0.473, "step": 40010 }, { "epoch": 2.2405084555941315, "grad_norm": 1.0717350244522095, "learning_rate": 9.473894736842106e-05, "loss": 0.366, "step": 40011 }, { "epoch": 2.2405644529062605, "grad_norm": 1.302870512008667, "learning_rate": 9.473868421052632e-05, "loss": 0.4169, "step": 40012 }, { "epoch": 2.2406204502183895, "grad_norm": 1.2220220565795898, "learning_rate": 9.473842105263159e-05, "loss": 0.3958, "step": 40013 }, { "epoch": 2.2406764475305185, "grad_norm": 1.4140089750289917, "learning_rate": 9.473815789473685e-05, "loss": 0.4479, "step": 40014 }, { "epoch": 2.2407324448426476, "grad_norm": 1.7141547203063965, "learning_rate": 9.473789473684211e-05, "loss": 0.5872, "step": 40015 }, { "epoch": 2.2407884421547766, "grad_norm": 1.467725157737732, "learning_rate": 
9.473763157894737e-05, "loss": 0.3752, "step": 40016 }, { "epoch": 2.2408444394669056, "grad_norm": 1.1830037832260132, "learning_rate": 9.473736842105264e-05, "loss": 0.447, "step": 40017 }, { "epoch": 2.2409004367790346, "grad_norm": 1.0790154933929443, "learning_rate": 9.47371052631579e-05, "loss": 0.4001, "step": 40018 }, { "epoch": 2.2409564340911636, "grad_norm": 1.1032518148422241, "learning_rate": 9.473684210526316e-05, "loss": 0.4083, "step": 40019 }, { "epoch": 2.2410124314032926, "grad_norm": 1.2645224332809448, "learning_rate": 9.473657894736842e-05, "loss": 0.4652, "step": 40020 }, { "epoch": 2.2410684287154217, "grad_norm": 1.3790812492370605, "learning_rate": 9.473631578947368e-05, "loss": 0.3428, "step": 40021 }, { "epoch": 2.2411244260275507, "grad_norm": 1.375135898590088, "learning_rate": 9.473605263157895e-05, "loss": 0.3707, "step": 40022 }, { "epoch": 2.2411804233396797, "grad_norm": 1.0942384004592896, "learning_rate": 9.473578947368421e-05, "loss": 0.3139, "step": 40023 }, { "epoch": 2.2412364206518087, "grad_norm": 1.4653470516204834, "learning_rate": 9.473552631578947e-05, "loss": 0.4462, "step": 40024 }, { "epoch": 2.2412924179639377, "grad_norm": 1.3072495460510254, "learning_rate": 9.473526315789473e-05, "loss": 0.4028, "step": 40025 }, { "epoch": 2.2413484152760668, "grad_norm": 1.0115150213241577, "learning_rate": 9.473500000000001e-05, "loss": 0.2975, "step": 40026 }, { "epoch": 2.241404412588196, "grad_norm": 1.0322843790054321, "learning_rate": 9.473473684210527e-05, "loss": 0.2533, "step": 40027 }, { "epoch": 2.241460409900325, "grad_norm": 1.1061087846755981, "learning_rate": 9.473447368421054e-05, "loss": 0.3353, "step": 40028 }, { "epoch": 2.241516407212454, "grad_norm": 1.1929560899734497, "learning_rate": 9.473421052631579e-05, "loss": 0.2924, "step": 40029 }, { "epoch": 2.241572404524583, "grad_norm": 1.4066029787063599, "learning_rate": 9.473394736842106e-05, "loss": 0.401, "step": 40030 }, { "epoch": 2.241628401836712, 
"grad_norm": 1.2358721494674683, "learning_rate": 9.473368421052632e-05, "loss": 0.4111, "step": 40031 }, { "epoch": 2.241684399148841, "grad_norm": 1.1237452030181885, "learning_rate": 9.473342105263159e-05, "loss": 0.4697, "step": 40032 }, { "epoch": 2.24174039646097, "grad_norm": 1.0696967840194702, "learning_rate": 9.473315789473685e-05, "loss": 0.3536, "step": 40033 }, { "epoch": 2.241796393773099, "grad_norm": 1.179937481880188, "learning_rate": 9.473289473684211e-05, "loss": 0.3543, "step": 40034 }, { "epoch": 2.241852391085228, "grad_norm": 1.2309807538986206, "learning_rate": 9.473263157894737e-05, "loss": 0.3823, "step": 40035 }, { "epoch": 2.241908388397357, "grad_norm": 2.0140416622161865, "learning_rate": 9.473236842105263e-05, "loss": 0.5147, "step": 40036 }, { "epoch": 2.241964385709486, "grad_norm": 1.1582443714141846, "learning_rate": 9.47321052631579e-05, "loss": 0.3891, "step": 40037 }, { "epoch": 2.242020383021615, "grad_norm": 1.3058408498764038, "learning_rate": 9.473184210526316e-05, "loss": 0.4588, "step": 40038 }, { "epoch": 2.242076380333744, "grad_norm": 1.4526749849319458, "learning_rate": 9.473157894736842e-05, "loss": 0.5216, "step": 40039 }, { "epoch": 2.242132377645873, "grad_norm": 1.1712088584899902, "learning_rate": 9.473131578947368e-05, "loss": 0.3896, "step": 40040 }, { "epoch": 2.242188374958002, "grad_norm": 1.3635497093200684, "learning_rate": 9.473105263157896e-05, "loss": 0.3978, "step": 40041 }, { "epoch": 2.242244372270131, "grad_norm": 1.1317417621612549, "learning_rate": 9.473078947368422e-05, "loss": 0.3616, "step": 40042 }, { "epoch": 2.24230036958226, "grad_norm": 1.096992015838623, "learning_rate": 9.473052631578948e-05, "loss": 0.412, "step": 40043 }, { "epoch": 2.242356366894389, "grad_norm": 1.5048686265945435, "learning_rate": 9.473026315789474e-05, "loss": 0.3999, "step": 40044 }, { "epoch": 2.242412364206518, "grad_norm": 1.1228364706039429, "learning_rate": 9.473000000000001e-05, "loss": 0.3503, "step": 
40045 }, { "epoch": 2.242468361518647, "grad_norm": 1.1628942489624023, "learning_rate": 9.472973684210527e-05, "loss": 0.403, "step": 40046 }, { "epoch": 2.242524358830776, "grad_norm": 1.1514170169830322, "learning_rate": 9.472947368421053e-05, "loss": 0.3823, "step": 40047 }, { "epoch": 2.242580356142905, "grad_norm": 1.3031609058380127, "learning_rate": 9.472921052631579e-05, "loss": 0.4264, "step": 40048 }, { "epoch": 2.242636353455034, "grad_norm": 1.132583498954773, "learning_rate": 9.472894736842106e-05, "loss": 0.4152, "step": 40049 }, { "epoch": 2.2426923507671632, "grad_norm": 1.7693240642547607, "learning_rate": 9.472868421052632e-05, "loss": 0.387, "step": 40050 }, { "epoch": 2.2427483480792922, "grad_norm": 7.094480991363525, "learning_rate": 9.47284210526316e-05, "loss": 0.4944, "step": 40051 }, { "epoch": 2.2428043453914213, "grad_norm": 1.3027894496917725, "learning_rate": 9.472815789473684e-05, "loss": 0.387, "step": 40052 }, { "epoch": 2.2428603427035503, "grad_norm": 1.1837313175201416, "learning_rate": 9.47278947368421e-05, "loss": 0.3046, "step": 40053 }, { "epoch": 2.2429163400156793, "grad_norm": 1.136681318283081, "learning_rate": 9.472763157894737e-05, "loss": 0.378, "step": 40054 }, { "epoch": 2.2429723373278083, "grad_norm": 1.2850608825683594, "learning_rate": 9.472736842105263e-05, "loss": 0.3835, "step": 40055 }, { "epoch": 2.2430283346399373, "grad_norm": 1.178913950920105, "learning_rate": 9.47271052631579e-05, "loss": 0.4344, "step": 40056 }, { "epoch": 2.2430843319520664, "grad_norm": 1.373615026473999, "learning_rate": 9.472684210526315e-05, "loss": 0.4389, "step": 40057 }, { "epoch": 2.2431403292641954, "grad_norm": 1.1545320749282837, "learning_rate": 9.472657894736843e-05, "loss": 0.3852, "step": 40058 }, { "epoch": 2.2431963265763244, "grad_norm": 1.3827202320098877, "learning_rate": 9.472631578947369e-05, "loss": 0.3811, "step": 40059 }, { "epoch": 2.2432523238884534, "grad_norm": 1.240789532661438, "learning_rate": 
9.472605263157896e-05, "loss": 0.3543, "step": 40060 }, { "epoch": 2.2433083212005824, "grad_norm": 1.1640691757202148, "learning_rate": 9.47257894736842e-05, "loss": 0.4291, "step": 40061 }, { "epoch": 2.2433643185127115, "grad_norm": 1.270259141921997, "learning_rate": 9.472552631578948e-05, "loss": 0.3339, "step": 40062 }, { "epoch": 2.2434203158248405, "grad_norm": 1.1392079591751099, "learning_rate": 9.472526315789474e-05, "loss": 0.3278, "step": 40063 }, { "epoch": 2.2434763131369695, "grad_norm": 1.4896596670150757, "learning_rate": 9.472500000000001e-05, "loss": 0.4646, "step": 40064 }, { "epoch": 2.2435323104490985, "grad_norm": 1.2585833072662354, "learning_rate": 9.472473684210527e-05, "loss": 0.4276, "step": 40065 }, { "epoch": 2.2435883077612275, "grad_norm": 1.1571033000946045, "learning_rate": 9.472447368421053e-05, "loss": 0.4073, "step": 40066 }, { "epoch": 2.2436443050733565, "grad_norm": 1.4096288681030273, "learning_rate": 9.472421052631579e-05, "loss": 0.4808, "step": 40067 }, { "epoch": 2.2437003023854856, "grad_norm": 1.2043886184692383, "learning_rate": 9.472394736842106e-05, "loss": 0.3484, "step": 40068 }, { "epoch": 2.2437562996976146, "grad_norm": 1.0547136068344116, "learning_rate": 9.472368421052632e-05, "loss": 0.2978, "step": 40069 }, { "epoch": 2.2438122970097436, "grad_norm": 1.116759181022644, "learning_rate": 9.472342105263158e-05, "loss": 0.3285, "step": 40070 }, { "epoch": 2.2438682943218726, "grad_norm": 1.1092449426651, "learning_rate": 9.472315789473684e-05, "loss": 0.3875, "step": 40071 }, { "epoch": 2.2439242916340016, "grad_norm": 1.1607588529586792, "learning_rate": 9.47228947368421e-05, "loss": 0.4198, "step": 40072 }, { "epoch": 2.2439802889461307, "grad_norm": 1.1589411497116089, "learning_rate": 9.472263157894738e-05, "loss": 0.382, "step": 40073 }, { "epoch": 2.2440362862582597, "grad_norm": 1.0209678411483765, "learning_rate": 9.472236842105264e-05, "loss": 0.446, "step": 40074 }, { "epoch": 2.2440922835703887, 
"grad_norm": 1.3072453737258911, "learning_rate": 9.47221052631579e-05, "loss": 0.4451, "step": 40075 }, { "epoch": 2.2441482808825177, "grad_norm": 0.9194907546043396, "learning_rate": 9.472184210526316e-05, "loss": 0.3515, "step": 40076 }, { "epoch": 2.2442042781946467, "grad_norm": 1.2015539407730103, "learning_rate": 9.472157894736843e-05, "loss": 0.5375, "step": 40077 }, { "epoch": 2.2442602755067758, "grad_norm": 1.1443856954574585, "learning_rate": 9.472131578947369e-05, "loss": 0.3863, "step": 40078 }, { "epoch": 2.244316272818905, "grad_norm": 1.0491079092025757, "learning_rate": 9.472105263157895e-05, "loss": 0.336, "step": 40079 }, { "epoch": 2.244372270131034, "grad_norm": 1.2625688314437866, "learning_rate": 9.472078947368421e-05, "loss": 0.4045, "step": 40080 }, { "epoch": 2.244428267443163, "grad_norm": 1.3699678182601929, "learning_rate": 9.472052631578948e-05, "loss": 0.451, "step": 40081 }, { "epoch": 2.244484264755292, "grad_norm": 1.2473140954971313, "learning_rate": 9.472026315789474e-05, "loss": 0.306, "step": 40082 }, { "epoch": 2.244540262067421, "grad_norm": 1.1685445308685303, "learning_rate": 9.472000000000001e-05, "loss": 0.3856, "step": 40083 }, { "epoch": 2.24459625937955, "grad_norm": 0.9222745895385742, "learning_rate": 9.471973684210526e-05, "loss": 0.3257, "step": 40084 }, { "epoch": 2.244652256691679, "grad_norm": 1.3740324974060059, "learning_rate": 9.471947368421053e-05, "loss": 0.4357, "step": 40085 }, { "epoch": 2.244708254003808, "grad_norm": 1.3621305227279663, "learning_rate": 9.47192105263158e-05, "loss": 0.4403, "step": 40086 }, { "epoch": 2.244764251315937, "grad_norm": 1.295389175415039, "learning_rate": 9.471894736842107e-05, "loss": 0.3244, "step": 40087 }, { "epoch": 2.244820248628066, "grad_norm": 1.4892868995666504, "learning_rate": 9.471868421052633e-05, "loss": 0.3576, "step": 40088 }, { "epoch": 2.244876245940195, "grad_norm": 1.1565486192703247, "learning_rate": 9.471842105263157e-05, "loss": 0.3935, "step": 
40089 }, { "epoch": 2.244932243252324, "grad_norm": 1.5260311365127563, "learning_rate": 9.471815789473685e-05, "loss": 0.4058, "step": 40090 }, { "epoch": 2.244988240564453, "grad_norm": 1.0575448274612427, "learning_rate": 9.47178947368421e-05, "loss": 0.3802, "step": 40091 }, { "epoch": 2.245044237876582, "grad_norm": 1.2589242458343506, "learning_rate": 9.471763157894738e-05, "loss": 0.3704, "step": 40092 }, { "epoch": 2.245100235188711, "grad_norm": 1.0562865734100342, "learning_rate": 9.471736842105264e-05, "loss": 0.336, "step": 40093 }, { "epoch": 2.24515623250084, "grad_norm": 1.2664527893066406, "learning_rate": 9.47171052631579e-05, "loss": 0.5396, "step": 40094 }, { "epoch": 2.245212229812969, "grad_norm": 1.2666444778442383, "learning_rate": 9.471684210526316e-05, "loss": 0.4644, "step": 40095 }, { "epoch": 2.245268227125098, "grad_norm": 1.139235496520996, "learning_rate": 9.471657894736843e-05, "loss": 0.3456, "step": 40096 }, { "epoch": 2.245324224437227, "grad_norm": 1.319272756576538, "learning_rate": 9.471631578947369e-05, "loss": 0.5157, "step": 40097 }, { "epoch": 2.245380221749356, "grad_norm": 1.3000242710113525, "learning_rate": 9.471605263157895e-05, "loss": 0.4559, "step": 40098 }, { "epoch": 2.245436219061485, "grad_norm": 1.203012228012085, "learning_rate": 9.471578947368421e-05, "loss": 0.3529, "step": 40099 }, { "epoch": 2.245492216373614, "grad_norm": 1.2658575773239136, "learning_rate": 9.471552631578948e-05, "loss": 0.4043, "step": 40100 }, { "epoch": 2.245548213685743, "grad_norm": 1.316209077835083, "learning_rate": 9.471526315789474e-05, "loss": 0.4743, "step": 40101 }, { "epoch": 2.245604210997872, "grad_norm": 0.9757815599441528, "learning_rate": 9.4715e-05, "loss": 0.3781, "step": 40102 }, { "epoch": 2.2456602083100012, "grad_norm": 1.2216956615447998, "learning_rate": 9.471473684210526e-05, "loss": 0.4249, "step": 40103 }, { "epoch": 2.2457162056221303, "grad_norm": 1.0190409421920776, "learning_rate": 9.471447368421054e-05, 
"loss": 0.3108, "step": 40104 }, { "epoch": 2.2457722029342593, "grad_norm": 1.2755038738250732, "learning_rate": 9.47142105263158e-05, "loss": 0.4462, "step": 40105 }, { "epoch": 2.2458282002463883, "grad_norm": 1.097569465637207, "learning_rate": 9.471394736842106e-05, "loss": 0.3708, "step": 40106 }, { "epoch": 2.2458841975585173, "grad_norm": 1.544690489768982, "learning_rate": 9.471368421052632e-05, "loss": 0.7099, "step": 40107 }, { "epoch": 2.2459401948706463, "grad_norm": 1.197120189666748, "learning_rate": 9.471342105263158e-05, "loss": 0.4852, "step": 40108 }, { "epoch": 2.2459961921827754, "grad_norm": 1.1146851778030396, "learning_rate": 9.471315789473685e-05, "loss": 0.3941, "step": 40109 }, { "epoch": 2.2460521894949044, "grad_norm": 1.3033556938171387, "learning_rate": 9.471289473684211e-05, "loss": 0.4565, "step": 40110 }, { "epoch": 2.2461081868070334, "grad_norm": 1.4440224170684814, "learning_rate": 9.471263157894737e-05, "loss": 0.4238, "step": 40111 }, { "epoch": 2.2461641841191624, "grad_norm": 1.2164455652236938, "learning_rate": 9.471236842105263e-05, "loss": 0.5021, "step": 40112 }, { "epoch": 2.2462201814312914, "grad_norm": 1.3465259075164795, "learning_rate": 9.47121052631579e-05, "loss": 0.3254, "step": 40113 }, { "epoch": 2.2462761787434204, "grad_norm": 1.0940874814987183, "learning_rate": 9.471184210526316e-05, "loss": 0.3525, "step": 40114 }, { "epoch": 2.2463321760555495, "grad_norm": 1.3678523302078247, "learning_rate": 9.471157894736843e-05, "loss": 0.3569, "step": 40115 }, { "epoch": 2.2463881733676785, "grad_norm": 1.0850518941879272, "learning_rate": 9.471131578947368e-05, "loss": 0.4202, "step": 40116 }, { "epoch": 2.2464441706798075, "grad_norm": 1.2148479223251343, "learning_rate": 9.471105263157895e-05, "loss": 0.2912, "step": 40117 }, { "epoch": 2.2465001679919365, "grad_norm": 1.4207943677902222, "learning_rate": 9.471078947368421e-05, "loss": 0.5088, "step": 40118 }, { "epoch": 2.2465561653040655, "grad_norm": 
1.3358690738677979, "learning_rate": 9.471052631578949e-05, "loss": 0.3653, "step": 40119 }, { "epoch": 2.2466121626161946, "grad_norm": 1.0093083381652832, "learning_rate": 9.471026315789475e-05, "loss": 0.3814, "step": 40120 }, { "epoch": 2.2466681599283236, "grad_norm": 2.148082733154297, "learning_rate": 9.471e-05, "loss": 0.5028, "step": 40121 }, { "epoch": 2.2467241572404526, "grad_norm": 1.4454413652420044, "learning_rate": 9.470973684210527e-05, "loss": 0.376, "step": 40122 }, { "epoch": 2.2467801545525816, "grad_norm": 1.3570654392242432, "learning_rate": 9.470947368421053e-05, "loss": 0.3734, "step": 40123 }, { "epoch": 2.2468361518647106, "grad_norm": 1.3323249816894531, "learning_rate": 9.47092105263158e-05, "loss": 0.4194, "step": 40124 }, { "epoch": 2.2468921491768397, "grad_norm": 1.6291946172714233, "learning_rate": 9.470894736842106e-05, "loss": 0.468, "step": 40125 }, { "epoch": 2.2469481464889687, "grad_norm": 1.167348861694336, "learning_rate": 9.470868421052632e-05, "loss": 0.3023, "step": 40126 }, { "epoch": 2.2470041438010977, "grad_norm": 1.0483983755111694, "learning_rate": 9.470842105263158e-05, "loss": 0.329, "step": 40127 }, { "epoch": 2.2470601411132267, "grad_norm": 1.2004703283309937, "learning_rate": 9.470815789473685e-05, "loss": 0.4033, "step": 40128 }, { "epoch": 2.2471161384253557, "grad_norm": 1.2403535842895508, "learning_rate": 9.470789473684211e-05, "loss": 0.3976, "step": 40129 }, { "epoch": 2.2471721357374848, "grad_norm": 1.2348365783691406, "learning_rate": 9.470763157894737e-05, "loss": 0.3982, "step": 40130 }, { "epoch": 2.2472281330496138, "grad_norm": 1.0892412662506104, "learning_rate": 9.470736842105263e-05, "loss": 0.3602, "step": 40131 }, { "epoch": 2.247284130361743, "grad_norm": 1.4445762634277344, "learning_rate": 9.47071052631579e-05, "loss": 0.3874, "step": 40132 }, { "epoch": 2.247340127673872, "grad_norm": 1.0186032056808472, "learning_rate": 9.470684210526316e-05, "loss": 0.358, "step": 40133 }, { "epoch": 
2.247396124986001, "grad_norm": 0.9626522660255432, "learning_rate": 9.470657894736842e-05, "loss": 0.2884, "step": 40134 }, { "epoch": 2.24745212229813, "grad_norm": 1.0658365488052368, "learning_rate": 9.470631578947368e-05, "loss": 0.4058, "step": 40135 }, { "epoch": 2.247508119610259, "grad_norm": 1.147383689880371, "learning_rate": 9.470605263157896e-05, "loss": 0.5104, "step": 40136 }, { "epoch": 2.247564116922388, "grad_norm": 1.7567428350448608, "learning_rate": 9.470578947368422e-05, "loss": 0.4483, "step": 40137 }, { "epoch": 2.247620114234517, "grad_norm": 1.270355224609375, "learning_rate": 9.470552631578949e-05, "loss": 0.3691, "step": 40138 }, { "epoch": 2.247676111546646, "grad_norm": 1.3380851745605469, "learning_rate": 9.470526315789474e-05, "loss": 0.3938, "step": 40139 }, { "epoch": 2.247732108858775, "grad_norm": 1.293134093284607, "learning_rate": 9.4705e-05, "loss": 0.4256, "step": 40140 }, { "epoch": 2.247788106170904, "grad_norm": 1.433640718460083, "learning_rate": 9.470473684210527e-05, "loss": 0.5922, "step": 40141 }, { "epoch": 2.247844103483033, "grad_norm": 1.2602661848068237, "learning_rate": 9.470447368421053e-05, "loss": 0.4579, "step": 40142 }, { "epoch": 2.247900100795162, "grad_norm": 1.115237832069397, "learning_rate": 9.47042105263158e-05, "loss": 0.3688, "step": 40143 }, { "epoch": 2.247956098107291, "grad_norm": 1.0808993577957153, "learning_rate": 9.470394736842105e-05, "loss": 0.5651, "step": 40144 }, { "epoch": 2.24801209541942, "grad_norm": 1.1818220615386963, "learning_rate": 9.470368421052632e-05, "loss": 0.3476, "step": 40145 }, { "epoch": 2.248068092731549, "grad_norm": 1.3889023065567017, "learning_rate": 9.470342105263158e-05, "loss": 0.3479, "step": 40146 }, { "epoch": 2.248124090043678, "grad_norm": 1.410182237625122, "learning_rate": 9.470315789473685e-05, "loss": 0.5013, "step": 40147 }, { "epoch": 2.248180087355807, "grad_norm": 1.351880669593811, "learning_rate": 9.470289473684211e-05, "loss": 0.5259, "step": 
40148 }, { "epoch": 2.248236084667936, "grad_norm": 1.4977388381958008, "learning_rate": 9.470263157894737e-05, "loss": 0.485, "step": 40149 }, { "epoch": 2.248292081980065, "grad_norm": 1.0728261470794678, "learning_rate": 9.470236842105263e-05, "loss": 0.3275, "step": 40150 }, { "epoch": 2.248348079292194, "grad_norm": 1.4816807508468628, "learning_rate": 9.47021052631579e-05, "loss": 0.4071, "step": 40151 }, { "epoch": 2.248404076604323, "grad_norm": 1.1847679615020752, "learning_rate": 9.470184210526317e-05, "loss": 0.5818, "step": 40152 }, { "epoch": 2.248460073916452, "grad_norm": 1.2820178270339966, "learning_rate": 9.470157894736843e-05, "loss": 0.4254, "step": 40153 }, { "epoch": 2.248516071228581, "grad_norm": 1.3149336576461792, "learning_rate": 9.470131578947369e-05, "loss": 0.505, "step": 40154 }, { "epoch": 2.2485720685407102, "grad_norm": 1.2017433643341064, "learning_rate": 9.470105263157896e-05, "loss": 0.2538, "step": 40155 }, { "epoch": 2.2486280658528393, "grad_norm": 1.2277536392211914, "learning_rate": 9.470078947368422e-05, "loss": 0.4682, "step": 40156 }, { "epoch": 2.2486840631649683, "grad_norm": 1.5223044157028198, "learning_rate": 9.470052631578948e-05, "loss": 0.4615, "step": 40157 }, { "epoch": 2.2487400604770973, "grad_norm": 1.10268235206604, "learning_rate": 9.470026315789474e-05, "loss": 0.4101, "step": 40158 }, { "epoch": 2.2487960577892263, "grad_norm": 1.0358903408050537, "learning_rate": 9.47e-05, "loss": 0.3913, "step": 40159 }, { "epoch": 2.2488520551013553, "grad_norm": 1.1859701871871948, "learning_rate": 9.469973684210527e-05, "loss": 0.4007, "step": 40160 }, { "epoch": 2.2489080524134843, "grad_norm": 1.2227067947387695, "learning_rate": 9.469947368421053e-05, "loss": 0.4533, "step": 40161 }, { "epoch": 2.2489640497256134, "grad_norm": 1.1936038732528687, "learning_rate": 9.469921052631579e-05, "loss": 0.4481, "step": 40162 }, { "epoch": 2.2490200470377424, "grad_norm": 1.214200496673584, "learning_rate": 
9.469894736842105e-05, "loss": 0.3699, "step": 40163 }, { "epoch": 2.2490760443498714, "grad_norm": 1.3284720182418823, "learning_rate": 9.469868421052632e-05, "loss": 0.3362, "step": 40164 }, { "epoch": 2.2491320416620004, "grad_norm": 1.2022281885147095, "learning_rate": 9.469842105263158e-05, "loss": 0.4207, "step": 40165 }, { "epoch": 2.2491880389741294, "grad_norm": 1.1746233701705933, "learning_rate": 9.469815789473684e-05, "loss": 0.4223, "step": 40166 }, { "epoch": 2.2492440362862585, "grad_norm": 1.3932158946990967, "learning_rate": 9.46978947368421e-05, "loss": 0.3824, "step": 40167 }, { "epoch": 2.2493000335983875, "grad_norm": 1.443080186843872, "learning_rate": 9.469763157894738e-05, "loss": 0.3932, "step": 40168 }, { "epoch": 2.2493560309105165, "grad_norm": 1.0938193798065186, "learning_rate": 9.469736842105264e-05, "loss": 0.3954, "step": 40169 }, { "epoch": 2.2494120282226455, "grad_norm": 1.5580129623413086, "learning_rate": 9.469710526315791e-05, "loss": 0.5555, "step": 40170 }, { "epoch": 2.2494680255347745, "grad_norm": 1.2123533487319946, "learning_rate": 9.469684210526316e-05, "loss": 0.4642, "step": 40171 }, { "epoch": 2.249524022846903, "grad_norm": 1.0812416076660156, "learning_rate": 9.469657894736843e-05, "loss": 0.3178, "step": 40172 }, { "epoch": 2.2495800201590326, "grad_norm": 1.3480777740478516, "learning_rate": 9.469631578947369e-05, "loss": 0.4252, "step": 40173 }, { "epoch": 2.249636017471161, "grad_norm": 1.2388789653778076, "learning_rate": 9.469605263157895e-05, "loss": 0.3595, "step": 40174 }, { "epoch": 2.2496920147832906, "grad_norm": 2.250688076019287, "learning_rate": 9.469578947368422e-05, "loss": 0.3539, "step": 40175 }, { "epoch": 2.249748012095419, "grad_norm": 1.377152919769287, "learning_rate": 9.469552631578947e-05, "loss": 0.3855, "step": 40176 }, { "epoch": 2.2498040094075487, "grad_norm": 1.3823412656784058, "learning_rate": 9.469526315789474e-05, "loss": 0.4807, "step": 40177 }, { "epoch": 2.2498600067196772, 
"grad_norm": 1.090590238571167, "learning_rate": 9.4695e-05, "loss": 0.3301, "step": 40178 }, { "epoch": 2.2499160040318067, "grad_norm": 1.5525349378585815, "learning_rate": 9.469473684210527e-05, "loss": 0.6467, "step": 40179 }, { "epoch": 2.2499720013439353, "grad_norm": 1.1669665575027466, "learning_rate": 9.469447368421053e-05, "loss": 0.3872, "step": 40180 }, { "epoch": 2.2500279986560647, "grad_norm": 1.456902027130127, "learning_rate": 9.46942105263158e-05, "loss": 0.3851, "step": 40181 }, { "epoch": 2.2500839959681933, "grad_norm": 1.1647282838821411, "learning_rate": 9.469394736842105e-05, "loss": 0.4387, "step": 40182 }, { "epoch": 2.2501399932803228, "grad_norm": 1.1139322519302368, "learning_rate": 9.469368421052633e-05, "loss": 0.3383, "step": 40183 }, { "epoch": 2.2501959905924513, "grad_norm": 1.3475444316864014, "learning_rate": 9.469342105263159e-05, "loss": 0.3843, "step": 40184 }, { "epoch": 2.250251987904581, "grad_norm": 1.1799030303955078, "learning_rate": 9.469315789473685e-05, "loss": 0.429, "step": 40185 }, { "epoch": 2.2503079852167094, "grad_norm": 1.2678601741790771, "learning_rate": 9.46928947368421e-05, "loss": 0.4107, "step": 40186 }, { "epoch": 2.250363982528839, "grad_norm": 1.0544410943984985, "learning_rate": 9.469263157894738e-05, "loss": 0.3521, "step": 40187 }, { "epoch": 2.2504199798409674, "grad_norm": 1.2537574768066406, "learning_rate": 9.469236842105264e-05, "loss": 0.3666, "step": 40188 }, { "epoch": 2.250475977153097, "grad_norm": 1.1284570693969727, "learning_rate": 9.46921052631579e-05, "loss": 0.3437, "step": 40189 }, { "epoch": 2.2505319744652255, "grad_norm": 2.3447911739349365, "learning_rate": 9.469184210526316e-05, "loss": 0.4281, "step": 40190 }, { "epoch": 2.250587971777355, "grad_norm": 1.790476679801941, "learning_rate": 9.469157894736842e-05, "loss": 0.5354, "step": 40191 }, { "epoch": 2.2506439690894835, "grad_norm": 1.093313455581665, "learning_rate": 9.469131578947369e-05, "loss": 0.4061, "step": 40192 
}, { "epoch": 2.250699966401613, "grad_norm": 1.2129058837890625, "learning_rate": 9.469105263157895e-05, "loss": 0.3507, "step": 40193 }, { "epoch": 2.2507559637137415, "grad_norm": 3.002373456954956, "learning_rate": 9.469078947368421e-05, "loss": 0.4381, "step": 40194 }, { "epoch": 2.250811961025871, "grad_norm": 1.1771005392074585, "learning_rate": 9.469052631578947e-05, "loss": 0.3605, "step": 40195 }, { "epoch": 2.2508679583379996, "grad_norm": 1.1493512392044067, "learning_rate": 9.469026315789474e-05, "loss": 0.4266, "step": 40196 }, { "epoch": 2.250923955650129, "grad_norm": 1.136483907699585, "learning_rate": 9.469e-05, "loss": 0.4038, "step": 40197 }, { "epoch": 2.2509799529622576, "grad_norm": 1.11309814453125, "learning_rate": 9.468973684210528e-05, "loss": 0.359, "step": 40198 }, { "epoch": 2.251035950274387, "grad_norm": 1.3423651456832886, "learning_rate": 9.468947368421052e-05, "loss": 0.5096, "step": 40199 }, { "epoch": 2.2510919475865157, "grad_norm": 1.347076177597046, "learning_rate": 9.46892105263158e-05, "loss": 0.6856, "step": 40200 }, { "epoch": 2.251147944898645, "grad_norm": 1.193207859992981, "learning_rate": 9.468894736842106e-05, "loss": 0.4806, "step": 40201 }, { "epoch": 2.2512039422107737, "grad_norm": 1.0027910470962524, "learning_rate": 9.468868421052633e-05, "loss": 0.3848, "step": 40202 }, { "epoch": 2.2512599395229027, "grad_norm": 1.05929434299469, "learning_rate": 9.468842105263158e-05, "loss": 0.3007, "step": 40203 }, { "epoch": 2.2513159368350317, "grad_norm": 1.8180017471313477, "learning_rate": 9.468815789473685e-05, "loss": 0.4107, "step": 40204 }, { "epoch": 2.2513719341471607, "grad_norm": 1.191176176071167, "learning_rate": 9.468789473684211e-05, "loss": 0.3798, "step": 40205 }, { "epoch": 2.2514279314592898, "grad_norm": 1.7255076169967651, "learning_rate": 9.468763157894738e-05, "loss": 0.3711, "step": 40206 }, { "epoch": 2.251483928771419, "grad_norm": 1.1258769035339355, "learning_rate": 9.468736842105264e-05, 
"loss": 0.4427, "step": 40207 }, { "epoch": 2.251539926083548, "grad_norm": 1.1882961988449097, "learning_rate": 9.46871052631579e-05, "loss": 0.4297, "step": 40208 }, { "epoch": 2.251595923395677, "grad_norm": 1.151963710784912, "learning_rate": 9.468684210526316e-05, "loss": 0.4622, "step": 40209 }, { "epoch": 2.251651920707806, "grad_norm": 1.1382732391357422, "learning_rate": 9.468657894736842e-05, "loss": 0.425, "step": 40210 }, { "epoch": 2.251707918019935, "grad_norm": 1.2088879346847534, "learning_rate": 9.46863157894737e-05, "loss": 0.4749, "step": 40211 }, { "epoch": 2.251763915332064, "grad_norm": 1.764775276184082, "learning_rate": 9.468605263157895e-05, "loss": 0.6663, "step": 40212 }, { "epoch": 2.251819912644193, "grad_norm": 1.2178705930709839, "learning_rate": 9.468578947368421e-05, "loss": 0.3944, "step": 40213 }, { "epoch": 2.251875909956322, "grad_norm": 1.1462750434875488, "learning_rate": 9.468552631578947e-05, "loss": 0.3637, "step": 40214 }, { "epoch": 2.251931907268451, "grad_norm": 1.1129882335662842, "learning_rate": 9.468526315789475e-05, "loss": 0.3065, "step": 40215 }, { "epoch": 2.25198790458058, "grad_norm": 1.1299878358840942, "learning_rate": 9.4685e-05, "loss": 0.3973, "step": 40216 }, { "epoch": 2.252043901892709, "grad_norm": 1.2472347021102905, "learning_rate": 9.468473684210527e-05, "loss": 0.4698, "step": 40217 }, { "epoch": 2.252099899204838, "grad_norm": 1.0947084426879883, "learning_rate": 9.468447368421053e-05, "loss": 0.3481, "step": 40218 }, { "epoch": 2.252155896516967, "grad_norm": 1.1644936800003052, "learning_rate": 9.46842105263158e-05, "loss": 0.4058, "step": 40219 }, { "epoch": 2.252211893829096, "grad_norm": 1.2407244443893433, "learning_rate": 9.468394736842106e-05, "loss": 0.2998, "step": 40220 }, { "epoch": 2.252267891141225, "grad_norm": 1.096522569656372, "learning_rate": 9.468368421052632e-05, "loss": 0.3788, "step": 40221 }, { "epoch": 2.252323888453354, "grad_norm": 1.0371482372283936, "learning_rate": 
9.468342105263158e-05, "loss": 0.392, "step": 40222 }, { "epoch": 2.252379885765483, "grad_norm": 1.0562124252319336, "learning_rate": 9.468315789473685e-05, "loss": 0.3896, "step": 40223 }, { "epoch": 2.252435883077612, "grad_norm": 1.2212586402893066, "learning_rate": 9.468289473684211e-05, "loss": 0.4116, "step": 40224 }, { "epoch": 2.252491880389741, "grad_norm": 1.3088864088058472, "learning_rate": 9.468263157894738e-05, "loss": 0.3429, "step": 40225 }, { "epoch": 2.25254787770187, "grad_norm": 1.155081033706665, "learning_rate": 9.468236842105263e-05, "loss": 0.423, "step": 40226 }, { "epoch": 2.252603875013999, "grad_norm": 1.1861810684204102, "learning_rate": 9.468210526315789e-05, "loss": 0.3501, "step": 40227 }, { "epoch": 2.252659872326128, "grad_norm": 1.2616063356399536, "learning_rate": 9.468184210526316e-05, "loss": 0.397, "step": 40228 }, { "epoch": 2.252715869638257, "grad_norm": 1.175142526626587, "learning_rate": 9.468157894736842e-05, "loss": 0.2777, "step": 40229 }, { "epoch": 2.2527718669503862, "grad_norm": 1.2170854806900024, "learning_rate": 9.46813157894737e-05, "loss": 0.3856, "step": 40230 }, { "epoch": 2.2528278642625152, "grad_norm": 1.0473250150680542, "learning_rate": 9.468105263157894e-05, "loss": 0.3207, "step": 40231 }, { "epoch": 2.2528838615746443, "grad_norm": 1.2947707176208496, "learning_rate": 9.468078947368422e-05, "loss": 0.3708, "step": 40232 }, { "epoch": 2.2529398588867733, "grad_norm": 1.213927149772644, "learning_rate": 9.468052631578948e-05, "loss": 0.3919, "step": 40233 }, { "epoch": 2.2529958561989023, "grad_norm": 1.257867693901062, "learning_rate": 9.468026315789475e-05, "loss": 0.4879, "step": 40234 }, { "epoch": 2.2530518535110313, "grad_norm": 1.1220237016677856, "learning_rate": 9.468000000000001e-05, "loss": 0.3626, "step": 40235 }, { "epoch": 2.2531078508231603, "grad_norm": 1.1531041860580444, "learning_rate": 9.467973684210527e-05, "loss": 0.3942, "step": 40236 }, { "epoch": 2.2531638481352894, 
"grad_norm": 1.3512048721313477, "learning_rate": 9.467947368421053e-05, "loss": 0.461, "step": 40237 }, { "epoch": 2.2532198454474184, "grad_norm": 1.1911240816116333, "learning_rate": 9.46792105263158e-05, "loss": 0.4034, "step": 40238 }, { "epoch": 2.2532758427595474, "grad_norm": 1.36972975730896, "learning_rate": 9.467894736842106e-05, "loss": 0.4378, "step": 40239 }, { "epoch": 2.2533318400716764, "grad_norm": 1.178636908531189, "learning_rate": 9.467868421052632e-05, "loss": 0.3935, "step": 40240 }, { "epoch": 2.2533878373838054, "grad_norm": 1.2232637405395508, "learning_rate": 9.467842105263158e-05, "loss": 0.5019, "step": 40241 }, { "epoch": 2.2534438346959345, "grad_norm": 1.074726939201355, "learning_rate": 9.467815789473685e-05, "loss": 0.3484, "step": 40242 }, { "epoch": 2.2534998320080635, "grad_norm": 1.0410679578781128, "learning_rate": 9.467789473684211e-05, "loss": 0.3195, "step": 40243 }, { "epoch": 2.2535558293201925, "grad_norm": 1.1891978979110718, "learning_rate": 9.467763157894737e-05, "loss": 0.4273, "step": 40244 }, { "epoch": 2.2536118266323215, "grad_norm": 1.1611050367355347, "learning_rate": 9.467736842105263e-05, "loss": 0.3517, "step": 40245 }, { "epoch": 2.2536678239444505, "grad_norm": 1.3147691488265991, "learning_rate": 9.467710526315789e-05, "loss": 0.4235, "step": 40246 }, { "epoch": 2.2537238212565796, "grad_norm": 1.5211437940597534, "learning_rate": 9.467684210526317e-05, "loss": 0.3539, "step": 40247 }, { "epoch": 2.2537798185687086, "grad_norm": 1.1932721138000488, "learning_rate": 9.467657894736843e-05, "loss": 0.4261, "step": 40248 }, { "epoch": 2.2538358158808376, "grad_norm": 1.2296850681304932, "learning_rate": 9.467631578947369e-05, "loss": 0.4086, "step": 40249 }, { "epoch": 2.2538918131929666, "grad_norm": 1.157375693321228, "learning_rate": 9.467605263157894e-05, "loss": 0.3681, "step": 40250 }, { "epoch": 2.2539478105050956, "grad_norm": 1.4234784841537476, "learning_rate": 9.467578947368422e-05, "loss": 0.4119, 
"step": 40251 }, { "epoch": 2.2540038078172246, "grad_norm": 1.0864088535308838, "learning_rate": 9.467552631578948e-05, "loss": 0.3212, "step": 40252 }, { "epoch": 2.2540598051293537, "grad_norm": 1.1244655847549438, "learning_rate": 9.467526315789475e-05, "loss": 0.3245, "step": 40253 }, { "epoch": 2.2541158024414827, "grad_norm": 1.0265889167785645, "learning_rate": 9.4675e-05, "loss": 0.3804, "step": 40254 }, { "epoch": 2.2541717997536117, "grad_norm": 1.337349534034729, "learning_rate": 9.467473684210527e-05, "loss": 0.4125, "step": 40255 }, { "epoch": 2.2542277970657407, "grad_norm": 1.4713741540908813, "learning_rate": 9.467447368421053e-05, "loss": 0.4126, "step": 40256 }, { "epoch": 2.2542837943778697, "grad_norm": 1.6377911567687988, "learning_rate": 9.46742105263158e-05, "loss": 0.5054, "step": 40257 }, { "epoch": 2.2543397916899988, "grad_norm": 1.0416444540023804, "learning_rate": 9.467394736842105e-05, "loss": 0.3019, "step": 40258 }, { "epoch": 2.254395789002128, "grad_norm": 1.5094548463821411, "learning_rate": 9.467368421052632e-05, "loss": 0.5026, "step": 40259 }, { "epoch": 2.254451786314257, "grad_norm": 1.0562242269515991, "learning_rate": 9.467342105263158e-05, "loss": 0.3072, "step": 40260 }, { "epoch": 2.254507783626386, "grad_norm": 1.186156988143921, "learning_rate": 9.467315789473684e-05, "loss": 0.3499, "step": 40261 }, { "epoch": 2.254563780938515, "grad_norm": 1.5425621271133423, "learning_rate": 9.467289473684212e-05, "loss": 0.3366, "step": 40262 }, { "epoch": 2.254619778250644, "grad_norm": 1.4950631856918335, "learning_rate": 9.467263157894736e-05, "loss": 0.4938, "step": 40263 }, { "epoch": 2.254675775562773, "grad_norm": 1.323747158050537, "learning_rate": 9.467236842105264e-05, "loss": 0.3857, "step": 40264 }, { "epoch": 2.254731772874902, "grad_norm": 1.1153520345687866, "learning_rate": 9.46721052631579e-05, "loss": 0.3747, "step": 40265 }, { "epoch": 2.254787770187031, "grad_norm": 1.0667577981948853, "learning_rate": 
9.467184210526317e-05, "loss": 0.3097, "step": 40266 }, { "epoch": 2.25484376749916, "grad_norm": 1.3149584531784058, "learning_rate": 9.467157894736843e-05, "loss": 0.4503, "step": 40267 }, { "epoch": 2.254899764811289, "grad_norm": 1.2549396753311157, "learning_rate": 9.467131578947369e-05, "loss": 0.369, "step": 40268 }, { "epoch": 2.254955762123418, "grad_norm": 1.2764784097671509, "learning_rate": 9.467105263157895e-05, "loss": 0.4124, "step": 40269 }, { "epoch": 2.255011759435547, "grad_norm": 1.1376515626907349, "learning_rate": 9.467078947368422e-05, "loss": 0.4005, "step": 40270 }, { "epoch": 2.255067756747676, "grad_norm": 1.490296483039856, "learning_rate": 9.467052631578948e-05, "loss": 0.3302, "step": 40271 }, { "epoch": 2.255123754059805, "grad_norm": 2.2487077713012695, "learning_rate": 9.467026315789474e-05, "loss": 0.3226, "step": 40272 }, { "epoch": 2.255179751371934, "grad_norm": 1.6115819215774536, "learning_rate": 9.467e-05, "loss": 0.4234, "step": 40273 }, { "epoch": 2.255235748684063, "grad_norm": 8.818623542785645, "learning_rate": 9.466973684210527e-05, "loss": 0.4371, "step": 40274 }, { "epoch": 2.255291745996192, "grad_norm": 1.279303789138794, "learning_rate": 9.466947368421053e-05, "loss": 0.4366, "step": 40275 }, { "epoch": 2.255347743308321, "grad_norm": 1.1217670440673828, "learning_rate": 9.466921052631579e-05, "loss": 0.3366, "step": 40276 }, { "epoch": 2.25540374062045, "grad_norm": 1.648451805114746, "learning_rate": 9.466894736842105e-05, "loss": 0.4528, "step": 40277 }, { "epoch": 2.255459737932579, "grad_norm": 1.1057618856430054, "learning_rate": 9.466868421052631e-05, "loss": 0.3194, "step": 40278 }, { "epoch": 2.255515735244708, "grad_norm": 1.3770878314971924, "learning_rate": 9.466842105263159e-05, "loss": 0.4615, "step": 40279 }, { "epoch": 2.255571732556837, "grad_norm": 1.1005779504776, "learning_rate": 9.466815789473685e-05, "loss": 0.364, "step": 40280 }, { "epoch": 2.255627729868966, "grad_norm": 1.367957353591919, 
"learning_rate": 9.46678947368421e-05, "loss": 0.4468, "step": 40281 }, { "epoch": 2.255683727181095, "grad_norm": 1.3481450080871582, "learning_rate": 9.466763157894736e-05, "loss": 0.461, "step": 40282 }, { "epoch": 2.2557397244932242, "grad_norm": 1.098899245262146, "learning_rate": 9.466736842105264e-05, "loss": 0.4193, "step": 40283 }, { "epoch": 2.2557957218053533, "grad_norm": 1.4182229042053223, "learning_rate": 9.46671052631579e-05, "loss": 0.4216, "step": 40284 }, { "epoch": 2.2558517191174823, "grad_norm": 3.334995746612549, "learning_rate": 9.466684210526317e-05, "loss": 0.5931, "step": 40285 }, { "epoch": 2.2559077164296113, "grad_norm": 1.3568634986877441, "learning_rate": 9.466657894736842e-05, "loss": 0.442, "step": 40286 }, { "epoch": 2.2559637137417403, "grad_norm": 0.9288855791091919, "learning_rate": 9.466631578947369e-05, "loss": 0.3407, "step": 40287 }, { "epoch": 2.2560197110538693, "grad_norm": 1.2287007570266724, "learning_rate": 9.466605263157895e-05, "loss": 0.3445, "step": 40288 }, { "epoch": 2.2560757083659984, "grad_norm": 0.9728140830993652, "learning_rate": 9.466578947368422e-05, "loss": 0.3269, "step": 40289 }, { "epoch": 2.2561317056781274, "grad_norm": 0.9737794995307922, "learning_rate": 9.466552631578948e-05, "loss": 0.3516, "step": 40290 }, { "epoch": 2.2561877029902564, "grad_norm": 1.1371206045150757, "learning_rate": 9.466526315789474e-05, "loss": 0.4607, "step": 40291 }, { "epoch": 2.2562437003023854, "grad_norm": 1.1774191856384277, "learning_rate": 9.4665e-05, "loss": 0.4708, "step": 40292 }, { "epoch": 2.2562996976145144, "grad_norm": 1.1353883743286133, "learning_rate": 9.466473684210528e-05, "loss": 0.539, "step": 40293 }, { "epoch": 2.2563556949266435, "grad_norm": 1.2183647155761719, "learning_rate": 9.466447368421054e-05, "loss": 0.2985, "step": 40294 }, { "epoch": 2.2564116922387725, "grad_norm": 2.177011728286743, "learning_rate": 9.46642105263158e-05, "loss": 0.4216, "step": 40295 }, { "epoch": 
2.2564676895509015, "grad_norm": 1.1379708051681519, "learning_rate": 9.466394736842105e-05, "loss": 0.4859, "step": 40296 }, { "epoch": 2.2565236868630305, "grad_norm": 1.1925945281982422, "learning_rate": 9.466368421052631e-05, "loss": 0.4342, "step": 40297 }, { "epoch": 2.2565796841751595, "grad_norm": 1.1074904203414917, "learning_rate": 9.466342105263159e-05, "loss": 0.469, "step": 40298 }, { "epoch": 2.2566356814872885, "grad_norm": 1.2941091060638428, "learning_rate": 9.466315789473685e-05, "loss": 0.412, "step": 40299 }, { "epoch": 2.2566916787994176, "grad_norm": 1.2547640800476074, "learning_rate": 9.466289473684211e-05, "loss": 0.3441, "step": 40300 }, { "epoch": 2.2567476761115466, "grad_norm": 1.072322964668274, "learning_rate": 9.466263157894737e-05, "loss": 0.4617, "step": 40301 }, { "epoch": 2.2568036734236756, "grad_norm": 1.2649054527282715, "learning_rate": 9.466236842105264e-05, "loss": 0.4337, "step": 40302 }, { "epoch": 2.2568596707358046, "grad_norm": 1.1271538734436035, "learning_rate": 9.46621052631579e-05, "loss": 0.4349, "step": 40303 }, { "epoch": 2.2569156680479336, "grad_norm": 1.3438583612442017, "learning_rate": 9.466184210526316e-05, "loss": 0.4786, "step": 40304 }, { "epoch": 2.2569716653600627, "grad_norm": 0.9407152533531189, "learning_rate": 9.466157894736842e-05, "loss": 0.2909, "step": 40305 }, { "epoch": 2.2570276626721917, "grad_norm": 1.1129287481307983, "learning_rate": 9.466131578947369e-05, "loss": 0.3138, "step": 40306 }, { "epoch": 2.2570836599843207, "grad_norm": 1.1620537042617798, "learning_rate": 9.466105263157895e-05, "loss": 0.3559, "step": 40307 }, { "epoch": 2.2571396572964497, "grad_norm": 1.3283755779266357, "learning_rate": 9.466078947368423e-05, "loss": 0.4498, "step": 40308 }, { "epoch": 2.2571956546085787, "grad_norm": 1.0311189889907837, "learning_rate": 9.466052631578947e-05, "loss": 0.3601, "step": 40309 }, { "epoch": 2.2572516519207078, "grad_norm": 1.1707583665847778, "learning_rate": 
9.466026315789475e-05, "loss": 0.426, "step": 40310 }, { "epoch": 2.2573076492328368, "grad_norm": 1.4164403676986694, "learning_rate": 9.466e-05, "loss": 0.4428, "step": 40311 }, { "epoch": 2.257363646544966, "grad_norm": 1.3374484777450562, "learning_rate": 9.465973684210526e-05, "loss": 0.5566, "step": 40312 }, { "epoch": 2.257419643857095, "grad_norm": 1.245092511177063, "learning_rate": 9.465947368421052e-05, "loss": 0.4054, "step": 40313 }, { "epoch": 2.257475641169224, "grad_norm": 1.0508105754852295, "learning_rate": 9.465921052631578e-05, "loss": 0.2421, "step": 40314 }, { "epoch": 2.257531638481353, "grad_norm": 1.4905184507369995, "learning_rate": 9.465894736842106e-05, "loss": 0.4139, "step": 40315 }, { "epoch": 2.257587635793482, "grad_norm": 1.2063349485397339, "learning_rate": 9.465868421052632e-05, "loss": 0.3856, "step": 40316 }, { "epoch": 2.257643633105611, "grad_norm": 1.3707929849624634, "learning_rate": 9.465842105263159e-05, "loss": 0.5647, "step": 40317 }, { "epoch": 2.25769963041774, "grad_norm": 1.251119613647461, "learning_rate": 9.465815789473684e-05, "loss": 0.5111, "step": 40318 }, { "epoch": 2.257755627729869, "grad_norm": 1.135854959487915, "learning_rate": 9.465789473684211e-05, "loss": 0.3353, "step": 40319 }, { "epoch": 2.257811625041998, "grad_norm": 1.2801518440246582, "learning_rate": 9.465763157894737e-05, "loss": 0.4317, "step": 40320 }, { "epoch": 2.257867622354127, "grad_norm": 1.2633455991744995, "learning_rate": 9.465736842105264e-05, "loss": 0.4702, "step": 40321 }, { "epoch": 2.257923619666256, "grad_norm": 1.1757678985595703, "learning_rate": 9.46571052631579e-05, "loss": 0.3274, "step": 40322 }, { "epoch": 2.257979616978385, "grad_norm": 1.1845859289169312, "learning_rate": 9.465684210526316e-05, "loss": 0.3826, "step": 40323 }, { "epoch": 2.258035614290514, "grad_norm": 1.0530648231506348, "learning_rate": 9.465657894736842e-05, "loss": 0.3302, "step": 40324 }, { "epoch": 2.258091611602643, "grad_norm": 
1.1436012983322144, "learning_rate": 9.46563157894737e-05, "loss": 0.4458, "step": 40325 }, { "epoch": 2.258147608914772, "grad_norm": 1.3375295400619507, "learning_rate": 9.465605263157896e-05, "loss": 0.4548, "step": 40326 }, { "epoch": 2.258203606226901, "grad_norm": 1.704437494277954, "learning_rate": 9.465578947368421e-05, "loss": 0.4973, "step": 40327 }, { "epoch": 2.25825960353903, "grad_norm": 1.1635740995407104, "learning_rate": 9.465552631578947e-05, "loss": 0.3054, "step": 40328 }, { "epoch": 2.258315600851159, "grad_norm": 1.2312188148498535, "learning_rate": 9.465526315789475e-05, "loss": 0.3454, "step": 40329 }, { "epoch": 2.258371598163288, "grad_norm": 1.2998744249343872, "learning_rate": 9.465500000000001e-05, "loss": 0.4072, "step": 40330 }, { "epoch": 2.258427595475417, "grad_norm": 1.1218258142471313, "learning_rate": 9.465473684210527e-05, "loss": 0.4692, "step": 40331 }, { "epoch": 2.258483592787546, "grad_norm": 1.1797640323638916, "learning_rate": 9.465447368421053e-05, "loss": 0.3704, "step": 40332 }, { "epoch": 2.258539590099675, "grad_norm": 1.1451855897903442, "learning_rate": 9.465421052631579e-05, "loss": 0.3644, "step": 40333 }, { "epoch": 2.258595587411804, "grad_norm": 1.065679907798767, "learning_rate": 9.465394736842106e-05, "loss": 0.4223, "step": 40334 }, { "epoch": 2.2586515847239332, "grad_norm": 1.1771676540374756, "learning_rate": 9.465368421052632e-05, "loss": 0.3959, "step": 40335 }, { "epoch": 2.2587075820360623, "grad_norm": 1.444936990737915, "learning_rate": 9.465342105263158e-05, "loss": 0.6433, "step": 40336 }, { "epoch": 2.2587635793481913, "grad_norm": 1.2181119918823242, "learning_rate": 9.465315789473684e-05, "loss": 0.3704, "step": 40337 }, { "epoch": 2.2588195766603203, "grad_norm": 1.3059271574020386, "learning_rate": 9.465289473684211e-05, "loss": 0.3059, "step": 40338 }, { "epoch": 2.2588755739724493, "grad_norm": 1.0524498224258423, "learning_rate": 9.465263157894737e-05, "loss": 0.3513, "step": 40339 }, { 
"epoch": 2.2589315712845783, "grad_norm": 1.3416804075241089, "learning_rate": 9.465236842105265e-05, "loss": 0.3838, "step": 40340 }, { "epoch": 2.2589875685967074, "grad_norm": 1.1031877994537354, "learning_rate": 9.465210526315789e-05, "loss": 0.4401, "step": 40341 }, { "epoch": 2.2590435659088364, "grad_norm": 1.066445231437683, "learning_rate": 9.465184210526317e-05, "loss": 0.3803, "step": 40342 }, { "epoch": 2.2590995632209654, "grad_norm": 1.0381954908370972, "learning_rate": 9.465157894736842e-05, "loss": 0.3713, "step": 40343 }, { "epoch": 2.2591555605330944, "grad_norm": 1.3082499504089355, "learning_rate": 9.46513157894737e-05, "loss": 0.5798, "step": 40344 }, { "epoch": 2.2592115578452234, "grad_norm": 1.2993909120559692, "learning_rate": 9.465105263157896e-05, "loss": 0.4107, "step": 40345 }, { "epoch": 2.2592675551573524, "grad_norm": 1.0972895622253418, "learning_rate": 9.465078947368422e-05, "loss": 0.4262, "step": 40346 }, { "epoch": 2.2593235524694815, "grad_norm": 1.192597508430481, "learning_rate": 9.465052631578948e-05, "loss": 0.3273, "step": 40347 }, { "epoch": 2.2593795497816105, "grad_norm": 1.2875967025756836, "learning_rate": 9.465026315789474e-05, "loss": 0.3857, "step": 40348 }, { "epoch": 2.2594355470937395, "grad_norm": 1.0806066989898682, "learning_rate": 9.465000000000001e-05, "loss": 0.3981, "step": 40349 }, { "epoch": 2.2594915444058685, "grad_norm": 1.3211942911148071, "learning_rate": 9.464973684210527e-05, "loss": 0.5012, "step": 40350 }, { "epoch": 2.2595475417179975, "grad_norm": 1.0952271223068237, "learning_rate": 9.464947368421053e-05, "loss": 0.3828, "step": 40351 }, { "epoch": 2.2596035390301266, "grad_norm": 1.0847833156585693, "learning_rate": 9.464921052631579e-05, "loss": 0.448, "step": 40352 }, { "epoch": 2.2596595363422556, "grad_norm": 1.0694583654403687, "learning_rate": 9.464894736842106e-05, "loss": 0.3886, "step": 40353 }, { "epoch": 2.2597155336543846, "grad_norm": 0.9983018040657043, "learning_rate": 
9.464868421052632e-05, "loss": 0.3799, "step": 40354 }, { "epoch": 2.2597715309665136, "grad_norm": 1.216844916343689, "learning_rate": 9.464842105263158e-05, "loss": 0.4187, "step": 40355 }, { "epoch": 2.2598275282786426, "grad_norm": 1.175490140914917, "learning_rate": 9.464815789473684e-05, "loss": 0.3263, "step": 40356 }, { "epoch": 2.2598835255907717, "grad_norm": 1.2974120378494263, "learning_rate": 9.464789473684212e-05, "loss": 0.3872, "step": 40357 }, { "epoch": 2.2599395229029007, "grad_norm": 1.2057908773422241, "learning_rate": 9.464763157894737e-05, "loss": 0.4072, "step": 40358 }, { "epoch": 2.2599955202150297, "grad_norm": 1.2761505842208862, "learning_rate": 9.464736842105263e-05, "loss": 0.382, "step": 40359 }, { "epoch": 2.2600515175271587, "grad_norm": 1.1110893487930298, "learning_rate": 9.46471052631579e-05, "loss": 0.3967, "step": 40360 }, { "epoch": 2.2601075148392877, "grad_norm": 1.2542656660079956, "learning_rate": 9.464684210526317e-05, "loss": 0.3321, "step": 40361 }, { "epoch": 2.2601635121514168, "grad_norm": 1.3461135625839233, "learning_rate": 9.464657894736843e-05, "loss": 0.506, "step": 40362 }, { "epoch": 2.2602195094635458, "grad_norm": 3.6581709384918213, "learning_rate": 9.46463157894737e-05, "loss": 0.4516, "step": 40363 }, { "epoch": 2.260275506775675, "grad_norm": 1.1049401760101318, "learning_rate": 9.464605263157895e-05, "loss": 0.2598, "step": 40364 }, { "epoch": 2.260331504087804, "grad_norm": 1.2461767196655273, "learning_rate": 9.46457894736842e-05, "loss": 0.33, "step": 40365 }, { "epoch": 2.260387501399933, "grad_norm": 1.1858155727386475, "learning_rate": 9.464552631578948e-05, "loss": 0.3229, "step": 40366 }, { "epoch": 2.260443498712062, "grad_norm": 1.3696266412734985, "learning_rate": 9.464526315789474e-05, "loss": 0.435, "step": 40367 }, { "epoch": 2.260499496024191, "grad_norm": 1.6595209836959839, "learning_rate": 9.4645e-05, "loss": 0.4714, "step": 40368 }, { "epoch": 2.26055549333632, "grad_norm": 
1.3161598443984985, "learning_rate": 9.464473684210526e-05, "loss": 0.3896, "step": 40369 }, { "epoch": 2.260611490648449, "grad_norm": 0.9250859022140503, "learning_rate": 9.464447368421053e-05, "loss": 0.2896, "step": 40370 }, { "epoch": 2.260667487960578, "grad_norm": 1.1213774681091309, "learning_rate": 9.464421052631579e-05, "loss": 0.443, "step": 40371 }, { "epoch": 2.260723485272707, "grad_norm": 1.0855780839920044, "learning_rate": 9.464394736842107e-05, "loss": 0.3396, "step": 40372 }, { "epoch": 2.260779482584836, "grad_norm": 1.2259312868118286, "learning_rate": 9.464368421052631e-05, "loss": 0.4426, "step": 40373 }, { "epoch": 2.260835479896965, "grad_norm": 1.1146873235702515, "learning_rate": 9.464342105263158e-05, "loss": 0.3586, "step": 40374 }, { "epoch": 2.260891477209094, "grad_norm": 1.0434610843658447, "learning_rate": 9.464315789473684e-05, "loss": 0.3922, "step": 40375 }, { "epoch": 2.260947474521223, "grad_norm": 1.2282809019088745, "learning_rate": 9.464289473684212e-05, "loss": 0.3515, "step": 40376 }, { "epoch": 2.261003471833352, "grad_norm": 1.2720481157302856, "learning_rate": 9.464263157894738e-05, "loss": 0.4477, "step": 40377 }, { "epoch": 2.261059469145481, "grad_norm": 1.222194790840149, "learning_rate": 9.464236842105264e-05, "loss": 0.3408, "step": 40378 }, { "epoch": 2.26111546645761, "grad_norm": 1.0590826272964478, "learning_rate": 9.46421052631579e-05, "loss": 0.3593, "step": 40379 }, { "epoch": 2.261171463769739, "grad_norm": 1.039536476135254, "learning_rate": 9.464184210526317e-05, "loss": 0.3553, "step": 40380 }, { "epoch": 2.261227461081868, "grad_norm": 1.315877079963684, "learning_rate": 9.464157894736843e-05, "loss": 0.416, "step": 40381 }, { "epoch": 2.261283458393997, "grad_norm": 1.3325190544128418, "learning_rate": 9.464131578947369e-05, "loss": 0.4003, "step": 40382 }, { "epoch": 2.261339455706126, "grad_norm": 1.7807756662368774, "learning_rate": 9.464105263157895e-05, "loss": 0.5673, "step": 40383 }, { 
"epoch": 2.261395453018255, "grad_norm": 1.0976921319961548, "learning_rate": 9.464078947368421e-05, "loss": 0.4525, "step": 40384 }, { "epoch": 2.261451450330384, "grad_norm": 1.4316296577453613, "learning_rate": 9.464052631578948e-05, "loss": 0.4938, "step": 40385 }, { "epoch": 2.261507447642513, "grad_norm": 1.2819199562072754, "learning_rate": 9.464026315789474e-05, "loss": 0.4582, "step": 40386 }, { "epoch": 2.2615634449546422, "grad_norm": 1.1583997011184692, "learning_rate": 9.464e-05, "loss": 0.4459, "step": 40387 }, { "epoch": 2.2616194422667713, "grad_norm": 1.0067702531814575, "learning_rate": 9.463973684210526e-05, "loss": 0.3078, "step": 40388 }, { "epoch": 2.2616754395789003, "grad_norm": 1.244781494140625, "learning_rate": 9.463947368421053e-05, "loss": 0.3075, "step": 40389 }, { "epoch": 2.2617314368910293, "grad_norm": 1.3263671398162842, "learning_rate": 9.46392105263158e-05, "loss": 0.4379, "step": 40390 }, { "epoch": 2.2617874342031583, "grad_norm": 1.2320064306259155, "learning_rate": 9.463894736842105e-05, "loss": 0.3751, "step": 40391 }, { "epoch": 2.2618434315152873, "grad_norm": 1.0125446319580078, "learning_rate": 9.463868421052631e-05, "loss": 0.3668, "step": 40392 }, { "epoch": 2.2618994288274163, "grad_norm": 0.9972684383392334, "learning_rate": 9.463842105263159e-05, "loss": 0.3232, "step": 40393 }, { "epoch": 2.2619554261395454, "grad_norm": 1.8667103052139282, "learning_rate": 9.463815789473685e-05, "loss": 0.4368, "step": 40394 }, { "epoch": 2.2620114234516744, "grad_norm": 1.273384928703308, "learning_rate": 9.463789473684212e-05, "loss": 0.4138, "step": 40395 }, { "epoch": 2.2620674207638034, "grad_norm": 1.73623526096344, "learning_rate": 9.463763157894737e-05, "loss": 0.4158, "step": 40396 }, { "epoch": 2.2621234180759324, "grad_norm": 1.2916810512542725, "learning_rate": 9.463736842105264e-05, "loss": 0.3893, "step": 40397 }, { "epoch": 2.2621794153880614, "grad_norm": 1.079209566116333, "learning_rate": 9.46371052631579e-05, 
"loss": 0.4067, "step": 40398 }, { "epoch": 2.2622354127001905, "grad_norm": 1.1295397281646729, "learning_rate": 9.463684210526316e-05, "loss": 0.453, "step": 40399 }, { "epoch": 2.2622914100123195, "grad_norm": 1.0388671159744263, "learning_rate": 9.463657894736843e-05, "loss": 0.4025, "step": 40400 }, { "epoch": 2.2623474073244485, "grad_norm": 1.066413164138794, "learning_rate": 9.463631578947368e-05, "loss": 0.3365, "step": 40401 }, { "epoch": 2.2624034046365775, "grad_norm": 1.284334421157837, "learning_rate": 9.463605263157895e-05, "loss": 0.3746, "step": 40402 }, { "epoch": 2.2624594019487065, "grad_norm": 1.3797918558120728, "learning_rate": 9.463578947368421e-05, "loss": 0.4389, "step": 40403 }, { "epoch": 2.2625153992608356, "grad_norm": 1.4297658205032349, "learning_rate": 9.463552631578949e-05, "loss": 0.4545, "step": 40404 }, { "epoch": 2.2625713965729646, "grad_norm": 1.23444664478302, "learning_rate": 9.463526315789473e-05, "loss": 0.4252, "step": 40405 }, { "epoch": 2.2626273938850936, "grad_norm": 3.6973371505737305, "learning_rate": 9.4635e-05, "loss": 0.44, "step": 40406 }, { "epoch": 2.2626833911972226, "grad_norm": 1.0619100332260132, "learning_rate": 9.463473684210526e-05, "loss": 0.3907, "step": 40407 }, { "epoch": 2.2627393885093516, "grad_norm": 1.301879644393921, "learning_rate": 9.463447368421054e-05, "loss": 0.4198, "step": 40408 }, { "epoch": 2.2627953858214807, "grad_norm": 1.2094963788986206, "learning_rate": 9.46342105263158e-05, "loss": 0.486, "step": 40409 }, { "epoch": 2.2628513831336097, "grad_norm": 1.3144015073776245, "learning_rate": 9.463394736842106e-05, "loss": 0.4568, "step": 40410 }, { "epoch": 2.2629073804457387, "grad_norm": 1.2742695808410645, "learning_rate": 9.463368421052632e-05, "loss": 0.4925, "step": 40411 }, { "epoch": 2.2629633777578677, "grad_norm": 1.131796956062317, "learning_rate": 9.463342105263159e-05, "loss": 0.539, "step": 40412 }, { "epoch": 2.2630193750699967, "grad_norm": 2.9456851482391357, 
"learning_rate": 9.463315789473685e-05, "loss": 0.3464, "step": 40413 }, { "epoch": 2.2630753723821257, "grad_norm": 1.27320396900177, "learning_rate": 9.463289473684211e-05, "loss": 0.5058, "step": 40414 }, { "epoch": 2.2631313696942548, "grad_norm": 1.1681808233261108, "learning_rate": 9.463263157894737e-05, "loss": 0.3627, "step": 40415 }, { "epoch": 2.263187367006384, "grad_norm": 1.185450553894043, "learning_rate": 9.463236842105263e-05, "loss": 0.4167, "step": 40416 }, { "epoch": 2.263243364318513, "grad_norm": 1.0709630250930786, "learning_rate": 9.46321052631579e-05, "loss": 0.4486, "step": 40417 }, { "epoch": 2.263299361630642, "grad_norm": 1.3015860319137573, "learning_rate": 9.463184210526316e-05, "loss": 0.4456, "step": 40418 }, { "epoch": 2.263355358942771, "grad_norm": 1.0957012176513672, "learning_rate": 9.463157894736842e-05, "loss": 0.3832, "step": 40419 }, { "epoch": 2.2634113562549, "grad_norm": 1.168670654296875, "learning_rate": 9.463131578947368e-05, "loss": 0.4307, "step": 40420 }, { "epoch": 2.263467353567029, "grad_norm": 1.1268547773361206, "learning_rate": 9.463105263157895e-05, "loss": 0.4312, "step": 40421 }, { "epoch": 2.263523350879158, "grad_norm": 1.2061339616775513, "learning_rate": 9.463078947368421e-05, "loss": 0.3836, "step": 40422 }, { "epoch": 2.263579348191287, "grad_norm": 1.1966536045074463, "learning_rate": 9.463052631578947e-05, "loss": 0.3374, "step": 40423 }, { "epoch": 2.263635345503416, "grad_norm": 1.177609920501709, "learning_rate": 9.463026315789473e-05, "loss": 0.4126, "step": 40424 }, { "epoch": 2.263691342815545, "grad_norm": 1.6852208375930786, "learning_rate": 9.463000000000001e-05, "loss": 0.387, "step": 40425 }, { "epoch": 2.263747340127674, "grad_norm": 1.2249035835266113, "learning_rate": 9.462973684210527e-05, "loss": 0.444, "step": 40426 }, { "epoch": 2.263803337439803, "grad_norm": 8.048696517944336, "learning_rate": 9.462947368421054e-05, "loss": 0.336, "step": 40427 }, { "epoch": 2.263859334751932, 
"grad_norm": 1.4287192821502686, "learning_rate": 9.462921052631579e-05, "loss": 0.4392, "step": 40428 }, { "epoch": 2.263915332064061, "grad_norm": 1.277269721031189, "learning_rate": 9.462894736842106e-05, "loss": 0.4229, "step": 40429 }, { "epoch": 2.26397132937619, "grad_norm": 1.132293462753296, "learning_rate": 9.462868421052632e-05, "loss": 0.4926, "step": 40430 }, { "epoch": 2.264027326688319, "grad_norm": 1.2950248718261719, "learning_rate": 9.462842105263159e-05, "loss": 0.3702, "step": 40431 }, { "epoch": 2.264083324000448, "grad_norm": 1.1158510446548462, "learning_rate": 9.462815789473685e-05, "loss": 0.4292, "step": 40432 }, { "epoch": 2.264139321312577, "grad_norm": 2.381382465362549, "learning_rate": 9.46278947368421e-05, "loss": 0.4134, "step": 40433 }, { "epoch": 2.264195318624706, "grad_norm": 1.0139827728271484, "learning_rate": 9.462763157894737e-05, "loss": 0.388, "step": 40434 }, { "epoch": 2.264251315936835, "grad_norm": 1.3971186876296997, "learning_rate": 9.462736842105263e-05, "loss": 0.3968, "step": 40435 }, { "epoch": 2.264307313248964, "grad_norm": 1.136900782585144, "learning_rate": 9.46271052631579e-05, "loss": 0.4134, "step": 40436 }, { "epoch": 2.264363310561093, "grad_norm": 1.0802419185638428, "learning_rate": 9.462684210526316e-05, "loss": 0.3914, "step": 40437 }, { "epoch": 2.264419307873222, "grad_norm": 1.1812375783920288, "learning_rate": 9.462657894736842e-05, "loss": 0.3496, "step": 40438 }, { "epoch": 2.2644753051853512, "grad_norm": 1.2728197574615479, "learning_rate": 9.462631578947368e-05, "loss": 0.3968, "step": 40439 }, { "epoch": 2.2645313024974802, "grad_norm": 1.2735815048217773, "learning_rate": 9.462605263157896e-05, "loss": 0.4051, "step": 40440 }, { "epoch": 2.2645872998096093, "grad_norm": 1.0759103298187256, "learning_rate": 9.462578947368422e-05, "loss": 0.3644, "step": 40441 }, { "epoch": 2.2646432971217383, "grad_norm": 1.2403099536895752, "learning_rate": 9.462552631578948e-05, "loss": 0.351, "step": 
40442 }, { "epoch": 2.2646992944338673, "grad_norm": 1.233041763305664, "learning_rate": 9.462526315789474e-05, "loss": 0.402, "step": 40443 }, { "epoch": 2.2647552917459963, "grad_norm": 1.0860545635223389, "learning_rate": 9.462500000000001e-05, "loss": 0.4781, "step": 40444 }, { "epoch": 2.2648112890581253, "grad_norm": 1.46695077419281, "learning_rate": 9.462473684210527e-05, "loss": 0.4261, "step": 40445 }, { "epoch": 2.2648672863702544, "grad_norm": 1.1049014329910278, "learning_rate": 9.462447368421053e-05, "loss": 0.3832, "step": 40446 }, { "epoch": 2.2649232836823834, "grad_norm": 1.1958290338516235, "learning_rate": 9.462421052631579e-05, "loss": 0.3442, "step": 40447 }, { "epoch": 2.2649792809945124, "grad_norm": 1.3269994258880615, "learning_rate": 9.462394736842106e-05, "loss": 0.4438, "step": 40448 }, { "epoch": 2.2650352783066414, "grad_norm": 1.1083450317382812, "learning_rate": 9.462368421052632e-05, "loss": 0.3506, "step": 40449 }, { "epoch": 2.2650912756187704, "grad_norm": 1.1331969499588013, "learning_rate": 9.46234210526316e-05, "loss": 0.3819, "step": 40450 }, { "epoch": 2.2651472729308995, "grad_norm": 1.1612178087234497, "learning_rate": 9.462315789473684e-05, "loss": 0.4164, "step": 40451 }, { "epoch": 2.2652032702430285, "grad_norm": 1.1315405368804932, "learning_rate": 9.46228947368421e-05, "loss": 0.4671, "step": 40452 }, { "epoch": 2.2652592675551575, "grad_norm": 1.1833078861236572, "learning_rate": 9.462263157894737e-05, "loss": 0.3384, "step": 40453 }, { "epoch": 2.2653152648672865, "grad_norm": 1.2797057628631592, "learning_rate": 9.462236842105263e-05, "loss": 0.4459, "step": 40454 }, { "epoch": 2.2653712621794155, "grad_norm": 1.142492651939392, "learning_rate": 9.462210526315791e-05, "loss": 0.3413, "step": 40455 }, { "epoch": 2.2654272594915446, "grad_norm": 1.2397264242172241, "learning_rate": 9.462184210526315e-05, "loss": 0.4774, "step": 40456 }, { "epoch": 2.2654832568036736, "grad_norm": 1.2352741956710815, 
"learning_rate": 9.462157894736843e-05, "loss": 0.3767, "step": 40457 }, { "epoch": 2.2655392541158026, "grad_norm": 1.2771943807601929, "learning_rate": 9.462131578947369e-05, "loss": 0.445, "step": 40458 }, { "epoch": 2.2655952514279316, "grad_norm": 0.9388750195503235, "learning_rate": 9.462105263157896e-05, "loss": 0.2893, "step": 40459 }, { "epoch": 2.2656512487400606, "grad_norm": 1.263070821762085, "learning_rate": 9.46207894736842e-05, "loss": 0.3609, "step": 40460 }, { "epoch": 2.2657072460521896, "grad_norm": 1.284348487854004, "learning_rate": 9.462052631578948e-05, "loss": 0.3801, "step": 40461 }, { "epoch": 2.2657632433643187, "grad_norm": 1.3505194187164307, "learning_rate": 9.462026315789474e-05, "loss": 0.3853, "step": 40462 }, { "epoch": 2.2658192406764477, "grad_norm": 1.1799821853637695, "learning_rate": 9.462000000000001e-05, "loss": 0.4255, "step": 40463 }, { "epoch": 2.2658752379885767, "grad_norm": 1.0638905763626099, "learning_rate": 9.461973684210527e-05, "loss": 0.4123, "step": 40464 }, { "epoch": 2.2659312353007057, "grad_norm": 1.027500867843628, "learning_rate": 9.461947368421053e-05, "loss": 0.3315, "step": 40465 }, { "epoch": 2.2659872326128347, "grad_norm": 1.6638792753219604, "learning_rate": 9.461921052631579e-05, "loss": 0.418, "step": 40466 }, { "epoch": 2.2660432299249638, "grad_norm": 2.853843927383423, "learning_rate": 9.461894736842106e-05, "loss": 0.4194, "step": 40467 }, { "epoch": 2.266099227237093, "grad_norm": 1.109104871749878, "learning_rate": 9.461868421052632e-05, "loss": 0.3686, "step": 40468 }, { "epoch": 2.266155224549222, "grad_norm": 1.1648691892623901, "learning_rate": 9.461842105263158e-05, "loss": 0.3273, "step": 40469 }, { "epoch": 2.266211221861351, "grad_norm": 1.2241833209991455, "learning_rate": 9.461815789473684e-05, "loss": 0.4193, "step": 40470 }, { "epoch": 2.26626721917348, "grad_norm": 1.2876853942871094, "learning_rate": 9.46178947368421e-05, "loss": 0.382, "step": 40471 }, { "epoch": 
2.266323216485609, "grad_norm": 1.217707633972168, "learning_rate": 9.461763157894738e-05, "loss": 0.3919, "step": 40472 }, { "epoch": 2.266379213797738, "grad_norm": 1.123657464981079, "learning_rate": 9.461736842105264e-05, "loss": 0.4051, "step": 40473 }, { "epoch": 2.266435211109867, "grad_norm": 1.0729305744171143, "learning_rate": 9.46171052631579e-05, "loss": 0.4951, "step": 40474 }, { "epoch": 2.266491208421996, "grad_norm": 1.163252592086792, "learning_rate": 9.461684210526316e-05, "loss": 0.3706, "step": 40475 }, { "epoch": 2.266547205734125, "grad_norm": 1.1236474514007568, "learning_rate": 9.461657894736843e-05, "loss": 0.3724, "step": 40476 }, { "epoch": 2.266603203046254, "grad_norm": 1.0196162462234497, "learning_rate": 9.461631578947369e-05, "loss": 0.3453, "step": 40477 }, { "epoch": 2.266659200358383, "grad_norm": 1.349471092224121, "learning_rate": 9.461605263157895e-05, "loss": 0.4287, "step": 40478 }, { "epoch": 2.266715197670512, "grad_norm": 1.0240190029144287, "learning_rate": 9.461578947368421e-05, "loss": 0.3366, "step": 40479 }, { "epoch": 2.266771194982641, "grad_norm": 1.178196907043457, "learning_rate": 9.461552631578948e-05, "loss": 0.3333, "step": 40480 }, { "epoch": 2.26682719229477, "grad_norm": 1.16752028465271, "learning_rate": 9.461526315789474e-05, "loss": 0.4048, "step": 40481 }, { "epoch": 2.266883189606899, "grad_norm": 1.1811375617980957, "learning_rate": 9.461500000000001e-05, "loss": 0.4129, "step": 40482 }, { "epoch": 2.266939186919028, "grad_norm": 1.3402822017669678, "learning_rate": 9.461473684210526e-05, "loss": 0.5252, "step": 40483 }, { "epoch": 2.266995184231157, "grad_norm": 1.2851439714431763, "learning_rate": 9.461447368421053e-05, "loss": 0.4547, "step": 40484 }, { "epoch": 2.267051181543286, "grad_norm": 1.2237930297851562, "learning_rate": 9.46142105263158e-05, "loss": 0.3724, "step": 40485 }, { "epoch": 2.267107178855415, "grad_norm": 1.1725502014160156, "learning_rate": 9.461394736842105e-05, "loss": 
0.3866, "step": 40486 }, { "epoch": 2.267163176167544, "grad_norm": 1.3695721626281738, "learning_rate": 9.461368421052633e-05, "loss": 0.5176, "step": 40487 }, { "epoch": 2.267219173479673, "grad_norm": 1.1344494819641113, "learning_rate": 9.461342105263157e-05, "loss": 0.3788, "step": 40488 }, { "epoch": 2.267275170791802, "grad_norm": 1.2121444940567017, "learning_rate": 9.461315789473685e-05, "loss": 0.5308, "step": 40489 }, { "epoch": 2.267331168103931, "grad_norm": 0.9950360059738159, "learning_rate": 9.46128947368421e-05, "loss": 0.3252, "step": 40490 }, { "epoch": 2.26738716541606, "grad_norm": 1.164266586303711, "learning_rate": 9.461263157894738e-05, "loss": 0.3423, "step": 40491 }, { "epoch": 2.2674431627281892, "grad_norm": 0.986965000629425, "learning_rate": 9.461236842105264e-05, "loss": 0.368, "step": 40492 }, { "epoch": 2.267499160040318, "grad_norm": 1.242859959602356, "learning_rate": 9.46121052631579e-05, "loss": 0.3715, "step": 40493 }, { "epoch": 2.2675551573524473, "grad_norm": 1.0006341934204102, "learning_rate": 9.461184210526316e-05, "loss": 0.4417, "step": 40494 }, { "epoch": 2.267611154664576, "grad_norm": 1.089477777481079, "learning_rate": 9.461157894736843e-05, "loss": 0.429, "step": 40495 }, { "epoch": 2.2676671519767053, "grad_norm": 1.4148645401000977, "learning_rate": 9.461131578947369e-05, "loss": 0.4258, "step": 40496 }, { "epoch": 2.267723149288834, "grad_norm": 1.2728643417358398, "learning_rate": 9.461105263157895e-05, "loss": 0.3921, "step": 40497 }, { "epoch": 2.2677791466009634, "grad_norm": 1.278693675994873, "learning_rate": 9.461078947368421e-05, "loss": 0.4192, "step": 40498 }, { "epoch": 2.267835143913092, "grad_norm": 1.3750426769256592, "learning_rate": 9.461052631578948e-05, "loss": 0.5112, "step": 40499 }, { "epoch": 2.2678911412252214, "grad_norm": 1.2014737129211426, "learning_rate": 9.461026315789474e-05, "loss": 0.3825, "step": 40500 }, { "epoch": 2.26794713853735, "grad_norm": 1.263054609298706, 
"learning_rate": 9.461e-05, "loss": 0.4629, "step": 40501 }, { "epoch": 2.2680031358494794, "grad_norm": 1.3502540588378906, "learning_rate": 9.460973684210526e-05, "loss": 0.3915, "step": 40502 }, { "epoch": 2.268059133161608, "grad_norm": 1.1903547048568726, "learning_rate": 9.460947368421052e-05, "loss": 0.383, "step": 40503 }, { "epoch": 2.2681151304737375, "grad_norm": 1.1257144212722778, "learning_rate": 9.46092105263158e-05, "loss": 0.3255, "step": 40504 }, { "epoch": 2.268171127785866, "grad_norm": 1.2431561946868896, "learning_rate": 9.460894736842106e-05, "loss": 0.3416, "step": 40505 }, { "epoch": 2.2682271250979955, "grad_norm": 1.1336997747421265, "learning_rate": 9.460868421052632e-05, "loss": 0.4381, "step": 40506 }, { "epoch": 2.268283122410124, "grad_norm": 1.1583088636398315, "learning_rate": 9.460842105263158e-05, "loss": 0.3545, "step": 40507 }, { "epoch": 2.2683391197222535, "grad_norm": 1.2995786666870117, "learning_rate": 9.460815789473685e-05, "loss": 0.3637, "step": 40508 }, { "epoch": 2.268395117034382, "grad_norm": 1.337058424949646, "learning_rate": 9.460789473684211e-05, "loss": 0.376, "step": 40509 }, { "epoch": 2.2684511143465116, "grad_norm": 1.2869439125061035, "learning_rate": 9.460763157894738e-05, "loss": 0.3914, "step": 40510 }, { "epoch": 2.26850711165864, "grad_norm": 1.2116807699203491, "learning_rate": 9.460736842105263e-05, "loss": 0.4164, "step": 40511 }, { "epoch": 2.2685631089707696, "grad_norm": 1.1212992668151855, "learning_rate": 9.46071052631579e-05, "loss": 0.3749, "step": 40512 }, { "epoch": 2.268619106282898, "grad_norm": 1.5295838117599487, "learning_rate": 9.460684210526316e-05, "loss": 0.3856, "step": 40513 }, { "epoch": 2.2686751035950277, "grad_norm": 1.5487151145935059, "learning_rate": 9.460657894736843e-05, "loss": 0.3304, "step": 40514 }, { "epoch": 2.2687311009071562, "grad_norm": 1.5145213603973389, "learning_rate": 9.460631578947368e-05, "loss": 0.5556, "step": 40515 }, { "epoch": 2.2687870982192857, 
"grad_norm": 1.3443989753723145, "learning_rate": 9.460605263157895e-05, "loss": 0.5611, "step": 40516 }, { "epoch": 2.2688430955314143, "grad_norm": 1.3281117677688599, "learning_rate": 9.460578947368421e-05, "loss": 0.4301, "step": 40517 }, { "epoch": 2.2688990928435437, "grad_norm": 1.243749737739563, "learning_rate": 9.460552631578949e-05, "loss": 0.4215, "step": 40518 }, { "epoch": 2.2689550901556723, "grad_norm": 1.2872587442398071, "learning_rate": 9.460526315789475e-05, "loss": 0.4699, "step": 40519 }, { "epoch": 2.269011087467802, "grad_norm": 1.2896764278411865, "learning_rate": 9.460499999999999e-05, "loss": 0.4627, "step": 40520 }, { "epoch": 2.2690670847799304, "grad_norm": 1.0949015617370605, "learning_rate": 9.460473684210527e-05, "loss": 0.3285, "step": 40521 }, { "epoch": 2.26912308209206, "grad_norm": 1.007161021232605, "learning_rate": 9.460447368421053e-05, "loss": 0.3132, "step": 40522 }, { "epoch": 2.2691790794041884, "grad_norm": 1.0987603664398193, "learning_rate": 9.46042105263158e-05, "loss": 0.5, "step": 40523 }, { "epoch": 2.269235076716318, "grad_norm": 1.708152174949646, "learning_rate": 9.460394736842106e-05, "loss": 0.3719, "step": 40524 }, { "epoch": 2.2692910740284464, "grad_norm": 1.13357675075531, "learning_rate": 9.460368421052632e-05, "loss": 0.3864, "step": 40525 }, { "epoch": 2.269347071340576, "grad_norm": 1.1700465679168701, "learning_rate": 9.460342105263158e-05, "loss": 0.2584, "step": 40526 }, { "epoch": 2.2694030686527045, "grad_norm": 1.2960734367370605, "learning_rate": 9.460315789473685e-05, "loss": 0.4427, "step": 40527 }, { "epoch": 2.269459065964834, "grad_norm": 1.1186174154281616, "learning_rate": 9.460289473684211e-05, "loss": 0.448, "step": 40528 }, { "epoch": 2.2695150632769625, "grad_norm": 1.0102180242538452, "learning_rate": 9.460263157894737e-05, "loss": 0.3312, "step": 40529 }, { "epoch": 2.269571060589092, "grad_norm": 1.3226977586746216, "learning_rate": 9.460236842105263e-05, "loss": 0.3526, "step": 
40530 }, { "epoch": 2.2696270579012205, "grad_norm": 1.0980631113052368, "learning_rate": 9.46021052631579e-05, "loss": 0.3637, "step": 40531 }, { "epoch": 2.2696830552133496, "grad_norm": 1.2014403343200684, "learning_rate": 9.460184210526316e-05, "loss": 0.368, "step": 40532 }, { "epoch": 2.2697390525254786, "grad_norm": 1.1677125692367554, "learning_rate": 9.460157894736842e-05, "loss": 0.3874, "step": 40533 }, { "epoch": 2.2697950498376076, "grad_norm": 1.1828606128692627, "learning_rate": 9.460131578947368e-05, "loss": 0.4269, "step": 40534 }, { "epoch": 2.2698510471497366, "grad_norm": 1.1326069831848145, "learning_rate": 9.460105263157896e-05, "loss": 0.2152, "step": 40535 }, { "epoch": 2.2699070444618656, "grad_norm": 1.3309108018875122, "learning_rate": 9.460078947368422e-05, "loss": 0.4462, "step": 40536 }, { "epoch": 2.2699630417739947, "grad_norm": 1.1551995277404785, "learning_rate": 9.460052631578948e-05, "loss": 0.3438, "step": 40537 }, { "epoch": 2.2700190390861237, "grad_norm": 1.0498015880584717, "learning_rate": 9.460026315789474e-05, "loss": 0.3138, "step": 40538 }, { "epoch": 2.2700750363982527, "grad_norm": 1.1026535034179688, "learning_rate": 9.46e-05, "loss": 0.4305, "step": 40539 }, { "epoch": 2.2701310337103817, "grad_norm": 1.0027085542678833, "learning_rate": 9.459973684210527e-05, "loss": 0.3159, "step": 40540 }, { "epoch": 2.2701870310225107, "grad_norm": 1.3759174346923828, "learning_rate": 9.459947368421053e-05, "loss": 0.3919, "step": 40541 }, { "epoch": 2.2702430283346398, "grad_norm": 1.138127326965332, "learning_rate": 9.45992105263158e-05, "loss": 0.4041, "step": 40542 }, { "epoch": 2.2702990256467688, "grad_norm": 1.0994898080825806, "learning_rate": 9.459894736842105e-05, "loss": 0.3442, "step": 40543 }, { "epoch": 2.270355022958898, "grad_norm": 1.5775060653686523, "learning_rate": 9.459868421052632e-05, "loss": 0.4103, "step": 40544 }, { "epoch": 2.270411020271027, "grad_norm": 1.1225507259368896, "learning_rate": 
9.459842105263158e-05, "loss": 0.3927, "step": 40545 }, { "epoch": 2.270467017583156, "grad_norm": 1.0932636260986328, "learning_rate": 9.459815789473685e-05, "loss": 0.3331, "step": 40546 }, { "epoch": 2.270523014895285, "grad_norm": 1.2866544723510742, "learning_rate": 9.459789473684211e-05, "loss": 0.4559, "step": 40547 }, { "epoch": 2.270579012207414, "grad_norm": 1.8256518840789795, "learning_rate": 9.459763157894737e-05, "loss": 0.3918, "step": 40548 }, { "epoch": 2.270635009519543, "grad_norm": 1.1217502355575562, "learning_rate": 9.459736842105263e-05, "loss": 0.3729, "step": 40549 }, { "epoch": 2.270691006831672, "grad_norm": 1.2543939352035522, "learning_rate": 9.45971052631579e-05, "loss": 0.453, "step": 40550 }, { "epoch": 2.270747004143801, "grad_norm": 1.3210906982421875, "learning_rate": 9.459684210526317e-05, "loss": 0.544, "step": 40551 }, { "epoch": 2.27080300145593, "grad_norm": 1.0610665082931519, "learning_rate": 9.459657894736843e-05, "loss": 0.3944, "step": 40552 }, { "epoch": 2.270858998768059, "grad_norm": 1.028406023979187, "learning_rate": 9.459631578947369e-05, "loss": 0.3958, "step": 40553 }, { "epoch": 2.270914996080188, "grad_norm": 1.5856077671051025, "learning_rate": 9.459605263157895e-05, "loss": 0.4057, "step": 40554 }, { "epoch": 2.270970993392317, "grad_norm": 1.3397217988967896, "learning_rate": 9.459578947368422e-05, "loss": 0.5113, "step": 40555 }, { "epoch": 2.271026990704446, "grad_norm": 1.700391411781311, "learning_rate": 9.459552631578948e-05, "loss": 0.4107, "step": 40556 }, { "epoch": 2.271082988016575, "grad_norm": 1.009935975074768, "learning_rate": 9.459526315789474e-05, "loss": 0.3437, "step": 40557 }, { "epoch": 2.271138985328704, "grad_norm": 1.384375810623169, "learning_rate": 9.4595e-05, "loss": 0.4215, "step": 40558 }, { "epoch": 2.271194982640833, "grad_norm": 1.545481562614441, "learning_rate": 9.459473684210527e-05, "loss": 0.4502, "step": 40559 }, { "epoch": 2.271250979952962, "grad_norm": 
1.2553125619888306, "learning_rate": 9.459447368421053e-05, "loss": 0.5871, "step": 40560 }, { "epoch": 2.271306977265091, "grad_norm": 1.3203444480895996, "learning_rate": 9.459421052631579e-05, "loss": 0.635, "step": 40561 }, { "epoch": 2.27136297457722, "grad_norm": 1.2080284357070923, "learning_rate": 9.459394736842105e-05, "loss": 0.3949, "step": 40562 }, { "epoch": 2.271418971889349, "grad_norm": 1.0241115093231201, "learning_rate": 9.459368421052632e-05, "loss": 0.4426, "step": 40563 }, { "epoch": 2.271474969201478, "grad_norm": 1.3481581211090088, "learning_rate": 9.459342105263158e-05, "loss": 0.396, "step": 40564 }, { "epoch": 2.271530966513607, "grad_norm": 1.2400517463684082, "learning_rate": 9.459315789473686e-05, "loss": 0.4219, "step": 40565 }, { "epoch": 2.271586963825736, "grad_norm": 1.135642409324646, "learning_rate": 9.45928947368421e-05, "loss": 0.2816, "step": 40566 }, { "epoch": 2.2716429611378652, "grad_norm": 1.3617733716964722, "learning_rate": 9.459263157894738e-05, "loss": 0.4161, "step": 40567 }, { "epoch": 2.2716989584499943, "grad_norm": 1.1379430294036865, "learning_rate": 9.459236842105264e-05, "loss": 0.4298, "step": 40568 }, { "epoch": 2.2717549557621233, "grad_norm": 1.3969991207122803, "learning_rate": 9.459210526315791e-05, "loss": 0.4807, "step": 40569 }, { "epoch": 2.2718109530742523, "grad_norm": 1.384647011756897, "learning_rate": 9.459184210526316e-05, "loss": 0.387, "step": 40570 }, { "epoch": 2.2718669503863813, "grad_norm": 1.4508918523788452, "learning_rate": 9.459157894736843e-05, "loss": 0.5153, "step": 40571 }, { "epoch": 2.2719229476985103, "grad_norm": 1.0793359279632568, "learning_rate": 9.459131578947369e-05, "loss": 0.3294, "step": 40572 }, { "epoch": 2.2719789450106394, "grad_norm": 1.2309826612472534, "learning_rate": 9.459105263157895e-05, "loss": 0.5021, "step": 40573 }, { "epoch": 2.2720349423227684, "grad_norm": 1.0962035655975342, "learning_rate": 9.459078947368422e-05, "loss": 0.4171, "step": 40574 }, { 
"epoch": 2.2720909396348974, "grad_norm": 1.2114489078521729, "learning_rate": 9.459052631578947e-05, "loss": 0.3636, "step": 40575 }, { "epoch": 2.2721469369470264, "grad_norm": 1.2173047065734863, "learning_rate": 9.459026315789474e-05, "loss": 0.3545, "step": 40576 }, { "epoch": 2.2722029342591554, "grad_norm": 1.0975050926208496, "learning_rate": 9.459e-05, "loss": 0.5374, "step": 40577 }, { "epoch": 2.2722589315712844, "grad_norm": 1.1415431499481201, "learning_rate": 9.458973684210527e-05, "loss": 0.3797, "step": 40578 }, { "epoch": 2.2723149288834135, "grad_norm": 1.1166616678237915, "learning_rate": 9.458947368421053e-05, "loss": 0.4344, "step": 40579 }, { "epoch": 2.2723709261955425, "grad_norm": 1.2645405530929565, "learning_rate": 9.45892105263158e-05, "loss": 0.4048, "step": 40580 }, { "epoch": 2.2724269235076715, "grad_norm": 1.1214083433151245, "learning_rate": 9.458894736842105e-05, "loss": 0.4528, "step": 40581 }, { "epoch": 2.2724829208198005, "grad_norm": 1.2971934080123901, "learning_rate": 9.458868421052633e-05, "loss": 0.5001, "step": 40582 }, { "epoch": 2.2725389181319295, "grad_norm": 1.2930711507797241, "learning_rate": 9.458842105263159e-05, "loss": 0.4363, "step": 40583 }, { "epoch": 2.2725949154440586, "grad_norm": 1.1945903301239014, "learning_rate": 9.458815789473685e-05, "loss": 0.4326, "step": 40584 }, { "epoch": 2.2726509127561876, "grad_norm": 1.2399060726165771, "learning_rate": 9.45878947368421e-05, "loss": 0.5048, "step": 40585 }, { "epoch": 2.2727069100683166, "grad_norm": 1.334643840789795, "learning_rate": 9.458763157894738e-05, "loss": 0.4086, "step": 40586 }, { "epoch": 2.2727629073804456, "grad_norm": 1.189266324043274, "learning_rate": 9.458736842105264e-05, "loss": 0.3829, "step": 40587 }, { "epoch": 2.2728189046925746, "grad_norm": 1.0801719427108765, "learning_rate": 9.45871052631579e-05, "loss": 0.4455, "step": 40588 }, { "epoch": 2.2728749020047037, "grad_norm": 1.4720373153686523, "learning_rate": 
9.458684210526316e-05, "loss": 0.5109, "step": 40589 }, { "epoch": 2.2729308993168327, "grad_norm": 1.4487801790237427, "learning_rate": 9.458657894736842e-05, "loss": 0.3974, "step": 40590 }, { "epoch": 2.2729868966289617, "grad_norm": 1.0977919101715088, "learning_rate": 9.458631578947369e-05, "loss": 0.3584, "step": 40591 }, { "epoch": 2.2730428939410907, "grad_norm": 1.474965214729309, "learning_rate": 9.458605263157895e-05, "loss": 0.3883, "step": 40592 }, { "epoch": 2.2730988912532197, "grad_norm": 1.1310701370239258, "learning_rate": 9.458578947368421e-05, "loss": 0.3491, "step": 40593 }, { "epoch": 2.2731548885653488, "grad_norm": 1.2242786884307861, "learning_rate": 9.458552631578947e-05, "loss": 0.3747, "step": 40594 }, { "epoch": 2.2732108858774778, "grad_norm": 2.974498748779297, "learning_rate": 9.458526315789474e-05, "loss": 0.3417, "step": 40595 }, { "epoch": 2.273266883189607, "grad_norm": 1.2573018074035645, "learning_rate": 9.4585e-05, "loss": 0.3961, "step": 40596 }, { "epoch": 2.273322880501736, "grad_norm": 1.2831525802612305, "learning_rate": 9.458473684210528e-05, "loss": 0.4377, "step": 40597 }, { "epoch": 2.273378877813865, "grad_norm": 1.4229695796966553, "learning_rate": 9.458447368421052e-05, "loss": 0.4332, "step": 40598 }, { "epoch": 2.273434875125994, "grad_norm": 1.1408116817474365, "learning_rate": 9.45842105263158e-05, "loss": 0.3922, "step": 40599 }, { "epoch": 2.273490872438123, "grad_norm": 1.0934343338012695, "learning_rate": 9.458394736842106e-05, "loss": 0.3424, "step": 40600 }, { "epoch": 2.273546869750252, "grad_norm": 1.2211631536483765, "learning_rate": 9.458368421052633e-05, "loss": 0.4017, "step": 40601 }, { "epoch": 2.273602867062381, "grad_norm": 1.3245149850845337, "learning_rate": 9.458342105263159e-05, "loss": 0.4802, "step": 40602 }, { "epoch": 2.27365886437451, "grad_norm": 1.1212351322174072, "learning_rate": 9.458315789473685e-05, "loss": 0.5004, "step": 40603 }, { "epoch": 2.273714861686639, "grad_norm": 
1.0691895484924316, "learning_rate": 9.458289473684211e-05, "loss": 0.4036, "step": 40604 }, { "epoch": 2.273770858998768, "grad_norm": 1.0293790102005005, "learning_rate": 9.458263157894738e-05, "loss": 0.2551, "step": 40605 }, { "epoch": 2.273826856310897, "grad_norm": 1.2396889925003052, "learning_rate": 9.458236842105264e-05, "loss": 0.4525, "step": 40606 }, { "epoch": 2.273882853623026, "grad_norm": 1.2029087543487549, "learning_rate": 9.458210526315789e-05, "loss": 0.3525, "step": 40607 }, { "epoch": 2.273938850935155, "grad_norm": 1.2619563341140747, "learning_rate": 9.458184210526316e-05, "loss": 0.4911, "step": 40608 }, { "epoch": 2.273994848247284, "grad_norm": 1.2013729810714722, "learning_rate": 9.458157894736842e-05, "loss": 0.5835, "step": 40609 }, { "epoch": 2.274050845559413, "grad_norm": 1.0333744287490845, "learning_rate": 9.45813157894737e-05, "loss": 0.4243, "step": 40610 }, { "epoch": 2.274106842871542, "grad_norm": 1.1525123119354248, "learning_rate": 9.458105263157895e-05, "loss": 0.5303, "step": 40611 }, { "epoch": 2.274162840183671, "grad_norm": 1.3624944686889648, "learning_rate": 9.458078947368421e-05, "loss": 0.4247, "step": 40612 }, { "epoch": 2.2742188374958, "grad_norm": 1.298219084739685, "learning_rate": 9.458052631578947e-05, "loss": 0.4992, "step": 40613 }, { "epoch": 2.274274834807929, "grad_norm": 1.0188695192337036, "learning_rate": 9.458026315789475e-05, "loss": 0.29, "step": 40614 }, { "epoch": 2.274330832120058, "grad_norm": 1.324236512184143, "learning_rate": 9.458e-05, "loss": 0.3992, "step": 40615 }, { "epoch": 2.274386829432187, "grad_norm": 1.5384267568588257, "learning_rate": 9.457973684210527e-05, "loss": 0.4577, "step": 40616 }, { "epoch": 2.274442826744316, "grad_norm": 0.99504154920578, "learning_rate": 9.457947368421053e-05, "loss": 0.2792, "step": 40617 }, { "epoch": 2.274498824056445, "grad_norm": 1.2706249952316284, "learning_rate": 9.45792105263158e-05, "loss": 0.3899, "step": 40618 }, { "epoch": 
2.2745548213685742, "grad_norm": 1.286297082901001, "learning_rate": 9.457894736842106e-05, "loss": 0.385, "step": 40619 }, { "epoch": 2.2746108186807033, "grad_norm": 1.2250655889511108, "learning_rate": 9.457868421052633e-05, "loss": 0.361, "step": 40620 }, { "epoch": 2.2746668159928323, "grad_norm": 1.2352221012115479, "learning_rate": 9.457842105263158e-05, "loss": 0.3888, "step": 40621 }, { "epoch": 2.2747228133049613, "grad_norm": 2.1670773029327393, "learning_rate": 9.457815789473685e-05, "loss": 0.4272, "step": 40622 }, { "epoch": 2.2747788106170903, "grad_norm": 1.369737148284912, "learning_rate": 9.457789473684211e-05, "loss": 0.3345, "step": 40623 }, { "epoch": 2.2748348079292193, "grad_norm": 1.2595831155776978, "learning_rate": 9.457763157894737e-05, "loss": 0.587, "step": 40624 }, { "epoch": 2.2748908052413483, "grad_norm": 1.288218379020691, "learning_rate": 9.457736842105263e-05, "loss": 0.4151, "step": 40625 }, { "epoch": 2.2749468025534774, "grad_norm": 1.1666189432144165, "learning_rate": 9.457710526315789e-05, "loss": 0.3919, "step": 40626 }, { "epoch": 2.2750027998656064, "grad_norm": 1.8396273851394653, "learning_rate": 9.457684210526316e-05, "loss": 0.3797, "step": 40627 }, { "epoch": 2.2750587971777354, "grad_norm": 1.4156901836395264, "learning_rate": 9.457657894736842e-05, "loss": 0.3822, "step": 40628 }, { "epoch": 2.2751147944898644, "grad_norm": 1.175866723060608, "learning_rate": 9.45763157894737e-05, "loss": 0.4732, "step": 40629 }, { "epoch": 2.2751707918019934, "grad_norm": 1.1439058780670166, "learning_rate": 9.457605263157894e-05, "loss": 0.2878, "step": 40630 }, { "epoch": 2.2752267891141225, "grad_norm": 1.288842797279358, "learning_rate": 9.457578947368422e-05, "loss": 0.3905, "step": 40631 }, { "epoch": 2.2752827864262515, "grad_norm": 1.33756685256958, "learning_rate": 9.457552631578948e-05, "loss": 0.3562, "step": 40632 }, { "epoch": 2.2753387837383805, "grad_norm": 1.22990083694458, "learning_rate": 9.457526315789475e-05, 
"loss": 0.2758, "step": 40633 }, { "epoch": 2.2753947810505095, "grad_norm": 1.1799225807189941, "learning_rate": 9.457500000000001e-05, "loss": 0.5032, "step": 40634 }, { "epoch": 2.2754507783626385, "grad_norm": 1.7661281824111938, "learning_rate": 9.457473684210527e-05, "loss": 0.5858, "step": 40635 }, { "epoch": 2.2755067756747676, "grad_norm": 1.152991771697998, "learning_rate": 9.457447368421053e-05, "loss": 0.4368, "step": 40636 }, { "epoch": 2.2755627729868966, "grad_norm": 1.489823818206787, "learning_rate": 9.45742105263158e-05, "loss": 0.559, "step": 40637 }, { "epoch": 2.2756187702990256, "grad_norm": 1.209084391593933, "learning_rate": 9.457394736842106e-05, "loss": 0.4946, "step": 40638 }, { "epoch": 2.2756747676111546, "grad_norm": 1.1643140316009521, "learning_rate": 9.457368421052632e-05, "loss": 0.3482, "step": 40639 }, { "epoch": 2.2757307649232836, "grad_norm": 1.270938515663147, "learning_rate": 9.457342105263158e-05, "loss": 0.3369, "step": 40640 }, { "epoch": 2.2757867622354127, "grad_norm": 1.0088104009628296, "learning_rate": 9.457315789473684e-05, "loss": 0.2605, "step": 40641 }, { "epoch": 2.2758427595475417, "grad_norm": 1.1692498922348022, "learning_rate": 9.457289473684211e-05, "loss": 0.478, "step": 40642 }, { "epoch": 2.2758987568596707, "grad_norm": 1.4544577598571777, "learning_rate": 9.457263157894737e-05, "loss": 0.3604, "step": 40643 }, { "epoch": 2.2759547541717997, "grad_norm": 1.437093734741211, "learning_rate": 9.457236842105263e-05, "loss": 0.4022, "step": 40644 }, { "epoch": 2.2760107514839287, "grad_norm": 1.5041439533233643, "learning_rate": 9.457210526315789e-05, "loss": 0.4109, "step": 40645 }, { "epoch": 2.2760667487960577, "grad_norm": 1.5819566249847412, "learning_rate": 9.457184210526317e-05, "loss": 0.4624, "step": 40646 }, { "epoch": 2.2761227461081868, "grad_norm": 1.1363409757614136, "learning_rate": 9.457157894736843e-05, "loss": 0.4082, "step": 40647 }, { "epoch": 2.276178743420316, "grad_norm": 
1.1435924768447876, "learning_rate": 9.457131578947369e-05, "loss": 0.4487, "step": 40648 }, { "epoch": 2.276234740732445, "grad_norm": 1.104856014251709, "learning_rate": 9.457105263157895e-05, "loss": 0.3443, "step": 40649 }, { "epoch": 2.276290738044574, "grad_norm": 1.4004374742507935, "learning_rate": 9.457078947368422e-05, "loss": 0.5505, "step": 40650 }, { "epoch": 2.276346735356703, "grad_norm": 1.9768024682998657, "learning_rate": 9.457052631578948e-05, "loss": 0.2911, "step": 40651 }, { "epoch": 2.276402732668832, "grad_norm": 1.1699367761611938, "learning_rate": 9.457026315789475e-05, "loss": 0.4692, "step": 40652 }, { "epoch": 2.276458729980961, "grad_norm": 1.182948350906372, "learning_rate": 9.457e-05, "loss": 0.4232, "step": 40653 }, { "epoch": 2.27651472729309, "grad_norm": 1.4045296907424927, "learning_rate": 9.456973684210527e-05, "loss": 0.3406, "step": 40654 }, { "epoch": 2.276570724605219, "grad_norm": 3.0734546184539795, "learning_rate": 9.456947368421053e-05, "loss": 0.5694, "step": 40655 }, { "epoch": 2.276626721917348, "grad_norm": 1.249147653579712, "learning_rate": 9.45692105263158e-05, "loss": 0.442, "step": 40656 }, { "epoch": 2.276682719229477, "grad_norm": 1.1361415386199951, "learning_rate": 9.456894736842106e-05, "loss": 0.4814, "step": 40657 }, { "epoch": 2.276738716541606, "grad_norm": 1.0875663757324219, "learning_rate": 9.456868421052631e-05, "loss": 0.3289, "step": 40658 }, { "epoch": 2.276794713853735, "grad_norm": 1.055518388748169, "learning_rate": 9.456842105263158e-05, "loss": 0.414, "step": 40659 }, { "epoch": 2.276850711165864, "grad_norm": 1.5574101209640503, "learning_rate": 9.456815789473684e-05, "loss": 0.4048, "step": 40660 }, { "epoch": 2.276906708477993, "grad_norm": 1.1019195318222046, "learning_rate": 9.456789473684212e-05, "loss": 0.3016, "step": 40661 }, { "epoch": 2.276962705790122, "grad_norm": 1.1658498048782349, "learning_rate": 9.456763157894736e-05, "loss": 0.4819, "step": 40662 }, { "epoch": 
2.277018703102251, "grad_norm": 1.2599719762802124, "learning_rate": 9.456736842105264e-05, "loss": 0.378, "step": 40663 }, { "epoch": 2.27707470041438, "grad_norm": 1.1545414924621582, "learning_rate": 9.45671052631579e-05, "loss": 0.3704, "step": 40664 }, { "epoch": 2.277130697726509, "grad_norm": 1.086358904838562, "learning_rate": 9.456684210526317e-05, "loss": 0.3505, "step": 40665 }, { "epoch": 2.277186695038638, "grad_norm": 1.1106089353561401, "learning_rate": 9.456657894736843e-05, "loss": 0.3949, "step": 40666 }, { "epoch": 2.277242692350767, "grad_norm": 1.1631847620010376, "learning_rate": 9.456631578947369e-05, "loss": 0.3261, "step": 40667 }, { "epoch": 2.277298689662896, "grad_norm": 1.504260540008545, "learning_rate": 9.456605263157895e-05, "loss": 0.5226, "step": 40668 }, { "epoch": 2.277354686975025, "grad_norm": 1.1442697048187256, "learning_rate": 9.456578947368422e-05, "loss": 0.3669, "step": 40669 }, { "epoch": 2.277410684287154, "grad_norm": 1.2757880687713623, "learning_rate": 9.456552631578948e-05, "loss": 0.4514, "step": 40670 }, { "epoch": 2.2774666815992832, "grad_norm": 1.204907774925232, "learning_rate": 9.456526315789474e-05, "loss": 0.4126, "step": 40671 }, { "epoch": 2.2775226789114122, "grad_norm": 1.2308409214019775, "learning_rate": 9.4565e-05, "loss": 0.5865, "step": 40672 }, { "epoch": 2.2775786762235413, "grad_norm": 1.2333112955093384, "learning_rate": 9.456473684210527e-05, "loss": 0.371, "step": 40673 }, { "epoch": 2.2776346735356703, "grad_norm": 1.0535119771957397, "learning_rate": 9.456447368421053e-05, "loss": 0.3727, "step": 40674 }, { "epoch": 2.2776906708477993, "grad_norm": 1.2088721990585327, "learning_rate": 9.456421052631579e-05, "loss": 0.3457, "step": 40675 }, { "epoch": 2.2777466681599283, "grad_norm": 2.702401876449585, "learning_rate": 9.456394736842105e-05, "loss": 0.4453, "step": 40676 }, { "epoch": 2.2778026654720573, "grad_norm": 1.2911460399627686, "learning_rate": 9.456368421052631e-05, "loss": 0.5286, 
"step": 40677 }, { "epoch": 2.2778586627841864, "grad_norm": 1.0863419771194458, "learning_rate": 9.456342105263159e-05, "loss": 0.4829, "step": 40678 }, { "epoch": 2.2779146600963154, "grad_norm": 1.0396877527236938, "learning_rate": 9.456315789473685e-05, "loss": 0.3943, "step": 40679 }, { "epoch": 2.2779706574084444, "grad_norm": 1.3092750310897827, "learning_rate": 9.45628947368421e-05, "loss": 0.4931, "step": 40680 }, { "epoch": 2.2780266547205734, "grad_norm": 2.3488402366638184, "learning_rate": 9.456263157894736e-05, "loss": 0.3927, "step": 40681 }, { "epoch": 2.2780826520327024, "grad_norm": 1.1009044647216797, "learning_rate": 9.456236842105264e-05, "loss": 0.311, "step": 40682 }, { "epoch": 2.2781386493448315, "grad_norm": 1.2242518663406372, "learning_rate": 9.45621052631579e-05, "loss": 0.5009, "step": 40683 }, { "epoch": 2.2781946466569605, "grad_norm": 1.48647940158844, "learning_rate": 9.456184210526317e-05, "loss": 0.4011, "step": 40684 }, { "epoch": 2.2782506439690895, "grad_norm": 1.2681246995925903, "learning_rate": 9.456157894736842e-05, "loss": 0.4263, "step": 40685 }, { "epoch": 2.2783066412812185, "grad_norm": 1.281705379486084, "learning_rate": 9.456131578947369e-05, "loss": 0.334, "step": 40686 }, { "epoch": 2.2783626385933475, "grad_norm": 1.189846396446228, "learning_rate": 9.456105263157895e-05, "loss": 0.3196, "step": 40687 }, { "epoch": 2.2784186359054766, "grad_norm": 1.3390530347824097, "learning_rate": 9.456078947368422e-05, "loss": 0.3496, "step": 40688 }, { "epoch": 2.2784746332176056, "grad_norm": 0.9478362798690796, "learning_rate": 9.456052631578948e-05, "loss": 0.3264, "step": 40689 }, { "epoch": 2.2785306305297346, "grad_norm": 1.1544431447982788, "learning_rate": 9.456026315789474e-05, "loss": 0.4445, "step": 40690 }, { "epoch": 2.2785866278418636, "grad_norm": 1.1023313999176025, "learning_rate": 9.456e-05, "loss": 0.2768, "step": 40691 }, { "epoch": 2.2786426251539926, "grad_norm": 1.0048623085021973, "learning_rate": 
9.455973684210528e-05, "loss": 0.3045, "step": 40692 }, { "epoch": 2.2786986224661216, "grad_norm": 1.1986424922943115, "learning_rate": 9.455947368421054e-05, "loss": 0.3915, "step": 40693 }, { "epoch": 2.2787546197782507, "grad_norm": 1.5429383516311646, "learning_rate": 9.45592105263158e-05, "loss": 0.3852, "step": 40694 }, { "epoch": 2.2788106170903797, "grad_norm": 1.5468636751174927, "learning_rate": 9.455894736842106e-05, "loss": 0.5161, "step": 40695 }, { "epoch": 2.2788666144025087, "grad_norm": 1.1783115863800049, "learning_rate": 9.455868421052631e-05, "loss": 0.3406, "step": 40696 }, { "epoch": 2.2789226117146377, "grad_norm": 1.3493643999099731, "learning_rate": 9.455842105263159e-05, "loss": 0.3999, "step": 40697 }, { "epoch": 2.2789786090267667, "grad_norm": 1.3171998262405396, "learning_rate": 9.455815789473685e-05, "loss": 0.3947, "step": 40698 }, { "epoch": 2.2790346063388958, "grad_norm": 1.8221908807754517, "learning_rate": 9.455789473684211e-05, "loss": 0.4198, "step": 40699 }, { "epoch": 2.279090603651025, "grad_norm": 1.0413686037063599, "learning_rate": 9.455763157894737e-05, "loss": 0.3787, "step": 40700 }, { "epoch": 2.279146600963154, "grad_norm": 1.8679466247558594, "learning_rate": 9.455736842105264e-05, "loss": 0.3968, "step": 40701 }, { "epoch": 2.279202598275283, "grad_norm": 1.2845100164413452, "learning_rate": 9.45571052631579e-05, "loss": 0.3852, "step": 40702 }, { "epoch": 2.279258595587412, "grad_norm": 1.473031759262085, "learning_rate": 9.455684210526316e-05, "loss": 0.3869, "step": 40703 }, { "epoch": 2.279314592899541, "grad_norm": 1.3209993839263916, "learning_rate": 9.455657894736842e-05, "loss": 0.5044, "step": 40704 }, { "epoch": 2.27937059021167, "grad_norm": 1.190691351890564, "learning_rate": 9.455631578947369e-05, "loss": 0.3455, "step": 40705 }, { "epoch": 2.279426587523799, "grad_norm": 1.1404445171356201, "learning_rate": 9.455605263157895e-05, "loss": 0.3455, "step": 40706 }, { "epoch": 2.279482584835928, 
"grad_norm": 1.1950658559799194, "learning_rate": 9.455578947368423e-05, "loss": 0.4867, "step": 40707 }, { "epoch": 2.279538582148057, "grad_norm": 1.141538381576538, "learning_rate": 9.455552631578947e-05, "loss": 0.4024, "step": 40708 }, { "epoch": 2.279594579460186, "grad_norm": 1.139651894569397, "learning_rate": 9.455526315789475e-05, "loss": 0.3851, "step": 40709 }, { "epoch": 2.279650576772315, "grad_norm": 2.500432252883911, "learning_rate": 9.4555e-05, "loss": 0.4448, "step": 40710 }, { "epoch": 2.279706574084444, "grad_norm": 1.292118787765503, "learning_rate": 9.455473684210526e-05, "loss": 0.3999, "step": 40711 }, { "epoch": 2.279762571396573, "grad_norm": 1.3038464784622192, "learning_rate": 9.455447368421054e-05, "loss": 0.4593, "step": 40712 }, { "epoch": 2.279818568708702, "grad_norm": 1.586011290550232, "learning_rate": 9.455421052631578e-05, "loss": 0.6508, "step": 40713 }, { "epoch": 2.279874566020831, "grad_norm": 1.3113987445831299, "learning_rate": 9.455394736842106e-05, "loss": 0.4406, "step": 40714 }, { "epoch": 2.27993056333296, "grad_norm": 1.0662182569503784, "learning_rate": 9.455368421052632e-05, "loss": 0.2824, "step": 40715 }, { "epoch": 2.279986560645089, "grad_norm": 1.3647671937942505, "learning_rate": 9.455342105263159e-05, "loss": 0.4789, "step": 40716 }, { "epoch": 2.280042557957218, "grad_norm": 1.1081174612045288, "learning_rate": 9.455315789473684e-05, "loss": 0.3911, "step": 40717 }, { "epoch": 2.280098555269347, "grad_norm": 1.1749792098999023, "learning_rate": 9.455289473684211e-05, "loss": 0.4746, "step": 40718 }, { "epoch": 2.280154552581476, "grad_norm": 1.1469017267227173, "learning_rate": 9.455263157894737e-05, "loss": 0.4871, "step": 40719 }, { "epoch": 2.280210549893605, "grad_norm": 1.0866130590438843, "learning_rate": 9.455236842105264e-05, "loss": 0.3739, "step": 40720 }, { "epoch": 2.280266547205734, "grad_norm": 1.3231158256530762, "learning_rate": 9.45521052631579e-05, "loss": 0.452, "step": 40721 }, { 
"epoch": 2.280322544517863, "grad_norm": 1.2544708251953125, "learning_rate": 9.455184210526316e-05, "loss": 0.4238, "step": 40722 }, { "epoch": 2.2803785418299922, "grad_norm": 1.5187524557113647, "learning_rate": 9.455157894736842e-05, "loss": 0.3369, "step": 40723 }, { "epoch": 2.2804345391421212, "grad_norm": 1.0395478010177612, "learning_rate": 9.45513157894737e-05, "loss": 0.3294, "step": 40724 }, { "epoch": 2.2804905364542503, "grad_norm": 1.2137303352355957, "learning_rate": 9.455105263157896e-05, "loss": 0.4857, "step": 40725 }, { "epoch": 2.2805465337663793, "grad_norm": 1.1185930967330933, "learning_rate": 9.455078947368422e-05, "loss": 0.4514, "step": 40726 }, { "epoch": 2.2806025310785083, "grad_norm": 1.0367804765701294, "learning_rate": 9.455052631578947e-05, "loss": 0.4904, "step": 40727 }, { "epoch": 2.2806585283906373, "grad_norm": 1.1859992742538452, "learning_rate": 9.455026315789473e-05, "loss": 0.4533, "step": 40728 }, { "epoch": 2.2807145257027663, "grad_norm": 1.1731269359588623, "learning_rate": 9.455000000000001e-05, "loss": 0.3564, "step": 40729 }, { "epoch": 2.2807705230148954, "grad_norm": 1.441381573677063, "learning_rate": 9.454973684210527e-05, "loss": 0.6787, "step": 40730 }, { "epoch": 2.2808265203270244, "grad_norm": 1.266228437423706, "learning_rate": 9.454947368421053e-05, "loss": 0.4046, "step": 40731 }, { "epoch": 2.2808825176391534, "grad_norm": 1.1656194925308228, "learning_rate": 9.454921052631579e-05, "loss": 0.4161, "step": 40732 }, { "epoch": 2.2809385149512824, "grad_norm": 0.9893287420272827, "learning_rate": 9.454894736842106e-05, "loss": 0.3206, "step": 40733 }, { "epoch": 2.2809945122634114, "grad_norm": 1.1631611585617065, "learning_rate": 9.454868421052632e-05, "loss": 0.4332, "step": 40734 }, { "epoch": 2.2810505095755405, "grad_norm": 1.1853488683700562, "learning_rate": 9.454842105263158e-05, "loss": 0.4037, "step": 40735 }, { "epoch": 2.2811065068876695, "grad_norm": 1.2882390022277832, "learning_rate": 
9.454815789473684e-05, "loss": 0.452, "step": 40736 }, { "epoch": 2.2811625041997985, "grad_norm": 1.1939971446990967, "learning_rate": 9.454789473684211e-05, "loss": 0.3749, "step": 40737 }, { "epoch": 2.2812185015119275, "grad_norm": 1.4257616996765137, "learning_rate": 9.454763157894737e-05, "loss": 0.7062, "step": 40738 }, { "epoch": 2.2812744988240565, "grad_norm": 1.230320930480957, "learning_rate": 9.454736842105265e-05, "loss": 0.491, "step": 40739 }, { "epoch": 2.2813304961361855, "grad_norm": 1.7072973251342773, "learning_rate": 9.454710526315789e-05, "loss": 0.6571, "step": 40740 }, { "epoch": 2.2813864934483146, "grad_norm": 1.2500269412994385, "learning_rate": 9.454684210526317e-05, "loss": 0.409, "step": 40741 }, { "epoch": 2.2814424907604436, "grad_norm": 1.2978328466415405, "learning_rate": 9.454657894736842e-05, "loss": 0.4459, "step": 40742 }, { "epoch": 2.2814984880725726, "grad_norm": 1.1234685182571411, "learning_rate": 9.45463157894737e-05, "loss": 0.3741, "step": 40743 }, { "epoch": 2.2815544853847016, "grad_norm": 1.0350223779678345, "learning_rate": 9.454605263157896e-05, "loss": 0.3605, "step": 40744 }, { "epoch": 2.2816104826968306, "grad_norm": 0.9962635040283203, "learning_rate": 9.45457894736842e-05, "loss": 0.3641, "step": 40745 }, { "epoch": 2.2816664800089597, "grad_norm": 1.1440106630325317, "learning_rate": 9.454552631578948e-05, "loss": 0.4514, "step": 40746 }, { "epoch": 2.2817224773210887, "grad_norm": 1.0001720190048218, "learning_rate": 9.454526315789474e-05, "loss": 0.3663, "step": 40747 }, { "epoch": 2.2817784746332177, "grad_norm": 1.0957376956939697, "learning_rate": 9.454500000000001e-05, "loss": 0.3766, "step": 40748 }, { "epoch": 2.2818344719453467, "grad_norm": 1.0857912302017212, "learning_rate": 9.454473684210527e-05, "loss": 0.3578, "step": 40749 }, { "epoch": 2.2818904692574757, "grad_norm": 1.0717769861221313, "learning_rate": 9.454447368421053e-05, "loss": 0.3777, "step": 40750 }, { "epoch": 2.2819464665696048, 
"grad_norm": 1.1021376848220825, "learning_rate": 9.454421052631579e-05, "loss": 0.3118, "step": 40751 }, { "epoch": 2.282002463881734, "grad_norm": 1.1121015548706055, "learning_rate": 9.454394736842106e-05, "loss": 0.4028, "step": 40752 }, { "epoch": 2.282058461193863, "grad_norm": 1.2617932558059692, "learning_rate": 9.454368421052632e-05, "loss": 0.3605, "step": 40753 }, { "epoch": 2.282114458505992, "grad_norm": 1.328307032585144, "learning_rate": 9.454342105263158e-05, "loss": 0.4156, "step": 40754 }, { "epoch": 2.282170455818121, "grad_norm": 1.6096069812774658, "learning_rate": 9.454315789473684e-05, "loss": 0.5602, "step": 40755 }, { "epoch": 2.28222645313025, "grad_norm": 1.155880093574524, "learning_rate": 9.454289473684212e-05, "loss": 0.3975, "step": 40756 }, { "epoch": 2.282282450442379, "grad_norm": 1.1766146421432495, "learning_rate": 9.454263157894738e-05, "loss": 0.3232, "step": 40757 }, { "epoch": 2.282338447754508, "grad_norm": 1.078818440437317, "learning_rate": 9.454236842105263e-05, "loss": 0.3293, "step": 40758 }, { "epoch": 2.282394445066637, "grad_norm": 1.1642459630966187, "learning_rate": 9.45421052631579e-05, "loss": 0.5092, "step": 40759 }, { "epoch": 2.282450442378766, "grad_norm": 1.0969308614730835, "learning_rate": 9.454184210526317e-05, "loss": 0.4029, "step": 40760 }, { "epoch": 2.282506439690895, "grad_norm": 1.1417568922042847, "learning_rate": 9.454157894736843e-05, "loss": 0.4593, "step": 40761 }, { "epoch": 2.282562437003024, "grad_norm": 1.3384737968444824, "learning_rate": 9.454131578947369e-05, "loss": 0.3998, "step": 40762 }, { "epoch": 2.282618434315153, "grad_norm": 1.0474281311035156, "learning_rate": 9.454105263157895e-05, "loss": 0.3799, "step": 40763 }, { "epoch": 2.282674431627282, "grad_norm": 1.0527393817901611, "learning_rate": 9.45407894736842e-05, "loss": 0.3328, "step": 40764 }, { "epoch": 2.282730428939411, "grad_norm": 1.5475727319717407, "learning_rate": 9.454052631578948e-05, "loss": 0.5563, "step": 
40765 }, { "epoch": 2.28278642625154, "grad_norm": 1.1792128086090088, "learning_rate": 9.454026315789474e-05, "loss": 0.3018, "step": 40766 }, { "epoch": 2.282842423563669, "grad_norm": 1.3407564163208008, "learning_rate": 9.454000000000001e-05, "loss": 0.5293, "step": 40767 }, { "epoch": 2.282898420875798, "grad_norm": 1.006574273109436, "learning_rate": 9.453973684210526e-05, "loss": 0.3691, "step": 40768 }, { "epoch": 2.282954418187927, "grad_norm": 1.1501902341842651, "learning_rate": 9.453947368421053e-05, "loss": 0.4222, "step": 40769 }, { "epoch": 2.283010415500056, "grad_norm": 1.2683444023132324, "learning_rate": 9.453921052631579e-05, "loss": 0.4697, "step": 40770 }, { "epoch": 2.283066412812185, "grad_norm": 1.2929222583770752, "learning_rate": 9.453894736842107e-05, "loss": 0.5122, "step": 40771 }, { "epoch": 2.283122410124314, "grad_norm": 1.339768886566162, "learning_rate": 9.453868421052631e-05, "loss": 0.4908, "step": 40772 }, { "epoch": 2.283178407436443, "grad_norm": 1.1752787828445435, "learning_rate": 9.453842105263158e-05, "loss": 0.336, "step": 40773 }, { "epoch": 2.283234404748572, "grad_norm": 1.1361424922943115, "learning_rate": 9.453815789473684e-05, "loss": 0.3768, "step": 40774 }, { "epoch": 2.283290402060701, "grad_norm": 1.2001701593399048, "learning_rate": 9.453789473684212e-05, "loss": 0.4433, "step": 40775 }, { "epoch": 2.2833463993728302, "grad_norm": 1.187086582183838, "learning_rate": 9.453763157894738e-05, "loss": 0.3738, "step": 40776 }, { "epoch": 2.2834023966849593, "grad_norm": 1.1378377676010132, "learning_rate": 9.453736842105264e-05, "loss": 0.3925, "step": 40777 }, { "epoch": 2.2834583939970883, "grad_norm": 1.069530963897705, "learning_rate": 9.45371052631579e-05, "loss": 0.3852, "step": 40778 }, { "epoch": 2.2835143913092173, "grad_norm": 1.092367172241211, "learning_rate": 9.453684210526316e-05, "loss": 0.3679, "step": 40779 }, { "epoch": 2.2835703886213463, "grad_norm": 1.2742834091186523, "learning_rate": 
9.453657894736843e-05, "loss": 0.475, "step": 40780 }, { "epoch": 2.2836263859334753, "grad_norm": 1.0827237367630005, "learning_rate": 9.453631578947369e-05, "loss": 0.4066, "step": 40781 }, { "epoch": 2.2836823832456044, "grad_norm": 1.3026787042617798, "learning_rate": 9.453605263157895e-05, "loss": 0.5782, "step": 40782 }, { "epoch": 2.2837383805577334, "grad_norm": 1.1385407447814941, "learning_rate": 9.453578947368421e-05, "loss": 0.3384, "step": 40783 }, { "epoch": 2.2837943778698624, "grad_norm": 1.182676911354065, "learning_rate": 9.453552631578948e-05, "loss": 0.4198, "step": 40784 }, { "epoch": 2.2838503751819914, "grad_norm": 1.4524996280670166, "learning_rate": 9.453526315789474e-05, "loss": 0.3952, "step": 40785 }, { "epoch": 2.2839063724941204, "grad_norm": 1.1759510040283203, "learning_rate": 9.4535e-05, "loss": 0.3721, "step": 40786 }, { "epoch": 2.2839623698062494, "grad_norm": 1.1353518962860107, "learning_rate": 9.453473684210526e-05, "loss": 0.4739, "step": 40787 }, { "epoch": 2.2840183671183785, "grad_norm": 1.1772749423980713, "learning_rate": 9.453447368421054e-05, "loss": 0.3984, "step": 40788 }, { "epoch": 2.2840743644305075, "grad_norm": 1.1025346517562866, "learning_rate": 9.45342105263158e-05, "loss": 0.365, "step": 40789 }, { "epoch": 2.2841303617426365, "grad_norm": 1.0741193294525146, "learning_rate": 9.453394736842105e-05, "loss": 0.4014, "step": 40790 }, { "epoch": 2.2841863590547655, "grad_norm": 1.2959717512130737, "learning_rate": 9.453368421052631e-05, "loss": 0.5235, "step": 40791 }, { "epoch": 2.2842423563668945, "grad_norm": 1.1121904850006104, "learning_rate": 9.453342105263159e-05, "loss": 0.3379, "step": 40792 }, { "epoch": 2.2842983536790236, "grad_norm": 1.1972206830978394, "learning_rate": 9.453315789473685e-05, "loss": 0.4776, "step": 40793 }, { "epoch": 2.2843543509911526, "grad_norm": 1.1832555532455444, "learning_rate": 9.453289473684212e-05, "loss": 0.4313, "step": 40794 }, { "epoch": 2.2844103483032816, 
"grad_norm": 1.0814447402954102, "learning_rate": 9.453263157894737e-05, "loss": 0.3868, "step": 40795 }, { "epoch": 2.2844663456154106, "grad_norm": 1.6931625604629517, "learning_rate": 9.453236842105263e-05, "loss": 0.3943, "step": 40796 }, { "epoch": 2.2845223429275396, "grad_norm": 1.4816912412643433, "learning_rate": 9.45321052631579e-05, "loss": 0.428, "step": 40797 }, { "epoch": 2.2845783402396687, "grad_norm": 1.384658694267273, "learning_rate": 9.453184210526316e-05, "loss": 0.545, "step": 40798 }, { "epoch": 2.2846343375517977, "grad_norm": 1.3358010053634644, "learning_rate": 9.453157894736843e-05, "loss": 0.3701, "step": 40799 }, { "epoch": 2.2846903348639267, "grad_norm": 1.3422331809997559, "learning_rate": 9.453131578947368e-05, "loss": 0.3349, "step": 40800 }, { "epoch": 2.2847463321760557, "grad_norm": 1.3526791334152222, "learning_rate": 9.453105263157895e-05, "loss": 0.3471, "step": 40801 }, { "epoch": 2.2848023294881847, "grad_norm": 1.410909652709961, "learning_rate": 9.453078947368421e-05, "loss": 0.4392, "step": 40802 }, { "epoch": 2.2848583268003138, "grad_norm": 1.245595932006836, "learning_rate": 9.453052631578949e-05, "loss": 0.5317, "step": 40803 }, { "epoch": 2.2849143241124428, "grad_norm": 1.268906831741333, "learning_rate": 9.453026315789474e-05, "loss": 0.3763, "step": 40804 }, { "epoch": 2.284970321424572, "grad_norm": 1.3148695230484009, "learning_rate": 9.453e-05, "loss": 0.3801, "step": 40805 }, { "epoch": 2.285026318736701, "grad_norm": 1.3297442197799683, "learning_rate": 9.452973684210526e-05, "loss": 0.5929, "step": 40806 }, { "epoch": 2.28508231604883, "grad_norm": 1.0440672636032104, "learning_rate": 9.452947368421054e-05, "loss": 0.4379, "step": 40807 }, { "epoch": 2.285138313360959, "grad_norm": 1.1177469491958618, "learning_rate": 9.45292105263158e-05, "loss": 0.4573, "step": 40808 }, { "epoch": 2.285194310673088, "grad_norm": 1.1262434720993042, "learning_rate": 9.452894736842106e-05, "loss": 0.3983, "step": 40809 }, { 
"epoch": 2.285250307985217, "grad_norm": 0.913974404335022, "learning_rate": 9.452868421052632e-05, "loss": 0.3562, "step": 40810 }, { "epoch": 2.285306305297346, "grad_norm": 1.361995816230774, "learning_rate": 9.452842105263159e-05, "loss": 0.4027, "step": 40811 }, { "epoch": 2.285362302609475, "grad_norm": 1.4298921823501587, "learning_rate": 9.452815789473685e-05, "loss": 0.4842, "step": 40812 }, { "epoch": 2.285418299921604, "grad_norm": 1.1082196235656738, "learning_rate": 9.452789473684211e-05, "loss": 0.4625, "step": 40813 }, { "epoch": 2.285474297233733, "grad_norm": 1.6947755813598633, "learning_rate": 9.452763157894737e-05, "loss": 0.4076, "step": 40814 }, { "epoch": 2.285530294545862, "grad_norm": 2.012397289276123, "learning_rate": 9.452736842105263e-05, "loss": 0.5032, "step": 40815 }, { "epoch": 2.285586291857991, "grad_norm": 1.3171069622039795, "learning_rate": 9.45271052631579e-05, "loss": 0.5199, "step": 40816 }, { "epoch": 2.28564228917012, "grad_norm": 1.0655114650726318, "learning_rate": 9.452684210526316e-05, "loss": 0.358, "step": 40817 }, { "epoch": 2.285698286482249, "grad_norm": 1.25496506690979, "learning_rate": 9.452657894736842e-05, "loss": 0.4259, "step": 40818 }, { "epoch": 2.285754283794378, "grad_norm": 1.226019024848938, "learning_rate": 9.452631578947368e-05, "loss": 0.4369, "step": 40819 }, { "epoch": 2.285810281106507, "grad_norm": 1.29500412940979, "learning_rate": 9.452605263157895e-05, "loss": 0.3374, "step": 40820 }, { "epoch": 2.285866278418636, "grad_norm": 1.312785029411316, "learning_rate": 9.452578947368421e-05, "loss": 0.5795, "step": 40821 }, { "epoch": 2.2859222757307647, "grad_norm": 1.2990490198135376, "learning_rate": 9.452552631578949e-05, "loss": 0.5413, "step": 40822 }, { "epoch": 2.285978273042894, "grad_norm": 1.215506672859192, "learning_rate": 9.452526315789473e-05, "loss": 0.4853, "step": 40823 }, { "epoch": 2.2860342703550227, "grad_norm": 1.4231382608413696, "learning_rate": 9.452500000000001e-05, 
"loss": 0.4386, "step": 40824 }, { "epoch": 2.286090267667152, "grad_norm": 1.7093122005462646, "learning_rate": 9.452473684210527e-05, "loss": 0.4625, "step": 40825 }, { "epoch": 2.2861462649792808, "grad_norm": 1.0981152057647705, "learning_rate": 9.452447368421054e-05, "loss": 0.4368, "step": 40826 }, { "epoch": 2.28620226229141, "grad_norm": 1.358931064605713, "learning_rate": 9.452421052631579e-05, "loss": 0.351, "step": 40827 }, { "epoch": 2.286258259603539, "grad_norm": 1.16800856590271, "learning_rate": 9.452394736842106e-05, "loss": 0.4189, "step": 40828 }, { "epoch": 2.2863142569156683, "grad_norm": 1.1592634916305542, "learning_rate": 9.452368421052632e-05, "loss": 0.5028, "step": 40829 }, { "epoch": 2.286370254227797, "grad_norm": 1.0210527181625366, "learning_rate": 9.452342105263159e-05, "loss": 0.3342, "step": 40830 }, { "epoch": 2.2864262515399263, "grad_norm": 1.3599320650100708, "learning_rate": 9.452315789473685e-05, "loss": 0.435, "step": 40831 }, { "epoch": 2.286482248852055, "grad_norm": 1.6425976753234863, "learning_rate": 9.45228947368421e-05, "loss": 0.6303, "step": 40832 }, { "epoch": 2.2865382461641843, "grad_norm": 1.7112220525741577, "learning_rate": 9.452263157894737e-05, "loss": 0.534, "step": 40833 }, { "epoch": 2.286594243476313, "grad_norm": 1.0830254554748535, "learning_rate": 9.452236842105263e-05, "loss": 0.3632, "step": 40834 }, { "epoch": 2.2866502407884424, "grad_norm": 1.2991468906402588, "learning_rate": 9.45221052631579e-05, "loss": 0.4075, "step": 40835 }, { "epoch": 2.286706238100571, "grad_norm": 1.2036579847335815, "learning_rate": 9.452184210526316e-05, "loss": 0.4785, "step": 40836 }, { "epoch": 2.2867622354127004, "grad_norm": 1.0331439971923828, "learning_rate": 9.452157894736842e-05, "loss": 0.42, "step": 40837 }, { "epoch": 2.286818232724829, "grad_norm": 1.3257039785385132, "learning_rate": 9.452131578947368e-05, "loss": 0.5174, "step": 40838 }, { "epoch": 2.2868742300369584, "grad_norm": 1.1616357564926147, 
"learning_rate": 9.452105263157896e-05, "loss": 0.4571, "step": 40839 }, { "epoch": 2.286930227349087, "grad_norm": 1.7315592765808105, "learning_rate": 9.452078947368422e-05, "loss": 0.3595, "step": 40840 }, { "epoch": 2.2869862246612165, "grad_norm": 1.304601788520813, "learning_rate": 9.452052631578948e-05, "loss": 0.3848, "step": 40841 }, { "epoch": 2.287042221973345, "grad_norm": 1.7512010335922241, "learning_rate": 9.452026315789474e-05, "loss": 0.3855, "step": 40842 }, { "epoch": 2.2870982192854745, "grad_norm": 1.8160886764526367, "learning_rate": 9.452000000000001e-05, "loss": 0.3164, "step": 40843 }, { "epoch": 2.287154216597603, "grad_norm": 1.1015946865081787, "learning_rate": 9.451973684210527e-05, "loss": 0.413, "step": 40844 }, { "epoch": 2.2872102139097326, "grad_norm": 1.353179931640625, "learning_rate": 9.451947368421053e-05, "loss": 0.4529, "step": 40845 }, { "epoch": 2.287266211221861, "grad_norm": 1.2537550926208496, "learning_rate": 9.451921052631579e-05, "loss": 0.4895, "step": 40846 }, { "epoch": 2.2873222085339906, "grad_norm": 1.1315968036651611, "learning_rate": 9.451894736842106e-05, "loss": 0.4106, "step": 40847 }, { "epoch": 2.287378205846119, "grad_norm": 1.1098504066467285, "learning_rate": 9.451868421052632e-05, "loss": 0.4442, "step": 40848 }, { "epoch": 2.2874342031582486, "grad_norm": 1.4122551679611206, "learning_rate": 9.451842105263158e-05, "loss": 0.628, "step": 40849 }, { "epoch": 2.287490200470377, "grad_norm": 1.446104645729065, "learning_rate": 9.451815789473684e-05, "loss": 0.472, "step": 40850 }, { "epoch": 2.2875461977825067, "grad_norm": 1.0111216306686401, "learning_rate": 9.45178947368421e-05, "loss": 0.3676, "step": 40851 }, { "epoch": 2.2876021950946352, "grad_norm": 3.5224239826202393, "learning_rate": 9.451763157894737e-05, "loss": 0.3782, "step": 40852 }, { "epoch": 2.2876581924067647, "grad_norm": 1.0094807147979736, "learning_rate": 9.451736842105263e-05, "loss": 0.3587, "step": 40853 }, { "epoch": 
2.2877141897188933, "grad_norm": 1.1357190608978271, "learning_rate": 9.451710526315791e-05, "loss": 0.4085, "step": 40854 }, { "epoch": 2.2877701870310228, "grad_norm": 1.5335227251052856, "learning_rate": 9.451684210526315e-05, "loss": 0.5235, "step": 40855 }, { "epoch": 2.2878261843431513, "grad_norm": 1.2523075342178345, "learning_rate": 9.451657894736843e-05, "loss": 0.444, "step": 40856 }, { "epoch": 2.287882181655281, "grad_norm": 1.159908652305603, "learning_rate": 9.451631578947369e-05, "loss": 0.4259, "step": 40857 }, { "epoch": 2.2879381789674094, "grad_norm": 1.0728968381881714, "learning_rate": 9.451605263157896e-05, "loss": 0.3896, "step": 40858 }, { "epoch": 2.287994176279539, "grad_norm": 1.1651214361190796, "learning_rate": 9.451578947368422e-05, "loss": 0.3782, "step": 40859 }, { "epoch": 2.2880501735916674, "grad_norm": 1.0190309286117554, "learning_rate": 9.451552631578948e-05, "loss": 0.3151, "step": 40860 }, { "epoch": 2.288106170903797, "grad_norm": 1.1230497360229492, "learning_rate": 9.451526315789474e-05, "loss": 0.413, "step": 40861 }, { "epoch": 2.2881621682159254, "grad_norm": 1.2884025573730469, "learning_rate": 9.451500000000001e-05, "loss": 0.4299, "step": 40862 }, { "epoch": 2.2882181655280545, "grad_norm": 1.2730110883712769, "learning_rate": 9.451473684210527e-05, "loss": 0.3841, "step": 40863 }, { "epoch": 2.2882741628401835, "grad_norm": 1.1795185804367065, "learning_rate": 9.451447368421053e-05, "loss": 0.4143, "step": 40864 }, { "epoch": 2.2883301601523125, "grad_norm": 1.2776638269424438, "learning_rate": 9.451421052631579e-05, "loss": 0.3611, "step": 40865 }, { "epoch": 2.2883861574644415, "grad_norm": 1.0309574604034424, "learning_rate": 9.451394736842105e-05, "loss": 0.3548, "step": 40866 }, { "epoch": 2.2884421547765705, "grad_norm": 1.2780596017837524, "learning_rate": 9.451368421052632e-05, "loss": 0.4396, "step": 40867 }, { "epoch": 2.2884981520886996, "grad_norm": 1.0982019901275635, "learning_rate": 
9.451342105263158e-05, "loss": 0.4266, "step": 40868 }, { "epoch": 2.2885541494008286, "grad_norm": 1.0824683904647827, "learning_rate": 9.451315789473684e-05, "loss": 0.3387, "step": 40869 }, { "epoch": 2.2886101467129576, "grad_norm": 1.2712594270706177, "learning_rate": 9.45128947368421e-05, "loss": 0.362, "step": 40870 }, { "epoch": 2.2886661440250866, "grad_norm": 1.3995704650878906, "learning_rate": 9.451263157894738e-05, "loss": 0.4478, "step": 40871 }, { "epoch": 2.2887221413372156, "grad_norm": 1.255401849746704, "learning_rate": 9.451236842105264e-05, "loss": 0.491, "step": 40872 }, { "epoch": 2.2887781386493447, "grad_norm": 1.3592734336853027, "learning_rate": 9.45121052631579e-05, "loss": 0.4026, "step": 40873 }, { "epoch": 2.2888341359614737, "grad_norm": 1.1194995641708374, "learning_rate": 9.451184210526316e-05, "loss": 0.4288, "step": 40874 }, { "epoch": 2.2888901332736027, "grad_norm": 1.3842111825942993, "learning_rate": 9.451157894736843e-05, "loss": 0.4076, "step": 40875 }, { "epoch": 2.2889461305857317, "grad_norm": 1.0915096998214722, "learning_rate": 9.451131578947369e-05, "loss": 0.4239, "step": 40876 }, { "epoch": 2.2890021278978607, "grad_norm": 1.2498295307159424, "learning_rate": 9.451105263157895e-05, "loss": 0.6339, "step": 40877 }, { "epoch": 2.2890581252099897, "grad_norm": 1.2229845523834229, "learning_rate": 9.451078947368421e-05, "loss": 0.4647, "step": 40878 }, { "epoch": 2.2891141225221188, "grad_norm": 1.2996368408203125, "learning_rate": 9.451052631578948e-05, "loss": 0.3963, "step": 40879 }, { "epoch": 2.289170119834248, "grad_norm": 1.2182092666625977, "learning_rate": 9.451026315789474e-05, "loss": 0.3182, "step": 40880 }, { "epoch": 2.289226117146377, "grad_norm": 1.193029522895813, "learning_rate": 9.451000000000002e-05, "loss": 0.3691, "step": 40881 }, { "epoch": 2.289282114458506, "grad_norm": 1.087503433227539, "learning_rate": 9.450973684210526e-05, "loss": 0.3632, "step": 40882 }, { "epoch": 2.289338111770635, 
"grad_norm": 1.507468819618225, "learning_rate": 9.450947368421052e-05, "loss": 0.4712, "step": 40883 }, { "epoch": 2.289394109082764, "grad_norm": 1.2011854648590088, "learning_rate": 9.45092105263158e-05, "loss": 0.4968, "step": 40884 }, { "epoch": 2.289450106394893, "grad_norm": 1.2221673727035522, "learning_rate": 9.450894736842105e-05, "loss": 0.4666, "step": 40885 }, { "epoch": 2.289506103707022, "grad_norm": 1.7016428709030151, "learning_rate": 9.450868421052633e-05, "loss": 0.4986, "step": 40886 }, { "epoch": 2.289562101019151, "grad_norm": 1.347862958908081, "learning_rate": 9.450842105263157e-05, "loss": 0.4446, "step": 40887 }, { "epoch": 2.28961809833128, "grad_norm": 1.1245880126953125, "learning_rate": 9.450815789473685e-05, "loss": 0.3261, "step": 40888 }, { "epoch": 2.289674095643409, "grad_norm": 1.355403184890747, "learning_rate": 9.45078947368421e-05, "loss": 0.5013, "step": 40889 }, { "epoch": 2.289730092955538, "grad_norm": 1.2152338027954102, "learning_rate": 9.450763157894738e-05, "loss": 0.4256, "step": 40890 }, { "epoch": 2.289786090267667, "grad_norm": 1.1112534999847412, "learning_rate": 9.450736842105264e-05, "loss": 0.3098, "step": 40891 }, { "epoch": 2.289842087579796, "grad_norm": 1.1031806468963623, "learning_rate": 9.45071052631579e-05, "loss": 0.4814, "step": 40892 }, { "epoch": 2.289898084891925, "grad_norm": 1.1614916324615479, "learning_rate": 9.450684210526316e-05, "loss": 0.3306, "step": 40893 }, { "epoch": 2.289954082204054, "grad_norm": 1.2880679368972778, "learning_rate": 9.450657894736843e-05, "loss": 0.4621, "step": 40894 }, { "epoch": 2.290010079516183, "grad_norm": 1.1163889169692993, "learning_rate": 9.450631578947369e-05, "loss": 0.2877, "step": 40895 }, { "epoch": 2.290066076828312, "grad_norm": 2.1183884143829346, "learning_rate": 9.450605263157895e-05, "loss": 0.3459, "step": 40896 }, { "epoch": 2.290122074140441, "grad_norm": 1.1605061292648315, "learning_rate": 9.450578947368421e-05, "loss": 0.5265, "step": 40897 
}, { "epoch": 2.29017807145257, "grad_norm": 1.2426642179489136, "learning_rate": 9.450552631578948e-05, "loss": 0.4247, "step": 40898 }, { "epoch": 2.290234068764699, "grad_norm": 1.0339198112487793, "learning_rate": 9.450526315789474e-05, "loss": 0.3855, "step": 40899 }, { "epoch": 2.290290066076828, "grad_norm": 1.3488032817840576, "learning_rate": 9.4505e-05, "loss": 0.3739, "step": 40900 }, { "epoch": 2.290346063388957, "grad_norm": 1.2192391157150269, "learning_rate": 9.450473684210526e-05, "loss": 0.3077, "step": 40901 }, { "epoch": 2.290402060701086, "grad_norm": 1.304444670677185, "learning_rate": 9.450447368421052e-05, "loss": 0.632, "step": 40902 }, { "epoch": 2.2904580580132152, "grad_norm": 1.239427089691162, "learning_rate": 9.45042105263158e-05, "loss": 0.4729, "step": 40903 }, { "epoch": 2.2905140553253442, "grad_norm": 1.1766020059585571, "learning_rate": 9.450394736842106e-05, "loss": 0.3786, "step": 40904 }, { "epoch": 2.2905700526374733, "grad_norm": 1.263381004333496, "learning_rate": 9.450368421052632e-05, "loss": 0.4117, "step": 40905 }, { "epoch": 2.2906260499496023, "grad_norm": 1.1620476245880127, "learning_rate": 9.450342105263158e-05, "loss": 0.3772, "step": 40906 }, { "epoch": 2.2906820472617313, "grad_norm": 1.5683238506317139, "learning_rate": 9.450315789473685e-05, "loss": 0.449, "step": 40907 }, { "epoch": 2.2907380445738603, "grad_norm": 1.2682077884674072, "learning_rate": 9.450289473684211e-05, "loss": 0.3034, "step": 40908 }, { "epoch": 2.2907940418859893, "grad_norm": 1.0831515789031982, "learning_rate": 9.450263157894738e-05, "loss": 0.3772, "step": 40909 }, { "epoch": 2.2908500391981184, "grad_norm": 1.1901028156280518, "learning_rate": 9.450236842105263e-05, "loss": 0.467, "step": 40910 }, { "epoch": 2.2909060365102474, "grad_norm": 1.1299101114273071, "learning_rate": 9.45021052631579e-05, "loss": 0.3466, "step": 40911 }, { "epoch": 2.2909620338223764, "grad_norm": 1.2004923820495605, "learning_rate": 9.450184210526316e-05, 
"loss": 0.4476, "step": 40912 }, { "epoch": 2.2910180311345054, "grad_norm": 1.2486859560012817, "learning_rate": 9.450157894736843e-05, "loss": 0.4379, "step": 40913 }, { "epoch": 2.2910740284466344, "grad_norm": 1.0966185331344604, "learning_rate": 9.45013157894737e-05, "loss": 0.3944, "step": 40914 }, { "epoch": 2.2911300257587635, "grad_norm": 1.1059114933013916, "learning_rate": 9.450105263157895e-05, "loss": 0.2733, "step": 40915 }, { "epoch": 2.2911860230708925, "grad_norm": 1.1275370121002197, "learning_rate": 9.450078947368421e-05, "loss": 0.3725, "step": 40916 }, { "epoch": 2.2912420203830215, "grad_norm": 1.7922306060791016, "learning_rate": 9.450052631578947e-05, "loss": 0.3991, "step": 40917 }, { "epoch": 2.2912980176951505, "grad_norm": 1.0369951725006104, "learning_rate": 9.450026315789475e-05, "loss": 0.3563, "step": 40918 }, { "epoch": 2.2913540150072795, "grad_norm": 1.0638270378112793, "learning_rate": 9.449999999999999e-05, "loss": 0.3632, "step": 40919 }, { "epoch": 2.2914100123194086, "grad_norm": 1.3651202917099, "learning_rate": 9.449973684210527e-05, "loss": 0.4033, "step": 40920 }, { "epoch": 2.2914660096315376, "grad_norm": 1.327662467956543, "learning_rate": 9.449947368421053e-05, "loss": 0.4075, "step": 40921 }, { "epoch": 2.2915220069436666, "grad_norm": 1.2228999137878418, "learning_rate": 9.44992105263158e-05, "loss": 0.4145, "step": 40922 }, { "epoch": 2.2915780042557956, "grad_norm": 1.3536463975906372, "learning_rate": 9.449894736842106e-05, "loss": 0.3919, "step": 40923 }, { "epoch": 2.2916340015679246, "grad_norm": 1.0907925367355347, "learning_rate": 9.449868421052632e-05, "loss": 0.3847, "step": 40924 }, { "epoch": 2.2916899988800536, "grad_norm": 1.1760512590408325, "learning_rate": 9.449842105263158e-05, "loss": 0.425, "step": 40925 }, { "epoch": 2.2917459961921827, "grad_norm": 1.0818884372711182, "learning_rate": 9.449815789473685e-05, "loss": 0.369, "step": 40926 }, { "epoch": 2.2918019935043117, "grad_norm": 
1.1519336700439453, "learning_rate": 9.449789473684211e-05, "loss": 0.3967, "step": 40927 }, { "epoch": 2.2918579908164407, "grad_norm": 1.2027822732925415, "learning_rate": 9.449763157894737e-05, "loss": 0.407, "step": 40928 }, { "epoch": 2.2919139881285697, "grad_norm": 1.3721596002578735, "learning_rate": 9.449736842105263e-05, "loss": 0.3189, "step": 40929 }, { "epoch": 2.2919699854406987, "grad_norm": 1.0069239139556885, "learning_rate": 9.44971052631579e-05, "loss": 0.448, "step": 40930 }, { "epoch": 2.2920259827528278, "grad_norm": 1.4787769317626953, "learning_rate": 9.449684210526316e-05, "loss": 0.4343, "step": 40931 }, { "epoch": 2.292081980064957, "grad_norm": 1.2266465425491333, "learning_rate": 9.449657894736842e-05, "loss": 0.3329, "step": 40932 }, { "epoch": 2.292137977377086, "grad_norm": 1.3142961263656616, "learning_rate": 9.449631578947368e-05, "loss": 0.4719, "step": 40933 }, { "epoch": 2.292193974689215, "grad_norm": 1.2013123035430908, "learning_rate": 9.449605263157896e-05, "loss": 0.3198, "step": 40934 }, { "epoch": 2.292249972001344, "grad_norm": 1.2666776180267334, "learning_rate": 9.449578947368422e-05, "loss": 0.5273, "step": 40935 }, { "epoch": 2.292305969313473, "grad_norm": 1.22313392162323, "learning_rate": 9.449552631578948e-05, "loss": 0.3404, "step": 40936 }, { "epoch": 2.292361966625602, "grad_norm": 1.190106749534607, "learning_rate": 9.449526315789474e-05, "loss": 0.3736, "step": 40937 }, { "epoch": 2.292417963937731, "grad_norm": 1.1512961387634277, "learning_rate": 9.4495e-05, "loss": 0.4433, "step": 40938 }, { "epoch": 2.29247396124986, "grad_norm": 1.0428340435028076, "learning_rate": 9.449473684210527e-05, "loss": 0.3289, "step": 40939 }, { "epoch": 2.292529958561989, "grad_norm": 1.0771962404251099, "learning_rate": 9.449447368421053e-05, "loss": 0.3793, "step": 40940 }, { "epoch": 2.292585955874118, "grad_norm": 1.0007634162902832, "learning_rate": 9.44942105263158e-05, "loss": 0.3915, "step": 40941 }, { "epoch": 
2.292641953186247, "grad_norm": 1.365447759628296, "learning_rate": 9.449394736842105e-05, "loss": 0.3978, "step": 40942 }, { "epoch": 2.292697950498376, "grad_norm": 1.527063250541687, "learning_rate": 9.449368421052632e-05, "loss": 0.4195, "step": 40943 }, { "epoch": 2.292753947810505, "grad_norm": 1.4016728401184082, "learning_rate": 9.449342105263158e-05, "loss": 0.4597, "step": 40944 }, { "epoch": 2.292809945122634, "grad_norm": 1.1718723773956299, "learning_rate": 9.449315789473685e-05, "loss": 0.4199, "step": 40945 }, { "epoch": 2.292865942434763, "grad_norm": 1.3043171167373657, "learning_rate": 9.449289473684211e-05, "loss": 0.3205, "step": 40946 }, { "epoch": 2.292921939746892, "grad_norm": 1.2200672626495361, "learning_rate": 9.449263157894737e-05, "loss": 0.3675, "step": 40947 }, { "epoch": 2.292977937059021, "grad_norm": 1.3172346353530884, "learning_rate": 9.449236842105263e-05, "loss": 0.3676, "step": 40948 }, { "epoch": 2.29303393437115, "grad_norm": 1.0622167587280273, "learning_rate": 9.449210526315791e-05, "loss": 0.3939, "step": 40949 }, { "epoch": 2.293089931683279, "grad_norm": 1.3013421297073364, "learning_rate": 9.449184210526317e-05, "loss": 0.2635, "step": 40950 }, { "epoch": 2.293145928995408, "grad_norm": 1.1558432579040527, "learning_rate": 9.449157894736843e-05, "loss": 0.4306, "step": 40951 }, { "epoch": 2.293201926307537, "grad_norm": 1.2057414054870605, "learning_rate": 9.449131578947369e-05, "loss": 0.3858, "step": 40952 }, { "epoch": 2.293257923619666, "grad_norm": 1.0726661682128906, "learning_rate": 9.449105263157895e-05, "loss": 0.3789, "step": 40953 }, { "epoch": 2.293313920931795, "grad_norm": 1.287476658821106, "learning_rate": 9.449078947368422e-05, "loss": 0.4155, "step": 40954 }, { "epoch": 2.293369918243924, "grad_norm": 1.2825465202331543, "learning_rate": 9.449052631578948e-05, "loss": 0.4285, "step": 40955 }, { "epoch": 2.2934259155560532, "grad_norm": 1.1929147243499756, "learning_rate": 9.449026315789474e-05, 
"loss": 0.5072, "step": 40956 }, { "epoch": 2.2934819128681823, "grad_norm": 1.1540424823760986, "learning_rate": 9.449e-05, "loss": 0.3393, "step": 40957 }, { "epoch": 2.2935379101803113, "grad_norm": 1.2304447889328003, "learning_rate": 9.448973684210527e-05, "loss": 0.355, "step": 40958 }, { "epoch": 2.2935939074924403, "grad_norm": 1.1898878812789917, "learning_rate": 9.448947368421053e-05, "loss": 0.3964, "step": 40959 }, { "epoch": 2.2936499048045693, "grad_norm": 1.2480679750442505, "learning_rate": 9.448921052631579e-05, "loss": 0.3654, "step": 40960 }, { "epoch": 2.2937059021166983, "grad_norm": 1.1460193395614624, "learning_rate": 9.448894736842105e-05, "loss": 0.4169, "step": 40961 }, { "epoch": 2.2937618994288274, "grad_norm": 1.3420658111572266, "learning_rate": 9.448868421052632e-05, "loss": 0.4806, "step": 40962 }, { "epoch": 2.2938178967409564, "grad_norm": 1.2498208284378052, "learning_rate": 9.448842105263158e-05, "loss": 0.3124, "step": 40963 }, { "epoch": 2.2938738940530854, "grad_norm": 1.3126887083053589, "learning_rate": 9.448815789473686e-05, "loss": 0.3718, "step": 40964 }, { "epoch": 2.2939298913652144, "grad_norm": 0.9935676455497742, "learning_rate": 9.44878947368421e-05, "loss": 0.3567, "step": 40965 }, { "epoch": 2.2939858886773434, "grad_norm": 1.150750756263733, "learning_rate": 9.448763157894738e-05, "loss": 0.5106, "step": 40966 }, { "epoch": 2.2940418859894725, "grad_norm": 1.1576104164123535, "learning_rate": 9.448736842105264e-05, "loss": 0.4623, "step": 40967 }, { "epoch": 2.2940978833016015, "grad_norm": 1.4180045127868652, "learning_rate": 9.448710526315791e-05, "loss": 0.4302, "step": 40968 }, { "epoch": 2.2941538806137305, "grad_norm": 1.1114437580108643, "learning_rate": 9.448684210526317e-05, "loss": 0.4735, "step": 40969 }, { "epoch": 2.2942098779258595, "grad_norm": 1.166338324546814, "learning_rate": 9.448657894736842e-05, "loss": 0.3872, "step": 40970 }, { "epoch": 2.2942658752379885, "grad_norm": 1.473102331161499, 
"learning_rate": 9.448631578947369e-05, "loss": 0.4464, "step": 40971 }, { "epoch": 2.2943218725501175, "grad_norm": 1.3719292879104614, "learning_rate": 9.448605263157895e-05, "loss": 0.3943, "step": 40972 }, { "epoch": 2.2943778698622466, "grad_norm": 1.0014541149139404, "learning_rate": 9.448578947368422e-05, "loss": 0.344, "step": 40973 }, { "epoch": 2.2944338671743756, "grad_norm": 1.3095749616622925, "learning_rate": 9.448552631578947e-05, "loss": 0.4144, "step": 40974 }, { "epoch": 2.2944898644865046, "grad_norm": 1.5319305658340454, "learning_rate": 9.448526315789474e-05, "loss": 0.4001, "step": 40975 }, { "epoch": 2.2945458617986336, "grad_norm": 1.2399495840072632, "learning_rate": 9.4485e-05, "loss": 0.38, "step": 40976 }, { "epoch": 2.2946018591107626, "grad_norm": 0.9646350741386414, "learning_rate": 9.448473684210527e-05, "loss": 0.3228, "step": 40977 }, { "epoch": 2.2946578564228917, "grad_norm": 1.1930832862854004, "learning_rate": 9.448447368421053e-05, "loss": 0.5378, "step": 40978 }, { "epoch": 2.2947138537350207, "grad_norm": 3.9676475524902344, "learning_rate": 9.44842105263158e-05, "loss": 0.4193, "step": 40979 }, { "epoch": 2.2947698510471497, "grad_norm": 1.2792505025863647, "learning_rate": 9.448394736842105e-05, "loss": 0.4197, "step": 40980 }, { "epoch": 2.2948258483592787, "grad_norm": 1.1944348812103271, "learning_rate": 9.448368421052633e-05, "loss": 0.5138, "step": 40981 }, { "epoch": 2.2948818456714077, "grad_norm": 1.1937428712844849, "learning_rate": 9.448342105263159e-05, "loss": 0.4034, "step": 40982 }, { "epoch": 2.2949378429835368, "grad_norm": 1.27170729637146, "learning_rate": 9.448315789473685e-05, "loss": 0.437, "step": 40983 }, { "epoch": 2.2949938402956658, "grad_norm": 2.1695430278778076, "learning_rate": 9.44828947368421e-05, "loss": 0.4922, "step": 40984 }, { "epoch": 2.295049837607795, "grad_norm": 1.2298381328582764, "learning_rate": 9.448263157894738e-05, "loss": 0.4957, "step": 40985 }, { "epoch": 
2.295105834919924, "grad_norm": 1.2365107536315918, "learning_rate": 9.448236842105264e-05, "loss": 0.4557, "step": 40986 }, { "epoch": 2.295161832232053, "grad_norm": 1.1257683038711548, "learning_rate": 9.44821052631579e-05, "loss": 0.4618, "step": 40987 }, { "epoch": 2.295217829544182, "grad_norm": 1.2703577280044556, "learning_rate": 9.448184210526316e-05, "loss": 0.368, "step": 40988 }, { "epoch": 2.295273826856311, "grad_norm": 1.1775504350662231, "learning_rate": 9.448157894736842e-05, "loss": 0.4538, "step": 40989 }, { "epoch": 2.29532982416844, "grad_norm": 1.211552381515503, "learning_rate": 9.448131578947369e-05, "loss": 0.442, "step": 40990 }, { "epoch": 2.295385821480569, "grad_norm": 0.9949976205825806, "learning_rate": 9.448105263157895e-05, "loss": 0.2922, "step": 40991 }, { "epoch": 2.295441818792698, "grad_norm": 1.3057242631912231, "learning_rate": 9.448078947368421e-05, "loss": 0.4271, "step": 40992 }, { "epoch": 2.295497816104827, "grad_norm": 1.3530786037445068, "learning_rate": 9.448052631578947e-05, "loss": 0.3554, "step": 40993 }, { "epoch": 2.295553813416956, "grad_norm": 1.249964714050293, "learning_rate": 9.448026315789474e-05, "loss": 0.4115, "step": 40994 }, { "epoch": 2.295609810729085, "grad_norm": 1.3308093547821045, "learning_rate": 9.448e-05, "loss": 0.3706, "step": 40995 }, { "epoch": 2.295665808041214, "grad_norm": 1.0598113536834717, "learning_rate": 9.447973684210528e-05, "loss": 0.3008, "step": 40996 }, { "epoch": 2.295721805353343, "grad_norm": 1.2341864109039307, "learning_rate": 9.447947368421052e-05, "loss": 0.4196, "step": 40997 }, { "epoch": 2.295777802665472, "grad_norm": 1.2854304313659668, "learning_rate": 9.44792105263158e-05, "loss": 0.4727, "step": 40998 }, { "epoch": 2.295833799977601, "grad_norm": 1.2498843669891357, "learning_rate": 9.447894736842106e-05, "loss": 0.4855, "step": 40999 }, { "epoch": 2.29588979728973, "grad_norm": 0.9384483098983765, "learning_rate": 9.447868421052633e-05, "loss": 0.3439, "step": 
41000 }, { "epoch": 2.295945794601859, "grad_norm": 1.5126700401306152, "learning_rate": 9.447842105263159e-05, "loss": 0.4676, "step": 41001 }, { "epoch": 2.296001791913988, "grad_norm": 1.1356618404388428, "learning_rate": 9.447815789473685e-05, "loss": 0.3712, "step": 41002 }, { "epoch": 2.296057789226117, "grad_norm": 1.1557092666625977, "learning_rate": 9.447789473684211e-05, "loss": 0.3532, "step": 41003 }, { "epoch": 2.296113786538246, "grad_norm": 1.2167025804519653, "learning_rate": 9.447763157894737e-05, "loss": 0.3081, "step": 41004 }, { "epoch": 2.296169783850375, "grad_norm": 1.0456291437149048, "learning_rate": 9.447736842105264e-05, "loss": 0.3286, "step": 41005 }, { "epoch": 2.296225781162504, "grad_norm": 1.3038848638534546, "learning_rate": 9.44771052631579e-05, "loss": 0.3303, "step": 41006 }, { "epoch": 2.296281778474633, "grad_norm": 1.148787260055542, "learning_rate": 9.447684210526316e-05, "loss": 0.3593, "step": 41007 }, { "epoch": 2.2963377757867622, "grad_norm": 1.431427001953125, "learning_rate": 9.447657894736842e-05, "loss": 0.3813, "step": 41008 }, { "epoch": 2.2963937730988913, "grad_norm": 1.318831443786621, "learning_rate": 9.44763157894737e-05, "loss": 0.6031, "step": 41009 }, { "epoch": 2.2964497704110203, "grad_norm": 1.2120219469070435, "learning_rate": 9.447605263157895e-05, "loss": 0.3939, "step": 41010 }, { "epoch": 2.2965057677231493, "grad_norm": 1.4941589832305908, "learning_rate": 9.447578947368421e-05, "loss": 0.3676, "step": 41011 }, { "epoch": 2.2965617650352783, "grad_norm": 1.1603403091430664, "learning_rate": 9.447552631578947e-05, "loss": 0.4713, "step": 41012 }, { "epoch": 2.2966177623474073, "grad_norm": 1.2495685815811157, "learning_rate": 9.447526315789475e-05, "loss": 0.3814, "step": 41013 }, { "epoch": 2.2966737596595364, "grad_norm": 1.3770884275436401, "learning_rate": 9.4475e-05, "loss": 0.4228, "step": 41014 }, { "epoch": 2.2967297569716654, "grad_norm": 1.1375032663345337, "learning_rate": 
9.447473684210527e-05, "loss": 0.4001, "step": 41015 }, { "epoch": 2.2967857542837944, "grad_norm": 1.1974637508392334, "learning_rate": 9.447447368421053e-05, "loss": 0.4937, "step": 41016 }, { "epoch": 2.2968417515959234, "grad_norm": 1.4289354085922241, "learning_rate": 9.44742105263158e-05, "loss": 0.3866, "step": 41017 }, { "epoch": 2.2968977489080524, "grad_norm": 1.3628568649291992, "learning_rate": 9.447394736842106e-05, "loss": 0.4045, "step": 41018 }, { "epoch": 2.2969537462201814, "grad_norm": 1.4367363452911377, "learning_rate": 9.447368421052633e-05, "loss": 0.6259, "step": 41019 }, { "epoch": 2.2970097435323105, "grad_norm": 1.2419058084487915, "learning_rate": 9.447342105263158e-05, "loss": 0.4216, "step": 41020 }, { "epoch": 2.2970657408444395, "grad_norm": 1.2421067953109741, "learning_rate": 9.447315789473684e-05, "loss": 0.4085, "step": 41021 }, { "epoch": 2.2971217381565685, "grad_norm": 1.1407455205917358, "learning_rate": 9.447289473684211e-05, "loss": 0.4231, "step": 41022 }, { "epoch": 2.2971777354686975, "grad_norm": 1.1749318838119507, "learning_rate": 9.447263157894737e-05, "loss": 0.3661, "step": 41023 }, { "epoch": 2.2972337327808265, "grad_norm": 1.1493127346038818, "learning_rate": 9.447236842105264e-05, "loss": 0.3681, "step": 41024 }, { "epoch": 2.2972897300929556, "grad_norm": 1.308562994003296, "learning_rate": 9.447210526315789e-05, "loss": 0.4233, "step": 41025 }, { "epoch": 2.2973457274050846, "grad_norm": 1.1703099012374878, "learning_rate": 9.447184210526316e-05, "loss": 0.4349, "step": 41026 }, { "epoch": 2.2974017247172136, "grad_norm": 1.1787515878677368, "learning_rate": 9.447157894736842e-05, "loss": 0.3317, "step": 41027 }, { "epoch": 2.2974577220293426, "grad_norm": 1.4087570905685425, "learning_rate": 9.44713157894737e-05, "loss": 0.456, "step": 41028 }, { "epoch": 2.2975137193414716, "grad_norm": 1.1689895391464233, "learning_rate": 9.447105263157894e-05, "loss": 0.3891, "step": 41029 }, { "epoch": 
2.2975697166536007, "grad_norm": 1.2607016563415527, "learning_rate": 9.447078947368422e-05, "loss": 0.4144, "step": 41030 }, { "epoch": 2.2976257139657297, "grad_norm": 1.0939470529556274, "learning_rate": 9.447052631578948e-05, "loss": 0.4209, "step": 41031 }, { "epoch": 2.2976817112778587, "grad_norm": 1.0828138589859009, "learning_rate": 9.447026315789475e-05, "loss": 0.41, "step": 41032 }, { "epoch": 2.2977377085899877, "grad_norm": 1.824317216873169, "learning_rate": 9.447000000000001e-05, "loss": 0.4542, "step": 41033 }, { "epoch": 2.2977937059021167, "grad_norm": 1.4709928035736084, "learning_rate": 9.446973684210527e-05, "loss": 0.4738, "step": 41034 }, { "epoch": 2.2978497032142458, "grad_norm": 1.1033259630203247, "learning_rate": 9.446947368421053e-05, "loss": 0.4526, "step": 41035 }, { "epoch": 2.2979057005263748, "grad_norm": 1.1319345235824585, "learning_rate": 9.44692105263158e-05, "loss": 0.3799, "step": 41036 }, { "epoch": 2.297961697838504, "grad_norm": 1.0476441383361816, "learning_rate": 9.446894736842106e-05, "loss": 0.3174, "step": 41037 }, { "epoch": 2.298017695150633, "grad_norm": 1.1341053247451782, "learning_rate": 9.446868421052632e-05, "loss": 0.4399, "step": 41038 }, { "epoch": 2.298073692462762, "grad_norm": 1.166084885597229, "learning_rate": 9.446842105263158e-05, "loss": 0.3245, "step": 41039 }, { "epoch": 2.298129689774891, "grad_norm": 1.4878582954406738, "learning_rate": 9.446815789473684e-05, "loss": 0.5112, "step": 41040 }, { "epoch": 2.29818568708702, "grad_norm": 1.1163021326065063, "learning_rate": 9.446789473684211e-05, "loss": 0.3523, "step": 41041 }, { "epoch": 2.298241684399149, "grad_norm": 1.2826194763183594, "learning_rate": 9.446763157894737e-05, "loss": 0.4249, "step": 41042 }, { "epoch": 2.298297681711278, "grad_norm": 0.9728094339370728, "learning_rate": 9.446736842105263e-05, "loss": 0.307, "step": 41043 }, { "epoch": 2.298353679023407, "grad_norm": 1.2379939556121826, "learning_rate": 9.446710526315789e-05, 
"loss": 0.3998, "step": 41044 }, { "epoch": 2.298409676335536, "grad_norm": 1.08194100856781, "learning_rate": 9.446684210526317e-05, "loss": 0.3588, "step": 41045 }, { "epoch": 2.298465673647665, "grad_norm": 1.02166748046875, "learning_rate": 9.446657894736843e-05, "loss": 0.294, "step": 41046 }, { "epoch": 2.298521670959794, "grad_norm": 1.039847731590271, "learning_rate": 9.446631578947369e-05, "loss": 0.3131, "step": 41047 }, { "epoch": 2.298577668271923, "grad_norm": 1.292528748512268, "learning_rate": 9.446605263157895e-05, "loss": 0.4246, "step": 41048 }, { "epoch": 2.298633665584052, "grad_norm": 2.0334975719451904, "learning_rate": 9.446578947368422e-05, "loss": 0.6701, "step": 41049 }, { "epoch": 2.298689662896181, "grad_norm": 1.2254564762115479, "learning_rate": 9.446552631578948e-05, "loss": 0.3212, "step": 41050 }, { "epoch": 2.29874566020831, "grad_norm": 1.154109239578247, "learning_rate": 9.446526315789475e-05, "loss": 0.2934, "step": 41051 }, { "epoch": 2.298801657520439, "grad_norm": 1.0780616998672485, "learning_rate": 9.4465e-05, "loss": 0.4078, "step": 41052 }, { "epoch": 2.298857654832568, "grad_norm": 1.3531138896942139, "learning_rate": 9.446473684210527e-05, "loss": 0.429, "step": 41053 }, { "epoch": 2.298913652144697, "grad_norm": 1.2903035879135132, "learning_rate": 9.446447368421053e-05, "loss": 0.4173, "step": 41054 }, { "epoch": 2.298969649456826, "grad_norm": 1.4337186813354492, "learning_rate": 9.44642105263158e-05, "loss": 0.4609, "step": 41055 }, { "epoch": 2.299025646768955, "grad_norm": 1.255073070526123, "learning_rate": 9.446394736842106e-05, "loss": 0.4492, "step": 41056 }, { "epoch": 2.299081644081084, "grad_norm": 1.4382634162902832, "learning_rate": 9.446368421052631e-05, "loss": 0.421, "step": 41057 }, { "epoch": 2.299137641393213, "grad_norm": 1.0598331689834595, "learning_rate": 9.446342105263158e-05, "loss": 0.4544, "step": 41058 }, { "epoch": 2.299193638705342, "grad_norm": 1.15713632106781, "learning_rate": 
9.446315789473684e-05, "loss": 0.3135, "step": 41059 }, { "epoch": 2.2992496360174712, "grad_norm": 1.1746466159820557, "learning_rate": 9.446289473684212e-05, "loss": 0.4169, "step": 41060 }, { "epoch": 2.2993056333296003, "grad_norm": 1.4122806787490845, "learning_rate": 9.446263157894738e-05, "loss": 0.3712, "step": 41061 }, { "epoch": 2.2993616306417293, "grad_norm": 1.0100699663162231, "learning_rate": 9.446236842105264e-05, "loss": 0.4306, "step": 41062 }, { "epoch": 2.2994176279538583, "grad_norm": 1.4227796792984009, "learning_rate": 9.44621052631579e-05, "loss": 0.4145, "step": 41063 }, { "epoch": 2.2994736252659873, "grad_norm": 1.1188349723815918, "learning_rate": 9.446184210526317e-05, "loss": 0.5237, "step": 41064 }, { "epoch": 2.2995296225781163, "grad_norm": 1.7654399871826172, "learning_rate": 9.446157894736843e-05, "loss": 0.4551, "step": 41065 }, { "epoch": 2.2995856198902453, "grad_norm": 1.2685762643814087, "learning_rate": 9.446131578947369e-05, "loss": 0.6306, "step": 41066 }, { "epoch": 2.2996416172023744, "grad_norm": 1.1727772951126099, "learning_rate": 9.446105263157895e-05, "loss": 0.3253, "step": 41067 }, { "epoch": 2.2996976145145034, "grad_norm": 1.4301499128341675, "learning_rate": 9.446078947368422e-05, "loss": 0.4329, "step": 41068 }, { "epoch": 2.2997536118266324, "grad_norm": 1.3428634405136108, "learning_rate": 9.446052631578948e-05, "loss": 0.4392, "step": 41069 }, { "epoch": 2.2998096091387614, "grad_norm": 1.1013458967208862, "learning_rate": 9.446026315789474e-05, "loss": 0.3832, "step": 41070 }, { "epoch": 2.2998656064508904, "grad_norm": 1.1627110242843628, "learning_rate": 9.446e-05, "loss": 0.3285, "step": 41071 }, { "epoch": 2.2999216037630195, "grad_norm": 1.4366910457611084, "learning_rate": 9.445973684210527e-05, "loss": 0.4185, "step": 41072 }, { "epoch": 2.2999776010751485, "grad_norm": 1.4668974876403809, "learning_rate": 9.445947368421053e-05, "loss": 0.4888, "step": 41073 }, { "epoch": 2.3000335983872775, 
"grad_norm": 1.4171216487884521, "learning_rate": 9.445921052631579e-05, "loss": 0.6389, "step": 41074 }, { "epoch": 2.3000895956994065, "grad_norm": 1.1692252159118652, "learning_rate": 9.445894736842105e-05, "loss": 0.4283, "step": 41075 }, { "epoch": 2.3001455930115355, "grad_norm": 1.0869266986846924, "learning_rate": 9.445868421052631e-05, "loss": 0.3723, "step": 41076 }, { "epoch": 2.3002015903236646, "grad_norm": 1.2284107208251953, "learning_rate": 9.445842105263159e-05, "loss": 0.4374, "step": 41077 }, { "epoch": 2.3002575876357936, "grad_norm": 1.244626760482788, "learning_rate": 9.445815789473685e-05, "loss": 0.2871, "step": 41078 }, { "epoch": 2.3003135849479226, "grad_norm": 1.3934588432312012, "learning_rate": 9.44578947368421e-05, "loss": 0.4364, "step": 41079 }, { "epoch": 2.3003695822600516, "grad_norm": 1.210655689239502, "learning_rate": 9.445763157894736e-05, "loss": 0.4166, "step": 41080 }, { "epoch": 2.3004255795721806, "grad_norm": 1.0206670761108398, "learning_rate": 9.445736842105264e-05, "loss": 0.2758, "step": 41081 }, { "epoch": 2.3004815768843097, "grad_norm": 1.387534260749817, "learning_rate": 9.44571052631579e-05, "loss": 0.3962, "step": 41082 }, { "epoch": 2.3005375741964387, "grad_norm": 1.2012280225753784, "learning_rate": 9.445684210526317e-05, "loss": 0.4016, "step": 41083 }, { "epoch": 2.3005935715085677, "grad_norm": 1.517020344734192, "learning_rate": 9.445657894736842e-05, "loss": 0.4156, "step": 41084 }, { "epoch": 2.3006495688206967, "grad_norm": 1.1856268644332886, "learning_rate": 9.445631578947369e-05, "loss": 0.3997, "step": 41085 }, { "epoch": 2.3007055661328257, "grad_norm": 1.2983989715576172, "learning_rate": 9.445605263157895e-05, "loss": 0.4981, "step": 41086 }, { "epoch": 2.3007615634449547, "grad_norm": 1.1210360527038574, "learning_rate": 9.445578947368422e-05, "loss": 0.3545, "step": 41087 }, { "epoch": 2.3008175607570838, "grad_norm": 1.2245783805847168, "learning_rate": 9.445552631578948e-05, "loss": 
0.4266, "step": 41088 }, { "epoch": 2.300873558069213, "grad_norm": 1.170212745666504, "learning_rate": 9.445526315789474e-05, "loss": 0.4732, "step": 41089 }, { "epoch": 2.300929555381342, "grad_norm": 1.2895238399505615, "learning_rate": 9.4455e-05, "loss": 0.4146, "step": 41090 }, { "epoch": 2.300985552693471, "grad_norm": 1.3115994930267334, "learning_rate": 9.445473684210526e-05, "loss": 0.3908, "step": 41091 }, { "epoch": 2.3010415500056, "grad_norm": 1.357094645500183, "learning_rate": 9.445447368421054e-05, "loss": 0.4171, "step": 41092 }, { "epoch": 2.301097547317729, "grad_norm": 1.2858004570007324, "learning_rate": 9.44542105263158e-05, "loss": 0.4663, "step": 41093 }, { "epoch": 2.301153544629858, "grad_norm": 1.167243242263794, "learning_rate": 9.445394736842106e-05, "loss": 0.478, "step": 41094 }, { "epoch": 2.301209541941987, "grad_norm": 1.3510496616363525, "learning_rate": 9.445368421052632e-05, "loss": 0.334, "step": 41095 }, { "epoch": 2.301265539254116, "grad_norm": 1.2992651462554932, "learning_rate": 9.445342105263159e-05, "loss": 0.4249, "step": 41096 }, { "epoch": 2.301321536566245, "grad_norm": 1.0918200016021729, "learning_rate": 9.445315789473685e-05, "loss": 0.3791, "step": 41097 }, { "epoch": 2.301377533878374, "grad_norm": 1.1572182178497314, "learning_rate": 9.445289473684211e-05, "loss": 0.4501, "step": 41098 }, { "epoch": 2.301433531190503, "grad_norm": 1.1787786483764648, "learning_rate": 9.445263157894737e-05, "loss": 0.5839, "step": 41099 }, { "epoch": 2.301489528502632, "grad_norm": 1.1431785821914673, "learning_rate": 9.445236842105264e-05, "loss": 0.4091, "step": 41100 }, { "epoch": 2.301545525814761, "grad_norm": 0.9629998803138733, "learning_rate": 9.44521052631579e-05, "loss": 0.261, "step": 41101 }, { "epoch": 2.30160152312689, "grad_norm": 1.0944408178329468, "learning_rate": 9.445184210526316e-05, "loss": 0.4321, "step": 41102 }, { "epoch": 2.301657520439019, "grad_norm": 1.130164623260498, "learning_rate": 
9.445157894736842e-05, "loss": 0.4404, "step": 41103 }, { "epoch": 2.301713517751148, "grad_norm": 1.2889387607574463, "learning_rate": 9.445131578947369e-05, "loss": 0.4271, "step": 41104 }, { "epoch": 2.301769515063277, "grad_norm": 1.2469089031219482, "learning_rate": 9.445105263157895e-05, "loss": 0.4932, "step": 41105 }, { "epoch": 2.301825512375406, "grad_norm": 1.561651349067688, "learning_rate": 9.445078947368423e-05, "loss": 0.4857, "step": 41106 }, { "epoch": 2.301881509687535, "grad_norm": 1.207578182220459, "learning_rate": 9.445052631578947e-05, "loss": 0.3862, "step": 41107 }, { "epoch": 2.301937506999664, "grad_norm": 1.0754132270812988, "learning_rate": 9.445026315789473e-05, "loss": 0.4123, "step": 41108 }, { "epoch": 2.301993504311793, "grad_norm": 1.0970557928085327, "learning_rate": 9.445e-05, "loss": 0.3689, "step": 41109 }, { "epoch": 2.302049501623922, "grad_norm": 1.1462322473526, "learning_rate": 9.444973684210527e-05, "loss": 0.389, "step": 41110 }, { "epoch": 2.302105498936051, "grad_norm": 1.1647051572799683, "learning_rate": 9.444947368421054e-05, "loss": 0.4134, "step": 41111 }, { "epoch": 2.3021614962481802, "grad_norm": 1.1929714679718018, "learning_rate": 9.444921052631578e-05, "loss": 0.3728, "step": 41112 }, { "epoch": 2.3022174935603092, "grad_norm": 1.1695880889892578, "learning_rate": 9.444894736842106e-05, "loss": 0.3382, "step": 41113 }, { "epoch": 2.3022734908724383, "grad_norm": 1.2483124732971191, "learning_rate": 9.444868421052632e-05, "loss": 0.3711, "step": 41114 }, { "epoch": 2.3023294881845673, "grad_norm": 1.0774250030517578, "learning_rate": 9.444842105263159e-05, "loss": 0.3927, "step": 41115 }, { "epoch": 2.3023854854966963, "grad_norm": 1.1876918077468872, "learning_rate": 9.444815789473685e-05, "loss": 0.4074, "step": 41116 }, { "epoch": 2.3024414828088253, "grad_norm": 1.1963183879852295, "learning_rate": 9.444789473684211e-05, "loss": 0.2942, "step": 41117 }, { "epoch": 2.3024974801209543, "grad_norm": 
1.2483224868774414, "learning_rate": 9.444763157894737e-05, "loss": 0.3444, "step": 41118 }, { "epoch": 2.3025534774330834, "grad_norm": 1.2213937044143677, "learning_rate": 9.444736842105264e-05, "loss": 0.5581, "step": 41119 }, { "epoch": 2.3026094747452124, "grad_norm": 1.1406149864196777, "learning_rate": 9.44471052631579e-05, "loss": 0.4119, "step": 41120 }, { "epoch": 2.3026654720573414, "grad_norm": 1.4735124111175537, "learning_rate": 9.444684210526316e-05, "loss": 0.4359, "step": 41121 }, { "epoch": 2.3027214693694704, "grad_norm": 1.2479605674743652, "learning_rate": 9.444657894736842e-05, "loss": 0.4266, "step": 41122 }, { "epoch": 2.3027774666815994, "grad_norm": 1.0820972919464111, "learning_rate": 9.44463157894737e-05, "loss": 0.3723, "step": 41123 }, { "epoch": 2.3028334639937285, "grad_norm": 1.0464650392532349, "learning_rate": 9.444605263157896e-05, "loss": 0.5619, "step": 41124 }, { "epoch": 2.3028894613058575, "grad_norm": 3.076843023300171, "learning_rate": 9.444578947368422e-05, "loss": 0.5786, "step": 41125 }, { "epoch": 2.3029454586179865, "grad_norm": 1.3389883041381836, "learning_rate": 9.444552631578947e-05, "loss": 0.4176, "step": 41126 }, { "epoch": 2.3030014559301155, "grad_norm": 1.0942277908325195, "learning_rate": 9.444526315789473e-05, "loss": 0.4362, "step": 41127 }, { "epoch": 2.3030574532422445, "grad_norm": 1.4207924604415894, "learning_rate": 9.444500000000001e-05, "loss": 0.5054, "step": 41128 }, { "epoch": 2.3031134505543736, "grad_norm": 1.9212104082107544, "learning_rate": 9.444473684210527e-05, "loss": 0.4975, "step": 41129 }, { "epoch": 2.3031694478665026, "grad_norm": 1.1983555555343628, "learning_rate": 9.444447368421053e-05, "loss": 0.3388, "step": 41130 }, { "epoch": 2.3032254451786316, "grad_norm": 1.1969877481460571, "learning_rate": 9.444421052631579e-05, "loss": 0.4088, "step": 41131 }, { "epoch": 2.3032814424907606, "grad_norm": 1.1954209804534912, "learning_rate": 9.444394736842106e-05, "loss": 0.5604, "step": 
41132 }, { "epoch": 2.3033374398028896, "grad_norm": 1.2000771760940552, "learning_rate": 9.444368421052632e-05, "loss": 0.4022, "step": 41133 }, { "epoch": 2.3033934371150186, "grad_norm": 1.2491918802261353, "learning_rate": 9.444342105263158e-05, "loss": 0.3094, "step": 41134 }, { "epoch": 2.3034494344271477, "grad_norm": 1.0597612857818604, "learning_rate": 9.444315789473684e-05, "loss": 0.3546, "step": 41135 }, { "epoch": 2.3035054317392767, "grad_norm": 1.6120266914367676, "learning_rate": 9.444289473684211e-05, "loss": 0.5958, "step": 41136 }, { "epoch": 2.3035614290514057, "grad_norm": 1.248927116394043, "learning_rate": 9.444263157894737e-05, "loss": 0.3942, "step": 41137 }, { "epoch": 2.3036174263635347, "grad_norm": 1.0413066148757935, "learning_rate": 9.444236842105265e-05, "loss": 0.3408, "step": 41138 }, { "epoch": 2.3036734236756637, "grad_norm": 1.2440885305404663, "learning_rate": 9.444210526315789e-05, "loss": 0.4456, "step": 41139 }, { "epoch": 2.3037294209877928, "grad_norm": 1.1932263374328613, "learning_rate": 9.444184210526317e-05, "loss": 0.3805, "step": 41140 }, { "epoch": 2.303785418299922, "grad_norm": 1.8755558729171753, "learning_rate": 9.444157894736843e-05, "loss": 0.657, "step": 41141 }, { "epoch": 2.303841415612051, "grad_norm": 1.25771963596344, "learning_rate": 9.444131578947368e-05, "loss": 0.3562, "step": 41142 }, { "epoch": 2.30389741292418, "grad_norm": 1.5962650775909424, "learning_rate": 9.444105263157896e-05, "loss": 0.4554, "step": 41143 }, { "epoch": 2.303953410236309, "grad_norm": 1.016640067100525, "learning_rate": 9.44407894736842e-05, "loss": 0.2997, "step": 41144 }, { "epoch": 2.304009407548438, "grad_norm": 1.0921821594238281, "learning_rate": 9.444052631578948e-05, "loss": 0.3552, "step": 41145 }, { "epoch": 2.304065404860567, "grad_norm": 1.1198456287384033, "learning_rate": 9.444026315789474e-05, "loss": 0.4067, "step": 41146 }, { "epoch": 2.304121402172696, "grad_norm": 1.135983943939209, "learning_rate": 
9.444000000000001e-05, "loss": 0.3517, "step": 41147 }, { "epoch": 2.304177399484825, "grad_norm": 1.1520676612854004, "learning_rate": 9.443973684210527e-05, "loss": 0.3625, "step": 41148 }, { "epoch": 2.304233396796954, "grad_norm": 1.096591591835022, "learning_rate": 9.443947368421053e-05, "loss": 0.3884, "step": 41149 }, { "epoch": 2.304289394109083, "grad_norm": 1.1004701852798462, "learning_rate": 9.443921052631579e-05, "loss": 0.587, "step": 41150 }, { "epoch": 2.304345391421212, "grad_norm": 1.0026360750198364, "learning_rate": 9.443894736842106e-05, "loss": 0.2592, "step": 41151 }, { "epoch": 2.304401388733341, "grad_norm": 1.2916796207427979, "learning_rate": 9.443868421052632e-05, "loss": 0.3808, "step": 41152 }, { "epoch": 2.3044573860454696, "grad_norm": 1.241463541984558, "learning_rate": 9.443842105263158e-05, "loss": 0.4439, "step": 41153 }, { "epoch": 2.304513383357599, "grad_norm": 1.0523325204849243, "learning_rate": 9.443815789473684e-05, "loss": 0.2913, "step": 41154 }, { "epoch": 2.3045693806697276, "grad_norm": 1.4757956266403198, "learning_rate": 9.443789473684212e-05, "loss": 0.602, "step": 41155 }, { "epoch": 2.304625377981857, "grad_norm": 1.5350556373596191, "learning_rate": 9.443763157894738e-05, "loss": 0.5242, "step": 41156 }, { "epoch": 2.3046813752939856, "grad_norm": 1.3382859230041504, "learning_rate": 9.443736842105263e-05, "loss": 0.3939, "step": 41157 }, { "epoch": 2.304737372606115, "grad_norm": 1.2251603603363037, "learning_rate": 9.44371052631579e-05, "loss": 0.3432, "step": 41158 }, { "epoch": 2.3047933699182437, "grad_norm": 1.0954320430755615, "learning_rate": 9.443684210526315e-05, "loss": 0.393, "step": 41159 }, { "epoch": 2.304849367230373, "grad_norm": 1.1894170045852661, "learning_rate": 9.443657894736843e-05, "loss": 0.4276, "step": 41160 }, { "epoch": 2.3049053645425017, "grad_norm": 1.2310396432876587, "learning_rate": 9.443631578947369e-05, "loss": 0.3287, "step": 41161 }, { "epoch": 2.304961361854631, 
"grad_norm": 1.019013524055481, "learning_rate": 9.443605263157895e-05, "loss": 0.3896, "step": 41162 }, { "epoch": 2.3050173591667598, "grad_norm": 1.1283564567565918, "learning_rate": 9.443578947368421e-05, "loss": 0.3744, "step": 41163 }, { "epoch": 2.3050733564788892, "grad_norm": 1.1618505716323853, "learning_rate": 9.443552631578948e-05, "loss": 0.4102, "step": 41164 }, { "epoch": 2.305129353791018, "grad_norm": 1.096095323562622, "learning_rate": 9.443526315789474e-05, "loss": 0.4126, "step": 41165 }, { "epoch": 2.3051853511031473, "grad_norm": 1.2699341773986816, "learning_rate": 9.443500000000001e-05, "loss": 0.4288, "step": 41166 }, { "epoch": 2.305241348415276, "grad_norm": 1.2683535814285278, "learning_rate": 9.443473684210526e-05, "loss": 0.4654, "step": 41167 }, { "epoch": 2.3052973457274053, "grad_norm": 1.0817099809646606, "learning_rate": 9.443447368421053e-05, "loss": 0.3669, "step": 41168 }, { "epoch": 2.305353343039534, "grad_norm": 1.0629780292510986, "learning_rate": 9.443421052631579e-05, "loss": 0.3186, "step": 41169 }, { "epoch": 2.3054093403516633, "grad_norm": 1.2280093431472778, "learning_rate": 9.443394736842107e-05, "loss": 0.3481, "step": 41170 }, { "epoch": 2.305465337663792, "grad_norm": 1.1792889833450317, "learning_rate": 9.443368421052633e-05, "loss": 0.3871, "step": 41171 }, { "epoch": 2.3055213349759214, "grad_norm": 1.1607532501220703, "learning_rate": 9.443342105263159e-05, "loss": 0.3518, "step": 41172 }, { "epoch": 2.30557733228805, "grad_norm": 1.382346510887146, "learning_rate": 9.443315789473684e-05, "loss": 0.3422, "step": 41173 }, { "epoch": 2.3056333296001794, "grad_norm": 2.358943462371826, "learning_rate": 9.443289473684212e-05, "loss": 0.4702, "step": 41174 }, { "epoch": 2.305689326912308, "grad_norm": 1.1040019989013672, "learning_rate": 9.443263157894738e-05, "loss": 0.396, "step": 41175 }, { "epoch": 2.3057453242244375, "grad_norm": 1.385833740234375, "learning_rate": 9.443236842105264e-05, "loss": 0.4665, 
"step": 41176 }, { "epoch": 2.305801321536566, "grad_norm": 1.2313884496688843, "learning_rate": 9.44321052631579e-05, "loss": 0.4431, "step": 41177 }, { "epoch": 2.3058573188486955, "grad_norm": 1.314171552658081, "learning_rate": 9.443184210526316e-05, "loss": 0.5751, "step": 41178 }, { "epoch": 2.305913316160824, "grad_norm": 1.1755602359771729, "learning_rate": 9.443157894736843e-05, "loss": 0.3631, "step": 41179 }, { "epoch": 2.3059693134729535, "grad_norm": 1.1333853006362915, "learning_rate": 9.443131578947369e-05, "loss": 0.457, "step": 41180 }, { "epoch": 2.306025310785082, "grad_norm": 1.4316025972366333, "learning_rate": 9.443105263157895e-05, "loss": 0.4215, "step": 41181 }, { "epoch": 2.3060813080972116, "grad_norm": 1.150217890739441, "learning_rate": 9.443078947368421e-05, "loss": 0.33, "step": 41182 }, { "epoch": 2.30613730540934, "grad_norm": 1.2990572452545166, "learning_rate": 9.443052631578948e-05, "loss": 0.4061, "step": 41183 }, { "epoch": 2.3061933027214696, "grad_norm": 1.056262731552124, "learning_rate": 9.443026315789474e-05, "loss": 0.4119, "step": 41184 }, { "epoch": 2.306249300033598, "grad_norm": 1.326519250869751, "learning_rate": 9.443e-05, "loss": 0.4717, "step": 41185 }, { "epoch": 2.3063052973457276, "grad_norm": 1.003314733505249, "learning_rate": 9.442973684210526e-05, "loss": 0.3695, "step": 41186 }, { "epoch": 2.306361294657856, "grad_norm": 1.3201234340667725, "learning_rate": 9.442947368421054e-05, "loss": 0.6548, "step": 41187 }, { "epoch": 2.3064172919699857, "grad_norm": 1.1729272603988647, "learning_rate": 9.44292105263158e-05, "loss": 0.3128, "step": 41188 }, { "epoch": 2.3064732892821143, "grad_norm": 1.1012417078018188, "learning_rate": 9.442894736842105e-05, "loss": 0.2822, "step": 41189 }, { "epoch": 2.3065292865942437, "grad_norm": 1.3226896524429321, "learning_rate": 9.442868421052631e-05, "loss": 0.521, "step": 41190 }, { "epoch": 2.3065852839063723, "grad_norm": 1.0767464637756348, "learning_rate": 
9.442842105263159e-05, "loss": 0.3531, "step": 41191 }, { "epoch": 2.3066412812185018, "grad_norm": 1.212410807609558, "learning_rate": 9.442815789473685e-05, "loss": 0.3683, "step": 41192 }, { "epoch": 2.3066972785306303, "grad_norm": 1.1131529808044434, "learning_rate": 9.442789473684212e-05, "loss": 0.2997, "step": 41193 }, { "epoch": 2.3067532758427594, "grad_norm": 1.0302926301956177, "learning_rate": 9.442763157894737e-05, "loss": 0.3515, "step": 41194 }, { "epoch": 2.3068092731548884, "grad_norm": 1.3277411460876465, "learning_rate": 9.442736842105263e-05, "loss": 0.3476, "step": 41195 }, { "epoch": 2.3068652704670174, "grad_norm": 1.1428031921386719, "learning_rate": 9.44271052631579e-05, "loss": 0.3007, "step": 41196 }, { "epoch": 2.3069212677791464, "grad_norm": 1.12355637550354, "learning_rate": 9.442684210526316e-05, "loss": 0.476, "step": 41197 }, { "epoch": 2.3069772650912754, "grad_norm": 1.4822880029678345, "learning_rate": 9.442657894736843e-05, "loss": 0.497, "step": 41198 }, { "epoch": 2.3070332624034044, "grad_norm": 1.2447497844696045, "learning_rate": 9.442631578947368e-05, "loss": 0.4572, "step": 41199 }, { "epoch": 2.3070892597155335, "grad_norm": 3.252469778060913, "learning_rate": 9.442605263157895e-05, "loss": 0.4996, "step": 41200 }, { "epoch": 2.3071452570276625, "grad_norm": 1.3244056701660156, "learning_rate": 9.442578947368421e-05, "loss": 0.4706, "step": 41201 }, { "epoch": 2.3072012543397915, "grad_norm": 5.475678443908691, "learning_rate": 9.442552631578949e-05, "loss": 0.3791, "step": 41202 }, { "epoch": 2.3072572516519205, "grad_norm": 1.1146981716156006, "learning_rate": 9.442526315789475e-05, "loss": 0.6018, "step": 41203 }, { "epoch": 2.3073132489640495, "grad_norm": 1.054796576499939, "learning_rate": 9.4425e-05, "loss": 0.3865, "step": 41204 }, { "epoch": 2.3073692462761786, "grad_norm": 1.4048603773117065, "learning_rate": 9.442473684210526e-05, "loss": 0.4875, "step": 41205 }, { "epoch": 2.3074252435883076, "grad_norm": 
1.4637945890426636, "learning_rate": 9.442447368421054e-05, "loss": 0.4676, "step": 41206 }, { "epoch": 2.3074812409004366, "grad_norm": 1.1658724546432495, "learning_rate": 9.44242105263158e-05, "loss": 0.4202, "step": 41207 }, { "epoch": 2.3075372382125656, "grad_norm": 1.299358606338501, "learning_rate": 9.442394736842106e-05, "loss": 0.6069, "step": 41208 }, { "epoch": 2.3075932355246946, "grad_norm": 1.7072272300720215, "learning_rate": 9.442368421052632e-05, "loss": 0.634, "step": 41209 }, { "epoch": 2.3076492328368237, "grad_norm": 1.3296971321105957, "learning_rate": 9.442342105263159e-05, "loss": 0.5488, "step": 41210 }, { "epoch": 2.3077052301489527, "grad_norm": 1.1389737129211426, "learning_rate": 9.442315789473685e-05, "loss": 0.3998, "step": 41211 }, { "epoch": 2.3077612274610817, "grad_norm": 1.3433154821395874, "learning_rate": 9.442289473684211e-05, "loss": 0.4376, "step": 41212 }, { "epoch": 2.3078172247732107, "grad_norm": 1.2962489128112793, "learning_rate": 9.442263157894737e-05, "loss": 0.3559, "step": 41213 }, { "epoch": 2.3078732220853397, "grad_norm": 1.194562315940857, "learning_rate": 9.442236842105263e-05, "loss": 0.3917, "step": 41214 }, { "epoch": 2.3079292193974688, "grad_norm": 1.3812541961669922, "learning_rate": 9.44221052631579e-05, "loss": 0.3673, "step": 41215 }, { "epoch": 2.3079852167095978, "grad_norm": 1.4755765199661255, "learning_rate": 9.442184210526316e-05, "loss": 0.5047, "step": 41216 }, { "epoch": 2.308041214021727, "grad_norm": 1.1226434707641602, "learning_rate": 9.442157894736842e-05, "loss": 0.3681, "step": 41217 }, { "epoch": 2.308097211333856, "grad_norm": 1.3759899139404297, "learning_rate": 9.442131578947368e-05, "loss": 0.4198, "step": 41218 }, { "epoch": 2.308153208645985, "grad_norm": 1.372986912727356, "learning_rate": 9.442105263157895e-05, "loss": 0.4164, "step": 41219 }, { "epoch": 2.308209205958114, "grad_norm": 1.2326743602752686, "learning_rate": 9.442078947368421e-05, "loss": 0.3656, "step": 41220 
}, { "epoch": 2.308265203270243, "grad_norm": 1.4366059303283691, "learning_rate": 9.442052631578949e-05, "loss": 0.5534, "step": 41221 }, { "epoch": 2.308321200582372, "grad_norm": 1.2622298002243042, "learning_rate": 9.442026315789473e-05, "loss": 0.3881, "step": 41222 }, { "epoch": 2.308377197894501, "grad_norm": 1.1661293506622314, "learning_rate": 9.442000000000001e-05, "loss": 0.3973, "step": 41223 }, { "epoch": 2.30843319520663, "grad_norm": 1.1904220581054688, "learning_rate": 9.441973684210527e-05, "loss": 0.4137, "step": 41224 }, { "epoch": 2.308489192518759, "grad_norm": 1.3862000703811646, "learning_rate": 9.441947368421054e-05, "loss": 0.3895, "step": 41225 }, { "epoch": 2.308545189830888, "grad_norm": 1.1853761672973633, "learning_rate": 9.441921052631579e-05, "loss": 0.4411, "step": 41226 }, { "epoch": 2.308601187143017, "grad_norm": 1.4763569831848145, "learning_rate": 9.441894736842106e-05, "loss": 0.5152, "step": 41227 }, { "epoch": 2.308657184455146, "grad_norm": 1.10613214969635, "learning_rate": 9.441868421052632e-05, "loss": 0.423, "step": 41228 }, { "epoch": 2.308713181767275, "grad_norm": 1.2948658466339111, "learning_rate": 9.441842105263158e-05, "loss": 0.367, "step": 41229 }, { "epoch": 2.308769179079404, "grad_norm": 1.1493228673934937, "learning_rate": 9.441815789473685e-05, "loss": 0.3077, "step": 41230 }, { "epoch": 2.308825176391533, "grad_norm": 1.3492008447647095, "learning_rate": 9.44178947368421e-05, "loss": 0.4266, "step": 41231 }, { "epoch": 2.308881173703662, "grad_norm": 1.0226320028305054, "learning_rate": 9.441763157894737e-05, "loss": 0.3619, "step": 41232 }, { "epoch": 2.308937171015791, "grad_norm": 1.1736783981323242, "learning_rate": 9.441736842105263e-05, "loss": 0.4611, "step": 41233 }, { "epoch": 2.30899316832792, "grad_norm": 1.4075218439102173, "learning_rate": 9.44171052631579e-05, "loss": 0.601, "step": 41234 }, { "epoch": 2.309049165640049, "grad_norm": 1.0453821420669556, "learning_rate": 
9.441684210526316e-05, "loss": 0.2935, "step": 41235 }, { "epoch": 2.309105162952178, "grad_norm": 1.3486816883087158, "learning_rate": 9.441657894736842e-05, "loss": 0.5075, "step": 41236 }, { "epoch": 2.309161160264307, "grad_norm": 1.1429842710494995, "learning_rate": 9.441631578947368e-05, "loss": 0.4051, "step": 41237 }, { "epoch": 2.309217157576436, "grad_norm": 1.2654139995574951, "learning_rate": 9.441605263157896e-05, "loss": 0.436, "step": 41238 }, { "epoch": 2.309273154888565, "grad_norm": 1.1187604665756226, "learning_rate": 9.441578947368422e-05, "loss": 0.4698, "step": 41239 }, { "epoch": 2.3093291522006942, "grad_norm": 1.3010759353637695, "learning_rate": 9.441552631578948e-05, "loss": 0.4163, "step": 41240 }, { "epoch": 2.3093851495128233, "grad_norm": 1.2733290195465088, "learning_rate": 9.441526315789474e-05, "loss": 0.3791, "step": 41241 }, { "epoch": 2.3094411468249523, "grad_norm": 1.2594940662384033, "learning_rate": 9.441500000000001e-05, "loss": 0.4574, "step": 41242 }, { "epoch": 2.3094971441370813, "grad_norm": 1.2382731437683105, "learning_rate": 9.441473684210527e-05, "loss": 0.4378, "step": 41243 }, { "epoch": 2.3095531414492103, "grad_norm": 1.1887248754501343, "learning_rate": 9.441447368421053e-05, "loss": 0.3804, "step": 41244 }, { "epoch": 2.3096091387613393, "grad_norm": 1.1392658948898315, "learning_rate": 9.441421052631579e-05, "loss": 0.3676, "step": 41245 }, { "epoch": 2.3096651360734684, "grad_norm": 1.0222123861312866, "learning_rate": 9.441394736842105e-05, "loss": 0.4313, "step": 41246 }, { "epoch": 2.3097211333855974, "grad_norm": 1.0896024703979492, "learning_rate": 9.441368421052632e-05, "loss": 0.3613, "step": 41247 }, { "epoch": 2.3097771306977264, "grad_norm": 1.2499682903289795, "learning_rate": 9.441342105263158e-05, "loss": 0.3702, "step": 41248 }, { "epoch": 2.3098331280098554, "grad_norm": 1.5982550382614136, "learning_rate": 9.441315789473684e-05, "loss": 0.3645, "step": 41249 }, { "epoch": 2.3098891253219844, 
"grad_norm": 1.1547956466674805, "learning_rate": 9.44128947368421e-05, "loss": 0.4226, "step": 41250 }, { "epoch": 2.3099451226341134, "grad_norm": 1.1691392660140991, "learning_rate": 9.441263157894737e-05, "loss": 0.3692, "step": 41251 }, { "epoch": 2.3100011199462425, "grad_norm": 1.3276208639144897, "learning_rate": 9.441236842105263e-05, "loss": 0.427, "step": 41252 }, { "epoch": 2.3100571172583715, "grad_norm": 1.1660151481628418, "learning_rate": 9.441210526315791e-05, "loss": 0.4639, "step": 41253 }, { "epoch": 2.3101131145705005, "grad_norm": 1.5728514194488525, "learning_rate": 9.441184210526315e-05, "loss": 0.4579, "step": 41254 }, { "epoch": 2.3101691118826295, "grad_norm": 1.5247868299484253, "learning_rate": 9.441157894736843e-05, "loss": 0.5588, "step": 41255 }, { "epoch": 2.3102251091947585, "grad_norm": 0.9623777270317078, "learning_rate": 9.441131578947369e-05, "loss": 0.2966, "step": 41256 }, { "epoch": 2.3102811065068876, "grad_norm": 15.850016593933105, "learning_rate": 9.441105263157896e-05, "loss": 0.398, "step": 41257 }, { "epoch": 2.3103371038190166, "grad_norm": 1.3107852935791016, "learning_rate": 9.441078947368422e-05, "loss": 0.5054, "step": 41258 }, { "epoch": 2.3103931011311456, "grad_norm": 1.3028587102890015, "learning_rate": 9.441052631578948e-05, "loss": 0.4868, "step": 41259 }, { "epoch": 2.3104490984432746, "grad_norm": 1.3456355333328247, "learning_rate": 9.441026315789474e-05, "loss": 0.5253, "step": 41260 }, { "epoch": 2.3105050957554036, "grad_norm": 1.1589744091033936, "learning_rate": 9.441000000000001e-05, "loss": 0.3649, "step": 41261 }, { "epoch": 2.3105610930675327, "grad_norm": 1.3102086782455444, "learning_rate": 9.440973684210527e-05, "loss": 0.4942, "step": 41262 }, { "epoch": 2.3106170903796617, "grad_norm": 0.9877236485481262, "learning_rate": 9.440947368421053e-05, "loss": 0.2984, "step": 41263 }, { "epoch": 2.3106730876917907, "grad_norm": 1.7010084390640259, "learning_rate": 9.440921052631579e-05, "loss": 
0.5229, "step": 41264 }, { "epoch": 2.3107290850039197, "grad_norm": 1.4839822053909302, "learning_rate": 9.440894736842105e-05, "loss": 0.3877, "step": 41265 }, { "epoch": 2.3107850823160487, "grad_norm": 1.3590247631072998, "learning_rate": 9.440868421052632e-05, "loss": 0.4533, "step": 41266 }, { "epoch": 2.3108410796281778, "grad_norm": 1.2635881900787354, "learning_rate": 9.440842105263158e-05, "loss": 0.4093, "step": 41267 }, { "epoch": 2.3108970769403068, "grad_norm": 1.3417646884918213, "learning_rate": 9.440815789473684e-05, "loss": 0.3533, "step": 41268 }, { "epoch": 2.310953074252436, "grad_norm": 1.9569975137710571, "learning_rate": 9.44078947368421e-05, "loss": 0.4248, "step": 41269 }, { "epoch": 2.311009071564565, "grad_norm": 1.1856062412261963, "learning_rate": 9.440763157894738e-05, "loss": 0.3642, "step": 41270 }, { "epoch": 2.311065068876694, "grad_norm": 1.1207315921783447, "learning_rate": 9.440736842105264e-05, "loss": 0.3766, "step": 41271 }, { "epoch": 2.311121066188823, "grad_norm": 1.1116873025894165, "learning_rate": 9.44071052631579e-05, "loss": 0.3486, "step": 41272 }, { "epoch": 2.311177063500952, "grad_norm": 1.0138052701950073, "learning_rate": 9.440684210526316e-05, "loss": 0.3452, "step": 41273 }, { "epoch": 2.311233060813081, "grad_norm": 3.0268807411193848, "learning_rate": 9.440657894736843e-05, "loss": 0.3628, "step": 41274 }, { "epoch": 2.31128905812521, "grad_norm": 1.2343382835388184, "learning_rate": 9.440631578947369e-05, "loss": 0.5134, "step": 41275 }, { "epoch": 2.311345055437339, "grad_norm": 1.1045588254928589, "learning_rate": 9.440605263157896e-05, "loss": 0.4906, "step": 41276 }, { "epoch": 2.311401052749468, "grad_norm": 1.7074835300445557, "learning_rate": 9.440578947368421e-05, "loss": 0.5393, "step": 41277 }, { "epoch": 2.311457050061597, "grad_norm": 1.5158981084823608, "learning_rate": 9.440552631578948e-05, "loss": 0.3874, "step": 41278 }, { "epoch": 2.311513047373726, "grad_norm": 1.138958215713501, 
"learning_rate": 9.440526315789474e-05, "loss": 0.3779, "step": 41279 }, { "epoch": 2.311569044685855, "grad_norm": 1.3646339178085327, "learning_rate": 9.4405e-05, "loss": 0.4031, "step": 41280 }, { "epoch": 2.311625041997984, "grad_norm": 1.1444863080978394, "learning_rate": 9.440473684210526e-05, "loss": 0.3771, "step": 41281 }, { "epoch": 2.311681039310113, "grad_norm": 1.1277296543121338, "learning_rate": 9.440447368421052e-05, "loss": 0.4182, "step": 41282 }, { "epoch": 2.311737036622242, "grad_norm": 1.7238092422485352, "learning_rate": 9.44042105263158e-05, "loss": 0.451, "step": 41283 }, { "epoch": 2.311793033934371, "grad_norm": 1.0843058824539185, "learning_rate": 9.440394736842105e-05, "loss": 0.4729, "step": 41284 }, { "epoch": 2.3118490312465, "grad_norm": 1.1807172298431396, "learning_rate": 9.440368421052633e-05, "loss": 0.5245, "step": 41285 }, { "epoch": 2.311905028558629, "grad_norm": 1.1698341369628906, "learning_rate": 9.440342105263157e-05, "loss": 0.4166, "step": 41286 }, { "epoch": 2.311961025870758, "grad_norm": 1.026491403579712, "learning_rate": 9.440315789473685e-05, "loss": 0.3069, "step": 41287 }, { "epoch": 2.312017023182887, "grad_norm": 1.4497567415237427, "learning_rate": 9.44028947368421e-05, "loss": 0.3443, "step": 41288 }, { "epoch": 2.312073020495016, "grad_norm": 1.2242244482040405, "learning_rate": 9.440263157894738e-05, "loss": 0.3821, "step": 41289 }, { "epoch": 2.312129017807145, "grad_norm": 1.2092628479003906, "learning_rate": 9.440236842105264e-05, "loss": 0.4558, "step": 41290 }, { "epoch": 2.312185015119274, "grad_norm": 1.1276986598968506, "learning_rate": 9.44021052631579e-05, "loss": 0.3331, "step": 41291 }, { "epoch": 2.3122410124314032, "grad_norm": 1.174345850944519, "learning_rate": 9.440184210526316e-05, "loss": 0.3326, "step": 41292 }, { "epoch": 2.3122970097435323, "grad_norm": 1.0791987180709839, "learning_rate": 9.440157894736843e-05, "loss": 0.3382, "step": 41293 }, { "epoch": 2.3123530070556613, 
"grad_norm": 1.0942697525024414, "learning_rate": 9.440131578947369e-05, "loss": 0.3754, "step": 41294 }, { "epoch": 2.3124090043677903, "grad_norm": 1.3148592710494995, "learning_rate": 9.440105263157895e-05, "loss": 0.5432, "step": 41295 }, { "epoch": 2.3124650016799193, "grad_norm": 1.2570302486419678, "learning_rate": 9.440078947368421e-05, "loss": 0.4145, "step": 41296 }, { "epoch": 2.3125209989920483, "grad_norm": 1.1952694654464722, "learning_rate": 9.440052631578948e-05, "loss": 0.2893, "step": 41297 }, { "epoch": 2.3125769963041773, "grad_norm": 1.060887098312378, "learning_rate": 9.440026315789474e-05, "loss": 0.3543, "step": 41298 }, { "epoch": 2.3126329936163064, "grad_norm": 1.3482955694198608, "learning_rate": 9.44e-05, "loss": 0.3677, "step": 41299 }, { "epoch": 2.3126889909284354, "grad_norm": 1.1082282066345215, "learning_rate": 9.439973684210526e-05, "loss": 0.3493, "step": 41300 }, { "epoch": 2.3127449882405644, "grad_norm": 0.9989748597145081, "learning_rate": 9.439947368421052e-05, "loss": 0.3754, "step": 41301 }, { "epoch": 2.3128009855526934, "grad_norm": 1.4040677547454834, "learning_rate": 9.43992105263158e-05, "loss": 0.5438, "step": 41302 }, { "epoch": 2.3128569828648224, "grad_norm": 1.5152626037597656, "learning_rate": 9.439894736842106e-05, "loss": 0.5051, "step": 41303 }, { "epoch": 2.3129129801769515, "grad_norm": 1.204582691192627, "learning_rate": 9.439868421052632e-05, "loss": 0.365, "step": 41304 }, { "epoch": 2.3129689774890805, "grad_norm": 1.354763150215149, "learning_rate": 9.439842105263158e-05, "loss": 0.3932, "step": 41305 }, { "epoch": 2.3130249748012095, "grad_norm": 1.3283936977386475, "learning_rate": 9.439815789473685e-05, "loss": 0.3773, "step": 41306 }, { "epoch": 2.3130809721133385, "grad_norm": 1.0832765102386475, "learning_rate": 9.439789473684211e-05, "loss": 0.3317, "step": 41307 }, { "epoch": 2.3131369694254675, "grad_norm": 1.1603450775146484, "learning_rate": 9.439763157894738e-05, "loss": 0.3382, "step": 
41308 }, { "epoch": 2.3131929667375966, "grad_norm": 1.197878360748291, "learning_rate": 9.439736842105263e-05, "loss": 0.3889, "step": 41309 }, { "epoch": 2.3132489640497256, "grad_norm": 1.4172712564468384, "learning_rate": 9.43971052631579e-05, "loss": 0.3217, "step": 41310 }, { "epoch": 2.3133049613618546, "grad_norm": 1.192231297492981, "learning_rate": 9.439684210526316e-05, "loss": 0.5098, "step": 41311 }, { "epoch": 2.3133609586739836, "grad_norm": 1.0548523664474487, "learning_rate": 9.439657894736843e-05, "loss": 0.3614, "step": 41312 }, { "epoch": 2.3134169559861126, "grad_norm": 1.1855655908584595, "learning_rate": 9.43963157894737e-05, "loss": 0.3765, "step": 41313 }, { "epoch": 2.3134729532982417, "grad_norm": 1.0742753744125366, "learning_rate": 9.439605263157895e-05, "loss": 0.4044, "step": 41314 }, { "epoch": 2.3135289506103707, "grad_norm": 1.7576572895050049, "learning_rate": 9.439578947368421e-05, "loss": 0.6979, "step": 41315 }, { "epoch": 2.3135849479224997, "grad_norm": 1.232686161994934, "learning_rate": 9.439552631578947e-05, "loss": 0.3823, "step": 41316 }, { "epoch": 2.3136409452346287, "grad_norm": 1.371396541595459, "learning_rate": 9.439526315789475e-05, "loss": 0.4225, "step": 41317 }, { "epoch": 2.3136969425467577, "grad_norm": 1.556419849395752, "learning_rate": 9.439500000000001e-05, "loss": 0.562, "step": 41318 }, { "epoch": 2.3137529398588867, "grad_norm": 3.6189262866973877, "learning_rate": 9.439473684210527e-05, "loss": 0.415, "step": 41319 }, { "epoch": 2.3138089371710158, "grad_norm": 1.14079749584198, "learning_rate": 9.439447368421053e-05, "loss": 0.3497, "step": 41320 }, { "epoch": 2.313864934483145, "grad_norm": 0.9735620617866516, "learning_rate": 9.43942105263158e-05, "loss": 0.3399, "step": 41321 }, { "epoch": 2.313920931795274, "grad_norm": 1.159853458404541, "learning_rate": 9.439394736842106e-05, "loss": 0.3901, "step": 41322 }, { "epoch": 2.313976929107403, "grad_norm": 1.1504665613174438, "learning_rate": 
9.439368421052632e-05, "loss": 0.372, "step": 41323 }, { "epoch": 2.314032926419532, "grad_norm": 1.0760533809661865, "learning_rate": 9.439342105263158e-05, "loss": 0.3904, "step": 41324 }, { "epoch": 2.314088923731661, "grad_norm": 1.1629009246826172, "learning_rate": 9.439315789473685e-05, "loss": 0.5327, "step": 41325 }, { "epoch": 2.31414492104379, "grad_norm": 1.285649061203003, "learning_rate": 9.439289473684211e-05, "loss": 0.477, "step": 41326 }, { "epoch": 2.314200918355919, "grad_norm": 1.1474159955978394, "learning_rate": 9.439263157894737e-05, "loss": 0.3918, "step": 41327 }, { "epoch": 2.314256915668048, "grad_norm": 1.158265233039856, "learning_rate": 9.439236842105263e-05, "loss": 0.4011, "step": 41328 }, { "epoch": 2.314312912980177, "grad_norm": 1.3388100862503052, "learning_rate": 9.43921052631579e-05, "loss": 0.4331, "step": 41329 }, { "epoch": 2.314368910292306, "grad_norm": 1.1474487781524658, "learning_rate": 9.439184210526316e-05, "loss": 0.3586, "step": 41330 }, { "epoch": 2.314424907604435, "grad_norm": 1.3449229001998901, "learning_rate": 9.439157894736844e-05, "loss": 0.4732, "step": 41331 }, { "epoch": 2.314480904916564, "grad_norm": 0.9961864352226257, "learning_rate": 9.439131578947368e-05, "loss": 0.4983, "step": 41332 }, { "epoch": 2.314536902228693, "grad_norm": 1.0431329011917114, "learning_rate": 9.439105263157894e-05, "loss": 0.3561, "step": 41333 }, { "epoch": 2.314592899540822, "grad_norm": 1.059946060180664, "learning_rate": 9.439078947368422e-05, "loss": 0.3425, "step": 41334 }, { "epoch": 2.314648896852951, "grad_norm": 1.4289476871490479, "learning_rate": 9.439052631578948e-05, "loss": 0.4408, "step": 41335 }, { "epoch": 2.31470489416508, "grad_norm": 1.2132060527801514, "learning_rate": 9.439026315789474e-05, "loss": 0.4205, "step": 41336 }, { "epoch": 2.314760891477209, "grad_norm": 0.9894360899925232, "learning_rate": 9.439e-05, "loss": 0.3407, "step": 41337 }, { "epoch": 2.314816888789338, "grad_norm": 
1.1171969175338745, "learning_rate": 9.438973684210527e-05, "loss": 0.4879, "step": 41338 }, { "epoch": 2.314872886101467, "grad_norm": 1.1477209329605103, "learning_rate": 9.438947368421053e-05, "loss": 0.4461, "step": 41339 }, { "epoch": 2.314928883413596, "grad_norm": 1.2204196453094482, "learning_rate": 9.43892105263158e-05, "loss": 0.5072, "step": 41340 }, { "epoch": 2.314984880725725, "grad_norm": 1.3188509941101074, "learning_rate": 9.438894736842105e-05, "loss": 0.6795, "step": 41341 }, { "epoch": 2.315040878037854, "grad_norm": 1.0996758937835693, "learning_rate": 9.438868421052632e-05, "loss": 0.3322, "step": 41342 }, { "epoch": 2.315096875349983, "grad_norm": 1.3580321073532104, "learning_rate": 9.438842105263158e-05, "loss": 0.4266, "step": 41343 }, { "epoch": 2.3151528726621122, "grad_norm": 1.1975034475326538, "learning_rate": 9.438815789473685e-05, "loss": 0.4652, "step": 41344 }, { "epoch": 2.3152088699742412, "grad_norm": 1.2128863334655762, "learning_rate": 9.438789473684211e-05, "loss": 0.4818, "step": 41345 }, { "epoch": 2.3152648672863703, "grad_norm": 1.1107937097549438, "learning_rate": 9.438763157894737e-05, "loss": 0.4031, "step": 41346 }, { "epoch": 2.3153208645984993, "grad_norm": 1.0903804302215576, "learning_rate": 9.438736842105263e-05, "loss": 0.3444, "step": 41347 }, { "epoch": 2.3153768619106283, "grad_norm": 1.4978266954421997, "learning_rate": 9.438710526315791e-05, "loss": 0.3729, "step": 41348 }, { "epoch": 2.3154328592227573, "grad_norm": 1.3032159805297852, "learning_rate": 9.438684210526317e-05, "loss": 0.5783, "step": 41349 }, { "epoch": 2.3154888565348863, "grad_norm": 1.316075086593628, "learning_rate": 9.438657894736843e-05, "loss": 0.3711, "step": 41350 }, { "epoch": 2.3155448538470154, "grad_norm": 1.1292375326156616, "learning_rate": 9.438631578947369e-05, "loss": 0.3986, "step": 41351 }, { "epoch": 2.3156008511591444, "grad_norm": 1.2872331142425537, "learning_rate": 9.438605263157895e-05, "loss": 0.3025, "step": 
41352 }, { "epoch": 2.3156568484712734, "grad_norm": 1.060409665107727, "learning_rate": 9.438578947368422e-05, "loss": 0.3404, "step": 41353 }, { "epoch": 2.3157128457834024, "grad_norm": 1.3974651098251343, "learning_rate": 9.438552631578948e-05, "loss": 0.3759, "step": 41354 }, { "epoch": 2.3157688430955314, "grad_norm": 1.255693793296814, "learning_rate": 9.438526315789474e-05, "loss": 0.4282, "step": 41355 }, { "epoch": 2.3158248404076605, "grad_norm": 1.0562257766723633, "learning_rate": 9.4385e-05, "loss": 0.3347, "step": 41356 }, { "epoch": 2.3158808377197895, "grad_norm": 1.2200738191604614, "learning_rate": 9.438473684210527e-05, "loss": 0.3947, "step": 41357 }, { "epoch": 2.3159368350319185, "grad_norm": 1.255780816078186, "learning_rate": 9.438447368421053e-05, "loss": 0.4157, "step": 41358 }, { "epoch": 2.3159928323440475, "grad_norm": 1.2522015571594238, "learning_rate": 9.438421052631579e-05, "loss": 0.3716, "step": 41359 }, { "epoch": 2.3160488296561765, "grad_norm": 1.108750820159912, "learning_rate": 9.438394736842105e-05, "loss": 0.4209, "step": 41360 }, { "epoch": 2.3161048269683056, "grad_norm": 1.1700783967971802, "learning_rate": 9.438368421052632e-05, "loss": 0.3629, "step": 41361 }, { "epoch": 2.3161608242804346, "grad_norm": 1.0911403894424438, "learning_rate": 9.438342105263158e-05, "loss": 0.3663, "step": 41362 }, { "epoch": 2.3162168215925636, "grad_norm": 1.2353960275650024, "learning_rate": 9.438315789473686e-05, "loss": 0.3926, "step": 41363 }, { "epoch": 2.3162728189046926, "grad_norm": 1.389467477798462, "learning_rate": 9.43828947368421e-05, "loss": 0.449, "step": 41364 }, { "epoch": 2.3163288162168216, "grad_norm": 1.1134833097457886, "learning_rate": 9.438263157894738e-05, "loss": 0.3548, "step": 41365 }, { "epoch": 2.3163848135289506, "grad_norm": 13.240059852600098, "learning_rate": 9.438236842105264e-05, "loss": 0.4089, "step": 41366 }, { "epoch": 2.3164408108410797, "grad_norm": 1.4368284940719604, "learning_rate": 
9.43821052631579e-05, "loss": 0.4111, "step": 41367 }, { "epoch": 2.3164968081532087, "grad_norm": 1.2925031185150146, "learning_rate": 9.438184210526317e-05, "loss": 0.4966, "step": 41368 }, { "epoch": 2.3165528054653377, "grad_norm": 1.3029847145080566, "learning_rate": 9.438157894736842e-05, "loss": 0.3637, "step": 41369 }, { "epoch": 2.3166088027774667, "grad_norm": 1.3450571298599243, "learning_rate": 9.438131578947369e-05, "loss": 0.5743, "step": 41370 }, { "epoch": 2.3166648000895957, "grad_norm": 1.221514105796814, "learning_rate": 9.438105263157895e-05, "loss": 0.4085, "step": 41371 }, { "epoch": 2.3167207974017248, "grad_norm": 1.167541742324829, "learning_rate": 9.438078947368422e-05, "loss": 0.3486, "step": 41372 }, { "epoch": 2.316776794713854, "grad_norm": 1.3627599477767944, "learning_rate": 9.438052631578948e-05, "loss": 0.4794, "step": 41373 }, { "epoch": 2.316832792025983, "grad_norm": 1.270850658416748, "learning_rate": 9.438026315789474e-05, "loss": 0.3536, "step": 41374 }, { "epoch": 2.316888789338112, "grad_norm": 1.3900926113128662, "learning_rate": 9.438e-05, "loss": 0.3641, "step": 41375 }, { "epoch": 2.316944786650241, "grad_norm": 1.3106361627578735, "learning_rate": 9.437973684210527e-05, "loss": 0.4269, "step": 41376 }, { "epoch": 2.31700078396237, "grad_norm": 1.3721694946289062, "learning_rate": 9.437947368421053e-05, "loss": 0.3696, "step": 41377 }, { "epoch": 2.317056781274499, "grad_norm": 1.198875904083252, "learning_rate": 9.43792105263158e-05, "loss": 0.4302, "step": 41378 }, { "epoch": 2.317112778586628, "grad_norm": 1.159014344215393, "learning_rate": 9.437894736842105e-05, "loss": 0.358, "step": 41379 }, { "epoch": 2.317168775898757, "grad_norm": 1.1493645906448364, "learning_rate": 9.437868421052633e-05, "loss": 0.3787, "step": 41380 }, { "epoch": 2.317224773210886, "grad_norm": 1.2999175786972046, "learning_rate": 9.437842105263159e-05, "loss": 0.4319, "step": 41381 }, { "epoch": 2.317280770523015, "grad_norm": 
1.0108214616775513, "learning_rate": 9.437815789473685e-05, "loss": 0.3313, "step": 41382 }, { "epoch": 2.317336767835144, "grad_norm": 0.9442539811134338, "learning_rate": 9.43778947368421e-05, "loss": 0.3817, "step": 41383 }, { "epoch": 2.317392765147273, "grad_norm": 1.1305065155029297, "learning_rate": 9.437763157894737e-05, "loss": 0.3805, "step": 41384 }, { "epoch": 2.317448762459402, "grad_norm": 1.7780417203903198, "learning_rate": 9.437736842105264e-05, "loss": 0.499, "step": 41385 }, { "epoch": 2.317504759771531, "grad_norm": 1.2618051767349243, "learning_rate": 9.43771052631579e-05, "loss": 0.3869, "step": 41386 }, { "epoch": 2.31756075708366, "grad_norm": 1.7549107074737549, "learning_rate": 9.437684210526316e-05, "loss": 0.2933, "step": 41387 }, { "epoch": 2.317616754395789, "grad_norm": 1.3269871473312378, "learning_rate": 9.437657894736842e-05, "loss": 0.4403, "step": 41388 }, { "epoch": 2.317672751707918, "grad_norm": 1.320339560508728, "learning_rate": 9.437631578947369e-05, "loss": 0.5584, "step": 41389 }, { "epoch": 2.317728749020047, "grad_norm": 15.979132652282715, "learning_rate": 9.437605263157895e-05, "loss": 0.3821, "step": 41390 }, { "epoch": 2.317784746332176, "grad_norm": 1.1556161642074585, "learning_rate": 9.437578947368421e-05, "loss": 0.3353, "step": 41391 }, { "epoch": 2.317840743644305, "grad_norm": 0.9923631548881531, "learning_rate": 9.437552631578947e-05, "loss": 0.3342, "step": 41392 }, { "epoch": 2.317896740956434, "grad_norm": 1.2832163572311401, "learning_rate": 9.437526315789474e-05, "loss": 0.4033, "step": 41393 }, { "epoch": 2.317952738268563, "grad_norm": 1.159406304359436, "learning_rate": 9.4375e-05, "loss": 0.3435, "step": 41394 }, { "epoch": 2.318008735580692, "grad_norm": 1.163432240486145, "learning_rate": 9.437473684210528e-05, "loss": 0.2772, "step": 41395 }, { "epoch": 2.3180647328928212, "grad_norm": 1.2027949094772339, "learning_rate": 9.437447368421052e-05, "loss": 0.3307, "step": 41396 }, { "epoch": 
2.3181207302049502, "grad_norm": 1.23633873462677, "learning_rate": 9.43742105263158e-05, "loss": 0.3789, "step": 41397 }, { "epoch": 2.3181767275170793, "grad_norm": 1.8226385116577148, "learning_rate": 9.437394736842106e-05, "loss": 0.3312, "step": 41398 }, { "epoch": 2.3182327248292083, "grad_norm": 1.2587995529174805, "learning_rate": 9.437368421052633e-05, "loss": 0.4833, "step": 41399 }, { "epoch": 2.3182887221413373, "grad_norm": 1.324337363243103, "learning_rate": 9.437342105263159e-05, "loss": 0.4096, "step": 41400 }, { "epoch": 2.3183447194534663, "grad_norm": 1.2407947778701782, "learning_rate": 9.437315789473684e-05, "loss": 0.4042, "step": 41401 }, { "epoch": 2.3184007167655953, "grad_norm": 2.1488277912139893, "learning_rate": 9.437289473684211e-05, "loss": 0.5565, "step": 41402 }, { "epoch": 2.3184567140777244, "grad_norm": 1.1434844732284546, "learning_rate": 9.437263157894737e-05, "loss": 0.3578, "step": 41403 }, { "epoch": 2.3185127113898534, "grad_norm": 1.7001357078552246, "learning_rate": 9.437236842105264e-05, "loss": 0.5056, "step": 41404 }, { "epoch": 2.3185687087019824, "grad_norm": 1.2785147428512573, "learning_rate": 9.43721052631579e-05, "loss": 0.4721, "step": 41405 }, { "epoch": 2.3186247060141114, "grad_norm": 1.1670337915420532, "learning_rate": 9.437184210526316e-05, "loss": 0.4096, "step": 41406 }, { "epoch": 2.3186807033262404, "grad_norm": 1.314004898071289, "learning_rate": 9.437157894736842e-05, "loss": 0.3949, "step": 41407 }, { "epoch": 2.3187367006383695, "grad_norm": 1.181923508644104, "learning_rate": 9.43713157894737e-05, "loss": 0.3221, "step": 41408 }, { "epoch": 2.3187926979504985, "grad_norm": 1.2725424766540527, "learning_rate": 9.437105263157895e-05, "loss": 0.4359, "step": 41409 }, { "epoch": 2.3188486952626275, "grad_norm": 1.3356045484542847, "learning_rate": 9.437078947368421e-05, "loss": 0.4832, "step": 41410 }, { "epoch": 2.3189046925747565, "grad_norm": 1.111527681350708, "learning_rate": 
9.437052631578947e-05, "loss": 0.3469, "step": 41411 }, { "epoch": 2.3189606898868855, "grad_norm": 1.1512436866760254, "learning_rate": 9.437026315789475e-05, "loss": 0.4355, "step": 41412 }, { "epoch": 2.3190166871990145, "grad_norm": 1.1099945306777954, "learning_rate": 9.437e-05, "loss": 0.3806, "step": 41413 }, { "epoch": 2.3190726845111436, "grad_norm": 1.3678765296936035, "learning_rate": 9.436973684210527e-05, "loss": 0.4302, "step": 41414 }, { "epoch": 2.3191286818232726, "grad_norm": 1.2139514684677124, "learning_rate": 9.436947368421053e-05, "loss": 0.379, "step": 41415 }, { "epoch": 2.3191846791354016, "grad_norm": 5.6088690757751465, "learning_rate": 9.43692105263158e-05, "loss": 0.6149, "step": 41416 }, { "epoch": 2.3192406764475306, "grad_norm": 1.440214991569519, "learning_rate": 9.436894736842106e-05, "loss": 0.4052, "step": 41417 }, { "epoch": 2.3192966737596596, "grad_norm": 1.5163050889968872, "learning_rate": 9.436868421052633e-05, "loss": 0.407, "step": 41418 }, { "epoch": 2.3193526710717887, "grad_norm": 6.795169830322266, "learning_rate": 9.436842105263158e-05, "loss": 0.3733, "step": 41419 }, { "epoch": 2.3194086683839177, "grad_norm": 1.2232766151428223, "learning_rate": 9.436815789473684e-05, "loss": 0.4208, "step": 41420 }, { "epoch": 2.3194646656960467, "grad_norm": 1.642359972000122, "learning_rate": 9.436789473684211e-05, "loss": 0.4789, "step": 41421 }, { "epoch": 2.3195206630081757, "grad_norm": 1.1009441614151, "learning_rate": 9.436763157894737e-05, "loss": 0.4159, "step": 41422 }, { "epoch": 2.3195766603203047, "grad_norm": 1.159757375717163, "learning_rate": 9.436736842105264e-05, "loss": 0.376, "step": 41423 }, { "epoch": 2.3196326576324338, "grad_norm": 1.082768201828003, "learning_rate": 9.436710526315789e-05, "loss": 0.3205, "step": 41424 }, { "epoch": 2.3196886549445628, "grad_norm": 1.617936134338379, "learning_rate": 9.436684210526316e-05, "loss": 0.4031, "step": 41425 }, { "epoch": 2.319744652256692, "grad_norm": 
1.8554003238677979, "learning_rate": 9.436657894736842e-05, "loss": 0.472, "step": 41426 }, { "epoch": 2.319800649568821, "grad_norm": 1.246032476425171, "learning_rate": 9.43663157894737e-05, "loss": 0.3915, "step": 41427 }, { "epoch": 2.31985664688095, "grad_norm": 1.2154436111450195, "learning_rate": 9.436605263157894e-05, "loss": 0.3353, "step": 41428 }, { "epoch": 2.319912644193079, "grad_norm": 1.1615893840789795, "learning_rate": 9.436578947368422e-05, "loss": 0.3052, "step": 41429 }, { "epoch": 2.319968641505208, "grad_norm": 1.9925775527954102, "learning_rate": 9.436552631578948e-05, "loss": 0.2431, "step": 41430 }, { "epoch": 2.320024638817337, "grad_norm": 1.0813637971878052, "learning_rate": 9.436526315789475e-05, "loss": 0.3694, "step": 41431 }, { "epoch": 2.320080636129466, "grad_norm": 1.2801499366760254, "learning_rate": 9.436500000000001e-05, "loss": 0.3849, "step": 41432 }, { "epoch": 2.320136633441595, "grad_norm": 1.333127737045288, "learning_rate": 9.436473684210527e-05, "loss": 0.3736, "step": 41433 }, { "epoch": 2.320192630753724, "grad_norm": 1.1614184379577637, "learning_rate": 9.436447368421053e-05, "loss": 0.4216, "step": 41434 }, { "epoch": 2.320248628065853, "grad_norm": 2.762162208557129, "learning_rate": 9.43642105263158e-05, "loss": 0.4516, "step": 41435 }, { "epoch": 2.320304625377982, "grad_norm": 1.6449096202850342, "learning_rate": 9.436394736842106e-05, "loss": 0.4867, "step": 41436 }, { "epoch": 2.320360622690111, "grad_norm": 1.2274833917617798, "learning_rate": 9.436368421052632e-05, "loss": 0.3648, "step": 41437 }, { "epoch": 2.32041662000224, "grad_norm": 1.7491999864578247, "learning_rate": 9.436342105263158e-05, "loss": 0.5316, "step": 41438 }, { "epoch": 2.320472617314369, "grad_norm": 1.2867528200149536, "learning_rate": 9.436315789473684e-05, "loss": 0.4215, "step": 41439 }, { "epoch": 2.320528614626498, "grad_norm": 1.2793112993240356, "learning_rate": 9.436289473684211e-05, "loss": 0.3446, "step": 41440 }, { "epoch": 
2.320584611938627, "grad_norm": 2.504293203353882, "learning_rate": 9.436263157894737e-05, "loss": 0.4524, "step": 41441 }, { "epoch": 2.320640609250756, "grad_norm": 1.3625446557998657, "learning_rate": 9.436236842105263e-05, "loss": 0.4193, "step": 41442 }, { "epoch": 2.320696606562885, "grad_norm": 1.12100088596344, "learning_rate": 9.436210526315789e-05, "loss": 0.3409, "step": 41443 }, { "epoch": 2.320752603875014, "grad_norm": 1.2281224727630615, "learning_rate": 9.436184210526317e-05, "loss": 0.3814, "step": 41444 }, { "epoch": 2.320808601187143, "grad_norm": 1.0855356454849243, "learning_rate": 9.436157894736843e-05, "loss": 0.4202, "step": 41445 }, { "epoch": 2.320864598499272, "grad_norm": 1.2163196802139282, "learning_rate": 9.436131578947369e-05, "loss": 0.3915, "step": 41446 }, { "epoch": 2.320920595811401, "grad_norm": 0.9154990911483765, "learning_rate": 9.436105263157895e-05, "loss": 0.3104, "step": 41447 }, { "epoch": 2.32097659312353, "grad_norm": 1.186795711517334, "learning_rate": 9.436078947368422e-05, "loss": 0.3597, "step": 41448 }, { "epoch": 2.3210325904356592, "grad_norm": 1.3870964050292969, "learning_rate": 9.436052631578948e-05, "loss": 0.4255, "step": 41449 }, { "epoch": 2.3210885877477883, "grad_norm": 1.103212833404541, "learning_rate": 9.436026315789475e-05, "loss": 0.3273, "step": 41450 }, { "epoch": 2.3211445850599173, "grad_norm": 2.326517105102539, "learning_rate": 9.436e-05, "loss": 0.6455, "step": 41451 }, { "epoch": 2.3212005823720463, "grad_norm": 1.1867144107818604, "learning_rate": 9.435973684210527e-05, "loss": 0.3826, "step": 41452 }, { "epoch": 2.3212565796841753, "grad_norm": 1.2935075759887695, "learning_rate": 9.435947368421053e-05, "loss": 0.4337, "step": 41453 }, { "epoch": 2.3213125769963043, "grad_norm": 1.2231755256652832, "learning_rate": 9.435921052631579e-05, "loss": 0.413, "step": 41454 }, { "epoch": 2.3213685743084334, "grad_norm": 1.3426060676574707, "learning_rate": 9.435894736842106e-05, "loss": 0.6001, 
"step": 41455 }, { "epoch": 2.3214245716205624, "grad_norm": 1.193281650543213, "learning_rate": 9.435868421052631e-05, "loss": 0.4322, "step": 41456 }, { "epoch": 2.3214805689326914, "grad_norm": 1.1700212955474854, "learning_rate": 9.435842105263158e-05, "loss": 0.3625, "step": 41457 }, { "epoch": 2.3215365662448204, "grad_norm": 1.2244272232055664, "learning_rate": 9.435815789473684e-05, "loss": 0.395, "step": 41458 }, { "epoch": 2.3215925635569494, "grad_norm": 1.6634242534637451, "learning_rate": 9.435789473684212e-05, "loss": 0.4192, "step": 41459 }, { "epoch": 2.3216485608690784, "grad_norm": 1.1649669408798218, "learning_rate": 9.435763157894738e-05, "loss": 0.3864, "step": 41460 }, { "epoch": 2.3217045581812075, "grad_norm": 1.0379775762557983, "learning_rate": 9.435736842105264e-05, "loss": 0.3756, "step": 41461 }, { "epoch": 2.3217605554933365, "grad_norm": 1.249643087387085, "learning_rate": 9.43571052631579e-05, "loss": 0.3745, "step": 41462 }, { "epoch": 2.3218165528054655, "grad_norm": 1.0316903591156006, "learning_rate": 9.435684210526317e-05, "loss": 0.4327, "step": 41463 }, { "epoch": 2.3218725501175945, "grad_norm": 1.4474607706069946, "learning_rate": 9.435657894736843e-05, "loss": 0.4343, "step": 41464 }, { "epoch": 2.3219285474297235, "grad_norm": 1.108749508857727, "learning_rate": 9.435631578947369e-05, "loss": 0.4515, "step": 41465 }, { "epoch": 2.3219845447418526, "grad_norm": 0.9205725789070129, "learning_rate": 9.435605263157895e-05, "loss": 0.3079, "step": 41466 }, { "epoch": 2.3220405420539816, "grad_norm": 1.2063063383102417, "learning_rate": 9.435578947368422e-05, "loss": 0.4234, "step": 41467 }, { "epoch": 2.3220965393661106, "grad_norm": 1.2108074426651, "learning_rate": 9.435552631578948e-05, "loss": 0.3142, "step": 41468 }, { "epoch": 2.3221525366782396, "grad_norm": 1.282341718673706, "learning_rate": 9.435526315789474e-05, "loss": 0.4147, "step": 41469 }, { "epoch": 2.3222085339903686, "grad_norm": 1.2531129121780396, 
"learning_rate": 9.4355e-05, "loss": 0.3466, "step": 41470 }, { "epoch": 2.3222645313024977, "grad_norm": 1.1117655038833618, "learning_rate": 9.435473684210526e-05, "loss": 0.3235, "step": 41471 }, { "epoch": 2.3223205286146267, "grad_norm": 1.381260871887207, "learning_rate": 9.435447368421053e-05, "loss": 0.3854, "step": 41472 }, { "epoch": 2.3223765259267557, "grad_norm": 1.1924549341201782, "learning_rate": 9.435421052631579e-05, "loss": 0.3963, "step": 41473 }, { "epoch": 2.3224325232388847, "grad_norm": 1.2785983085632324, "learning_rate": 9.435394736842105e-05, "loss": 0.3829, "step": 41474 }, { "epoch": 2.3224885205510137, "grad_norm": 1.138791799545288, "learning_rate": 9.435368421052631e-05, "loss": 0.3686, "step": 41475 }, { "epoch": 2.3225445178631428, "grad_norm": 1.3743484020233154, "learning_rate": 9.435342105263159e-05, "loss": 0.4304, "step": 41476 }, { "epoch": 2.3226005151752718, "grad_norm": 1.4038283824920654, "learning_rate": 9.435315789473685e-05, "loss": 0.4705, "step": 41477 }, { "epoch": 2.322656512487401, "grad_norm": 1.0943456888198853, "learning_rate": 9.435289473684212e-05, "loss": 0.3759, "step": 41478 }, { "epoch": 2.32271250979953, "grad_norm": 0.9064270853996277, "learning_rate": 9.435263157894737e-05, "loss": 0.446, "step": 41479 }, { "epoch": 2.322768507111659, "grad_norm": 1.477450966835022, "learning_rate": 9.435236842105264e-05, "loss": 0.4595, "step": 41480 }, { "epoch": 2.322824504423788, "grad_norm": 1.3120715618133545, "learning_rate": 9.43521052631579e-05, "loss": 0.3375, "step": 41481 }, { "epoch": 2.322880501735917, "grad_norm": 1.0751261711120605, "learning_rate": 9.435184210526317e-05, "loss": 0.4121, "step": 41482 }, { "epoch": 2.322936499048046, "grad_norm": 1.099856972694397, "learning_rate": 9.435157894736842e-05, "loss": 0.3891, "step": 41483 }, { "epoch": 2.3229924963601745, "grad_norm": 1.1082332134246826, "learning_rate": 9.435131578947369e-05, "loss": 0.4049, "step": 41484 }, { "epoch": 2.323048493672304, 
"grad_norm": 1.136395812034607, "learning_rate": 9.435105263157895e-05, "loss": 0.3487, "step": 41485 }, { "epoch": 2.3231044909844325, "grad_norm": 1.3666706085205078, "learning_rate": 9.435078947368422e-05, "loss": 0.4336, "step": 41486 }, { "epoch": 2.323160488296562, "grad_norm": 1.1374821662902832, "learning_rate": 9.435052631578948e-05, "loss": 0.355, "step": 41487 }, { "epoch": 2.3232164856086905, "grad_norm": 1.497208833694458, "learning_rate": 9.435026315789473e-05, "loss": 0.5575, "step": 41488 }, { "epoch": 2.32327248292082, "grad_norm": 1.1022244691848755, "learning_rate": 9.435e-05, "loss": 0.4633, "step": 41489 }, { "epoch": 2.3233284802329486, "grad_norm": 1.2131279706954956, "learning_rate": 9.434973684210526e-05, "loss": 0.3468, "step": 41490 }, { "epoch": 2.323384477545078, "grad_norm": 1.1669669151306152, "learning_rate": 9.434947368421054e-05, "loss": 0.4671, "step": 41491 }, { "epoch": 2.3234404748572066, "grad_norm": 5.152338027954102, "learning_rate": 9.43492105263158e-05, "loss": 0.3361, "step": 41492 }, { "epoch": 2.323496472169336, "grad_norm": 1.3370871543884277, "learning_rate": 9.434894736842106e-05, "loss": 0.4213, "step": 41493 }, { "epoch": 2.3235524694814647, "grad_norm": 1.6586989164352417, "learning_rate": 9.434868421052632e-05, "loss": 0.6026, "step": 41494 }, { "epoch": 2.323608466793594, "grad_norm": 1.40994393825531, "learning_rate": 9.434842105263159e-05, "loss": 0.5499, "step": 41495 }, { "epoch": 2.3236644641057227, "grad_norm": 1.0669114589691162, "learning_rate": 9.434815789473685e-05, "loss": 0.4114, "step": 41496 }, { "epoch": 2.323720461417852, "grad_norm": 1.7415821552276611, "learning_rate": 9.434789473684211e-05, "loss": 0.4394, "step": 41497 }, { "epoch": 2.3237764587299807, "grad_norm": 1.217822551727295, "learning_rate": 9.434763157894737e-05, "loss": 0.4051, "step": 41498 }, { "epoch": 2.32383245604211, "grad_norm": 1.5085703134536743, "learning_rate": 9.434736842105264e-05, "loss": 0.5867, "step": 41499 }, { 
"epoch": 2.3238884533542388, "grad_norm": 1.2492339611053467, "learning_rate": 9.43471052631579e-05, "loss": 0.4121, "step": 41500 }, { "epoch": 2.3239444506663682, "grad_norm": 1.3328871726989746, "learning_rate": 9.434684210526316e-05, "loss": 0.5951, "step": 41501 }, { "epoch": 2.324000447978497, "grad_norm": 1.1705149412155151, "learning_rate": 9.434657894736842e-05, "loss": 0.3892, "step": 41502 }, { "epoch": 2.3240564452906263, "grad_norm": 1.110447883605957, "learning_rate": 9.43463157894737e-05, "loss": 0.3708, "step": 41503 }, { "epoch": 2.324112442602755, "grad_norm": 1.2638144493103027, "learning_rate": 9.434605263157895e-05, "loss": 0.3803, "step": 41504 }, { "epoch": 2.3241684399148843, "grad_norm": 1.018725872039795, "learning_rate": 9.434578947368421e-05, "loss": 0.2935, "step": 41505 }, { "epoch": 2.324224437227013, "grad_norm": 1.7340035438537598, "learning_rate": 9.434552631578947e-05, "loss": 0.435, "step": 41506 }, { "epoch": 2.3242804345391423, "grad_norm": 1.3757978677749634, "learning_rate": 9.434526315789473e-05, "loss": 0.3157, "step": 41507 }, { "epoch": 2.324336431851271, "grad_norm": 1.095306634902954, "learning_rate": 9.4345e-05, "loss": 0.4941, "step": 41508 }, { "epoch": 2.3243924291634004, "grad_norm": 1.3000998497009277, "learning_rate": 9.434473684210527e-05, "loss": 0.3635, "step": 41509 }, { "epoch": 2.324448426475529, "grad_norm": 1.2867426872253418, "learning_rate": 9.434447368421054e-05, "loss": 0.337, "step": 41510 }, { "epoch": 2.3245044237876584, "grad_norm": 1.1369102001190186, "learning_rate": 9.434421052631578e-05, "loss": 0.4177, "step": 41511 }, { "epoch": 2.324560421099787, "grad_norm": 1.1665189266204834, "learning_rate": 9.434394736842106e-05, "loss": 0.4357, "step": 41512 }, { "epoch": 2.3246164184119165, "grad_norm": 1.141129970550537, "learning_rate": 9.434368421052632e-05, "loss": 0.3353, "step": 41513 }, { "epoch": 2.324672415724045, "grad_norm": 1.2560420036315918, "learning_rate": 9.434342105263159e-05, 
"loss": 0.4167, "step": 41514 }, { "epoch": 2.3247284130361745, "grad_norm": 1.153064250946045, "learning_rate": 9.434315789473685e-05, "loss": 0.326, "step": 41515 }, { "epoch": 2.324784410348303, "grad_norm": 1.0928597450256348, "learning_rate": 9.434289473684211e-05, "loss": 0.3567, "step": 41516 }, { "epoch": 2.3248404076604325, "grad_norm": 1.3983395099639893, "learning_rate": 9.434263157894737e-05, "loss": 0.3523, "step": 41517 }, { "epoch": 2.324896404972561, "grad_norm": 1.4594054222106934, "learning_rate": 9.434236842105264e-05, "loss": 0.4962, "step": 41518 }, { "epoch": 2.3249524022846906, "grad_norm": 1.3389967679977417, "learning_rate": 9.43421052631579e-05, "loss": 0.3855, "step": 41519 }, { "epoch": 2.325008399596819, "grad_norm": 1.1690731048583984, "learning_rate": 9.434184210526316e-05, "loss": 0.4012, "step": 41520 }, { "epoch": 2.3250643969089486, "grad_norm": 1.3864506483078003, "learning_rate": 9.434157894736842e-05, "loss": 0.5261, "step": 41521 }, { "epoch": 2.325120394221077, "grad_norm": 1.56587553024292, "learning_rate": 9.434131578947368e-05, "loss": 0.4393, "step": 41522 }, { "epoch": 2.3251763915332067, "grad_norm": 1.2505574226379395, "learning_rate": 9.434105263157896e-05, "loss": 0.4088, "step": 41523 }, { "epoch": 2.3252323888453352, "grad_norm": 1.073076605796814, "learning_rate": 9.434078947368422e-05, "loss": 0.38, "step": 41524 }, { "epoch": 2.3252883861574642, "grad_norm": 0.9981218576431274, "learning_rate": 9.434052631578948e-05, "loss": 0.3153, "step": 41525 }, { "epoch": 2.3253443834695933, "grad_norm": 1.4683637619018555, "learning_rate": 9.434026315789473e-05, "loss": 0.3522, "step": 41526 }, { "epoch": 2.3254003807817223, "grad_norm": 1.2398388385772705, "learning_rate": 9.434000000000001e-05, "loss": 0.4713, "step": 41527 }, { "epoch": 2.3254563780938513, "grad_norm": 1.4454247951507568, "learning_rate": 9.433973684210527e-05, "loss": 0.4499, "step": 41528 }, { "epoch": 2.3255123754059803, "grad_norm": 
1.2900307178497314, "learning_rate": 9.433947368421053e-05, "loss": 0.3776, "step": 41529 }, { "epoch": 2.3255683727181093, "grad_norm": 1.1652144193649292, "learning_rate": 9.433921052631579e-05, "loss": 0.3361, "step": 41530 }, { "epoch": 2.3256243700302384, "grad_norm": 1.1878324747085571, "learning_rate": 9.433894736842106e-05, "loss": 0.4921, "step": 41531 }, { "epoch": 2.3256803673423674, "grad_norm": 1.1960644721984863, "learning_rate": 9.433868421052632e-05, "loss": 0.3676, "step": 41532 }, { "epoch": 2.3257363646544964, "grad_norm": 1.0765178203582764, "learning_rate": 9.43384210526316e-05, "loss": 0.3853, "step": 41533 }, { "epoch": 2.3257923619666254, "grad_norm": 1.0752185583114624, "learning_rate": 9.433815789473684e-05, "loss": 0.4312, "step": 41534 }, { "epoch": 2.3258483592787544, "grad_norm": 1.1072521209716797, "learning_rate": 9.433789473684211e-05, "loss": 0.3946, "step": 41535 }, { "epoch": 2.3259043565908835, "grad_norm": 1.0504088401794434, "learning_rate": 9.433763157894737e-05, "loss": 0.3921, "step": 41536 }, { "epoch": 2.3259603539030125, "grad_norm": 1.1277610063552856, "learning_rate": 9.433736842105265e-05, "loss": 0.3754, "step": 41537 }, { "epoch": 2.3260163512151415, "grad_norm": 1.1618610620498657, "learning_rate": 9.433710526315789e-05, "loss": 0.3718, "step": 41538 }, { "epoch": 2.3260723485272705, "grad_norm": 1.2163972854614258, "learning_rate": 9.433684210526317e-05, "loss": 0.4246, "step": 41539 }, { "epoch": 2.3261283458393995, "grad_norm": 1.088391900062561, "learning_rate": 9.433657894736843e-05, "loss": 0.3969, "step": 41540 }, { "epoch": 2.3261843431515286, "grad_norm": 0.9387152194976807, "learning_rate": 9.433631578947369e-05, "loss": 0.3819, "step": 41541 }, { "epoch": 2.3262403404636576, "grad_norm": 0.9201839566230774, "learning_rate": 9.433605263157896e-05, "loss": 0.2971, "step": 41542 }, { "epoch": 2.3262963377757866, "grad_norm": 1.156425952911377, "learning_rate": 9.43357894736842e-05, "loss": 0.4129, "step": 
41543 }, { "epoch": 2.3263523350879156, "grad_norm": 1.094070553779602, "learning_rate": 9.433552631578948e-05, "loss": 0.3659, "step": 41544 }, { "epoch": 2.3264083324000446, "grad_norm": 1.2249170541763306, "learning_rate": 9.433526315789474e-05, "loss": 0.4709, "step": 41545 }, { "epoch": 2.3264643297121737, "grad_norm": 0.9159320592880249, "learning_rate": 9.433500000000001e-05, "loss": 0.2835, "step": 41546 }, { "epoch": 2.3265203270243027, "grad_norm": 1.1559115648269653, "learning_rate": 9.433473684210527e-05, "loss": 0.3011, "step": 41547 }, { "epoch": 2.3265763243364317, "grad_norm": 1.3106906414031982, "learning_rate": 9.433447368421053e-05, "loss": 0.3989, "step": 41548 }, { "epoch": 2.3266323216485607, "grad_norm": 1.0658910274505615, "learning_rate": 9.433421052631579e-05, "loss": 0.3484, "step": 41549 }, { "epoch": 2.3266883189606897, "grad_norm": 1.2402797937393188, "learning_rate": 9.433394736842106e-05, "loss": 0.3697, "step": 41550 }, { "epoch": 2.3267443162728187, "grad_norm": 1.0892387628555298, "learning_rate": 9.433368421052632e-05, "loss": 0.3676, "step": 41551 }, { "epoch": 2.3268003135849478, "grad_norm": 1.3662179708480835, "learning_rate": 9.433342105263158e-05, "loss": 0.3938, "step": 41552 }, { "epoch": 2.326856310897077, "grad_norm": 3.620800256729126, "learning_rate": 9.433315789473684e-05, "loss": 0.2778, "step": 41553 }, { "epoch": 2.326912308209206, "grad_norm": 1.1394436359405518, "learning_rate": 9.433289473684212e-05, "loss": 0.3365, "step": 41554 }, { "epoch": 2.326968305521335, "grad_norm": 1.2460987567901611, "learning_rate": 9.433263157894738e-05, "loss": 0.3224, "step": 41555 }, { "epoch": 2.327024302833464, "grad_norm": 1.1246585845947266, "learning_rate": 9.433236842105264e-05, "loss": 0.5338, "step": 41556 }, { "epoch": 2.327080300145593, "grad_norm": 1.3045283555984497, "learning_rate": 9.43321052631579e-05, "loss": 0.4315, "step": 41557 }, { "epoch": 2.327136297457722, "grad_norm": 1.094956874847412, "learning_rate": 
9.433184210526315e-05, "loss": 0.4847, "step": 41558 }, { "epoch": 2.327192294769851, "grad_norm": 1.4461864233016968, "learning_rate": 9.433157894736843e-05, "loss": 0.4119, "step": 41559 }, { "epoch": 2.32724829208198, "grad_norm": 1.1447547674179077, "learning_rate": 9.433131578947369e-05, "loss": 0.3574, "step": 41560 }, { "epoch": 2.327304289394109, "grad_norm": 1.2475696802139282, "learning_rate": 9.433105263157895e-05, "loss": 0.4133, "step": 41561 }, { "epoch": 2.327360286706238, "grad_norm": 1.1497441530227661, "learning_rate": 9.433078947368421e-05, "loss": 0.433, "step": 41562 }, { "epoch": 2.327416284018367, "grad_norm": 1.386181354522705, "learning_rate": 9.433052631578948e-05, "loss": 0.3228, "step": 41563 }, { "epoch": 2.327472281330496, "grad_norm": 1.1684449911117554, "learning_rate": 9.433026315789474e-05, "loss": 0.3917, "step": 41564 }, { "epoch": 2.327528278642625, "grad_norm": 1.3723171949386597, "learning_rate": 9.433000000000001e-05, "loss": 0.4866, "step": 41565 }, { "epoch": 2.327584275954754, "grad_norm": 0.9769529700279236, "learning_rate": 9.432973684210526e-05, "loss": 0.3378, "step": 41566 }, { "epoch": 2.327640273266883, "grad_norm": 1.1759470701217651, "learning_rate": 9.432947368421053e-05, "loss": 0.3337, "step": 41567 }, { "epoch": 2.327696270579012, "grad_norm": 1.3004335165023804, "learning_rate": 9.432921052631579e-05, "loss": 0.4525, "step": 41568 }, { "epoch": 2.327752267891141, "grad_norm": 1.381335735321045, "learning_rate": 9.432894736842107e-05, "loss": 0.4564, "step": 41569 }, { "epoch": 2.32780826520327, "grad_norm": 1.2472684383392334, "learning_rate": 9.432868421052633e-05, "loss": 0.3269, "step": 41570 }, { "epoch": 2.327864262515399, "grad_norm": 1.7341516017913818, "learning_rate": 9.432842105263159e-05, "loss": 0.6316, "step": 41571 }, { "epoch": 2.327920259827528, "grad_norm": 0.9813318252563477, "learning_rate": 9.432815789473684e-05, "loss": 0.2936, "step": 41572 }, { "epoch": 2.327976257139657, "grad_norm": 
1.0791974067687988, "learning_rate": 9.432789473684212e-05, "loss": 0.359, "step": 41573 }, { "epoch": 2.328032254451786, "grad_norm": 1.2258621454238892, "learning_rate": 9.432763157894738e-05, "loss": 0.4433, "step": 41574 }, { "epoch": 2.328088251763915, "grad_norm": 1.5598926544189453, "learning_rate": 9.432736842105262e-05, "loss": 0.5025, "step": 41575 }, { "epoch": 2.3281442490760442, "grad_norm": 1.1922926902770996, "learning_rate": 9.43271052631579e-05, "loss": 0.4191, "step": 41576 }, { "epoch": 2.3282002463881732, "grad_norm": 1.0238518714904785, "learning_rate": 9.432684210526316e-05, "loss": 0.3593, "step": 41577 }, { "epoch": 2.3282562437003023, "grad_norm": 1.4590539932250977, "learning_rate": 9.432657894736843e-05, "loss": 0.3404, "step": 41578 }, { "epoch": 2.3283122410124313, "grad_norm": 1.292555332183838, "learning_rate": 9.432631578947369e-05, "loss": 0.3438, "step": 41579 }, { "epoch": 2.3283682383245603, "grad_norm": 1.245064616203308, "learning_rate": 9.432605263157895e-05, "loss": 0.3831, "step": 41580 }, { "epoch": 2.3284242356366893, "grad_norm": 1.2050551176071167, "learning_rate": 9.432578947368421e-05, "loss": 0.4587, "step": 41581 }, { "epoch": 2.3284802329488183, "grad_norm": 1.0992491245269775, "learning_rate": 9.432552631578948e-05, "loss": 0.3585, "step": 41582 }, { "epoch": 2.3285362302609474, "grad_norm": 1.3423775434494019, "learning_rate": 9.432526315789474e-05, "loss": 0.5745, "step": 41583 }, { "epoch": 2.3285922275730764, "grad_norm": 1.1654114723205566, "learning_rate": 9.4325e-05, "loss": 0.3859, "step": 41584 }, { "epoch": 2.3286482248852054, "grad_norm": 1.1433289051055908, "learning_rate": 9.432473684210526e-05, "loss": 0.3157, "step": 41585 }, { "epoch": 2.3287042221973344, "grad_norm": 1.4299864768981934, "learning_rate": 9.432447368421054e-05, "loss": 0.4653, "step": 41586 }, { "epoch": 2.3287602195094634, "grad_norm": 3.0687637329101562, "learning_rate": 9.43242105263158e-05, "loss": 0.3564, "step": 41587 }, { 
"epoch": 2.3288162168215925, "grad_norm": 1.122779130935669, "learning_rate": 9.432394736842107e-05, "loss": 0.409, "step": 41588 }, { "epoch": 2.3288722141337215, "grad_norm": 2.0268421173095703, "learning_rate": 9.432368421052631e-05, "loss": 0.4409, "step": 41589 }, { "epoch": 2.3289282114458505, "grad_norm": 0.9438332915306091, "learning_rate": 9.432342105263159e-05, "loss": 0.2831, "step": 41590 }, { "epoch": 2.3289842087579795, "grad_norm": 1.3705345392227173, "learning_rate": 9.432315789473685e-05, "loss": 0.4815, "step": 41591 }, { "epoch": 2.3290402060701085, "grad_norm": 1.087262511253357, "learning_rate": 9.432289473684211e-05, "loss": 0.3322, "step": 41592 }, { "epoch": 2.3290962033822376, "grad_norm": 1.2847710847854614, "learning_rate": 9.432263157894737e-05, "loss": 0.3894, "step": 41593 }, { "epoch": 2.3291522006943666, "grad_norm": 1.3494991064071655, "learning_rate": 9.432236842105263e-05, "loss": 0.3591, "step": 41594 }, { "epoch": 2.3292081980064956, "grad_norm": 1.268125295639038, "learning_rate": 9.43221052631579e-05, "loss": 0.359, "step": 41595 }, { "epoch": 2.3292641953186246, "grad_norm": 1.2688404321670532, "learning_rate": 9.432184210526316e-05, "loss": 0.4193, "step": 41596 }, { "epoch": 2.3293201926307536, "grad_norm": 1.0668623447418213, "learning_rate": 9.432157894736843e-05, "loss": 0.3658, "step": 41597 }, { "epoch": 2.3293761899428826, "grad_norm": 1.1841307878494263, "learning_rate": 9.432131578947368e-05, "loss": 0.3154, "step": 41598 }, { "epoch": 2.3294321872550117, "grad_norm": 1.1780558824539185, "learning_rate": 9.432105263157895e-05, "loss": 0.4356, "step": 41599 }, { "epoch": 2.3294881845671407, "grad_norm": 2.430851936340332, "learning_rate": 9.432078947368421e-05, "loss": 0.5697, "step": 41600 }, { "epoch": 2.3295441818792697, "grad_norm": 1.1251972913742065, "learning_rate": 9.432052631578949e-05, "loss": 0.3537, "step": 41601 }, { "epoch": 2.3296001791913987, "grad_norm": 1.195039987564087, "learning_rate": 
9.432026315789475e-05, "loss": 0.3496, "step": 41602 }, { "epoch": 2.3296561765035277, "grad_norm": 0.9922090172767639, "learning_rate": 9.432e-05, "loss": 0.3585, "step": 41603 }, { "epoch": 2.3297121738156568, "grad_norm": 1.1157501935958862, "learning_rate": 9.431973684210526e-05, "loss": 0.4565, "step": 41604 }, { "epoch": 2.329768171127786, "grad_norm": 1.3803671598434448, "learning_rate": 9.431947368421054e-05, "loss": 0.4088, "step": 41605 }, { "epoch": 2.329824168439915, "grad_norm": 1.2145215272903442, "learning_rate": 9.43192105263158e-05, "loss": 0.4503, "step": 41606 }, { "epoch": 2.329880165752044, "grad_norm": 1.2467933893203735, "learning_rate": 9.431894736842106e-05, "loss": 0.4943, "step": 41607 }, { "epoch": 2.329936163064173, "grad_norm": 1.352020502090454, "learning_rate": 9.431868421052632e-05, "loss": 0.4142, "step": 41608 }, { "epoch": 2.329992160376302, "grad_norm": 1.3673547506332397, "learning_rate": 9.431842105263158e-05, "loss": 0.5611, "step": 41609 }, { "epoch": 2.330048157688431, "grad_norm": 1.2323366403579712, "learning_rate": 9.431815789473685e-05, "loss": 0.698, "step": 41610 }, { "epoch": 2.33010415500056, "grad_norm": 1.1359715461730957, "learning_rate": 9.431789473684211e-05, "loss": 0.3438, "step": 41611 }, { "epoch": 2.330160152312689, "grad_norm": 1.3788206577301025, "learning_rate": 9.431763157894737e-05, "loss": 0.4516, "step": 41612 }, { "epoch": 2.330216149624818, "grad_norm": 1.0749812126159668, "learning_rate": 9.431736842105263e-05, "loss": 0.3207, "step": 41613 }, { "epoch": 2.330272146936947, "grad_norm": 1.2539621591567993, "learning_rate": 9.43171052631579e-05, "loss": 0.3878, "step": 41614 }, { "epoch": 2.330328144249076, "grad_norm": 1.1313960552215576, "learning_rate": 9.431684210526316e-05, "loss": 0.3392, "step": 41615 }, { "epoch": 2.330384141561205, "grad_norm": 1.1177096366882324, "learning_rate": 9.431657894736842e-05, "loss": 0.3661, "step": 41616 }, { "epoch": 2.330440138873334, "grad_norm": 
1.251929521560669, "learning_rate": 9.431631578947368e-05, "loss": 0.3735, "step": 41617 }, { "epoch": 2.330496136185463, "grad_norm": 1.1779056787490845, "learning_rate": 9.431605263157896e-05, "loss": 0.4895, "step": 41618 }, { "epoch": 2.330552133497592, "grad_norm": 1.118046760559082, "learning_rate": 9.431578947368421e-05, "loss": 0.437, "step": 41619 }, { "epoch": 2.330608130809721, "grad_norm": 1.448420524597168, "learning_rate": 9.431552631578949e-05, "loss": 0.5376, "step": 41620 }, { "epoch": 2.33066412812185, "grad_norm": 1.3400373458862305, "learning_rate": 9.431526315789473e-05, "loss": 0.3729, "step": 41621 }, { "epoch": 2.330720125433979, "grad_norm": 1.1248011589050293, "learning_rate": 9.431500000000001e-05, "loss": 0.3674, "step": 41622 }, { "epoch": 2.330776122746108, "grad_norm": 1.1546565294265747, "learning_rate": 9.431473684210527e-05, "loss": 0.4526, "step": 41623 }, { "epoch": 2.330832120058237, "grad_norm": 1.0657941102981567, "learning_rate": 9.431447368421054e-05, "loss": 0.3928, "step": 41624 }, { "epoch": 2.330888117370366, "grad_norm": 1.1658271551132202, "learning_rate": 9.43142105263158e-05, "loss": 0.4677, "step": 41625 }, { "epoch": 2.330944114682495, "grad_norm": 1.356210708618164, "learning_rate": 9.431394736842105e-05, "loss": 0.4965, "step": 41626 }, { "epoch": 2.331000111994624, "grad_norm": 1.2530122995376587, "learning_rate": 9.431368421052632e-05, "loss": 0.4313, "step": 41627 }, { "epoch": 2.331056109306753, "grad_norm": 0.990557849407196, "learning_rate": 9.431342105263158e-05, "loss": 0.2755, "step": 41628 }, { "epoch": 2.3311121066188822, "grad_norm": 1.1237517595291138, "learning_rate": 9.431315789473685e-05, "loss": 0.3367, "step": 41629 }, { "epoch": 2.3311681039310113, "grad_norm": 1.4609808921813965, "learning_rate": 9.43128947368421e-05, "loss": 0.4101, "step": 41630 }, { "epoch": 2.3312241012431403, "grad_norm": 1.1659746170043945, "learning_rate": 9.431263157894737e-05, "loss": 0.4217, "step": 41631 }, { 
"epoch": 2.3312800985552693, "grad_norm": 0.9995316863059998, "learning_rate": 9.431236842105263e-05, "loss": 0.3789, "step": 41632 }, { "epoch": 2.3313360958673983, "grad_norm": 1.2200870513916016, "learning_rate": 9.43121052631579e-05, "loss": 0.3935, "step": 41633 }, { "epoch": 2.3313920931795273, "grad_norm": 1.1122318506240845, "learning_rate": 9.431184210526316e-05, "loss": 0.4057, "step": 41634 }, { "epoch": 2.3314480904916564, "grad_norm": 1.36678147315979, "learning_rate": 9.431157894736842e-05, "loss": 0.3027, "step": 41635 }, { "epoch": 2.3315040878037854, "grad_norm": 1.1468138694763184, "learning_rate": 9.431131578947368e-05, "loss": 0.3935, "step": 41636 }, { "epoch": 2.3315600851159144, "grad_norm": 1.0361790657043457, "learning_rate": 9.431105263157896e-05, "loss": 0.318, "step": 41637 }, { "epoch": 2.3316160824280434, "grad_norm": 1.0989990234375, "learning_rate": 9.431078947368422e-05, "loss": 0.3625, "step": 41638 }, { "epoch": 2.3316720797401724, "grad_norm": 1.1724388599395752, "learning_rate": 9.431052631578948e-05, "loss": 0.3974, "step": 41639 }, { "epoch": 2.3317280770523015, "grad_norm": 1.3609899282455444, "learning_rate": 9.431026315789474e-05, "loss": 0.4279, "step": 41640 }, { "epoch": 2.3317840743644305, "grad_norm": 1.1316336393356323, "learning_rate": 9.431000000000001e-05, "loss": 0.3428, "step": 41641 }, { "epoch": 2.3318400716765595, "grad_norm": 1.0280351638793945, "learning_rate": 9.430973684210527e-05, "loss": 0.3498, "step": 41642 }, { "epoch": 2.3318960689886885, "grad_norm": 1.697363257408142, "learning_rate": 9.430947368421053e-05, "loss": 0.3986, "step": 41643 }, { "epoch": 2.3319520663008175, "grad_norm": 1.0848455429077148, "learning_rate": 9.430921052631579e-05, "loss": 0.4206, "step": 41644 }, { "epoch": 2.3320080636129465, "grad_norm": 1.131593942642212, "learning_rate": 9.430894736842105e-05, "loss": 0.3853, "step": 41645 }, { "epoch": 2.3320640609250756, "grad_norm": 1.2424079179763794, "learning_rate": 
9.430868421052632e-05, "loss": 0.5361, "step": 41646 }, { "epoch": 2.3321200582372046, "grad_norm": 1.182169795036316, "learning_rate": 9.430842105263158e-05, "loss": 0.5731, "step": 41647 }, { "epoch": 2.3321760555493336, "grad_norm": 1.1431505680084229, "learning_rate": 9.430815789473684e-05, "loss": 0.3826, "step": 41648 }, { "epoch": 2.3322320528614626, "grad_norm": 1.4568727016448975, "learning_rate": 9.43078947368421e-05, "loss": 0.4087, "step": 41649 }, { "epoch": 2.3322880501735916, "grad_norm": 1.3378961086273193, "learning_rate": 9.430763157894737e-05, "loss": 0.6345, "step": 41650 }, { "epoch": 2.3323440474857207, "grad_norm": 1.6190663576126099, "learning_rate": 9.430736842105263e-05, "loss": 0.4164, "step": 41651 }, { "epoch": 2.3324000447978497, "grad_norm": 1.3478869199752808, "learning_rate": 9.430710526315791e-05, "loss": 0.4849, "step": 41652 }, { "epoch": 2.3324560421099787, "grad_norm": 1.2431246042251587, "learning_rate": 9.430684210526315e-05, "loss": 0.4154, "step": 41653 }, { "epoch": 2.3325120394221077, "grad_norm": 1.2233268022537231, "learning_rate": 9.430657894736843e-05, "loss": 0.3562, "step": 41654 }, { "epoch": 2.3325680367342367, "grad_norm": 0.9987894296646118, "learning_rate": 9.430631578947369e-05, "loss": 0.338, "step": 41655 }, { "epoch": 2.3326240340463658, "grad_norm": 1.0406979322433472, "learning_rate": 9.430605263157896e-05, "loss": 0.36, "step": 41656 }, { "epoch": 2.3326800313584948, "grad_norm": 1.4256278276443481, "learning_rate": 9.430578947368422e-05, "loss": 0.4429, "step": 41657 }, { "epoch": 2.332736028670624, "grad_norm": 1.1722667217254639, "learning_rate": 9.430552631578948e-05, "loss": 0.3298, "step": 41658 }, { "epoch": 2.332792025982753, "grad_norm": 1.0652486085891724, "learning_rate": 9.430526315789474e-05, "loss": 0.3419, "step": 41659 }, { "epoch": 2.332848023294882, "grad_norm": 1.029771089553833, "learning_rate": 9.430500000000001e-05, "loss": 0.2534, "step": 41660 }, { "epoch": 2.332904020607011, 
"grad_norm": 1.1265232563018799, "learning_rate": 9.430473684210527e-05, "loss": 0.3277, "step": 41661 }, { "epoch": 2.33296001791914, "grad_norm": 1.3172229528427124, "learning_rate": 9.430447368421053e-05, "loss": 0.3412, "step": 41662 }, { "epoch": 2.333016015231269, "grad_norm": 1.2827367782592773, "learning_rate": 9.430421052631579e-05, "loss": 0.3825, "step": 41663 }, { "epoch": 2.333072012543398, "grad_norm": 1.4889280796051025, "learning_rate": 9.430394736842105e-05, "loss": 0.5843, "step": 41664 }, { "epoch": 2.333128009855527, "grad_norm": 1.1209124326705933, "learning_rate": 9.430368421052632e-05, "loss": 0.383, "step": 41665 }, { "epoch": 2.333184007167656, "grad_norm": 1.3554283380508423, "learning_rate": 9.430342105263158e-05, "loss": 0.4178, "step": 41666 }, { "epoch": 2.333240004479785, "grad_norm": 1.1092591285705566, "learning_rate": 9.430315789473684e-05, "loss": 0.3603, "step": 41667 }, { "epoch": 2.333296001791914, "grad_norm": 1.1824424266815186, "learning_rate": 9.43028947368421e-05, "loss": 0.4009, "step": 41668 }, { "epoch": 2.333351999104043, "grad_norm": 1.3623435497283936, "learning_rate": 9.430263157894738e-05, "loss": 0.3503, "step": 41669 }, { "epoch": 2.333407996416172, "grad_norm": 1.3782472610473633, "learning_rate": 9.430236842105264e-05, "loss": 0.4621, "step": 41670 }, { "epoch": 2.333463993728301, "grad_norm": 1.1193665266036987, "learning_rate": 9.43021052631579e-05, "loss": 0.4595, "step": 41671 }, { "epoch": 2.33351999104043, "grad_norm": 1.1852681636810303, "learning_rate": 9.430184210526316e-05, "loss": 0.3594, "step": 41672 }, { "epoch": 2.333575988352559, "grad_norm": 1.169629454612732, "learning_rate": 9.430157894736843e-05, "loss": 0.3724, "step": 41673 }, { "epoch": 2.333631985664688, "grad_norm": 0.9822991490364075, "learning_rate": 9.430131578947369e-05, "loss": 0.4527, "step": 41674 }, { "epoch": 2.333687982976817, "grad_norm": 1.3232277631759644, "learning_rate": 9.430105263157896e-05, "loss": 0.4737, "step": 
41675 }, { "epoch": 2.333743980288946, "grad_norm": 1.0209532976150513, "learning_rate": 9.430078947368421e-05, "loss": 0.3734, "step": 41676 }, { "epoch": 2.333799977601075, "grad_norm": 1.3224495649337769, "learning_rate": 9.430052631578948e-05, "loss": 0.5143, "step": 41677 }, { "epoch": 2.333855974913204, "grad_norm": 1.3637382984161377, "learning_rate": 9.430026315789474e-05, "loss": 0.5713, "step": 41678 }, { "epoch": 2.333911972225333, "grad_norm": 1.3716785907745361, "learning_rate": 9.43e-05, "loss": 0.4359, "step": 41679 }, { "epoch": 2.333967969537462, "grad_norm": 1.008894920349121, "learning_rate": 9.429973684210528e-05, "loss": 0.3966, "step": 41680 }, { "epoch": 2.3340239668495912, "grad_norm": 1.3727296590805054, "learning_rate": 9.429947368421052e-05, "loss": 0.498, "step": 41681 }, { "epoch": 2.3340799641617203, "grad_norm": 1.355534315109253, "learning_rate": 9.42992105263158e-05, "loss": 0.3918, "step": 41682 }, { "epoch": 2.3341359614738493, "grad_norm": 1.2827754020690918, "learning_rate": 9.429894736842105e-05, "loss": 0.3398, "step": 41683 }, { "epoch": 2.3341919587859783, "grad_norm": 1.1970857381820679, "learning_rate": 9.429868421052633e-05, "loss": 0.4814, "step": 41684 }, { "epoch": 2.3342479560981073, "grad_norm": 1.4424879550933838, "learning_rate": 9.429842105263157e-05, "loss": 0.3725, "step": 41685 }, { "epoch": 2.3343039534102363, "grad_norm": 1.116942048072815, "learning_rate": 9.429815789473685e-05, "loss": 0.4007, "step": 41686 }, { "epoch": 2.3343599507223654, "grad_norm": 1.3346049785614014, "learning_rate": 9.42978947368421e-05, "loss": 0.6027, "step": 41687 }, { "epoch": 2.3344159480344944, "grad_norm": 0.9552223086357117, "learning_rate": 9.429763157894738e-05, "loss": 0.3323, "step": 41688 }, { "epoch": 2.3344719453466234, "grad_norm": 1.2311464548110962, "learning_rate": 9.429736842105264e-05, "loss": 0.3994, "step": 41689 }, { "epoch": 2.3345279426587524, "grad_norm": 1.2025054693222046, "learning_rate": 
9.42971052631579e-05, "loss": 0.469, "step": 41690 }, { "epoch": 2.3345839399708814, "grad_norm": 1.4589117765426636, "learning_rate": 9.429684210526316e-05, "loss": 0.4501, "step": 41691 }, { "epoch": 2.3346399372830104, "grad_norm": 1.066486120223999, "learning_rate": 9.429657894736843e-05, "loss": 0.299, "step": 41692 }, { "epoch": 2.3346959345951395, "grad_norm": 1.087945580482483, "learning_rate": 9.429631578947369e-05, "loss": 0.4147, "step": 41693 }, { "epoch": 2.3347519319072685, "grad_norm": 1.1757198572158813, "learning_rate": 9.429605263157895e-05, "loss": 0.3714, "step": 41694 }, { "epoch": 2.3348079292193975, "grad_norm": 1.5330173969268799, "learning_rate": 9.429578947368421e-05, "loss": 0.5794, "step": 41695 }, { "epoch": 2.3348639265315265, "grad_norm": 0.9312339425086975, "learning_rate": 9.429552631578947e-05, "loss": 0.3141, "step": 41696 }, { "epoch": 2.3349199238436555, "grad_norm": 1.1804759502410889, "learning_rate": 9.429526315789474e-05, "loss": 0.408, "step": 41697 }, { "epoch": 2.3349759211557846, "grad_norm": 1.4757715463638306, "learning_rate": 9.4295e-05, "loss": 0.4644, "step": 41698 }, { "epoch": 2.3350319184679136, "grad_norm": 1.1422994136810303, "learning_rate": 9.429473684210526e-05, "loss": 0.4288, "step": 41699 }, { "epoch": 2.3350879157800426, "grad_norm": 1.0504841804504395, "learning_rate": 9.429447368421052e-05, "loss": 0.3136, "step": 41700 }, { "epoch": 2.3351439130921716, "grad_norm": 1.280503749847412, "learning_rate": 9.42942105263158e-05, "loss": 0.3732, "step": 41701 }, { "epoch": 2.3351999104043006, "grad_norm": 1.050684928894043, "learning_rate": 9.429394736842106e-05, "loss": 0.433, "step": 41702 }, { "epoch": 2.3352559077164297, "grad_norm": 1.3107224702835083, "learning_rate": 9.429368421052632e-05, "loss": 0.4073, "step": 41703 }, { "epoch": 2.3353119050285587, "grad_norm": 1.3444058895111084, "learning_rate": 9.429342105263158e-05, "loss": 0.4727, "step": 41704 }, { "epoch": 2.3353679023406877, "grad_norm": 
1.0133336782455444, "learning_rate": 9.429315789473685e-05, "loss": 0.3862, "step": 41705 }, { "epoch": 2.3354238996528167, "grad_norm": 1.2451754808425903, "learning_rate": 9.429289473684211e-05, "loss": 0.5589, "step": 41706 }, { "epoch": 2.3354798969649457, "grad_norm": 1.3266135454177856, "learning_rate": 9.429263157894738e-05, "loss": 0.4292, "step": 41707 }, { "epoch": 2.3355358942770748, "grad_norm": 1.4028217792510986, "learning_rate": 9.429236842105263e-05, "loss": 0.3688, "step": 41708 }, { "epoch": 2.3355918915892038, "grad_norm": 1.3867204189300537, "learning_rate": 9.42921052631579e-05, "loss": 0.3798, "step": 41709 }, { "epoch": 2.335647888901333, "grad_norm": 0.9747878313064575, "learning_rate": 9.429184210526316e-05, "loss": 0.3073, "step": 41710 }, { "epoch": 2.335703886213462, "grad_norm": 8.884810447692871, "learning_rate": 9.429157894736844e-05, "loss": 0.3939, "step": 41711 }, { "epoch": 2.335759883525591, "grad_norm": 1.3453997373580933, "learning_rate": 9.42913157894737e-05, "loss": 0.479, "step": 41712 }, { "epoch": 2.33581588083772, "grad_norm": 1.3915377855300903, "learning_rate": 9.429105263157894e-05, "loss": 0.4703, "step": 41713 }, { "epoch": 2.335871878149849, "grad_norm": 1.4205951690673828, "learning_rate": 9.429078947368421e-05, "loss": 0.416, "step": 41714 }, { "epoch": 2.335927875461978, "grad_norm": 1.204131007194519, "learning_rate": 9.429052631578947e-05, "loss": 0.4131, "step": 41715 }, { "epoch": 2.335983872774107, "grad_norm": 1.440616250038147, "learning_rate": 9.429026315789475e-05, "loss": 0.3045, "step": 41716 }, { "epoch": 2.336039870086236, "grad_norm": 1.2486579418182373, "learning_rate": 9.429000000000001e-05, "loss": 0.3971, "step": 41717 }, { "epoch": 2.336095867398365, "grad_norm": 1.1286476850509644, "learning_rate": 9.428973684210527e-05, "loss": 0.4534, "step": 41718 }, { "epoch": 2.336151864710494, "grad_norm": 1.3379883766174316, "learning_rate": 9.428947368421053e-05, "loss": 0.3618, "step": 41719 }, { 
"epoch": 2.336207862022623, "grad_norm": 1.2174293994903564, "learning_rate": 9.42892105263158e-05, "loss": 0.3415, "step": 41720 }, { "epoch": 2.336263859334752, "grad_norm": 1.2182146310806274, "learning_rate": 9.428894736842106e-05, "loss": 0.426, "step": 41721 }, { "epoch": 2.336319856646881, "grad_norm": 1.4156049489974976, "learning_rate": 9.428868421052632e-05, "loss": 0.416, "step": 41722 }, { "epoch": 2.33637585395901, "grad_norm": 1.4497874975204468, "learning_rate": 9.428842105263158e-05, "loss": 0.5433, "step": 41723 }, { "epoch": 2.336431851271139, "grad_norm": 1.6712723970413208, "learning_rate": 9.428815789473685e-05, "loss": 0.3794, "step": 41724 }, { "epoch": 2.336487848583268, "grad_norm": 1.330540657043457, "learning_rate": 9.428789473684211e-05, "loss": 0.353, "step": 41725 }, { "epoch": 2.336543845895397, "grad_norm": 1.1170587539672852, "learning_rate": 9.428763157894737e-05, "loss": 0.3398, "step": 41726 }, { "epoch": 2.336599843207526, "grad_norm": 1.41217839717865, "learning_rate": 9.428736842105263e-05, "loss": 0.4369, "step": 41727 }, { "epoch": 2.336655840519655, "grad_norm": 9.436219215393066, "learning_rate": 9.42871052631579e-05, "loss": 0.4083, "step": 41728 }, { "epoch": 2.336711837831784, "grad_norm": 1.4224368333816528, "learning_rate": 9.428684210526316e-05, "loss": 0.5299, "step": 41729 }, { "epoch": 2.336767835143913, "grad_norm": 1.157005786895752, "learning_rate": 9.428657894736842e-05, "loss": 0.3881, "step": 41730 }, { "epoch": 2.336823832456042, "grad_norm": 1.6119577884674072, "learning_rate": 9.428631578947368e-05, "loss": 0.4736, "step": 41731 }, { "epoch": 2.336879829768171, "grad_norm": 1.1501855850219727, "learning_rate": 9.428605263157894e-05, "loss": 0.3721, "step": 41732 }, { "epoch": 2.3369358270803002, "grad_norm": 1.53270423412323, "learning_rate": 9.428578947368422e-05, "loss": 0.4571, "step": 41733 }, { "epoch": 2.3369918243924293, "grad_norm": 1.7130926847457886, "learning_rate": 9.428552631578948e-05, 
"loss": 0.2826, "step": 41734 }, { "epoch": 2.3370478217045583, "grad_norm": 1.2406684160232544, "learning_rate": 9.428526315789475e-05, "loss": 0.5091, "step": 41735 }, { "epoch": 2.3371038190166873, "grad_norm": 1.0346183776855469, "learning_rate": 9.4285e-05, "loss": 0.2716, "step": 41736 }, { "epoch": 2.3371598163288163, "grad_norm": 1.2530306577682495, "learning_rate": 9.428473684210527e-05, "loss": 0.3613, "step": 41737 }, { "epoch": 2.3372158136409453, "grad_norm": 1.282768964767456, "learning_rate": 9.428447368421053e-05, "loss": 0.417, "step": 41738 }, { "epoch": 2.3372718109530743, "grad_norm": 1.467400074005127, "learning_rate": 9.42842105263158e-05, "loss": 0.4691, "step": 41739 }, { "epoch": 2.3373278082652034, "grad_norm": 1.1563502550125122, "learning_rate": 9.428394736842105e-05, "loss": 0.3668, "step": 41740 }, { "epoch": 2.3373838055773324, "grad_norm": 1.3010636568069458, "learning_rate": 9.428368421052632e-05, "loss": 0.4101, "step": 41741 }, { "epoch": 2.3374398028894614, "grad_norm": 1.5787529945373535, "learning_rate": 9.428342105263158e-05, "loss": 0.4249, "step": 41742 }, { "epoch": 2.3374958002015904, "grad_norm": 1.2126593589782715, "learning_rate": 9.428315789473685e-05, "loss": 0.3829, "step": 41743 }, { "epoch": 2.3375517975137194, "grad_norm": 1.2742772102355957, "learning_rate": 9.428289473684211e-05, "loss": 0.3764, "step": 41744 }, { "epoch": 2.3376077948258485, "grad_norm": 1.1307629346847534, "learning_rate": 9.428263157894737e-05, "loss": 0.3727, "step": 41745 }, { "epoch": 2.3376637921379775, "grad_norm": 1.0303871631622314, "learning_rate": 9.428236842105263e-05, "loss": 0.275, "step": 41746 }, { "epoch": 2.3377197894501065, "grad_norm": 1.8317618370056152, "learning_rate": 9.42821052631579e-05, "loss": 0.465, "step": 41747 }, { "epoch": 2.3377757867622355, "grad_norm": 1.093740701675415, "learning_rate": 9.428184210526317e-05, "loss": 0.3903, "step": 41748 }, { "epoch": 2.3378317840743645, "grad_norm": 1.3149365186691284, 
"learning_rate": 9.428157894736843e-05, "loss": 0.3396, "step": 41749 }, { "epoch": 2.3378877813864936, "grad_norm": 1.2754952907562256, "learning_rate": 9.428131578947369e-05, "loss": 0.3512, "step": 41750 }, { "epoch": 2.3379437786986226, "grad_norm": 1.2109102010726929, "learning_rate": 9.428105263157895e-05, "loss": 0.3263, "step": 41751 }, { "epoch": 2.3379997760107516, "grad_norm": 1.2914378643035889, "learning_rate": 9.428078947368422e-05, "loss": 0.503, "step": 41752 }, { "epoch": 2.3380557733228806, "grad_norm": 1.2562527656555176, "learning_rate": 9.428052631578948e-05, "loss": 0.4047, "step": 41753 }, { "epoch": 2.3381117706350096, "grad_norm": 1.27664315700531, "learning_rate": 9.428026315789474e-05, "loss": 0.5224, "step": 41754 }, { "epoch": 2.3381677679471387, "grad_norm": 1.3295894861221313, "learning_rate": 9.428e-05, "loss": 0.3922, "step": 41755 }, { "epoch": 2.3382237652592677, "grad_norm": 1.0919817686080933, "learning_rate": 9.427973684210527e-05, "loss": 0.3577, "step": 41756 }, { "epoch": 2.3382797625713967, "grad_norm": 1.1978970766067505, "learning_rate": 9.427947368421053e-05, "loss": 0.4259, "step": 41757 }, { "epoch": 2.3383357598835257, "grad_norm": 1.1360406875610352, "learning_rate": 9.427921052631579e-05, "loss": 0.3695, "step": 41758 }, { "epoch": 2.3383917571956547, "grad_norm": 1.6929552555084229, "learning_rate": 9.427894736842105e-05, "loss": 0.3602, "step": 41759 }, { "epoch": 2.3384477545077837, "grad_norm": 1.345205545425415, "learning_rate": 9.427868421052632e-05, "loss": 0.361, "step": 41760 }, { "epoch": 2.3385037518199128, "grad_norm": 1.705392837524414, "learning_rate": 9.427842105263158e-05, "loss": 0.5616, "step": 41761 }, { "epoch": 2.338559749132042, "grad_norm": 1.2249919176101685, "learning_rate": 9.427815789473686e-05, "loss": 0.3584, "step": 41762 }, { "epoch": 2.338615746444171, "grad_norm": 1.1815836429595947, "learning_rate": 9.42778947368421e-05, "loss": 0.337, "step": 41763 }, { "epoch": 2.3386717437563, 
"grad_norm": 1.1074751615524292, "learning_rate": 9.427763157894736e-05, "loss": 0.4065, "step": 41764 }, { "epoch": 2.338727741068429, "grad_norm": 1.1780734062194824, "learning_rate": 9.427736842105264e-05, "loss": 0.3084, "step": 41765 }, { "epoch": 2.338783738380558, "grad_norm": 1.3323748111724854, "learning_rate": 9.42771052631579e-05, "loss": 0.4491, "step": 41766 }, { "epoch": 2.338839735692687, "grad_norm": 1.1028201580047607, "learning_rate": 9.427684210526317e-05, "loss": 0.3188, "step": 41767 }, { "epoch": 2.338895733004816, "grad_norm": 1.160706877708435, "learning_rate": 9.427657894736842e-05, "loss": 0.3737, "step": 41768 }, { "epoch": 2.338951730316945, "grad_norm": 1.1706403493881226, "learning_rate": 9.427631578947369e-05, "loss": 0.5354, "step": 41769 }, { "epoch": 2.339007727629074, "grad_norm": 1.269150733947754, "learning_rate": 9.427605263157895e-05, "loss": 0.3022, "step": 41770 }, { "epoch": 2.339063724941203, "grad_norm": 1.3719485998153687, "learning_rate": 9.427578947368422e-05, "loss": 0.4369, "step": 41771 }, { "epoch": 2.339119722253332, "grad_norm": 1.270066738128662, "learning_rate": 9.427552631578948e-05, "loss": 0.3907, "step": 41772 }, { "epoch": 2.339175719565461, "grad_norm": 1.4558935165405273, "learning_rate": 9.427526315789474e-05, "loss": 0.5376, "step": 41773 }, { "epoch": 2.33923171687759, "grad_norm": 1.1652445793151855, "learning_rate": 9.4275e-05, "loss": 0.4387, "step": 41774 }, { "epoch": 2.339287714189719, "grad_norm": 1.228729248046875, "learning_rate": 9.427473684210527e-05, "loss": 0.4562, "step": 41775 }, { "epoch": 2.339343711501848, "grad_norm": 1.0535378456115723, "learning_rate": 9.427447368421053e-05, "loss": 0.5205, "step": 41776 }, { "epoch": 2.339399708813977, "grad_norm": 1.1124930381774902, "learning_rate": 9.42742105263158e-05, "loss": 0.3259, "step": 41777 }, { "epoch": 2.339455706126106, "grad_norm": 1.1334924697875977, "learning_rate": 9.427394736842105e-05, "loss": 0.3837, "step": 41778 }, { 
"epoch": 2.339511703438235, "grad_norm": 1.1416373252868652, "learning_rate": 9.427368421052633e-05, "loss": 0.3076, "step": 41779 }, { "epoch": 2.339567700750364, "grad_norm": 1.187648892402649, "learning_rate": 9.427342105263159e-05, "loss": 0.4436, "step": 41780 }, { "epoch": 2.339623698062493, "grad_norm": 0.938332736492157, "learning_rate": 9.427315789473685e-05, "loss": 0.2651, "step": 41781 }, { "epoch": 2.339679695374622, "grad_norm": 1.1859545707702637, "learning_rate": 9.42728947368421e-05, "loss": 0.4166, "step": 41782 }, { "epoch": 2.339735692686751, "grad_norm": 1.0829342603683472, "learning_rate": 9.427263157894737e-05, "loss": 0.3966, "step": 41783 }, { "epoch": 2.33979168999888, "grad_norm": 1.1341698169708252, "learning_rate": 9.427236842105264e-05, "loss": 0.3065, "step": 41784 }, { "epoch": 2.3398476873110092, "grad_norm": 1.224109411239624, "learning_rate": 9.42721052631579e-05, "loss": 0.4558, "step": 41785 }, { "epoch": 2.3399036846231382, "grad_norm": 1.0407896041870117, "learning_rate": 9.427184210526316e-05, "loss": 0.4261, "step": 41786 }, { "epoch": 2.3399596819352673, "grad_norm": 1.2565711736679077, "learning_rate": 9.427157894736842e-05, "loss": 0.4373, "step": 41787 }, { "epoch": 2.3400156792473963, "grad_norm": 1.186922311782837, "learning_rate": 9.427131578947369e-05, "loss": 0.3888, "step": 41788 }, { "epoch": 2.3400716765595253, "grad_norm": 1.1499872207641602, "learning_rate": 9.427105263157895e-05, "loss": 0.3495, "step": 41789 }, { "epoch": 2.3401276738716543, "grad_norm": 1.2514384984970093, "learning_rate": 9.427078947368422e-05, "loss": 0.4684, "step": 41790 }, { "epoch": 2.3401836711837833, "grad_norm": 1.0329967737197876, "learning_rate": 9.427052631578947e-05, "loss": 0.3001, "step": 41791 }, { "epoch": 2.3402396684959124, "grad_norm": 1.080004334449768, "learning_rate": 9.427026315789474e-05, "loss": 0.297, "step": 41792 }, { "epoch": 2.3402956658080414, "grad_norm": 1.3767447471618652, "learning_rate": 9.427e-05, 
"loss": 0.5396, "step": 41793 }, { "epoch": 2.3403516631201704, "grad_norm": 1.1343660354614258, "learning_rate": 9.426973684210528e-05, "loss": 0.3601, "step": 41794 }, { "epoch": 2.3404076604322994, "grad_norm": 1.0282236337661743, "learning_rate": 9.426947368421052e-05, "loss": 0.2492, "step": 41795 }, { "epoch": 2.3404636577444284, "grad_norm": 1.2647206783294678, "learning_rate": 9.42692105263158e-05, "loss": 0.3798, "step": 41796 }, { "epoch": 2.3405196550565575, "grad_norm": 1.4174648523330688, "learning_rate": 9.426894736842106e-05, "loss": 0.3754, "step": 41797 }, { "epoch": 2.3405756523686865, "grad_norm": 1.20793879032135, "learning_rate": 9.426868421052633e-05, "loss": 0.3814, "step": 41798 }, { "epoch": 2.3406316496808155, "grad_norm": 1.113721489906311, "learning_rate": 9.426842105263159e-05, "loss": 0.3805, "step": 41799 }, { "epoch": 2.3406876469929445, "grad_norm": 1.3276747465133667, "learning_rate": 9.426815789473684e-05, "loss": 0.4756, "step": 41800 }, { "epoch": 2.3407436443050735, "grad_norm": 1.2933099269866943, "learning_rate": 9.426789473684211e-05, "loss": 0.6164, "step": 41801 }, { "epoch": 2.3407996416172026, "grad_norm": 1.3743950128555298, "learning_rate": 9.426763157894737e-05, "loss": 0.3269, "step": 41802 }, { "epoch": 2.3408556389293316, "grad_norm": 1.3877071142196655, "learning_rate": 9.426736842105264e-05, "loss": 0.4525, "step": 41803 }, { "epoch": 2.3409116362414606, "grad_norm": 0.9714048504829407, "learning_rate": 9.42671052631579e-05, "loss": 0.312, "step": 41804 }, { "epoch": 2.3409676335535896, "grad_norm": 1.0771499872207642, "learning_rate": 9.426684210526316e-05, "loss": 0.3177, "step": 41805 }, { "epoch": 2.3410236308657186, "grad_norm": 1.1779916286468506, "learning_rate": 9.426657894736842e-05, "loss": 0.3441, "step": 41806 }, { "epoch": 2.3410796281778476, "grad_norm": 1.23214852809906, "learning_rate": 9.42663157894737e-05, "loss": 0.6082, "step": 41807 }, { "epoch": 2.3411356254899767, "grad_norm": 
1.1305887699127197, "learning_rate": 9.426605263157895e-05, "loss": 0.3718, "step": 41808 }, { "epoch": 2.3411916228021057, "grad_norm": 0.9967213869094849, "learning_rate": 9.426578947368421e-05, "loss": 0.3536, "step": 41809 }, { "epoch": 2.3412476201142347, "grad_norm": 1.8401435613632202, "learning_rate": 9.426552631578947e-05, "loss": 0.5358, "step": 41810 }, { "epoch": 2.3413036174263637, "grad_norm": 1.336448311805725, "learning_rate": 9.426526315789475e-05, "loss": 0.3524, "step": 41811 }, { "epoch": 2.3413596147384927, "grad_norm": 1.1528698205947876, "learning_rate": 9.4265e-05, "loss": 0.3462, "step": 41812 }, { "epoch": 2.3414156120506218, "grad_norm": 1.3161871433258057, "learning_rate": 9.426473684210527e-05, "loss": 0.4334, "step": 41813 }, { "epoch": 2.341471609362751, "grad_norm": 1.3091539144515991, "learning_rate": 9.426447368421053e-05, "loss": 0.5229, "step": 41814 }, { "epoch": 2.3415276066748794, "grad_norm": 1.1306521892547607, "learning_rate": 9.42642105263158e-05, "loss": 0.4743, "step": 41815 }, { "epoch": 2.341583603987009, "grad_norm": 1.1712980270385742, "learning_rate": 9.426394736842106e-05, "loss": 0.2625, "step": 41816 }, { "epoch": 2.3416396012991374, "grad_norm": 1.3377574682235718, "learning_rate": 9.426368421052632e-05, "loss": 0.4132, "step": 41817 }, { "epoch": 2.341695598611267, "grad_norm": 1.3885612487792969, "learning_rate": 9.426342105263158e-05, "loss": 0.5286, "step": 41818 }, { "epoch": 2.3417515959233954, "grad_norm": 1.1134899854660034, "learning_rate": 9.426315789473684e-05, "loss": 0.3455, "step": 41819 }, { "epoch": 2.341807593235525, "grad_norm": 1.1279454231262207, "learning_rate": 9.426289473684211e-05, "loss": 0.391, "step": 41820 }, { "epoch": 2.3418635905476535, "grad_norm": 1.1154600381851196, "learning_rate": 9.426263157894737e-05, "loss": 0.396, "step": 41821 }, { "epoch": 2.341919587859783, "grad_norm": 1.1974982023239136, "learning_rate": 9.426236842105264e-05, "loss": 0.4784, "step": 41822 }, { 
"epoch": 2.3419755851719115, "grad_norm": 1.4913843870162964, "learning_rate": 9.426210526315789e-05, "loss": 0.4432, "step": 41823 }, { "epoch": 2.342031582484041, "grad_norm": 1.224095344543457, "learning_rate": 9.426184210526316e-05, "loss": 0.4479, "step": 41824 }, { "epoch": 2.3420875797961695, "grad_norm": 1.6423996686935425, "learning_rate": 9.426157894736842e-05, "loss": 0.4092, "step": 41825 }, { "epoch": 2.342143577108299, "grad_norm": 1.4122017621994019, "learning_rate": 9.42613157894737e-05, "loss": 0.4754, "step": 41826 }, { "epoch": 2.3421995744204276, "grad_norm": 1.071643590927124, "learning_rate": 9.426105263157896e-05, "loss": 0.3118, "step": 41827 }, { "epoch": 2.342255571732557, "grad_norm": 1.2291921377182007, "learning_rate": 9.426078947368422e-05, "loss": 0.485, "step": 41828 }, { "epoch": 2.3423115690446856, "grad_norm": 1.5928658246994019, "learning_rate": 9.426052631578948e-05, "loss": 0.4086, "step": 41829 }, { "epoch": 2.342367566356815, "grad_norm": 1.258038878440857, "learning_rate": 9.426026315789475e-05, "loss": 0.3436, "step": 41830 }, { "epoch": 2.3424235636689437, "grad_norm": 1.3400644063949585, "learning_rate": 9.426000000000001e-05, "loss": 0.6097, "step": 41831 }, { "epoch": 2.342479560981073, "grad_norm": 1.2180894613265991, "learning_rate": 9.425973684210527e-05, "loss": 0.3986, "step": 41832 }, { "epoch": 2.3425355582932017, "grad_norm": 1.7089297771453857, "learning_rate": 9.425947368421053e-05, "loss": 0.4027, "step": 41833 }, { "epoch": 2.342591555605331, "grad_norm": 1.1832506656646729, "learning_rate": 9.425921052631579e-05, "loss": 0.3882, "step": 41834 }, { "epoch": 2.3426475529174597, "grad_norm": 1.1357338428497314, "learning_rate": 9.425894736842106e-05, "loss": 0.34, "step": 41835 }, { "epoch": 2.342703550229589, "grad_norm": 1.2460598945617676, "learning_rate": 9.425868421052632e-05, "loss": 0.4762, "step": 41836 }, { "epoch": 2.342759547541718, "grad_norm": 1.6425243616104126, "learning_rate": 
9.425842105263158e-05, "loss": 0.3345, "step": 41837 }, { "epoch": 2.3428155448538472, "grad_norm": 1.2084277868270874, "learning_rate": 9.425815789473684e-05, "loss": 0.3793, "step": 41838 }, { "epoch": 2.342871542165976, "grad_norm": 1.2540837526321411, "learning_rate": 9.425789473684211e-05, "loss": 0.3491, "step": 41839 }, { "epoch": 2.3429275394781053, "grad_norm": 1.1292132139205933, "learning_rate": 9.425763157894737e-05, "loss": 0.3796, "step": 41840 }, { "epoch": 2.342983536790234, "grad_norm": 3.4217171669006348, "learning_rate": 9.425736842105263e-05, "loss": 0.3949, "step": 41841 }, { "epoch": 2.3430395341023633, "grad_norm": 1.3985133171081543, "learning_rate": 9.425710526315789e-05, "loss": 0.4345, "step": 41842 }, { "epoch": 2.343095531414492, "grad_norm": 1.1672624349594116, "learning_rate": 9.425684210526317e-05, "loss": 0.4105, "step": 41843 }, { "epoch": 2.3431515287266214, "grad_norm": 1.179754376411438, "learning_rate": 9.425657894736843e-05, "loss": 0.3785, "step": 41844 }, { "epoch": 2.34320752603875, "grad_norm": 1.3450326919555664, "learning_rate": 9.42563157894737e-05, "loss": 0.4209, "step": 41845 }, { "epoch": 2.3432635233508794, "grad_norm": 1.2324138879776, "learning_rate": 9.425605263157895e-05, "loss": 0.4119, "step": 41846 }, { "epoch": 2.343319520663008, "grad_norm": 0.9802883863449097, "learning_rate": 9.425578947368422e-05, "loss": 0.295, "step": 41847 }, { "epoch": 2.3433755179751374, "grad_norm": 1.1618484258651733, "learning_rate": 9.425552631578948e-05, "loss": 0.5352, "step": 41848 }, { "epoch": 2.343431515287266, "grad_norm": 1.0275614261627197, "learning_rate": 9.425526315789475e-05, "loss": 0.3691, "step": 41849 }, { "epoch": 2.3434875125993955, "grad_norm": 1.4349318742752075, "learning_rate": 9.4255e-05, "loss": 0.3887, "step": 41850 }, { "epoch": 2.343543509911524, "grad_norm": 1.4337527751922607, "learning_rate": 9.425473684210526e-05, "loss": 0.4679, "step": 41851 }, { "epoch": 2.3435995072236535, "grad_norm": 
1.142625331878662, "learning_rate": 9.425447368421053e-05, "loss": 0.4438, "step": 41852 }, { "epoch": 2.343655504535782, "grad_norm": 1.2787381410598755, "learning_rate": 9.425421052631579e-05, "loss": 0.3788, "step": 41853 }, { "epoch": 2.3437115018479115, "grad_norm": 1.2361544370651245, "learning_rate": 9.425394736842106e-05, "loss": 0.4464, "step": 41854 }, { "epoch": 2.34376749916004, "grad_norm": 1.086199164390564, "learning_rate": 9.425368421052631e-05, "loss": 0.2947, "step": 41855 }, { "epoch": 2.343823496472169, "grad_norm": 1.2735209465026855, "learning_rate": 9.425342105263158e-05, "loss": 0.496, "step": 41856 }, { "epoch": 2.343879493784298, "grad_norm": 1.3250583410263062, "learning_rate": 9.425315789473684e-05, "loss": 0.3788, "step": 41857 }, { "epoch": 2.343935491096427, "grad_norm": 1.5316301584243774, "learning_rate": 9.425289473684212e-05, "loss": 0.6071, "step": 41858 }, { "epoch": 2.343991488408556, "grad_norm": 1.1828645467758179, "learning_rate": 9.425263157894738e-05, "loss": 0.4172, "step": 41859 }, { "epoch": 2.344047485720685, "grad_norm": 1.4202466011047363, "learning_rate": 9.425236842105264e-05, "loss": 0.4974, "step": 41860 }, { "epoch": 2.3441034830328142, "grad_norm": 1.1320104598999023, "learning_rate": 9.42521052631579e-05, "loss": 0.3741, "step": 41861 }, { "epoch": 2.3441594803449433, "grad_norm": 1.3960785865783691, "learning_rate": 9.425184210526317e-05, "loss": 0.447, "step": 41862 }, { "epoch": 2.3442154776570723, "grad_norm": 1.4883716106414795, "learning_rate": 9.425157894736843e-05, "loss": 0.4523, "step": 41863 }, { "epoch": 2.3442714749692013, "grad_norm": 1.328608751296997, "learning_rate": 9.425131578947369e-05, "loss": 0.3863, "step": 41864 }, { "epoch": 2.3443274722813303, "grad_norm": 1.705578088760376, "learning_rate": 9.425105263157895e-05, "loss": 0.4822, "step": 41865 }, { "epoch": 2.3443834695934593, "grad_norm": 1.3447991609573364, "learning_rate": 9.425078947368422e-05, "loss": 0.3543, "step": 41866 }, { 
"epoch": 2.3444394669055884, "grad_norm": 1.1524291038513184, "learning_rate": 9.425052631578948e-05, "loss": 0.4113, "step": 41867 }, { "epoch": 2.3444954642177174, "grad_norm": 1.7251302003860474, "learning_rate": 9.425026315789474e-05, "loss": 0.5049, "step": 41868 }, { "epoch": 2.3445514615298464, "grad_norm": 1.0794435739517212, "learning_rate": 9.425e-05, "loss": 0.421, "step": 41869 }, { "epoch": 2.3446074588419754, "grad_norm": 1.2601521015167236, "learning_rate": 9.424973684210526e-05, "loss": 0.4124, "step": 41870 }, { "epoch": 2.3446634561541044, "grad_norm": 1.3364636898040771, "learning_rate": 9.424947368421053e-05, "loss": 0.4945, "step": 41871 }, { "epoch": 2.3447194534662334, "grad_norm": 1.2101716995239258, "learning_rate": 9.42492105263158e-05, "loss": 0.4145, "step": 41872 }, { "epoch": 2.3447754507783625, "grad_norm": 1.1150449514389038, "learning_rate": 9.424894736842105e-05, "loss": 0.3268, "step": 41873 }, { "epoch": 2.3448314480904915, "grad_norm": 1.2457066774368286, "learning_rate": 9.424868421052631e-05, "loss": 0.471, "step": 41874 }, { "epoch": 2.3448874454026205, "grad_norm": 1.1699256896972656, "learning_rate": 9.424842105263159e-05, "loss": 0.4224, "step": 41875 }, { "epoch": 2.3449434427147495, "grad_norm": 1.1539231538772583, "learning_rate": 9.424815789473685e-05, "loss": 0.391, "step": 41876 }, { "epoch": 2.3449994400268785, "grad_norm": 1.3850818872451782, "learning_rate": 9.424789473684212e-05, "loss": 0.5928, "step": 41877 }, { "epoch": 2.3450554373390076, "grad_norm": 1.1350572109222412, "learning_rate": 9.424763157894737e-05, "loss": 0.3388, "step": 41878 }, { "epoch": 2.3451114346511366, "grad_norm": 1.3524158000946045, "learning_rate": 9.424736842105264e-05, "loss": 0.5431, "step": 41879 }, { "epoch": 2.3451674319632656, "grad_norm": 1.4524420499801636, "learning_rate": 9.42471052631579e-05, "loss": 0.4179, "step": 41880 }, { "epoch": 2.3452234292753946, "grad_norm": 1.5528044700622559, "learning_rate": 
9.424684210526317e-05, "loss": 0.3886, "step": 41881 }, { "epoch": 2.3452794265875236, "grad_norm": 1.3053419589996338, "learning_rate": 9.424657894736843e-05, "loss": 0.4776, "step": 41882 }, { "epoch": 2.3453354238996527, "grad_norm": 1.2407245635986328, "learning_rate": 9.424631578947369e-05, "loss": 0.5198, "step": 41883 }, { "epoch": 2.3453914212117817, "grad_norm": 1.1918450593948364, "learning_rate": 9.424605263157895e-05, "loss": 0.409, "step": 41884 }, { "epoch": 2.3454474185239107, "grad_norm": 1.6173343658447266, "learning_rate": 9.424578947368421e-05, "loss": 0.4784, "step": 41885 }, { "epoch": 2.3455034158360397, "grad_norm": 1.0414845943450928, "learning_rate": 9.424552631578948e-05, "loss": 0.3483, "step": 41886 }, { "epoch": 2.3455594131481687, "grad_norm": 1.2562373876571655, "learning_rate": 9.424526315789473e-05, "loss": 0.4984, "step": 41887 }, { "epoch": 2.3456154104602978, "grad_norm": 1.0403567552566528, "learning_rate": 9.4245e-05, "loss": 0.4442, "step": 41888 }, { "epoch": 2.3456714077724268, "grad_norm": 1.015623688697815, "learning_rate": 9.424473684210526e-05, "loss": 0.3414, "step": 41889 }, { "epoch": 2.345727405084556, "grad_norm": 1.4809482097625732, "learning_rate": 9.424447368421054e-05, "loss": 0.4381, "step": 41890 }, { "epoch": 2.345783402396685, "grad_norm": 1.1635187864303589, "learning_rate": 9.42442105263158e-05, "loss": 0.4136, "step": 41891 }, { "epoch": 2.345839399708814, "grad_norm": 1.2481491565704346, "learning_rate": 9.424394736842106e-05, "loss": 0.3558, "step": 41892 }, { "epoch": 2.345895397020943, "grad_norm": 1.7884985208511353, "learning_rate": 9.424368421052632e-05, "loss": 0.7679, "step": 41893 }, { "epoch": 2.345951394333072, "grad_norm": 1.0589234828948975, "learning_rate": 9.424342105263159e-05, "loss": 0.3458, "step": 41894 }, { "epoch": 2.346007391645201, "grad_norm": 1.3798965215682983, "learning_rate": 9.424315789473685e-05, "loss": 0.5949, "step": 41895 }, { "epoch": 2.34606338895733, "grad_norm": 
1.2889463901519775, "learning_rate": 9.424289473684211e-05, "loss": 0.3607, "step": 41896 }, { "epoch": 2.346119386269459, "grad_norm": 1.4177976846694946, "learning_rate": 9.424263157894737e-05, "loss": 0.739, "step": 41897 }, { "epoch": 2.346175383581588, "grad_norm": 1.213110327720642, "learning_rate": 9.424236842105264e-05, "loss": 0.5117, "step": 41898 }, { "epoch": 2.346231380893717, "grad_norm": 1.0544935464859009, "learning_rate": 9.42421052631579e-05, "loss": 0.3846, "step": 41899 }, { "epoch": 2.346287378205846, "grad_norm": 1.1342898607254028, "learning_rate": 9.424184210526316e-05, "loss": 0.3124, "step": 41900 }, { "epoch": 2.346343375517975, "grad_norm": 1.0577930212020874, "learning_rate": 9.424157894736842e-05, "loss": 0.3406, "step": 41901 }, { "epoch": 2.346399372830104, "grad_norm": 1.6600995063781738, "learning_rate": 9.42413157894737e-05, "loss": 0.3875, "step": 41902 }, { "epoch": 2.346455370142233, "grad_norm": 1.125165343284607, "learning_rate": 9.424105263157895e-05, "loss": 0.3791, "step": 41903 }, { "epoch": 2.346511367454362, "grad_norm": 1.086573600769043, "learning_rate": 9.424078947368421e-05, "loss": 0.331, "step": 41904 }, { "epoch": 2.346567364766491, "grad_norm": 1.5938458442687988, "learning_rate": 9.424052631578947e-05, "loss": 0.4114, "step": 41905 }, { "epoch": 2.34662336207862, "grad_norm": 1.1174705028533936, "learning_rate": 9.424026315789473e-05, "loss": 0.3278, "step": 41906 }, { "epoch": 2.346679359390749, "grad_norm": 0.9441462755203247, "learning_rate": 9.424e-05, "loss": 0.3228, "step": 41907 }, { "epoch": 2.346735356702878, "grad_norm": 1.315874457359314, "learning_rate": 9.423973684210527e-05, "loss": 0.4691, "step": 41908 }, { "epoch": 2.346791354015007, "grad_norm": 1.1920456886291504, "learning_rate": 9.423947368421054e-05, "loss": 0.4221, "step": 41909 }, { "epoch": 2.346847351327136, "grad_norm": 1.08306884765625, "learning_rate": 9.423921052631578e-05, "loss": 0.4482, "step": 41910 }, { "epoch": 
2.346903348639265, "grad_norm": 1.1478420495986938, "learning_rate": 9.423894736842106e-05, "loss": 0.4323, "step": 41911 }, { "epoch": 2.346959345951394, "grad_norm": 1.2103220224380493, "learning_rate": 9.423868421052632e-05, "loss": 0.5561, "step": 41912 }, { "epoch": 2.3470153432635232, "grad_norm": 1.527949333190918, "learning_rate": 9.423842105263159e-05, "loss": 0.4219, "step": 41913 }, { "epoch": 2.3470713405756523, "grad_norm": 1.2173051834106445, "learning_rate": 9.423815789473685e-05, "loss": 0.4368, "step": 41914 }, { "epoch": 2.3471273378877813, "grad_norm": 3.7206315994262695, "learning_rate": 9.423789473684211e-05, "loss": 0.3615, "step": 41915 }, { "epoch": 2.3471833351999103, "grad_norm": 1.016814112663269, "learning_rate": 9.423763157894737e-05, "loss": 0.3292, "step": 41916 }, { "epoch": 2.3472393325120393, "grad_norm": 1.2219231128692627, "learning_rate": 9.423736842105264e-05, "loss": 0.5443, "step": 41917 }, { "epoch": 2.3472953298241683, "grad_norm": 1.270013689994812, "learning_rate": 9.42371052631579e-05, "loss": 0.4485, "step": 41918 }, { "epoch": 2.3473513271362973, "grad_norm": 1.19881272315979, "learning_rate": 9.423684210526316e-05, "loss": 0.3319, "step": 41919 }, { "epoch": 2.3474073244484264, "grad_norm": 1.1045656204223633, "learning_rate": 9.423657894736842e-05, "loss": 0.3807, "step": 41920 }, { "epoch": 2.3474633217605554, "grad_norm": 0.9773579835891724, "learning_rate": 9.423631578947368e-05, "loss": 0.3254, "step": 41921 }, { "epoch": 2.3475193190726844, "grad_norm": 1.30803382396698, "learning_rate": 9.423605263157896e-05, "loss": 0.3976, "step": 41922 }, { "epoch": 2.3475753163848134, "grad_norm": 1.4182490110397339, "learning_rate": 9.423578947368422e-05, "loss": 0.4168, "step": 41923 }, { "epoch": 2.3476313136969424, "grad_norm": 1.389750361442566, "learning_rate": 9.423552631578948e-05, "loss": 0.4313, "step": 41924 }, { "epoch": 2.3476873110090715, "grad_norm": 1.2765437364578247, "learning_rate": 9.423526315789474e-05, 
"loss": 0.6314, "step": 41925 }, { "epoch": 2.3477433083212005, "grad_norm": 1.0952752828598022, "learning_rate": 9.423500000000001e-05, "loss": 0.3257, "step": 41926 }, { "epoch": 2.3477993056333295, "grad_norm": 1.1169794797897339, "learning_rate": 9.423473684210527e-05, "loss": 0.425, "step": 41927 }, { "epoch": 2.3478553029454585, "grad_norm": 1.1962910890579224, "learning_rate": 9.423447368421053e-05, "loss": 0.3431, "step": 41928 }, { "epoch": 2.3479113002575875, "grad_norm": 1.6997184753417969, "learning_rate": 9.423421052631579e-05, "loss": 0.3816, "step": 41929 }, { "epoch": 2.3479672975697166, "grad_norm": 1.2387455701828003, "learning_rate": 9.423394736842106e-05, "loss": 0.3539, "step": 41930 }, { "epoch": 2.3480232948818456, "grad_norm": 1.4194833040237427, "learning_rate": 9.423368421052632e-05, "loss": 0.5217, "step": 41931 }, { "epoch": 2.3480792921939746, "grad_norm": 1.1208289861679077, "learning_rate": 9.42334210526316e-05, "loss": 0.4095, "step": 41932 }, { "epoch": 2.3481352895061036, "grad_norm": 2.0579662322998047, "learning_rate": 9.423315789473684e-05, "loss": 0.3798, "step": 41933 }, { "epoch": 2.3481912868182326, "grad_norm": 1.0255206823349, "learning_rate": 9.423289473684211e-05, "loss": 0.4104, "step": 41934 }, { "epoch": 2.3482472841303617, "grad_norm": 1.035516619682312, "learning_rate": 9.423263157894737e-05, "loss": 0.4242, "step": 41935 }, { "epoch": 2.3483032814424907, "grad_norm": 1.3136500120162964, "learning_rate": 9.423236842105265e-05, "loss": 0.3947, "step": 41936 }, { "epoch": 2.3483592787546197, "grad_norm": 1.2971669435501099, "learning_rate": 9.42321052631579e-05, "loss": 0.4815, "step": 41937 }, { "epoch": 2.3484152760667487, "grad_norm": 1.1738402843475342, "learning_rate": 9.423184210526315e-05, "loss": 0.2907, "step": 41938 }, { "epoch": 2.3484712733788777, "grad_norm": 1.0700563192367554, "learning_rate": 9.423157894736843e-05, "loss": 0.3582, "step": 41939 }, { "epoch": 2.3485272706910068, "grad_norm": 
1.2877978086471558, "learning_rate": 9.423131578947369e-05, "loss": 0.3896, "step": 41940 }, { "epoch": 2.3485832680031358, "grad_norm": 1.0687603950500488, "learning_rate": 9.423105263157896e-05, "loss": 0.4127, "step": 41941 }, { "epoch": 2.348639265315265, "grad_norm": 1.3275868892669678, "learning_rate": 9.42307894736842e-05, "loss": 0.3145, "step": 41942 }, { "epoch": 2.348695262627394, "grad_norm": 1.0240685939788818, "learning_rate": 9.423052631578948e-05, "loss": 0.3168, "step": 41943 }, { "epoch": 2.348751259939523, "grad_norm": 1.4507956504821777, "learning_rate": 9.423026315789474e-05, "loss": 0.3863, "step": 41944 }, { "epoch": 2.348807257251652, "grad_norm": 1.2680301666259766, "learning_rate": 9.423000000000001e-05, "loss": 0.4111, "step": 41945 }, { "epoch": 2.348863254563781, "grad_norm": 1.1888868808746338, "learning_rate": 9.422973684210527e-05, "loss": 0.2686, "step": 41946 }, { "epoch": 2.34891925187591, "grad_norm": 1.0212517976760864, "learning_rate": 9.422947368421053e-05, "loss": 0.3038, "step": 41947 }, { "epoch": 2.348975249188039, "grad_norm": 1.2542644739151, "learning_rate": 9.422921052631579e-05, "loss": 0.3456, "step": 41948 }, { "epoch": 2.349031246500168, "grad_norm": 1.1059317588806152, "learning_rate": 9.422894736842106e-05, "loss": 0.3615, "step": 41949 }, { "epoch": 2.349087243812297, "grad_norm": 1.2373396158218384, "learning_rate": 9.422868421052632e-05, "loss": 0.5558, "step": 41950 }, { "epoch": 2.349143241124426, "grad_norm": 1.3789201974868774, "learning_rate": 9.422842105263158e-05, "loss": 0.4783, "step": 41951 }, { "epoch": 2.349199238436555, "grad_norm": 1.4175273180007935, "learning_rate": 9.422815789473684e-05, "loss": 0.3201, "step": 41952 }, { "epoch": 2.349255235748684, "grad_norm": 1.0703898668289185, "learning_rate": 9.422789473684212e-05, "loss": 0.3748, "step": 41953 }, { "epoch": 2.349311233060813, "grad_norm": 1.4079670906066895, "learning_rate": 9.422763157894738e-05, "loss": 0.4475, "step": 41954 }, { 
"epoch": 2.349367230372942, "grad_norm": 1.2110671997070312, "learning_rate": 9.422736842105264e-05, "loss": 0.3905, "step": 41955 }, { "epoch": 2.349423227685071, "grad_norm": 1.2391728162765503, "learning_rate": 9.42271052631579e-05, "loss": 0.3827, "step": 41956 }, { "epoch": 2.3494792249972, "grad_norm": 1.0653246641159058, "learning_rate": 9.422684210526315e-05, "loss": 0.3572, "step": 41957 }, { "epoch": 2.349535222309329, "grad_norm": 1.1151396036148071, "learning_rate": 9.422657894736843e-05, "loss": 0.4081, "step": 41958 }, { "epoch": 2.349591219621458, "grad_norm": 1.2151936292648315, "learning_rate": 9.422631578947369e-05, "loss": 0.3841, "step": 41959 }, { "epoch": 2.349647216933587, "grad_norm": 1.547565221786499, "learning_rate": 9.422605263157895e-05, "loss": 0.4742, "step": 41960 }, { "epoch": 2.349703214245716, "grad_norm": 1.3111594915390015, "learning_rate": 9.422578947368421e-05, "loss": 0.3881, "step": 41961 }, { "epoch": 2.349759211557845, "grad_norm": 1.1176769733428955, "learning_rate": 9.422552631578948e-05, "loss": 0.3866, "step": 41962 }, { "epoch": 2.349815208869974, "grad_norm": 2.228069305419922, "learning_rate": 9.422526315789474e-05, "loss": 0.4115, "step": 41963 }, { "epoch": 2.349871206182103, "grad_norm": 1.9174860715866089, "learning_rate": 9.422500000000001e-05, "loss": 0.3668, "step": 41964 }, { "epoch": 2.3499272034942322, "grad_norm": 1.223373532295227, "learning_rate": 9.422473684210526e-05, "loss": 0.4554, "step": 41965 }, { "epoch": 2.3499832008063612, "grad_norm": 1.7548977136611938, "learning_rate": 9.422447368421053e-05, "loss": 0.4082, "step": 41966 }, { "epoch": 2.3500391981184903, "grad_norm": 10.118800163269043, "learning_rate": 9.422421052631579e-05, "loss": 0.3732, "step": 41967 }, { "epoch": 2.3500951954306193, "grad_norm": 1.1329727172851562, "learning_rate": 9.422394736842107e-05, "loss": 0.3455, "step": 41968 }, { "epoch": 2.3501511927427483, "grad_norm": 1.1047589778900146, "learning_rate": 
9.422368421052633e-05, "loss": 0.3214, "step": 41969 }, { "epoch": 2.3502071900548773, "grad_norm": 1.398796796798706, "learning_rate": 9.422342105263159e-05, "loss": 0.468, "step": 41970 }, { "epoch": 2.3502631873670063, "grad_norm": 1.2286341190338135, "learning_rate": 9.422315789473685e-05, "loss": 0.5296, "step": 41971 }, { "epoch": 2.3503191846791354, "grad_norm": 1.1639487743377686, "learning_rate": 9.42228947368421e-05, "loss": 0.4148, "step": 41972 }, { "epoch": 2.3503751819912644, "grad_norm": 1.1125766038894653, "learning_rate": 9.422263157894738e-05, "loss": 0.384, "step": 41973 }, { "epoch": 2.3504311793033934, "grad_norm": 1.1336102485656738, "learning_rate": 9.422236842105264e-05, "loss": 0.4169, "step": 41974 }, { "epoch": 2.3504871766155224, "grad_norm": 1.2123184204101562, "learning_rate": 9.42221052631579e-05, "loss": 0.4211, "step": 41975 }, { "epoch": 2.3505431739276514, "grad_norm": 1.0760911703109741, "learning_rate": 9.422184210526316e-05, "loss": 0.3141, "step": 41976 }, { "epoch": 2.3505991712397805, "grad_norm": 1.1022170782089233, "learning_rate": 9.422157894736843e-05, "loss": 0.3608, "step": 41977 }, { "epoch": 2.3506551685519095, "grad_norm": 1.0313996076583862, "learning_rate": 9.422131578947369e-05, "loss": 0.3936, "step": 41978 }, { "epoch": 2.3507111658640385, "grad_norm": 1.2951610088348389, "learning_rate": 9.422105263157895e-05, "loss": 0.3751, "step": 41979 }, { "epoch": 2.3507671631761675, "grad_norm": 1.344642162322998, "learning_rate": 9.422078947368421e-05, "loss": 0.5303, "step": 41980 }, { "epoch": 2.3508231604882965, "grad_norm": 1.1803078651428223, "learning_rate": 9.422052631578948e-05, "loss": 0.354, "step": 41981 }, { "epoch": 2.3508791578004256, "grad_norm": 1.8724883794784546, "learning_rate": 9.422026315789474e-05, "loss": 0.4203, "step": 41982 }, { "epoch": 2.3509351551125546, "grad_norm": 1.3569070100784302, "learning_rate": 9.422e-05, "loss": 0.3127, "step": 41983 }, { "epoch": 2.3509911524246836, "grad_norm": 
1.0621393918991089, "learning_rate": 9.421973684210526e-05, "loss": 0.3003, "step": 41984 }, { "epoch": 2.3510471497368126, "grad_norm": 1.196302890777588, "learning_rate": 9.421947368421054e-05, "loss": 0.4334, "step": 41985 }, { "epoch": 2.3511031470489416, "grad_norm": 1.472033143043518, "learning_rate": 9.42192105263158e-05, "loss": 0.4106, "step": 41986 }, { "epoch": 2.3511591443610707, "grad_norm": 1.4869413375854492, "learning_rate": 9.421894736842107e-05, "loss": 0.4798, "step": 41987 }, { "epoch": 2.3512151416731997, "grad_norm": 1.1627663373947144, "learning_rate": 9.421868421052631e-05, "loss": 0.3811, "step": 41988 }, { "epoch": 2.3512711389853287, "grad_norm": 1.048711895942688, "learning_rate": 9.421842105263157e-05, "loss": 0.4891, "step": 41989 }, { "epoch": 2.3513271362974577, "grad_norm": 1.013312578201294, "learning_rate": 9.421815789473685e-05, "loss": 0.3595, "step": 41990 }, { "epoch": 2.3513831336095867, "grad_norm": 1.2516334056854248, "learning_rate": 9.421789473684211e-05, "loss": 0.4513, "step": 41991 }, { "epoch": 2.3514391309217157, "grad_norm": 1.0890311002731323, "learning_rate": 9.421763157894738e-05, "loss": 0.3958, "step": 41992 }, { "epoch": 2.3514951282338448, "grad_norm": 1.163549542427063, "learning_rate": 9.421736842105263e-05, "loss": 0.4203, "step": 41993 }, { "epoch": 2.351551125545974, "grad_norm": 1.35733962059021, "learning_rate": 9.42171052631579e-05, "loss": 0.4986, "step": 41994 }, { "epoch": 2.351607122858103, "grad_norm": 1.1108150482177734, "learning_rate": 9.421684210526316e-05, "loss": 0.3442, "step": 41995 }, { "epoch": 2.351663120170232, "grad_norm": 0.950831949710846, "learning_rate": 9.421657894736843e-05, "loss": 0.3652, "step": 41996 }, { "epoch": 2.351719117482361, "grad_norm": 1.182410478591919, "learning_rate": 9.421631578947368e-05, "loss": 0.423, "step": 41997 }, { "epoch": 2.35177511479449, "grad_norm": 1.0937422513961792, "learning_rate": 9.421605263157895e-05, "loss": 0.3506, "step": 41998 }, { 
"epoch": 2.351831112106619, "grad_norm": 1.310194969177246, "learning_rate": 9.421578947368421e-05, "loss": 0.41, "step": 41999 }, { "epoch": 2.351887109418748, "grad_norm": 1.4692976474761963, "learning_rate": 9.421552631578949e-05, "loss": 0.4413, "step": 42000 }, { "epoch": 2.351943106730877, "grad_norm": 1.204043984413147, "learning_rate": 9.421526315789475e-05, "loss": 0.4192, "step": 42001 }, { "epoch": 2.351999104043006, "grad_norm": 1.1480443477630615, "learning_rate": 9.4215e-05, "loss": 0.3172, "step": 42002 }, { "epoch": 2.352055101355135, "grad_norm": 1.1406035423278809, "learning_rate": 9.421473684210526e-05, "loss": 0.3259, "step": 42003 }, { "epoch": 2.352111098667264, "grad_norm": 1.1293482780456543, "learning_rate": 9.421447368421054e-05, "loss": 0.3216, "step": 42004 }, { "epoch": 2.352167095979393, "grad_norm": 1.1756552457809448, "learning_rate": 9.42142105263158e-05, "loss": 0.3967, "step": 42005 }, { "epoch": 2.352223093291522, "grad_norm": 1.1172279119491577, "learning_rate": 9.421394736842106e-05, "loss": 0.3094, "step": 42006 }, { "epoch": 2.352279090603651, "grad_norm": 1.8310580253601074, "learning_rate": 9.421368421052632e-05, "loss": 0.4468, "step": 42007 }, { "epoch": 2.35233508791578, "grad_norm": 1.2626774311065674, "learning_rate": 9.421342105263158e-05, "loss": 0.3546, "step": 42008 }, { "epoch": 2.352391085227909, "grad_norm": 1.250585913658142, "learning_rate": 9.421315789473685e-05, "loss": 0.4734, "step": 42009 }, { "epoch": 2.352447082540038, "grad_norm": 1.2268357276916504, "learning_rate": 9.421289473684211e-05, "loss": 0.4409, "step": 42010 }, { "epoch": 2.352503079852167, "grad_norm": 1.2325795888900757, "learning_rate": 9.421263157894737e-05, "loss": 0.3902, "step": 42011 }, { "epoch": 2.352559077164296, "grad_norm": 1.3361058235168457, "learning_rate": 9.421236842105263e-05, "loss": 0.3958, "step": 42012 }, { "epoch": 2.352615074476425, "grad_norm": 1.0456767082214355, "learning_rate": 9.42121052631579e-05, "loss": 
0.4249, "step": 42013 }, { "epoch": 2.352671071788554, "grad_norm": 1.154228925704956, "learning_rate": 9.421184210526316e-05, "loss": 0.4413, "step": 42014 }, { "epoch": 2.352727069100683, "grad_norm": 1.2956551313400269, "learning_rate": 9.421157894736842e-05, "loss": 0.442, "step": 42015 }, { "epoch": 2.352783066412812, "grad_norm": 1.205148458480835, "learning_rate": 9.421131578947368e-05, "loss": 0.4808, "step": 42016 }, { "epoch": 2.3528390637249412, "grad_norm": 1.5038695335388184, "learning_rate": 9.421105263157896e-05, "loss": 0.4367, "step": 42017 }, { "epoch": 2.3528950610370702, "grad_norm": 1.2735575437545776, "learning_rate": 9.421078947368421e-05, "loss": 0.4128, "step": 42018 }, { "epoch": 2.3529510583491993, "grad_norm": 1.2765793800354004, "learning_rate": 9.421052631578949e-05, "loss": 0.439, "step": 42019 }, { "epoch": 2.3530070556613283, "grad_norm": 1.1689181327819824, "learning_rate": 9.421026315789473e-05, "loss": 0.4519, "step": 42020 }, { "epoch": 2.3530630529734573, "grad_norm": 1.1140270233154297, "learning_rate": 9.421000000000001e-05, "loss": 0.3811, "step": 42021 }, { "epoch": 2.3531190502855863, "grad_norm": 1.6903316974639893, "learning_rate": 9.420973684210527e-05, "loss": 0.6076, "step": 42022 }, { "epoch": 2.3531750475977153, "grad_norm": 1.437776803970337, "learning_rate": 9.420947368421054e-05, "loss": 0.6009, "step": 42023 }, { "epoch": 2.3532310449098444, "grad_norm": 1.2875986099243164, "learning_rate": 9.42092105263158e-05, "loss": 0.5224, "step": 42024 }, { "epoch": 2.3532870422219734, "grad_norm": 1.072899580001831, "learning_rate": 9.420894736842105e-05, "loss": 0.3386, "step": 42025 }, { "epoch": 2.3533430395341024, "grad_norm": 1.104203701019287, "learning_rate": 9.420868421052632e-05, "loss": 0.3261, "step": 42026 }, { "epoch": 2.3533990368462314, "grad_norm": 1.3839703798294067, "learning_rate": 9.420842105263158e-05, "loss": 0.3843, "step": 42027 }, { "epoch": 2.3534550341583604, "grad_norm": 1.1921048164367676, 
"learning_rate": 9.420815789473685e-05, "loss": 0.5402, "step": 42028 }, { "epoch": 2.3535110314704895, "grad_norm": 1.1254487037658691, "learning_rate": 9.420789473684211e-05, "loss": 0.4031, "step": 42029 }, { "epoch": 2.3535670287826185, "grad_norm": 1.0913447141647339, "learning_rate": 9.420763157894737e-05, "loss": 0.4249, "step": 42030 }, { "epoch": 2.3536230260947475, "grad_norm": 1.4986735582351685, "learning_rate": 9.420736842105263e-05, "loss": 0.4284, "step": 42031 }, { "epoch": 2.3536790234068765, "grad_norm": 1.283302664756775, "learning_rate": 9.42071052631579e-05, "loss": 0.3525, "step": 42032 }, { "epoch": 2.3537350207190055, "grad_norm": 1.1544184684753418, "learning_rate": 9.420684210526317e-05, "loss": 0.4365, "step": 42033 }, { "epoch": 2.3537910180311346, "grad_norm": 1.0214793682098389, "learning_rate": 9.420657894736842e-05, "loss": 0.2939, "step": 42034 }, { "epoch": 2.3538470153432636, "grad_norm": 1.1046987771987915, "learning_rate": 9.420631578947368e-05, "loss": 0.3814, "step": 42035 }, { "epoch": 2.3539030126553926, "grad_norm": 1.1168564558029175, "learning_rate": 9.420605263157896e-05, "loss": 0.28, "step": 42036 }, { "epoch": 2.3539590099675216, "grad_norm": 1.2675065994262695, "learning_rate": 9.420578947368422e-05, "loss": 0.4099, "step": 42037 }, { "epoch": 2.3540150072796506, "grad_norm": 1.2064424753189087, "learning_rate": 9.420552631578948e-05, "loss": 0.4263, "step": 42038 }, { "epoch": 2.3540710045917796, "grad_norm": 1.1632829904556274, "learning_rate": 9.420526315789474e-05, "loss": 0.4582, "step": 42039 }, { "epoch": 2.3541270019039087, "grad_norm": 1.1134392023086548, "learning_rate": 9.420500000000001e-05, "loss": 0.3127, "step": 42040 }, { "epoch": 2.3541829992160377, "grad_norm": 1.4776973724365234, "learning_rate": 9.420473684210527e-05, "loss": 0.4227, "step": 42041 }, { "epoch": 2.3542389965281667, "grad_norm": 1.2541221380233765, "learning_rate": 9.420447368421053e-05, "loss": 0.453, "step": 42042 }, { "epoch": 
2.3542949938402957, "grad_norm": 1.1257274150848389, "learning_rate": 9.420421052631579e-05, "loss": 0.3714, "step": 42043 }, { "epoch": 2.3543509911524247, "grad_norm": 1.248753547668457, "learning_rate": 9.420394736842105e-05, "loss": 0.4053, "step": 42044 }, { "epoch": 2.3544069884645538, "grad_norm": 1.2743364572525024, "learning_rate": 9.420368421052632e-05, "loss": 0.4346, "step": 42045 }, { "epoch": 2.354462985776683, "grad_norm": 1.0889259576797485, "learning_rate": 9.420342105263158e-05, "loss": 0.3278, "step": 42046 }, { "epoch": 2.354518983088812, "grad_norm": 1.3129757642745972, "learning_rate": 9.420315789473684e-05, "loss": 0.3579, "step": 42047 }, { "epoch": 2.354574980400941, "grad_norm": 1.2878180742263794, "learning_rate": 9.42028947368421e-05, "loss": 0.4799, "step": 42048 }, { "epoch": 2.35463097771307, "grad_norm": 1.365297555923462, "learning_rate": 9.420263157894737e-05, "loss": 0.3907, "step": 42049 }, { "epoch": 2.354686975025199, "grad_norm": 1.35836923122406, "learning_rate": 9.420236842105263e-05, "loss": 0.3393, "step": 42050 }, { "epoch": 2.354742972337328, "grad_norm": 1.517835259437561, "learning_rate": 9.420210526315791e-05, "loss": 0.4224, "step": 42051 }, { "epoch": 2.354798969649457, "grad_norm": 1.301274061203003, "learning_rate": 9.420184210526315e-05, "loss": 0.3745, "step": 42052 }, { "epoch": 2.354854966961586, "grad_norm": 1.34084951877594, "learning_rate": 9.420157894736843e-05, "loss": 0.5806, "step": 42053 }, { "epoch": 2.354910964273715, "grad_norm": 1.339669942855835, "learning_rate": 9.420131578947369e-05, "loss": 0.5756, "step": 42054 }, { "epoch": 2.354966961585844, "grad_norm": 1.5642307996749878, "learning_rate": 9.420105263157896e-05, "loss": 0.42, "step": 42055 }, { "epoch": 2.355022958897973, "grad_norm": 3.180543899536133, "learning_rate": 9.420078947368422e-05, "loss": 0.3992, "step": 42056 }, { "epoch": 2.355078956210102, "grad_norm": 1.3625800609588623, "learning_rate": 9.420052631578948e-05, "loss": 
0.3916, "step": 42057 }, { "epoch": 2.355134953522231, "grad_norm": 2.2966105937957764, "learning_rate": 9.420026315789474e-05, "loss": 0.4625, "step": 42058 }, { "epoch": 2.35519095083436, "grad_norm": 1.1040472984313965, "learning_rate": 9.42e-05, "loss": 0.4012, "step": 42059 }, { "epoch": 2.355246948146489, "grad_norm": 1.2432657480239868, "learning_rate": 9.419973684210527e-05, "loss": 0.4039, "step": 42060 }, { "epoch": 2.355302945458618, "grad_norm": 1.4047868251800537, "learning_rate": 9.419947368421053e-05, "loss": 0.5086, "step": 42061 }, { "epoch": 2.355358942770747, "grad_norm": 1.0367718935012817, "learning_rate": 9.419921052631579e-05, "loss": 0.3648, "step": 42062 }, { "epoch": 2.355414940082876, "grad_norm": 1.008502721786499, "learning_rate": 9.419894736842105e-05, "loss": 0.3302, "step": 42063 }, { "epoch": 2.355470937395005, "grad_norm": 1.1072465181350708, "learning_rate": 9.419868421052633e-05, "loss": 0.3738, "step": 42064 }, { "epoch": 2.355526934707134, "grad_norm": 1.0874302387237549, "learning_rate": 9.419842105263158e-05, "loss": 0.4086, "step": 42065 }, { "epoch": 2.355582932019263, "grad_norm": 1.5220630168914795, "learning_rate": 9.419815789473684e-05, "loss": 0.6325, "step": 42066 }, { "epoch": 2.355638929331392, "grad_norm": 1.1535512208938599, "learning_rate": 9.41978947368421e-05, "loss": 0.32, "step": 42067 }, { "epoch": 2.355694926643521, "grad_norm": 1.1065874099731445, "learning_rate": 9.419763157894738e-05, "loss": 0.4099, "step": 42068 }, { "epoch": 2.3557509239556502, "grad_norm": 1.4733980894088745, "learning_rate": 9.419736842105264e-05, "loss": 0.471, "step": 42069 }, { "epoch": 2.3558069212677792, "grad_norm": 1.2396273612976074, "learning_rate": 9.41971052631579e-05, "loss": 0.4677, "step": 42070 }, { "epoch": 2.3558629185799083, "grad_norm": 1.0827510356903076, "learning_rate": 9.419684210526316e-05, "loss": 0.3787, "step": 42071 }, { "epoch": 2.3559189158920373, "grad_norm": 1.5058119297027588, "learning_rate": 
9.419657894736843e-05, "loss": 0.4911, "step": 42072 }, { "epoch": 2.3559749132041663, "grad_norm": 1.165886640548706, "learning_rate": 9.419631578947369e-05, "loss": 0.3993, "step": 42073 }, { "epoch": 2.3560309105162953, "grad_norm": 1.0766451358795166, "learning_rate": 9.419605263157896e-05, "loss": 0.4162, "step": 42074 }, { "epoch": 2.3560869078284243, "grad_norm": 0.9640364050865173, "learning_rate": 9.419578947368421e-05, "loss": 0.2919, "step": 42075 }, { "epoch": 2.3561429051405534, "grad_norm": 1.1855136156082153, "learning_rate": 9.419552631578947e-05, "loss": 0.4452, "step": 42076 }, { "epoch": 2.3561989024526824, "grad_norm": 1.188442587852478, "learning_rate": 9.419526315789474e-05, "loss": 0.3969, "step": 42077 }, { "epoch": 2.3562548997648114, "grad_norm": 1.849393606185913, "learning_rate": 9.4195e-05, "loss": 0.5059, "step": 42078 }, { "epoch": 2.3563108970769404, "grad_norm": 1.0141866207122803, "learning_rate": 9.419473684210528e-05, "loss": 0.3143, "step": 42079 }, { "epoch": 2.3563668943890694, "grad_norm": 1.200398325920105, "learning_rate": 9.419447368421052e-05, "loss": 0.4128, "step": 42080 }, { "epoch": 2.3564228917011985, "grad_norm": 1.2005687952041626, "learning_rate": 9.41942105263158e-05, "loss": 0.3887, "step": 42081 }, { "epoch": 2.3564788890133275, "grad_norm": 1.1121059656143188, "learning_rate": 9.419394736842105e-05, "loss": 0.361, "step": 42082 }, { "epoch": 2.3565348863254565, "grad_norm": 1.4347631931304932, "learning_rate": 9.419368421052633e-05, "loss": 0.5222, "step": 42083 }, { "epoch": 2.3565908836375855, "grad_norm": 1.2278953790664673, "learning_rate": 9.419342105263159e-05, "loss": 0.3644, "step": 42084 }, { "epoch": 2.3566468809497145, "grad_norm": 2.2126948833465576, "learning_rate": 9.419315789473685e-05, "loss": 0.5783, "step": 42085 }, { "epoch": 2.3567028782618435, "grad_norm": 1.3801368474960327, "learning_rate": 9.419289473684211e-05, "loss": 0.5011, "step": 42086 }, { "epoch": 2.3567588755739726, 
"grad_norm": 1.2987030744552612, "learning_rate": 9.419263157894738e-05, "loss": 0.5352, "step": 42087 }, { "epoch": 2.3568148728861016, "grad_norm": 1.2596839666366577, "learning_rate": 9.419236842105264e-05, "loss": 0.477, "step": 42088 }, { "epoch": 2.3568708701982306, "grad_norm": 1.0862349271774292, "learning_rate": 9.41921052631579e-05, "loss": 0.2845, "step": 42089 }, { "epoch": 2.3569268675103596, "grad_norm": 1.1161876916885376, "learning_rate": 9.419184210526316e-05, "loss": 0.388, "step": 42090 }, { "epoch": 2.3569828648224886, "grad_norm": 1.650629997253418, "learning_rate": 9.419157894736843e-05, "loss": 0.3566, "step": 42091 }, { "epoch": 2.3570388621346177, "grad_norm": 1.850733995437622, "learning_rate": 9.419131578947369e-05, "loss": 0.4441, "step": 42092 }, { "epoch": 2.3570948594467467, "grad_norm": 2.5519347190856934, "learning_rate": 9.419105263157895e-05, "loss": 0.3978, "step": 42093 }, { "epoch": 2.3571508567588757, "grad_norm": 1.5656487941741943, "learning_rate": 9.419078947368421e-05, "loss": 0.7014, "step": 42094 }, { "epoch": 2.3572068540710047, "grad_norm": 1.715987205505371, "learning_rate": 9.419052631578947e-05, "loss": 0.3879, "step": 42095 }, { "epoch": 2.3572628513831337, "grad_norm": 1.540969729423523, "learning_rate": 9.419026315789474e-05, "loss": 0.4568, "step": 42096 }, { "epoch": 2.3573188486952628, "grad_norm": 1.50597083568573, "learning_rate": 9.419e-05, "loss": 0.3852, "step": 42097 }, { "epoch": 2.3573748460073918, "grad_norm": 1.5645270347595215, "learning_rate": 9.418973684210526e-05, "loss": 0.4804, "step": 42098 }, { "epoch": 2.357430843319521, "grad_norm": 1.2777581214904785, "learning_rate": 9.418947368421052e-05, "loss": 0.3347, "step": 42099 }, { "epoch": 2.35748684063165, "grad_norm": 1.2117984294891357, "learning_rate": 9.41892105263158e-05, "loss": 0.4275, "step": 42100 }, { "epoch": 2.357542837943779, "grad_norm": 1.1498421430587769, "learning_rate": 9.418894736842106e-05, "loss": 0.3176, "step": 42101 }, { 
"epoch": 2.357598835255908, "grad_norm": 1.6608003377914429, "learning_rate": 9.418868421052632e-05, "loss": 0.4514, "step": 42102 }, { "epoch": 2.357654832568037, "grad_norm": 1.249788522720337, "learning_rate": 9.418842105263158e-05, "loss": 0.4094, "step": 42103 }, { "epoch": 2.357710829880166, "grad_norm": 1.2765742540359497, "learning_rate": 9.418815789473685e-05, "loss": 0.3854, "step": 42104 }, { "epoch": 2.357766827192295, "grad_norm": 1.3121942281723022, "learning_rate": 9.418789473684211e-05, "loss": 0.4046, "step": 42105 }, { "epoch": 2.357822824504424, "grad_norm": 1.1774343252182007, "learning_rate": 9.418763157894738e-05, "loss": 0.4232, "step": 42106 }, { "epoch": 2.357878821816553, "grad_norm": 1.0485992431640625, "learning_rate": 9.418736842105263e-05, "loss": 0.3953, "step": 42107 }, { "epoch": 2.357934819128682, "grad_norm": 1.2635159492492676, "learning_rate": 9.41871052631579e-05, "loss": 0.4725, "step": 42108 }, { "epoch": 2.357990816440811, "grad_norm": 1.3943434953689575, "learning_rate": 9.418684210526316e-05, "loss": 0.4141, "step": 42109 }, { "epoch": 2.35804681375294, "grad_norm": 1.1756008863449097, "learning_rate": 9.418657894736842e-05, "loss": 0.42, "step": 42110 }, { "epoch": 2.358102811065069, "grad_norm": 0.9625444412231445, "learning_rate": 9.41863157894737e-05, "loss": 0.278, "step": 42111 }, { "epoch": 2.358158808377198, "grad_norm": 1.0112862586975098, "learning_rate": 9.418605263157894e-05, "loss": 0.3779, "step": 42112 }, { "epoch": 2.358214805689327, "grad_norm": 1.143189549446106, "learning_rate": 9.418578947368421e-05, "loss": 0.3644, "step": 42113 }, { "epoch": 2.358270803001456, "grad_norm": 1.2035561800003052, "learning_rate": 9.418552631578947e-05, "loss": 0.3764, "step": 42114 }, { "epoch": 2.358326800313585, "grad_norm": 61.36873245239258, "learning_rate": 9.418526315789475e-05, "loss": 0.4916, "step": 42115 }, { "epoch": 2.358382797625714, "grad_norm": 1.449543833732605, "learning_rate": 9.418500000000001e-05, 
"loss": 0.4049, "step": 42116 }, { "epoch": 2.358438794937843, "grad_norm": 1.035894513130188, "learning_rate": 9.418473684210527e-05, "loss": 0.3785, "step": 42117 }, { "epoch": 2.358494792249972, "grad_norm": 1.1107745170593262, "learning_rate": 9.418447368421053e-05, "loss": 0.3628, "step": 42118 }, { "epoch": 2.358550789562101, "grad_norm": 3.718616247177124, "learning_rate": 9.41842105263158e-05, "loss": 0.3685, "step": 42119 }, { "epoch": 2.35860678687423, "grad_norm": 1.372156023979187, "learning_rate": 9.418394736842106e-05, "loss": 0.2991, "step": 42120 }, { "epoch": 2.358662784186359, "grad_norm": 1.1128473281860352, "learning_rate": 9.418368421052632e-05, "loss": 0.3907, "step": 42121 }, { "epoch": 2.3587187814984882, "grad_norm": 1.4412513971328735, "learning_rate": 9.418342105263158e-05, "loss": 0.5189, "step": 42122 }, { "epoch": 2.3587747788106173, "grad_norm": 1.358426570892334, "learning_rate": 9.418315789473685e-05, "loss": 0.3482, "step": 42123 }, { "epoch": 2.3588307761227463, "grad_norm": 1.280336856842041, "learning_rate": 9.418289473684211e-05, "loss": 0.5239, "step": 42124 }, { "epoch": 2.3588867734348753, "grad_norm": 1.1203536987304688, "learning_rate": 9.418263157894737e-05, "loss": 0.3963, "step": 42125 }, { "epoch": 2.3589427707470043, "grad_norm": 1.1906802654266357, "learning_rate": 9.418236842105263e-05, "loss": 0.4178, "step": 42126 }, { "epoch": 2.3589987680591333, "grad_norm": 1.1729192733764648, "learning_rate": 9.418210526315789e-05, "loss": 0.4484, "step": 42127 }, { "epoch": 2.3590547653712624, "grad_norm": 1.3363909721374512, "learning_rate": 9.418184210526316e-05, "loss": 0.4531, "step": 42128 }, { "epoch": 2.3591107626833914, "grad_norm": 0.9300652146339417, "learning_rate": 9.418157894736842e-05, "loss": 0.3279, "step": 42129 }, { "epoch": 2.3591667599955204, "grad_norm": 1.2821087837219238, "learning_rate": 9.418131578947368e-05, "loss": 0.4305, "step": 42130 }, { "epoch": 2.3592227573076494, "grad_norm": 
1.1366273164749146, "learning_rate": 9.418105263157894e-05, "loss": 0.4396, "step": 42131 }, { "epoch": 2.3592787546197784, "grad_norm": 1.0845974683761597, "learning_rate": 9.418078947368422e-05, "loss": 0.3359, "step": 42132 }, { "epoch": 2.3593347519319074, "grad_norm": 1.292511224746704, "learning_rate": 9.418052631578948e-05, "loss": 0.4877, "step": 42133 }, { "epoch": 2.3593907492440365, "grad_norm": 1.1286345720291138, "learning_rate": 9.418026315789475e-05, "loss": 0.3599, "step": 42134 }, { "epoch": 2.3594467465561655, "grad_norm": 1.0115923881530762, "learning_rate": 9.418e-05, "loss": 0.38, "step": 42135 }, { "epoch": 2.3595027438682945, "grad_norm": 1.2855521440505981, "learning_rate": 9.417973684210527e-05, "loss": 0.3605, "step": 42136 }, { "epoch": 2.3595587411804235, "grad_norm": 1.0307698249816895, "learning_rate": 9.417947368421053e-05, "loss": 0.3303, "step": 42137 }, { "epoch": 2.3596147384925525, "grad_norm": 1.32102370262146, "learning_rate": 9.41792105263158e-05, "loss": 0.4074, "step": 42138 }, { "epoch": 2.3596707358046816, "grad_norm": 1.0507911443710327, "learning_rate": 9.417894736842106e-05, "loss": 0.3955, "step": 42139 }, { "epoch": 2.3597267331168106, "grad_norm": 1.2132923603057861, "learning_rate": 9.417868421052632e-05, "loss": 0.3623, "step": 42140 }, { "epoch": 2.3597827304289396, "grad_norm": 1.2888809442520142, "learning_rate": 9.417842105263158e-05, "loss": 0.3651, "step": 42141 }, { "epoch": 2.3598387277410686, "grad_norm": 1.3733798265457153, "learning_rate": 9.417815789473685e-05, "loss": 0.5268, "step": 42142 }, { "epoch": 2.3598947250531976, "grad_norm": 1.2760508060455322, "learning_rate": 9.417789473684211e-05, "loss": 0.4916, "step": 42143 }, { "epoch": 2.3599507223653267, "grad_norm": 1.8532829284667969, "learning_rate": 9.417763157894737e-05, "loss": 0.5818, "step": 42144 }, { "epoch": 2.3600067196774557, "grad_norm": 1.2099051475524902, "learning_rate": 9.417736842105263e-05, "loss": 0.4142, "step": 42145 }, { 
"epoch": 2.3600627169895843, "grad_norm": 0.9829007387161255, "learning_rate": 9.41771052631579e-05, "loss": 0.3263, "step": 42146 }, { "epoch": 2.3601187143017137, "grad_norm": 1.2856959104537964, "learning_rate": 9.417684210526317e-05, "loss": 0.4431, "step": 42147 }, { "epoch": 2.3601747116138423, "grad_norm": 1.0858981609344482, "learning_rate": 9.417657894736843e-05, "loss": 0.4336, "step": 42148 }, { "epoch": 2.3602307089259718, "grad_norm": 1.0360996723175049, "learning_rate": 9.417631578947369e-05, "loss": 0.3676, "step": 42149 }, { "epoch": 2.3602867062381003, "grad_norm": 1.082420825958252, "learning_rate": 9.417605263157895e-05, "loss": 0.3381, "step": 42150 }, { "epoch": 2.36034270355023, "grad_norm": 1.4514939785003662, "learning_rate": 9.417578947368422e-05, "loss": 0.4968, "step": 42151 }, { "epoch": 2.3603987008623584, "grad_norm": 3.6139161586761475, "learning_rate": 9.417552631578948e-05, "loss": 0.4063, "step": 42152 }, { "epoch": 2.360454698174488, "grad_norm": 1.0487850904464722, "learning_rate": 9.417526315789474e-05, "loss": 0.441, "step": 42153 }, { "epoch": 2.3605106954866164, "grad_norm": 1.2696326971054077, "learning_rate": 9.4175e-05, "loss": 0.3713, "step": 42154 }, { "epoch": 2.360566692798746, "grad_norm": 1.152755618095398, "learning_rate": 9.417473684210527e-05, "loss": 0.3529, "step": 42155 }, { "epoch": 2.3606226901108744, "grad_norm": 1.2065237760543823, "learning_rate": 9.417447368421053e-05, "loss": 0.4083, "step": 42156 }, { "epoch": 2.360678687423004, "grad_norm": 1.2593610286712646, "learning_rate": 9.417421052631579e-05, "loss": 0.4469, "step": 42157 }, { "epoch": 2.3607346847351325, "grad_norm": 1.5991078615188599, "learning_rate": 9.417394736842105e-05, "loss": 0.7718, "step": 42158 }, { "epoch": 2.360790682047262, "grad_norm": 1.1831023693084717, "learning_rate": 9.417368421052632e-05, "loss": 0.4062, "step": 42159 }, { "epoch": 2.3608466793593905, "grad_norm": 1.384891390800476, "learning_rate": 9.417342105263158e-05, 
"loss": 0.4493, "step": 42160 }, { "epoch": 2.36090267667152, "grad_norm": 1.5018233060836792, "learning_rate": 9.417315789473686e-05, "loss": 0.4251, "step": 42161 }, { "epoch": 2.3609586739836486, "grad_norm": 1.1330299377441406, "learning_rate": 9.41728947368421e-05, "loss": 0.3526, "step": 42162 }, { "epoch": 2.361014671295778, "grad_norm": 1.162330150604248, "learning_rate": 9.417263157894736e-05, "loss": 0.3637, "step": 42163 }, { "epoch": 2.3610706686079066, "grad_norm": 1.1777554750442505, "learning_rate": 9.417236842105264e-05, "loss": 0.5096, "step": 42164 }, { "epoch": 2.361126665920036, "grad_norm": 1.178244948387146, "learning_rate": 9.41721052631579e-05, "loss": 0.4498, "step": 42165 }, { "epoch": 2.3611826632321646, "grad_norm": 1.3178316354751587, "learning_rate": 9.417184210526317e-05, "loss": 0.3688, "step": 42166 }, { "epoch": 2.361238660544294, "grad_norm": 1.0979162454605103, "learning_rate": 9.417157894736842e-05, "loss": 0.3657, "step": 42167 }, { "epoch": 2.3612946578564227, "grad_norm": 1.0116496086120605, "learning_rate": 9.417131578947369e-05, "loss": 0.3186, "step": 42168 }, { "epoch": 2.361350655168552, "grad_norm": 1.1470129489898682, "learning_rate": 9.417105263157895e-05, "loss": 0.3409, "step": 42169 }, { "epoch": 2.3614066524806807, "grad_norm": 1.3460580110549927, "learning_rate": 9.417078947368422e-05, "loss": 0.4685, "step": 42170 }, { "epoch": 2.36146264979281, "grad_norm": 1.2768175601959229, "learning_rate": 9.417052631578948e-05, "loss": 0.4529, "step": 42171 }, { "epoch": 2.3615186471049388, "grad_norm": 1.029980182647705, "learning_rate": 9.417026315789474e-05, "loss": 0.3452, "step": 42172 }, { "epoch": 2.361574644417068, "grad_norm": 1.3561220169067383, "learning_rate": 9.417e-05, "loss": 0.4805, "step": 42173 }, { "epoch": 2.361630641729197, "grad_norm": 1.126116156578064, "learning_rate": 9.416973684210527e-05, "loss": 0.2943, "step": 42174 }, { "epoch": 2.3616866390413263, "grad_norm": 1.1219772100448608, 
"learning_rate": 9.416947368421053e-05, "loss": 0.376, "step": 42175 }, { "epoch": 2.361742636353455, "grad_norm": 1.1999244689941406, "learning_rate": 9.41692105263158e-05, "loss": 0.3814, "step": 42176 }, { "epoch": 2.3617986336655843, "grad_norm": 1.2493246793746948, "learning_rate": 9.416894736842105e-05, "loss": 0.3363, "step": 42177 }, { "epoch": 2.361854630977713, "grad_norm": 1.4358148574829102, "learning_rate": 9.416868421052633e-05, "loss": 0.3995, "step": 42178 }, { "epoch": 2.3619106282898423, "grad_norm": 1.453559398651123, "learning_rate": 9.416842105263159e-05, "loss": 0.5668, "step": 42179 }, { "epoch": 2.361966625601971, "grad_norm": 1.3580994606018066, "learning_rate": 9.416815789473685e-05, "loss": 0.5791, "step": 42180 }, { "epoch": 2.3620226229141004, "grad_norm": 1.1576915979385376, "learning_rate": 9.41678947368421e-05, "loss": 0.3847, "step": 42181 }, { "epoch": 2.362078620226229, "grad_norm": 1.1490558385849, "learning_rate": 9.416763157894737e-05, "loss": 0.6186, "step": 42182 }, { "epoch": 2.3621346175383584, "grad_norm": 1.0884122848510742, "learning_rate": 9.416736842105264e-05, "loss": 0.2773, "step": 42183 }, { "epoch": 2.362190614850487, "grad_norm": 1.2221380472183228, "learning_rate": 9.41671052631579e-05, "loss": 0.3557, "step": 42184 }, { "epoch": 2.3622466121626164, "grad_norm": 1.364810824394226, "learning_rate": 9.416684210526316e-05, "loss": 0.4715, "step": 42185 }, { "epoch": 2.362302609474745, "grad_norm": 1.260488510131836, "learning_rate": 9.416657894736842e-05, "loss": 0.594, "step": 42186 }, { "epoch": 2.362358606786874, "grad_norm": 1.143228530883789, "learning_rate": 9.416631578947369e-05, "loss": 0.5061, "step": 42187 }, { "epoch": 2.362414604099003, "grad_norm": 1.1019113063812256, "learning_rate": 9.416605263157895e-05, "loss": 0.3515, "step": 42188 }, { "epoch": 2.362470601411132, "grad_norm": 1.1494735479354858, "learning_rate": 9.416578947368422e-05, "loss": 0.3531, "step": 42189 }, { "epoch": 2.362526598723261, 
"grad_norm": 1.0774478912353516, "learning_rate": 9.416552631578947e-05, "loss": 0.3085, "step": 42190 }, { "epoch": 2.36258259603539, "grad_norm": 1.184244155883789, "learning_rate": 9.416526315789474e-05, "loss": 0.4209, "step": 42191 }, { "epoch": 2.362638593347519, "grad_norm": 1.2900294065475464, "learning_rate": 9.4165e-05, "loss": 0.5229, "step": 42192 }, { "epoch": 2.362694590659648, "grad_norm": 0.9700883030891418, "learning_rate": 9.416473684210528e-05, "loss": 0.3744, "step": 42193 }, { "epoch": 2.362750587971777, "grad_norm": 1.0989584922790527, "learning_rate": 9.416447368421054e-05, "loss": 0.433, "step": 42194 }, { "epoch": 2.362806585283906, "grad_norm": 1.1285914182662964, "learning_rate": 9.41642105263158e-05, "loss": 0.3869, "step": 42195 }, { "epoch": 2.362862582596035, "grad_norm": 1.1750162839889526, "learning_rate": 9.416394736842106e-05, "loss": 0.3492, "step": 42196 }, { "epoch": 2.3629185799081642, "grad_norm": 1.3149181604385376, "learning_rate": 9.416368421052632e-05, "loss": 0.4276, "step": 42197 }, { "epoch": 2.3629745772202932, "grad_norm": 1.1668169498443604, "learning_rate": 9.416342105263159e-05, "loss": 0.3422, "step": 42198 }, { "epoch": 2.3630305745324223, "grad_norm": 1.0885361433029175, "learning_rate": 9.416315789473684e-05, "loss": 0.3907, "step": 42199 }, { "epoch": 2.3630865718445513, "grad_norm": 0.9846569895744324, "learning_rate": 9.416289473684211e-05, "loss": 0.3883, "step": 42200 }, { "epoch": 2.3631425691566803, "grad_norm": 1.1640223264694214, "learning_rate": 9.416263157894737e-05, "loss": 0.4835, "step": 42201 }, { "epoch": 2.3631985664688093, "grad_norm": 1.1281291246414185, "learning_rate": 9.416236842105264e-05, "loss": 0.4183, "step": 42202 }, { "epoch": 2.3632545637809383, "grad_norm": 1.3642750978469849, "learning_rate": 9.41621052631579e-05, "loss": 0.4044, "step": 42203 }, { "epoch": 2.3633105610930674, "grad_norm": 1.5359916687011719, "learning_rate": 9.416184210526316e-05, "loss": 0.421, "step": 42204 
}, { "epoch": 2.3633665584051964, "grad_norm": 1.3497382402420044, "learning_rate": 9.416157894736842e-05, "loss": 0.4016, "step": 42205 }, { "epoch": 2.3634225557173254, "grad_norm": 1.1716583967208862, "learning_rate": 9.41613157894737e-05, "loss": 0.339, "step": 42206 }, { "epoch": 2.3634785530294544, "grad_norm": 1.4831912517547607, "learning_rate": 9.416105263157895e-05, "loss": 0.3675, "step": 42207 }, { "epoch": 2.3635345503415834, "grad_norm": 1.106115460395813, "learning_rate": 9.416078947368421e-05, "loss": 0.3889, "step": 42208 }, { "epoch": 2.3635905476537125, "grad_norm": 1.136399745941162, "learning_rate": 9.416052631578947e-05, "loss": 0.5022, "step": 42209 }, { "epoch": 2.3636465449658415, "grad_norm": 1.2835071086883545, "learning_rate": 9.416026315789475e-05, "loss": 0.4914, "step": 42210 }, { "epoch": 2.3637025422779705, "grad_norm": 1.346935510635376, "learning_rate": 9.416e-05, "loss": 0.4141, "step": 42211 }, { "epoch": 2.3637585395900995, "grad_norm": 1.3580408096313477, "learning_rate": 9.415973684210527e-05, "loss": 0.3836, "step": 42212 }, { "epoch": 2.3638145369022285, "grad_norm": 1.3355134725570679, "learning_rate": 9.415947368421053e-05, "loss": 0.3028, "step": 42213 }, { "epoch": 2.3638705342143576, "grad_norm": 1.0368287563323975, "learning_rate": 9.415921052631579e-05, "loss": 0.3456, "step": 42214 }, { "epoch": 2.3639265315264866, "grad_norm": 1.2260124683380127, "learning_rate": 9.415894736842106e-05, "loss": 0.3136, "step": 42215 }, { "epoch": 2.3639825288386156, "grad_norm": 1.2879105806350708, "learning_rate": 9.415868421052632e-05, "loss": 0.3998, "step": 42216 }, { "epoch": 2.3640385261507446, "grad_norm": 1.2197303771972656, "learning_rate": 9.415842105263158e-05, "loss": 0.5165, "step": 42217 }, { "epoch": 2.3640945234628736, "grad_norm": 1.231621503829956, "learning_rate": 9.415815789473684e-05, "loss": 0.4964, "step": 42218 }, { "epoch": 2.3641505207750027, "grad_norm": 1.1322216987609863, "learning_rate": 
9.415789473684211e-05, "loss": 0.3759, "step": 42219 }, { "epoch": 2.3642065180871317, "grad_norm": 1.1338282823562622, "learning_rate": 9.415763157894737e-05, "loss": 0.3694, "step": 42220 }, { "epoch": 2.3642625153992607, "grad_norm": 1.3787081241607666, "learning_rate": 9.415736842105264e-05, "loss": 0.4801, "step": 42221 }, { "epoch": 2.3643185127113897, "grad_norm": 1.359574794769287, "learning_rate": 9.415710526315789e-05, "loss": 0.4804, "step": 42222 }, { "epoch": 2.3643745100235187, "grad_norm": 1.0256779193878174, "learning_rate": 9.415684210526316e-05, "loss": 0.318, "step": 42223 }, { "epoch": 2.3644305073356477, "grad_norm": 1.412192702293396, "learning_rate": 9.415657894736842e-05, "loss": 0.3921, "step": 42224 }, { "epoch": 2.3644865046477768, "grad_norm": 1.0518834590911865, "learning_rate": 9.41563157894737e-05, "loss": 0.5457, "step": 42225 }, { "epoch": 2.364542501959906, "grad_norm": 1.1536314487457275, "learning_rate": 9.415605263157896e-05, "loss": 0.363, "step": 42226 }, { "epoch": 2.364598499272035, "grad_norm": 2.0204622745513916, "learning_rate": 9.415578947368422e-05, "loss": 0.5185, "step": 42227 }, { "epoch": 2.364654496584164, "grad_norm": 1.1709697246551514, "learning_rate": 9.415552631578948e-05, "loss": 0.392, "step": 42228 }, { "epoch": 2.364710493896293, "grad_norm": 1.1572654247283936, "learning_rate": 9.415526315789475e-05, "loss": 0.4597, "step": 42229 }, { "epoch": 2.364766491208422, "grad_norm": 1.1046416759490967, "learning_rate": 9.415500000000001e-05, "loss": 0.3784, "step": 42230 }, { "epoch": 2.364822488520551, "grad_norm": 1.4480878114700317, "learning_rate": 9.415473684210527e-05, "loss": 0.399, "step": 42231 }, { "epoch": 2.36487848583268, "grad_norm": 1.3057136535644531, "learning_rate": 9.415447368421053e-05, "loss": 0.5041, "step": 42232 }, { "epoch": 2.364934483144809, "grad_norm": 1.4253910779953003, "learning_rate": 9.415421052631579e-05, "loss": 0.5961, "step": 42233 }, { "epoch": 2.364990480456938, 
"grad_norm": 1.3882946968078613, "learning_rate": 9.415394736842106e-05, "loss": 0.5922, "step": 42234 }, { "epoch": 2.365046477769067, "grad_norm": 1.0331233739852905, "learning_rate": 9.415368421052632e-05, "loss": 0.3107, "step": 42235 }, { "epoch": 2.365102475081196, "grad_norm": 1.6579644680023193, "learning_rate": 9.415342105263158e-05, "loss": 0.4178, "step": 42236 }, { "epoch": 2.365158472393325, "grad_norm": 1.7633724212646484, "learning_rate": 9.415315789473684e-05, "loss": 0.4294, "step": 42237 }, { "epoch": 2.365214469705454, "grad_norm": 0.9841364622116089, "learning_rate": 9.415289473684211e-05, "loss": 0.3487, "step": 42238 }, { "epoch": 2.365270467017583, "grad_norm": 1.0259346961975098, "learning_rate": 9.415263157894737e-05, "loss": 0.332, "step": 42239 }, { "epoch": 2.365326464329712, "grad_norm": 1.0068045854568481, "learning_rate": 9.415236842105263e-05, "loss": 0.3138, "step": 42240 }, { "epoch": 2.365382461641841, "grad_norm": 1.7210932970046997, "learning_rate": 9.415210526315789e-05, "loss": 0.3268, "step": 42241 }, { "epoch": 2.36543845895397, "grad_norm": 1.2664185762405396, "learning_rate": 9.415184210526317e-05, "loss": 0.3914, "step": 42242 }, { "epoch": 2.365494456266099, "grad_norm": 1.252374529838562, "learning_rate": 9.415157894736843e-05, "loss": 0.4191, "step": 42243 }, { "epoch": 2.365550453578228, "grad_norm": 1.5683587789535522, "learning_rate": 9.41513157894737e-05, "loss": 0.4306, "step": 42244 }, { "epoch": 2.365606450890357, "grad_norm": 1.137418270111084, "learning_rate": 9.415105263157895e-05, "loss": 0.4835, "step": 42245 }, { "epoch": 2.365662448202486, "grad_norm": 1.2689480781555176, "learning_rate": 9.415078947368422e-05, "loss": 0.4379, "step": 42246 }, { "epoch": 2.365718445514615, "grad_norm": 1.120169758796692, "learning_rate": 9.415052631578948e-05, "loss": 0.344, "step": 42247 }, { "epoch": 2.365774442826744, "grad_norm": 1.237959861755371, "learning_rate": 9.415026315789474e-05, "loss": 0.3245, "step": 42248 
}, { "epoch": 2.3658304401388732, "grad_norm": 1.3555324077606201, "learning_rate": 9.415e-05, "loss": 0.3726, "step": 42249 }, { "epoch": 2.3658864374510022, "grad_norm": 1.4046891927719116, "learning_rate": 9.414973684210526e-05, "loss": 0.5583, "step": 42250 }, { "epoch": 2.3659424347631313, "grad_norm": 1.1015177965164185, "learning_rate": 9.414947368421053e-05, "loss": 0.373, "step": 42251 }, { "epoch": 2.3659984320752603, "grad_norm": 1.1048450469970703, "learning_rate": 9.414921052631579e-05, "loss": 0.3414, "step": 42252 }, { "epoch": 2.3660544293873893, "grad_norm": 1.351989507675171, "learning_rate": 9.414894736842106e-05, "loss": 0.4545, "step": 42253 }, { "epoch": 2.3661104266995183, "grad_norm": 1.4576549530029297, "learning_rate": 9.414868421052631e-05, "loss": 0.6975, "step": 42254 }, { "epoch": 2.3661664240116473, "grad_norm": 1.3782958984375, "learning_rate": 9.414842105263158e-05, "loss": 0.4966, "step": 42255 }, { "epoch": 2.3662224213237764, "grad_norm": 1.2241895198822021, "learning_rate": 9.414815789473684e-05, "loss": 0.5219, "step": 42256 }, { "epoch": 2.3662784186359054, "grad_norm": 1.0890096426010132, "learning_rate": 9.414789473684212e-05, "loss": 0.3904, "step": 42257 }, { "epoch": 2.3663344159480344, "grad_norm": 1.2898129224777222, "learning_rate": 9.414763157894738e-05, "loss": 0.4234, "step": 42258 }, { "epoch": 2.3663904132601634, "grad_norm": 1.3337053060531616, "learning_rate": 9.414736842105264e-05, "loss": 0.5256, "step": 42259 }, { "epoch": 2.3664464105722924, "grad_norm": 1.0534831285476685, "learning_rate": 9.41471052631579e-05, "loss": 0.39, "step": 42260 }, { "epoch": 2.3665024078844215, "grad_norm": 1.1585514545440674, "learning_rate": 9.414684210526317e-05, "loss": 0.51, "step": 42261 }, { "epoch": 2.3665584051965505, "grad_norm": 1.321932077407837, "learning_rate": 9.414657894736843e-05, "loss": 0.5089, "step": 42262 }, { "epoch": 2.3666144025086795, "grad_norm": 1.4394193887710571, "learning_rate": 
9.414631578947369e-05, "loss": 0.4224, "step": 42263 }, { "epoch": 2.3666703998208085, "grad_norm": 1.437076210975647, "learning_rate": 9.414605263157895e-05, "loss": 0.7174, "step": 42264 }, { "epoch": 2.3667263971329375, "grad_norm": 1.1430895328521729, "learning_rate": 9.414578947368422e-05, "loss": 0.4302, "step": 42265 }, { "epoch": 2.3667823944450666, "grad_norm": 1.5252368450164795, "learning_rate": 9.414552631578948e-05, "loss": 0.3615, "step": 42266 }, { "epoch": 2.3668383917571956, "grad_norm": 1.2454910278320312, "learning_rate": 9.414526315789474e-05, "loss": 0.3977, "step": 42267 }, { "epoch": 2.3668943890693246, "grad_norm": 1.3482586145401, "learning_rate": 9.4145e-05, "loss": 0.4887, "step": 42268 }, { "epoch": 2.3669503863814536, "grad_norm": 1.1646373271942139, "learning_rate": 9.414473684210526e-05, "loss": 0.3855, "step": 42269 }, { "epoch": 2.3670063836935826, "grad_norm": 1.4197959899902344, "learning_rate": 9.414447368421053e-05, "loss": 0.4413, "step": 42270 }, { "epoch": 2.3670623810057116, "grad_norm": 1.0230342149734497, "learning_rate": 9.41442105263158e-05, "loss": 0.365, "step": 42271 }, { "epoch": 2.3671183783178407, "grad_norm": 1.1333682537078857, "learning_rate": 9.414394736842105e-05, "loss": 0.385, "step": 42272 }, { "epoch": 2.3671743756299697, "grad_norm": 1.3012174367904663, "learning_rate": 9.414368421052631e-05, "loss": 0.4023, "step": 42273 }, { "epoch": 2.3672303729420987, "grad_norm": 1.4104245901107788, "learning_rate": 9.414342105263159e-05, "loss": 0.398, "step": 42274 }, { "epoch": 2.3672863702542277, "grad_norm": 1.1881835460662842, "learning_rate": 9.414315789473685e-05, "loss": 0.4203, "step": 42275 }, { "epoch": 2.3673423675663567, "grad_norm": 1.6257612705230713, "learning_rate": 9.414289473684212e-05, "loss": 0.5006, "step": 42276 }, { "epoch": 2.3673983648784858, "grad_norm": 1.0663199424743652, "learning_rate": 9.414263157894737e-05, "loss": 0.3501, "step": 42277 }, { "epoch": 2.367454362190615, "grad_norm": 
1.2903213500976562, "learning_rate": 9.414236842105264e-05, "loss": 0.4218, "step": 42278 }, { "epoch": 2.367510359502744, "grad_norm": 1.1001280546188354, "learning_rate": 9.41421052631579e-05, "loss": 0.3916, "step": 42279 }, { "epoch": 2.367566356814873, "grad_norm": 1.3239824771881104, "learning_rate": 9.414184210526317e-05, "loss": 0.3983, "step": 42280 }, { "epoch": 2.367622354127002, "grad_norm": 1.1877532005310059, "learning_rate": 9.414157894736843e-05, "loss": 0.4268, "step": 42281 }, { "epoch": 2.367678351439131, "grad_norm": 1.2388200759887695, "learning_rate": 9.414131578947369e-05, "loss": 0.3759, "step": 42282 }, { "epoch": 2.36773434875126, "grad_norm": 1.0127414464950562, "learning_rate": 9.414105263157895e-05, "loss": 0.4168, "step": 42283 }, { "epoch": 2.367790346063389, "grad_norm": 1.015407681465149, "learning_rate": 9.414078947368421e-05, "loss": 0.3612, "step": 42284 }, { "epoch": 2.367846343375518, "grad_norm": 1.247032642364502, "learning_rate": 9.414052631578948e-05, "loss": 0.4044, "step": 42285 }, { "epoch": 2.367902340687647, "grad_norm": 1.3325552940368652, "learning_rate": 9.414026315789474e-05, "loss": 0.3992, "step": 42286 }, { "epoch": 2.367958337999776, "grad_norm": 1.2160544395446777, "learning_rate": 9.414e-05, "loss": 0.4365, "step": 42287 }, { "epoch": 2.368014335311905, "grad_norm": 1.3640742301940918, "learning_rate": 9.413973684210526e-05, "loss": 0.5011, "step": 42288 }, { "epoch": 2.368070332624034, "grad_norm": 1.4170483350753784, "learning_rate": 9.413947368421054e-05, "loss": 0.2931, "step": 42289 }, { "epoch": 2.368126329936163, "grad_norm": 1.3146421909332275, "learning_rate": 9.41392105263158e-05, "loss": 0.4019, "step": 42290 }, { "epoch": 2.368182327248292, "grad_norm": 1.4473234415054321, "learning_rate": 9.413894736842106e-05, "loss": 0.4005, "step": 42291 }, { "epoch": 2.368238324560421, "grad_norm": 1.0588699579238892, "learning_rate": 9.413868421052632e-05, "loss": 0.3635, "step": 42292 }, { "epoch": 
2.36829432187255, "grad_norm": 1.165635585784912, "learning_rate": 9.413842105263159e-05, "loss": 0.3652, "step": 42293 }, { "epoch": 2.368350319184679, "grad_norm": 1.156206727027893, "learning_rate": 9.413815789473685e-05, "loss": 0.3808, "step": 42294 }, { "epoch": 2.368406316496808, "grad_norm": 1.1734799146652222, "learning_rate": 9.413789473684211e-05, "loss": 0.4195, "step": 42295 }, { "epoch": 2.368462313808937, "grad_norm": 1.3753827810287476, "learning_rate": 9.413763157894737e-05, "loss": 0.2852, "step": 42296 }, { "epoch": 2.368518311121066, "grad_norm": 1.1885277032852173, "learning_rate": 9.413736842105264e-05, "loss": 0.4409, "step": 42297 }, { "epoch": 2.368574308433195, "grad_norm": 1.0572854280471802, "learning_rate": 9.41371052631579e-05, "loss": 0.3468, "step": 42298 }, { "epoch": 2.368630305745324, "grad_norm": 1.1223461627960205, "learning_rate": 9.413684210526317e-05, "loss": 0.3509, "step": 42299 }, { "epoch": 2.368686303057453, "grad_norm": 1.2320494651794434, "learning_rate": 9.413657894736842e-05, "loss": 0.3432, "step": 42300 }, { "epoch": 2.368742300369582, "grad_norm": 1.1564735174179077, "learning_rate": 9.413631578947368e-05, "loss": 0.3376, "step": 42301 }, { "epoch": 2.3687982976817112, "grad_norm": 1.2554796934127808, "learning_rate": 9.413605263157895e-05, "loss": 0.4869, "step": 42302 }, { "epoch": 2.3688542949938403, "grad_norm": 1.1750661134719849, "learning_rate": 9.413578947368421e-05, "loss": 0.5216, "step": 42303 }, { "epoch": 2.3689102923059693, "grad_norm": 1.294796109199524, "learning_rate": 9.413552631578947e-05, "loss": 0.4229, "step": 42304 }, { "epoch": 2.3689662896180983, "grad_norm": 1.187984824180603, "learning_rate": 9.413526315789473e-05, "loss": 0.3875, "step": 42305 }, { "epoch": 2.3690222869302273, "grad_norm": 1.3266453742980957, "learning_rate": 9.4135e-05, "loss": 0.3631, "step": 42306 }, { "epoch": 2.3690782842423563, "grad_norm": 1.2450937032699585, "learning_rate": 9.413473684210527e-05, "loss": 
0.3293, "step": 42307 }, { "epoch": 2.3691342815544854, "grad_norm": 1.4017856121063232, "learning_rate": 9.413447368421054e-05, "loss": 0.4375, "step": 42308 }, { "epoch": 2.3691902788666144, "grad_norm": 1.0897020101547241, "learning_rate": 9.413421052631579e-05, "loss": 0.3458, "step": 42309 }, { "epoch": 2.3692462761787434, "grad_norm": 1.0226852893829346, "learning_rate": 9.413394736842106e-05, "loss": 0.3255, "step": 42310 }, { "epoch": 2.3693022734908724, "grad_norm": 1.2814875841140747, "learning_rate": 9.413368421052632e-05, "loss": 0.5398, "step": 42311 }, { "epoch": 2.3693582708030014, "grad_norm": 1.5108939409255981, "learning_rate": 9.413342105263159e-05, "loss": 0.4587, "step": 42312 }, { "epoch": 2.3694142681151305, "grad_norm": 1.310226321220398, "learning_rate": 9.413315789473685e-05, "loss": 0.4365, "step": 42313 }, { "epoch": 2.3694702654272595, "grad_norm": 1.241117238998413, "learning_rate": 9.413289473684211e-05, "loss": 0.3804, "step": 42314 }, { "epoch": 2.3695262627393885, "grad_norm": 1.2082135677337646, "learning_rate": 9.413263157894737e-05, "loss": 0.3618, "step": 42315 }, { "epoch": 2.3695822600515175, "grad_norm": 0.9718478322029114, "learning_rate": 9.413236842105264e-05, "loss": 0.3222, "step": 42316 }, { "epoch": 2.3696382573636465, "grad_norm": 1.446513056755066, "learning_rate": 9.41321052631579e-05, "loss": 0.6055, "step": 42317 }, { "epoch": 2.3696942546757755, "grad_norm": 1.2139391899108887, "learning_rate": 9.413184210526316e-05, "loss": 0.4942, "step": 42318 }, { "epoch": 2.3697502519879046, "grad_norm": 2.036512613296509, "learning_rate": 9.413157894736842e-05, "loss": 0.5179, "step": 42319 }, { "epoch": 2.3698062493000336, "grad_norm": 1.020703911781311, "learning_rate": 9.413131578947368e-05, "loss": 0.4201, "step": 42320 }, { "epoch": 2.3698622466121626, "grad_norm": 1.8046185970306396, "learning_rate": 9.413105263157896e-05, "loss": 0.382, "step": 42321 }, { "epoch": 2.3699182439242916, "grad_norm": 1.198351263999939, 
"learning_rate": 9.413078947368422e-05, "loss": 0.398, "step": 42322 }, { "epoch": 2.3699742412364206, "grad_norm": 1.2732659578323364, "learning_rate": 9.413052631578948e-05, "loss": 0.471, "step": 42323 }, { "epoch": 2.3700302385485497, "grad_norm": 1.4621856212615967, "learning_rate": 9.413026315789474e-05, "loss": 0.4931, "step": 42324 }, { "epoch": 2.3700862358606787, "grad_norm": 1.0360468626022339, "learning_rate": 9.413000000000001e-05, "loss": 0.4257, "step": 42325 }, { "epoch": 2.3701422331728077, "grad_norm": 1.0498589277267456, "learning_rate": 9.412973684210527e-05, "loss": 0.3706, "step": 42326 }, { "epoch": 2.3701982304849367, "grad_norm": 1.2263721227645874, "learning_rate": 9.412947368421053e-05, "loss": 0.3882, "step": 42327 }, { "epoch": 2.3702542277970657, "grad_norm": 1.3455969095230103, "learning_rate": 9.412921052631579e-05, "loss": 0.4953, "step": 42328 }, { "epoch": 2.3703102251091948, "grad_norm": 1.1746537685394287, "learning_rate": 9.412894736842106e-05, "loss": 0.4029, "step": 42329 }, { "epoch": 2.3703662224213238, "grad_norm": 1.2489250898361206, "learning_rate": 9.412868421052632e-05, "loss": 0.429, "step": 42330 }, { "epoch": 2.370422219733453, "grad_norm": 1.1374889612197876, "learning_rate": 9.41284210526316e-05, "loss": 0.3791, "step": 42331 }, { "epoch": 2.370478217045582, "grad_norm": 1.2026957273483276, "learning_rate": 9.412815789473684e-05, "loss": 0.3491, "step": 42332 }, { "epoch": 2.370534214357711, "grad_norm": 1.35395085811615, "learning_rate": 9.412789473684211e-05, "loss": 0.5071, "step": 42333 }, { "epoch": 2.37059021166984, "grad_norm": 1.2361044883728027, "learning_rate": 9.412763157894737e-05, "loss": 0.5443, "step": 42334 }, { "epoch": 2.370646208981969, "grad_norm": 1.1043180227279663, "learning_rate": 9.412736842105263e-05, "loss": 0.3432, "step": 42335 }, { "epoch": 2.370702206294098, "grad_norm": 1.1496894359588623, "learning_rate": 9.41271052631579e-05, "loss": 0.4516, "step": 42336 }, { "epoch": 
2.370758203606227, "grad_norm": 1.928362250328064, "learning_rate": 9.412684210526315e-05, "loss": 0.4151, "step": 42337 }, { "epoch": 2.370814200918356, "grad_norm": 1.3049509525299072, "learning_rate": 9.412657894736843e-05, "loss": 0.5352, "step": 42338 }, { "epoch": 2.370870198230485, "grad_norm": 1.2352666854858398, "learning_rate": 9.412631578947369e-05, "loss": 0.3984, "step": 42339 }, { "epoch": 2.370926195542614, "grad_norm": 1.3515156507492065, "learning_rate": 9.412605263157896e-05, "loss": 0.4786, "step": 42340 }, { "epoch": 2.370982192854743, "grad_norm": 1.0450198650360107, "learning_rate": 9.412578947368422e-05, "loss": 0.4316, "step": 42341 }, { "epoch": 2.371038190166872, "grad_norm": 1.3953994512557983, "learning_rate": 9.412552631578948e-05, "loss": 0.3837, "step": 42342 }, { "epoch": 2.371094187479001, "grad_norm": 1.0789560079574585, "learning_rate": 9.412526315789474e-05, "loss": 0.3729, "step": 42343 }, { "epoch": 2.37115018479113, "grad_norm": 1.348918080329895, "learning_rate": 9.412500000000001e-05, "loss": 0.3958, "step": 42344 }, { "epoch": 2.371206182103259, "grad_norm": 1.452298879623413, "learning_rate": 9.412473684210527e-05, "loss": 0.5401, "step": 42345 }, { "epoch": 2.371262179415388, "grad_norm": 1.172171950340271, "learning_rate": 9.412447368421053e-05, "loss": 0.3964, "step": 42346 }, { "epoch": 2.371318176727517, "grad_norm": 1.212975263595581, "learning_rate": 9.412421052631579e-05, "loss": 0.4021, "step": 42347 }, { "epoch": 2.371374174039646, "grad_norm": 1.376387119293213, "learning_rate": 9.412394736842106e-05, "loss": 0.5202, "step": 42348 }, { "epoch": 2.371430171351775, "grad_norm": 1.3071963787078857, "learning_rate": 9.412368421052632e-05, "loss": 0.3982, "step": 42349 }, { "epoch": 2.371486168663904, "grad_norm": 1.24335777759552, "learning_rate": 9.412342105263158e-05, "loss": 0.4069, "step": 42350 }, { "epoch": 2.371542165976033, "grad_norm": 1.262821912765503, "learning_rate": 9.412315789473684e-05, "loss": 
0.5295, "step": 42351 }, { "epoch": 2.371598163288162, "grad_norm": 1.224733829498291, "learning_rate": 9.41228947368421e-05, "loss": 0.3901, "step": 42352 }, { "epoch": 2.371654160600291, "grad_norm": 1.1541788578033447, "learning_rate": 9.412263157894738e-05, "loss": 0.4325, "step": 42353 }, { "epoch": 2.3717101579124202, "grad_norm": 1.331683874130249, "learning_rate": 9.412236842105264e-05, "loss": 0.5881, "step": 42354 }, { "epoch": 2.3717661552245493, "grad_norm": 1.3279318809509277, "learning_rate": 9.41221052631579e-05, "loss": 0.4407, "step": 42355 }, { "epoch": 2.3718221525366783, "grad_norm": 1.3541243076324463, "learning_rate": 9.412184210526315e-05, "loss": 0.511, "step": 42356 }, { "epoch": 2.3718781498488073, "grad_norm": 1.2629435062408447, "learning_rate": 9.412157894736843e-05, "loss": 0.4459, "step": 42357 }, { "epoch": 2.3719341471609363, "grad_norm": 1.2103248834609985, "learning_rate": 9.412131578947369e-05, "loss": 0.4809, "step": 42358 }, { "epoch": 2.3719901444730653, "grad_norm": 1.0771657228469849, "learning_rate": 9.412105263157895e-05, "loss": 0.2962, "step": 42359 }, { "epoch": 2.3720461417851944, "grad_norm": 1.3111639022827148, "learning_rate": 9.412078947368421e-05, "loss": 0.396, "step": 42360 }, { "epoch": 2.3721021390973234, "grad_norm": 1.2420310974121094, "learning_rate": 9.412052631578948e-05, "loss": 0.2855, "step": 42361 }, { "epoch": 2.3721581364094524, "grad_norm": 1.4291789531707764, "learning_rate": 9.412026315789474e-05, "loss": 0.5448, "step": 42362 }, { "epoch": 2.3722141337215814, "grad_norm": 1.33905827999115, "learning_rate": 9.412000000000001e-05, "loss": 0.358, "step": 42363 }, { "epoch": 2.3722701310337104, "grad_norm": 1.0521758794784546, "learning_rate": 9.411973684210526e-05, "loss": 0.3166, "step": 42364 }, { "epoch": 2.3723261283458394, "grad_norm": 1.2340853214263916, "learning_rate": 9.411947368421053e-05, "loss": 0.5367, "step": 42365 }, { "epoch": 2.3723821256579685, "grad_norm": 1.116244912147522, 
"learning_rate": 9.411921052631579e-05, "loss": 0.2931, "step": 42366 }, { "epoch": 2.3724381229700975, "grad_norm": 1.0315062999725342, "learning_rate": 9.411894736842107e-05, "loss": 0.3077, "step": 42367 }, { "epoch": 2.3724941202822265, "grad_norm": 1.2503843307495117, "learning_rate": 9.411868421052633e-05, "loss": 0.4035, "step": 42368 }, { "epoch": 2.3725501175943555, "grad_norm": 1.4868385791778564, "learning_rate": 9.411842105263157e-05, "loss": 0.5422, "step": 42369 }, { "epoch": 2.3726061149064845, "grad_norm": 1.2327184677124023, "learning_rate": 9.411815789473685e-05, "loss": 0.4454, "step": 42370 }, { "epoch": 2.3726621122186136, "grad_norm": 1.0259407758712769, "learning_rate": 9.41178947368421e-05, "loss": 0.3556, "step": 42371 }, { "epoch": 2.3727181095307426, "grad_norm": 1.0937976837158203, "learning_rate": 9.411763157894738e-05, "loss": 0.3771, "step": 42372 }, { "epoch": 2.3727741068428716, "grad_norm": 1.10593843460083, "learning_rate": 9.411736842105264e-05, "loss": 0.3912, "step": 42373 }, { "epoch": 2.3728301041550006, "grad_norm": 1.1883816719055176, "learning_rate": 9.41171052631579e-05, "loss": 0.3656, "step": 42374 }, { "epoch": 2.3728861014671296, "grad_norm": 1.1932767629623413, "learning_rate": 9.411684210526316e-05, "loss": 0.3913, "step": 42375 }, { "epoch": 2.3729420987792587, "grad_norm": 1.00204336643219, "learning_rate": 9.411657894736843e-05, "loss": 0.3639, "step": 42376 }, { "epoch": 2.3729980960913877, "grad_norm": 1.1682087182998657, "learning_rate": 9.411631578947369e-05, "loss": 0.3734, "step": 42377 }, { "epoch": 2.3730540934035167, "grad_norm": 1.6911283731460571, "learning_rate": 9.411605263157895e-05, "loss": 0.4758, "step": 42378 }, { "epoch": 2.3731100907156457, "grad_norm": 1.0432487726211548, "learning_rate": 9.411578947368421e-05, "loss": 0.3029, "step": 42379 }, { "epoch": 2.3731660880277747, "grad_norm": 1.0524612665176392, "learning_rate": 9.411552631578948e-05, "loss": 0.3323, "step": 42380 }, { "epoch": 
2.3732220853399038, "grad_norm": 0.9982818961143494, "learning_rate": 9.411526315789474e-05, "loss": 0.4652, "step": 42381 }, { "epoch": 2.3732780826520328, "grad_norm": 1.2826111316680908, "learning_rate": 9.4115e-05, "loss": 0.4497, "step": 42382 }, { "epoch": 2.373334079964162, "grad_norm": 1.3327100276947021, "learning_rate": 9.411473684210526e-05, "loss": 0.4338, "step": 42383 }, { "epoch": 2.373390077276291, "grad_norm": 1.1968905925750732, "learning_rate": 9.411447368421054e-05, "loss": 0.4065, "step": 42384 }, { "epoch": 2.37344607458842, "grad_norm": 1.0256004333496094, "learning_rate": 9.41142105263158e-05, "loss": 0.4193, "step": 42385 }, { "epoch": 2.373502071900549, "grad_norm": 1.3103188276290894, "learning_rate": 9.411394736842107e-05, "loss": 0.4892, "step": 42386 }, { "epoch": 2.373558069212678, "grad_norm": 1.2221312522888184, "learning_rate": 9.411368421052631e-05, "loss": 0.4555, "step": 42387 }, { "epoch": 2.373614066524807, "grad_norm": 1.1479402780532837, "learning_rate": 9.411342105263157e-05, "loss": 0.2965, "step": 42388 }, { "epoch": 2.373670063836936, "grad_norm": 1.3358964920043945, "learning_rate": 9.411315789473685e-05, "loss": 0.4131, "step": 42389 }, { "epoch": 2.373726061149065, "grad_norm": 1.0753743648529053, "learning_rate": 9.411289473684211e-05, "loss": 0.3822, "step": 42390 }, { "epoch": 2.373782058461194, "grad_norm": 1.2815124988555908, "learning_rate": 9.411263157894738e-05, "loss": 0.3596, "step": 42391 }, { "epoch": 2.373838055773323, "grad_norm": 1.650792121887207, "learning_rate": 9.411236842105263e-05, "loss": 0.3503, "step": 42392 }, { "epoch": 2.373894053085452, "grad_norm": 1.120585322380066, "learning_rate": 9.41121052631579e-05, "loss": 0.323, "step": 42393 }, { "epoch": 2.373950050397581, "grad_norm": 1.1304606199264526, "learning_rate": 9.411184210526316e-05, "loss": 0.4634, "step": 42394 }, { "epoch": 2.37400604770971, "grad_norm": 1.1531093120574951, "learning_rate": 9.411157894736843e-05, "loss": 0.47, 
"step": 42395 }, { "epoch": 2.374062045021839, "grad_norm": 1.1587066650390625, "learning_rate": 9.411131578947369e-05, "loss": 0.4124, "step": 42396 }, { "epoch": 2.374118042333968, "grad_norm": 1.1382609605789185, "learning_rate": 9.411105263157895e-05, "loss": 0.3733, "step": 42397 }, { "epoch": 2.374174039646097, "grad_norm": 1.3380318880081177, "learning_rate": 9.411078947368421e-05, "loss": 0.449, "step": 42398 }, { "epoch": 2.374230036958226, "grad_norm": 1.1523327827453613, "learning_rate": 9.411052631578949e-05, "loss": 0.5129, "step": 42399 }, { "epoch": 2.374286034270355, "grad_norm": 1.366464614868164, "learning_rate": 9.411026315789475e-05, "loss": 0.347, "step": 42400 }, { "epoch": 2.374342031582484, "grad_norm": 1.1939518451690674, "learning_rate": 9.411e-05, "loss": 0.3971, "step": 42401 }, { "epoch": 2.374398028894613, "grad_norm": 1.1485081911087036, "learning_rate": 9.410973684210527e-05, "loss": 0.4003, "step": 42402 }, { "epoch": 2.374454026206742, "grad_norm": 1.0558888912200928, "learning_rate": 9.410947368421054e-05, "loss": 0.2693, "step": 42403 }, { "epoch": 2.374510023518871, "grad_norm": 1.3160074949264526, "learning_rate": 9.41092105263158e-05, "loss": 0.373, "step": 42404 }, { "epoch": 2.374566020831, "grad_norm": 1.238418459892273, "learning_rate": 9.410894736842106e-05, "loss": 0.3897, "step": 42405 }, { "epoch": 2.3746220181431292, "grad_norm": 1.578658103942871, "learning_rate": 9.410868421052632e-05, "loss": 0.3431, "step": 42406 }, { "epoch": 2.3746780154552583, "grad_norm": 1.2434436082839966, "learning_rate": 9.410842105263158e-05, "loss": 0.5052, "step": 42407 }, { "epoch": 2.3747340127673873, "grad_norm": 1.179065465927124, "learning_rate": 9.410815789473685e-05, "loss": 0.3723, "step": 42408 }, { "epoch": 2.3747900100795163, "grad_norm": 1.3952171802520752, "learning_rate": 9.410789473684211e-05, "loss": 0.4517, "step": 42409 }, { "epoch": 2.3748460073916453, "grad_norm": 1.4435498714447021, "learning_rate": 
9.410763157894737e-05, "loss": 0.3141, "step": 42410 }, { "epoch": 2.3749020047037743, "grad_norm": 1.4181057214736938, "learning_rate": 9.410736842105263e-05, "loss": 0.5501, "step": 42411 }, { "epoch": 2.3749580020159033, "grad_norm": 1.1330512762069702, "learning_rate": 9.41071052631579e-05, "loss": 0.398, "step": 42412 }, { "epoch": 2.3750139993280324, "grad_norm": 1.0308467149734497, "learning_rate": 9.410684210526316e-05, "loss": 0.338, "step": 42413 }, { "epoch": 2.3750699966401614, "grad_norm": 1.4484411478042603, "learning_rate": 9.410657894736842e-05, "loss": 0.5694, "step": 42414 }, { "epoch": 2.3751259939522904, "grad_norm": 1.166049838066101, "learning_rate": 9.410631578947368e-05, "loss": 0.429, "step": 42415 }, { "epoch": 2.3751819912644194, "grad_norm": 1.5028506517410278, "learning_rate": 9.410605263157896e-05, "loss": 0.5987, "step": 42416 }, { "epoch": 2.3752379885765484, "grad_norm": 1.2491739988327026, "learning_rate": 9.410578947368422e-05, "loss": 0.4342, "step": 42417 }, { "epoch": 2.3752939858886775, "grad_norm": 1.2192823886871338, "learning_rate": 9.410552631578949e-05, "loss": 0.3977, "step": 42418 }, { "epoch": 2.3753499832008065, "grad_norm": 1.2274869680404663, "learning_rate": 9.410526315789473e-05, "loss": 0.3943, "step": 42419 }, { "epoch": 2.3754059805129355, "grad_norm": 1.4463802576065063, "learning_rate": 9.410500000000001e-05, "loss": 0.3577, "step": 42420 }, { "epoch": 2.3754619778250645, "grad_norm": 0.9956678748130798, "learning_rate": 9.410473684210527e-05, "loss": 0.2971, "step": 42421 }, { "epoch": 2.3755179751371935, "grad_norm": 2.575307846069336, "learning_rate": 9.410447368421053e-05, "loss": 0.3574, "step": 42422 }, { "epoch": 2.3755739724493226, "grad_norm": 1.0277279615402222, "learning_rate": 9.41042105263158e-05, "loss": 0.3396, "step": 42423 }, { "epoch": 2.3756299697614516, "grad_norm": 1.3075816631317139, "learning_rate": 9.410394736842105e-05, "loss": 0.4444, "step": 42424 }, { "epoch": 2.3756859670735806, 
"grad_norm": 1.1444107294082642, "learning_rate": 9.410368421052632e-05, "loss": 0.3799, "step": 42425 }, { "epoch": 2.3757419643857096, "grad_norm": 1.200604796409607, "learning_rate": 9.410342105263158e-05, "loss": 0.3877, "step": 42426 }, { "epoch": 2.3757979616978386, "grad_norm": 1.075493335723877, "learning_rate": 9.410315789473685e-05, "loss": 0.3021, "step": 42427 }, { "epoch": 2.3758539590099677, "grad_norm": 1.1089084148406982, "learning_rate": 9.410289473684211e-05, "loss": 0.2997, "step": 42428 }, { "epoch": 2.3759099563220967, "grad_norm": 1.238358974456787, "learning_rate": 9.410263157894737e-05, "loss": 0.4752, "step": 42429 }, { "epoch": 2.3759659536342257, "grad_norm": 1.1755434274673462, "learning_rate": 9.410236842105263e-05, "loss": 0.4905, "step": 42430 }, { "epoch": 2.3760219509463547, "grad_norm": 2.001124382019043, "learning_rate": 9.41021052631579e-05, "loss": 0.5663, "step": 42431 }, { "epoch": 2.3760779482584837, "grad_norm": 1.191519856452942, "learning_rate": 9.410184210526317e-05, "loss": 0.4035, "step": 42432 }, { "epoch": 2.3761339455706127, "grad_norm": 1.160946249961853, "learning_rate": 9.410157894736842e-05, "loss": 0.3357, "step": 42433 }, { "epoch": 2.3761899428827418, "grad_norm": 1.127658724784851, "learning_rate": 9.410131578947368e-05, "loss": 0.3297, "step": 42434 }, { "epoch": 2.376245940194871, "grad_norm": 1.3732601404190063, "learning_rate": 9.410105263157896e-05, "loss": 0.5153, "step": 42435 }, { "epoch": 2.376301937507, "grad_norm": 1.3933708667755127, "learning_rate": 9.410078947368422e-05, "loss": 0.3478, "step": 42436 }, { "epoch": 2.376357934819129, "grad_norm": 1.1052151918411255, "learning_rate": 9.410052631578948e-05, "loss": 0.5795, "step": 42437 }, { "epoch": 2.376413932131258, "grad_norm": 1.2381658554077148, "learning_rate": 9.410026315789474e-05, "loss": 0.3963, "step": 42438 }, { "epoch": 2.376469929443387, "grad_norm": 1.3573307991027832, "learning_rate": 9.41e-05, "loss": 0.4522, "step": 42439 }, { 
"epoch": 2.376525926755516, "grad_norm": 1.3043878078460693, "learning_rate": 9.409973684210527e-05, "loss": 0.3714, "step": 42440 }, { "epoch": 2.376581924067645, "grad_norm": 1.2031733989715576, "learning_rate": 9.409947368421053e-05, "loss": 0.4421, "step": 42441 }, { "epoch": 2.376637921379774, "grad_norm": 1.2736499309539795, "learning_rate": 9.409921052631579e-05, "loss": 0.51, "step": 42442 }, { "epoch": 2.376693918691903, "grad_norm": 0.9469358325004578, "learning_rate": 9.409894736842105e-05, "loss": 0.259, "step": 42443 }, { "epoch": 2.376749916004032, "grad_norm": 1.1957001686096191, "learning_rate": 9.409868421052632e-05, "loss": 0.4244, "step": 42444 }, { "epoch": 2.376805913316161, "grad_norm": 1.5072258710861206, "learning_rate": 9.409842105263158e-05, "loss": 0.4276, "step": 42445 }, { "epoch": 2.37686191062829, "grad_norm": 1.1035648584365845, "learning_rate": 9.409815789473686e-05, "loss": 0.3905, "step": 42446 }, { "epoch": 2.376917907940419, "grad_norm": 1.0916134119033813, "learning_rate": 9.40978947368421e-05, "loss": 0.3051, "step": 42447 }, { "epoch": 2.376973905252548, "grad_norm": 1.3853813409805298, "learning_rate": 9.409763157894738e-05, "loss": 0.3954, "step": 42448 }, { "epoch": 2.377029902564677, "grad_norm": 1.0745974779129028, "learning_rate": 9.409736842105263e-05, "loss": 0.3295, "step": 42449 }, { "epoch": 2.377085899876806, "grad_norm": 1.5301626920700073, "learning_rate": 9.409710526315791e-05, "loss": 0.5941, "step": 42450 }, { "epoch": 2.377141897188935, "grad_norm": 1.2304660081863403, "learning_rate": 9.409684210526315e-05, "loss": 0.4205, "step": 42451 }, { "epoch": 2.377197894501064, "grad_norm": 1.218997597694397, "learning_rate": 9.409657894736843e-05, "loss": 0.341, "step": 42452 }, { "epoch": 2.377253891813193, "grad_norm": 3.525034189224243, "learning_rate": 9.409631578947369e-05, "loss": 0.4506, "step": 42453 }, { "epoch": 2.377309889125322, "grad_norm": 1.222209095954895, "learning_rate": 9.409605263157896e-05, 
"loss": 0.3776, "step": 42454 }, { "epoch": 2.377365886437451, "grad_norm": 1.5149116516113281, "learning_rate": 9.409578947368422e-05, "loss": 0.5059, "step": 42455 }, { "epoch": 2.37742188374958, "grad_norm": 1.2607446908950806, "learning_rate": 9.409552631578947e-05, "loss": 0.4005, "step": 42456 }, { "epoch": 2.377477881061709, "grad_norm": 1.5401476621627808, "learning_rate": 9.409526315789474e-05, "loss": 0.5519, "step": 42457 }, { "epoch": 2.3775338783738382, "grad_norm": 1.2779308557510376, "learning_rate": 9.4095e-05, "loss": 0.4528, "step": 42458 }, { "epoch": 2.3775898756859672, "grad_norm": 1.2017914056777954, "learning_rate": 9.409473684210527e-05, "loss": 0.3509, "step": 42459 }, { "epoch": 2.3776458729980963, "grad_norm": 1.1976125240325928, "learning_rate": 9.409447368421053e-05, "loss": 0.4927, "step": 42460 }, { "epoch": 2.3777018703102253, "grad_norm": 1.3936169147491455, "learning_rate": 9.409421052631579e-05, "loss": 0.4801, "step": 42461 }, { "epoch": 2.3777578676223543, "grad_norm": 1.4349616765975952, "learning_rate": 9.409394736842105e-05, "loss": 0.4154, "step": 42462 }, { "epoch": 2.3778138649344833, "grad_norm": 1.1959031820297241, "learning_rate": 9.409368421052633e-05, "loss": 0.4593, "step": 42463 }, { "epoch": 2.3778698622466123, "grad_norm": 1.3643070459365845, "learning_rate": 9.409342105263158e-05, "loss": 0.3677, "step": 42464 }, { "epoch": 2.3779258595587414, "grad_norm": 1.3468074798583984, "learning_rate": 9.409315789473684e-05, "loss": 0.3999, "step": 42465 }, { "epoch": 2.3779818568708704, "grad_norm": 1.089212417602539, "learning_rate": 9.40928947368421e-05, "loss": 0.366, "step": 42466 }, { "epoch": 2.3780378541829994, "grad_norm": 1.1597673892974854, "learning_rate": 9.409263157894738e-05, "loss": 0.3565, "step": 42467 }, { "epoch": 2.3780938514951284, "grad_norm": 1.2557344436645508, "learning_rate": 9.409236842105264e-05, "loss": 0.3627, "step": 42468 }, { "epoch": 2.3781498488072574, "grad_norm": 1.2739707231521606, 
"learning_rate": 9.40921052631579e-05, "loss": 0.4697, "step": 42469 }, { "epoch": 2.3782058461193865, "grad_norm": 1.112221360206604, "learning_rate": 9.409184210526316e-05, "loss": 0.3037, "step": 42470 }, { "epoch": 2.3782618434315155, "grad_norm": 1.2911196947097778, "learning_rate": 9.409157894736843e-05, "loss": 0.429, "step": 42471 }, { "epoch": 2.3783178407436445, "grad_norm": 1.1795971393585205, "learning_rate": 9.409131578947369e-05, "loss": 0.3821, "step": 42472 }, { "epoch": 2.3783738380557735, "grad_norm": 1.0450501441955566, "learning_rate": 9.409105263157895e-05, "loss": 0.383, "step": 42473 }, { "epoch": 2.3784298353679025, "grad_norm": 1.216058373451233, "learning_rate": 9.409078947368421e-05, "loss": 0.4603, "step": 42474 }, { "epoch": 2.378485832680031, "grad_norm": 1.1050595045089722, "learning_rate": 9.409052631578947e-05, "loss": 0.3484, "step": 42475 }, { "epoch": 2.3785418299921606, "grad_norm": 1.096500039100647, "learning_rate": 9.409026315789474e-05, "loss": 0.3869, "step": 42476 }, { "epoch": 2.378597827304289, "grad_norm": 1.244277000427246, "learning_rate": 9.409e-05, "loss": 0.4237, "step": 42477 }, { "epoch": 2.3786538246164186, "grad_norm": 1.6338012218475342, "learning_rate": 9.408973684210528e-05, "loss": 0.4734, "step": 42478 }, { "epoch": 2.378709821928547, "grad_norm": 1.1416494846343994, "learning_rate": 9.408947368421052e-05, "loss": 0.4312, "step": 42479 }, { "epoch": 2.3787658192406766, "grad_norm": 1.9732004404067993, "learning_rate": 9.40892105263158e-05, "loss": 0.3966, "step": 42480 }, { "epoch": 2.3788218165528052, "grad_norm": 1.327549695968628, "learning_rate": 9.408894736842105e-05, "loss": 0.3474, "step": 42481 }, { "epoch": 2.3788778138649347, "grad_norm": 1.1186048984527588, "learning_rate": 9.408868421052633e-05, "loss": 0.3206, "step": 42482 }, { "epoch": 2.3789338111770633, "grad_norm": 1.1910033226013184, "learning_rate": 9.408842105263159e-05, "loss": 0.4344, "step": 42483 }, { "epoch": 2.3789898084891927, 
"grad_norm": 1.308523416519165, "learning_rate": 9.408815789473685e-05, "loss": 0.514, "step": 42484 }, { "epoch": 2.3790458058013213, "grad_norm": 1.563263177871704, "learning_rate": 9.408789473684211e-05, "loss": 0.5107, "step": 42485 }, { "epoch": 2.3791018031134508, "grad_norm": 1.1659225225448608, "learning_rate": 9.408763157894738e-05, "loss": 0.3893, "step": 42486 }, { "epoch": 2.3791578004255793, "grad_norm": 1.2152005434036255, "learning_rate": 9.408736842105264e-05, "loss": 0.4992, "step": 42487 }, { "epoch": 2.379213797737709, "grad_norm": 1.2288497686386108, "learning_rate": 9.40871052631579e-05, "loss": 0.5169, "step": 42488 }, { "epoch": 2.3792697950498374, "grad_norm": 1.1491831541061401, "learning_rate": 9.408684210526316e-05, "loss": 0.3503, "step": 42489 }, { "epoch": 2.379325792361967, "grad_norm": 2.056318521499634, "learning_rate": 9.408657894736842e-05, "loss": 0.4091, "step": 42490 }, { "epoch": 2.3793817896740954, "grad_norm": 1.4687947034835815, "learning_rate": 9.408631578947369e-05, "loss": 0.5416, "step": 42491 }, { "epoch": 2.379437786986225, "grad_norm": 1.312280535697937, "learning_rate": 9.408605263157895e-05, "loss": 0.4215, "step": 42492 }, { "epoch": 2.3794937842983535, "grad_norm": 1.140360713005066, "learning_rate": 9.408578947368421e-05, "loss": 0.3485, "step": 42493 }, { "epoch": 2.379549781610483, "grad_norm": 1.2055162191390991, "learning_rate": 9.408552631578947e-05, "loss": 0.3483, "step": 42494 }, { "epoch": 2.3796057789226115, "grad_norm": 1.2068771123886108, "learning_rate": 9.408526315789474e-05, "loss": 0.3799, "step": 42495 }, { "epoch": 2.379661776234741, "grad_norm": 1.384659767150879, "learning_rate": 9.4085e-05, "loss": 0.5501, "step": 42496 }, { "epoch": 2.3797177735468695, "grad_norm": 1.3155443668365479, "learning_rate": 9.408473684210526e-05, "loss": 0.3832, "step": 42497 }, { "epoch": 2.379773770858999, "grad_norm": 1.3432371616363525, "learning_rate": 9.408447368421052e-05, "loss": 0.3812, "step": 42498 }, 
{ "epoch": 2.3798297681711276, "grad_norm": 0.9414938688278198, "learning_rate": 9.40842105263158e-05, "loss": 0.3228, "step": 42499 }, { "epoch": 2.379885765483257, "grad_norm": 1.392841100692749, "learning_rate": 9.408394736842106e-05, "loss": 0.4655, "step": 42500 }, { "epoch": 2.3799417627953856, "grad_norm": 1.1543760299682617, "learning_rate": 9.408368421052633e-05, "loss": 0.3398, "step": 42501 }, { "epoch": 2.379997760107515, "grad_norm": 1.2505989074707031, "learning_rate": 9.408342105263158e-05, "loss": 0.3968, "step": 42502 }, { "epoch": 2.3800537574196436, "grad_norm": 1.1237807273864746, "learning_rate": 9.408315789473685e-05, "loss": 0.3717, "step": 42503 }, { "epoch": 2.380109754731773, "grad_norm": 1.085248589515686, "learning_rate": 9.408289473684211e-05, "loss": 0.4845, "step": 42504 }, { "epoch": 2.3801657520439017, "grad_norm": 1.2724963426589966, "learning_rate": 9.408263157894738e-05, "loss": 0.4373, "step": 42505 }, { "epoch": 2.380221749356031, "grad_norm": 1.1509380340576172, "learning_rate": 9.408236842105263e-05, "loss": 0.463, "step": 42506 }, { "epoch": 2.3802777466681597, "grad_norm": 1.275742530822754, "learning_rate": 9.40821052631579e-05, "loss": 0.5805, "step": 42507 }, { "epoch": 2.380333743980289, "grad_norm": 1.1412025690078735, "learning_rate": 9.408184210526316e-05, "loss": 0.3379, "step": 42508 }, { "epoch": 2.3803897412924178, "grad_norm": 1.1979143619537354, "learning_rate": 9.408157894736842e-05, "loss": 0.3587, "step": 42509 }, { "epoch": 2.3804457386045472, "grad_norm": 1.125076413154602, "learning_rate": 9.40813157894737e-05, "loss": 0.4127, "step": 42510 }, { "epoch": 2.380501735916676, "grad_norm": 1.0476491451263428, "learning_rate": 9.408105263157894e-05, "loss": 0.3689, "step": 42511 }, { "epoch": 2.3805577332288053, "grad_norm": 1.202276349067688, "learning_rate": 9.408078947368421e-05, "loss": 0.3619, "step": 42512 }, { "epoch": 2.380613730540934, "grad_norm": 1.092563509941101, "learning_rate": 
9.408052631578947e-05, "loss": 0.5527, "step": 42513 }, { "epoch": 2.3806697278530633, "grad_norm": 1.2603657245635986, "learning_rate": 9.408026315789475e-05, "loss": 0.5164, "step": 42514 }, { "epoch": 2.380725725165192, "grad_norm": 1.2806247472763062, "learning_rate": 9.408000000000001e-05, "loss": 0.5413, "step": 42515 }, { "epoch": 2.3807817224773213, "grad_norm": 1.360559344291687, "learning_rate": 9.407973684210527e-05, "loss": 0.5411, "step": 42516 }, { "epoch": 2.38083771978945, "grad_norm": 1.0450483560562134, "learning_rate": 9.407947368421053e-05, "loss": 0.4087, "step": 42517 }, { "epoch": 2.380893717101579, "grad_norm": 1.2660740613937378, "learning_rate": 9.40792105263158e-05, "loss": 0.5154, "step": 42518 }, { "epoch": 2.380949714413708, "grad_norm": 1.2018334865570068, "learning_rate": 9.407894736842106e-05, "loss": 0.3713, "step": 42519 }, { "epoch": 2.381005711725837, "grad_norm": 1.1157623529434204, "learning_rate": 9.407868421052632e-05, "loss": 0.3616, "step": 42520 }, { "epoch": 2.381061709037966, "grad_norm": 1.9918997287750244, "learning_rate": 9.407842105263158e-05, "loss": 0.3589, "step": 42521 }, { "epoch": 2.381117706350095, "grad_norm": 1.1965969800949097, "learning_rate": 9.407815789473685e-05, "loss": 0.4041, "step": 42522 }, { "epoch": 2.381173703662224, "grad_norm": 1.4048922061920166, "learning_rate": 9.407789473684211e-05, "loss": 0.3891, "step": 42523 }, { "epoch": 2.381229700974353, "grad_norm": 1.3953217267990112, "learning_rate": 9.407763157894737e-05, "loss": 0.3885, "step": 42524 }, { "epoch": 2.381285698286482, "grad_norm": 1.2107235193252563, "learning_rate": 9.407736842105263e-05, "loss": 0.3978, "step": 42525 }, { "epoch": 2.381341695598611, "grad_norm": 1.2201377153396606, "learning_rate": 9.407710526315789e-05, "loss": 0.5128, "step": 42526 }, { "epoch": 2.38139769291074, "grad_norm": 1.235162615776062, "learning_rate": 9.407684210526316e-05, "loss": 0.4138, "step": 42527 }, { "epoch": 2.381453690222869, "grad_norm": 
1.0170131921768188, "learning_rate": 9.407657894736842e-05, "loss": 0.4483, "step": 42528 }, { "epoch": 2.381509687534998, "grad_norm": 1.326956868171692, "learning_rate": 9.407631578947368e-05, "loss": 0.4156, "step": 42529 }, { "epoch": 2.381565684847127, "grad_norm": 1.121862530708313, "learning_rate": 9.407605263157894e-05, "loss": 0.4415, "step": 42530 }, { "epoch": 2.381621682159256, "grad_norm": 1.4975086450576782, "learning_rate": 9.407578947368422e-05, "loss": 0.3848, "step": 42531 }, { "epoch": 2.381677679471385, "grad_norm": 1.1271705627441406, "learning_rate": 9.407552631578948e-05, "loss": 0.5332, "step": 42532 }, { "epoch": 2.381733676783514, "grad_norm": 1.3706332445144653, "learning_rate": 9.407526315789475e-05, "loss": 0.3456, "step": 42533 }, { "epoch": 2.3817896740956432, "grad_norm": 1.7513446807861328, "learning_rate": 9.4075e-05, "loss": 0.4118, "step": 42534 }, { "epoch": 2.3818456714077723, "grad_norm": 1.1036343574523926, "learning_rate": 9.407473684210527e-05, "loss": 0.4785, "step": 42535 }, { "epoch": 2.3819016687199013, "grad_norm": 1.2073208093643188, "learning_rate": 9.407447368421053e-05, "loss": 0.4064, "step": 42536 }, { "epoch": 2.3819576660320303, "grad_norm": 1.003194808959961, "learning_rate": 9.40742105263158e-05, "loss": 0.3475, "step": 42537 }, { "epoch": 2.3820136633441593, "grad_norm": 1.0659860372543335, "learning_rate": 9.407394736842106e-05, "loss": 0.3201, "step": 42538 }, { "epoch": 2.3820696606562883, "grad_norm": 1.2434395551681519, "learning_rate": 9.407368421052632e-05, "loss": 0.3965, "step": 42539 }, { "epoch": 2.3821256579684174, "grad_norm": 2.6625494956970215, "learning_rate": 9.407342105263158e-05, "loss": 0.4299, "step": 42540 }, { "epoch": 2.3821816552805464, "grad_norm": 1.6783849000930786, "learning_rate": 9.407315789473686e-05, "loss": 0.3971, "step": 42541 }, { "epoch": 2.3822376525926754, "grad_norm": 1.285970687866211, "learning_rate": 9.407289473684211e-05, "loss": 0.4381, "step": 42542 }, { 
"epoch": 2.3822936499048044, "grad_norm": 1.2205157279968262, "learning_rate": 9.407263157894737e-05, "loss": 0.4058, "step": 42543 }, { "epoch": 2.3823496472169334, "grad_norm": 1.2811921834945679, "learning_rate": 9.407236842105263e-05, "loss": 0.393, "step": 42544 }, { "epoch": 2.3824056445290624, "grad_norm": 1.2970865964889526, "learning_rate": 9.40721052631579e-05, "loss": 0.6065, "step": 42545 }, { "epoch": 2.3824616418411915, "grad_norm": 1.2408443689346313, "learning_rate": 9.407184210526317e-05, "loss": 0.3807, "step": 42546 }, { "epoch": 2.3825176391533205, "grad_norm": 1.3409698009490967, "learning_rate": 9.407157894736843e-05, "loss": 0.4483, "step": 42547 }, { "epoch": 2.3825736364654495, "grad_norm": 1.4250755310058594, "learning_rate": 9.407131578947369e-05, "loss": 0.5103, "step": 42548 }, { "epoch": 2.3826296337775785, "grad_norm": 1.2405428886413574, "learning_rate": 9.407105263157895e-05, "loss": 0.3803, "step": 42549 }, { "epoch": 2.3826856310897075, "grad_norm": 1.17203688621521, "learning_rate": 9.407078947368422e-05, "loss": 0.393, "step": 42550 }, { "epoch": 2.3827416284018366, "grad_norm": 1.2503646612167358, "learning_rate": 9.407052631578948e-05, "loss": 0.4466, "step": 42551 }, { "epoch": 2.3827976257139656, "grad_norm": 3.6451518535614014, "learning_rate": 9.407026315789474e-05, "loss": 0.3539, "step": 42552 }, { "epoch": 2.3828536230260946, "grad_norm": 1.1758249998092651, "learning_rate": 9.407e-05, "loss": 0.384, "step": 42553 }, { "epoch": 2.3829096203382236, "grad_norm": 1.2939223051071167, "learning_rate": 9.406973684210527e-05, "loss": 0.4589, "step": 42554 }, { "epoch": 2.3829656176503526, "grad_norm": 1.0699198246002197, "learning_rate": 9.406947368421053e-05, "loss": 0.3628, "step": 42555 }, { "epoch": 2.3830216149624817, "grad_norm": 1.463165283203125, "learning_rate": 9.40692105263158e-05, "loss": 0.4143, "step": 42556 }, { "epoch": 2.3830776122746107, "grad_norm": 1.2300323247909546, "learning_rate": 9.406894736842105e-05, 
"loss": 0.3432, "step": 42557 }, { "epoch": 2.3831336095867397, "grad_norm": 1.3511840105056763, "learning_rate": 9.406868421052632e-05, "loss": 0.3803, "step": 42558 }, { "epoch": 2.3831896068988687, "grad_norm": 1.0859596729278564, "learning_rate": 9.406842105263158e-05, "loss": 0.3732, "step": 42559 }, { "epoch": 2.3832456042109977, "grad_norm": 1.1830259561538696, "learning_rate": 9.406815789473684e-05, "loss": 0.4137, "step": 42560 }, { "epoch": 2.3833016015231268, "grad_norm": 1.1586086750030518, "learning_rate": 9.40678947368421e-05, "loss": 0.3749, "step": 42561 }, { "epoch": 2.3833575988352558, "grad_norm": 1.0648373365402222, "learning_rate": 9.406763157894736e-05, "loss": 0.4581, "step": 42562 }, { "epoch": 2.383413596147385, "grad_norm": 1.062685489654541, "learning_rate": 9.406736842105264e-05, "loss": 0.3683, "step": 42563 }, { "epoch": 2.383469593459514, "grad_norm": 1.1497598886489868, "learning_rate": 9.40671052631579e-05, "loss": 0.3664, "step": 42564 }, { "epoch": 2.383525590771643, "grad_norm": 1.1968621015548706, "learning_rate": 9.406684210526317e-05, "loss": 0.3181, "step": 42565 }, { "epoch": 2.383581588083772, "grad_norm": 1.191324234008789, "learning_rate": 9.406657894736842e-05, "loss": 0.4757, "step": 42566 }, { "epoch": 2.383637585395901, "grad_norm": 1.2028124332427979, "learning_rate": 9.406631578947369e-05, "loss": 0.3237, "step": 42567 }, { "epoch": 2.38369358270803, "grad_norm": 1.3055126667022705, "learning_rate": 9.406605263157895e-05, "loss": 0.4884, "step": 42568 }, { "epoch": 2.383749580020159, "grad_norm": 1.5982452630996704, "learning_rate": 9.406578947368422e-05, "loss": 0.489, "step": 42569 }, { "epoch": 2.383805577332288, "grad_norm": 0.9566874504089355, "learning_rate": 9.406552631578948e-05, "loss": 0.33, "step": 42570 }, { "epoch": 2.383861574644417, "grad_norm": 1.3176177740097046, "learning_rate": 9.406526315789474e-05, "loss": 0.3599, "step": 42571 }, { "epoch": 2.383917571956546, "grad_norm": 0.9685880541801453, 
"learning_rate": 9.4065e-05, "loss": 0.3114, "step": 42572 }, { "epoch": 2.383973569268675, "grad_norm": 1.288273572921753, "learning_rate": 9.406473684210527e-05, "loss": 0.4814, "step": 42573 }, { "epoch": 2.384029566580804, "grad_norm": 1.2276841402053833, "learning_rate": 9.406447368421053e-05, "loss": 0.4001, "step": 42574 }, { "epoch": 2.384085563892933, "grad_norm": 1.0506620407104492, "learning_rate": 9.40642105263158e-05, "loss": 0.3761, "step": 42575 }, { "epoch": 2.384141561205062, "grad_norm": 1.33298921585083, "learning_rate": 9.406394736842105e-05, "loss": 0.4378, "step": 42576 }, { "epoch": 2.384197558517191, "grad_norm": 1.2699986696243286, "learning_rate": 9.406368421052631e-05, "loss": 0.3371, "step": 42577 }, { "epoch": 2.38425355582932, "grad_norm": 1.2369009256362915, "learning_rate": 9.406342105263159e-05, "loss": 0.3554, "step": 42578 }, { "epoch": 2.384309553141449, "grad_norm": 1.1824365854263306, "learning_rate": 9.406315789473685e-05, "loss": 0.4967, "step": 42579 }, { "epoch": 2.384365550453578, "grad_norm": 1.4971169233322144, "learning_rate": 9.40628947368421e-05, "loss": 0.3747, "step": 42580 }, { "epoch": 2.384421547765707, "grad_norm": 1.3762418031692505, "learning_rate": 9.406263157894737e-05, "loss": 0.4584, "step": 42581 }, { "epoch": 2.384477545077836, "grad_norm": 1.2758166790008545, "learning_rate": 9.406236842105264e-05, "loss": 0.3064, "step": 42582 }, { "epoch": 2.384533542389965, "grad_norm": 1.3122650384902954, "learning_rate": 9.40621052631579e-05, "loss": 0.397, "step": 42583 }, { "epoch": 2.384589539702094, "grad_norm": 1.0488005876541138, "learning_rate": 9.406184210526316e-05, "loss": 0.3866, "step": 42584 }, { "epoch": 2.384645537014223, "grad_norm": 1.020864486694336, "learning_rate": 9.406157894736842e-05, "loss": 0.2957, "step": 42585 }, { "epoch": 2.3847015343263522, "grad_norm": 1.4510560035705566, "learning_rate": 9.406131578947369e-05, "loss": 0.4678, "step": 42586 }, { "epoch": 2.3847575316384813, 
"grad_norm": 1.2683963775634766, "learning_rate": 9.406105263157895e-05, "loss": 0.452, "step": 42587 }, { "epoch": 2.3848135289506103, "grad_norm": 1.1072547435760498, "learning_rate": 9.406078947368422e-05, "loss": 0.3309, "step": 42588 }, { "epoch": 2.3848695262627393, "grad_norm": 1.121264934539795, "learning_rate": 9.406052631578947e-05, "loss": 0.5253, "step": 42589 }, { "epoch": 2.3849255235748683, "grad_norm": 1.1909416913986206, "learning_rate": 9.406026315789474e-05, "loss": 0.4868, "step": 42590 }, { "epoch": 2.3849815208869973, "grad_norm": 1.237779974937439, "learning_rate": 9.406e-05, "loss": 0.3394, "step": 42591 }, { "epoch": 2.3850375181991263, "grad_norm": 1.1321485042572021, "learning_rate": 9.405973684210528e-05, "loss": 0.4951, "step": 42592 }, { "epoch": 2.3850935155112554, "grad_norm": 1.1843172311782837, "learning_rate": 9.405947368421054e-05, "loss": 0.3951, "step": 42593 }, { "epoch": 2.3851495128233844, "grad_norm": 1.0003044605255127, "learning_rate": 9.405921052631578e-05, "loss": 0.3542, "step": 42594 }, { "epoch": 2.3852055101355134, "grad_norm": 15.324576377868652, "learning_rate": 9.405894736842106e-05, "loss": 0.27, "step": 42595 }, { "epoch": 2.3852615074476424, "grad_norm": 1.124688982963562, "learning_rate": 9.405868421052632e-05, "loss": 0.3357, "step": 42596 }, { "epoch": 2.3853175047597714, "grad_norm": 0.9334547519683838, "learning_rate": 9.405842105263159e-05, "loss": 0.2975, "step": 42597 }, { "epoch": 2.3853735020719005, "grad_norm": 1.2480884790420532, "learning_rate": 9.405815789473684e-05, "loss": 0.4539, "step": 42598 }, { "epoch": 2.3854294993840295, "grad_norm": 5.411787986755371, "learning_rate": 9.405789473684211e-05, "loss": 0.5638, "step": 42599 }, { "epoch": 2.3854854966961585, "grad_norm": 1.331764578819275, "learning_rate": 9.405763157894737e-05, "loss": 0.4644, "step": 42600 }, { "epoch": 2.3855414940082875, "grad_norm": 1.3296133279800415, "learning_rate": 9.405736842105264e-05, "loss": 0.6881, "step": 
42601 }, { "epoch": 2.3855974913204165, "grad_norm": 1.2573027610778809, "learning_rate": 9.40571052631579e-05, "loss": 0.6897, "step": 42602 }, { "epoch": 2.3856534886325456, "grad_norm": 1.117430329322815, "learning_rate": 9.405684210526316e-05, "loss": 0.3679, "step": 42603 }, { "epoch": 2.3857094859446746, "grad_norm": 1.3276596069335938, "learning_rate": 9.405657894736842e-05, "loss": 0.5267, "step": 42604 }, { "epoch": 2.3857654832568036, "grad_norm": 1.1627373695373535, "learning_rate": 9.40563157894737e-05, "loss": 0.3476, "step": 42605 }, { "epoch": 2.3858214805689326, "grad_norm": 1.2422236204147339, "learning_rate": 9.405605263157895e-05, "loss": 0.4312, "step": 42606 }, { "epoch": 2.3858774778810616, "grad_norm": 1.2037944793701172, "learning_rate": 9.405578947368421e-05, "loss": 0.4309, "step": 42607 }, { "epoch": 2.3859334751931907, "grad_norm": 1.30731999874115, "learning_rate": 9.405552631578947e-05, "loss": 0.5137, "step": 42608 }, { "epoch": 2.3859894725053197, "grad_norm": 1.1168564558029175, "learning_rate": 9.405526315789475e-05, "loss": 0.4747, "step": 42609 }, { "epoch": 2.3860454698174487, "grad_norm": 1.1194229125976562, "learning_rate": 9.4055e-05, "loss": 0.3362, "step": 42610 }, { "epoch": 2.3861014671295777, "grad_norm": 1.008150339126587, "learning_rate": 9.405473684210527e-05, "loss": 0.5189, "step": 42611 }, { "epoch": 2.3861574644417067, "grad_norm": 1.055415153503418, "learning_rate": 9.405447368421053e-05, "loss": 0.3264, "step": 42612 }, { "epoch": 2.3862134617538358, "grad_norm": 1.0336294174194336, "learning_rate": 9.405421052631579e-05, "loss": 0.4551, "step": 42613 }, { "epoch": 2.3862694590659648, "grad_norm": 1.0394479036331177, "learning_rate": 9.405394736842106e-05, "loss": 0.3806, "step": 42614 }, { "epoch": 2.386325456378094, "grad_norm": 1.166113018989563, "learning_rate": 9.405368421052632e-05, "loss": 0.4929, "step": 42615 }, { "epoch": 2.386381453690223, "grad_norm": 4.127678394317627, "learning_rate": 
9.405342105263158e-05, "loss": 0.4001, "step": 42616 }, { "epoch": 2.386437451002352, "grad_norm": 1.1513208150863647, "learning_rate": 9.405315789473684e-05, "loss": 0.3832, "step": 42617 }, { "epoch": 2.386493448314481, "grad_norm": 1.2024736404418945, "learning_rate": 9.405289473684211e-05, "loss": 0.3868, "step": 42618 }, { "epoch": 2.38654944562661, "grad_norm": 1.1587923765182495, "learning_rate": 9.405263157894737e-05, "loss": 0.4126, "step": 42619 }, { "epoch": 2.386605442938739, "grad_norm": 1.6466926336288452, "learning_rate": 9.405236842105264e-05, "loss": 0.5869, "step": 42620 }, { "epoch": 2.386661440250868, "grad_norm": 1.379857063293457, "learning_rate": 9.405210526315789e-05, "loss": 0.3721, "step": 42621 }, { "epoch": 2.386717437562997, "grad_norm": 1.1543580293655396, "learning_rate": 9.405184210526316e-05, "loss": 0.3289, "step": 42622 }, { "epoch": 2.386773434875126, "grad_norm": 0.9915464520454407, "learning_rate": 9.405157894736842e-05, "loss": 0.3488, "step": 42623 }, { "epoch": 2.386829432187255, "grad_norm": 1.0548279285430908, "learning_rate": 9.40513157894737e-05, "loss": 0.3626, "step": 42624 }, { "epoch": 2.386885429499384, "grad_norm": 1.111660122871399, "learning_rate": 9.405105263157896e-05, "loss": 0.3327, "step": 42625 }, { "epoch": 2.386941426811513, "grad_norm": 1.109354853630066, "learning_rate": 9.405078947368422e-05, "loss": 0.3759, "step": 42626 }, { "epoch": 2.386997424123642, "grad_norm": 1.059809923171997, "learning_rate": 9.405052631578948e-05, "loss": 0.3487, "step": 42627 }, { "epoch": 2.387053421435771, "grad_norm": 1.2506803274154663, "learning_rate": 9.405026315789475e-05, "loss": 0.3445, "step": 42628 }, { "epoch": 2.3871094187479, "grad_norm": 0.9957774877548218, "learning_rate": 9.405000000000001e-05, "loss": 0.3543, "step": 42629 }, { "epoch": 2.387165416060029, "grad_norm": 1.4011187553405762, "learning_rate": 9.404973684210527e-05, "loss": 0.3819, "step": 42630 }, { "epoch": 2.387221413372158, "grad_norm": 
1.187614917755127, "learning_rate": 9.404947368421053e-05, "loss": 0.3956, "step": 42631 }, { "epoch": 2.387277410684287, "grad_norm": 1.0884028673171997, "learning_rate": 9.404921052631579e-05, "loss": 0.4755, "step": 42632 }, { "epoch": 2.387333407996416, "grad_norm": 1.2023686170578003, "learning_rate": 9.404894736842106e-05, "loss": 0.4647, "step": 42633 }, { "epoch": 2.387389405308545, "grad_norm": 1.1974496841430664, "learning_rate": 9.404868421052632e-05, "loss": 0.3666, "step": 42634 }, { "epoch": 2.387445402620674, "grad_norm": 1.119666337966919, "learning_rate": 9.404842105263158e-05, "loss": 0.357, "step": 42635 }, { "epoch": 2.387501399932803, "grad_norm": 1.0321756601333618, "learning_rate": 9.404815789473684e-05, "loss": 0.3378, "step": 42636 }, { "epoch": 2.387557397244932, "grad_norm": 1.3267462253570557, "learning_rate": 9.404789473684211e-05, "loss": 0.3869, "step": 42637 }, { "epoch": 2.3876133945570612, "grad_norm": 1.1031581163406372, "learning_rate": 9.404763157894737e-05, "loss": 0.4331, "step": 42638 }, { "epoch": 2.3876693918691902, "grad_norm": 1.022200345993042, "learning_rate": 9.404736842105263e-05, "loss": 0.3178, "step": 42639 }, { "epoch": 2.3877253891813193, "grad_norm": 1.3006731271743774, "learning_rate": 9.40471052631579e-05, "loss": 0.3602, "step": 42640 }, { "epoch": 2.3877813864934483, "grad_norm": 1.0300768613815308, "learning_rate": 9.404684210526317e-05, "loss": 0.3414, "step": 42641 }, { "epoch": 2.3878373838055773, "grad_norm": 1.1395182609558105, "learning_rate": 9.404657894736843e-05, "loss": 0.3352, "step": 42642 }, { "epoch": 2.3878933811177063, "grad_norm": 1.2421512603759766, "learning_rate": 9.40463157894737e-05, "loss": 0.3343, "step": 42643 }, { "epoch": 2.3879493784298353, "grad_norm": 1.195260763168335, "learning_rate": 9.404605263157895e-05, "loss": 0.3609, "step": 42644 }, { "epoch": 2.3880053757419644, "grad_norm": 1.1531985998153687, "learning_rate": 9.404578947368422e-05, "loss": 0.3348, "step": 42645 }, { 
"epoch": 2.3880613730540934, "grad_norm": 1.0802340507507324, "learning_rate": 9.404552631578948e-05, "loss": 0.3581, "step": 42646 }, { "epoch": 2.3881173703662224, "grad_norm": 1.3586745262145996, "learning_rate": 9.404526315789474e-05, "loss": 0.3773, "step": 42647 }, { "epoch": 2.3881733676783514, "grad_norm": 1.2025184631347656, "learning_rate": 9.404500000000001e-05, "loss": 0.4027, "step": 42648 }, { "epoch": 2.3882293649904804, "grad_norm": 1.1350380182266235, "learning_rate": 9.404473684210526e-05, "loss": 0.3496, "step": 42649 }, { "epoch": 2.3882853623026095, "grad_norm": 1.095055341720581, "learning_rate": 9.404447368421053e-05, "loss": 0.3094, "step": 42650 }, { "epoch": 2.3883413596147385, "grad_norm": 1.1523957252502441, "learning_rate": 9.404421052631579e-05, "loss": 0.3901, "step": 42651 }, { "epoch": 2.3883973569268675, "grad_norm": 1.1032826900482178, "learning_rate": 9.404394736842106e-05, "loss": 0.3929, "step": 42652 }, { "epoch": 2.3884533542389965, "grad_norm": 1.4672939777374268, "learning_rate": 9.404368421052631e-05, "loss": 0.4425, "step": 42653 }, { "epoch": 2.3885093515511255, "grad_norm": 1.3077774047851562, "learning_rate": 9.404342105263158e-05, "loss": 0.3643, "step": 42654 }, { "epoch": 2.3885653488632546, "grad_norm": 1.5778776407241821, "learning_rate": 9.404315789473684e-05, "loss": 0.4809, "step": 42655 }, { "epoch": 2.3886213461753836, "grad_norm": 1.1037013530731201, "learning_rate": 9.404289473684212e-05, "loss": 0.4742, "step": 42656 }, { "epoch": 2.3886773434875126, "grad_norm": 1.0530924797058105, "learning_rate": 9.404263157894738e-05, "loss": 0.3816, "step": 42657 }, { "epoch": 2.3887333407996416, "grad_norm": 1.268921136856079, "learning_rate": 9.404236842105264e-05, "loss": 0.5544, "step": 42658 }, { "epoch": 2.3887893381117706, "grad_norm": 1.18560791015625, "learning_rate": 9.40421052631579e-05, "loss": 0.4251, "step": 42659 }, { "epoch": 2.3888453354238997, "grad_norm": 1.2642385959625244, "learning_rate": 
9.404184210526317e-05, "loss": 0.4166, "step": 42660 }, { "epoch": 2.3889013327360287, "grad_norm": 1.18563973903656, "learning_rate": 9.404157894736843e-05, "loss": 0.5054, "step": 42661 }, { "epoch": 2.3889573300481577, "grad_norm": 1.1250606775283813, "learning_rate": 9.404131578947369e-05, "loss": 0.3635, "step": 42662 }, { "epoch": 2.3890133273602867, "grad_norm": 1.5753247737884521, "learning_rate": 9.404105263157895e-05, "loss": 0.3921, "step": 42663 }, { "epoch": 2.3890693246724157, "grad_norm": 1.2122334241867065, "learning_rate": 9.404078947368421e-05, "loss": 0.4131, "step": 42664 }, { "epoch": 2.3891253219845447, "grad_norm": 1.4192795753479004, "learning_rate": 9.404052631578948e-05, "loss": 0.495, "step": 42665 }, { "epoch": 2.3891813192966738, "grad_norm": 1.0998648405075073, "learning_rate": 9.404026315789474e-05, "loss": 0.2548, "step": 42666 }, { "epoch": 2.389237316608803, "grad_norm": 1.1161388158798218, "learning_rate": 9.404e-05, "loss": 0.4076, "step": 42667 }, { "epoch": 2.389293313920932, "grad_norm": 1.1583874225616455, "learning_rate": 9.403973684210526e-05, "loss": 0.3951, "step": 42668 }, { "epoch": 2.389349311233061, "grad_norm": 1.1109627485275269, "learning_rate": 9.403947368421053e-05, "loss": 0.4145, "step": 42669 }, { "epoch": 2.38940530854519, "grad_norm": 1.3675705194473267, "learning_rate": 9.40392105263158e-05, "loss": 0.5858, "step": 42670 }, { "epoch": 2.389461305857319, "grad_norm": 1.4983429908752441, "learning_rate": 9.403894736842105e-05, "loss": 0.4275, "step": 42671 }, { "epoch": 2.389517303169448, "grad_norm": 1.1537508964538574, "learning_rate": 9.403868421052631e-05, "loss": 0.6054, "step": 42672 }, { "epoch": 2.389573300481577, "grad_norm": 1.521843671798706, "learning_rate": 9.403842105263159e-05, "loss": 0.4285, "step": 42673 }, { "epoch": 2.389629297793706, "grad_norm": 1.1374387741088867, "learning_rate": 9.403815789473685e-05, "loss": 0.4711, "step": 42674 }, { "epoch": 2.389685295105835, "grad_norm": 
1.174505591392517, "learning_rate": 9.403789473684212e-05, "loss": 0.3622, "step": 42675 }, { "epoch": 2.389741292417964, "grad_norm": 1.2992359399795532, "learning_rate": 9.403763157894737e-05, "loss": 0.4013, "step": 42676 }, { "epoch": 2.389797289730093, "grad_norm": 1.2300549745559692, "learning_rate": 9.403736842105264e-05, "loss": 0.4557, "step": 42677 }, { "epoch": 2.389853287042222, "grad_norm": 1.2163190841674805, "learning_rate": 9.40371052631579e-05, "loss": 0.3282, "step": 42678 }, { "epoch": 2.389909284354351, "grad_norm": 1.3333773612976074, "learning_rate": 9.403684210526317e-05, "loss": 0.5762, "step": 42679 }, { "epoch": 2.38996528166648, "grad_norm": 1.0658111572265625, "learning_rate": 9.403657894736843e-05, "loss": 0.397, "step": 42680 }, { "epoch": 2.390021278978609, "grad_norm": 1.051979899406433, "learning_rate": 9.403631578947368e-05, "loss": 0.4385, "step": 42681 }, { "epoch": 2.390077276290738, "grad_norm": 1.2111361026763916, "learning_rate": 9.403605263157895e-05, "loss": 0.3194, "step": 42682 }, { "epoch": 2.390133273602867, "grad_norm": 1.2240217924118042, "learning_rate": 9.403578947368421e-05, "loss": 0.3632, "step": 42683 }, { "epoch": 2.390189270914996, "grad_norm": 1.3208426237106323, "learning_rate": 9.403552631578948e-05, "loss": 0.3418, "step": 42684 }, { "epoch": 2.390245268227125, "grad_norm": 1.2668181657791138, "learning_rate": 9.403526315789474e-05, "loss": 0.5016, "step": 42685 }, { "epoch": 2.390301265539254, "grad_norm": 1.1530085802078247, "learning_rate": 9.4035e-05, "loss": 0.388, "step": 42686 }, { "epoch": 2.390357262851383, "grad_norm": 1.2501213550567627, "learning_rate": 9.403473684210526e-05, "loss": 0.5076, "step": 42687 }, { "epoch": 2.390413260163512, "grad_norm": 1.1210938692092896, "learning_rate": 9.403447368421054e-05, "loss": 0.3942, "step": 42688 }, { "epoch": 2.390469257475641, "grad_norm": 1.1361290216445923, "learning_rate": 9.40342105263158e-05, "loss": 0.3223, "step": 42689 }, { "epoch": 
2.3905252547877702, "grad_norm": 1.1473275423049927, "learning_rate": 9.403394736842106e-05, "loss": 0.4319, "step": 42690 }, { "epoch": 2.3905812520998992, "grad_norm": 1.6550055742263794, "learning_rate": 9.403368421052632e-05, "loss": 0.3256, "step": 42691 }, { "epoch": 2.3906372494120283, "grad_norm": 1.1310539245605469, "learning_rate": 9.403342105263159e-05, "loss": 0.292, "step": 42692 }, { "epoch": 2.3906932467241573, "grad_norm": 1.851605772972107, "learning_rate": 9.403315789473685e-05, "loss": 0.3793, "step": 42693 }, { "epoch": 2.3907492440362863, "grad_norm": 1.2337329387664795, "learning_rate": 9.403289473684211e-05, "loss": 0.4356, "step": 42694 }, { "epoch": 2.3908052413484153, "grad_norm": 1.118699073791504, "learning_rate": 9.403263157894737e-05, "loss": 0.3398, "step": 42695 }, { "epoch": 2.3908612386605443, "grad_norm": 0.984032392501831, "learning_rate": 9.403236842105264e-05, "loss": 0.3362, "step": 42696 }, { "epoch": 2.3909172359726734, "grad_norm": 1.1673319339752197, "learning_rate": 9.40321052631579e-05, "loss": 0.3786, "step": 42697 }, { "epoch": 2.3909732332848024, "grad_norm": 1.3165054321289062, "learning_rate": 9.403184210526316e-05, "loss": 0.492, "step": 42698 }, { "epoch": 2.3910292305969314, "grad_norm": 1.2805838584899902, "learning_rate": 9.403157894736842e-05, "loss": 0.5496, "step": 42699 }, { "epoch": 2.3910852279090604, "grad_norm": 1.298423409461975, "learning_rate": 9.403131578947368e-05, "loss": 0.3035, "step": 42700 }, { "epoch": 2.3911412252211894, "grad_norm": 1.1271740198135376, "learning_rate": 9.403105263157895e-05, "loss": 0.3902, "step": 42701 }, { "epoch": 2.3911972225333185, "grad_norm": 1.182063341140747, "learning_rate": 9.403078947368421e-05, "loss": 0.4121, "step": 42702 }, { "epoch": 2.3912532198454475, "grad_norm": 1.1168649196624756, "learning_rate": 9.403052631578949e-05, "loss": 0.3589, "step": 42703 }, { "epoch": 2.3913092171575765, "grad_norm": 1.0168119668960571, "learning_rate": 
9.403026315789473e-05, "loss": 0.3186, "step": 42704 }, { "epoch": 2.3913652144697055, "grad_norm": 1.2867718935012817, "learning_rate": 9.403e-05, "loss": 0.494, "step": 42705 }, { "epoch": 2.3914212117818345, "grad_norm": 1.3506160974502563, "learning_rate": 9.402973684210527e-05, "loss": 0.3393, "step": 42706 }, { "epoch": 2.3914772090939636, "grad_norm": 1.448885202407837, "learning_rate": 9.402947368421054e-05, "loss": 0.3793, "step": 42707 }, { "epoch": 2.3915332064060926, "grad_norm": 1.1891429424285889, "learning_rate": 9.402921052631579e-05, "loss": 0.3184, "step": 42708 }, { "epoch": 2.3915892037182216, "grad_norm": 1.5013341903686523, "learning_rate": 9.402894736842106e-05, "loss": 0.5169, "step": 42709 }, { "epoch": 2.3916452010303506, "grad_norm": 1.4108986854553223, "learning_rate": 9.402868421052632e-05, "loss": 0.4365, "step": 42710 }, { "epoch": 2.3917011983424796, "grad_norm": 1.2258822917938232, "learning_rate": 9.402842105263159e-05, "loss": 0.4077, "step": 42711 }, { "epoch": 2.3917571956546086, "grad_norm": 1.0955442190170288, "learning_rate": 9.402815789473685e-05, "loss": 0.4137, "step": 42712 }, { "epoch": 2.3918131929667377, "grad_norm": 0.9462907314300537, "learning_rate": 9.402789473684211e-05, "loss": 0.2962, "step": 42713 }, { "epoch": 2.3918691902788667, "grad_norm": 1.1818344593048096, "learning_rate": 9.402763157894737e-05, "loss": 0.4124, "step": 42714 }, { "epoch": 2.3919251875909957, "grad_norm": 1.0877208709716797, "learning_rate": 9.402736842105263e-05, "loss": 0.4221, "step": 42715 }, { "epoch": 2.3919811849031247, "grad_norm": 1.0634098052978516, "learning_rate": 9.40271052631579e-05, "loss": 0.3711, "step": 42716 }, { "epoch": 2.3920371822152537, "grad_norm": 1.2608414888381958, "learning_rate": 9.402684210526316e-05, "loss": 0.4463, "step": 42717 }, { "epoch": 2.3920931795273828, "grad_norm": 1.2866356372833252, "learning_rate": 9.402657894736842e-05, "loss": 0.3731, "step": 42718 }, { "epoch": 2.392149176839512, 
"grad_norm": 1.1856920719146729, "learning_rate": 9.402631578947368e-05, "loss": 0.322, "step": 42719 }, { "epoch": 2.392205174151641, "grad_norm": 1.1983140707015991, "learning_rate": 9.402605263157896e-05, "loss": 0.3605, "step": 42720 }, { "epoch": 2.39226117146377, "grad_norm": 1.392439603805542, "learning_rate": 9.402578947368422e-05, "loss": 0.5023, "step": 42721 }, { "epoch": 2.392317168775899, "grad_norm": 1.2660714387893677, "learning_rate": 9.402552631578948e-05, "loss": 0.403, "step": 42722 }, { "epoch": 2.392373166088028, "grad_norm": 1.411842703819275, "learning_rate": 9.402526315789474e-05, "loss": 0.4247, "step": 42723 }, { "epoch": 2.392429163400157, "grad_norm": 1.3333075046539307, "learning_rate": 9.402500000000001e-05, "loss": 0.4288, "step": 42724 }, { "epoch": 2.392485160712286, "grad_norm": 1.2108970880508423, "learning_rate": 9.402473684210527e-05, "loss": 0.4055, "step": 42725 }, { "epoch": 2.392541158024415, "grad_norm": 1.088212013244629, "learning_rate": 9.402447368421053e-05, "loss": 0.4001, "step": 42726 }, { "epoch": 2.392597155336544, "grad_norm": 1.3110018968582153, "learning_rate": 9.402421052631579e-05, "loss": 0.377, "step": 42727 }, { "epoch": 2.392653152648673, "grad_norm": 1.108091950416565, "learning_rate": 9.402394736842106e-05, "loss": 0.4061, "step": 42728 }, { "epoch": 2.392709149960802, "grad_norm": 1.158329725265503, "learning_rate": 9.402368421052632e-05, "loss": 0.4217, "step": 42729 }, { "epoch": 2.392765147272931, "grad_norm": 1.5274122953414917, "learning_rate": 9.40234210526316e-05, "loss": 0.4145, "step": 42730 }, { "epoch": 2.39282114458506, "grad_norm": 1.365785837173462, "learning_rate": 9.402315789473684e-05, "loss": 0.4783, "step": 42731 }, { "epoch": 2.392877141897189, "grad_norm": 1.085085153579712, "learning_rate": 9.40228947368421e-05, "loss": 0.3383, "step": 42732 }, { "epoch": 2.392933139209318, "grad_norm": 1.1834867000579834, "learning_rate": 9.402263157894737e-05, "loss": 0.4196, "step": 42733 }, { 
"epoch": 2.392989136521447, "grad_norm": 1.3948897123336792, "learning_rate": 9.402236842105263e-05, "loss": 0.4615, "step": 42734 }, { "epoch": 2.393045133833576, "grad_norm": 1.0753192901611328, "learning_rate": 9.40221052631579e-05, "loss": 0.3919, "step": 42735 }, { "epoch": 2.393101131145705, "grad_norm": 1.6030347347259521, "learning_rate": 9.402184210526315e-05, "loss": 0.3469, "step": 42736 }, { "epoch": 2.393157128457834, "grad_norm": 1.2505477666854858, "learning_rate": 9.402157894736843e-05, "loss": 0.3151, "step": 42737 }, { "epoch": 2.393213125769963, "grad_norm": 1.2580486536026, "learning_rate": 9.402131578947369e-05, "loss": 0.2761, "step": 42738 }, { "epoch": 2.393269123082092, "grad_norm": 1.143646240234375, "learning_rate": 9.402105263157896e-05, "loss": 0.3778, "step": 42739 }, { "epoch": 2.393325120394221, "grad_norm": 1.2043582201004028, "learning_rate": 9.402078947368422e-05, "loss": 0.4391, "step": 42740 }, { "epoch": 2.39338111770635, "grad_norm": 0.9684794545173645, "learning_rate": 9.402052631578948e-05, "loss": 0.3339, "step": 42741 }, { "epoch": 2.3934371150184792, "grad_norm": 1.1173056364059448, "learning_rate": 9.402026315789474e-05, "loss": 0.2831, "step": 42742 }, { "epoch": 2.3934931123306082, "grad_norm": 0.9866859912872314, "learning_rate": 9.402000000000001e-05, "loss": 0.286, "step": 42743 }, { "epoch": 2.3935491096427373, "grad_norm": 3.10390305519104, "learning_rate": 9.401973684210527e-05, "loss": 0.5762, "step": 42744 }, { "epoch": 2.3936051069548663, "grad_norm": 1.401552438735962, "learning_rate": 9.401947368421053e-05, "loss": 0.3413, "step": 42745 }, { "epoch": 2.3936611042669953, "grad_norm": 1.2560654878616333, "learning_rate": 9.401921052631579e-05, "loss": 0.3923, "step": 42746 }, { "epoch": 2.3937171015791243, "grad_norm": 1.161210298538208, "learning_rate": 9.401894736842106e-05, "loss": 0.3351, "step": 42747 }, { "epoch": 2.3937730988912533, "grad_norm": 1.2119914293289185, "learning_rate": 
9.401868421052632e-05, "loss": 0.4222, "step": 42748 }, { "epoch": 2.3938290962033824, "grad_norm": 1.0822558403015137, "learning_rate": 9.401842105263158e-05, "loss": 0.4639, "step": 42749 }, { "epoch": 2.3938850935155114, "grad_norm": 1.3759263753890991, "learning_rate": 9.401815789473684e-05, "loss": 0.4945, "step": 42750 }, { "epoch": 2.3939410908276404, "grad_norm": 1.3513514995574951, "learning_rate": 9.40178947368421e-05, "loss": 0.3742, "step": 42751 }, { "epoch": 2.3939970881397694, "grad_norm": 1.5572748184204102, "learning_rate": 9.401763157894738e-05, "loss": 0.3887, "step": 42752 }, { "epoch": 2.3940530854518984, "grad_norm": 1.1274296045303345, "learning_rate": 9.401736842105264e-05, "loss": 0.4432, "step": 42753 }, { "epoch": 2.3941090827640275, "grad_norm": 1.2218854427337646, "learning_rate": 9.40171052631579e-05, "loss": 0.4089, "step": 42754 }, { "epoch": 2.3941650800761565, "grad_norm": 1.073961615562439, "learning_rate": 9.401684210526316e-05, "loss": 0.5157, "step": 42755 }, { "epoch": 2.3942210773882855, "grad_norm": 1.107279896736145, "learning_rate": 9.401657894736843e-05, "loss": 0.495, "step": 42756 }, { "epoch": 2.3942770747004145, "grad_norm": 1.038818597793579, "learning_rate": 9.401631578947369e-05, "loss": 0.395, "step": 42757 }, { "epoch": 2.3943330720125435, "grad_norm": 1.2185393571853638, "learning_rate": 9.401605263157896e-05, "loss": 0.3753, "step": 42758 }, { "epoch": 2.3943890693246725, "grad_norm": 1.1542401313781738, "learning_rate": 9.401578947368421e-05, "loss": 0.4189, "step": 42759 }, { "epoch": 2.3944450666368016, "grad_norm": 1.1466140747070312, "learning_rate": 9.401552631578948e-05, "loss": 0.3728, "step": 42760 }, { "epoch": 2.3945010639489306, "grad_norm": 1.2688628435134888, "learning_rate": 9.401526315789474e-05, "loss": 0.3835, "step": 42761 }, { "epoch": 2.3945570612610596, "grad_norm": 1.1185503005981445, "learning_rate": 9.401500000000001e-05, "loss": 0.3116, "step": 42762 }, { "epoch": 2.3946130585731886, 
"grad_norm": 1.1198681592941284, "learning_rate": 9.401473684210526e-05, "loss": 0.3635, "step": 42763 }, { "epoch": 2.3946690558853176, "grad_norm": 1.3092260360717773, "learning_rate": 9.401447368421053e-05, "loss": 0.567, "step": 42764 }, { "epoch": 2.3947250531974467, "grad_norm": 1.058984637260437, "learning_rate": 9.401421052631579e-05, "loss": 0.342, "step": 42765 }, { "epoch": 2.3947810505095757, "grad_norm": 1.2620432376861572, "learning_rate": 9.401394736842107e-05, "loss": 0.3833, "step": 42766 }, { "epoch": 2.3948370478217047, "grad_norm": 1.0415794849395752, "learning_rate": 9.401368421052633e-05, "loss": 0.3985, "step": 42767 }, { "epoch": 2.3948930451338337, "grad_norm": 1.145587682723999, "learning_rate": 9.401342105263157e-05, "loss": 0.5011, "step": 42768 }, { "epoch": 2.3949490424459627, "grad_norm": 1.3376713991165161, "learning_rate": 9.401315789473685e-05, "loss": 0.3777, "step": 42769 }, { "epoch": 2.3950050397580918, "grad_norm": 1.276750922203064, "learning_rate": 9.40128947368421e-05, "loss": 0.3519, "step": 42770 }, { "epoch": 2.3950610370702208, "grad_norm": 1.0923223495483398, "learning_rate": 9.401263157894738e-05, "loss": 0.3671, "step": 42771 }, { "epoch": 2.39511703438235, "grad_norm": 1.375555157661438, "learning_rate": 9.401236842105264e-05, "loss": 0.4479, "step": 42772 }, { "epoch": 2.395173031694479, "grad_norm": 1.1508938074111938, "learning_rate": 9.40121052631579e-05, "loss": 0.375, "step": 42773 }, { "epoch": 2.395229029006608, "grad_norm": 1.0027307271957397, "learning_rate": 9.401184210526316e-05, "loss": 0.3353, "step": 42774 }, { "epoch": 2.395285026318737, "grad_norm": 1.1454983949661255, "learning_rate": 9.401157894736843e-05, "loss": 0.3756, "step": 42775 }, { "epoch": 2.395341023630866, "grad_norm": 1.2472492456436157, "learning_rate": 9.401131578947369e-05, "loss": 0.4752, "step": 42776 }, { "epoch": 2.395397020942995, "grad_norm": 1.1967554092407227, "learning_rate": 9.401105263157895e-05, "loss": 0.4373, "step": 
42777 }, { "epoch": 2.395453018255124, "grad_norm": 1.2511521577835083, "learning_rate": 9.401078947368421e-05, "loss": 0.3579, "step": 42778 }, { "epoch": 2.395509015567253, "grad_norm": 1.672969102859497, "learning_rate": 9.401052631578948e-05, "loss": 0.441, "step": 42779 }, { "epoch": 2.395565012879382, "grad_norm": 1.1752283573150635, "learning_rate": 9.401026315789474e-05, "loss": 0.3223, "step": 42780 }, { "epoch": 2.395621010191511, "grad_norm": 1.1414518356323242, "learning_rate": 9.401e-05, "loss": 0.4768, "step": 42781 }, { "epoch": 2.39567700750364, "grad_norm": 1.2581430673599243, "learning_rate": 9.400973684210526e-05, "loss": 0.3748, "step": 42782 }, { "epoch": 2.395733004815769, "grad_norm": 1.402573585510254, "learning_rate": 9.400947368421054e-05, "loss": 0.5812, "step": 42783 }, { "epoch": 2.395789002127898, "grad_norm": 1.3187615871429443, "learning_rate": 9.40092105263158e-05, "loss": 0.4218, "step": 42784 }, { "epoch": 2.395844999440027, "grad_norm": 1.286502480506897, "learning_rate": 9.400894736842106e-05, "loss": 0.3305, "step": 42785 }, { "epoch": 2.395900996752156, "grad_norm": 1.0662767887115479, "learning_rate": 9.400868421052632e-05, "loss": 0.3683, "step": 42786 }, { "epoch": 2.395956994064285, "grad_norm": 1.2989014387130737, "learning_rate": 9.400842105263157e-05, "loss": 0.4997, "step": 42787 }, { "epoch": 2.396012991376414, "grad_norm": 1.1781584024429321, "learning_rate": 9.400815789473685e-05, "loss": 0.4547, "step": 42788 }, { "epoch": 2.396068988688543, "grad_norm": 1.5581963062286377, "learning_rate": 9.400789473684211e-05, "loss": 0.4403, "step": 42789 }, { "epoch": 2.396124986000672, "grad_norm": 1.0447933673858643, "learning_rate": 9.400763157894738e-05, "loss": 0.3238, "step": 42790 }, { "epoch": 2.396180983312801, "grad_norm": 1.1410270929336548, "learning_rate": 9.400736842105263e-05, "loss": 0.2957, "step": 42791 }, { "epoch": 2.39623698062493, "grad_norm": 1.290082573890686, "learning_rate": 9.40071052631579e-05, 
"loss": 0.4667, "step": 42792 }, { "epoch": 2.396292977937059, "grad_norm": 1.031409740447998, "learning_rate": 9.400684210526316e-05, "loss": 0.408, "step": 42793 }, { "epoch": 2.396348975249188, "grad_norm": 1.5443731546401978, "learning_rate": 9.400657894736843e-05, "loss": 0.761, "step": 42794 }, { "epoch": 2.3964049725613172, "grad_norm": 1.1245025396347046, "learning_rate": 9.40063157894737e-05, "loss": 0.2859, "step": 42795 }, { "epoch": 2.3964609698734463, "grad_norm": 1.1500089168548584, "learning_rate": 9.400605263157895e-05, "loss": 0.422, "step": 42796 }, { "epoch": 2.3965169671855753, "grad_norm": 1.508986234664917, "learning_rate": 9.400578947368421e-05, "loss": 0.589, "step": 42797 }, { "epoch": 2.3965729644977043, "grad_norm": 1.029876708984375, "learning_rate": 9.400552631578949e-05, "loss": 0.3333, "step": 42798 }, { "epoch": 2.3966289618098333, "grad_norm": 1.2869879007339478, "learning_rate": 9.400526315789475e-05, "loss": 0.5261, "step": 42799 }, { "epoch": 2.3966849591219623, "grad_norm": 1.2624270915985107, "learning_rate": 9.4005e-05, "loss": 0.3782, "step": 42800 }, { "epoch": 2.3967409564340914, "grad_norm": 1.1485683917999268, "learning_rate": 9.400473684210527e-05, "loss": 0.4573, "step": 42801 }, { "epoch": 2.3967969537462204, "grad_norm": 1.140895962715149, "learning_rate": 9.400447368421052e-05, "loss": 0.4054, "step": 42802 }, { "epoch": 2.3968529510583494, "grad_norm": 1.046038031578064, "learning_rate": 9.40042105263158e-05, "loss": 0.3919, "step": 42803 }, { "epoch": 2.3969089483704784, "grad_norm": 1.0892093181610107, "learning_rate": 9.400394736842106e-05, "loss": 0.3999, "step": 42804 }, { "epoch": 2.3969649456826074, "grad_norm": 0.9485438466072083, "learning_rate": 9.400368421052632e-05, "loss": 0.3217, "step": 42805 }, { "epoch": 2.397020942994736, "grad_norm": 1.319278597831726, "learning_rate": 9.400342105263158e-05, "loss": 0.4454, "step": 42806 }, { "epoch": 2.3970769403068655, "grad_norm": 1.1633769273757935, 
"learning_rate": 9.400315789473685e-05, "loss": 0.3626, "step": 42807 }, { "epoch": 2.397132937618994, "grad_norm": 1.0471646785736084, "learning_rate": 9.400289473684211e-05, "loss": 0.4241, "step": 42808 }, { "epoch": 2.3971889349311235, "grad_norm": 1.117857813835144, "learning_rate": 9.400263157894737e-05, "loss": 0.4194, "step": 42809 }, { "epoch": 2.397244932243252, "grad_norm": 1.3718310594558716, "learning_rate": 9.400236842105263e-05, "loss": 0.4809, "step": 42810 }, { "epoch": 2.3973009295553815, "grad_norm": 1.4049574136734009, "learning_rate": 9.40021052631579e-05, "loss": 0.4892, "step": 42811 }, { "epoch": 2.39735692686751, "grad_norm": 1.1366477012634277, "learning_rate": 9.400184210526316e-05, "loss": 0.3659, "step": 42812 }, { "epoch": 2.3974129241796396, "grad_norm": 1.2459602355957031, "learning_rate": 9.400157894736844e-05, "loss": 0.3963, "step": 42813 }, { "epoch": 2.397468921491768, "grad_norm": 1.1670079231262207, "learning_rate": 9.400131578947368e-05, "loss": 0.3154, "step": 42814 }, { "epoch": 2.3975249188038976, "grad_norm": 1.280793309211731, "learning_rate": 9.400105263157896e-05, "loss": 0.3072, "step": 42815 }, { "epoch": 2.397580916116026, "grad_norm": 1.2907675504684448, "learning_rate": 9.400078947368422e-05, "loss": 0.4184, "step": 42816 }, { "epoch": 2.3976369134281557, "grad_norm": 1.3640002012252808, "learning_rate": 9.400052631578949e-05, "loss": 0.4823, "step": 42817 }, { "epoch": 2.3976929107402842, "grad_norm": 1.5399712324142456, "learning_rate": 9.400026315789473e-05, "loss": 0.5543, "step": 42818 }, { "epoch": 2.3977489080524137, "grad_norm": 1.2145538330078125, "learning_rate": 9.4e-05, "loss": 0.3665, "step": 42819 }, { "epoch": 2.3978049053645423, "grad_norm": 1.2149910926818848, "learning_rate": 9.399973684210527e-05, "loss": 0.3596, "step": 42820 }, { "epoch": 2.3978609026766717, "grad_norm": 1.4620509147644043, "learning_rate": 9.399947368421053e-05, "loss": 0.4531, "step": 42821 }, { "epoch": 2.3979168999888003, 
"grad_norm": 1.2913742065429688, "learning_rate": 9.39992105263158e-05, "loss": 0.3643, "step": 42822 }, { "epoch": 2.3979728973009298, "grad_norm": 1.3904821872711182, "learning_rate": 9.399894736842105e-05, "loss": 0.3905, "step": 42823 }, { "epoch": 2.3980288946130583, "grad_norm": 1.1223782300949097, "learning_rate": 9.399868421052632e-05, "loss": 0.3046, "step": 42824 }, { "epoch": 2.398084891925188, "grad_norm": 1.0365148782730103, "learning_rate": 9.399842105263158e-05, "loss": 0.2993, "step": 42825 }, { "epoch": 2.3981408892373164, "grad_norm": 1.1204408407211304, "learning_rate": 9.399815789473685e-05, "loss": 0.3714, "step": 42826 }, { "epoch": 2.398196886549446, "grad_norm": 1.2624417543411255, "learning_rate": 9.399789473684211e-05, "loss": 0.3913, "step": 42827 }, { "epoch": 2.3982528838615744, "grad_norm": 1.0731436014175415, "learning_rate": 9.399763157894737e-05, "loss": 0.4125, "step": 42828 }, { "epoch": 2.398308881173704, "grad_norm": 1.4700095653533936, "learning_rate": 9.399736842105263e-05, "loss": 0.4481, "step": 42829 }, { "epoch": 2.3983648784858325, "grad_norm": 1.7787823677062988, "learning_rate": 9.39971052631579e-05, "loss": 0.321, "step": 42830 }, { "epoch": 2.398420875797962, "grad_norm": 1.1218106746673584, "learning_rate": 9.399684210526317e-05, "loss": 0.3995, "step": 42831 }, { "epoch": 2.3984768731100905, "grad_norm": 1.0557903051376343, "learning_rate": 9.399657894736843e-05, "loss": 0.3747, "step": 42832 }, { "epoch": 2.39853287042222, "grad_norm": 1.2945040464401245, "learning_rate": 9.399631578947368e-05, "loss": 0.547, "step": 42833 }, { "epoch": 2.3985888677343485, "grad_norm": 1.332270622253418, "learning_rate": 9.399605263157896e-05, "loss": 0.5035, "step": 42834 }, { "epoch": 2.398644865046478, "grad_norm": 0.9269304871559143, "learning_rate": 9.399578947368422e-05, "loss": 0.3199, "step": 42835 }, { "epoch": 2.3987008623586066, "grad_norm": 1.1885533332824707, "learning_rate": 9.399552631578948e-05, "loss": 0.4381, 
"step": 42836 }, { "epoch": 2.398756859670736, "grad_norm": 1.3204103708267212, "learning_rate": 9.399526315789474e-05, "loss": 0.4148, "step": 42837 }, { "epoch": 2.3988128569828646, "grad_norm": 1.2235286235809326, "learning_rate": 9.3995e-05, "loss": 0.4217, "step": 42838 }, { "epoch": 2.398868854294994, "grad_norm": 1.1143721342086792, "learning_rate": 9.399473684210527e-05, "loss": 0.3371, "step": 42839 }, { "epoch": 2.3989248516071227, "grad_norm": 1.0690478086471558, "learning_rate": 9.399447368421053e-05, "loss": 0.3062, "step": 42840 }, { "epoch": 2.398980848919252, "grad_norm": 1.2560781240463257, "learning_rate": 9.399421052631579e-05, "loss": 0.3706, "step": 42841 }, { "epoch": 2.3990368462313807, "grad_norm": 1.0828628540039062, "learning_rate": 9.399394736842105e-05, "loss": 0.4368, "step": 42842 }, { "epoch": 2.39909284354351, "grad_norm": 1.2249202728271484, "learning_rate": 9.399368421052632e-05, "loss": 0.4066, "step": 42843 }, { "epoch": 2.3991488408556387, "grad_norm": 1.4011949300765991, "learning_rate": 9.399342105263158e-05, "loss": 0.4649, "step": 42844 }, { "epoch": 2.399204838167768, "grad_norm": 1.3579423427581787, "learning_rate": 9.399315789473686e-05, "loss": 0.4712, "step": 42845 }, { "epoch": 2.3992608354798968, "grad_norm": 1.1339137554168701, "learning_rate": 9.39928947368421e-05, "loss": 0.4603, "step": 42846 }, { "epoch": 2.399316832792026, "grad_norm": 1.0509252548217773, "learning_rate": 9.399263157894738e-05, "loss": 0.4475, "step": 42847 }, { "epoch": 2.399372830104155, "grad_norm": 1.2603141069412231, "learning_rate": 9.399236842105263e-05, "loss": 0.4419, "step": 42848 }, { "epoch": 2.399428827416284, "grad_norm": 0.9640393257141113, "learning_rate": 9.399210526315791e-05, "loss": 0.426, "step": 42849 }, { "epoch": 2.399484824728413, "grad_norm": 1.1849970817565918, "learning_rate": 9.399184210526317e-05, "loss": 0.3901, "step": 42850 }, { "epoch": 2.399540822040542, "grad_norm": 1.0341917276382446, "learning_rate": 
9.399157894736843e-05, "loss": 0.3527, "step": 42851 }, { "epoch": 2.399596819352671, "grad_norm": 1.13348388671875, "learning_rate": 9.399131578947369e-05, "loss": 0.3958, "step": 42852 }, { "epoch": 2.3996528166648, "grad_norm": 1.149422526359558, "learning_rate": 9.399105263157895e-05, "loss": 0.308, "step": 42853 }, { "epoch": 2.399708813976929, "grad_norm": 1.173488736152649, "learning_rate": 9.399078947368422e-05, "loss": 0.3812, "step": 42854 }, { "epoch": 2.399764811289058, "grad_norm": 1.33085298538208, "learning_rate": 9.399052631578947e-05, "loss": 0.4558, "step": 42855 }, { "epoch": 2.399820808601187, "grad_norm": 1.2319053411483765, "learning_rate": 9.399026315789474e-05, "loss": 0.4156, "step": 42856 }, { "epoch": 2.399876805913316, "grad_norm": 1.6285816431045532, "learning_rate": 9.399e-05, "loss": 0.4663, "step": 42857 }, { "epoch": 2.399932803225445, "grad_norm": 1.435790777206421, "learning_rate": 9.398973684210527e-05, "loss": 0.4234, "step": 42858 }, { "epoch": 2.399988800537574, "grad_norm": 1.1825608015060425, "learning_rate": 9.398947368421053e-05, "loss": 0.3957, "step": 42859 }, { "epoch": 2.400044797849703, "grad_norm": 1.055742859840393, "learning_rate": 9.398921052631579e-05, "loss": 0.3827, "step": 42860 }, { "epoch": 2.400100795161832, "grad_norm": 1.2493926286697388, "learning_rate": 9.398894736842105e-05, "loss": 0.5136, "step": 42861 }, { "epoch": 2.400156792473961, "grad_norm": 0.9950408935546875, "learning_rate": 9.398868421052633e-05, "loss": 0.3472, "step": 42862 }, { "epoch": 2.40021278978609, "grad_norm": 1.24483323097229, "learning_rate": 9.398842105263159e-05, "loss": 0.4914, "step": 42863 }, { "epoch": 2.400268787098219, "grad_norm": 1.0699388980865479, "learning_rate": 9.398815789473684e-05, "loss": 0.4175, "step": 42864 }, { "epoch": 2.400324784410348, "grad_norm": 1.0724236965179443, "learning_rate": 9.39878947368421e-05, "loss": 0.4878, "step": 42865 }, { "epoch": 2.400380781722477, "grad_norm": 1.6201810836791992, 
"learning_rate": 9.398763157894738e-05, "loss": 0.2849, "step": 42866 }, { "epoch": 2.400436779034606, "grad_norm": 0.9813246726989746, "learning_rate": 9.398736842105264e-05, "loss": 0.3453, "step": 42867 }, { "epoch": 2.400492776346735, "grad_norm": 1.2604049444198608, "learning_rate": 9.398710526315791e-05, "loss": 0.4922, "step": 42868 }, { "epoch": 2.400548773658864, "grad_norm": 1.163133144378662, "learning_rate": 9.398684210526316e-05, "loss": 0.4883, "step": 42869 }, { "epoch": 2.4006047709709932, "grad_norm": 1.082651972770691, "learning_rate": 9.398657894736843e-05, "loss": 0.3854, "step": 42870 }, { "epoch": 2.4006607682831222, "grad_norm": 1.1151775121688843, "learning_rate": 9.398631578947369e-05, "loss": 0.3727, "step": 42871 }, { "epoch": 2.4007167655952513, "grad_norm": 1.364314079284668, "learning_rate": 9.398605263157895e-05, "loss": 0.3411, "step": 42872 }, { "epoch": 2.4007727629073803, "grad_norm": 1.1152544021606445, "learning_rate": 9.398578947368421e-05, "loss": 0.4127, "step": 42873 }, { "epoch": 2.4008287602195093, "grad_norm": 1.131212830543518, "learning_rate": 9.398552631578947e-05, "loss": 0.33, "step": 42874 }, { "epoch": 2.4008847575316383, "grad_norm": 1.6210938692092896, "learning_rate": 9.398526315789474e-05, "loss": 0.4159, "step": 42875 }, { "epoch": 2.4009407548437673, "grad_norm": 1.2321794033050537, "learning_rate": 9.3985e-05, "loss": 0.4258, "step": 42876 }, { "epoch": 2.4009967521558964, "grad_norm": 1.2900965213775635, "learning_rate": 9.398473684210528e-05, "loss": 0.4819, "step": 42877 }, { "epoch": 2.4010527494680254, "grad_norm": 1.0988104343414307, "learning_rate": 9.398447368421052e-05, "loss": 0.5025, "step": 42878 }, { "epoch": 2.4011087467801544, "grad_norm": 1.0248181819915771, "learning_rate": 9.39842105263158e-05, "loss": 0.3141, "step": 42879 }, { "epoch": 2.4011647440922834, "grad_norm": 1.2771834135055542, "learning_rate": 9.398394736842105e-05, "loss": 0.4338, "step": 42880 }, { "epoch": 
2.4012207414044124, "grad_norm": 1.1962817907333374, "learning_rate": 9.398368421052633e-05, "loss": 0.3978, "step": 42881 }, { "epoch": 2.4012767387165415, "grad_norm": 1.3730812072753906, "learning_rate": 9.398342105263159e-05, "loss": 0.3833, "step": 42882 }, { "epoch": 2.4013327360286705, "grad_norm": 1.0591527223587036, "learning_rate": 9.398315789473685e-05, "loss": 0.3921, "step": 42883 }, { "epoch": 2.4013887333407995, "grad_norm": 1.1018654108047485, "learning_rate": 9.398289473684211e-05, "loss": 0.3963, "step": 42884 }, { "epoch": 2.4014447306529285, "grad_norm": 1.2255334854125977, "learning_rate": 9.398263157894738e-05, "loss": 0.4485, "step": 42885 }, { "epoch": 2.4015007279650575, "grad_norm": 1.2397135496139526, "learning_rate": 9.398236842105264e-05, "loss": 0.4718, "step": 42886 }, { "epoch": 2.4015567252771866, "grad_norm": 1.2437766790390015, "learning_rate": 9.39821052631579e-05, "loss": 0.4592, "step": 42887 }, { "epoch": 2.4016127225893156, "grad_norm": 1.1579355001449585, "learning_rate": 9.398184210526316e-05, "loss": 0.3638, "step": 42888 }, { "epoch": 2.4016687199014446, "grad_norm": 1.391806960105896, "learning_rate": 9.398157894736842e-05, "loss": 0.3318, "step": 42889 }, { "epoch": 2.4017247172135736, "grad_norm": 1.3346620798110962, "learning_rate": 9.398131578947369e-05, "loss": 0.3775, "step": 42890 }, { "epoch": 2.4017807145257026, "grad_norm": 1.1103512048721313, "learning_rate": 9.398105263157895e-05, "loss": 0.4334, "step": 42891 }, { "epoch": 2.4018367118378317, "grad_norm": 1.2742112874984741, "learning_rate": 9.398078947368421e-05, "loss": 0.4332, "step": 42892 }, { "epoch": 2.4018927091499607, "grad_norm": 1.362546443939209, "learning_rate": 9.398052631578947e-05, "loss": 0.4832, "step": 42893 }, { "epoch": 2.4019487064620897, "grad_norm": 1.0513864755630493, "learning_rate": 9.398026315789475e-05, "loss": 0.3855, "step": 42894 }, { "epoch": 2.4020047037742187, "grad_norm": 1.298464059829712, "learning_rate": 9.398e-05, 
"loss": 0.43, "step": 42895 }, { "epoch": 2.4020607010863477, "grad_norm": 1.3281333446502686, "learning_rate": 9.397973684210526e-05, "loss": 0.4325, "step": 42896 }, { "epoch": 2.4021166983984767, "grad_norm": 1.0945121049880981, "learning_rate": 9.397947368421052e-05, "loss": 0.3481, "step": 42897 }, { "epoch": 2.4021726957106058, "grad_norm": 1.1759122610092163, "learning_rate": 9.39792105263158e-05, "loss": 0.402, "step": 42898 }, { "epoch": 2.402228693022735, "grad_norm": 1.086368203163147, "learning_rate": 9.397894736842106e-05, "loss": 0.3861, "step": 42899 }, { "epoch": 2.402284690334864, "grad_norm": 1.3255102634429932, "learning_rate": 9.397868421052633e-05, "loss": 0.386, "step": 42900 }, { "epoch": 2.402340687646993, "grad_norm": 1.1373614072799683, "learning_rate": 9.397842105263158e-05, "loss": 0.3563, "step": 42901 }, { "epoch": 2.402396684959122, "grad_norm": 1.3802698850631714, "learning_rate": 9.397815789473685e-05, "loss": 0.3763, "step": 42902 }, { "epoch": 2.402452682271251, "grad_norm": 1.1138663291931152, "learning_rate": 9.397789473684211e-05, "loss": 0.3746, "step": 42903 }, { "epoch": 2.40250867958338, "grad_norm": 1.2412904500961304, "learning_rate": 9.397763157894738e-05, "loss": 0.414, "step": 42904 }, { "epoch": 2.402564676895509, "grad_norm": 1.4699715375900269, "learning_rate": 9.397736842105264e-05, "loss": 0.3551, "step": 42905 }, { "epoch": 2.402620674207638, "grad_norm": 1.1739765405654907, "learning_rate": 9.397710526315789e-05, "loss": 0.5009, "step": 42906 }, { "epoch": 2.402676671519767, "grad_norm": 1.2483924627304077, "learning_rate": 9.397684210526316e-05, "loss": 0.4668, "step": 42907 }, { "epoch": 2.402732668831896, "grad_norm": 1.153741478919983, "learning_rate": 9.397657894736842e-05, "loss": 0.5129, "step": 42908 }, { "epoch": 2.402788666144025, "grad_norm": 1.2788336277008057, "learning_rate": 9.39763157894737e-05, "loss": 0.3661, "step": 42909 }, { "epoch": 2.402844663456154, "grad_norm": 1.0794240236282349, 
"learning_rate": 9.397605263157894e-05, "loss": 0.3934, "step": 42910 }, { "epoch": 2.402900660768283, "grad_norm": 1.0847769975662231, "learning_rate": 9.397578947368421e-05, "loss": 0.4521, "step": 42911 }, { "epoch": 2.402956658080412, "grad_norm": 1.2194803953170776, "learning_rate": 9.397552631578947e-05, "loss": 0.4577, "step": 42912 }, { "epoch": 2.403012655392541, "grad_norm": 1.2085070610046387, "learning_rate": 9.397526315789475e-05, "loss": 0.3837, "step": 42913 }, { "epoch": 2.40306865270467, "grad_norm": 1.1162357330322266, "learning_rate": 9.397500000000001e-05, "loss": 0.3928, "step": 42914 }, { "epoch": 2.403124650016799, "grad_norm": 1.1461759805679321, "learning_rate": 9.397473684210527e-05, "loss": 0.3563, "step": 42915 }, { "epoch": 2.403180647328928, "grad_norm": 1.1112070083618164, "learning_rate": 9.397447368421053e-05, "loss": 0.4599, "step": 42916 }, { "epoch": 2.403236644641057, "grad_norm": 1.3734626770019531, "learning_rate": 9.39742105263158e-05, "loss": 0.5404, "step": 42917 }, { "epoch": 2.403292641953186, "grad_norm": 1.5203425884246826, "learning_rate": 9.397394736842106e-05, "loss": 0.406, "step": 42918 }, { "epoch": 2.403348639265315, "grad_norm": 1.3640928268432617, "learning_rate": 9.397368421052632e-05, "loss": 0.6159, "step": 42919 }, { "epoch": 2.403404636577444, "grad_norm": 1.1265184879302979, "learning_rate": 9.397342105263158e-05, "loss": 0.415, "step": 42920 }, { "epoch": 2.403460633889573, "grad_norm": 1.3865538835525513, "learning_rate": 9.397315789473685e-05, "loss": 0.4299, "step": 42921 }, { "epoch": 2.4035166312017022, "grad_norm": 1.5282878875732422, "learning_rate": 9.397289473684211e-05, "loss": 0.3583, "step": 42922 }, { "epoch": 2.4035726285138312, "grad_norm": 1.1397448778152466, "learning_rate": 9.397263157894737e-05, "loss": 0.4033, "step": 42923 }, { "epoch": 2.4036286258259603, "grad_norm": 1.0645920038223267, "learning_rate": 9.397236842105263e-05, "loss": 0.3553, "step": 42924 }, { "epoch": 
2.4036846231380893, "grad_norm": 1.0781527757644653, "learning_rate": 9.397210526315789e-05, "loss": 0.3036, "step": 42925 }, { "epoch": 2.4037406204502183, "grad_norm": 1.1391663551330566, "learning_rate": 9.397184210526316e-05, "loss": 0.3779, "step": 42926 }, { "epoch": 2.4037966177623473, "grad_norm": 1.1351583003997803, "learning_rate": 9.397157894736842e-05, "loss": 0.4974, "step": 42927 }, { "epoch": 2.4038526150744763, "grad_norm": 1.0221221446990967, "learning_rate": 9.397131578947368e-05, "loss": 0.3608, "step": 42928 }, { "epoch": 2.4039086123866054, "grad_norm": 1.3087012767791748, "learning_rate": 9.397105263157894e-05, "loss": 0.4143, "step": 42929 }, { "epoch": 2.4039646096987344, "grad_norm": 1.138741374015808, "learning_rate": 9.397078947368422e-05, "loss": 0.4244, "step": 42930 }, { "epoch": 2.4040206070108634, "grad_norm": 1.221858024597168, "learning_rate": 9.397052631578948e-05, "loss": 0.4279, "step": 42931 }, { "epoch": 2.4040766043229924, "grad_norm": 1.3469194173812866, "learning_rate": 9.397026315789475e-05, "loss": 0.4146, "step": 42932 }, { "epoch": 2.4041326016351214, "grad_norm": 1.137859582901001, "learning_rate": 9.397e-05, "loss": 0.4299, "step": 42933 }, { "epoch": 2.4041885989472505, "grad_norm": 1.1727951765060425, "learning_rate": 9.396973684210527e-05, "loss": 0.3589, "step": 42934 }, { "epoch": 2.4042445962593795, "grad_norm": 1.0487192869186401, "learning_rate": 9.396947368421053e-05, "loss": 0.4531, "step": 42935 }, { "epoch": 2.4043005935715085, "grad_norm": 1.4081909656524658, "learning_rate": 9.39692105263158e-05, "loss": 0.4299, "step": 42936 }, { "epoch": 2.4043565908836375, "grad_norm": 1.3632649183273315, "learning_rate": 9.396894736842106e-05, "loss": 0.3999, "step": 42937 }, { "epoch": 2.4044125881957665, "grad_norm": 1.4034035205841064, "learning_rate": 9.396868421052632e-05, "loss": 0.4279, "step": 42938 }, { "epoch": 2.4044685855078956, "grad_norm": 1.3694361448287964, "learning_rate": 9.396842105263158e-05, 
"loss": 0.4226, "step": 42939 }, { "epoch": 2.4045245828200246, "grad_norm": 1.2090771198272705, "learning_rate": 9.396815789473684e-05, "loss": 0.4353, "step": 42940 }, { "epoch": 2.4045805801321536, "grad_norm": 1.2322231531143188, "learning_rate": 9.396789473684211e-05, "loss": 0.3873, "step": 42941 }, { "epoch": 2.4046365774442826, "grad_norm": 1.3113725185394287, "learning_rate": 9.396763157894737e-05, "loss": 0.4536, "step": 42942 }, { "epoch": 2.4046925747564116, "grad_norm": 1.192201852798462, "learning_rate": 9.396736842105263e-05, "loss": 0.384, "step": 42943 }, { "epoch": 2.4047485720685406, "grad_norm": 1.15830397605896, "learning_rate": 9.39671052631579e-05, "loss": 0.3974, "step": 42944 }, { "epoch": 2.4048045693806697, "grad_norm": 1.2198481559753418, "learning_rate": 9.396684210526317e-05, "loss": 0.3559, "step": 42945 }, { "epoch": 2.4048605666927987, "grad_norm": 1.2232214212417603, "learning_rate": 9.396657894736843e-05, "loss": 0.4375, "step": 42946 }, { "epoch": 2.4049165640049277, "grad_norm": 1.1937845945358276, "learning_rate": 9.396631578947369e-05, "loss": 0.532, "step": 42947 }, { "epoch": 2.4049725613170567, "grad_norm": 1.3005129098892212, "learning_rate": 9.396605263157895e-05, "loss": 0.3676, "step": 42948 }, { "epoch": 2.4050285586291857, "grad_norm": 1.0055527687072754, "learning_rate": 9.396578947368422e-05, "loss": 0.3824, "step": 42949 }, { "epoch": 2.4050845559413148, "grad_norm": 1.0742515325546265, "learning_rate": 9.396552631578948e-05, "loss": 0.402, "step": 42950 }, { "epoch": 2.405140553253444, "grad_norm": 1.2483686208724976, "learning_rate": 9.396526315789474e-05, "loss": 0.3754, "step": 42951 }, { "epoch": 2.405196550565573, "grad_norm": 1.295030951499939, "learning_rate": 9.3965e-05, "loss": 0.3978, "step": 42952 }, { "epoch": 2.405252547877702, "grad_norm": 1.2300734519958496, "learning_rate": 9.396473684210527e-05, "loss": 0.3723, "step": 42953 }, { "epoch": 2.405308545189831, "grad_norm": 1.435346245765686, 
"learning_rate": 9.396447368421053e-05, "loss": 0.5706, "step": 42954 }, { "epoch": 2.40536454250196, "grad_norm": 1.1682757139205933, "learning_rate": 9.39642105263158e-05, "loss": 0.3188, "step": 42955 }, { "epoch": 2.405420539814089, "grad_norm": 1.174091100692749, "learning_rate": 9.396394736842105e-05, "loss": 0.4594, "step": 42956 }, { "epoch": 2.405476537126218, "grad_norm": 1.4960664510726929, "learning_rate": 9.396368421052631e-05, "loss": 0.4271, "step": 42957 }, { "epoch": 2.405532534438347, "grad_norm": 1.2805312871932983, "learning_rate": 9.396342105263158e-05, "loss": 0.4597, "step": 42958 }, { "epoch": 2.405588531750476, "grad_norm": 1.1715357303619385, "learning_rate": 9.396315789473684e-05, "loss": 0.4632, "step": 42959 }, { "epoch": 2.405644529062605, "grad_norm": 1.0852584838867188, "learning_rate": 9.396289473684212e-05, "loss": 0.4034, "step": 42960 }, { "epoch": 2.405700526374734, "grad_norm": 1.5558000802993774, "learning_rate": 9.396263157894736e-05, "loss": 0.4439, "step": 42961 }, { "epoch": 2.405756523686863, "grad_norm": 1.12416672706604, "learning_rate": 9.396236842105264e-05, "loss": 0.413, "step": 42962 }, { "epoch": 2.405812520998992, "grad_norm": 1.1439340114593506, "learning_rate": 9.39621052631579e-05, "loss": 0.3941, "step": 42963 }, { "epoch": 2.405868518311121, "grad_norm": 1.1268935203552246, "learning_rate": 9.396184210526317e-05, "loss": 0.3592, "step": 42964 }, { "epoch": 2.40592451562325, "grad_norm": 1.2831907272338867, "learning_rate": 9.396157894736842e-05, "loss": 0.4107, "step": 42965 }, { "epoch": 2.405980512935379, "grad_norm": 1.372144341468811, "learning_rate": 9.396131578947369e-05, "loss": 0.5267, "step": 42966 }, { "epoch": 2.406036510247508, "grad_norm": 1.155949354171753, "learning_rate": 9.396105263157895e-05, "loss": 0.4104, "step": 42967 }, { "epoch": 2.406092507559637, "grad_norm": 1.1827507019042969, "learning_rate": 9.396078947368422e-05, "loss": 0.3735, "step": 42968 }, { "epoch": 2.406148504871766, 
"grad_norm": 1.2791862487792969, "learning_rate": 9.396052631578948e-05, "loss": 0.4888, "step": 42969 }, { "epoch": 2.406204502183895, "grad_norm": 1.4813575744628906, "learning_rate": 9.396026315789474e-05, "loss": 0.4679, "step": 42970 }, { "epoch": 2.406260499496024, "grad_norm": 1.0876219272613525, "learning_rate": 9.396e-05, "loss": 0.3157, "step": 42971 }, { "epoch": 2.406316496808153, "grad_norm": 1.0424891710281372, "learning_rate": 9.395973684210527e-05, "loss": 0.3645, "step": 42972 }, { "epoch": 2.406372494120282, "grad_norm": 1.0234819650650024, "learning_rate": 9.395947368421053e-05, "loss": 0.322, "step": 42973 }, { "epoch": 2.406428491432411, "grad_norm": 1.3803379535675049, "learning_rate": 9.39592105263158e-05, "loss": 0.4928, "step": 42974 }, { "epoch": 2.4064844887445402, "grad_norm": 1.032228708267212, "learning_rate": 9.395894736842105e-05, "loss": 0.3183, "step": 42975 }, { "epoch": 2.4065404860566693, "grad_norm": 1.038776159286499, "learning_rate": 9.395868421052631e-05, "loss": 0.3472, "step": 42976 }, { "epoch": 2.4065964833687983, "grad_norm": 1.3813427686691284, "learning_rate": 9.395842105263159e-05, "loss": 0.3408, "step": 42977 }, { "epoch": 2.4066524806809273, "grad_norm": 1.138115406036377, "learning_rate": 9.395815789473685e-05, "loss": 0.3637, "step": 42978 }, { "epoch": 2.4067084779930563, "grad_norm": 0.9865298271179199, "learning_rate": 9.39578947368421e-05, "loss": 0.3841, "step": 42979 }, { "epoch": 2.4067644753051853, "grad_norm": 1.16624915599823, "learning_rate": 9.395763157894737e-05, "loss": 0.2743, "step": 42980 }, { "epoch": 2.4068204726173144, "grad_norm": 1.3586690425872803, "learning_rate": 9.395736842105264e-05, "loss": 0.4359, "step": 42981 }, { "epoch": 2.4068764699294434, "grad_norm": 1.3061840534210205, "learning_rate": 9.39571052631579e-05, "loss": 0.3645, "step": 42982 }, { "epoch": 2.4069324672415724, "grad_norm": 1.1973698139190674, "learning_rate": 9.395684210526316e-05, "loss": 0.4012, "step": 42983 }, { 
"epoch": 2.4069884645537014, "grad_norm": 1.3382766246795654, "learning_rate": 9.395657894736842e-05, "loss": 0.3996, "step": 42984 }, { "epoch": 2.4070444618658304, "grad_norm": 1.1486364603042603, "learning_rate": 9.395631578947369e-05, "loss": 0.3254, "step": 42985 }, { "epoch": 2.4071004591779595, "grad_norm": 1.4335806369781494, "learning_rate": 9.395605263157895e-05, "loss": 0.4245, "step": 42986 }, { "epoch": 2.4071564564900885, "grad_norm": 1.1917120218276978, "learning_rate": 9.395578947368423e-05, "loss": 0.5286, "step": 42987 }, { "epoch": 2.4072124538022175, "grad_norm": 1.2304513454437256, "learning_rate": 9.395552631578947e-05, "loss": 0.4125, "step": 42988 }, { "epoch": 2.4072684511143465, "grad_norm": 1.0390325784683228, "learning_rate": 9.395526315789474e-05, "loss": 0.3033, "step": 42989 }, { "epoch": 2.4073244484264755, "grad_norm": 1.2280079126358032, "learning_rate": 9.3955e-05, "loss": 0.3685, "step": 42990 }, { "epoch": 2.4073804457386045, "grad_norm": 1.1059726476669312, "learning_rate": 9.395473684210528e-05, "loss": 0.2905, "step": 42991 }, { "epoch": 2.4074364430507336, "grad_norm": 1.502891182899475, "learning_rate": 9.395447368421054e-05, "loss": 0.4896, "step": 42992 }, { "epoch": 2.4074924403628626, "grad_norm": 1.170148491859436, "learning_rate": 9.395421052631578e-05, "loss": 0.3585, "step": 42993 }, { "epoch": 2.4075484376749916, "grad_norm": 1.3524712324142456, "learning_rate": 9.395394736842106e-05, "loss": 0.4593, "step": 42994 }, { "epoch": 2.4076044349871206, "grad_norm": 1.226854681968689, "learning_rate": 9.395368421052632e-05, "loss": 0.3867, "step": 42995 }, { "epoch": 2.4076604322992496, "grad_norm": 1.2388516664505005, "learning_rate": 9.395342105263159e-05, "loss": 0.4901, "step": 42996 }, { "epoch": 2.4077164296113787, "grad_norm": 1.0370386838912964, "learning_rate": 9.395315789473685e-05, "loss": 0.364, "step": 42997 }, { "epoch": 2.4077724269235077, "grad_norm": 1.2158148288726807, "learning_rate": 
9.395289473684211e-05, "loss": 0.3682, "step": 42998 }, { "epoch": 2.4078284242356367, "grad_norm": 1.2620946168899536, "learning_rate": 9.395263157894737e-05, "loss": 0.4716, "step": 42999 }, { "epoch": 2.4078844215477657, "grad_norm": 1.3492554426193237, "learning_rate": 9.395236842105264e-05, "loss": 0.3541, "step": 43000 }, { "epoch": 2.4079404188598947, "grad_norm": 1.1032800674438477, "learning_rate": 9.39521052631579e-05, "loss": 0.526, "step": 43001 }, { "epoch": 2.4079964161720238, "grad_norm": 1.1166880130767822, "learning_rate": 9.395184210526316e-05, "loss": 0.4579, "step": 43002 }, { "epoch": 2.4080524134841528, "grad_norm": 1.3685531616210938, "learning_rate": 9.395157894736842e-05, "loss": 0.4222, "step": 43003 }, { "epoch": 2.408108410796282, "grad_norm": 1.185243010520935, "learning_rate": 9.39513157894737e-05, "loss": 0.3348, "step": 43004 }, { "epoch": 2.408164408108411, "grad_norm": 1.3580867052078247, "learning_rate": 9.395105263157895e-05, "loss": 0.4418, "step": 43005 }, { "epoch": 2.40822040542054, "grad_norm": 1.2270675897598267, "learning_rate": 9.395078947368421e-05, "loss": 0.3925, "step": 43006 }, { "epoch": 2.408276402732669, "grad_norm": 1.0914605855941772, "learning_rate": 9.395052631578947e-05, "loss": 0.3445, "step": 43007 }, { "epoch": 2.408332400044798, "grad_norm": 1.897592544555664, "learning_rate": 9.395026315789475e-05, "loss": 0.4612, "step": 43008 }, { "epoch": 2.408388397356927, "grad_norm": 1.2961806058883667, "learning_rate": 9.395000000000001e-05, "loss": 0.4088, "step": 43009 }, { "epoch": 2.408444394669056, "grad_norm": 1.277518391609192, "learning_rate": 9.394973684210527e-05, "loss": 0.3839, "step": 43010 }, { "epoch": 2.408500391981185, "grad_norm": 1.1530386209487915, "learning_rate": 9.394947368421053e-05, "loss": 0.3807, "step": 43011 }, { "epoch": 2.408556389293314, "grad_norm": 1.3407140970230103, "learning_rate": 9.394921052631579e-05, "loss": 0.3558, "step": 43012 }, { "epoch": 2.408612386605443, 
"grad_norm": 1.0811842679977417, "learning_rate": 9.394894736842106e-05, "loss": 0.3878, "step": 43013 }, { "epoch": 2.408668383917572, "grad_norm": 1.2993879318237305, "learning_rate": 9.394868421052632e-05, "loss": 0.5521, "step": 43014 }, { "epoch": 2.408724381229701, "grad_norm": 1.1716740131378174, "learning_rate": 9.394842105263159e-05, "loss": 0.4078, "step": 43015 }, { "epoch": 2.40878037854183, "grad_norm": 1.3194535970687866, "learning_rate": 9.394815789473684e-05, "loss": 0.6376, "step": 43016 }, { "epoch": 2.408836375853959, "grad_norm": 1.1337637901306152, "learning_rate": 9.394789473684211e-05, "loss": 0.3578, "step": 43017 }, { "epoch": 2.408892373166088, "grad_norm": 1.2077672481536865, "learning_rate": 9.394763157894737e-05, "loss": 0.3894, "step": 43018 }, { "epoch": 2.408948370478217, "grad_norm": 1.4268771409988403, "learning_rate": 9.394736842105264e-05, "loss": 0.4158, "step": 43019 }, { "epoch": 2.409004367790346, "grad_norm": 1.1748284101486206, "learning_rate": 9.394710526315789e-05, "loss": 0.3332, "step": 43020 }, { "epoch": 2.409060365102475, "grad_norm": 1.2950421571731567, "learning_rate": 9.394684210526316e-05, "loss": 0.4042, "step": 43021 }, { "epoch": 2.409116362414604, "grad_norm": 1.2509827613830566, "learning_rate": 9.394657894736842e-05, "loss": 0.4134, "step": 43022 }, { "epoch": 2.409172359726733, "grad_norm": 1.3471120595932007, "learning_rate": 9.39463157894737e-05, "loss": 0.5651, "step": 43023 }, { "epoch": 2.409228357038862, "grad_norm": 1.3577544689178467, "learning_rate": 9.394605263157896e-05, "loss": 0.498, "step": 43024 }, { "epoch": 2.409284354350991, "grad_norm": 1.008226990699768, "learning_rate": 9.394578947368422e-05, "loss": 0.2879, "step": 43025 }, { "epoch": 2.40934035166312, "grad_norm": 1.1439552307128906, "learning_rate": 9.394552631578948e-05, "loss": 0.4208, "step": 43026 }, { "epoch": 2.4093963489752492, "grad_norm": 1.055285930633545, "learning_rate": 9.394526315789474e-05, "loss": 0.3195, "step": 
43027 }, { "epoch": 2.4094523462873783, "grad_norm": 0.9813190698623657, "learning_rate": 9.394500000000001e-05, "loss": 0.2721, "step": 43028 }, { "epoch": 2.4095083435995073, "grad_norm": 1.2558999061584473, "learning_rate": 9.394473684210527e-05, "loss": 0.4237, "step": 43029 }, { "epoch": 2.4095643409116363, "grad_norm": 1.0759652853012085, "learning_rate": 9.394447368421053e-05, "loss": 0.2916, "step": 43030 }, { "epoch": 2.4096203382237653, "grad_norm": 0.8966822624206543, "learning_rate": 9.394421052631579e-05, "loss": 0.3355, "step": 43031 }, { "epoch": 2.4096763355358943, "grad_norm": 1.3577170372009277, "learning_rate": 9.394394736842106e-05, "loss": 0.3826, "step": 43032 }, { "epoch": 2.4097323328480234, "grad_norm": 1.3411133289337158, "learning_rate": 9.394368421052632e-05, "loss": 0.3963, "step": 43033 }, { "epoch": 2.4097883301601524, "grad_norm": 1.043069839477539, "learning_rate": 9.394342105263158e-05, "loss": 0.3662, "step": 43034 }, { "epoch": 2.4098443274722814, "grad_norm": 1.3937034606933594, "learning_rate": 9.394315789473684e-05, "loss": 0.5147, "step": 43035 }, { "epoch": 2.4099003247844104, "grad_norm": 1.065588355064392, "learning_rate": 9.394289473684211e-05, "loss": 0.3697, "step": 43036 }, { "epoch": 2.4099563220965394, "grad_norm": 1.199974775314331, "learning_rate": 9.394263157894737e-05, "loss": 0.3463, "step": 43037 }, { "epoch": 2.4100123194086684, "grad_norm": 1.6323134899139404, "learning_rate": 9.394236842105263e-05, "loss": 0.3932, "step": 43038 }, { "epoch": 2.4100683167207975, "grad_norm": 1.3040753602981567, "learning_rate": 9.39421052631579e-05, "loss": 0.3669, "step": 43039 }, { "epoch": 2.4101243140329265, "grad_norm": 1.1835899353027344, "learning_rate": 9.394184210526317e-05, "loss": 0.5383, "step": 43040 }, { "epoch": 2.4101803113450555, "grad_norm": 1.078391194343567, "learning_rate": 9.394157894736843e-05, "loss": 0.4446, "step": 43041 }, { "epoch": 2.4102363086571845, "grad_norm": 1.1840434074401855, 
"learning_rate": 9.39413157894737e-05, "loss": 0.3895, "step": 43042 }, { "epoch": 2.4102923059693135, "grad_norm": 1.3172699213027954, "learning_rate": 9.394105263157895e-05, "loss": 0.4833, "step": 43043 }, { "epoch": 2.4103483032814426, "grad_norm": 1.2804224491119385, "learning_rate": 9.39407894736842e-05, "loss": 0.3817, "step": 43044 }, { "epoch": 2.4104043005935716, "grad_norm": 1.3820586204528809, "learning_rate": 9.394052631578948e-05, "loss": 0.3568, "step": 43045 }, { "epoch": 2.4104602979057006, "grad_norm": 1.1864701509475708, "learning_rate": 9.394026315789474e-05, "loss": 0.5526, "step": 43046 }, { "epoch": 2.4105162952178296, "grad_norm": 1.1147346496582031, "learning_rate": 9.394000000000001e-05, "loss": 0.4278, "step": 43047 }, { "epoch": 2.4105722925299586, "grad_norm": 1.197258710861206, "learning_rate": 9.393973684210526e-05, "loss": 0.4131, "step": 43048 }, { "epoch": 2.4106282898420877, "grad_norm": 1.2473978996276855, "learning_rate": 9.393947368421053e-05, "loss": 0.3866, "step": 43049 }, { "epoch": 2.4106842871542167, "grad_norm": 1.3464152812957764, "learning_rate": 9.393921052631579e-05, "loss": 0.5236, "step": 43050 }, { "epoch": 2.4107402844663457, "grad_norm": 1.2231892347335815, "learning_rate": 9.393894736842106e-05, "loss": 0.4314, "step": 43051 }, { "epoch": 2.4107962817784747, "grad_norm": 1.1388556957244873, "learning_rate": 9.393868421052632e-05, "loss": 0.3246, "step": 43052 }, { "epoch": 2.4108522790906037, "grad_norm": 1.3741780519485474, "learning_rate": 9.393842105263158e-05, "loss": 0.4131, "step": 43053 }, { "epoch": 2.4109082764027328, "grad_norm": 1.1298327445983887, "learning_rate": 9.393815789473684e-05, "loss": 0.3995, "step": 43054 }, { "epoch": 2.4109642737148618, "grad_norm": 1.1314697265625, "learning_rate": 9.393789473684212e-05, "loss": 0.3856, "step": 43055 }, { "epoch": 2.411020271026991, "grad_norm": 1.2970670461654663, "learning_rate": 9.393763157894738e-05, "loss": 0.4078, "step": 43056 }, { "epoch": 
2.41107626833912, "grad_norm": 1.1381648778915405, "learning_rate": 9.393736842105264e-05, "loss": 0.497, "step": 43057 }, { "epoch": 2.411132265651249, "grad_norm": 1.1786407232284546, "learning_rate": 9.39371052631579e-05, "loss": 0.3873, "step": 43058 }, { "epoch": 2.411188262963378, "grad_norm": 1.4625476598739624, "learning_rate": 9.393684210526317e-05, "loss": 0.4412, "step": 43059 }, { "epoch": 2.411244260275507, "grad_norm": 1.138588786125183, "learning_rate": 9.393657894736843e-05, "loss": 0.3834, "step": 43060 }, { "epoch": 2.411300257587636, "grad_norm": 1.2320101261138916, "learning_rate": 9.393631578947369e-05, "loss": 0.482, "step": 43061 }, { "epoch": 2.411356254899765, "grad_norm": 1.3663620948791504, "learning_rate": 9.393605263157895e-05, "loss": 0.4771, "step": 43062 }, { "epoch": 2.411412252211894, "grad_norm": 1.1438909769058228, "learning_rate": 9.393578947368421e-05, "loss": 0.3449, "step": 43063 }, { "epoch": 2.411468249524023, "grad_norm": 1.2409121990203857, "learning_rate": 9.393552631578948e-05, "loss": 0.3754, "step": 43064 }, { "epoch": 2.411524246836152, "grad_norm": 1.1740427017211914, "learning_rate": 9.393526315789474e-05, "loss": 0.4087, "step": 43065 }, { "epoch": 2.411580244148281, "grad_norm": 1.0233221054077148, "learning_rate": 9.3935e-05, "loss": 0.3297, "step": 43066 }, { "epoch": 2.41163624146041, "grad_norm": 1.2422679662704468, "learning_rate": 9.393473684210526e-05, "loss": 0.39, "step": 43067 }, { "epoch": 2.411692238772539, "grad_norm": 1.1759917736053467, "learning_rate": 9.393447368421053e-05, "loss": 0.3853, "step": 43068 }, { "epoch": 2.411748236084668, "grad_norm": 1.04758620262146, "learning_rate": 9.39342105263158e-05, "loss": 0.3977, "step": 43069 }, { "epoch": 2.411804233396797, "grad_norm": 1.1623505353927612, "learning_rate": 9.393394736842105e-05, "loss": 0.3438, "step": 43070 }, { "epoch": 2.411860230708926, "grad_norm": 1.240695834159851, "learning_rate": 9.393368421052631e-05, "loss": 0.4, "step": 43071 
}, { "epoch": 2.411916228021055, "grad_norm": 1.4051753282546997, "learning_rate": 9.393342105263159e-05, "loss": 0.3885, "step": 43072 }, { "epoch": 2.411972225333184, "grad_norm": 1.0265486240386963, "learning_rate": 9.393315789473685e-05, "loss": 0.3625, "step": 43073 }, { "epoch": 2.412028222645313, "grad_norm": 1.183159351348877, "learning_rate": 9.393289473684212e-05, "loss": 0.3719, "step": 43074 }, { "epoch": 2.412084219957442, "grad_norm": 1.3969826698303223, "learning_rate": 9.393263157894737e-05, "loss": 0.4985, "step": 43075 }, { "epoch": 2.412140217269571, "grad_norm": 1.1585941314697266, "learning_rate": 9.393236842105264e-05, "loss": 0.3914, "step": 43076 }, { "epoch": 2.4121962145817, "grad_norm": 1.1349459886550903, "learning_rate": 9.39321052631579e-05, "loss": 0.3289, "step": 43077 }, { "epoch": 2.412252211893829, "grad_norm": 1.0545337200164795, "learning_rate": 9.393184210526316e-05, "loss": 0.4249, "step": 43078 }, { "epoch": 2.4123082092059582, "grad_norm": 1.295855164527893, "learning_rate": 9.393157894736843e-05, "loss": 0.4178, "step": 43079 }, { "epoch": 2.4123642065180873, "grad_norm": 1.2078099250793457, "learning_rate": 9.393131578947368e-05, "loss": 0.3209, "step": 43080 }, { "epoch": 2.4124202038302163, "grad_norm": 1.2538615465164185, "learning_rate": 9.393105263157895e-05, "loss": 0.5204, "step": 43081 }, { "epoch": 2.4124762011423453, "grad_norm": 1.229345440864563, "learning_rate": 9.393078947368421e-05, "loss": 0.3593, "step": 43082 }, { "epoch": 2.4125321984544743, "grad_norm": 1.5337655544281006, "learning_rate": 9.393052631578948e-05, "loss": 0.5777, "step": 43083 }, { "epoch": 2.4125881957666033, "grad_norm": 1.2354308366775513, "learning_rate": 9.393026315789474e-05, "loss": 0.5183, "step": 43084 }, { "epoch": 2.4126441930787323, "grad_norm": 1.061542272567749, "learning_rate": 9.393e-05, "loss": 0.3403, "step": 43085 }, { "epoch": 2.4127001903908614, "grad_norm": 1.2376623153686523, "learning_rate": 9.392973684210526e-05, 
"loss": 0.4698, "step": 43086 }, { "epoch": 2.4127561877029904, "grad_norm": 1.1181284189224243, "learning_rate": 9.392947368421054e-05, "loss": 0.3205, "step": 43087 }, { "epoch": 2.4128121850151194, "grad_norm": 1.375240683555603, "learning_rate": 9.39292105263158e-05, "loss": 0.3852, "step": 43088 }, { "epoch": 2.4128681823272484, "grad_norm": 1.3245576620101929, "learning_rate": 9.392894736842106e-05, "loss": 0.4236, "step": 43089 }, { "epoch": 2.4129241796393774, "grad_norm": 1.2057394981384277, "learning_rate": 9.392868421052632e-05, "loss": 0.4361, "step": 43090 }, { "epoch": 2.4129801769515065, "grad_norm": 0.9364051818847656, "learning_rate": 9.392842105263159e-05, "loss": 0.3343, "step": 43091 }, { "epoch": 2.4130361742636355, "grad_norm": 1.3495407104492188, "learning_rate": 9.392815789473685e-05, "loss": 0.4471, "step": 43092 }, { "epoch": 2.4130921715757645, "grad_norm": 1.1332517862319946, "learning_rate": 9.392789473684211e-05, "loss": 0.4485, "step": 43093 }, { "epoch": 2.4131481688878935, "grad_norm": 1.1557482481002808, "learning_rate": 9.392763157894737e-05, "loss": 0.3675, "step": 43094 }, { "epoch": 2.4132041662000225, "grad_norm": 1.103502631187439, "learning_rate": 9.392736842105263e-05, "loss": 0.5136, "step": 43095 }, { "epoch": 2.4132601635121516, "grad_norm": 1.2479134798049927, "learning_rate": 9.39271052631579e-05, "loss": 0.4389, "step": 43096 }, { "epoch": 2.4133161608242806, "grad_norm": 1.2877532243728638, "learning_rate": 9.392684210526316e-05, "loss": 0.4599, "step": 43097 }, { "epoch": 2.4133721581364096, "grad_norm": 1.163008689880371, "learning_rate": 9.392657894736842e-05, "loss": 0.3782, "step": 43098 }, { "epoch": 2.4134281554485386, "grad_norm": 1.0230954885482788, "learning_rate": 9.392631578947368e-05, "loss": 0.2977, "step": 43099 }, { "epoch": 2.4134841527606676, "grad_norm": 1.3225160837173462, "learning_rate": 9.392605263157895e-05, "loss": 0.3995, "step": 43100 }, { "epoch": 2.4135401500727967, "grad_norm": 
1.1815807819366455, "learning_rate": 9.392578947368421e-05, "loss": 0.462, "step": 43101 }, { "epoch": 2.4135961473849257, "grad_norm": 1.3101767301559448, "learning_rate": 9.392552631578949e-05, "loss": 0.522, "step": 43102 }, { "epoch": 2.4136521446970547, "grad_norm": 1.246085286140442, "learning_rate": 9.392526315789473e-05, "loss": 0.5063, "step": 43103 }, { "epoch": 2.4137081420091837, "grad_norm": 1.0921101570129395, "learning_rate": 9.3925e-05, "loss": 0.3595, "step": 43104 }, { "epoch": 2.4137641393213127, "grad_norm": 1.2914799451828003, "learning_rate": 9.392473684210527e-05, "loss": 0.3731, "step": 43105 }, { "epoch": 2.4138201366334417, "grad_norm": 1.195311427116394, "learning_rate": 9.392447368421054e-05, "loss": 0.4376, "step": 43106 }, { "epoch": 2.4138761339455708, "grad_norm": 1.1262593269348145, "learning_rate": 9.39242105263158e-05, "loss": 0.4426, "step": 43107 }, { "epoch": 2.4139321312577, "grad_norm": 1.2530790567398071, "learning_rate": 9.392394736842106e-05, "loss": 0.374, "step": 43108 }, { "epoch": 2.413988128569829, "grad_norm": 1.1239497661590576, "learning_rate": 9.392368421052632e-05, "loss": 0.333, "step": 43109 }, { "epoch": 2.414044125881958, "grad_norm": 1.190722107887268, "learning_rate": 9.392342105263159e-05, "loss": 0.4351, "step": 43110 }, { "epoch": 2.414100123194087, "grad_norm": 1.214995265007019, "learning_rate": 9.392315789473685e-05, "loss": 0.431, "step": 43111 }, { "epoch": 2.414156120506216, "grad_norm": 1.322631597518921, "learning_rate": 9.392289473684211e-05, "loss": 0.4152, "step": 43112 }, { "epoch": 2.414212117818345, "grad_norm": 1.034173607826233, "learning_rate": 9.392263157894737e-05, "loss": 0.4139, "step": 43113 }, { "epoch": 2.414268115130474, "grad_norm": 1.2792304754257202, "learning_rate": 9.392236842105263e-05, "loss": 0.4924, "step": 43114 }, { "epoch": 2.414324112442603, "grad_norm": 0.9484776258468628, "learning_rate": 9.39221052631579e-05, "loss": 0.3114, "step": 43115 }, { "epoch": 
2.414380109754732, "grad_norm": 1.2107387781143188, "learning_rate": 9.392184210526316e-05, "loss": 0.3528, "step": 43116 }, { "epoch": 2.414436107066861, "grad_norm": 1.1360641717910767, "learning_rate": 9.392157894736842e-05, "loss": 0.4449, "step": 43117 }, { "epoch": 2.41449210437899, "grad_norm": 1.410962700843811, "learning_rate": 9.392131578947368e-05, "loss": 0.3334, "step": 43118 }, { "epoch": 2.414548101691119, "grad_norm": 1.2803066968917847, "learning_rate": 9.392105263157896e-05, "loss": 0.4316, "step": 43119 }, { "epoch": 2.414604099003248, "grad_norm": 1.0435315370559692, "learning_rate": 9.392078947368422e-05, "loss": 0.4116, "step": 43120 }, { "epoch": 2.414660096315377, "grad_norm": 1.2325584888458252, "learning_rate": 9.392052631578948e-05, "loss": 0.6071, "step": 43121 }, { "epoch": 2.414716093627506, "grad_norm": 1.3006629943847656, "learning_rate": 9.392026315789474e-05, "loss": 0.4262, "step": 43122 }, { "epoch": 2.414772090939635, "grad_norm": 1.2508822679519653, "learning_rate": 9.392000000000001e-05, "loss": 0.5309, "step": 43123 }, { "epoch": 2.414828088251764, "grad_norm": 0.9068685173988342, "learning_rate": 9.391973684210527e-05, "loss": 0.3575, "step": 43124 }, { "epoch": 2.414884085563893, "grad_norm": 1.3196194171905518, "learning_rate": 9.391947368421053e-05, "loss": 0.5246, "step": 43125 }, { "epoch": 2.414940082876022, "grad_norm": 1.0987013578414917, "learning_rate": 9.391921052631579e-05, "loss": 0.3174, "step": 43126 }, { "epoch": 2.414996080188151, "grad_norm": 1.372594952583313, "learning_rate": 9.391894736842106e-05, "loss": 0.3966, "step": 43127 }, { "epoch": 2.41505207750028, "grad_norm": 1.1103476285934448, "learning_rate": 9.391868421052632e-05, "loss": 0.427, "step": 43128 }, { "epoch": 2.415108074812409, "grad_norm": 0.9660261273384094, "learning_rate": 9.39184210526316e-05, "loss": 0.3716, "step": 43129 }, { "epoch": 2.415164072124538, "grad_norm": 5.481461524963379, "learning_rate": 9.391815789473684e-05, "loss": 
0.4418, "step": 43130 }, { "epoch": 2.4152200694366672, "grad_norm": 1.4513428211212158, "learning_rate": 9.39178947368421e-05, "loss": 0.4423, "step": 43131 }, { "epoch": 2.4152760667487962, "grad_norm": 1.5506590604782104, "learning_rate": 9.391763157894737e-05, "loss": 0.4848, "step": 43132 }, { "epoch": 2.4153320640609253, "grad_norm": 1.2460384368896484, "learning_rate": 9.391736842105263e-05, "loss": 0.5426, "step": 43133 }, { "epoch": 2.4153880613730543, "grad_norm": 1.2605429887771606, "learning_rate": 9.39171052631579e-05, "loss": 0.4097, "step": 43134 }, { "epoch": 2.4154440586851833, "grad_norm": 1.403633952140808, "learning_rate": 9.391684210526315e-05, "loss": 0.4767, "step": 43135 }, { "epoch": 2.4155000559973123, "grad_norm": 1.1948398351669312, "learning_rate": 9.391657894736843e-05, "loss": 0.43, "step": 43136 }, { "epoch": 2.415556053309441, "grad_norm": 1.2926198244094849, "learning_rate": 9.391631578947369e-05, "loss": 0.4129, "step": 43137 }, { "epoch": 2.4156120506215704, "grad_norm": 1.553526520729065, "learning_rate": 9.391605263157896e-05, "loss": 0.4335, "step": 43138 }, { "epoch": 2.415668047933699, "grad_norm": 1.153843879699707, "learning_rate": 9.391578947368422e-05, "loss": 0.3703, "step": 43139 }, { "epoch": 2.4157240452458284, "grad_norm": 1.134920358657837, "learning_rate": 9.391552631578948e-05, "loss": 0.4737, "step": 43140 }, { "epoch": 2.415780042557957, "grad_norm": 1.0548110008239746, "learning_rate": 9.391526315789474e-05, "loss": 0.3695, "step": 43141 }, { "epoch": 2.4158360398700864, "grad_norm": 1.035057783126831, "learning_rate": 9.391500000000001e-05, "loss": 0.3744, "step": 43142 }, { "epoch": 2.415892037182215, "grad_norm": 1.5538004636764526, "learning_rate": 9.391473684210527e-05, "loss": 0.4031, "step": 43143 }, { "epoch": 2.4159480344943445, "grad_norm": 1.3576792478561401, "learning_rate": 9.391447368421053e-05, "loss": 0.4627, "step": 43144 }, { "epoch": 2.416004031806473, "grad_norm": 1.1172876358032227, 
"learning_rate": 9.391421052631579e-05, "loss": 0.3467, "step": 43145 }, { "epoch": 2.4160600291186025, "grad_norm": 1.0625871419906616, "learning_rate": 9.391394736842106e-05, "loss": 0.3746, "step": 43146 }, { "epoch": 2.416116026430731, "grad_norm": 1.0962773561477661, "learning_rate": 9.391368421052632e-05, "loss": 0.4264, "step": 43147 }, { "epoch": 2.4161720237428606, "grad_norm": 1.1038094758987427, "learning_rate": 9.391342105263158e-05, "loss": 0.427, "step": 43148 }, { "epoch": 2.416228021054989, "grad_norm": 1.1239385604858398, "learning_rate": 9.391315789473684e-05, "loss": 0.4856, "step": 43149 }, { "epoch": 2.4162840183671186, "grad_norm": 1.0793992280960083, "learning_rate": 9.39128947368421e-05, "loss": 0.3883, "step": 43150 }, { "epoch": 2.416340015679247, "grad_norm": 1.1953121423721313, "learning_rate": 9.391263157894738e-05, "loss": 0.3327, "step": 43151 }, { "epoch": 2.4163960129913766, "grad_norm": 1.2115827798843384, "learning_rate": 9.391236842105264e-05, "loss": 0.3148, "step": 43152 }, { "epoch": 2.416452010303505, "grad_norm": 1.3389991521835327, "learning_rate": 9.39121052631579e-05, "loss": 0.4906, "step": 43153 }, { "epoch": 2.4165080076156347, "grad_norm": 1.2485898733139038, "learning_rate": 9.391184210526316e-05, "loss": 0.4216, "step": 43154 }, { "epoch": 2.4165640049277632, "grad_norm": 1.3658658266067505, "learning_rate": 9.391157894736843e-05, "loss": 0.4177, "step": 43155 }, { "epoch": 2.4166200022398927, "grad_norm": 1.165670394897461, "learning_rate": 9.391131578947369e-05, "loss": 0.4061, "step": 43156 }, { "epoch": 2.4166759995520213, "grad_norm": 1.6250052452087402, "learning_rate": 9.391105263157896e-05, "loss": 0.5847, "step": 43157 }, { "epoch": 2.4167319968641507, "grad_norm": 60.535221099853516, "learning_rate": 9.391078947368421e-05, "loss": 0.3729, "step": 43158 }, { "epoch": 2.4167879941762793, "grad_norm": 1.3058147430419922, "learning_rate": 9.391052631578948e-05, "loss": 0.3938, "step": 43159 }, { "epoch": 
2.416843991488409, "grad_norm": 0.9565534591674805, "learning_rate": 9.391026315789474e-05, "loss": 0.3505, "step": 43160 }, { "epoch": 2.4168999888005374, "grad_norm": 1.2298884391784668, "learning_rate": 9.391000000000001e-05, "loss": 0.4616, "step": 43161 }, { "epoch": 2.416955986112667, "grad_norm": 0.9534597992897034, "learning_rate": 9.390973684210527e-05, "loss": 0.3291, "step": 43162 }, { "epoch": 2.4170119834247954, "grad_norm": 1.2636332511901855, "learning_rate": 9.390947368421053e-05, "loss": 0.4012, "step": 43163 }, { "epoch": 2.417067980736925, "grad_norm": 1.173264503479004, "learning_rate": 9.390921052631579e-05, "loss": 0.4676, "step": 43164 }, { "epoch": 2.4171239780490534, "grad_norm": 1.1306235790252686, "learning_rate": 9.390894736842105e-05, "loss": 0.439, "step": 43165 }, { "epoch": 2.417179975361183, "grad_norm": 1.6614991426467896, "learning_rate": 9.390868421052633e-05, "loss": 0.3796, "step": 43166 }, { "epoch": 2.4172359726733115, "grad_norm": 1.211554765701294, "learning_rate": 9.390842105263157e-05, "loss": 0.4156, "step": 43167 }, { "epoch": 2.417291969985441, "grad_norm": 1.3854492902755737, "learning_rate": 9.390815789473685e-05, "loss": 0.4769, "step": 43168 }, { "epoch": 2.4173479672975695, "grad_norm": 1.379655361175537, "learning_rate": 9.39078947368421e-05, "loss": 0.4333, "step": 43169 }, { "epoch": 2.417403964609699, "grad_norm": 0.9838935136795044, "learning_rate": 9.390763157894738e-05, "loss": 0.3197, "step": 43170 }, { "epoch": 2.4174599619218275, "grad_norm": 1.2255523204803467, "learning_rate": 9.390736842105264e-05, "loss": 0.4104, "step": 43171 }, { "epoch": 2.417515959233957, "grad_norm": 1.3336806297302246, "learning_rate": 9.39071052631579e-05, "loss": 0.4586, "step": 43172 }, { "epoch": 2.4175719565460856, "grad_norm": 1.1134190559387207, "learning_rate": 9.390684210526316e-05, "loss": 0.4706, "step": 43173 }, { "epoch": 2.417627953858215, "grad_norm": 1.2328906059265137, "learning_rate": 9.390657894736843e-05, 
"loss": 0.47, "step": 43174 }, { "epoch": 2.4176839511703436, "grad_norm": 1.3155572414398193, "learning_rate": 9.390631578947369e-05, "loss": 0.4967, "step": 43175 }, { "epoch": 2.417739948482473, "grad_norm": 1.430331826210022, "learning_rate": 9.390605263157895e-05, "loss": 0.4034, "step": 43176 }, { "epoch": 2.4177959457946017, "grad_norm": 1.0663114786148071, "learning_rate": 9.390578947368421e-05, "loss": 0.3639, "step": 43177 }, { "epoch": 2.4178519431067307, "grad_norm": 1.2175368070602417, "learning_rate": 9.390552631578948e-05, "loss": 0.3805, "step": 43178 }, { "epoch": 2.4179079404188597, "grad_norm": 1.1363192796707153, "learning_rate": 9.390526315789474e-05, "loss": 0.4373, "step": 43179 }, { "epoch": 2.4179639377309887, "grad_norm": 1.1539466381072998, "learning_rate": 9.3905e-05, "loss": 0.425, "step": 43180 }, { "epoch": 2.4180199350431177, "grad_norm": 1.173219919204712, "learning_rate": 9.390473684210526e-05, "loss": 0.4532, "step": 43181 }, { "epoch": 2.4180759323552468, "grad_norm": 1.0323699712753296, "learning_rate": 9.390447368421052e-05, "loss": 0.3398, "step": 43182 }, { "epoch": 2.418131929667376, "grad_norm": 1.0396913290023804, "learning_rate": 9.39042105263158e-05, "loss": 0.257, "step": 43183 }, { "epoch": 2.418187926979505, "grad_norm": 1.4664250612258911, "learning_rate": 9.390394736842106e-05, "loss": 0.645, "step": 43184 }, { "epoch": 2.418243924291634, "grad_norm": 1.12051260471344, "learning_rate": 9.390368421052632e-05, "loss": 0.3298, "step": 43185 }, { "epoch": 2.418299921603763, "grad_norm": 1.1920262575149536, "learning_rate": 9.390342105263157e-05, "loss": 0.4536, "step": 43186 }, { "epoch": 2.418355918915892, "grad_norm": 1.313310146331787, "learning_rate": 9.390315789473685e-05, "loss": 0.3502, "step": 43187 }, { "epoch": 2.418411916228021, "grad_norm": 1.0927398204803467, "learning_rate": 9.390289473684211e-05, "loss": 0.4967, "step": 43188 }, { "epoch": 2.41846791354015, "grad_norm": 1.2346994876861572, 
"learning_rate": 9.390263157894738e-05, "loss": 0.4715, "step": 43189 }, { "epoch": 2.418523910852279, "grad_norm": 1.2547309398651123, "learning_rate": 9.390236842105263e-05, "loss": 0.3551, "step": 43190 }, { "epoch": 2.418579908164408, "grad_norm": 1.3142169713974, "learning_rate": 9.39021052631579e-05, "loss": 0.3755, "step": 43191 }, { "epoch": 2.418635905476537, "grad_norm": 1.2423584461212158, "learning_rate": 9.390184210526316e-05, "loss": 0.3715, "step": 43192 }, { "epoch": 2.418691902788666, "grad_norm": 1.1156691312789917, "learning_rate": 9.390157894736843e-05, "loss": 0.4065, "step": 43193 }, { "epoch": 2.418747900100795, "grad_norm": 1.2996442317962646, "learning_rate": 9.39013157894737e-05, "loss": 0.3434, "step": 43194 }, { "epoch": 2.418803897412924, "grad_norm": 1.118552565574646, "learning_rate": 9.390105263157895e-05, "loss": 0.3606, "step": 43195 }, { "epoch": 2.418859894725053, "grad_norm": 1.155348539352417, "learning_rate": 9.390078947368421e-05, "loss": 0.3741, "step": 43196 }, { "epoch": 2.418915892037182, "grad_norm": 1.3397969007492065, "learning_rate": 9.390052631578949e-05, "loss": 0.4408, "step": 43197 }, { "epoch": 2.418971889349311, "grad_norm": 1.0667774677276611, "learning_rate": 9.390026315789475e-05, "loss": 0.3677, "step": 43198 }, { "epoch": 2.41902788666144, "grad_norm": 1.1114362478256226, "learning_rate": 9.39e-05, "loss": 0.3472, "step": 43199 }, { "epoch": 2.419083883973569, "grad_norm": 1.0219911336898804, "learning_rate": 9.389973684210527e-05, "loss": 0.3541, "step": 43200 }, { "epoch": 2.419139881285698, "grad_norm": 1.1505446434020996, "learning_rate": 9.389947368421053e-05, "loss": 0.4004, "step": 43201 }, { "epoch": 2.419195878597827, "grad_norm": 1.1320921182632446, "learning_rate": 9.38992105263158e-05, "loss": 0.3508, "step": 43202 }, { "epoch": 2.419251875909956, "grad_norm": 1.3824846744537354, "learning_rate": 9.389894736842106e-05, "loss": 0.42, "step": 43203 }, { "epoch": 2.419307873222085, "grad_norm": 
1.0686715841293335, "learning_rate": 9.389868421052632e-05, "loss": 0.3867, "step": 43204 }, { "epoch": 2.419363870534214, "grad_norm": 1.3476555347442627, "learning_rate": 9.389842105263158e-05, "loss": 0.4533, "step": 43205 }, { "epoch": 2.419419867846343, "grad_norm": 1.0201270580291748, "learning_rate": 9.389815789473685e-05, "loss": 0.3794, "step": 43206 }, { "epoch": 2.4194758651584722, "grad_norm": 1.5151606798171997, "learning_rate": 9.389789473684211e-05, "loss": 0.5095, "step": 43207 }, { "epoch": 2.4195318624706013, "grad_norm": 1.0412055253982544, "learning_rate": 9.389763157894737e-05, "loss": 0.4149, "step": 43208 }, { "epoch": 2.4195878597827303, "grad_norm": 1.1569360494613647, "learning_rate": 9.389736842105263e-05, "loss": 0.4052, "step": 43209 }, { "epoch": 2.4196438570948593, "grad_norm": 1.0770893096923828, "learning_rate": 9.38971052631579e-05, "loss": 0.4049, "step": 43210 }, { "epoch": 2.4196998544069883, "grad_norm": 1.2463077306747437, "learning_rate": 9.389684210526316e-05, "loss": 0.3686, "step": 43211 }, { "epoch": 2.4197558517191173, "grad_norm": 1.2184151411056519, "learning_rate": 9.389657894736844e-05, "loss": 0.3882, "step": 43212 }, { "epoch": 2.4198118490312464, "grad_norm": 1.1225277185440063, "learning_rate": 9.389631578947368e-05, "loss": 0.3753, "step": 43213 }, { "epoch": 2.4198678463433754, "grad_norm": 1.248574137687683, "learning_rate": 9.389605263157896e-05, "loss": 0.3381, "step": 43214 }, { "epoch": 2.4199238436555044, "grad_norm": 1.0137771368026733, "learning_rate": 9.389578947368422e-05, "loss": 0.3379, "step": 43215 }, { "epoch": 2.4199798409676334, "grad_norm": 1.0141942501068115, "learning_rate": 9.389552631578948e-05, "loss": 0.3385, "step": 43216 }, { "epoch": 2.4200358382797624, "grad_norm": 1.3201202154159546, "learning_rate": 9.389526315789475e-05, "loss": 0.4165, "step": 43217 }, { "epoch": 2.4200918355918914, "grad_norm": 1.6294525861740112, "learning_rate": 9.3895e-05, "loss": 0.5434, "step": 43218 }, { 
"epoch": 2.4201478329040205, "grad_norm": 1.3794221878051758, "learning_rate": 9.389473684210527e-05, "loss": 0.6643, "step": 43219 }, { "epoch": 2.4202038302161495, "grad_norm": 1.267309308052063, "learning_rate": 9.389447368421053e-05, "loss": 0.3686, "step": 43220 }, { "epoch": 2.4202598275282785, "grad_norm": 1.4485201835632324, "learning_rate": 9.38942105263158e-05, "loss": 0.423, "step": 43221 }, { "epoch": 2.4203158248404075, "grad_norm": 1.0893577337265015, "learning_rate": 9.389394736842105e-05, "loss": 0.2788, "step": 43222 }, { "epoch": 2.4203718221525365, "grad_norm": 1.3437695503234863, "learning_rate": 9.389368421052632e-05, "loss": 0.3517, "step": 43223 }, { "epoch": 2.4204278194646656, "grad_norm": 1.0807229280471802, "learning_rate": 9.389342105263158e-05, "loss": 0.3304, "step": 43224 }, { "epoch": 2.4204838167767946, "grad_norm": 1.154120683670044, "learning_rate": 9.389315789473685e-05, "loss": 0.3893, "step": 43225 }, { "epoch": 2.4205398140889236, "grad_norm": 1.243463397026062, "learning_rate": 9.389289473684211e-05, "loss": 0.4865, "step": 43226 }, { "epoch": 2.4205958114010526, "grad_norm": 1.0998270511627197, "learning_rate": 9.389263157894737e-05, "loss": 0.27, "step": 43227 }, { "epoch": 2.4206518087131816, "grad_norm": 1.1756845712661743, "learning_rate": 9.389236842105263e-05, "loss": 0.4328, "step": 43228 }, { "epoch": 2.4207078060253107, "grad_norm": 1.040419578552246, "learning_rate": 9.38921052631579e-05, "loss": 0.4173, "step": 43229 }, { "epoch": 2.4207638033374397, "grad_norm": 1.5336278676986694, "learning_rate": 9.389184210526317e-05, "loss": 0.7211, "step": 43230 }, { "epoch": 2.4208198006495687, "grad_norm": 1.1336097717285156, "learning_rate": 9.389157894736843e-05, "loss": 0.3522, "step": 43231 }, { "epoch": 2.4208757979616977, "grad_norm": 1.3105348348617554, "learning_rate": 9.389131578947369e-05, "loss": 0.4495, "step": 43232 }, { "epoch": 2.4209317952738267, "grad_norm": 1.0838714838027954, "learning_rate": 
9.389105263157896e-05, "loss": 0.4578, "step": 43233 }, { "epoch": 2.4209877925859558, "grad_norm": 1.4369572401046753, "learning_rate": 9.389078947368422e-05, "loss": 0.4893, "step": 43234 }, { "epoch": 2.4210437898980848, "grad_norm": 1.0797373056411743, "learning_rate": 9.389052631578948e-05, "loss": 0.3482, "step": 43235 }, { "epoch": 2.421099787210214, "grad_norm": 0.9420746564865112, "learning_rate": 9.389026315789474e-05, "loss": 0.2995, "step": 43236 }, { "epoch": 2.421155784522343, "grad_norm": 1.3257580995559692, "learning_rate": 9.389e-05, "loss": 0.4331, "step": 43237 }, { "epoch": 2.421211781834472, "grad_norm": 1.193414568901062, "learning_rate": 9.388973684210527e-05, "loss": 0.4675, "step": 43238 }, { "epoch": 2.421267779146601, "grad_norm": 1.1686961650848389, "learning_rate": 9.388947368421053e-05, "loss": 0.3843, "step": 43239 }, { "epoch": 2.42132377645873, "grad_norm": 1.0356837511062622, "learning_rate": 9.388921052631579e-05, "loss": 0.4006, "step": 43240 }, { "epoch": 2.421379773770859, "grad_norm": 1.1721603870391846, "learning_rate": 9.388894736842105e-05, "loss": 0.4251, "step": 43241 }, { "epoch": 2.421435771082988, "grad_norm": 1.0868178606033325, "learning_rate": 9.388868421052632e-05, "loss": 0.3316, "step": 43242 }, { "epoch": 2.421491768395117, "grad_norm": 1.1554374694824219, "learning_rate": 9.388842105263158e-05, "loss": 0.4383, "step": 43243 }, { "epoch": 2.421547765707246, "grad_norm": 1.1034214496612549, "learning_rate": 9.388815789473686e-05, "loss": 0.4773, "step": 43244 }, { "epoch": 2.421603763019375, "grad_norm": 1.2092292308807373, "learning_rate": 9.38878947368421e-05, "loss": 0.4074, "step": 43245 }, { "epoch": 2.421659760331504, "grad_norm": 1.39845609664917, "learning_rate": 9.388763157894738e-05, "loss": 0.6073, "step": 43246 }, { "epoch": 2.421715757643633, "grad_norm": 1.2786108255386353, "learning_rate": 9.388736842105264e-05, "loss": 0.5254, "step": 43247 }, { "epoch": 2.421771754955762, "grad_norm": 
1.2143055200576782, "learning_rate": 9.388710526315791e-05, "loss": 0.4087, "step": 43248 }, { "epoch": 2.421827752267891, "grad_norm": 1.3010919094085693, "learning_rate": 9.388684210526317e-05, "loss": 0.4788, "step": 43249 }, { "epoch": 2.42188374958002, "grad_norm": 1.2912427186965942, "learning_rate": 9.388657894736843e-05, "loss": 0.3812, "step": 43250 }, { "epoch": 2.421939746892149, "grad_norm": 1.0862281322479248, "learning_rate": 9.388631578947369e-05, "loss": 0.3291, "step": 43251 }, { "epoch": 2.421995744204278, "grad_norm": 1.0091866254806519, "learning_rate": 9.388605263157895e-05, "loss": 0.358, "step": 43252 }, { "epoch": 2.422051741516407, "grad_norm": 1.1845595836639404, "learning_rate": 9.388578947368422e-05, "loss": 0.2845, "step": 43253 }, { "epoch": 2.422107738828536, "grad_norm": 1.8282955884933472, "learning_rate": 9.388552631578948e-05, "loss": 0.5202, "step": 43254 }, { "epoch": 2.422163736140665, "grad_norm": 1.4279855489730835, "learning_rate": 9.388526315789474e-05, "loss": 0.3703, "step": 43255 }, { "epoch": 2.422219733452794, "grad_norm": 1.1744455099105835, "learning_rate": 9.3885e-05, "loss": 0.3577, "step": 43256 }, { "epoch": 2.422275730764923, "grad_norm": 1.2386982440948486, "learning_rate": 9.388473684210527e-05, "loss": 0.6016, "step": 43257 }, { "epoch": 2.422331728077052, "grad_norm": 1.3044960498809814, "learning_rate": 9.388447368421053e-05, "loss": 0.4669, "step": 43258 }, { "epoch": 2.4223877253891812, "grad_norm": 1.5267186164855957, "learning_rate": 9.388421052631579e-05, "loss": 0.4866, "step": 43259 }, { "epoch": 2.4224437227013103, "grad_norm": 1.190309762954712, "learning_rate": 9.388394736842105e-05, "loss": 0.3326, "step": 43260 }, { "epoch": 2.4224997200134393, "grad_norm": 1.3032572269439697, "learning_rate": 9.388368421052633e-05, "loss": 0.3926, "step": 43261 }, { "epoch": 2.4225557173255683, "grad_norm": 1.1624886989593506, "learning_rate": 9.388342105263159e-05, "loss": 0.3717, "step": 43262 }, { "epoch": 
2.4226117146376973, "grad_norm": 1.152766466140747, "learning_rate": 9.388315789473685e-05, "loss": 0.4647, "step": 43263 }, { "epoch": 2.4226677119498263, "grad_norm": 1.572954773902893, "learning_rate": 9.38828947368421e-05, "loss": 0.3511, "step": 43264 }, { "epoch": 2.4227237092619553, "grad_norm": 1.1790647506713867, "learning_rate": 9.388263157894738e-05, "loss": 0.397, "step": 43265 }, { "epoch": 2.4227797065740844, "grad_norm": 1.1409581899642944, "learning_rate": 9.388236842105264e-05, "loss": 0.4568, "step": 43266 }, { "epoch": 2.4228357038862134, "grad_norm": 1.3559352159500122, "learning_rate": 9.388210526315791e-05, "loss": 0.4505, "step": 43267 }, { "epoch": 2.4228917011983424, "grad_norm": 1.1805355548858643, "learning_rate": 9.388184210526316e-05, "loss": 0.3235, "step": 43268 }, { "epoch": 2.4229476985104714, "grad_norm": 1.1075761318206787, "learning_rate": 9.388157894736842e-05, "loss": 0.3424, "step": 43269 }, { "epoch": 2.4230036958226004, "grad_norm": 1.2438340187072754, "learning_rate": 9.388131578947369e-05, "loss": 0.5587, "step": 43270 }, { "epoch": 2.4230596931347295, "grad_norm": 1.3456114530563354, "learning_rate": 9.388105263157895e-05, "loss": 0.4115, "step": 43271 }, { "epoch": 2.4231156904468585, "grad_norm": 1.4254484176635742, "learning_rate": 9.388078947368421e-05, "loss": 0.4292, "step": 43272 }, { "epoch": 2.4231716877589875, "grad_norm": 1.6074918508529663, "learning_rate": 9.388052631578947e-05, "loss": 0.4058, "step": 43273 }, { "epoch": 2.4232276850711165, "grad_norm": 1.2041456699371338, "learning_rate": 9.388026315789474e-05, "loss": 0.4227, "step": 43274 }, { "epoch": 2.4232836823832455, "grad_norm": 1.1134364604949951, "learning_rate": 9.388e-05, "loss": 0.3612, "step": 43275 }, { "epoch": 2.4233396796953746, "grad_norm": 1.0012108087539673, "learning_rate": 9.387973684210528e-05, "loss": 0.3828, "step": 43276 }, { "epoch": 2.4233956770075036, "grad_norm": 1.6649832725524902, "learning_rate": 9.387947368421052e-05, 
"loss": 0.3591, "step": 43277 }, { "epoch": 2.4234516743196326, "grad_norm": 1.1950238943099976, "learning_rate": 9.38792105263158e-05, "loss": 0.4079, "step": 43278 }, { "epoch": 2.4235076716317616, "grad_norm": 1.4727941751480103, "learning_rate": 9.387894736842105e-05, "loss": 0.4825, "step": 43279 }, { "epoch": 2.4235636689438906, "grad_norm": 1.3424957990646362, "learning_rate": 9.387868421052633e-05, "loss": 0.4561, "step": 43280 }, { "epoch": 2.4236196662560197, "grad_norm": 1.1803845167160034, "learning_rate": 9.387842105263159e-05, "loss": 0.3833, "step": 43281 }, { "epoch": 2.4236756635681487, "grad_norm": 1.0076804161071777, "learning_rate": 9.387815789473685e-05, "loss": 0.2925, "step": 43282 }, { "epoch": 2.4237316608802777, "grad_norm": 1.3036189079284668, "learning_rate": 9.387789473684211e-05, "loss": 0.412, "step": 43283 }, { "epoch": 2.4237876581924067, "grad_norm": 1.1030062437057495, "learning_rate": 9.387763157894738e-05, "loss": 0.3355, "step": 43284 }, { "epoch": 2.4238436555045357, "grad_norm": 1.480510950088501, "learning_rate": 9.387736842105264e-05, "loss": 0.3778, "step": 43285 }, { "epoch": 2.4238996528166648, "grad_norm": 1.049233078956604, "learning_rate": 9.38771052631579e-05, "loss": 0.3181, "step": 43286 }, { "epoch": 2.4239556501287938, "grad_norm": 1.2352315187454224, "learning_rate": 9.387684210526316e-05, "loss": 0.397, "step": 43287 }, { "epoch": 2.424011647440923, "grad_norm": 1.1681936979293823, "learning_rate": 9.387657894736842e-05, "loss": 0.4868, "step": 43288 }, { "epoch": 2.424067644753052, "grad_norm": 1.1199984550476074, "learning_rate": 9.387631578947369e-05, "loss": 0.5695, "step": 43289 }, { "epoch": 2.424123642065181, "grad_norm": 1.2349029779434204, "learning_rate": 9.387605263157895e-05, "loss": 0.3664, "step": 43290 }, { "epoch": 2.42417963937731, "grad_norm": 1.2325079441070557, "learning_rate": 9.387578947368421e-05, "loss": 0.3464, "step": 43291 }, { "epoch": 2.424235636689439, "grad_norm": 
1.2472261190414429, "learning_rate": 9.387552631578947e-05, "loss": 0.4071, "step": 43292 }, { "epoch": 2.424291634001568, "grad_norm": 1.2833224534988403, "learning_rate": 9.387526315789475e-05, "loss": 0.5953, "step": 43293 }, { "epoch": 2.424347631313697, "grad_norm": 1.0062750577926636, "learning_rate": 9.3875e-05, "loss": 0.3378, "step": 43294 }, { "epoch": 2.424403628625826, "grad_norm": 1.3772644996643066, "learning_rate": 9.387473684210526e-05, "loss": 0.6255, "step": 43295 }, { "epoch": 2.424459625937955, "grad_norm": 1.2342292070388794, "learning_rate": 9.387447368421052e-05, "loss": 0.364, "step": 43296 }, { "epoch": 2.424515623250084, "grad_norm": 1.0805096626281738, "learning_rate": 9.38742105263158e-05, "loss": 0.3374, "step": 43297 }, { "epoch": 2.424571620562213, "grad_norm": 1.118391752243042, "learning_rate": 9.387394736842106e-05, "loss": 0.3436, "step": 43298 }, { "epoch": 2.424627617874342, "grad_norm": 1.0967470407485962, "learning_rate": 9.387368421052633e-05, "loss": 0.474, "step": 43299 }, { "epoch": 2.424683615186471, "grad_norm": 1.2771642208099365, "learning_rate": 9.387342105263158e-05, "loss": 0.4183, "step": 43300 }, { "epoch": 2.4247396124986, "grad_norm": 1.3030600547790527, "learning_rate": 9.387315789473685e-05, "loss": 0.3994, "step": 43301 }, { "epoch": 2.424795609810729, "grad_norm": 1.1603407859802246, "learning_rate": 9.387289473684211e-05, "loss": 0.3234, "step": 43302 }, { "epoch": 2.424851607122858, "grad_norm": 1.1143550872802734, "learning_rate": 9.387263157894737e-05, "loss": 0.3263, "step": 43303 }, { "epoch": 2.424907604434987, "grad_norm": 1.081955909729004, "learning_rate": 9.387236842105264e-05, "loss": 0.4103, "step": 43304 }, { "epoch": 2.424963601747116, "grad_norm": 1.1078665256500244, "learning_rate": 9.387210526315789e-05, "loss": 0.4375, "step": 43305 }, { "epoch": 2.425019599059245, "grad_norm": 1.1512471437454224, "learning_rate": 9.387184210526316e-05, "loss": 0.3524, "step": 43306 }, { "epoch": 
2.425075596371374, "grad_norm": 1.2474839687347412, "learning_rate": 9.387157894736842e-05, "loss": 0.4101, "step": 43307 }, { "epoch": 2.425131593683503, "grad_norm": 1.4544689655303955, "learning_rate": 9.38713157894737e-05, "loss": 0.417, "step": 43308 }, { "epoch": 2.425187590995632, "grad_norm": 1.1838221549987793, "learning_rate": 9.387105263157896e-05, "loss": 0.3753, "step": 43309 }, { "epoch": 2.425243588307761, "grad_norm": 1.2453458309173584, "learning_rate": 9.387078947368421e-05, "loss": 0.356, "step": 43310 }, { "epoch": 2.4252995856198902, "grad_norm": 2.3535826206207275, "learning_rate": 9.387052631578947e-05, "loss": 0.3519, "step": 43311 }, { "epoch": 2.4253555829320192, "grad_norm": 1.2159059047698975, "learning_rate": 9.387026315789475e-05, "loss": 0.4917, "step": 43312 }, { "epoch": 2.4254115802441483, "grad_norm": 1.44333016872406, "learning_rate": 9.387000000000001e-05, "loss": 0.6589, "step": 43313 }, { "epoch": 2.4254675775562773, "grad_norm": 1.2149183750152588, "learning_rate": 9.386973684210527e-05, "loss": 0.3693, "step": 43314 }, { "epoch": 2.4255235748684063, "grad_norm": 1.102017879486084, "learning_rate": 9.386947368421053e-05, "loss": 0.4015, "step": 43315 }, { "epoch": 2.4255795721805353, "grad_norm": 1.4537397623062134, "learning_rate": 9.38692105263158e-05, "loss": 0.4418, "step": 43316 }, { "epoch": 2.4256355694926643, "grad_norm": 1.1533023118972778, "learning_rate": 9.386894736842106e-05, "loss": 0.3911, "step": 43317 }, { "epoch": 2.4256915668047934, "grad_norm": 1.1877247095108032, "learning_rate": 9.386868421052632e-05, "loss": 0.3713, "step": 43318 }, { "epoch": 2.4257475641169224, "grad_norm": 1.0765410661697388, "learning_rate": 9.386842105263158e-05, "loss": 0.4446, "step": 43319 }, { "epoch": 2.4258035614290514, "grad_norm": 1.4328455924987793, "learning_rate": 9.386815789473684e-05, "loss": 0.4334, "step": 43320 }, { "epoch": 2.4258595587411804, "grad_norm": 1.2991150617599487, "learning_rate": 9.386789473684211e-05, 
"loss": 0.4606, "step": 43321 }, { "epoch": 2.4259155560533094, "grad_norm": 1.202183485031128, "learning_rate": 9.386763157894737e-05, "loss": 0.3483, "step": 43322 }, { "epoch": 2.4259715533654385, "grad_norm": 1.1124930381774902, "learning_rate": 9.386736842105263e-05, "loss": 0.3801, "step": 43323 }, { "epoch": 2.4260275506775675, "grad_norm": 1.2466683387756348, "learning_rate": 9.386710526315789e-05, "loss": 0.3554, "step": 43324 }, { "epoch": 2.4260835479896965, "grad_norm": 1.3240309953689575, "learning_rate": 9.386684210526316e-05, "loss": 0.4564, "step": 43325 }, { "epoch": 2.4261395453018255, "grad_norm": 1.1996407508850098, "learning_rate": 9.386657894736842e-05, "loss": 0.3253, "step": 43326 }, { "epoch": 2.4261955426139545, "grad_norm": 1.1670150756835938, "learning_rate": 9.386631578947368e-05, "loss": 0.5599, "step": 43327 }, { "epoch": 2.4262515399260836, "grad_norm": 0.950490415096283, "learning_rate": 9.386605263157894e-05, "loss": 0.3096, "step": 43328 }, { "epoch": 2.4263075372382126, "grad_norm": 1.210343599319458, "learning_rate": 9.386578947368422e-05, "loss": 0.4202, "step": 43329 }, { "epoch": 2.4263635345503416, "grad_norm": 1.4921112060546875, "learning_rate": 9.386552631578948e-05, "loss": 0.3783, "step": 43330 }, { "epoch": 2.4264195318624706, "grad_norm": 1.0691190958023071, "learning_rate": 9.386526315789475e-05, "loss": 0.4163, "step": 43331 }, { "epoch": 2.4264755291745996, "grad_norm": 1.0907038450241089, "learning_rate": 9.3865e-05, "loss": 0.3056, "step": 43332 }, { "epoch": 2.4265315264867287, "grad_norm": 1.3283377885818481, "learning_rate": 9.386473684210527e-05, "loss": 0.4833, "step": 43333 }, { "epoch": 2.4265875237988577, "grad_norm": 1.1758837699890137, "learning_rate": 9.386447368421053e-05, "loss": 0.4387, "step": 43334 }, { "epoch": 2.4266435211109867, "grad_norm": 1.3361684083938599, "learning_rate": 9.38642105263158e-05, "loss": 0.403, "step": 43335 }, { "epoch": 2.4266995184231157, "grad_norm": 1.2100074291229248, 
"learning_rate": 9.386394736842106e-05, "loss": 0.4033, "step": 43336 }, { "epoch": 2.4267555157352447, "grad_norm": 1.314420461654663, "learning_rate": 9.386368421052631e-05, "loss": 0.4736, "step": 43337 }, { "epoch": 2.4268115130473737, "grad_norm": 1.1464412212371826, "learning_rate": 9.386342105263158e-05, "loss": 0.4867, "step": 43338 }, { "epoch": 2.4268675103595028, "grad_norm": 1.3981119394302368, "learning_rate": 9.386315789473684e-05, "loss": 0.522, "step": 43339 }, { "epoch": 2.426923507671632, "grad_norm": 1.0674493312835693, "learning_rate": 9.386289473684212e-05, "loss": 0.4018, "step": 43340 }, { "epoch": 2.426979504983761, "grad_norm": 1.4946500062942505, "learning_rate": 9.386263157894737e-05, "loss": 0.4493, "step": 43341 }, { "epoch": 2.42703550229589, "grad_norm": 1.2862634658813477, "learning_rate": 9.386236842105263e-05, "loss": 0.4656, "step": 43342 }, { "epoch": 2.427091499608019, "grad_norm": 1.1617116928100586, "learning_rate": 9.38621052631579e-05, "loss": 0.4247, "step": 43343 }, { "epoch": 2.427147496920148, "grad_norm": 1.376528024673462, "learning_rate": 9.386184210526317e-05, "loss": 0.3746, "step": 43344 }, { "epoch": 2.427203494232277, "grad_norm": 1.2471561431884766, "learning_rate": 9.386157894736843e-05, "loss": 0.3677, "step": 43345 }, { "epoch": 2.427259491544406, "grad_norm": 1.153925895690918, "learning_rate": 9.386131578947369e-05, "loss": 0.4013, "step": 43346 }, { "epoch": 2.427315488856535, "grad_norm": 1.207470178604126, "learning_rate": 9.386105263157895e-05, "loss": 0.5585, "step": 43347 }, { "epoch": 2.427371486168664, "grad_norm": 1.0314432382583618, "learning_rate": 9.386078947368422e-05, "loss": 0.3651, "step": 43348 }, { "epoch": 2.427427483480793, "grad_norm": 1.4034732580184937, "learning_rate": 9.386052631578948e-05, "loss": 0.367, "step": 43349 }, { "epoch": 2.427483480792922, "grad_norm": 1.182443380355835, "learning_rate": 9.386026315789474e-05, "loss": 0.4221, "step": 43350 }, { "epoch": 
2.427539478105051, "grad_norm": 1.2632993459701538, "learning_rate": 9.386e-05, "loss": 0.4657, "step": 43351 }, { "epoch": 2.42759547541718, "grad_norm": 1.04806649684906, "learning_rate": 9.385973684210527e-05, "loss": 0.4245, "step": 43352 }, { "epoch": 2.427651472729309, "grad_norm": 1.074133276939392, "learning_rate": 9.385947368421053e-05, "loss": 0.3508, "step": 43353 }, { "epoch": 2.427707470041438, "grad_norm": 1.3391444683074951, "learning_rate": 9.38592105263158e-05, "loss": 0.3284, "step": 43354 }, { "epoch": 2.427763467353567, "grad_norm": 1.1975843906402588, "learning_rate": 9.385894736842105e-05, "loss": 0.4113, "step": 43355 }, { "epoch": 2.427819464665696, "grad_norm": 1.0653481483459473, "learning_rate": 9.385868421052631e-05, "loss": 0.3907, "step": 43356 }, { "epoch": 2.427875461977825, "grad_norm": 1.3801605701446533, "learning_rate": 9.385842105263158e-05, "loss": 0.4362, "step": 43357 }, { "epoch": 2.427931459289954, "grad_norm": 0.9844357967376709, "learning_rate": 9.385815789473684e-05, "loss": 0.3961, "step": 43358 }, { "epoch": 2.427987456602083, "grad_norm": 1.298036813735962, "learning_rate": 9.385789473684212e-05, "loss": 0.4182, "step": 43359 }, { "epoch": 2.428043453914212, "grad_norm": 1.1005222797393799, "learning_rate": 9.385763157894736e-05, "loss": 0.4708, "step": 43360 }, { "epoch": 2.428099451226341, "grad_norm": 1.1731996536254883, "learning_rate": 9.385736842105264e-05, "loss": 0.4222, "step": 43361 }, { "epoch": 2.42815544853847, "grad_norm": 1.137898564338684, "learning_rate": 9.38571052631579e-05, "loss": 0.3345, "step": 43362 }, { "epoch": 2.4282114458505992, "grad_norm": 1.2865368127822876, "learning_rate": 9.385684210526317e-05, "loss": 0.41, "step": 43363 }, { "epoch": 2.4282674431627282, "grad_norm": 1.557415246963501, "learning_rate": 9.385657894736843e-05, "loss": 0.4678, "step": 43364 }, { "epoch": 2.4283234404748573, "grad_norm": 1.3160344362258911, "learning_rate": 9.385631578947369e-05, "loss": 0.4289, "step": 
43365 }, { "epoch": 2.4283794377869863, "grad_norm": 1.038507342338562, "learning_rate": 9.385605263157895e-05, "loss": 0.4918, "step": 43366 }, { "epoch": 2.4284354350991153, "grad_norm": 1.4419865608215332, "learning_rate": 9.385578947368422e-05, "loss": 0.4504, "step": 43367 }, { "epoch": 2.4284914324112443, "grad_norm": 1.0739492177963257, "learning_rate": 9.385552631578948e-05, "loss": 0.331, "step": 43368 }, { "epoch": 2.4285474297233733, "grad_norm": 1.1418654918670654, "learning_rate": 9.385526315789474e-05, "loss": 0.417, "step": 43369 }, { "epoch": 2.4286034270355024, "grad_norm": 1.1604127883911133, "learning_rate": 9.3855e-05, "loss": 0.3685, "step": 43370 }, { "epoch": 2.4286594243476314, "grad_norm": 3.397444009780884, "learning_rate": 9.385473684210528e-05, "loss": 0.4888, "step": 43371 }, { "epoch": 2.4287154216597604, "grad_norm": 1.2603759765625, "learning_rate": 9.385447368421053e-05, "loss": 0.5675, "step": 43372 }, { "epoch": 2.4287714189718894, "grad_norm": 1.1021838188171387, "learning_rate": 9.38542105263158e-05, "loss": 0.4028, "step": 43373 }, { "epoch": 2.4288274162840184, "grad_norm": 1.0308390855789185, "learning_rate": 9.385394736842105e-05, "loss": 0.3346, "step": 43374 }, { "epoch": 2.4288834135961475, "grad_norm": 1.1870626211166382, "learning_rate": 9.385368421052631e-05, "loss": 0.5407, "step": 43375 }, { "epoch": 2.4289394109082765, "grad_norm": 1.2564014196395874, "learning_rate": 9.385342105263159e-05, "loss": 0.4037, "step": 43376 }, { "epoch": 2.4289954082204055, "grad_norm": 1.1590462923049927, "learning_rate": 9.385315789473685e-05, "loss": 0.375, "step": 43377 }, { "epoch": 2.4290514055325345, "grad_norm": 1.1806632280349731, "learning_rate": 9.38528947368421e-05, "loss": 0.5893, "step": 43378 }, { "epoch": 2.4291074028446635, "grad_norm": 1.3714137077331543, "learning_rate": 9.385263157894737e-05, "loss": 0.4624, "step": 43379 }, { "epoch": 2.4291634001567926, "grad_norm": 1.3838032484054565, "learning_rate": 
9.385236842105264e-05, "loss": 0.313, "step": 43380 }, { "epoch": 2.4292193974689216, "grad_norm": 1.2200109958648682, "learning_rate": 9.38521052631579e-05, "loss": 0.4538, "step": 43381 }, { "epoch": 2.4292753947810506, "grad_norm": 1.0471696853637695, "learning_rate": 9.385184210526316e-05, "loss": 0.3655, "step": 43382 }, { "epoch": 2.4293313920931796, "grad_norm": 1.198943018913269, "learning_rate": 9.385157894736842e-05, "loss": 0.4556, "step": 43383 }, { "epoch": 2.4293873894053086, "grad_norm": 1.4081692695617676, "learning_rate": 9.385131578947369e-05, "loss": 0.4586, "step": 43384 }, { "epoch": 2.4294433867174376, "grad_norm": 1.3346951007843018, "learning_rate": 9.385105263157895e-05, "loss": 0.3244, "step": 43385 }, { "epoch": 2.4294993840295667, "grad_norm": 1.3078590631484985, "learning_rate": 9.385078947368423e-05, "loss": 0.3655, "step": 43386 }, { "epoch": 2.4295553813416957, "grad_norm": 1.1638308763504028, "learning_rate": 9.385052631578947e-05, "loss": 0.4038, "step": 43387 }, { "epoch": 2.4296113786538247, "grad_norm": 1.1087274551391602, "learning_rate": 9.385026315789474e-05, "loss": 0.3192, "step": 43388 }, { "epoch": 2.4296673759659537, "grad_norm": 1.1162607669830322, "learning_rate": 9.385e-05, "loss": 0.43, "step": 43389 }, { "epoch": 2.4297233732780827, "grad_norm": 1.0834661722183228, "learning_rate": 9.384973684210526e-05, "loss": 0.3336, "step": 43390 }, { "epoch": 2.4297793705902118, "grad_norm": 1.3958379030227661, "learning_rate": 9.384947368421054e-05, "loss": 0.4832, "step": 43391 }, { "epoch": 2.429835367902341, "grad_norm": 1.2369455099105835, "learning_rate": 9.384921052631578e-05, "loss": 0.5863, "step": 43392 }, { "epoch": 2.42989136521447, "grad_norm": 1.0227656364440918, "learning_rate": 9.384894736842106e-05, "loss": 0.3514, "step": 43393 }, { "epoch": 2.429947362526599, "grad_norm": 1.047366976737976, "learning_rate": 9.384868421052632e-05, "loss": 0.3925, "step": 43394 }, { "epoch": 2.430003359838728, "grad_norm": 
1.0129384994506836, "learning_rate": 9.384842105263159e-05, "loss": 0.4072, "step": 43395 }, { "epoch": 2.430059357150857, "grad_norm": 0.9804482460021973, "learning_rate": 9.384815789473685e-05, "loss": 0.3473, "step": 43396 }, { "epoch": 2.430115354462986, "grad_norm": 1.4185374975204468, "learning_rate": 9.384789473684211e-05, "loss": 0.3489, "step": 43397 }, { "epoch": 2.430171351775115, "grad_norm": 1.0564411878585815, "learning_rate": 9.384763157894737e-05, "loss": 0.3962, "step": 43398 }, { "epoch": 2.430227349087244, "grad_norm": 1.1915143728256226, "learning_rate": 9.384736842105264e-05, "loss": 0.3832, "step": 43399 }, { "epoch": 2.430283346399373, "grad_norm": 1.1948678493499756, "learning_rate": 9.38471052631579e-05, "loss": 0.4527, "step": 43400 }, { "epoch": 2.430339343711502, "grad_norm": 1.2793842554092407, "learning_rate": 9.384684210526316e-05, "loss": 0.5447, "step": 43401 }, { "epoch": 2.430395341023631, "grad_norm": 1.0344980955123901, "learning_rate": 9.384657894736842e-05, "loss": 0.4154, "step": 43402 }, { "epoch": 2.43045133833576, "grad_norm": 1.1797325611114502, "learning_rate": 9.38463157894737e-05, "loss": 0.3862, "step": 43403 }, { "epoch": 2.430507335647889, "grad_norm": 1.1524313688278198, "learning_rate": 9.384605263157895e-05, "loss": 0.3115, "step": 43404 }, { "epoch": 2.430563332960018, "grad_norm": 1.114667534828186, "learning_rate": 9.384578947368421e-05, "loss": 0.4526, "step": 43405 }, { "epoch": 2.430619330272147, "grad_norm": 1.274276852607727, "learning_rate": 9.384552631578947e-05, "loss": 0.3929, "step": 43406 }, { "epoch": 2.430675327584276, "grad_norm": 1.360005259513855, "learning_rate": 9.384526315789473e-05, "loss": 0.4642, "step": 43407 }, { "epoch": 2.430731324896405, "grad_norm": 7.713287830352783, "learning_rate": 9.384500000000001e-05, "loss": 0.398, "step": 43408 }, { "epoch": 2.430787322208534, "grad_norm": 1.1749053001403809, "learning_rate": 9.384473684210527e-05, "loss": 0.453, "step": 43409 }, { "epoch": 
2.430843319520663, "grad_norm": 1.1834076642990112, "learning_rate": 9.384447368421053e-05, "loss": 0.4047, "step": 43410 }, { "epoch": 2.430899316832792, "grad_norm": 1.1666511297225952, "learning_rate": 9.384421052631579e-05, "loss": 0.3991, "step": 43411 }, { "epoch": 2.430955314144921, "grad_norm": 1.813346266746521, "learning_rate": 9.384394736842106e-05, "loss": 0.4145, "step": 43412 }, { "epoch": 2.43101131145705, "grad_norm": 0.9740927815437317, "learning_rate": 9.384368421052632e-05, "loss": 0.5302, "step": 43413 }, { "epoch": 2.431067308769179, "grad_norm": 1.1508328914642334, "learning_rate": 9.384342105263159e-05, "loss": 0.4138, "step": 43414 }, { "epoch": 2.431123306081308, "grad_norm": 1.4510514736175537, "learning_rate": 9.384315789473684e-05, "loss": 0.3605, "step": 43415 }, { "epoch": 2.4311793033934372, "grad_norm": 1.2821334600448608, "learning_rate": 9.384289473684211e-05, "loss": 0.4614, "step": 43416 }, { "epoch": 2.4312353007055663, "grad_norm": 1.3336087465286255, "learning_rate": 9.384263157894737e-05, "loss": 0.4015, "step": 43417 }, { "epoch": 2.4312912980176953, "grad_norm": 1.3297868967056274, "learning_rate": 9.384236842105264e-05, "loss": 0.3959, "step": 43418 }, { "epoch": 2.4313472953298243, "grad_norm": 1.1191812753677368, "learning_rate": 9.384210526315789e-05, "loss": 0.3282, "step": 43419 }, { "epoch": 2.4314032926419533, "grad_norm": 1.187023401260376, "learning_rate": 9.384184210526316e-05, "loss": 0.4345, "step": 43420 }, { "epoch": 2.4314592899540823, "grad_norm": 1.2757049798965454, "learning_rate": 9.384157894736842e-05, "loss": 0.3805, "step": 43421 }, { "epoch": 2.4315152872662114, "grad_norm": 1.1709263324737549, "learning_rate": 9.38413157894737e-05, "loss": 0.3898, "step": 43422 }, { "epoch": 2.4315712845783404, "grad_norm": 1.2875988483428955, "learning_rate": 9.384105263157896e-05, "loss": 0.4805, "step": 43423 }, { "epoch": 2.4316272818904694, "grad_norm": 1.3478721380233765, "learning_rate": 9.38407894736842e-05, 
"loss": 0.4032, "step": 43424 }, { "epoch": 2.4316832792025984, "grad_norm": 1.7640914916992188, "learning_rate": 9.384052631578948e-05, "loss": 0.4563, "step": 43425 }, { "epoch": 2.4317392765147274, "grad_norm": 1.093003749847412, "learning_rate": 9.384026315789474e-05, "loss": 0.3327, "step": 43426 }, { "epoch": 2.4317952738268565, "grad_norm": 1.1120656728744507, "learning_rate": 9.384000000000001e-05, "loss": 0.4486, "step": 43427 }, { "epoch": 2.4318512711389855, "grad_norm": 1.2630828619003296, "learning_rate": 9.383973684210527e-05, "loss": 0.4555, "step": 43428 }, { "epoch": 2.4319072684511145, "grad_norm": 0.9723885655403137, "learning_rate": 9.383947368421053e-05, "loss": 0.3497, "step": 43429 }, { "epoch": 2.4319632657632435, "grad_norm": 1.4140599966049194, "learning_rate": 9.383921052631579e-05, "loss": 0.491, "step": 43430 }, { "epoch": 2.4320192630753725, "grad_norm": 1.132109522819519, "learning_rate": 9.383894736842106e-05, "loss": 0.4306, "step": 43431 }, { "epoch": 2.4320752603875015, "grad_norm": 1.426221251487732, "learning_rate": 9.383868421052632e-05, "loss": 0.586, "step": 43432 }, { "epoch": 2.4321312576996306, "grad_norm": 1.649672031402588, "learning_rate": 9.383842105263158e-05, "loss": 0.4764, "step": 43433 }, { "epoch": 2.4321872550117596, "grad_norm": 1.1716945171356201, "learning_rate": 9.383815789473684e-05, "loss": 0.3141, "step": 43434 }, { "epoch": 2.4322432523238886, "grad_norm": 1.1834070682525635, "learning_rate": 9.383789473684211e-05, "loss": 0.4313, "step": 43435 }, { "epoch": 2.4322992496360176, "grad_norm": 1.052927017211914, "learning_rate": 9.383763157894737e-05, "loss": 0.3753, "step": 43436 }, { "epoch": 2.4323552469481466, "grad_norm": 1.1493428945541382, "learning_rate": 9.383736842105263e-05, "loss": 0.3408, "step": 43437 }, { "epoch": 2.4324112442602757, "grad_norm": 1.233973503112793, "learning_rate": 9.38371052631579e-05, "loss": 0.2602, "step": 43438 }, { "epoch": 2.4324672415724047, "grad_norm": 
1.2100218534469604, "learning_rate": 9.383684210526317e-05, "loss": 0.4832, "step": 43439 }, { "epoch": 2.4325232388845337, "grad_norm": 1.162550687789917, "learning_rate": 9.383657894736843e-05, "loss": 0.3711, "step": 43440 }, { "epoch": 2.4325792361966627, "grad_norm": 1.1472750902175903, "learning_rate": 9.383631578947369e-05, "loss": 0.3832, "step": 43441 }, { "epoch": 2.4326352335087917, "grad_norm": 1.3634388446807861, "learning_rate": 9.383605263157895e-05, "loss": 0.4464, "step": 43442 }, { "epoch": 2.4326912308209208, "grad_norm": 1.2247101068496704, "learning_rate": 9.38357894736842e-05, "loss": 0.3486, "step": 43443 }, { "epoch": 2.4327472281330498, "grad_norm": 1.378816843032837, "learning_rate": 9.383552631578948e-05, "loss": 0.4521, "step": 43444 }, { "epoch": 2.432803225445179, "grad_norm": 1.5216033458709717, "learning_rate": 9.383526315789474e-05, "loss": 0.4524, "step": 43445 }, { "epoch": 2.432859222757308, "grad_norm": 1.3239271640777588, "learning_rate": 9.383500000000001e-05, "loss": 0.413, "step": 43446 }, { "epoch": 2.432915220069437, "grad_norm": 1.3717900514602661, "learning_rate": 9.383473684210526e-05, "loss": 0.5352, "step": 43447 }, { "epoch": 2.432971217381566, "grad_norm": 1.1783660650253296, "learning_rate": 9.383447368421053e-05, "loss": 0.3006, "step": 43448 }, { "epoch": 2.433027214693695, "grad_norm": 1.282853364944458, "learning_rate": 9.383421052631579e-05, "loss": 0.3663, "step": 43449 }, { "epoch": 2.433083212005824, "grad_norm": 1.1573971509933472, "learning_rate": 9.383394736842106e-05, "loss": 0.4207, "step": 43450 }, { "epoch": 2.433139209317953, "grad_norm": 1.1459968090057373, "learning_rate": 9.383368421052632e-05, "loss": 0.3567, "step": 43451 }, { "epoch": 2.433195206630082, "grad_norm": 1.0860177278518677, "learning_rate": 9.383342105263158e-05, "loss": 0.391, "step": 43452 }, { "epoch": 2.433251203942211, "grad_norm": 1.3015120029449463, "learning_rate": 9.383315789473684e-05, "loss": 0.3851, "step": 43453 }, { 
"epoch": 2.43330720125434, "grad_norm": 1.2159581184387207, "learning_rate": 9.383289473684212e-05, "loss": 0.3658, "step": 43454 }, { "epoch": 2.433363198566469, "grad_norm": 1.1927125453948975, "learning_rate": 9.383263157894738e-05, "loss": 0.409, "step": 43455 }, { "epoch": 2.433419195878598, "grad_norm": 1.0856728553771973, "learning_rate": 9.383236842105264e-05, "loss": 0.4171, "step": 43456 }, { "epoch": 2.433475193190727, "grad_norm": 1.1201846599578857, "learning_rate": 9.38321052631579e-05, "loss": 0.4162, "step": 43457 }, { "epoch": 2.433531190502856, "grad_norm": 0.9723695516586304, "learning_rate": 9.383184210526316e-05, "loss": 0.3514, "step": 43458 }, { "epoch": 2.433587187814985, "grad_norm": 1.7527860403060913, "learning_rate": 9.383157894736843e-05, "loss": 0.5715, "step": 43459 }, { "epoch": 2.433643185127114, "grad_norm": 1.070355772972107, "learning_rate": 9.383131578947369e-05, "loss": 0.4033, "step": 43460 }, { "epoch": 2.433699182439243, "grad_norm": 1.1626781225204468, "learning_rate": 9.383105263157895e-05, "loss": 0.3528, "step": 43461 }, { "epoch": 2.433755179751372, "grad_norm": 1.237432599067688, "learning_rate": 9.383078947368421e-05, "loss": 0.3989, "step": 43462 }, { "epoch": 2.433811177063501, "grad_norm": 1.386903166770935, "learning_rate": 9.383052631578948e-05, "loss": 0.4431, "step": 43463 }, { "epoch": 2.43386717437563, "grad_norm": 1.3324124813079834, "learning_rate": 9.383026315789474e-05, "loss": 0.5316, "step": 43464 }, { "epoch": 2.433923171687759, "grad_norm": 1.1503700017929077, "learning_rate": 9.383e-05, "loss": 0.3643, "step": 43465 }, { "epoch": 2.433979168999888, "grad_norm": 1.1987820863723755, "learning_rate": 9.382973684210526e-05, "loss": 0.3456, "step": 43466 }, { "epoch": 2.434035166312017, "grad_norm": 1.2245421409606934, "learning_rate": 9.382947368421053e-05, "loss": 0.3478, "step": 43467 }, { "epoch": 2.434091163624146, "grad_norm": 1.0515055656433105, "learning_rate": 9.38292105263158e-05, "loss": 
0.3727, "step": 43468 }, { "epoch": 2.4341471609362753, "grad_norm": 1.222813367843628, "learning_rate": 9.382894736842107e-05, "loss": 0.4322, "step": 43469 }, { "epoch": 2.434203158248404, "grad_norm": 0.9088592529296875, "learning_rate": 9.382868421052631e-05, "loss": 0.3074, "step": 43470 }, { "epoch": 2.4342591555605333, "grad_norm": 1.2925727367401123, "learning_rate": 9.382842105263159e-05, "loss": 0.4125, "step": 43471 }, { "epoch": 2.434315152872662, "grad_norm": 1.3260629177093506, "learning_rate": 9.382815789473685e-05, "loss": 0.4035, "step": 43472 }, { "epoch": 2.4343711501847913, "grad_norm": 1.110970377922058, "learning_rate": 9.382789473684212e-05, "loss": 0.3778, "step": 43473 }, { "epoch": 2.43442714749692, "grad_norm": 0.9924740791320801, "learning_rate": 9.382763157894737e-05, "loss": 0.3712, "step": 43474 }, { "epoch": 2.4344831448090494, "grad_norm": 1.052088975906372, "learning_rate": 9.382736842105264e-05, "loss": 0.3325, "step": 43475 }, { "epoch": 2.434539142121178, "grad_norm": 1.1123687028884888, "learning_rate": 9.38271052631579e-05, "loss": 0.3348, "step": 43476 }, { "epoch": 2.4345951394333074, "grad_norm": 1.0884625911712646, "learning_rate": 9.382684210526316e-05, "loss": 0.3563, "step": 43477 }, { "epoch": 2.434651136745436, "grad_norm": 1.2509570121765137, "learning_rate": 9.382657894736843e-05, "loss": 0.4616, "step": 43478 }, { "epoch": 2.4347071340575654, "grad_norm": 1.0460319519042969, "learning_rate": 9.382631578947368e-05, "loss": 0.2775, "step": 43479 }, { "epoch": 2.434763131369694, "grad_norm": 1.2151391506195068, "learning_rate": 9.382605263157895e-05, "loss": 0.4061, "step": 43480 }, { "epoch": 2.4348191286818235, "grad_norm": 0.98110032081604, "learning_rate": 9.382578947368421e-05, "loss": 0.3884, "step": 43481 }, { "epoch": 2.434875125993952, "grad_norm": 1.818224549293518, "learning_rate": 9.382552631578948e-05, "loss": 0.5784, "step": 43482 }, { "epoch": 2.4349311233060815, "grad_norm": 1.1985265016555786, 
"learning_rate": 9.382526315789474e-05, "loss": 0.4301, "step": 43483 }, { "epoch": 2.43498712061821, "grad_norm": 1.1882362365722656, "learning_rate": 9.3825e-05, "loss": 0.4501, "step": 43484 }, { "epoch": 2.4350431179303396, "grad_norm": 1.0997867584228516, "learning_rate": 9.382473684210526e-05, "loss": 0.3908, "step": 43485 }, { "epoch": 2.435099115242468, "grad_norm": 1.283348560333252, "learning_rate": 9.382447368421054e-05, "loss": 0.4327, "step": 43486 }, { "epoch": 2.4351551125545976, "grad_norm": 1.1015461683273315, "learning_rate": 9.38242105263158e-05, "loss": 0.3612, "step": 43487 }, { "epoch": 2.435211109866726, "grad_norm": 1.0880892276763916, "learning_rate": 9.382394736842106e-05, "loss": 0.4804, "step": 43488 }, { "epoch": 2.4352671071788556, "grad_norm": 1.246701955795288, "learning_rate": 9.382368421052632e-05, "loss": 0.4529, "step": 43489 }, { "epoch": 2.435323104490984, "grad_norm": 1.1859369277954102, "learning_rate": 9.382342105263159e-05, "loss": 0.3615, "step": 43490 }, { "epoch": 2.4353791018031137, "grad_norm": 1.1748050451278687, "learning_rate": 9.382315789473685e-05, "loss": 0.3863, "step": 43491 }, { "epoch": 2.4354350991152423, "grad_norm": 1.1295069456100464, "learning_rate": 9.382289473684211e-05, "loss": 0.3886, "step": 43492 }, { "epoch": 2.4354910964273717, "grad_norm": 1.160613775253296, "learning_rate": 9.382263157894737e-05, "loss": 0.416, "step": 43493 }, { "epoch": 2.4355470937395003, "grad_norm": 1.717684030532837, "learning_rate": 9.382236842105263e-05, "loss": 0.4655, "step": 43494 }, { "epoch": 2.4356030910516298, "grad_norm": 1.2494808435440063, "learning_rate": 9.38221052631579e-05, "loss": 0.5969, "step": 43495 }, { "epoch": 2.4356590883637583, "grad_norm": 1.0145930051803589, "learning_rate": 9.382184210526316e-05, "loss": 0.3805, "step": 43496 }, { "epoch": 2.435715085675888, "grad_norm": 1.2939825057983398, "learning_rate": 9.382157894736842e-05, "loss": 0.4869, "step": 43497 }, { "epoch": 2.4357710829880164, 
"grad_norm": 1.1067980527877808, "learning_rate": 9.382131578947368e-05, "loss": 0.5308, "step": 43498 }, { "epoch": 2.435827080300146, "grad_norm": 1.2373117208480835, "learning_rate": 9.382105263157895e-05, "loss": 0.3543, "step": 43499 }, { "epoch": 2.4358830776122744, "grad_norm": 1.1672812700271606, "learning_rate": 9.382078947368421e-05, "loss": 0.3616, "step": 43500 }, { "epoch": 2.435939074924404, "grad_norm": 1.0591599941253662, "learning_rate": 9.382052631578949e-05, "loss": 0.3633, "step": 43501 }, { "epoch": 2.4359950722365324, "grad_norm": 1.1283249855041504, "learning_rate": 9.382026315789473e-05, "loss": 0.4081, "step": 43502 }, { "epoch": 2.436051069548662, "grad_norm": 1.4259402751922607, "learning_rate": 9.382e-05, "loss": 0.3711, "step": 43503 }, { "epoch": 2.4361070668607905, "grad_norm": 1.2054909467697144, "learning_rate": 9.381973684210527e-05, "loss": 0.564, "step": 43504 }, { "epoch": 2.43616306417292, "grad_norm": 1.467523455619812, "learning_rate": 9.381947368421054e-05, "loss": 0.4992, "step": 43505 }, { "epoch": 2.4362190614850485, "grad_norm": 1.2247976064682007, "learning_rate": 9.38192105263158e-05, "loss": 0.3736, "step": 43506 }, { "epoch": 2.436275058797178, "grad_norm": 1.647125244140625, "learning_rate": 9.381894736842106e-05, "loss": 0.3764, "step": 43507 }, { "epoch": 2.4363310561093066, "grad_norm": 1.184555172920227, "learning_rate": 9.381868421052632e-05, "loss": 0.3848, "step": 43508 }, { "epoch": 2.4363870534214356, "grad_norm": 1.1530085802078247, "learning_rate": 9.381842105263159e-05, "loss": 0.5664, "step": 43509 }, { "epoch": 2.4364430507335646, "grad_norm": 1.1183990240097046, "learning_rate": 9.381815789473685e-05, "loss": 0.3885, "step": 43510 }, { "epoch": 2.4364990480456936, "grad_norm": 1.1145797967910767, "learning_rate": 9.381789473684211e-05, "loss": 0.335, "step": 43511 }, { "epoch": 2.4365550453578226, "grad_norm": 1.2208172082901, "learning_rate": 9.381763157894737e-05, "loss": 0.3936, "step": 43512 }, { 
"epoch": 2.4366110426699517, "grad_norm": 1.1112356185913086, "learning_rate": 9.381736842105263e-05, "loss": 0.3554, "step": 43513 }, { "epoch": 2.4366670399820807, "grad_norm": 1.2250603437423706, "learning_rate": 9.38171052631579e-05, "loss": 0.5187, "step": 43514 }, { "epoch": 2.4367230372942097, "grad_norm": 1.2865691184997559, "learning_rate": 9.381684210526316e-05, "loss": 0.4522, "step": 43515 }, { "epoch": 2.4367790346063387, "grad_norm": 1.0813944339752197, "learning_rate": 9.381657894736842e-05, "loss": 0.3706, "step": 43516 }, { "epoch": 2.4368350319184677, "grad_norm": 1.374730110168457, "learning_rate": 9.381631578947368e-05, "loss": 0.5834, "step": 43517 }, { "epoch": 2.4368910292305968, "grad_norm": 1.0086525678634644, "learning_rate": 9.381605263157896e-05, "loss": 0.3376, "step": 43518 }, { "epoch": 2.4369470265427258, "grad_norm": 1.1249088048934937, "learning_rate": 9.381578947368422e-05, "loss": 0.3933, "step": 43519 }, { "epoch": 2.437003023854855, "grad_norm": 1.0572181940078735, "learning_rate": 9.381552631578948e-05, "loss": 0.308, "step": 43520 }, { "epoch": 2.437059021166984, "grad_norm": 1.2151226997375488, "learning_rate": 9.381526315789474e-05, "loss": 0.4717, "step": 43521 }, { "epoch": 2.437115018479113, "grad_norm": 1.1393799781799316, "learning_rate": 9.381500000000001e-05, "loss": 0.3594, "step": 43522 }, { "epoch": 2.437171015791242, "grad_norm": 1.05832040309906, "learning_rate": 9.381473684210527e-05, "loss": 0.3329, "step": 43523 }, { "epoch": 2.437227013103371, "grad_norm": 1.2633845806121826, "learning_rate": 9.381447368421054e-05, "loss": 0.4283, "step": 43524 }, { "epoch": 2.4372830104155, "grad_norm": 1.177384376525879, "learning_rate": 9.381421052631579e-05, "loss": 0.3734, "step": 43525 }, { "epoch": 2.437339007727629, "grad_norm": 1.1382733583450317, "learning_rate": 9.381394736842106e-05, "loss": 0.4136, "step": 43526 }, { "epoch": 2.437395005039758, "grad_norm": 1.2176085710525513, "learning_rate": 
9.381368421052632e-05, "loss": 0.4514, "step": 43527 }, { "epoch": 2.437451002351887, "grad_norm": 1.3244165182113647, "learning_rate": 9.381342105263158e-05, "loss": 0.4183, "step": 43528 }, { "epoch": 2.437506999664016, "grad_norm": 1.1814717054367065, "learning_rate": 9.381315789473684e-05, "loss": 0.5872, "step": 43529 }, { "epoch": 2.437562996976145, "grad_norm": 1.2787936925888062, "learning_rate": 9.38128947368421e-05, "loss": 0.441, "step": 43530 }, { "epoch": 2.437618994288274, "grad_norm": 1.197939157485962, "learning_rate": 9.381263157894737e-05, "loss": 0.5021, "step": 43531 }, { "epoch": 2.437674991600403, "grad_norm": 1.0469249486923218, "learning_rate": 9.381236842105263e-05, "loss": 0.422, "step": 43532 }, { "epoch": 2.437730988912532, "grad_norm": 1.389747142791748, "learning_rate": 9.38121052631579e-05, "loss": 0.3812, "step": 43533 }, { "epoch": 2.437786986224661, "grad_norm": 1.1925057172775269, "learning_rate": 9.381184210526315e-05, "loss": 0.4221, "step": 43534 }, { "epoch": 2.43784298353679, "grad_norm": 0.9684349298477173, "learning_rate": 9.381157894736843e-05, "loss": 0.4149, "step": 43535 }, { "epoch": 2.437898980848919, "grad_norm": 1.0804325342178345, "learning_rate": 9.381131578947369e-05, "loss": 0.3414, "step": 43536 }, { "epoch": 2.437954978161048, "grad_norm": 1.3908649682998657, "learning_rate": 9.381105263157896e-05, "loss": 0.4029, "step": 43537 }, { "epoch": 2.438010975473177, "grad_norm": 1.1876524686813354, "learning_rate": 9.381078947368422e-05, "loss": 0.4228, "step": 43538 }, { "epoch": 2.438066972785306, "grad_norm": 1.1097257137298584, "learning_rate": 9.381052631578948e-05, "loss": 0.3393, "step": 43539 }, { "epoch": 2.438122970097435, "grad_norm": 1.1329421997070312, "learning_rate": 9.381026315789474e-05, "loss": 0.4043, "step": 43540 }, { "epoch": 2.438178967409564, "grad_norm": 1.111767053604126, "learning_rate": 9.381000000000001e-05, "loss": 0.4088, "step": 43541 }, { "epoch": 2.438234964721693, "grad_norm": 
1.3643239736557007, "learning_rate": 9.380973684210527e-05, "loss": 0.4138, "step": 43542 }, { "epoch": 2.4382909620338222, "grad_norm": 0.9669643640518188, "learning_rate": 9.380947368421053e-05, "loss": 0.3597, "step": 43543 }, { "epoch": 2.4383469593459512, "grad_norm": 1.2528917789459229, "learning_rate": 9.380921052631579e-05, "loss": 0.3543, "step": 43544 }, { "epoch": 2.4384029566580803, "grad_norm": 1.1798632144927979, "learning_rate": 9.380894736842105e-05, "loss": 0.4696, "step": 43545 }, { "epoch": 2.4384589539702093, "grad_norm": 1.4390419721603394, "learning_rate": 9.380868421052632e-05, "loss": 0.6272, "step": 43546 }, { "epoch": 2.4385149512823383, "grad_norm": 1.1236462593078613, "learning_rate": 9.380842105263158e-05, "loss": 0.3224, "step": 43547 }, { "epoch": 2.4385709485944673, "grad_norm": 1.0145539045333862, "learning_rate": 9.380815789473684e-05, "loss": 0.2538, "step": 43548 }, { "epoch": 2.4386269459065963, "grad_norm": 1.1043665409088135, "learning_rate": 9.38078947368421e-05, "loss": 0.3901, "step": 43549 }, { "epoch": 2.4386829432187254, "grad_norm": 1.0802520513534546, "learning_rate": 9.380763157894738e-05, "loss": 0.2895, "step": 43550 }, { "epoch": 2.4387389405308544, "grad_norm": 1.249882459640503, "learning_rate": 9.380736842105264e-05, "loss": 0.4197, "step": 43551 }, { "epoch": 2.4387949378429834, "grad_norm": 0.9152452945709229, "learning_rate": 9.38071052631579e-05, "loss": 0.344, "step": 43552 }, { "epoch": 2.4388509351551124, "grad_norm": 1.2954599857330322, "learning_rate": 9.380684210526316e-05, "loss": 0.4432, "step": 43553 }, { "epoch": 2.4389069324672414, "grad_norm": 1.2975479364395142, "learning_rate": 9.380657894736843e-05, "loss": 0.388, "step": 43554 }, { "epoch": 2.4389629297793705, "grad_norm": 1.1526026725769043, "learning_rate": 9.380631578947369e-05, "loss": 0.2732, "step": 43555 }, { "epoch": 2.4390189270914995, "grad_norm": 1.2810592651367188, "learning_rate": 9.380605263157896e-05, "loss": 0.4176, "step": 
43556 }, { "epoch": 2.4390749244036285, "grad_norm": 1.2511413097381592, "learning_rate": 9.380578947368421e-05, "loss": 0.5216, "step": 43557 }, { "epoch": 2.4391309217157575, "grad_norm": 1.2433569431304932, "learning_rate": 9.380552631578948e-05, "loss": 0.452, "step": 43558 }, { "epoch": 2.4391869190278865, "grad_norm": 1.1853517293930054, "learning_rate": 9.380526315789474e-05, "loss": 0.3632, "step": 43559 }, { "epoch": 2.4392429163400156, "grad_norm": 1.0360019207000732, "learning_rate": 9.380500000000001e-05, "loss": 0.3465, "step": 43560 }, { "epoch": 2.4392989136521446, "grad_norm": 0.971160352230072, "learning_rate": 9.380473684210527e-05, "loss": 0.274, "step": 43561 }, { "epoch": 2.4393549109642736, "grad_norm": 1.2742706537246704, "learning_rate": 9.380447368421052e-05, "loss": 0.4756, "step": 43562 }, { "epoch": 2.4394109082764026, "grad_norm": 1.0161525011062622, "learning_rate": 9.38042105263158e-05, "loss": 0.3122, "step": 43563 }, { "epoch": 2.4394669055885316, "grad_norm": 1.3556708097457886, "learning_rate": 9.380394736842105e-05, "loss": 0.4056, "step": 43564 }, { "epoch": 2.4395229029006607, "grad_norm": 1.2303104400634766, "learning_rate": 9.380368421052633e-05, "loss": 0.3931, "step": 43565 }, { "epoch": 2.4395789002127897, "grad_norm": 1.114974856376648, "learning_rate": 9.380342105263159e-05, "loss": 0.3985, "step": 43566 }, { "epoch": 2.4396348975249187, "grad_norm": 1.4099518060684204, "learning_rate": 9.380315789473685e-05, "loss": 0.4567, "step": 43567 }, { "epoch": 2.4396908948370477, "grad_norm": 1.172695279121399, "learning_rate": 9.38028947368421e-05, "loss": 0.4288, "step": 43568 }, { "epoch": 2.4397468921491767, "grad_norm": 1.2217788696289062, "learning_rate": 9.380263157894738e-05, "loss": 0.4117, "step": 43569 }, { "epoch": 2.4398028894613057, "grad_norm": 1.101059079170227, "learning_rate": 9.380236842105264e-05, "loss": 0.3787, "step": 43570 }, { "epoch": 2.4398588867734348, "grad_norm": 1.3017903566360474, "learning_rate": 
9.38021052631579e-05, "loss": 0.3785, "step": 43571 }, { "epoch": 2.439914884085564, "grad_norm": 1.130303978919983, "learning_rate": 9.380184210526316e-05, "loss": 0.4085, "step": 43572 }, { "epoch": 2.439970881397693, "grad_norm": 1.1646997928619385, "learning_rate": 9.380157894736843e-05, "loss": 0.3446, "step": 43573 }, { "epoch": 2.440026878709822, "grad_norm": 1.3437261581420898, "learning_rate": 9.380131578947369e-05, "loss": 0.4407, "step": 43574 }, { "epoch": 2.440082876021951, "grad_norm": 1.3229933977127075, "learning_rate": 9.380105263157895e-05, "loss": 0.3774, "step": 43575 }, { "epoch": 2.44013887333408, "grad_norm": 1.1961010694503784, "learning_rate": 9.380078947368421e-05, "loss": 0.4461, "step": 43576 }, { "epoch": 2.440194870646209, "grad_norm": 1.038870930671692, "learning_rate": 9.380052631578948e-05, "loss": 0.4113, "step": 43577 }, { "epoch": 2.440250867958338, "grad_norm": 1.1936339139938354, "learning_rate": 9.380026315789474e-05, "loss": 0.3882, "step": 43578 }, { "epoch": 2.440306865270467, "grad_norm": 1.1477329730987549, "learning_rate": 9.38e-05, "loss": 0.3641, "step": 43579 }, { "epoch": 2.440362862582596, "grad_norm": 1.2114328145980835, "learning_rate": 9.379973684210526e-05, "loss": 0.4105, "step": 43580 }, { "epoch": 2.440418859894725, "grad_norm": 1.136233925819397, "learning_rate": 9.379947368421052e-05, "loss": 0.34, "step": 43581 }, { "epoch": 2.440474857206854, "grad_norm": 1.2263675928115845, "learning_rate": 9.37992105263158e-05, "loss": 0.3782, "step": 43582 }, { "epoch": 2.440530854518983, "grad_norm": 1.0855965614318848, "learning_rate": 9.379894736842106e-05, "loss": 0.3841, "step": 43583 }, { "epoch": 2.440586851831112, "grad_norm": 0.9905179142951965, "learning_rate": 9.379868421052632e-05, "loss": 0.3653, "step": 43584 }, { "epoch": 2.440642849143241, "grad_norm": 1.2677885293960571, "learning_rate": 9.379842105263158e-05, "loss": 0.4063, "step": 43585 }, { "epoch": 2.44069884645537, "grad_norm": 
1.1108193397521973, "learning_rate": 9.379815789473685e-05, "loss": 0.3224, "step": 43586 }, { "epoch": 2.440754843767499, "grad_norm": 1.31383216381073, "learning_rate": 9.379789473684211e-05, "loss": 0.4106, "step": 43587 }, { "epoch": 2.440810841079628, "grad_norm": 1.2637027502059937, "learning_rate": 9.379763157894738e-05, "loss": 0.4832, "step": 43588 }, { "epoch": 2.440866838391757, "grad_norm": 1.2205462455749512, "learning_rate": 9.379736842105263e-05, "loss": 0.4223, "step": 43589 }, { "epoch": 2.440922835703886, "grad_norm": 1.1361775398254395, "learning_rate": 9.37971052631579e-05, "loss": 0.3303, "step": 43590 }, { "epoch": 2.440978833016015, "grad_norm": 0.9849117994308472, "learning_rate": 9.379684210526316e-05, "loss": 0.2706, "step": 43591 }, { "epoch": 2.441034830328144, "grad_norm": 1.071748971939087, "learning_rate": 9.379657894736843e-05, "loss": 0.6465, "step": 43592 }, { "epoch": 2.441090827640273, "grad_norm": 1.0876084566116333, "learning_rate": 9.37963157894737e-05, "loss": 0.3637, "step": 43593 }, { "epoch": 2.441146824952402, "grad_norm": 1.1235402822494507, "learning_rate": 9.379605263157895e-05, "loss": 0.3101, "step": 43594 }, { "epoch": 2.4412028222645312, "grad_norm": 1.5383445024490356, "learning_rate": 9.379578947368421e-05, "loss": 0.3591, "step": 43595 }, { "epoch": 2.4412588195766602, "grad_norm": 1.0634770393371582, "learning_rate": 9.379552631578949e-05, "loss": 0.5025, "step": 43596 }, { "epoch": 2.4413148168887893, "grad_norm": 0.9967201352119446, "learning_rate": 9.379526315789475e-05, "loss": 0.3714, "step": 43597 }, { "epoch": 2.4413708142009183, "grad_norm": 1.3237299919128418, "learning_rate": 9.3795e-05, "loss": 0.541, "step": 43598 }, { "epoch": 2.4414268115130473, "grad_norm": 1.0238609313964844, "learning_rate": 9.379473684210527e-05, "loss": 0.3675, "step": 43599 }, { "epoch": 2.4414828088251763, "grad_norm": 1.0351039171218872, "learning_rate": 9.379447368421053e-05, "loss": 0.3415, "step": 43600 }, { "epoch": 
2.4415388061373053, "grad_norm": 1.2138872146606445, "learning_rate": 9.37942105263158e-05, "loss": 0.4554, "step": 43601 }, { "epoch": 2.4415948034494344, "grad_norm": 1.0285530090332031, "learning_rate": 9.379394736842106e-05, "loss": 0.3276, "step": 43602 }, { "epoch": 2.4416508007615634, "grad_norm": 1.209574580192566, "learning_rate": 9.379368421052632e-05, "loss": 0.3885, "step": 43603 }, { "epoch": 2.4417067980736924, "grad_norm": 1.61426842212677, "learning_rate": 9.379342105263158e-05, "loss": 0.4751, "step": 43604 }, { "epoch": 2.4417627953858214, "grad_norm": 1.2000374794006348, "learning_rate": 9.379315789473685e-05, "loss": 0.3743, "step": 43605 }, { "epoch": 2.4418187926979504, "grad_norm": 2.280125141143799, "learning_rate": 9.379289473684211e-05, "loss": 0.4071, "step": 43606 }, { "epoch": 2.4418747900100795, "grad_norm": 1.0987000465393066, "learning_rate": 9.379263157894737e-05, "loss": 0.4392, "step": 43607 }, { "epoch": 2.4419307873222085, "grad_norm": 1.9124033451080322, "learning_rate": 9.379236842105263e-05, "loss": 0.4704, "step": 43608 }, { "epoch": 2.4419867846343375, "grad_norm": 1.328802466392517, "learning_rate": 9.37921052631579e-05, "loss": 0.4052, "step": 43609 }, { "epoch": 2.4420427819464665, "grad_norm": 1.102630376815796, "learning_rate": 9.379184210526316e-05, "loss": 0.3419, "step": 43610 }, { "epoch": 2.4420987792585955, "grad_norm": 1.2645013332366943, "learning_rate": 9.379157894736844e-05, "loss": 0.372, "step": 43611 }, { "epoch": 2.4421547765707246, "grad_norm": 1.1576825380325317, "learning_rate": 9.379131578947368e-05, "loss": 0.5108, "step": 43612 }, { "epoch": 2.4422107738828536, "grad_norm": 21.2763671875, "learning_rate": 9.379105263157896e-05, "loss": 0.3379, "step": 43613 }, { "epoch": 2.4422667711949826, "grad_norm": 1.2121535539627075, "learning_rate": 9.379078947368422e-05, "loss": 0.4324, "step": 43614 }, { "epoch": 2.4423227685071116, "grad_norm": 1.1602247953414917, "learning_rate": 9.379052631578948e-05, 
"loss": 0.3681, "step": 43615 }, { "epoch": 2.4423787658192406, "grad_norm": 1.2490981817245483, "learning_rate": 9.379026315789475e-05, "loss": 0.4606, "step": 43616 }, { "epoch": 2.4424347631313696, "grad_norm": 1.1423240900039673, "learning_rate": 9.379e-05, "loss": 0.3683, "step": 43617 }, { "epoch": 2.4424907604434987, "grad_norm": 1.0834897756576538, "learning_rate": 9.378973684210527e-05, "loss": 0.4426, "step": 43618 }, { "epoch": 2.4425467577556277, "grad_norm": 1.1412252187728882, "learning_rate": 9.378947368421053e-05, "loss": 0.5135, "step": 43619 }, { "epoch": 2.4426027550677567, "grad_norm": 1.2741209268569946, "learning_rate": 9.37892105263158e-05, "loss": 0.3978, "step": 43620 }, { "epoch": 2.4426587523798857, "grad_norm": 1.284626841545105, "learning_rate": 9.378894736842105e-05, "loss": 0.3712, "step": 43621 }, { "epoch": 2.4427147496920147, "grad_norm": 1.2163032293319702, "learning_rate": 9.378868421052632e-05, "loss": 0.3384, "step": 43622 }, { "epoch": 2.4427707470041438, "grad_norm": 1.0085527896881104, "learning_rate": 9.378842105263158e-05, "loss": 0.2756, "step": 43623 }, { "epoch": 2.442826744316273, "grad_norm": 1.2884711027145386, "learning_rate": 9.378815789473685e-05, "loss": 0.4086, "step": 43624 }, { "epoch": 2.442882741628402, "grad_norm": 1.1734811067581177, "learning_rate": 9.378789473684211e-05, "loss": 0.3701, "step": 43625 }, { "epoch": 2.442938738940531, "grad_norm": 1.1677095890045166, "learning_rate": 9.378763157894737e-05, "loss": 0.5154, "step": 43626 }, { "epoch": 2.44299473625266, "grad_norm": 1.0862445831298828, "learning_rate": 9.378736842105263e-05, "loss": 0.4135, "step": 43627 }, { "epoch": 2.443050733564789, "grad_norm": 1.1216758489608765, "learning_rate": 9.37871052631579e-05, "loss": 0.4393, "step": 43628 }, { "epoch": 2.443106730876918, "grad_norm": 0.9754014015197754, "learning_rate": 9.378684210526317e-05, "loss": 0.4312, "step": 43629 }, { "epoch": 2.443162728189047, "grad_norm": 1.6072847843170166, 
"learning_rate": 9.378657894736843e-05, "loss": 0.3918, "step": 43630 }, { "epoch": 2.443218725501176, "grad_norm": 1.1401312351226807, "learning_rate": 9.378631578947369e-05, "loss": 0.3287, "step": 43631 }, { "epoch": 2.443274722813305, "grad_norm": 1.3051751852035522, "learning_rate": 9.378605263157894e-05, "loss": 0.5795, "step": 43632 }, { "epoch": 2.443330720125434, "grad_norm": 1.1379828453063965, "learning_rate": 9.378578947368422e-05, "loss": 0.4222, "step": 43633 }, { "epoch": 2.443386717437563, "grad_norm": 1.2749556303024292, "learning_rate": 9.378552631578948e-05, "loss": 0.4336, "step": 43634 }, { "epoch": 2.443442714749692, "grad_norm": 1.154718279838562, "learning_rate": 9.378526315789474e-05, "loss": 0.3251, "step": 43635 }, { "epoch": 2.443498712061821, "grad_norm": 1.1960395574569702, "learning_rate": 9.3785e-05, "loss": 0.6221, "step": 43636 }, { "epoch": 2.44355470937395, "grad_norm": 1.3199608325958252, "learning_rate": 9.378473684210527e-05, "loss": 0.4653, "step": 43637 }, { "epoch": 2.443610706686079, "grad_norm": 1.226493000984192, "learning_rate": 9.378447368421053e-05, "loss": 0.4635, "step": 43638 }, { "epoch": 2.443666703998208, "grad_norm": 1.4030107259750366, "learning_rate": 9.378421052631579e-05, "loss": 0.4308, "step": 43639 }, { "epoch": 2.443722701310337, "grad_norm": 1.2558891773223877, "learning_rate": 9.378394736842105e-05, "loss": 0.4089, "step": 43640 }, { "epoch": 2.443778698622466, "grad_norm": 1.0963397026062012, "learning_rate": 9.378368421052632e-05, "loss": 0.4336, "step": 43641 }, { "epoch": 2.443834695934595, "grad_norm": 1.1146467924118042, "learning_rate": 9.378342105263158e-05, "loss": 0.3727, "step": 43642 }, { "epoch": 2.443890693246724, "grad_norm": 1.049160361289978, "learning_rate": 9.378315789473686e-05, "loss": 0.3913, "step": 43643 }, { "epoch": 2.443946690558853, "grad_norm": 1.0531044006347656, "learning_rate": 9.37828947368421e-05, "loss": 0.3898, "step": 43644 }, { "epoch": 2.444002687870982, 
"grad_norm": 1.0650125741958618, "learning_rate": 9.378263157894738e-05, "loss": 0.4223, "step": 43645 }, { "epoch": 2.444058685183111, "grad_norm": 1.0833420753479004, "learning_rate": 9.378236842105264e-05, "loss": 0.3063, "step": 43646 }, { "epoch": 2.44411468249524, "grad_norm": 1.0019596815109253, "learning_rate": 9.378210526315791e-05, "loss": 0.3058, "step": 43647 }, { "epoch": 2.4441706798073692, "grad_norm": 1.0865423679351807, "learning_rate": 9.378184210526317e-05, "loss": 0.3333, "step": 43648 }, { "epoch": 2.4442266771194983, "grad_norm": 1.5698976516723633, "learning_rate": 9.378157894736841e-05, "loss": 0.6244, "step": 43649 }, { "epoch": 2.4442826744316273, "grad_norm": 1.148758053779602, "learning_rate": 9.378131578947369e-05, "loss": 0.4434, "step": 43650 }, { "epoch": 2.4443386717437563, "grad_norm": 1.1846868991851807, "learning_rate": 9.378105263157895e-05, "loss": 0.3566, "step": 43651 }, { "epoch": 2.4443946690558853, "grad_norm": 1.0910898447036743, "learning_rate": 9.378078947368422e-05, "loss": 0.3292, "step": 43652 }, { "epoch": 2.4444506663680143, "grad_norm": 1.2427153587341309, "learning_rate": 9.378052631578948e-05, "loss": 0.3736, "step": 43653 }, { "epoch": 2.4445066636801434, "grad_norm": 1.362568974494934, "learning_rate": 9.378026315789474e-05, "loss": 0.3326, "step": 43654 }, { "epoch": 2.4445626609922724, "grad_norm": 1.3020566701889038, "learning_rate": 9.378e-05, "loss": 0.5816, "step": 43655 }, { "epoch": 2.4446186583044014, "grad_norm": 1.0465363264083862, "learning_rate": 9.377973684210527e-05, "loss": 0.36, "step": 43656 }, { "epoch": 2.4446746556165304, "grad_norm": 1.1095346212387085, "learning_rate": 9.377947368421053e-05, "loss": 0.3467, "step": 43657 }, { "epoch": 2.4447306529286594, "grad_norm": 1.3338831663131714, "learning_rate": 9.377921052631579e-05, "loss": 0.5412, "step": 43658 }, { "epoch": 2.4447866502407885, "grad_norm": 1.0864962339401245, "learning_rate": 9.377894736842105e-05, "loss": 0.3924, "step": 
43659 }, { "epoch": 2.4448426475529175, "grad_norm": 1.048994541168213, "learning_rate": 9.377868421052633e-05, "loss": 0.392, "step": 43660 }, { "epoch": 2.4448986448650465, "grad_norm": 1.2112869024276733, "learning_rate": 9.377842105263159e-05, "loss": 0.4144, "step": 43661 }, { "epoch": 2.4449546421771755, "grad_norm": 0.9742178320884705, "learning_rate": 9.377815789473685e-05, "loss": 0.3506, "step": 43662 }, { "epoch": 2.4450106394893045, "grad_norm": 1.2229830026626587, "learning_rate": 9.37778947368421e-05, "loss": 0.3812, "step": 43663 }, { "epoch": 2.4450666368014335, "grad_norm": 1.1158320903778076, "learning_rate": 9.377763157894738e-05, "loss": 0.3382, "step": 43664 }, { "epoch": 2.4451226341135626, "grad_norm": 1.1286373138427734, "learning_rate": 9.377736842105264e-05, "loss": 0.341, "step": 43665 }, { "epoch": 2.4451786314256916, "grad_norm": 1.093476414680481, "learning_rate": 9.37771052631579e-05, "loss": 0.2808, "step": 43666 }, { "epoch": 2.4452346287378206, "grad_norm": 1.164022445678711, "learning_rate": 9.377684210526316e-05, "loss": 0.4055, "step": 43667 }, { "epoch": 2.4452906260499496, "grad_norm": 1.1908897161483765, "learning_rate": 9.377657894736842e-05, "loss": 0.4668, "step": 43668 }, { "epoch": 2.4453466233620786, "grad_norm": 1.337965965270996, "learning_rate": 9.377631578947369e-05, "loss": 0.4346, "step": 43669 }, { "epoch": 2.4454026206742077, "grad_norm": 1.3720922470092773, "learning_rate": 9.377605263157895e-05, "loss": 0.4393, "step": 43670 }, { "epoch": 2.4454586179863367, "grad_norm": 1.5693382024765015, "learning_rate": 9.377578947368422e-05, "loss": 0.3939, "step": 43671 }, { "epoch": 2.4455146152984657, "grad_norm": 1.0617972612380981, "learning_rate": 9.377552631578947e-05, "loss": 0.3239, "step": 43672 }, { "epoch": 2.4455706126105947, "grad_norm": 1.1115037202835083, "learning_rate": 9.377526315789474e-05, "loss": 0.3882, "step": 43673 }, { "epoch": 2.4456266099227237, "grad_norm": 0.9725748300552368, "learning_rate": 
9.3775e-05, "loss": 0.3312, "step": 43674 }, { "epoch": 2.4456826072348528, "grad_norm": 1.3639880418777466, "learning_rate": 9.377473684210528e-05, "loss": 0.5066, "step": 43675 }, { "epoch": 2.4457386045469818, "grad_norm": 1.265601396560669, "learning_rate": 9.377447368421052e-05, "loss": 0.4099, "step": 43676 }, { "epoch": 2.445794601859111, "grad_norm": 1.0266481637954712, "learning_rate": 9.37742105263158e-05, "loss": 0.4265, "step": 43677 }, { "epoch": 2.44585059917124, "grad_norm": 1.1295057535171509, "learning_rate": 9.377394736842106e-05, "loss": 0.4222, "step": 43678 }, { "epoch": 2.445906596483369, "grad_norm": 0.9731562733650208, "learning_rate": 9.377368421052633e-05, "loss": 0.2532, "step": 43679 }, { "epoch": 2.445962593795498, "grad_norm": 1.1277998685836792, "learning_rate": 9.377342105263159e-05, "loss": 0.4147, "step": 43680 }, { "epoch": 2.446018591107627, "grad_norm": 1.2166575193405151, "learning_rate": 9.377315789473685e-05, "loss": 0.4434, "step": 43681 }, { "epoch": 2.446074588419756, "grad_norm": 1.089971899986267, "learning_rate": 9.377289473684211e-05, "loss": 0.3621, "step": 43682 }, { "epoch": 2.446130585731885, "grad_norm": 1.1873785257339478, "learning_rate": 9.377263157894737e-05, "loss": 0.335, "step": 43683 }, { "epoch": 2.446186583044014, "grad_norm": 1.5514419078826904, "learning_rate": 9.377236842105264e-05, "loss": 0.5034, "step": 43684 }, { "epoch": 2.446242580356143, "grad_norm": 1.0871385335922241, "learning_rate": 9.37721052631579e-05, "loss": 0.3635, "step": 43685 }, { "epoch": 2.446298577668272, "grad_norm": 3.2459747791290283, "learning_rate": 9.377184210526316e-05, "loss": 0.4355, "step": 43686 }, { "epoch": 2.446354574980401, "grad_norm": 1.1414971351623535, "learning_rate": 9.377157894736842e-05, "loss": 0.3802, "step": 43687 }, { "epoch": 2.44641057229253, "grad_norm": 1.1510952711105347, "learning_rate": 9.377131578947369e-05, "loss": 0.3774, "step": 43688 }, { "epoch": 2.446466569604659, "grad_norm": 
1.1318522691726685, "learning_rate": 9.377105263157895e-05, "loss": 0.2816, "step": 43689 }, { "epoch": 2.446522566916788, "grad_norm": 1.124312162399292, "learning_rate": 9.377078947368421e-05, "loss": 0.3201, "step": 43690 }, { "epoch": 2.446578564228917, "grad_norm": 2.025885581970215, "learning_rate": 9.377052631578947e-05, "loss": 0.561, "step": 43691 }, { "epoch": 2.446634561541046, "grad_norm": 1.2253518104553223, "learning_rate": 9.377026315789475e-05, "loss": 0.3734, "step": 43692 }, { "epoch": 2.446690558853175, "grad_norm": 1.26633882522583, "learning_rate": 9.377e-05, "loss": 0.4172, "step": 43693 }, { "epoch": 2.446746556165304, "grad_norm": 1.433893084526062, "learning_rate": 9.376973684210526e-05, "loss": 0.4917, "step": 43694 }, { "epoch": 2.446802553477433, "grad_norm": 1.0203039646148682, "learning_rate": 9.376947368421052e-05, "loss": 0.3831, "step": 43695 }, { "epoch": 2.446858550789562, "grad_norm": 0.9234296679496765, "learning_rate": 9.37692105263158e-05, "loss": 0.3233, "step": 43696 }, { "epoch": 2.446914548101691, "grad_norm": 1.1691572666168213, "learning_rate": 9.376894736842106e-05, "loss": 0.5234, "step": 43697 }, { "epoch": 2.44697054541382, "grad_norm": 1.1446528434753418, "learning_rate": 9.376868421052633e-05, "loss": 0.3824, "step": 43698 }, { "epoch": 2.447026542725949, "grad_norm": 1.535370945930481, "learning_rate": 9.376842105263158e-05, "loss": 0.4807, "step": 43699 }, { "epoch": 2.4470825400380782, "grad_norm": 1.0322017669677734, "learning_rate": 9.376815789473684e-05, "loss": 0.3254, "step": 43700 }, { "epoch": 2.4471385373502073, "grad_norm": 1.879762887954712, "learning_rate": 9.376789473684211e-05, "loss": 0.384, "step": 43701 }, { "epoch": 2.4471945346623363, "grad_norm": 1.1668339967727661, "learning_rate": 9.376763157894737e-05, "loss": 0.3251, "step": 43702 }, { "epoch": 2.4472505319744653, "grad_norm": 1.5099295377731323, "learning_rate": 9.376736842105264e-05, "loss": 0.4476, "step": 43703 }, { "epoch": 
2.4473065292865943, "grad_norm": 44.37250900268555, "learning_rate": 9.376710526315789e-05, "loss": 0.3932, "step": 43704 }, { "epoch": 2.4473625265987233, "grad_norm": 1.140372633934021, "learning_rate": 9.376684210526316e-05, "loss": 0.368, "step": 43705 }, { "epoch": 2.4474185239108524, "grad_norm": 1.0529611110687256, "learning_rate": 9.376657894736842e-05, "loss": 0.3392, "step": 43706 }, { "epoch": 2.4474745212229814, "grad_norm": 1.2084894180297852, "learning_rate": 9.37663157894737e-05, "loss": 0.35, "step": 43707 }, { "epoch": 2.4475305185351104, "grad_norm": 1.6079727411270142, "learning_rate": 9.376605263157896e-05, "loss": 0.5559, "step": 43708 }, { "epoch": 2.4475865158472394, "grad_norm": 1.3256789445877075, "learning_rate": 9.376578947368422e-05, "loss": 0.3361, "step": 43709 }, { "epoch": 2.4476425131593684, "grad_norm": 1.705016016960144, "learning_rate": 9.376552631578947e-05, "loss": 0.4373, "step": 43710 }, { "epoch": 2.4476985104714974, "grad_norm": 1.2192429304122925, "learning_rate": 9.376526315789475e-05, "loss": 0.5857, "step": 43711 }, { "epoch": 2.4477545077836265, "grad_norm": 1.0645999908447266, "learning_rate": 9.376500000000001e-05, "loss": 0.3985, "step": 43712 }, { "epoch": 2.4478105050957555, "grad_norm": 1.1529145240783691, "learning_rate": 9.376473684210527e-05, "loss": 0.3655, "step": 43713 }, { "epoch": 2.4478665024078845, "grad_norm": 1.1662991046905518, "learning_rate": 9.376447368421053e-05, "loss": 0.3829, "step": 43714 }, { "epoch": 2.4479224997200135, "grad_norm": 0.9849196672439575, "learning_rate": 9.37642105263158e-05, "loss": 0.2998, "step": 43715 }, { "epoch": 2.4479784970321425, "grad_norm": 1.6906744241714478, "learning_rate": 9.376394736842106e-05, "loss": 0.4748, "step": 43716 }, { "epoch": 2.4480344943442716, "grad_norm": 1.300154209136963, "learning_rate": 9.376368421052632e-05, "loss": 0.4192, "step": 43717 }, { "epoch": 2.4480904916564006, "grad_norm": 1.0675753355026245, "learning_rate": 
9.376342105263158e-05, "loss": 0.3314, "step": 43718 }, { "epoch": 2.4481464889685296, "grad_norm": 1.8462144136428833, "learning_rate": 9.376315789473684e-05, "loss": 0.4346, "step": 43719 }, { "epoch": 2.4482024862806586, "grad_norm": 1.3585484027862549, "learning_rate": 9.376289473684211e-05, "loss": 0.4246, "step": 43720 }, { "epoch": 2.4482584835927876, "grad_norm": 1.0886788368225098, "learning_rate": 9.376263157894737e-05, "loss": 0.3985, "step": 43721 }, { "epoch": 2.4483144809049167, "grad_norm": 1.0004119873046875, "learning_rate": 9.376236842105263e-05, "loss": 0.2763, "step": 43722 }, { "epoch": 2.4483704782170457, "grad_norm": 1.22940993309021, "learning_rate": 9.376210526315789e-05, "loss": 0.4011, "step": 43723 }, { "epoch": 2.4484264755291747, "grad_norm": 1.137768030166626, "learning_rate": 9.376184210526317e-05, "loss": 0.3554, "step": 43724 }, { "epoch": 2.4484824728413037, "grad_norm": 1.2076971530914307, "learning_rate": 9.376157894736842e-05, "loss": 0.4111, "step": 43725 }, { "epoch": 2.4485384701534327, "grad_norm": 1.3236175775527954, "learning_rate": 9.37613157894737e-05, "loss": 0.4733, "step": 43726 }, { "epoch": 2.4485944674655618, "grad_norm": 1.5193358659744263, "learning_rate": 9.376105263157894e-05, "loss": 0.4315, "step": 43727 }, { "epoch": 2.4486504647776908, "grad_norm": 1.0656602382659912, "learning_rate": 9.376078947368422e-05, "loss": 0.3344, "step": 43728 }, { "epoch": 2.44870646208982, "grad_norm": 1.3186131715774536, "learning_rate": 9.376052631578948e-05, "loss": 0.4576, "step": 43729 }, { "epoch": 2.448762459401949, "grad_norm": 1.5465681552886963, "learning_rate": 9.376026315789475e-05, "loss": 0.4415, "step": 43730 }, { "epoch": 2.448818456714078, "grad_norm": 1.1777374744415283, "learning_rate": 9.376e-05, "loss": 0.3712, "step": 43731 }, { "epoch": 2.448874454026207, "grad_norm": 1.0133075714111328, "learning_rate": 9.375973684210527e-05, "loss": 0.3226, "step": 43732 }, { "epoch": 2.448930451338336, "grad_norm": 
1.2057561874389648, "learning_rate": 9.375947368421053e-05, "loss": 0.335, "step": 43733 }, { "epoch": 2.448986448650465, "grad_norm": 1.2212294340133667, "learning_rate": 9.37592105263158e-05, "loss": 0.3468, "step": 43734 }, { "epoch": 2.449042445962594, "grad_norm": 1.236613392829895, "learning_rate": 9.375894736842106e-05, "loss": 0.5029, "step": 43735 }, { "epoch": 2.449098443274723, "grad_norm": 1.046769142150879, "learning_rate": 9.375868421052631e-05, "loss": 0.4269, "step": 43736 }, { "epoch": 2.449154440586852, "grad_norm": 1.3350902795791626, "learning_rate": 9.375842105263158e-05, "loss": 0.5204, "step": 43737 }, { "epoch": 2.449210437898981, "grad_norm": 1.2115131616592407, "learning_rate": 9.375815789473684e-05, "loss": 0.456, "step": 43738 }, { "epoch": 2.44926643521111, "grad_norm": 1.0237935781478882, "learning_rate": 9.375789473684212e-05, "loss": 0.3462, "step": 43739 }, { "epoch": 2.449322432523239, "grad_norm": 1.6484836339950562, "learning_rate": 9.375763157894737e-05, "loss": 0.4813, "step": 43740 }, { "epoch": 2.449378429835368, "grad_norm": 2.320385694503784, "learning_rate": 9.375736842105263e-05, "loss": 0.4933, "step": 43741 }, { "epoch": 2.449434427147497, "grad_norm": 1.1416598558425903, "learning_rate": 9.37571052631579e-05, "loss": 0.3579, "step": 43742 }, { "epoch": 2.449490424459626, "grad_norm": 1.3637193441390991, "learning_rate": 9.375684210526317e-05, "loss": 0.4303, "step": 43743 }, { "epoch": 2.449546421771755, "grad_norm": 1.2515645027160645, "learning_rate": 9.375657894736843e-05, "loss": 0.3739, "step": 43744 }, { "epoch": 2.449602419083884, "grad_norm": 1.1284507513046265, "learning_rate": 9.375631578947369e-05, "loss": 0.3438, "step": 43745 }, { "epoch": 2.449658416396013, "grad_norm": 1.0731176137924194, "learning_rate": 9.375605263157895e-05, "loss": 0.3492, "step": 43746 }, { "epoch": 2.449714413708142, "grad_norm": 1.2167754173278809, "learning_rate": 9.375578947368422e-05, "loss": 0.5333, "step": 43747 }, { "epoch": 
2.449770411020271, "grad_norm": 1.2244106531143188, "learning_rate": 9.375552631578948e-05, "loss": 0.3766, "step": 43748 }, { "epoch": 2.4498264083324, "grad_norm": 1.012938380241394, "learning_rate": 9.375526315789474e-05, "loss": 0.381, "step": 43749 }, { "epoch": 2.449882405644529, "grad_norm": 1.1563268899917603, "learning_rate": 9.3755e-05, "loss": 0.3678, "step": 43750 }, { "epoch": 2.449938402956658, "grad_norm": 1.0605182647705078, "learning_rate": 9.375473684210527e-05, "loss": 0.3274, "step": 43751 }, { "epoch": 2.4499944002687872, "grad_norm": 2.353602886199951, "learning_rate": 9.375447368421053e-05, "loss": 0.3984, "step": 43752 }, { "epoch": 2.4500503975809163, "grad_norm": 1.3002004623413086, "learning_rate": 9.375421052631579e-05, "loss": 0.573, "step": 43753 }, { "epoch": 2.4501063948930453, "grad_norm": 1.1317799091339111, "learning_rate": 9.375394736842105e-05, "loss": 0.3771, "step": 43754 }, { "epoch": 2.4501623922051743, "grad_norm": 1.151565670967102, "learning_rate": 9.375368421052631e-05, "loss": 0.3281, "step": 43755 }, { "epoch": 2.4502183895173033, "grad_norm": 1.2632449865341187, "learning_rate": 9.375342105263158e-05, "loss": 0.3834, "step": 43756 }, { "epoch": 2.4502743868294323, "grad_norm": 1.2566313743591309, "learning_rate": 9.375315789473684e-05, "loss": 0.4457, "step": 43757 }, { "epoch": 2.4503303841415613, "grad_norm": 1.334071397781372, "learning_rate": 9.375289473684212e-05, "loss": 0.4616, "step": 43758 }, { "epoch": 2.4503863814536904, "grad_norm": 1.0823010206222534, "learning_rate": 9.375263157894736e-05, "loss": 0.3203, "step": 43759 }, { "epoch": 2.4504423787658194, "grad_norm": 1.4388163089752197, "learning_rate": 9.375236842105264e-05, "loss": 0.4122, "step": 43760 }, { "epoch": 2.4504983760779484, "grad_norm": 1.2031493186950684, "learning_rate": 9.37521052631579e-05, "loss": 0.3863, "step": 43761 }, { "epoch": 2.4505543733900774, "grad_norm": 1.0397971868515015, "learning_rate": 9.375184210526317e-05, "loss": 
0.3096, "step": 43762 }, { "epoch": 2.4506103707022064, "grad_norm": 1.2586244344711304, "learning_rate": 9.375157894736843e-05, "loss": 0.4188, "step": 43763 }, { "epoch": 2.4506663680143355, "grad_norm": 9.512496948242188, "learning_rate": 9.375131578947369e-05, "loss": 0.354, "step": 43764 }, { "epoch": 2.4507223653264645, "grad_norm": 1.300660490989685, "learning_rate": 9.375105263157895e-05, "loss": 0.353, "step": 43765 }, { "epoch": 2.4507783626385935, "grad_norm": 1.4918419122695923, "learning_rate": 9.375078947368422e-05, "loss": 0.495, "step": 43766 }, { "epoch": 2.4508343599507225, "grad_norm": 1.2638053894042969, "learning_rate": 9.375052631578948e-05, "loss": 0.3409, "step": 43767 }, { "epoch": 2.4508903572628515, "grad_norm": 1.2711232900619507, "learning_rate": 9.375026315789474e-05, "loss": 0.4545, "step": 43768 }, { "epoch": 2.4509463545749806, "grad_norm": 1.112298846244812, "learning_rate": 9.375e-05, "loss": 0.38, "step": 43769 }, { "epoch": 2.4510023518871096, "grad_norm": 1.0446643829345703, "learning_rate": 9.374973684210526e-05, "loss": 0.3301, "step": 43770 }, { "epoch": 2.4510583491992386, "grad_norm": 1.1426150798797607, "learning_rate": 9.374947368421053e-05, "loss": 0.3669, "step": 43771 }, { "epoch": 2.4511143465113676, "grad_norm": 1.2353901863098145, "learning_rate": 9.37492105263158e-05, "loss": 0.3856, "step": 43772 }, { "epoch": 2.4511703438234966, "grad_norm": 1.0847564935684204, "learning_rate": 9.374894736842105e-05, "loss": 0.4021, "step": 43773 }, { "epoch": 2.4512263411356257, "grad_norm": 1.307093620300293, "learning_rate": 9.374868421052631e-05, "loss": 0.5238, "step": 43774 }, { "epoch": 2.4512823384477547, "grad_norm": 1.017579436302185, "learning_rate": 9.374842105263159e-05, "loss": 0.3229, "step": 43775 }, { "epoch": 2.4513383357598837, "grad_norm": 1.171047329902649, "learning_rate": 9.374815789473685e-05, "loss": 0.4943, "step": 43776 }, { "epoch": 2.4513943330720127, "grad_norm": 1.1344552040100098, "learning_rate": 
9.374789473684211e-05, "loss": 0.4714, "step": 43777 }, { "epoch": 2.4514503303841417, "grad_norm": 1.21371328830719, "learning_rate": 9.374763157894737e-05, "loss": 0.3875, "step": 43778 }, { "epoch": 2.4515063276962707, "grad_norm": 1.0660808086395264, "learning_rate": 9.374736842105264e-05, "loss": 0.4513, "step": 43779 }, { "epoch": 2.4515623250083998, "grad_norm": 1.0837348699569702, "learning_rate": 9.37471052631579e-05, "loss": 0.3179, "step": 43780 }, { "epoch": 2.451618322320529, "grad_norm": 1.1555426120758057, "learning_rate": 9.374684210526317e-05, "loss": 0.393, "step": 43781 }, { "epoch": 2.451674319632658, "grad_norm": 1.4543371200561523, "learning_rate": 9.374657894736842e-05, "loss": 0.4406, "step": 43782 }, { "epoch": 2.451730316944787, "grad_norm": 1.4915732145309448, "learning_rate": 9.374631578947369e-05, "loss": 0.477, "step": 43783 }, { "epoch": 2.451786314256916, "grad_norm": 0.9504106044769287, "learning_rate": 9.374605263157895e-05, "loss": 0.3675, "step": 43784 }, { "epoch": 2.451842311569045, "grad_norm": 1.3691825866699219, "learning_rate": 9.374578947368423e-05, "loss": 0.3154, "step": 43785 }, { "epoch": 2.451898308881174, "grad_norm": 1.1487942934036255, "learning_rate": 9.374552631578947e-05, "loss": 0.352, "step": 43786 }, { "epoch": 2.451954306193303, "grad_norm": 1.5841172933578491, "learning_rate": 9.374526315789473e-05, "loss": 0.46, "step": 43787 }, { "epoch": 2.452010303505432, "grad_norm": 1.1862643957138062, "learning_rate": 9.3745e-05, "loss": 0.3524, "step": 43788 }, { "epoch": 2.452066300817561, "grad_norm": 1.168288230895996, "learning_rate": 9.374473684210526e-05, "loss": 0.2511, "step": 43789 }, { "epoch": 2.45212229812969, "grad_norm": 1.2948198318481445, "learning_rate": 9.374447368421054e-05, "loss": 0.344, "step": 43790 }, { "epoch": 2.452178295441819, "grad_norm": 0.9435927271842957, "learning_rate": 9.374421052631578e-05, "loss": 0.3356, "step": 43791 }, { "epoch": 2.452234292753948, "grad_norm": 
1.1332719326019287, "learning_rate": 9.374394736842106e-05, "loss": 0.3971, "step": 43792 }, { "epoch": 2.452290290066077, "grad_norm": 1.0861185789108276, "learning_rate": 9.374368421052632e-05, "loss": 0.4332, "step": 43793 }, { "epoch": 2.452346287378206, "grad_norm": 1.4115861654281616, "learning_rate": 9.374342105263159e-05, "loss": 0.5223, "step": 43794 }, { "epoch": 2.452402284690335, "grad_norm": 1.119736909866333, "learning_rate": 9.374315789473685e-05, "loss": 0.3643, "step": 43795 }, { "epoch": 2.452458282002464, "grad_norm": 1.207983136177063, "learning_rate": 9.374289473684211e-05, "loss": 0.3963, "step": 43796 }, { "epoch": 2.452514279314593, "grad_norm": 1.0166454315185547, "learning_rate": 9.374263157894737e-05, "loss": 0.4215, "step": 43797 }, { "epoch": 2.452570276626722, "grad_norm": 1.1283265352249146, "learning_rate": 9.374236842105264e-05, "loss": 0.3324, "step": 43798 }, { "epoch": 2.4526262739388507, "grad_norm": 1.3217812776565552, "learning_rate": 9.37421052631579e-05, "loss": 0.3715, "step": 43799 }, { "epoch": 2.45268227125098, "grad_norm": 1.312993049621582, "learning_rate": 9.374184210526316e-05, "loss": 0.3835, "step": 43800 }, { "epoch": 2.4527382685631087, "grad_norm": 1.273610234260559, "learning_rate": 9.374157894736842e-05, "loss": 0.5424, "step": 43801 }, { "epoch": 2.452794265875238, "grad_norm": 1.1342265605926514, "learning_rate": 9.37413157894737e-05, "loss": 0.457, "step": 43802 }, { "epoch": 2.4528502631873668, "grad_norm": 1.2340644598007202, "learning_rate": 9.374105263157895e-05, "loss": 0.4694, "step": 43803 }, { "epoch": 2.4529062604994962, "grad_norm": 1.126397728919983, "learning_rate": 9.374078947368421e-05, "loss": 0.288, "step": 43804 }, { "epoch": 2.452962257811625, "grad_norm": 1.1056026220321655, "learning_rate": 9.374052631578947e-05, "loss": 0.4957, "step": 43805 }, { "epoch": 2.4530182551237543, "grad_norm": 1.2103379964828491, "learning_rate": 9.374026315789473e-05, "loss": 0.4769, "step": 43806 }, { 
"epoch": 2.453074252435883, "grad_norm": 1.147001028060913, "learning_rate": 9.374000000000001e-05, "loss": 0.3714, "step": 43807 }, { "epoch": 2.4531302497480123, "grad_norm": 1.0231484174728394, "learning_rate": 9.373973684210527e-05, "loss": 0.3541, "step": 43808 }, { "epoch": 2.453186247060141, "grad_norm": 1.1612920761108398, "learning_rate": 9.373947368421053e-05, "loss": 0.4203, "step": 43809 }, { "epoch": 2.4532422443722703, "grad_norm": 1.039345622062683, "learning_rate": 9.373921052631579e-05, "loss": 0.3911, "step": 43810 }, { "epoch": 2.453298241684399, "grad_norm": 1.053196907043457, "learning_rate": 9.373894736842106e-05, "loss": 0.4328, "step": 43811 }, { "epoch": 2.4533542389965284, "grad_norm": 1.1567507982254028, "learning_rate": 9.373868421052632e-05, "loss": 0.3564, "step": 43812 }, { "epoch": 2.453410236308657, "grad_norm": 1.066456913948059, "learning_rate": 9.373842105263159e-05, "loss": 0.3967, "step": 43813 }, { "epoch": 2.4534662336207864, "grad_norm": 1.111541986465454, "learning_rate": 9.373815789473684e-05, "loss": 0.4746, "step": 43814 }, { "epoch": 2.453522230932915, "grad_norm": 1.2412431240081787, "learning_rate": 9.373789473684211e-05, "loss": 0.4545, "step": 43815 }, { "epoch": 2.4535782282450445, "grad_norm": 1.2947889566421509, "learning_rate": 9.373763157894737e-05, "loss": 0.4872, "step": 43816 }, { "epoch": 2.453634225557173, "grad_norm": 1.2588623762130737, "learning_rate": 9.373736842105265e-05, "loss": 0.6162, "step": 43817 }, { "epoch": 2.4536902228693025, "grad_norm": 1.038870096206665, "learning_rate": 9.37371052631579e-05, "loss": 0.2991, "step": 43818 }, { "epoch": 2.453746220181431, "grad_norm": 1.0160081386566162, "learning_rate": 9.373684210526316e-05, "loss": 0.37, "step": 43819 }, { "epoch": 2.4538022174935605, "grad_norm": 1.3640297651290894, "learning_rate": 9.373657894736842e-05, "loss": 0.4391, "step": 43820 }, { "epoch": 2.453858214805689, "grad_norm": 1.1888067722320557, "learning_rate": 
9.373631578947368e-05, "loss": 0.3819, "step": 43821 }, { "epoch": 2.4539142121178186, "grad_norm": 1.3223631381988525, "learning_rate": 9.373605263157896e-05, "loss": 0.4584, "step": 43822 }, { "epoch": 2.453970209429947, "grad_norm": 1.1852532625198364, "learning_rate": 9.37357894736842e-05, "loss": 0.3437, "step": 43823 }, { "epoch": 2.4540262067420766, "grad_norm": 1.1907708644866943, "learning_rate": 9.373552631578948e-05, "loss": 0.3982, "step": 43824 }, { "epoch": 2.454082204054205, "grad_norm": 2.5197865962982178, "learning_rate": 9.373526315789474e-05, "loss": 0.5213, "step": 43825 }, { "epoch": 2.4541382013663346, "grad_norm": 1.2427020072937012, "learning_rate": 9.373500000000001e-05, "loss": 0.3831, "step": 43826 }, { "epoch": 2.4541941986784632, "grad_norm": 1.1632657051086426, "learning_rate": 9.373473684210527e-05, "loss": 0.3876, "step": 43827 }, { "epoch": 2.4542501959905927, "grad_norm": 1.2760387659072876, "learning_rate": 9.373447368421053e-05, "loss": 0.5675, "step": 43828 }, { "epoch": 2.4543061933027213, "grad_norm": 0.9990751147270203, "learning_rate": 9.373421052631579e-05, "loss": 0.3503, "step": 43829 }, { "epoch": 2.4543621906148507, "grad_norm": 1.0173438787460327, "learning_rate": 9.373394736842106e-05, "loss": 0.3304, "step": 43830 }, { "epoch": 2.4544181879269793, "grad_norm": 1.1189181804656982, "learning_rate": 9.373368421052632e-05, "loss": 0.3402, "step": 43831 }, { "epoch": 2.4544741852391088, "grad_norm": 1.1344925165176392, "learning_rate": 9.373342105263158e-05, "loss": 0.3977, "step": 43832 }, { "epoch": 2.4545301825512373, "grad_norm": 1.3588571548461914, "learning_rate": 9.373315789473684e-05, "loss": 0.3711, "step": 43833 }, { "epoch": 2.454586179863367, "grad_norm": 1.3361027240753174, "learning_rate": 9.373289473684211e-05, "loss": 0.4532, "step": 43834 }, { "epoch": 2.4546421771754954, "grad_norm": 1.056199073791504, "learning_rate": 9.373263157894737e-05, "loss": 0.4686, "step": 43835 }, { "epoch": 2.454698174487625, 
"grad_norm": 1.0404422283172607, "learning_rate": 9.373236842105265e-05, "loss": 0.3905, "step": 43836 }, { "epoch": 2.4547541717997534, "grad_norm": 1.199611783027649, "learning_rate": 9.37321052631579e-05, "loss": 0.4326, "step": 43837 }, { "epoch": 2.454810169111883, "grad_norm": 1.0852619409561157, "learning_rate": 9.373184210526317e-05, "loss": 0.5105, "step": 43838 }, { "epoch": 2.4548661664240115, "grad_norm": 1.2011563777923584, "learning_rate": 9.373157894736843e-05, "loss": 0.3648, "step": 43839 }, { "epoch": 2.4549221637361405, "grad_norm": 1.1973915100097656, "learning_rate": 9.373131578947369e-05, "loss": 0.5492, "step": 43840 }, { "epoch": 2.4549781610482695, "grad_norm": 1.3122315406799316, "learning_rate": 9.373105263157895e-05, "loss": 0.3891, "step": 43841 }, { "epoch": 2.4550341583603985, "grad_norm": 1.1656357049942017, "learning_rate": 9.37307894736842e-05, "loss": 0.3554, "step": 43842 }, { "epoch": 2.4550901556725275, "grad_norm": 1.22404146194458, "learning_rate": 9.373052631578948e-05, "loss": 0.405, "step": 43843 }, { "epoch": 2.4551461529846565, "grad_norm": 1.2471683025360107, "learning_rate": 9.373026315789474e-05, "loss": 0.3592, "step": 43844 }, { "epoch": 2.4552021502967856, "grad_norm": 1.3199968338012695, "learning_rate": 9.373000000000001e-05, "loss": 0.4145, "step": 43845 }, { "epoch": 2.4552581476089146, "grad_norm": 1.490583062171936, "learning_rate": 9.372973684210526e-05, "loss": 0.4096, "step": 43846 }, { "epoch": 2.4553141449210436, "grad_norm": 1.3668901920318604, "learning_rate": 9.372947368421053e-05, "loss": 0.418, "step": 43847 }, { "epoch": 2.4553701422331726, "grad_norm": 1.2287410497665405, "learning_rate": 9.372921052631579e-05, "loss": 0.3582, "step": 43848 }, { "epoch": 2.4554261395453016, "grad_norm": 1.4730790853500366, "learning_rate": 9.372894736842106e-05, "loss": 0.5673, "step": 43849 }, { "epoch": 2.4554821368574307, "grad_norm": 1.4054229259490967, "learning_rate": 9.372868421052632e-05, "loss": 0.3533, 
"step": 43850 }, { "epoch": 2.4555381341695597, "grad_norm": 1.4618122577667236, "learning_rate": 9.372842105263158e-05, "loss": 0.4733, "step": 43851 }, { "epoch": 2.4555941314816887, "grad_norm": 1.2669718265533447, "learning_rate": 9.372815789473684e-05, "loss": 0.2999, "step": 43852 }, { "epoch": 2.4556501287938177, "grad_norm": 1.144573450088501, "learning_rate": 9.372789473684212e-05, "loss": 0.4246, "step": 43853 }, { "epoch": 2.4557061261059467, "grad_norm": 1.2047260999679565, "learning_rate": 9.372763157894738e-05, "loss": 0.4535, "step": 43854 }, { "epoch": 2.4557621234180758, "grad_norm": 1.1501902341842651, "learning_rate": 9.372736842105264e-05, "loss": 0.4079, "step": 43855 }, { "epoch": 2.455818120730205, "grad_norm": 0.9730649590492249, "learning_rate": 9.37271052631579e-05, "loss": 0.3059, "step": 43856 }, { "epoch": 2.455874118042334, "grad_norm": 1.3352222442626953, "learning_rate": 9.372684210526316e-05, "loss": 0.3857, "step": 43857 }, { "epoch": 2.455930115354463, "grad_norm": 1.0371432304382324, "learning_rate": 9.372657894736843e-05, "loss": 0.3403, "step": 43858 }, { "epoch": 2.455986112666592, "grad_norm": 1.271681547164917, "learning_rate": 9.372631578947369e-05, "loss": 0.4078, "step": 43859 }, { "epoch": 2.456042109978721, "grad_norm": 0.9867258667945862, "learning_rate": 9.372605263157895e-05, "loss": 0.2883, "step": 43860 }, { "epoch": 2.45609810729085, "grad_norm": 1.5032049417495728, "learning_rate": 9.372578947368421e-05, "loss": 0.5264, "step": 43861 }, { "epoch": 2.456154104602979, "grad_norm": 1.1476589441299438, "learning_rate": 9.372552631578948e-05, "loss": 0.5132, "step": 43862 }, { "epoch": 2.456210101915108, "grad_norm": 1.3256691694259644, "learning_rate": 9.372526315789474e-05, "loss": 0.5342, "step": 43863 }, { "epoch": 2.456266099227237, "grad_norm": 1.1948165893554688, "learning_rate": 9.3725e-05, "loss": 0.5206, "step": 43864 }, { "epoch": 2.456322096539366, "grad_norm": 0.9295013546943665, "learning_rate": 
9.372473684210526e-05, "loss": 0.4475, "step": 43865 }, { "epoch": 2.456378093851495, "grad_norm": 1.3460439443588257, "learning_rate": 9.372447368421053e-05, "loss": 0.4908, "step": 43866 }, { "epoch": 2.456434091163624, "grad_norm": 1.1859809160232544, "learning_rate": 9.37242105263158e-05, "loss": 0.3331, "step": 43867 }, { "epoch": 2.456490088475753, "grad_norm": 1.2419772148132324, "learning_rate": 9.372394736842107e-05, "loss": 0.3966, "step": 43868 }, { "epoch": 2.456546085787882, "grad_norm": 1.3940320014953613, "learning_rate": 9.372368421052631e-05, "loss": 0.4059, "step": 43869 }, { "epoch": 2.456602083100011, "grad_norm": 1.1169556379318237, "learning_rate": 9.372342105263159e-05, "loss": 0.4131, "step": 43870 }, { "epoch": 2.45665808041214, "grad_norm": 1.2106316089630127, "learning_rate": 9.372315789473685e-05, "loss": 0.4508, "step": 43871 }, { "epoch": 2.456714077724269, "grad_norm": 1.2936195135116577, "learning_rate": 9.372289473684212e-05, "loss": 0.4039, "step": 43872 }, { "epoch": 2.456770075036398, "grad_norm": 1.208203911781311, "learning_rate": 9.372263157894738e-05, "loss": 0.4323, "step": 43873 }, { "epoch": 2.456826072348527, "grad_norm": 1.451072096824646, "learning_rate": 9.372236842105263e-05, "loss": 0.4534, "step": 43874 }, { "epoch": 2.456882069660656, "grad_norm": 1.1147087812423706, "learning_rate": 9.37221052631579e-05, "loss": 0.4012, "step": 43875 }, { "epoch": 2.456938066972785, "grad_norm": 1.38563072681427, "learning_rate": 9.372184210526316e-05, "loss": 0.4124, "step": 43876 }, { "epoch": 2.456994064284914, "grad_norm": 1.0656094551086426, "learning_rate": 9.372157894736843e-05, "loss": 0.3636, "step": 43877 }, { "epoch": 2.457050061597043, "grad_norm": 1.2050896883010864, "learning_rate": 9.372131578947368e-05, "loss": 0.3482, "step": 43878 }, { "epoch": 2.457106058909172, "grad_norm": 1.5593667030334473, "learning_rate": 9.372105263157895e-05, "loss": 0.5082, "step": 43879 }, { "epoch": 2.4571620562213012, "grad_norm": 
1.3880250453948975, "learning_rate": 9.372078947368421e-05, "loss": 0.4241, "step": 43880 }, { "epoch": 2.4572180535334303, "grad_norm": 1.1753108501434326, "learning_rate": 9.372052631578948e-05, "loss": 0.5359, "step": 43881 }, { "epoch": 2.4572740508455593, "grad_norm": 1.5554665327072144, "learning_rate": 9.372026315789474e-05, "loss": 0.4002, "step": 43882 }, { "epoch": 2.4573300481576883, "grad_norm": 1.3856898546218872, "learning_rate": 9.372e-05, "loss": 0.4492, "step": 43883 }, { "epoch": 2.4573860454698173, "grad_norm": 1.2469426393508911, "learning_rate": 9.371973684210526e-05, "loss": 0.4838, "step": 43884 }, { "epoch": 2.4574420427819463, "grad_norm": 1.2526464462280273, "learning_rate": 9.371947368421054e-05, "loss": 0.4892, "step": 43885 }, { "epoch": 2.4574980400940754, "grad_norm": 1.2267673015594482, "learning_rate": 9.37192105263158e-05, "loss": 0.3422, "step": 43886 }, { "epoch": 2.4575540374062044, "grad_norm": 1.4503768682479858, "learning_rate": 9.371894736842106e-05, "loss": 0.5103, "step": 43887 }, { "epoch": 2.4576100347183334, "grad_norm": 3.7832815647125244, "learning_rate": 9.371868421052632e-05, "loss": 0.397, "step": 43888 }, { "epoch": 2.4576660320304624, "grad_norm": 0.9889214038848877, "learning_rate": 9.371842105263159e-05, "loss": 0.2748, "step": 43889 }, { "epoch": 2.4577220293425914, "grad_norm": 1.1794129610061646, "learning_rate": 9.371815789473685e-05, "loss": 0.3459, "step": 43890 }, { "epoch": 2.4577780266547204, "grad_norm": 1.0466742515563965, "learning_rate": 9.371789473684211e-05, "loss": 0.3532, "step": 43891 }, { "epoch": 2.4578340239668495, "grad_norm": 1.0683820247650146, "learning_rate": 9.371763157894737e-05, "loss": 0.2881, "step": 43892 }, { "epoch": 2.4578900212789785, "grad_norm": 1.264441967010498, "learning_rate": 9.371736842105263e-05, "loss": 0.4009, "step": 43893 }, { "epoch": 2.4579460185911075, "grad_norm": 1.0755852460861206, "learning_rate": 9.37171052631579e-05, "loss": 0.2855, "step": 43894 }, { 
"epoch": 2.4580020159032365, "grad_norm": 1.2200274467468262, "learning_rate": 9.371684210526316e-05, "loss": 0.3453, "step": 43895 }, { "epoch": 2.4580580132153655, "grad_norm": 1.0976800918579102, "learning_rate": 9.371657894736842e-05, "loss": 0.4506, "step": 43896 }, { "epoch": 2.4581140105274946, "grad_norm": 1.2620670795440674, "learning_rate": 9.371631578947368e-05, "loss": 0.4072, "step": 43897 }, { "epoch": 2.4581700078396236, "grad_norm": 1.0903087854385376, "learning_rate": 9.371605263157895e-05, "loss": 0.5302, "step": 43898 }, { "epoch": 2.4582260051517526, "grad_norm": 1.069486379623413, "learning_rate": 9.371578947368421e-05, "loss": 0.4267, "step": 43899 }, { "epoch": 2.4582820024638816, "grad_norm": 1.0436335802078247, "learning_rate": 9.371552631578949e-05, "loss": 0.3439, "step": 43900 }, { "epoch": 2.4583379997760106, "grad_norm": 1.3194355964660645, "learning_rate": 9.371526315789473e-05, "loss": 0.428, "step": 43901 }, { "epoch": 2.4583939970881397, "grad_norm": 1.2429838180541992, "learning_rate": 9.3715e-05, "loss": 0.3492, "step": 43902 }, { "epoch": 2.4584499944002687, "grad_norm": 1.0790207386016846, "learning_rate": 9.371473684210527e-05, "loss": 0.3143, "step": 43903 }, { "epoch": 2.4585059917123977, "grad_norm": 1.4567341804504395, "learning_rate": 9.371447368421054e-05, "loss": 0.5508, "step": 43904 }, { "epoch": 2.4585619890245267, "grad_norm": 1.1903563737869263, "learning_rate": 9.37142105263158e-05, "loss": 0.3575, "step": 43905 }, { "epoch": 2.4586179863366557, "grad_norm": 1.1110758781433105, "learning_rate": 9.371394736842106e-05, "loss": 0.4926, "step": 43906 }, { "epoch": 2.4586739836487848, "grad_norm": 1.6739675998687744, "learning_rate": 9.371368421052632e-05, "loss": 0.4816, "step": 43907 }, { "epoch": 2.4587299809609138, "grad_norm": 2.0655746459960938, "learning_rate": 9.371342105263158e-05, "loss": 0.664, "step": 43908 }, { "epoch": 2.458785978273043, "grad_norm": 1.246039867401123, "learning_rate": 
9.371315789473685e-05, "loss": 0.4272, "step": 43909 }, { "epoch": 2.458841975585172, "grad_norm": 1.3805861473083496, "learning_rate": 9.371289473684211e-05, "loss": 0.4314, "step": 43910 }, { "epoch": 2.458897972897301, "grad_norm": 1.2322567701339722, "learning_rate": 9.371263157894737e-05, "loss": 0.3976, "step": 43911 }, { "epoch": 2.45895397020943, "grad_norm": 1.2185637950897217, "learning_rate": 9.371236842105263e-05, "loss": 0.5363, "step": 43912 }, { "epoch": 2.459009967521559, "grad_norm": 1.1679600477218628, "learning_rate": 9.37121052631579e-05, "loss": 0.3417, "step": 43913 }, { "epoch": 2.459065964833688, "grad_norm": 1.1826345920562744, "learning_rate": 9.371184210526316e-05, "loss": 0.3998, "step": 43914 }, { "epoch": 2.459121962145817, "grad_norm": 1.0912599563598633, "learning_rate": 9.371157894736842e-05, "loss": 0.3837, "step": 43915 }, { "epoch": 2.459177959457946, "grad_norm": 1.1540943384170532, "learning_rate": 9.371131578947368e-05, "loss": 0.3983, "step": 43916 }, { "epoch": 2.459233956770075, "grad_norm": 1.4016510248184204, "learning_rate": 9.371105263157896e-05, "loss": 0.4861, "step": 43917 }, { "epoch": 2.459289954082204, "grad_norm": 1.6697447299957275, "learning_rate": 9.371078947368422e-05, "loss": 0.4754, "step": 43918 }, { "epoch": 2.459345951394333, "grad_norm": 1.7862989902496338, "learning_rate": 9.371052631578948e-05, "loss": 0.3556, "step": 43919 }, { "epoch": 2.459401948706462, "grad_norm": 1.163803219795227, "learning_rate": 9.371026315789474e-05, "loss": 0.4877, "step": 43920 }, { "epoch": 2.459457946018591, "grad_norm": 1.2600648403167725, "learning_rate": 9.371000000000001e-05, "loss": 0.4212, "step": 43921 }, { "epoch": 2.45951394333072, "grad_norm": 1.0461797714233398, "learning_rate": 9.370973684210527e-05, "loss": 0.3764, "step": 43922 }, { "epoch": 2.459569940642849, "grad_norm": 1.1030359268188477, "learning_rate": 9.370947368421054e-05, "loss": 0.3828, "step": 43923 }, { "epoch": 2.459625937954978, "grad_norm": 
1.1080652475357056, "learning_rate": 9.370921052631579e-05, "loss": 0.2918, "step": 43924 }, { "epoch": 2.459681935267107, "grad_norm": 1.3141148090362549, "learning_rate": 9.370894736842105e-05, "loss": 0.4478, "step": 43925 }, { "epoch": 2.459737932579236, "grad_norm": 1.2177132368087769, "learning_rate": 9.370868421052632e-05, "loss": 0.3594, "step": 43926 }, { "epoch": 2.459793929891365, "grad_norm": 1.1319793462753296, "learning_rate": 9.370842105263158e-05, "loss": 0.327, "step": 43927 }, { "epoch": 2.459849927203494, "grad_norm": 1.0238929986953735, "learning_rate": 9.370815789473685e-05, "loss": 0.4054, "step": 43928 }, { "epoch": 2.459905924515623, "grad_norm": 1.152561068534851, "learning_rate": 9.37078947368421e-05, "loss": 0.475, "step": 43929 }, { "epoch": 2.459961921827752, "grad_norm": 0.9773671627044678, "learning_rate": 9.370763157894737e-05, "loss": 0.3104, "step": 43930 }, { "epoch": 2.460017919139881, "grad_norm": 1.2545851469039917, "learning_rate": 9.370736842105263e-05, "loss": 0.3792, "step": 43931 }, { "epoch": 2.4600739164520102, "grad_norm": 1.4108887910842896, "learning_rate": 9.37071052631579e-05, "loss": 0.3936, "step": 43932 }, { "epoch": 2.4601299137641393, "grad_norm": 1.1425073146820068, "learning_rate": 9.370684210526315e-05, "loss": 0.3553, "step": 43933 }, { "epoch": 2.4601859110762683, "grad_norm": 1.1321817636489868, "learning_rate": 9.370657894736843e-05, "loss": 0.3609, "step": 43934 }, { "epoch": 2.4602419083883973, "grad_norm": 1.2184488773345947, "learning_rate": 9.370631578947369e-05, "loss": 0.4172, "step": 43935 }, { "epoch": 2.4602979057005263, "grad_norm": 1.021667242050171, "learning_rate": 9.370605263157896e-05, "loss": 0.3046, "step": 43936 }, { "epoch": 2.4603539030126553, "grad_norm": 1.2242690324783325, "learning_rate": 9.370578947368422e-05, "loss": 0.467, "step": 43937 }, { "epoch": 2.4604099003247843, "grad_norm": 1.1548478603363037, "learning_rate": 9.370552631578948e-05, "loss": 0.4465, "step": 43938 }, { 
"epoch": 2.4604658976369134, "grad_norm": 1.1805572509765625, "learning_rate": 9.370526315789474e-05, "loss": 0.5234, "step": 43939 }, { "epoch": 2.4605218949490424, "grad_norm": 1.3229966163635254, "learning_rate": 9.370500000000001e-05, "loss": 0.4428, "step": 43940 }, { "epoch": 2.4605778922611714, "grad_norm": 1.2532237768173218, "learning_rate": 9.370473684210527e-05, "loss": 0.4855, "step": 43941 }, { "epoch": 2.4606338895733004, "grad_norm": 1.0509499311447144, "learning_rate": 9.370447368421053e-05, "loss": 0.3579, "step": 43942 }, { "epoch": 2.4606898868854294, "grad_norm": 0.9565574526786804, "learning_rate": 9.370421052631579e-05, "loss": 0.3573, "step": 43943 }, { "epoch": 2.4607458841975585, "grad_norm": 1.2125788927078247, "learning_rate": 9.370394736842105e-05, "loss": 0.3405, "step": 43944 }, { "epoch": 2.4608018815096875, "grad_norm": 1.1953964233398438, "learning_rate": 9.370368421052632e-05, "loss": 0.5305, "step": 43945 }, { "epoch": 2.4608578788218165, "grad_norm": 1.26026451587677, "learning_rate": 9.370342105263158e-05, "loss": 0.3852, "step": 43946 }, { "epoch": 2.4609138761339455, "grad_norm": 1.179924726486206, "learning_rate": 9.370315789473684e-05, "loss": 0.3899, "step": 43947 }, { "epoch": 2.4609698734460745, "grad_norm": 1.1868436336517334, "learning_rate": 9.37028947368421e-05, "loss": 0.3986, "step": 43948 }, { "epoch": 2.4610258707582036, "grad_norm": 1.2180452346801758, "learning_rate": 9.370263157894738e-05, "loss": 0.3813, "step": 43949 }, { "epoch": 2.4610818680703326, "grad_norm": 1.1777856349945068, "learning_rate": 9.370236842105264e-05, "loss": 0.4419, "step": 43950 }, { "epoch": 2.4611378653824616, "grad_norm": 1.1629559993743896, "learning_rate": 9.37021052631579e-05, "loss": 0.4021, "step": 43951 }, { "epoch": 2.4611938626945906, "grad_norm": 2.236706256866455, "learning_rate": 9.370184210526316e-05, "loss": 0.4782, "step": 43952 }, { "epoch": 2.4612498600067196, "grad_norm": 1.864449143409729, "learning_rate": 
9.370157894736843e-05, "loss": 0.3784, "step": 43953 }, { "epoch": 2.4613058573188487, "grad_norm": 1.3919776678085327, "learning_rate": 9.370131578947369e-05, "loss": 0.4957, "step": 43954 }, { "epoch": 2.4613618546309777, "grad_norm": 1.4784114360809326, "learning_rate": 9.370105263157896e-05, "loss": 0.3823, "step": 43955 }, { "epoch": 2.4614178519431067, "grad_norm": 1.1372689008712769, "learning_rate": 9.370078947368421e-05, "loss": 0.4858, "step": 43956 }, { "epoch": 2.4614738492552357, "grad_norm": 1.2237818241119385, "learning_rate": 9.370052631578948e-05, "loss": 0.4301, "step": 43957 }, { "epoch": 2.4615298465673647, "grad_norm": 1.2508577108383179, "learning_rate": 9.370026315789474e-05, "loss": 0.3645, "step": 43958 }, { "epoch": 2.4615858438794938, "grad_norm": 1.1041606664657593, "learning_rate": 9.370000000000001e-05, "loss": 0.3234, "step": 43959 }, { "epoch": 2.4616418411916228, "grad_norm": 1.509909987449646, "learning_rate": 9.369973684210527e-05, "loss": 0.4552, "step": 43960 }, { "epoch": 2.461697838503752, "grad_norm": 1.1359837055206299, "learning_rate": 9.369947368421052e-05, "loss": 0.4749, "step": 43961 }, { "epoch": 2.461753835815881, "grad_norm": 1.4343856573104858, "learning_rate": 9.36992105263158e-05, "loss": 0.2567, "step": 43962 }, { "epoch": 2.46180983312801, "grad_norm": 1.3366953134536743, "learning_rate": 9.369894736842105e-05, "loss": 0.3146, "step": 43963 }, { "epoch": 2.461865830440139, "grad_norm": 1.5013960599899292, "learning_rate": 9.369868421052633e-05, "loss": 0.427, "step": 43964 }, { "epoch": 2.461921827752268, "grad_norm": 1.2389426231384277, "learning_rate": 9.369842105263159e-05, "loss": 0.3601, "step": 43965 }, { "epoch": 2.461977825064397, "grad_norm": 1.0977588891983032, "learning_rate": 9.369815789473685e-05, "loss": 0.4056, "step": 43966 }, { "epoch": 2.462033822376526, "grad_norm": 1.286423683166504, "learning_rate": 9.36978947368421e-05, "loss": 0.512, "step": 43967 }, { "epoch": 2.462089819688655, 
"grad_norm": 1.1975669860839844, "learning_rate": 9.369763157894738e-05, "loss": 0.3562, "step": 43968 }, { "epoch": 2.462145817000784, "grad_norm": 1.1896835565567017, "learning_rate": 9.369736842105264e-05, "loss": 0.362, "step": 43969 }, { "epoch": 2.462201814312913, "grad_norm": 1.2846862077713013, "learning_rate": 9.36971052631579e-05, "loss": 0.4659, "step": 43970 }, { "epoch": 2.462257811625042, "grad_norm": 1.1826756000518799, "learning_rate": 9.369684210526316e-05, "loss": 0.3571, "step": 43971 }, { "epoch": 2.462313808937171, "grad_norm": 1.100092887878418, "learning_rate": 9.369657894736843e-05, "loss": 0.327, "step": 43972 }, { "epoch": 2.4623698062493, "grad_norm": 1.0298722982406616, "learning_rate": 9.369631578947369e-05, "loss": 0.461, "step": 43973 }, { "epoch": 2.462425803561429, "grad_norm": 1.1048294305801392, "learning_rate": 9.369605263157895e-05, "loss": 0.3424, "step": 43974 }, { "epoch": 2.462481800873558, "grad_norm": 0.9605061411857605, "learning_rate": 9.369578947368421e-05, "loss": 0.3005, "step": 43975 }, { "epoch": 2.462537798185687, "grad_norm": 1.3487586975097656, "learning_rate": 9.369552631578948e-05, "loss": 0.4386, "step": 43976 }, { "epoch": 2.462593795497816, "grad_norm": 1.4309014081954956, "learning_rate": 9.369526315789474e-05, "loss": 0.4834, "step": 43977 }, { "epoch": 2.462649792809945, "grad_norm": 1.056044101715088, "learning_rate": 9.3695e-05, "loss": 0.391, "step": 43978 }, { "epoch": 2.462705790122074, "grad_norm": 1.4049144983291626, "learning_rate": 9.369473684210526e-05, "loss": 0.3939, "step": 43979 }, { "epoch": 2.462761787434203, "grad_norm": 1.1205466985702515, "learning_rate": 9.369447368421052e-05, "loss": 0.3747, "step": 43980 }, { "epoch": 2.462817784746332, "grad_norm": 1.1049559116363525, "learning_rate": 9.36942105263158e-05, "loss": 0.4836, "step": 43981 }, { "epoch": 2.462873782058461, "grad_norm": 1.0575199127197266, "learning_rate": 9.369394736842106e-05, "loss": 0.3208, "step": 43982 }, { "epoch": 
2.46292977937059, "grad_norm": 1.0496854782104492, "learning_rate": 9.369368421052633e-05, "loss": 0.3444, "step": 43983 }, { "epoch": 2.4629857766827192, "grad_norm": 1.1299480199813843, "learning_rate": 9.369342105263158e-05, "loss": 0.3904, "step": 43984 }, { "epoch": 2.4630417739948482, "grad_norm": 1.074558138847351, "learning_rate": 9.369315789473685e-05, "loss": 0.3424, "step": 43985 }, { "epoch": 2.4630977713069773, "grad_norm": 1.2925041913986206, "learning_rate": 9.369289473684211e-05, "loss": 0.551, "step": 43986 }, { "epoch": 2.4631537686191063, "grad_norm": 1.4883556365966797, "learning_rate": 9.369263157894738e-05, "loss": 0.4566, "step": 43987 }, { "epoch": 2.4632097659312353, "grad_norm": 1.2466665506362915, "learning_rate": 9.369236842105263e-05, "loss": 0.4134, "step": 43988 }, { "epoch": 2.4632657632433643, "grad_norm": 1.0996952056884766, "learning_rate": 9.36921052631579e-05, "loss": 0.3135, "step": 43989 }, { "epoch": 2.4633217605554933, "grad_norm": 1.114341139793396, "learning_rate": 9.369184210526316e-05, "loss": 0.3493, "step": 43990 }, { "epoch": 2.4633777578676224, "grad_norm": 1.1812667846679688, "learning_rate": 9.369157894736843e-05, "loss": 0.446, "step": 43991 }, { "epoch": 2.4634337551797514, "grad_norm": 1.137360692024231, "learning_rate": 9.36913157894737e-05, "loss": 0.4037, "step": 43992 }, { "epoch": 2.4634897524918804, "grad_norm": 1.022881269454956, "learning_rate": 9.369105263157895e-05, "loss": 0.3637, "step": 43993 }, { "epoch": 2.4635457498040094, "grad_norm": 1.2217137813568115, "learning_rate": 9.369078947368421e-05, "loss": 0.4461, "step": 43994 }, { "epoch": 2.4636017471161384, "grad_norm": 0.9796823263168335, "learning_rate": 9.369052631578947e-05, "loss": 0.3404, "step": 43995 }, { "epoch": 2.4636577444282675, "grad_norm": 1.036768913269043, "learning_rate": 9.369026315789475e-05, "loss": 0.297, "step": 43996 }, { "epoch": 2.4637137417403965, "grad_norm": 1.3559069633483887, "learning_rate": 9.369e-05, "loss": 
0.3296, "step": 43997 }, { "epoch": 2.4637697390525255, "grad_norm": 1.221373200416565, "learning_rate": 9.368973684210527e-05, "loss": 0.4202, "step": 43998 }, { "epoch": 2.4638257363646545, "grad_norm": 1.3434100151062012, "learning_rate": 9.368947368421053e-05, "loss": 0.4098, "step": 43999 }, { "epoch": 2.4638817336767835, "grad_norm": 1.3566709756851196, "learning_rate": 9.36892105263158e-05, "loss": 0.4464, "step": 44000 }, { "epoch": 2.4639377309889126, "grad_norm": 0.9783819317817688, "learning_rate": 9.368894736842106e-05, "loss": 0.3247, "step": 44001 }, { "epoch": 2.4639937283010416, "grad_norm": 1.1412975788116455, "learning_rate": 9.368868421052632e-05, "loss": 0.342, "step": 44002 }, { "epoch": 2.4640497256131706, "grad_norm": 1.256348729133606, "learning_rate": 9.368842105263158e-05, "loss": 0.4867, "step": 44003 }, { "epoch": 2.4641057229252996, "grad_norm": 1.1195480823516846, "learning_rate": 9.368815789473685e-05, "loss": 0.3567, "step": 44004 }, { "epoch": 2.4641617202374286, "grad_norm": 1.0207374095916748, "learning_rate": 9.368789473684211e-05, "loss": 0.387, "step": 44005 }, { "epoch": 2.4642177175495577, "grad_norm": 1.0712746381759644, "learning_rate": 9.368763157894737e-05, "loss": 0.3746, "step": 44006 }, { "epoch": 2.4642737148616867, "grad_norm": 1.0711150169372559, "learning_rate": 9.368736842105263e-05, "loss": 0.3524, "step": 44007 }, { "epoch": 2.4643297121738157, "grad_norm": 1.1193681955337524, "learning_rate": 9.36871052631579e-05, "loss": 0.3801, "step": 44008 }, { "epoch": 2.4643857094859447, "grad_norm": 2.050351858139038, "learning_rate": 9.368684210526316e-05, "loss": 0.4216, "step": 44009 }, { "epoch": 2.4644417067980737, "grad_norm": 1.061379075050354, "learning_rate": 9.368657894736844e-05, "loss": 0.3953, "step": 44010 }, { "epoch": 2.4644977041102027, "grad_norm": 1.0632109642028809, "learning_rate": 9.368631578947368e-05, "loss": 0.2962, "step": 44011 }, { "epoch": 2.4645537014223318, "grad_norm": 1.1900075674057007, 
"learning_rate": 9.368605263157894e-05, "loss": 0.3209, "step": 44012 }, { "epoch": 2.464609698734461, "grad_norm": 1.0738804340362549, "learning_rate": 9.368578947368422e-05, "loss": 0.3785, "step": 44013 }, { "epoch": 2.46466569604659, "grad_norm": 1.4291799068450928, "learning_rate": 9.368552631578948e-05, "loss": 0.39, "step": 44014 }, { "epoch": 2.464721693358719, "grad_norm": 1.1958266496658325, "learning_rate": 9.368526315789475e-05, "loss": 0.4006, "step": 44015 }, { "epoch": 2.464777690670848, "grad_norm": 1.3450801372528076, "learning_rate": 9.3685e-05, "loss": 0.5331, "step": 44016 }, { "epoch": 2.464833687982977, "grad_norm": 1.0440376996994019, "learning_rate": 9.368473684210527e-05, "loss": 0.2854, "step": 44017 }, { "epoch": 2.464889685295106, "grad_norm": 1.272728681564331, "learning_rate": 9.368447368421053e-05, "loss": 0.5144, "step": 44018 }, { "epoch": 2.464945682607235, "grad_norm": 1.1235953569412231, "learning_rate": 9.36842105263158e-05, "loss": 0.2873, "step": 44019 }, { "epoch": 2.465001679919364, "grad_norm": 1.303578495979309, "learning_rate": 9.368394736842106e-05, "loss": 0.4773, "step": 44020 }, { "epoch": 2.465057677231493, "grad_norm": 1.3914977312088013, "learning_rate": 9.368368421052632e-05, "loss": 0.2957, "step": 44021 }, { "epoch": 2.465113674543622, "grad_norm": 1.169516682624817, "learning_rate": 9.368342105263158e-05, "loss": 0.3974, "step": 44022 }, { "epoch": 2.465169671855751, "grad_norm": 1.167578101158142, "learning_rate": 9.368315789473685e-05, "loss": 0.3527, "step": 44023 }, { "epoch": 2.46522566916788, "grad_norm": 1.1339764595031738, "learning_rate": 9.368289473684211e-05, "loss": 0.4494, "step": 44024 }, { "epoch": 2.465281666480009, "grad_norm": 1.078296422958374, "learning_rate": 9.368263157894737e-05, "loss": 0.3479, "step": 44025 }, { "epoch": 2.465337663792138, "grad_norm": 1.1302907466888428, "learning_rate": 9.368236842105263e-05, "loss": 0.3762, "step": 44026 }, { "epoch": 2.465393661104267, "grad_norm": 
1.1961250305175781, "learning_rate": 9.36821052631579e-05, "loss": 0.334, "step": 44027 }, { "epoch": 2.465449658416396, "grad_norm": 1.1655254364013672, "learning_rate": 9.368184210526317e-05, "loss": 0.3819, "step": 44028 }, { "epoch": 2.465505655728525, "grad_norm": 1.359838843345642, "learning_rate": 9.368157894736843e-05, "loss": 0.3357, "step": 44029 }, { "epoch": 2.465561653040654, "grad_norm": 0.9917736053466797, "learning_rate": 9.368131578947369e-05, "loss": 0.3988, "step": 44030 }, { "epoch": 2.465617650352783, "grad_norm": 1.0773760080337524, "learning_rate": 9.368105263157895e-05, "loss": 0.3112, "step": 44031 }, { "epoch": 2.465673647664912, "grad_norm": 1.0271515846252441, "learning_rate": 9.368078947368422e-05, "loss": 0.3213, "step": 44032 }, { "epoch": 2.465729644977041, "grad_norm": 1.1921385526657104, "learning_rate": 9.368052631578948e-05, "loss": 0.2775, "step": 44033 }, { "epoch": 2.46578564228917, "grad_norm": 1.25236177444458, "learning_rate": 9.368026315789474e-05, "loss": 0.3778, "step": 44034 }, { "epoch": 2.465841639601299, "grad_norm": 1.499922275543213, "learning_rate": 9.368e-05, "loss": 0.6028, "step": 44035 }, { "epoch": 2.4658976369134282, "grad_norm": 1.3520774841308594, "learning_rate": 9.367973684210527e-05, "loss": 0.5467, "step": 44036 }, { "epoch": 2.4659536342255572, "grad_norm": 1.1964569091796875, "learning_rate": 9.367947368421053e-05, "loss": 0.5296, "step": 44037 }, { "epoch": 2.4660096315376863, "grad_norm": 1.1248208284378052, "learning_rate": 9.36792105263158e-05, "loss": 0.4742, "step": 44038 }, { "epoch": 2.4660656288498153, "grad_norm": 1.0359379053115845, "learning_rate": 9.367894736842105e-05, "loss": 0.3229, "step": 44039 }, { "epoch": 2.4661216261619443, "grad_norm": 1.373462200164795, "learning_rate": 9.367868421052632e-05, "loss": 0.4026, "step": 44040 }, { "epoch": 2.4661776234740733, "grad_norm": 1.202039361000061, "learning_rate": 9.367842105263158e-05, "loss": 0.4507, "step": 44041 }, { "epoch": 
2.4662336207862023, "grad_norm": 1.3657056093215942, "learning_rate": 9.367815789473686e-05, "loss": 0.4638, "step": 44042 }, { "epoch": 2.4662896180983314, "grad_norm": 1.024149775505066, "learning_rate": 9.36778947368421e-05, "loss": 0.3326, "step": 44043 }, { "epoch": 2.4663456154104604, "grad_norm": 1.1358633041381836, "learning_rate": 9.367763157894738e-05, "loss": 0.4382, "step": 44044 }, { "epoch": 2.4664016127225894, "grad_norm": 1.0708404779434204, "learning_rate": 9.367736842105264e-05, "loss": 0.318, "step": 44045 }, { "epoch": 2.4664576100347184, "grad_norm": 1.3292200565338135, "learning_rate": 9.36771052631579e-05, "loss": 0.4684, "step": 44046 }, { "epoch": 2.4665136073468474, "grad_norm": 1.2605080604553223, "learning_rate": 9.367684210526317e-05, "loss": 0.4051, "step": 44047 }, { "epoch": 2.4665696046589765, "grad_norm": 1.0108362436294556, "learning_rate": 9.367657894736841e-05, "loss": 0.3772, "step": 44048 }, { "epoch": 2.4666256019711055, "grad_norm": 1.531085729598999, "learning_rate": 9.367631578947369e-05, "loss": 0.4447, "step": 44049 }, { "epoch": 2.4666815992832345, "grad_norm": 1.2041574716567993, "learning_rate": 9.367605263157895e-05, "loss": 0.4847, "step": 44050 }, { "epoch": 2.4667375965953635, "grad_norm": 1.3494386672973633, "learning_rate": 9.367578947368422e-05, "loss": 0.4032, "step": 44051 }, { "epoch": 2.4667935939074925, "grad_norm": 1.2513763904571533, "learning_rate": 9.367552631578948e-05, "loss": 0.5364, "step": 44052 }, { "epoch": 2.4668495912196216, "grad_norm": 1.1867985725402832, "learning_rate": 9.367526315789474e-05, "loss": 0.4274, "step": 44053 }, { "epoch": 2.4669055885317506, "grad_norm": 1.1427605152130127, "learning_rate": 9.3675e-05, "loss": 0.3654, "step": 44054 }, { "epoch": 2.4669615858438796, "grad_norm": 1.0306271314620972, "learning_rate": 9.367473684210527e-05, "loss": 0.3622, "step": 44055 }, { "epoch": 2.4670175831560086, "grad_norm": 1.0763897895812988, "learning_rate": 9.367447368421053e-05, 
"loss": 0.4505, "step": 44056 }, { "epoch": 2.4670735804681376, "grad_norm": 1.433589220046997, "learning_rate": 9.367421052631579e-05, "loss": 0.5643, "step": 44057 }, { "epoch": 2.4671295777802666, "grad_norm": 0.9540082812309265, "learning_rate": 9.367394736842105e-05, "loss": 0.311, "step": 44058 }, { "epoch": 2.4671855750923957, "grad_norm": 1.257386565208435, "learning_rate": 9.367368421052633e-05, "loss": 0.4465, "step": 44059 }, { "epoch": 2.4672415724045247, "grad_norm": 0.9749913811683655, "learning_rate": 9.367342105263159e-05, "loss": 0.396, "step": 44060 }, { "epoch": 2.4672975697166537, "grad_norm": 1.0280036926269531, "learning_rate": 9.367315789473685e-05, "loss": 0.3229, "step": 44061 }, { "epoch": 2.4673535670287827, "grad_norm": 1.1539928913116455, "learning_rate": 9.36728947368421e-05, "loss": 0.314, "step": 44062 }, { "epoch": 2.4674095643409117, "grad_norm": 0.958448052406311, "learning_rate": 9.367263157894736e-05, "loss": 0.3348, "step": 44063 }, { "epoch": 2.4674655616530408, "grad_norm": 1.329084038734436, "learning_rate": 9.367236842105264e-05, "loss": 0.4809, "step": 44064 }, { "epoch": 2.46752155896517, "grad_norm": 1.1161242723464966, "learning_rate": 9.36721052631579e-05, "loss": 0.4009, "step": 44065 }, { "epoch": 2.467577556277299, "grad_norm": 1.1887012720108032, "learning_rate": 9.367184210526316e-05, "loss": 0.4353, "step": 44066 }, { "epoch": 2.467633553589428, "grad_norm": 1.0540107488632202, "learning_rate": 9.367157894736842e-05, "loss": 0.3163, "step": 44067 }, { "epoch": 2.467689550901557, "grad_norm": 1.3905054330825806, "learning_rate": 9.367131578947369e-05, "loss": 0.5134, "step": 44068 }, { "epoch": 2.467745548213686, "grad_norm": 1.1003040075302124, "learning_rate": 9.367105263157895e-05, "loss": 0.3423, "step": 44069 }, { "epoch": 2.467801545525815, "grad_norm": 1.286909580230713, "learning_rate": 9.367078947368422e-05, "loss": 0.5266, "step": 44070 }, { "epoch": 2.467857542837944, "grad_norm": 1.0045418739318848, 
"learning_rate": 9.367052631578947e-05, "loss": 0.317, "step": 44071 }, { "epoch": 2.467913540150073, "grad_norm": 1.1579242944717407, "learning_rate": 9.367026315789474e-05, "loss": 0.4013, "step": 44072 }, { "epoch": 2.467969537462202, "grad_norm": 1.1702934503555298, "learning_rate": 9.367e-05, "loss": 0.3754, "step": 44073 }, { "epoch": 2.468025534774331, "grad_norm": 1.4780206680297852, "learning_rate": 9.366973684210528e-05, "loss": 0.4585, "step": 44074 }, { "epoch": 2.46808153208646, "grad_norm": 1.3680706024169922, "learning_rate": 9.366947368421054e-05, "loss": 0.4865, "step": 44075 }, { "epoch": 2.468137529398589, "grad_norm": 1.005650520324707, "learning_rate": 9.36692105263158e-05, "loss": 0.3618, "step": 44076 }, { "epoch": 2.468193526710718, "grad_norm": 1.2440506219863892, "learning_rate": 9.366894736842106e-05, "loss": 0.4509, "step": 44077 }, { "epoch": 2.468249524022847, "grad_norm": 3.961688995361328, "learning_rate": 9.366868421052633e-05, "loss": 0.4074, "step": 44078 }, { "epoch": 2.468305521334976, "grad_norm": 1.5190232992172241, "learning_rate": 9.366842105263159e-05, "loss": 0.4102, "step": 44079 }, { "epoch": 2.468361518647105, "grad_norm": 1.1501225233078003, "learning_rate": 9.366815789473685e-05, "loss": 0.3552, "step": 44080 }, { "epoch": 2.468417515959234, "grad_norm": 1.5113422870635986, "learning_rate": 9.366789473684211e-05, "loss": 0.5726, "step": 44081 }, { "epoch": 2.468473513271363, "grad_norm": 1.124323844909668, "learning_rate": 9.366763157894737e-05, "loss": 0.4517, "step": 44082 }, { "epoch": 2.468529510583492, "grad_norm": 1.2362663745880127, "learning_rate": 9.366736842105264e-05, "loss": 0.3874, "step": 44083 }, { "epoch": 2.468585507895621, "grad_norm": 1.3162689208984375, "learning_rate": 9.36671052631579e-05, "loss": 0.3758, "step": 44084 }, { "epoch": 2.46864150520775, "grad_norm": 1.3905032873153687, "learning_rate": 9.366684210526316e-05, "loss": 0.5052, "step": 44085 }, { "epoch": 2.468697502519879, "grad_norm": 
1.4345086812973022, "learning_rate": 9.366657894736842e-05, "loss": 0.4463, "step": 44086 }, { "epoch": 2.468753499832008, "grad_norm": 1.2156461477279663, "learning_rate": 9.366631578947369e-05, "loss": 0.4629, "step": 44087 }, { "epoch": 2.468809497144137, "grad_norm": 1.0959806442260742, "learning_rate": 9.366605263157895e-05, "loss": 0.3785, "step": 44088 }, { "epoch": 2.4688654944562662, "grad_norm": 1.2783684730529785, "learning_rate": 9.366578947368421e-05, "loss": 0.4519, "step": 44089 }, { "epoch": 2.4689214917683953, "grad_norm": 1.615684986114502, "learning_rate": 9.366552631578947e-05, "loss": 0.3396, "step": 44090 }, { "epoch": 2.4689774890805243, "grad_norm": 1.2498834133148193, "learning_rate": 9.366526315789475e-05, "loss": 0.3973, "step": 44091 }, { "epoch": 2.4690334863926533, "grad_norm": 1.0827817916870117, "learning_rate": 9.3665e-05, "loss": 0.4252, "step": 44092 }, { "epoch": 2.4690894837047823, "grad_norm": 1.1794648170471191, "learning_rate": 9.366473684210527e-05, "loss": 0.3929, "step": 44093 }, { "epoch": 2.4691454810169113, "grad_norm": 1.0532640218734741, "learning_rate": 9.366447368421052e-05, "loss": 0.4309, "step": 44094 }, { "epoch": 2.4692014783290404, "grad_norm": 1.1812111139297485, "learning_rate": 9.36642105263158e-05, "loss": 0.399, "step": 44095 }, { "epoch": 2.4692574756411694, "grad_norm": 1.3980095386505127, "learning_rate": 9.366394736842106e-05, "loss": 0.4568, "step": 44096 }, { "epoch": 2.4693134729532984, "grad_norm": 0.947557270526886, "learning_rate": 9.366368421052633e-05, "loss": 0.3584, "step": 44097 }, { "epoch": 2.4693694702654274, "grad_norm": 1.2157189846038818, "learning_rate": 9.366342105263158e-05, "loss": 0.3158, "step": 44098 }, { "epoch": 2.4694254675775564, "grad_norm": 1.3564356565475464, "learning_rate": 9.366315789473684e-05, "loss": 0.4215, "step": 44099 }, { "epoch": 2.4694814648896855, "grad_norm": 1.1626747846603394, "learning_rate": 9.366289473684211e-05, "loss": 0.3075, "step": 44100 }, { 
"epoch": 2.4695374622018145, "grad_norm": 1.2534950971603394, "learning_rate": 9.366263157894737e-05, "loss": 0.3094, "step": 44101 }, { "epoch": 2.4695934595139435, "grad_norm": 1.0521891117095947, "learning_rate": 9.366236842105264e-05, "loss": 0.4207, "step": 44102 }, { "epoch": 2.4696494568260725, "grad_norm": 1.131402611732483, "learning_rate": 9.366210526315789e-05, "loss": 0.4155, "step": 44103 }, { "epoch": 2.4697054541382015, "grad_norm": 1.890564203262329, "learning_rate": 9.366184210526316e-05, "loss": 0.4706, "step": 44104 }, { "epoch": 2.4697614514503305, "grad_norm": 1.1770954132080078, "learning_rate": 9.366157894736842e-05, "loss": 0.4354, "step": 44105 }, { "epoch": 2.4698174487624596, "grad_norm": 1.0822374820709229, "learning_rate": 9.36613157894737e-05, "loss": 0.3286, "step": 44106 }, { "epoch": 2.4698734460745886, "grad_norm": 1.3864142894744873, "learning_rate": 9.366105263157896e-05, "loss": 0.3798, "step": 44107 }, { "epoch": 2.4699294433867176, "grad_norm": 1.3354579210281372, "learning_rate": 9.366078947368422e-05, "loss": 0.5007, "step": 44108 }, { "epoch": 2.4699854406988466, "grad_norm": 1.2717374563217163, "learning_rate": 9.366052631578947e-05, "loss": 0.4068, "step": 44109 }, { "epoch": 2.4700414380109756, "grad_norm": 0.9810419678688049, "learning_rate": 9.366026315789475e-05, "loss": 0.4736, "step": 44110 }, { "epoch": 2.4700974353231047, "grad_norm": 1.5323796272277832, "learning_rate": 9.366000000000001e-05, "loss": 0.4021, "step": 44111 }, { "epoch": 2.4701534326352337, "grad_norm": 1.0392820835113525, "learning_rate": 9.365973684210527e-05, "loss": 0.4277, "step": 44112 }, { "epoch": 2.4702094299473627, "grad_norm": 1.3933950662612915, "learning_rate": 9.365947368421053e-05, "loss": 0.3735, "step": 44113 }, { "epoch": 2.4702654272594917, "grad_norm": 1.2772082090377808, "learning_rate": 9.36592105263158e-05, "loss": 0.4558, "step": 44114 }, { "epoch": 2.4703214245716207, "grad_norm": 1.1120884418487549, "learning_rate": 
9.365894736842106e-05, "loss": 0.3334, "step": 44115 }, { "epoch": 2.4703774218837498, "grad_norm": 1.3755912780761719, "learning_rate": 9.365868421052632e-05, "loss": 0.4307, "step": 44116 }, { "epoch": 2.4704334191958788, "grad_norm": 0.9504393935203552, "learning_rate": 9.365842105263158e-05, "loss": 0.3934, "step": 44117 }, { "epoch": 2.470489416508008, "grad_norm": 1.0620770454406738, "learning_rate": 9.365815789473684e-05, "loss": 0.3357, "step": 44118 }, { "epoch": 2.470545413820137, "grad_norm": 1.0509591102600098, "learning_rate": 9.365789473684211e-05, "loss": 0.3773, "step": 44119 }, { "epoch": 2.470601411132266, "grad_norm": 1.3002393245697021, "learning_rate": 9.365763157894737e-05, "loss": 0.5345, "step": 44120 }, { "epoch": 2.470657408444395, "grad_norm": 1.1038538217544556, "learning_rate": 9.365736842105263e-05, "loss": 0.3716, "step": 44121 }, { "epoch": 2.470713405756524, "grad_norm": 1.2519797086715698, "learning_rate": 9.365710526315789e-05, "loss": 0.4457, "step": 44122 }, { "epoch": 2.470769403068653, "grad_norm": 1.2866604328155518, "learning_rate": 9.365684210526317e-05, "loss": 0.3703, "step": 44123 }, { "epoch": 2.470825400380782, "grad_norm": 1.1615679264068604, "learning_rate": 9.365657894736843e-05, "loss": 0.3675, "step": 44124 }, { "epoch": 2.470881397692911, "grad_norm": 1.277629017829895, "learning_rate": 9.36563157894737e-05, "loss": 0.3952, "step": 44125 }, { "epoch": 2.47093739500504, "grad_norm": 0.9527827501296997, "learning_rate": 9.365605263157894e-05, "loss": 0.3516, "step": 44126 }, { "epoch": 2.470993392317169, "grad_norm": 1.086543321609497, "learning_rate": 9.365578947368422e-05, "loss": 0.4292, "step": 44127 }, { "epoch": 2.471049389629298, "grad_norm": 1.107340931892395, "learning_rate": 9.365552631578948e-05, "loss": 0.3998, "step": 44128 }, { "epoch": 2.471105386941427, "grad_norm": 1.5575259923934937, "learning_rate": 9.365526315789475e-05, "loss": 0.4101, "step": 44129 }, { "epoch": 2.4711613842535556, 
"grad_norm": 1.187996745109558, "learning_rate": 9.365500000000001e-05, "loss": 0.3771, "step": 44130 }, { "epoch": 2.471217381565685, "grad_norm": 1.0467430353164673, "learning_rate": 9.365473684210527e-05, "loss": 0.2957, "step": 44131 }, { "epoch": 2.4712733788778136, "grad_norm": 1.0744919776916504, "learning_rate": 9.365447368421053e-05, "loss": 0.3675, "step": 44132 }, { "epoch": 2.471329376189943, "grad_norm": 1.1523994207382202, "learning_rate": 9.365421052631579e-05, "loss": 0.4402, "step": 44133 }, { "epoch": 2.4713853735020717, "grad_norm": 1.3272831439971924, "learning_rate": 9.365394736842106e-05, "loss": 0.3924, "step": 44134 }, { "epoch": 2.471441370814201, "grad_norm": 1.1133606433868408, "learning_rate": 9.365368421052631e-05, "loss": 0.3858, "step": 44135 }, { "epoch": 2.4714973681263297, "grad_norm": 1.1648343801498413, "learning_rate": 9.365342105263158e-05, "loss": 0.5602, "step": 44136 }, { "epoch": 2.471553365438459, "grad_norm": 1.038198709487915, "learning_rate": 9.365315789473684e-05, "loss": 0.2836, "step": 44137 }, { "epoch": 2.4716093627505877, "grad_norm": 1.1249364614486694, "learning_rate": 9.365289473684212e-05, "loss": 0.3407, "step": 44138 }, { "epoch": 2.471665360062717, "grad_norm": 1.039136290550232, "learning_rate": 9.365263157894738e-05, "loss": 0.3837, "step": 44139 }, { "epoch": 2.4717213573748458, "grad_norm": 1.1571974754333496, "learning_rate": 9.365236842105263e-05, "loss": 0.3395, "step": 44140 }, { "epoch": 2.4717773546869752, "grad_norm": 1.267731785774231, "learning_rate": 9.36521052631579e-05, "loss": 0.4256, "step": 44141 }, { "epoch": 2.471833351999104, "grad_norm": 1.6461541652679443, "learning_rate": 9.365184210526317e-05, "loss": 0.4764, "step": 44142 }, { "epoch": 2.4718893493112333, "grad_norm": 1.1013351678848267, "learning_rate": 9.365157894736843e-05, "loss": 0.3444, "step": 44143 }, { "epoch": 2.471945346623362, "grad_norm": 1.2162506580352783, "learning_rate": 9.365131578947369e-05, "loss": 0.3778, 
"step": 44144 }, { "epoch": 2.4720013439354913, "grad_norm": 1.0435081720352173, "learning_rate": 9.365105263157895e-05, "loss": 0.5001, "step": 44145 }, { "epoch": 2.47205734124762, "grad_norm": 1.201692819595337, "learning_rate": 9.365078947368422e-05, "loss": 0.3647, "step": 44146 }, { "epoch": 2.4721133385597494, "grad_norm": 1.0861254930496216, "learning_rate": 9.365052631578948e-05, "loss": 0.3028, "step": 44147 }, { "epoch": 2.472169335871878, "grad_norm": 1.2334767580032349, "learning_rate": 9.365026315789474e-05, "loss": 0.4915, "step": 44148 }, { "epoch": 2.4722253331840074, "grad_norm": 1.0025677680969238, "learning_rate": 9.365e-05, "loss": 0.3859, "step": 44149 }, { "epoch": 2.472281330496136, "grad_norm": 1.5126653909683228, "learning_rate": 9.364973684210526e-05, "loss": 0.4494, "step": 44150 }, { "epoch": 2.4723373278082654, "grad_norm": 1.2949076890945435, "learning_rate": 9.364947368421053e-05, "loss": 0.3585, "step": 44151 }, { "epoch": 2.472393325120394, "grad_norm": 1.5175106525421143, "learning_rate": 9.364921052631579e-05, "loss": 0.5013, "step": 44152 }, { "epoch": 2.4724493224325235, "grad_norm": 1.2476967573165894, "learning_rate": 9.364894736842105e-05, "loss": 0.2908, "step": 44153 }, { "epoch": 2.472505319744652, "grad_norm": 1.2612708806991577, "learning_rate": 9.364868421052631e-05, "loss": 0.5451, "step": 44154 }, { "epoch": 2.4725613170567815, "grad_norm": 1.6813753843307495, "learning_rate": 9.364842105263158e-05, "loss": 0.3972, "step": 44155 }, { "epoch": 2.47261731436891, "grad_norm": 1.061497449874878, "learning_rate": 9.364815789473684e-05, "loss": 0.4859, "step": 44156 }, { "epoch": 2.4726733116810395, "grad_norm": 1.2612429857254028, "learning_rate": 9.364789473684212e-05, "loss": 0.3339, "step": 44157 }, { "epoch": 2.472729308993168, "grad_norm": 1.1108336448669434, "learning_rate": 9.364763157894736e-05, "loss": 0.3639, "step": 44158 }, { "epoch": 2.4727853063052976, "grad_norm": 1.0925554037094116, "learning_rate": 
9.364736842105264e-05, "loss": 0.3867, "step": 44159 }, { "epoch": 2.472841303617426, "grad_norm": 1.3325393199920654, "learning_rate": 9.36471052631579e-05, "loss": 0.3766, "step": 44160 }, { "epoch": 2.4728973009295556, "grad_norm": 1.1267825365066528, "learning_rate": 9.364684210526317e-05, "loss": 0.3133, "step": 44161 }, { "epoch": 2.472953298241684, "grad_norm": 1.1658260822296143, "learning_rate": 9.364657894736843e-05, "loss": 0.4255, "step": 44162 }, { "epoch": 2.4730092955538137, "grad_norm": 1.2096806764602661, "learning_rate": 9.364631578947369e-05, "loss": 0.3835, "step": 44163 }, { "epoch": 2.4730652928659422, "grad_norm": 0.9385897517204285, "learning_rate": 9.364605263157895e-05, "loss": 0.4209, "step": 44164 }, { "epoch": 2.4731212901780717, "grad_norm": 0.9884217381477356, "learning_rate": 9.364578947368422e-05, "loss": 0.3795, "step": 44165 }, { "epoch": 2.4731772874902003, "grad_norm": 1.325321078300476, "learning_rate": 9.364552631578948e-05, "loss": 0.5624, "step": 44166 }, { "epoch": 2.4732332848023297, "grad_norm": 1.0410025119781494, "learning_rate": 9.364526315789474e-05, "loss": 0.367, "step": 44167 }, { "epoch": 2.4732892821144583, "grad_norm": 1.3467849493026733, "learning_rate": 9.3645e-05, "loss": 0.3696, "step": 44168 }, { "epoch": 2.4733452794265878, "grad_norm": 1.2354958057403564, "learning_rate": 9.364473684210526e-05, "loss": 0.4545, "step": 44169 }, { "epoch": 2.4734012767387163, "grad_norm": 1.1850742101669312, "learning_rate": 9.364447368421054e-05, "loss": 0.4972, "step": 44170 }, { "epoch": 2.4734572740508454, "grad_norm": 1.09269380569458, "learning_rate": 9.36442105263158e-05, "loss": 0.306, "step": 44171 }, { "epoch": 2.4735132713629744, "grad_norm": 1.181783676147461, "learning_rate": 9.364394736842105e-05, "loss": 0.3975, "step": 44172 }, { "epoch": 2.4735692686751034, "grad_norm": 1.132027506828308, "learning_rate": 9.364368421052631e-05, "loss": 0.4291, "step": 44173 }, { "epoch": 2.4736252659872324, "grad_norm": 
1.5134623050689697, "learning_rate": 9.364342105263159e-05, "loss": 0.4832, "step": 44174 }, { "epoch": 2.4736812632993614, "grad_norm": 1.2445316314697266, "learning_rate": 9.364315789473685e-05, "loss": 0.3755, "step": 44175 }, { "epoch": 2.4737372606114905, "grad_norm": 1.2487881183624268, "learning_rate": 9.364289473684211e-05, "loss": 0.3557, "step": 44176 }, { "epoch": 2.4737932579236195, "grad_norm": 1.0553139448165894, "learning_rate": 9.364263157894737e-05, "loss": 0.4807, "step": 44177 }, { "epoch": 2.4738492552357485, "grad_norm": 1.2687548398971558, "learning_rate": 9.364236842105264e-05, "loss": 0.3559, "step": 44178 }, { "epoch": 2.4739052525478775, "grad_norm": 1.063984990119934, "learning_rate": 9.36421052631579e-05, "loss": 0.4786, "step": 44179 }, { "epoch": 2.4739612498600065, "grad_norm": 1.2033805847167969, "learning_rate": 9.364184210526317e-05, "loss": 0.3296, "step": 44180 }, { "epoch": 2.4740172471721356, "grad_norm": 0.958592414855957, "learning_rate": 9.364157894736842e-05, "loss": 0.3607, "step": 44181 }, { "epoch": 2.4740732444842646, "grad_norm": 1.4361246824264526, "learning_rate": 9.364131578947369e-05, "loss": 0.3993, "step": 44182 }, { "epoch": 2.4741292417963936, "grad_norm": 1.2696774005889893, "learning_rate": 9.364105263157895e-05, "loss": 0.4031, "step": 44183 }, { "epoch": 2.4741852391085226, "grad_norm": 1.1495460271835327, "learning_rate": 9.364078947368421e-05, "loss": 0.3673, "step": 44184 }, { "epoch": 2.4742412364206516, "grad_norm": 1.0882701873779297, "learning_rate": 9.364052631578949e-05, "loss": 0.2845, "step": 44185 }, { "epoch": 2.4742972337327807, "grad_norm": 1.030644178390503, "learning_rate": 9.364026315789473e-05, "loss": 0.3428, "step": 44186 }, { "epoch": 2.4743532310449097, "grad_norm": 1.1731072664260864, "learning_rate": 9.364e-05, "loss": 0.4346, "step": 44187 }, { "epoch": 2.4744092283570387, "grad_norm": 1.0636916160583496, "learning_rate": 9.363973684210526e-05, "loss": 0.4252, "step": 44188 }, { 
"epoch": 2.4744652256691677, "grad_norm": 1.3042874336242676, "learning_rate": 9.363947368421054e-05, "loss": 0.4403, "step": 44189 }, { "epoch": 2.4745212229812967, "grad_norm": 1.261055827140808, "learning_rate": 9.363921052631578e-05, "loss": 0.3018, "step": 44190 }, { "epoch": 2.4745772202934257, "grad_norm": 1.2624691724777222, "learning_rate": 9.363894736842106e-05, "loss": 0.3558, "step": 44191 }, { "epoch": 2.4746332176055548, "grad_norm": 1.0622167587280273, "learning_rate": 9.363868421052632e-05, "loss": 0.3872, "step": 44192 }, { "epoch": 2.474689214917684, "grad_norm": 1.302704095840454, "learning_rate": 9.363842105263159e-05, "loss": 0.34, "step": 44193 }, { "epoch": 2.474745212229813, "grad_norm": 1.1235733032226562, "learning_rate": 9.363815789473685e-05, "loss": 0.4324, "step": 44194 }, { "epoch": 2.474801209541942, "grad_norm": 1.168811559677124, "learning_rate": 9.363789473684211e-05, "loss": 0.381, "step": 44195 }, { "epoch": 2.474857206854071, "grad_norm": 1.1778302192687988, "learning_rate": 9.363763157894737e-05, "loss": 0.4337, "step": 44196 }, { "epoch": 2.4749132041662, "grad_norm": 1.262765884399414, "learning_rate": 9.363736842105264e-05, "loss": 0.4214, "step": 44197 }, { "epoch": 2.474969201478329, "grad_norm": 1.1716381311416626, "learning_rate": 9.36371052631579e-05, "loss": 0.357, "step": 44198 }, { "epoch": 2.475025198790458, "grad_norm": 1.2705764770507812, "learning_rate": 9.363684210526316e-05, "loss": 0.3563, "step": 44199 }, { "epoch": 2.475081196102587, "grad_norm": 1.1583493947982788, "learning_rate": 9.363657894736842e-05, "loss": 0.4596, "step": 44200 }, { "epoch": 2.475137193414716, "grad_norm": 1.0806114673614502, "learning_rate": 9.36363157894737e-05, "loss": 0.373, "step": 44201 }, { "epoch": 2.475193190726845, "grad_norm": 1.2327076196670532, "learning_rate": 9.363605263157895e-05, "loss": 0.3775, "step": 44202 }, { "epoch": 2.475249188038974, "grad_norm": 1.133095145225525, "learning_rate": 9.363578947368421e-05, 
"loss": 0.3428, "step": 44203 }, { "epoch": 2.475305185351103, "grad_norm": 1.2337861061096191, "learning_rate": 9.363552631578947e-05, "loss": 0.4825, "step": 44204 }, { "epoch": 2.475361182663232, "grad_norm": 1.3687037229537964, "learning_rate": 9.363526315789473e-05, "loss": 0.4871, "step": 44205 }, { "epoch": 2.475417179975361, "grad_norm": 1.0706144571304321, "learning_rate": 9.363500000000001e-05, "loss": 0.3895, "step": 44206 }, { "epoch": 2.47547317728749, "grad_norm": 1.185217261314392, "learning_rate": 9.363473684210527e-05, "loss": 0.3375, "step": 44207 }, { "epoch": 2.475529174599619, "grad_norm": 1.0831892490386963, "learning_rate": 9.363447368421053e-05, "loss": 0.4064, "step": 44208 }, { "epoch": 2.475585171911748, "grad_norm": 1.218835473060608, "learning_rate": 9.363421052631579e-05, "loss": 0.3733, "step": 44209 }, { "epoch": 2.475641169223877, "grad_norm": 1.0665627717971802, "learning_rate": 9.363394736842106e-05, "loss": 0.4019, "step": 44210 }, { "epoch": 2.475697166536006, "grad_norm": 1.2511937618255615, "learning_rate": 9.363368421052632e-05, "loss": 0.5153, "step": 44211 }, { "epoch": 2.475753163848135, "grad_norm": 1.4110581874847412, "learning_rate": 9.363342105263159e-05, "loss": 0.3775, "step": 44212 }, { "epoch": 2.475809161160264, "grad_norm": 1.0463111400604248, "learning_rate": 9.363315789473684e-05, "loss": 0.3598, "step": 44213 }, { "epoch": 2.475865158472393, "grad_norm": 1.0675930976867676, "learning_rate": 9.363289473684211e-05, "loss": 0.4516, "step": 44214 }, { "epoch": 2.475921155784522, "grad_norm": 1.0578423738479614, "learning_rate": 9.363263157894737e-05, "loss": 0.3859, "step": 44215 }, { "epoch": 2.4759771530966512, "grad_norm": 1.3587926626205444, "learning_rate": 9.363236842105265e-05, "loss": 0.43, "step": 44216 }, { "epoch": 2.4760331504087802, "grad_norm": 1.0513360500335693, "learning_rate": 9.36321052631579e-05, "loss": 0.286, "step": 44217 }, { "epoch": 2.4760891477209093, "grad_norm": 1.070373296737671, 
"learning_rate": 9.363184210526316e-05, "loss": 0.3816, "step": 44218 }, { "epoch": 2.4761451450330383, "grad_norm": 2.3888747692108154, "learning_rate": 9.363157894736842e-05, "loss": 0.4975, "step": 44219 }, { "epoch": 2.4762011423451673, "grad_norm": 1.155505895614624, "learning_rate": 9.363131578947368e-05, "loss": 0.3417, "step": 44220 }, { "epoch": 2.4762571396572963, "grad_norm": 1.20297110080719, "learning_rate": 9.363105263157896e-05, "loss": 0.4159, "step": 44221 }, { "epoch": 2.4763131369694253, "grad_norm": 1.0364887714385986, "learning_rate": 9.363078947368422e-05, "loss": 0.4141, "step": 44222 }, { "epoch": 2.4763691342815544, "grad_norm": 1.1430013179779053, "learning_rate": 9.363052631578948e-05, "loss": 0.3749, "step": 44223 }, { "epoch": 2.4764251315936834, "grad_norm": 1.2423121929168701, "learning_rate": 9.363026315789474e-05, "loss": 0.387, "step": 44224 }, { "epoch": 2.4764811289058124, "grad_norm": 1.3047313690185547, "learning_rate": 9.363000000000001e-05, "loss": 0.5771, "step": 44225 }, { "epoch": 2.4765371262179414, "grad_norm": 1.0119483470916748, "learning_rate": 9.362973684210527e-05, "loss": 0.3837, "step": 44226 }, { "epoch": 2.4765931235300704, "grad_norm": 1.500756859779358, "learning_rate": 9.362947368421053e-05, "loss": 0.4075, "step": 44227 }, { "epoch": 2.4766491208421995, "grad_norm": 1.291379690170288, "learning_rate": 9.362921052631579e-05, "loss": 0.3999, "step": 44228 }, { "epoch": 2.4767051181543285, "grad_norm": 2.538275718688965, "learning_rate": 9.362894736842106e-05, "loss": 0.5308, "step": 44229 }, { "epoch": 2.4767611154664575, "grad_norm": 1.202265739440918, "learning_rate": 9.362868421052632e-05, "loss": 0.5129, "step": 44230 }, { "epoch": 2.4768171127785865, "grad_norm": 1.124449610710144, "learning_rate": 9.362842105263158e-05, "loss": 0.3865, "step": 44231 }, { "epoch": 2.4768731100907155, "grad_norm": 6.918768405914307, "learning_rate": 9.362815789473684e-05, "loss": 0.3774, "step": 44232 }, { "epoch": 
2.4769291074028446, "grad_norm": 0.9559996724128723, "learning_rate": 9.362789473684211e-05, "loss": 0.2914, "step": 44233 }, { "epoch": 2.4769851047149736, "grad_norm": 1.4014638662338257, "learning_rate": 9.362763157894737e-05, "loss": 0.3846, "step": 44234 }, { "epoch": 2.4770411020271026, "grad_norm": 1.219598650932312, "learning_rate": 9.362736842105265e-05, "loss": 0.5325, "step": 44235 }, { "epoch": 2.4770970993392316, "grad_norm": 1.401444673538208, "learning_rate": 9.36271052631579e-05, "loss": 0.3956, "step": 44236 }, { "epoch": 2.4771530966513606, "grad_norm": 1.232041835784912, "learning_rate": 9.362684210526315e-05, "loss": 0.4369, "step": 44237 }, { "epoch": 2.4772090939634897, "grad_norm": 1.0963729619979858, "learning_rate": 9.362657894736843e-05, "loss": 0.4629, "step": 44238 }, { "epoch": 2.4772650912756187, "grad_norm": 1.120522141456604, "learning_rate": 9.362631578947369e-05, "loss": 0.3754, "step": 44239 }, { "epoch": 2.4773210885877477, "grad_norm": 1.3960210084915161, "learning_rate": 9.362605263157896e-05, "loss": 0.5175, "step": 44240 }, { "epoch": 2.4773770858998767, "grad_norm": 1.3212816715240479, "learning_rate": 9.36257894736842e-05, "loss": 0.3626, "step": 44241 }, { "epoch": 2.4774330832120057, "grad_norm": 1.3703598976135254, "learning_rate": 9.362552631578948e-05, "loss": 0.349, "step": 44242 }, { "epoch": 2.4774890805241347, "grad_norm": 1.1253827810287476, "learning_rate": 9.362526315789474e-05, "loss": 0.2938, "step": 44243 }, { "epoch": 2.4775450778362638, "grad_norm": 1.2143992185592651, "learning_rate": 9.362500000000001e-05, "loss": 0.4504, "step": 44244 }, { "epoch": 2.477601075148393, "grad_norm": 1.4462101459503174, "learning_rate": 9.362473684210526e-05, "loss": 0.4675, "step": 44245 }, { "epoch": 2.477657072460522, "grad_norm": 1.0151829719543457, "learning_rate": 9.362447368421053e-05, "loss": 0.2997, "step": 44246 }, { "epoch": 2.477713069772651, "grad_norm": 1.3268916606903076, "learning_rate": 
9.362421052631579e-05, "loss": 0.5082, "step": 44247 }, { "epoch": 2.47776906708478, "grad_norm": 1.262639045715332, "learning_rate": 9.362394736842106e-05, "loss": 0.4398, "step": 44248 }, { "epoch": 2.477825064396909, "grad_norm": 1.350880742073059, "learning_rate": 9.362368421052632e-05, "loss": 0.4232, "step": 44249 }, { "epoch": 2.477881061709038, "grad_norm": 1.3605902194976807, "learning_rate": 9.362342105263158e-05, "loss": 0.5403, "step": 44250 }, { "epoch": 2.477937059021167, "grad_norm": 1.1158623695373535, "learning_rate": 9.362315789473684e-05, "loss": 0.4137, "step": 44251 }, { "epoch": 2.477993056333296, "grad_norm": 1.388620376586914, "learning_rate": 9.362289473684212e-05, "loss": 0.4514, "step": 44252 }, { "epoch": 2.478049053645425, "grad_norm": 1.1969324350357056, "learning_rate": 9.362263157894738e-05, "loss": 0.4217, "step": 44253 }, { "epoch": 2.478105050957554, "grad_norm": 1.4513752460479736, "learning_rate": 9.362236842105264e-05, "loss": 0.486, "step": 44254 }, { "epoch": 2.478161048269683, "grad_norm": 1.3357194662094116, "learning_rate": 9.36221052631579e-05, "loss": 0.5557, "step": 44255 }, { "epoch": 2.478217045581812, "grad_norm": 1.2365554571151733, "learning_rate": 9.362184210526316e-05, "loss": 0.3582, "step": 44256 }, { "epoch": 2.478273042893941, "grad_norm": 1.4263801574707031, "learning_rate": 9.362157894736843e-05, "loss": 0.4174, "step": 44257 }, { "epoch": 2.47832904020607, "grad_norm": 1.1818263530731201, "learning_rate": 9.362131578947369e-05, "loss": 0.4068, "step": 44258 }, { "epoch": 2.478385037518199, "grad_norm": 1.1868363618850708, "learning_rate": 9.362105263157895e-05, "loss": 0.4086, "step": 44259 }, { "epoch": 2.478441034830328, "grad_norm": 1.3890413045883179, "learning_rate": 9.362078947368421e-05, "loss": 0.3857, "step": 44260 }, { "epoch": 2.478497032142457, "grad_norm": 1.1044574975967407, "learning_rate": 9.362052631578948e-05, "loss": 0.3654, "step": 44261 }, { "epoch": 2.478553029454586, "grad_norm": 
1.3509091138839722, "learning_rate": 9.362026315789474e-05, "loss": 0.394, "step": 44262 }, { "epoch": 2.478609026766715, "grad_norm": 1.177695631980896, "learning_rate": 9.362e-05, "loss": 0.3574, "step": 44263 }, { "epoch": 2.478665024078844, "grad_norm": 1.0882335901260376, "learning_rate": 9.361973684210526e-05, "loss": 0.4333, "step": 44264 }, { "epoch": 2.478721021390973, "grad_norm": 1.234005093574524, "learning_rate": 9.361947368421053e-05, "loss": 0.4111, "step": 44265 }, { "epoch": 2.478777018703102, "grad_norm": 1.3114949464797974, "learning_rate": 9.36192105263158e-05, "loss": 0.546, "step": 44266 }, { "epoch": 2.478833016015231, "grad_norm": 1.1203795671463013, "learning_rate": 9.361894736842107e-05, "loss": 0.4014, "step": 44267 }, { "epoch": 2.4788890133273602, "grad_norm": 1.273461103439331, "learning_rate": 9.361868421052631e-05, "loss": 0.3747, "step": 44268 }, { "epoch": 2.4789450106394892, "grad_norm": 1.461968183517456, "learning_rate": 9.361842105263159e-05, "loss": 0.4591, "step": 44269 }, { "epoch": 2.4790010079516183, "grad_norm": 1.5604476928710938, "learning_rate": 9.361815789473685e-05, "loss": 0.5595, "step": 44270 }, { "epoch": 2.4790570052637473, "grad_norm": 1.204005479812622, "learning_rate": 9.36178947368421e-05, "loss": 0.6587, "step": 44271 }, { "epoch": 2.4791130025758763, "grad_norm": 1.3592901229858398, "learning_rate": 9.361763157894738e-05, "loss": 0.4579, "step": 44272 }, { "epoch": 2.4791689998880053, "grad_norm": 0.9730402827262878, "learning_rate": 9.361736842105263e-05, "loss": 0.44, "step": 44273 }, { "epoch": 2.4792249972001343, "grad_norm": 1.2401785850524902, "learning_rate": 9.36171052631579e-05, "loss": 0.4138, "step": 44274 }, { "epoch": 2.4792809945122634, "grad_norm": 1.1003730297088623, "learning_rate": 9.361684210526316e-05, "loss": 0.4546, "step": 44275 }, { "epoch": 2.4793369918243924, "grad_norm": 1.1668202877044678, "learning_rate": 9.361657894736843e-05, "loss": 0.3441, "step": 44276 }, { "epoch": 
2.4793929891365214, "grad_norm": 1.307818055152893, "learning_rate": 9.361631578947369e-05, "loss": 0.3807, "step": 44277 }, { "epoch": 2.4794489864486504, "grad_norm": 1.1633470058441162, "learning_rate": 9.361605263157895e-05, "loss": 0.3264, "step": 44278 }, { "epoch": 2.4795049837607794, "grad_norm": 1.1844751834869385, "learning_rate": 9.361578947368421e-05, "loss": 0.399, "step": 44279 }, { "epoch": 2.4795609810729085, "grad_norm": 1.1914105415344238, "learning_rate": 9.361552631578948e-05, "loss": 0.4115, "step": 44280 }, { "epoch": 2.4796169783850375, "grad_norm": 1.1621674299240112, "learning_rate": 9.361526315789474e-05, "loss": 0.4789, "step": 44281 }, { "epoch": 2.4796729756971665, "grad_norm": 1.2318912744522095, "learning_rate": 9.3615e-05, "loss": 0.3727, "step": 44282 }, { "epoch": 2.4797289730092955, "grad_norm": 1.42116117477417, "learning_rate": 9.361473684210526e-05, "loss": 0.382, "step": 44283 }, { "epoch": 2.4797849703214245, "grad_norm": 1.2754536867141724, "learning_rate": 9.361447368421054e-05, "loss": 0.3931, "step": 44284 }, { "epoch": 2.4798409676335536, "grad_norm": 1.2895548343658447, "learning_rate": 9.36142105263158e-05, "loss": 0.4431, "step": 44285 }, { "epoch": 2.4798969649456826, "grad_norm": 1.221282958984375, "learning_rate": 9.361394736842106e-05, "loss": 0.5005, "step": 44286 }, { "epoch": 2.4799529622578116, "grad_norm": 1.1840317249298096, "learning_rate": 9.361368421052632e-05, "loss": 0.3752, "step": 44287 }, { "epoch": 2.4800089595699406, "grad_norm": 1.2022030353546143, "learning_rate": 9.361342105263158e-05, "loss": 0.3922, "step": 44288 }, { "epoch": 2.4800649568820696, "grad_norm": 1.2402735948562622, "learning_rate": 9.361315789473685e-05, "loss": 0.4245, "step": 44289 }, { "epoch": 2.4801209541941986, "grad_norm": 1.180904746055603, "learning_rate": 9.361289473684211e-05, "loss": 0.3804, "step": 44290 }, { "epoch": 2.4801769515063277, "grad_norm": 1.1709727048873901, "learning_rate": 9.361263157894737e-05, "loss": 
0.3607, "step": 44291 }, { "epoch": 2.4802329488184567, "grad_norm": 1.1259634494781494, "learning_rate": 9.361236842105263e-05, "loss": 0.3738, "step": 44292 }, { "epoch": 2.4802889461305857, "grad_norm": 1.236454725265503, "learning_rate": 9.36121052631579e-05, "loss": 0.4646, "step": 44293 }, { "epoch": 2.4803449434427147, "grad_norm": 1.212053656578064, "learning_rate": 9.361184210526316e-05, "loss": 0.4888, "step": 44294 }, { "epoch": 2.4804009407548437, "grad_norm": 1.0567636489868164, "learning_rate": 9.361157894736842e-05, "loss": 0.4504, "step": 44295 }, { "epoch": 2.4804569380669728, "grad_norm": 1.4017983675003052, "learning_rate": 9.361131578947368e-05, "loss": 0.3912, "step": 44296 }, { "epoch": 2.480512935379102, "grad_norm": 1.2582666873931885, "learning_rate": 9.361105263157895e-05, "loss": 0.4469, "step": 44297 }, { "epoch": 2.480568932691231, "grad_norm": 1.0491713285446167, "learning_rate": 9.361078947368421e-05, "loss": 0.3459, "step": 44298 }, { "epoch": 2.48062493000336, "grad_norm": 1.0057142972946167, "learning_rate": 9.361052631578949e-05, "loss": 0.3196, "step": 44299 }, { "epoch": 2.480680927315489, "grad_norm": 1.0935282707214355, "learning_rate": 9.361026315789473e-05, "loss": 0.333, "step": 44300 }, { "epoch": 2.480736924627618, "grad_norm": 1.250519871711731, "learning_rate": 9.361e-05, "loss": 0.3471, "step": 44301 }, { "epoch": 2.480792921939747, "grad_norm": 1.3426438570022583, "learning_rate": 9.360973684210527e-05, "loss": 0.448, "step": 44302 }, { "epoch": 2.480848919251876, "grad_norm": 1.2694522142410278, "learning_rate": 9.360947368421054e-05, "loss": 0.4841, "step": 44303 }, { "epoch": 2.480904916564005, "grad_norm": 1.098730444908142, "learning_rate": 9.36092105263158e-05, "loss": 0.5077, "step": 44304 }, { "epoch": 2.480960913876134, "grad_norm": 1.1663686037063599, "learning_rate": 9.360894736842105e-05, "loss": 0.4174, "step": 44305 }, { "epoch": 2.481016911188263, "grad_norm": 1.2516649961471558, "learning_rate": 
9.360868421052632e-05, "loss": 0.3566, "step": 44306 }, { "epoch": 2.481072908500392, "grad_norm": 1.1248210668563843, "learning_rate": 9.360842105263158e-05, "loss": 0.3192, "step": 44307 }, { "epoch": 2.481128905812521, "grad_norm": 2.4669246673583984, "learning_rate": 9.360815789473685e-05, "loss": 0.3732, "step": 44308 }, { "epoch": 2.48118490312465, "grad_norm": 1.19463050365448, "learning_rate": 9.360789473684211e-05, "loss": 0.3554, "step": 44309 }, { "epoch": 2.481240900436779, "grad_norm": 1.0442301034927368, "learning_rate": 9.360763157894737e-05, "loss": 0.3112, "step": 44310 }, { "epoch": 2.481296897748908, "grad_norm": 3.028778553009033, "learning_rate": 9.360736842105263e-05, "loss": 0.4936, "step": 44311 }, { "epoch": 2.481352895061037, "grad_norm": 1.1400574445724487, "learning_rate": 9.36071052631579e-05, "loss": 0.488, "step": 44312 }, { "epoch": 2.481408892373166, "grad_norm": 1.0928895473480225, "learning_rate": 9.360684210526316e-05, "loss": 0.3176, "step": 44313 }, { "epoch": 2.481464889685295, "grad_norm": 1.1874134540557861, "learning_rate": 9.360657894736842e-05, "loss": 0.3583, "step": 44314 }, { "epoch": 2.481520886997424, "grad_norm": 1.341428518295288, "learning_rate": 9.360631578947368e-05, "loss": 0.577, "step": 44315 }, { "epoch": 2.481576884309553, "grad_norm": 1.1175403594970703, "learning_rate": 9.360605263157896e-05, "loss": 0.3954, "step": 44316 }, { "epoch": 2.481632881621682, "grad_norm": 1.0590373277664185, "learning_rate": 9.360578947368422e-05, "loss": 0.3935, "step": 44317 }, { "epoch": 2.481688878933811, "grad_norm": 1.0659778118133545, "learning_rate": 9.360552631578948e-05, "loss": 0.3345, "step": 44318 }, { "epoch": 2.48174487624594, "grad_norm": 0.9667007327079773, "learning_rate": 9.360526315789474e-05, "loss": 0.375, "step": 44319 }, { "epoch": 2.481800873558069, "grad_norm": 1.5114572048187256, "learning_rate": 9.360500000000001e-05, "loss": 0.6055, "step": 44320 }, { "epoch": 2.4818568708701982, "grad_norm": 
1.123246669769287, "learning_rate": 9.360473684210527e-05, "loss": 0.3809, "step": 44321 }, { "epoch": 2.4819128681823273, "grad_norm": 1.472427248954773, "learning_rate": 9.360447368421054e-05, "loss": 0.4051, "step": 44322 }, { "epoch": 2.4819688654944563, "grad_norm": 1.1469322443008423, "learning_rate": 9.360421052631579e-05, "loss": 0.3426, "step": 44323 }, { "epoch": 2.4820248628065853, "grad_norm": 1.1927857398986816, "learning_rate": 9.360394736842105e-05, "loss": 0.5035, "step": 44324 }, { "epoch": 2.4820808601187143, "grad_norm": 1.2909719944000244, "learning_rate": 9.360368421052632e-05, "loss": 0.4026, "step": 44325 }, { "epoch": 2.4821368574308433, "grad_norm": 1.3235340118408203, "learning_rate": 9.360342105263158e-05, "loss": 0.5212, "step": 44326 }, { "epoch": 2.4821928547429724, "grad_norm": 1.2182832956314087, "learning_rate": 9.360315789473685e-05, "loss": 0.5021, "step": 44327 }, { "epoch": 2.4822488520551014, "grad_norm": 1.1385419368743896, "learning_rate": 9.36028947368421e-05, "loss": 0.3244, "step": 44328 }, { "epoch": 2.4823048493672304, "grad_norm": 1.1761189699172974, "learning_rate": 9.360263157894737e-05, "loss": 0.4057, "step": 44329 }, { "epoch": 2.4823608466793594, "grad_norm": 1.1188102960586548, "learning_rate": 9.360236842105263e-05, "loss": 0.3156, "step": 44330 }, { "epoch": 2.4824168439914884, "grad_norm": 1.6709442138671875, "learning_rate": 9.360210526315791e-05, "loss": 0.3776, "step": 44331 }, { "epoch": 2.4824728413036175, "grad_norm": 1.4009681940078735, "learning_rate": 9.360184210526317e-05, "loss": 0.4567, "step": 44332 }, { "epoch": 2.4825288386157465, "grad_norm": 1.3581562042236328, "learning_rate": 9.360157894736843e-05, "loss": 0.3986, "step": 44333 }, { "epoch": 2.4825848359278755, "grad_norm": 1.220747947692871, "learning_rate": 9.360131578947369e-05, "loss": 0.4543, "step": 44334 }, { "epoch": 2.4826408332400045, "grad_norm": 1.236267328262329, "learning_rate": 9.360105263157896e-05, "loss": 0.4253, "step": 
44335 }, { "epoch": 2.4826968305521335, "grad_norm": 1.0665589570999146, "learning_rate": 9.360078947368422e-05, "loss": 0.3402, "step": 44336 }, { "epoch": 2.4827528278642625, "grad_norm": 1.1624139547348022, "learning_rate": 9.360052631578948e-05, "loss": 0.3634, "step": 44337 }, { "epoch": 2.4828088251763916, "grad_norm": 1.1849192380905151, "learning_rate": 9.360026315789474e-05, "loss": 0.3726, "step": 44338 }, { "epoch": 2.4828648224885206, "grad_norm": 1.114926815032959, "learning_rate": 9.360000000000001e-05, "loss": 0.2879, "step": 44339 }, { "epoch": 2.4829208198006496, "grad_norm": 1.1868789196014404, "learning_rate": 9.359973684210527e-05, "loss": 0.4337, "step": 44340 }, { "epoch": 2.4829768171127786, "grad_norm": 2.4211010932922363, "learning_rate": 9.359947368421053e-05, "loss": 0.4841, "step": 44341 }, { "epoch": 2.4830328144249076, "grad_norm": 1.0909616947174072, "learning_rate": 9.359921052631579e-05, "loss": 0.3437, "step": 44342 }, { "epoch": 2.4830888117370367, "grad_norm": 1.056530475616455, "learning_rate": 9.359894736842105e-05, "loss": 0.3347, "step": 44343 }, { "epoch": 2.4831448090491657, "grad_norm": 1.4640955924987793, "learning_rate": 9.359868421052632e-05, "loss": 0.4662, "step": 44344 }, { "epoch": 2.4832008063612947, "grad_norm": 1.1382948160171509, "learning_rate": 9.359842105263158e-05, "loss": 0.4165, "step": 44345 }, { "epoch": 2.4832568036734237, "grad_norm": 2.338960647583008, "learning_rate": 9.359815789473684e-05, "loss": 0.3593, "step": 44346 }, { "epoch": 2.4833128009855527, "grad_norm": 1.2627880573272705, "learning_rate": 9.35978947368421e-05, "loss": 0.4196, "step": 44347 }, { "epoch": 2.4833687982976818, "grad_norm": 1.4457350969314575, "learning_rate": 9.359763157894738e-05, "loss": 0.4471, "step": 44348 }, { "epoch": 2.4834247956098108, "grad_norm": 1.1647441387176514, "learning_rate": 9.359736842105264e-05, "loss": 0.3835, "step": 44349 }, { "epoch": 2.48348079292194, "grad_norm": 1.1431159973144531, 
"learning_rate": 9.35971052631579e-05, "loss": 0.3496, "step": 44350 }, { "epoch": 2.483536790234069, "grad_norm": 1.9090896844863892, "learning_rate": 9.359684210526316e-05, "loss": 0.3803, "step": 44351 }, { "epoch": 2.483592787546198, "grad_norm": 1.4625990390777588, "learning_rate": 9.359657894736843e-05, "loss": 0.3953, "step": 44352 }, { "epoch": 2.483648784858327, "grad_norm": 1.1794495582580566, "learning_rate": 9.359631578947369e-05, "loss": 0.4426, "step": 44353 }, { "epoch": 2.483704782170456, "grad_norm": 1.1374478340148926, "learning_rate": 9.359605263157896e-05, "loss": 0.3319, "step": 44354 }, { "epoch": 2.483760779482585, "grad_norm": 1.2206623554229736, "learning_rate": 9.359578947368421e-05, "loss": 0.309, "step": 44355 }, { "epoch": 2.483816776794714, "grad_norm": 1.0533288717269897, "learning_rate": 9.359552631578948e-05, "loss": 0.3853, "step": 44356 }, { "epoch": 2.483872774106843, "grad_norm": 1.0481966733932495, "learning_rate": 9.359526315789474e-05, "loss": 0.3109, "step": 44357 }, { "epoch": 2.483928771418972, "grad_norm": 1.1098259687423706, "learning_rate": 9.3595e-05, "loss": 0.3584, "step": 44358 }, { "epoch": 2.483984768731101, "grad_norm": 1.2437944412231445, "learning_rate": 9.359473684210527e-05, "loss": 0.4526, "step": 44359 }, { "epoch": 2.48404076604323, "grad_norm": 1.125532627105713, "learning_rate": 9.359447368421052e-05, "loss": 0.3306, "step": 44360 }, { "epoch": 2.484096763355359, "grad_norm": 1.1381503343582153, "learning_rate": 9.35942105263158e-05, "loss": 0.342, "step": 44361 }, { "epoch": 2.484152760667488, "grad_norm": 1.5417871475219727, "learning_rate": 9.359394736842105e-05, "loss": 0.3742, "step": 44362 }, { "epoch": 2.484208757979617, "grad_norm": 1.2284289598464966, "learning_rate": 9.359368421052633e-05, "loss": 0.4808, "step": 44363 }, { "epoch": 2.484264755291746, "grad_norm": 1.0769321918487549, "learning_rate": 9.359342105263159e-05, "loss": 0.405, "step": 44364 }, { "epoch": 2.484320752603875, 
"grad_norm": 1.147834300994873, "learning_rate": 9.359315789473685e-05, "loss": 0.3571, "step": 44365 }, { "epoch": 2.484376749916004, "grad_norm": 1.1369510889053345, "learning_rate": 9.35928947368421e-05, "loss": 0.5292, "step": 44366 }, { "epoch": 2.484432747228133, "grad_norm": 1.2867659330368042, "learning_rate": 9.359263157894738e-05, "loss": 0.4032, "step": 44367 }, { "epoch": 2.484488744540262, "grad_norm": 1.1284868717193604, "learning_rate": 9.359236842105264e-05, "loss": 0.3291, "step": 44368 }, { "epoch": 2.484544741852391, "grad_norm": 1.228381633758545, "learning_rate": 9.35921052631579e-05, "loss": 0.4156, "step": 44369 }, { "epoch": 2.48460073916452, "grad_norm": 1.2863619327545166, "learning_rate": 9.359184210526316e-05, "loss": 0.4195, "step": 44370 }, { "epoch": 2.484656736476649, "grad_norm": 1.2525149583816528, "learning_rate": 9.359157894736843e-05, "loss": 0.5231, "step": 44371 }, { "epoch": 2.484712733788778, "grad_norm": 1.1156443357467651, "learning_rate": 9.359131578947369e-05, "loss": 0.4538, "step": 44372 }, { "epoch": 2.4847687311009072, "grad_norm": 1.1146354675292969, "learning_rate": 9.359105263157895e-05, "loss": 0.3729, "step": 44373 }, { "epoch": 2.4848247284130363, "grad_norm": 1.3225160837173462, "learning_rate": 9.359078947368421e-05, "loss": 0.4583, "step": 44374 }, { "epoch": 2.4848807257251653, "grad_norm": 1.6498847007751465, "learning_rate": 9.359052631578947e-05, "loss": 0.4204, "step": 44375 }, { "epoch": 2.4849367230372943, "grad_norm": 1.392510175704956, "learning_rate": 9.359026315789474e-05, "loss": 0.355, "step": 44376 }, { "epoch": 2.4849927203494233, "grad_norm": 44.35410690307617, "learning_rate": 9.359e-05, "loss": 0.4076, "step": 44377 }, { "epoch": 2.4850487176615523, "grad_norm": 1.1210507154464722, "learning_rate": 9.358973684210526e-05, "loss": 0.3591, "step": 44378 }, { "epoch": 2.4851047149736814, "grad_norm": 1.152748942375183, "learning_rate": 9.358947368421052e-05, "loss": 0.4241, "step": 44379 }, { 
"epoch": 2.4851607122858104, "grad_norm": 1.199645757675171, "learning_rate": 9.35892105263158e-05, "loss": 0.5081, "step": 44380 }, { "epoch": 2.4852167095979394, "grad_norm": 1.1402279138565063, "learning_rate": 9.358894736842106e-05, "loss": 0.4445, "step": 44381 }, { "epoch": 2.4852727069100684, "grad_norm": 1.1898512840270996, "learning_rate": 9.358868421052633e-05, "loss": 0.4172, "step": 44382 }, { "epoch": 2.4853287042221974, "grad_norm": 1.2633826732635498, "learning_rate": 9.358842105263158e-05, "loss": 0.3905, "step": 44383 }, { "epoch": 2.4853847015343264, "grad_norm": 1.1433947086334229, "learning_rate": 9.358815789473685e-05, "loss": 0.4654, "step": 44384 }, { "epoch": 2.4854406988464555, "grad_norm": 1.388482689857483, "learning_rate": 9.358789473684211e-05, "loss": 0.473, "step": 44385 }, { "epoch": 2.4854966961585845, "grad_norm": 1.2269740104675293, "learning_rate": 9.358763157894738e-05, "loss": 0.3988, "step": 44386 }, { "epoch": 2.4855526934707135, "grad_norm": 1.3058655261993408, "learning_rate": 9.358736842105264e-05, "loss": 0.4723, "step": 44387 }, { "epoch": 2.4856086907828425, "grad_norm": 1.2200459241867065, "learning_rate": 9.35871052631579e-05, "loss": 0.3827, "step": 44388 }, { "epoch": 2.4856646880949715, "grad_norm": 1.1663531064987183, "learning_rate": 9.358684210526316e-05, "loss": 0.4147, "step": 44389 }, { "epoch": 2.4857206854071006, "grad_norm": 1.1792305707931519, "learning_rate": 9.358657894736843e-05, "loss": 0.3393, "step": 44390 }, { "epoch": 2.4857766827192296, "grad_norm": 1.4000658988952637, "learning_rate": 9.35863157894737e-05, "loss": 0.4596, "step": 44391 }, { "epoch": 2.4858326800313586, "grad_norm": 1.0567255020141602, "learning_rate": 9.358605263157894e-05, "loss": 0.3018, "step": 44392 }, { "epoch": 2.4858886773434876, "grad_norm": 1.1478970050811768, "learning_rate": 9.358578947368421e-05, "loss": 0.5257, "step": 44393 }, { "epoch": 2.4859446746556166, "grad_norm": 1.1812740564346313, "learning_rate": 
9.358552631578947e-05, "loss": 0.4569, "step": 44394 }, { "epoch": 2.4860006719677457, "grad_norm": 1.1951205730438232, "learning_rate": 9.358526315789475e-05, "loss": 0.4418, "step": 44395 }, { "epoch": 2.4860566692798747, "grad_norm": 1.0341664552688599, "learning_rate": 9.3585e-05, "loss": 0.4201, "step": 44396 }, { "epoch": 2.4861126665920037, "grad_norm": 1.0356801748275757, "learning_rate": 9.358473684210527e-05, "loss": 0.429, "step": 44397 }, { "epoch": 2.4861686639041327, "grad_norm": 1.1959925889968872, "learning_rate": 9.358447368421053e-05, "loss": 0.3728, "step": 44398 }, { "epoch": 2.4862246612162617, "grad_norm": 1.2789123058319092, "learning_rate": 9.35842105263158e-05, "loss": 0.3894, "step": 44399 }, { "epoch": 2.4862806585283908, "grad_norm": 1.1927591562271118, "learning_rate": 9.358394736842106e-05, "loss": 0.3223, "step": 44400 }, { "epoch": 2.4863366558405198, "grad_norm": 1.1825623512268066, "learning_rate": 9.358368421052632e-05, "loss": 0.4201, "step": 44401 }, { "epoch": 2.486392653152649, "grad_norm": 1.1208559274673462, "learning_rate": 9.358342105263158e-05, "loss": 0.4757, "step": 44402 }, { "epoch": 2.486448650464778, "grad_norm": 1.0040638446807861, "learning_rate": 9.358315789473685e-05, "loss": 0.4577, "step": 44403 }, { "epoch": 2.486504647776907, "grad_norm": 1.1077594757080078, "learning_rate": 9.358289473684211e-05, "loss": 0.4893, "step": 44404 }, { "epoch": 2.486560645089036, "grad_norm": 1.1230523586273193, "learning_rate": 9.358263157894737e-05, "loss": 0.3368, "step": 44405 }, { "epoch": 2.486616642401165, "grad_norm": 1.061773419380188, "learning_rate": 9.358236842105263e-05, "loss": 0.4284, "step": 44406 }, { "epoch": 2.486672639713294, "grad_norm": 1.078866720199585, "learning_rate": 9.35821052631579e-05, "loss": 0.3328, "step": 44407 }, { "epoch": 2.486728637025423, "grad_norm": 1.2729166746139526, "learning_rate": 9.358184210526316e-05, "loss": 0.4318, "step": 44408 }, { "epoch": 2.486784634337552, "grad_norm": 
1.183813452720642, "learning_rate": 9.358157894736842e-05, "loss": 0.4281, "step": 44409 }, { "epoch": 2.486840631649681, "grad_norm": 1.2210932970046997, "learning_rate": 9.358131578947368e-05, "loss": 0.4753, "step": 44410 }, { "epoch": 2.48689662896181, "grad_norm": 1.0906797647476196, "learning_rate": 9.358105263157894e-05, "loss": 0.3994, "step": 44411 }, { "epoch": 2.486952626273939, "grad_norm": 1.1253408193588257, "learning_rate": 9.358078947368422e-05, "loss": 0.2988, "step": 44412 }, { "epoch": 2.487008623586068, "grad_norm": 1.3081265687942505, "learning_rate": 9.358052631578948e-05, "loss": 0.3908, "step": 44413 }, { "epoch": 2.487064620898197, "grad_norm": 1.0889675617218018, "learning_rate": 9.358026315789475e-05, "loss": 0.413, "step": 44414 }, { "epoch": 2.487120618210326, "grad_norm": 1.3809175491333008, "learning_rate": 9.358e-05, "loss": 0.45, "step": 44415 }, { "epoch": 2.487176615522455, "grad_norm": 1.1167020797729492, "learning_rate": 9.357973684210527e-05, "loss": 0.3413, "step": 44416 }, { "epoch": 2.487232612834584, "grad_norm": 1.3193563222885132, "learning_rate": 9.357947368421053e-05, "loss": 0.4888, "step": 44417 }, { "epoch": 2.487288610146713, "grad_norm": 1.1543149948120117, "learning_rate": 9.35792105263158e-05, "loss": 0.3694, "step": 44418 }, { "epoch": 2.487344607458842, "grad_norm": 1.077239990234375, "learning_rate": 9.357894736842106e-05, "loss": 0.3586, "step": 44419 }, { "epoch": 2.487400604770971, "grad_norm": 1.0811024904251099, "learning_rate": 9.357868421052632e-05, "loss": 0.3176, "step": 44420 }, { "epoch": 2.4874566020831, "grad_norm": 1.4588935375213623, "learning_rate": 9.357842105263158e-05, "loss": 0.4131, "step": 44421 }, { "epoch": 2.487512599395229, "grad_norm": 1.4578564167022705, "learning_rate": 9.357815789473685e-05, "loss": 0.4867, "step": 44422 }, { "epoch": 2.487568596707358, "grad_norm": 1.0522583723068237, "learning_rate": 9.357789473684211e-05, "loss": 0.3762, "step": 44423 }, { "epoch": 
2.487624594019487, "grad_norm": 1.0071390867233276, "learning_rate": 9.357763157894737e-05, "loss": 0.2678, "step": 44424 }, { "epoch": 2.4876805913316162, "grad_norm": 1.1334043741226196, "learning_rate": 9.357736842105263e-05, "loss": 0.3766, "step": 44425 }, { "epoch": 2.4877365886437453, "grad_norm": 1.4347904920578003, "learning_rate": 9.357710526315789e-05, "loss": 0.4893, "step": 44426 }, { "epoch": 2.4877925859558743, "grad_norm": 1.2108877897262573, "learning_rate": 9.357684210526317e-05, "loss": 0.4045, "step": 44427 }, { "epoch": 2.4878485832680033, "grad_norm": 1.4925814867019653, "learning_rate": 9.357657894736843e-05, "loss": 0.3663, "step": 44428 }, { "epoch": 2.4879045805801323, "grad_norm": 1.2763339281082153, "learning_rate": 9.357631578947369e-05, "loss": 0.4363, "step": 44429 }, { "epoch": 2.4879605778922613, "grad_norm": 1.0145639181137085, "learning_rate": 9.357605263157895e-05, "loss": 0.4099, "step": 44430 }, { "epoch": 2.4880165752043903, "grad_norm": 1.1439982652664185, "learning_rate": 9.357578947368422e-05, "loss": 0.445, "step": 44431 }, { "epoch": 2.4880725725165194, "grad_norm": 1.1238441467285156, "learning_rate": 9.357552631578948e-05, "loss": 0.4545, "step": 44432 }, { "epoch": 2.4881285698286484, "grad_norm": 1.0151135921478271, "learning_rate": 9.357526315789474e-05, "loss": 0.4035, "step": 44433 }, { "epoch": 2.4881845671407774, "grad_norm": 1.2869915962219238, "learning_rate": 9.3575e-05, "loss": 0.6252, "step": 44434 }, { "epoch": 2.4882405644529064, "grad_norm": 1.1926716566085815, "learning_rate": 9.357473684210527e-05, "loss": 0.4034, "step": 44435 }, { "epoch": 2.4882965617650354, "grad_norm": 1.498978853225708, "learning_rate": 9.357447368421053e-05, "loss": 0.462, "step": 44436 }, { "epoch": 2.4883525590771645, "grad_norm": 1.2526978254318237, "learning_rate": 9.35742105263158e-05, "loss": 0.4297, "step": 44437 }, { "epoch": 2.4884085563892935, "grad_norm": 1.225989580154419, "learning_rate": 9.357394736842105e-05, 
"loss": 0.4025, "step": 44438 }, { "epoch": 2.4884645537014225, "grad_norm": 1.126609206199646, "learning_rate": 9.357368421052632e-05, "loss": 0.4147, "step": 44439 }, { "epoch": 2.4885205510135515, "grad_norm": 1.2837029695510864, "learning_rate": 9.357342105263158e-05, "loss": 0.4329, "step": 44440 }, { "epoch": 2.4885765483256805, "grad_norm": 1.1106743812561035, "learning_rate": 9.357315789473686e-05, "loss": 0.4723, "step": 44441 }, { "epoch": 2.4886325456378096, "grad_norm": 1.2484773397445679, "learning_rate": 9.35728947368421e-05, "loss": 0.386, "step": 44442 }, { "epoch": 2.4886885429499386, "grad_norm": 1.164198398590088, "learning_rate": 9.357263157894738e-05, "loss": 0.4549, "step": 44443 }, { "epoch": 2.4887445402620676, "grad_norm": 1.0683376789093018, "learning_rate": 9.357236842105264e-05, "loss": 0.4195, "step": 44444 }, { "epoch": 2.4888005375741966, "grad_norm": 1.1411572694778442, "learning_rate": 9.35721052631579e-05, "loss": 0.3504, "step": 44445 }, { "epoch": 2.4888565348863256, "grad_norm": 1.2010785341262817, "learning_rate": 9.357184210526317e-05, "loss": 0.444, "step": 44446 }, { "epoch": 2.4889125321984547, "grad_norm": 1.1552280187606812, "learning_rate": 9.357157894736841e-05, "loss": 0.4045, "step": 44447 }, { "epoch": 2.4889685295105837, "grad_norm": 1.1287347078323364, "learning_rate": 9.357131578947369e-05, "loss": 0.4258, "step": 44448 }, { "epoch": 2.4890245268227127, "grad_norm": 1.2057421207427979, "learning_rate": 9.357105263157895e-05, "loss": 0.3446, "step": 44449 }, { "epoch": 2.4890805241348417, "grad_norm": 1.0982046127319336, "learning_rate": 9.357078947368422e-05, "loss": 0.2673, "step": 44450 }, { "epoch": 2.4891365214469707, "grad_norm": 1.7271478176116943, "learning_rate": 9.357052631578948e-05, "loss": 0.4236, "step": 44451 }, { "epoch": 2.4891925187590997, "grad_norm": 1.3551247119903564, "learning_rate": 9.357026315789474e-05, "loss": 0.4806, "step": 44452 }, { "epoch": 2.4892485160712288, "grad_norm": 
1.125567078590393, "learning_rate": 9.357e-05, "loss": 0.357, "step": 44453 }, { "epoch": 2.489304513383358, "grad_norm": 1.27196204662323, "learning_rate": 9.356973684210527e-05, "loss": 0.4471, "step": 44454 }, { "epoch": 2.489360510695487, "grad_norm": 1.3074246644973755, "learning_rate": 9.356947368421053e-05, "loss": 0.3845, "step": 44455 }, { "epoch": 2.489416508007616, "grad_norm": 1.0610613822937012, "learning_rate": 9.356921052631579e-05, "loss": 0.322, "step": 44456 }, { "epoch": 2.489472505319745, "grad_norm": 1.4347412586212158, "learning_rate": 9.356894736842105e-05, "loss": 0.4696, "step": 44457 }, { "epoch": 2.489528502631874, "grad_norm": 1.164192795753479, "learning_rate": 9.356868421052633e-05, "loss": 0.5126, "step": 44458 }, { "epoch": 2.4895844999440024, "grad_norm": 1.386047601699829, "learning_rate": 9.356842105263159e-05, "loss": 0.4049, "step": 44459 }, { "epoch": 2.489640497256132, "grad_norm": 1.360996127128601, "learning_rate": 9.356815789473685e-05, "loss": 0.3766, "step": 44460 }, { "epoch": 2.4896964945682605, "grad_norm": 0.997880220413208, "learning_rate": 9.35678947368421e-05, "loss": 0.4326, "step": 44461 }, { "epoch": 2.48975249188039, "grad_norm": 1.3211970329284668, "learning_rate": 9.356763157894736e-05, "loss": 0.4903, "step": 44462 }, { "epoch": 2.4898084891925185, "grad_norm": 1.4319353103637695, "learning_rate": 9.356736842105264e-05, "loss": 0.4173, "step": 44463 }, { "epoch": 2.489864486504648, "grad_norm": 0.9493651390075684, "learning_rate": 9.35671052631579e-05, "loss": 0.3284, "step": 44464 }, { "epoch": 2.4899204838167766, "grad_norm": 1.0870492458343506, "learning_rate": 9.356684210526316e-05, "loss": 0.4141, "step": 44465 }, { "epoch": 2.489976481128906, "grad_norm": 1.049599051475525, "learning_rate": 9.356657894736842e-05, "loss": 0.3829, "step": 44466 }, { "epoch": 2.4900324784410346, "grad_norm": 1.1602377891540527, "learning_rate": 9.356631578947369e-05, "loss": 0.3894, "step": 44467 }, { "epoch": 
2.490088475753164, "grad_norm": 1.1313836574554443, "learning_rate": 9.356605263157895e-05, "loss": 0.3796, "step": 44468 }, { "epoch": 2.4901444730652926, "grad_norm": 1.1797488927841187, "learning_rate": 9.356578947368422e-05, "loss": 0.33, "step": 44469 }, { "epoch": 2.490200470377422, "grad_norm": 1.3110485076904297, "learning_rate": 9.356552631578947e-05, "loss": 0.3458, "step": 44470 }, { "epoch": 2.4902564676895507, "grad_norm": 1.1925069093704224, "learning_rate": 9.356526315789474e-05, "loss": 0.4582, "step": 44471 }, { "epoch": 2.49031246500168, "grad_norm": 1.1130527257919312, "learning_rate": 9.3565e-05, "loss": 0.4093, "step": 44472 }, { "epoch": 2.4903684623138087, "grad_norm": 1.208340048789978, "learning_rate": 9.356473684210528e-05, "loss": 0.3557, "step": 44473 }, { "epoch": 2.490424459625938, "grad_norm": 1.1466593742370605, "learning_rate": 9.356447368421054e-05, "loss": 0.3907, "step": 44474 }, { "epoch": 2.4904804569380667, "grad_norm": 1.4521417617797852, "learning_rate": 9.35642105263158e-05, "loss": 0.4391, "step": 44475 }, { "epoch": 2.490536454250196, "grad_norm": 1.3337092399597168, "learning_rate": 9.356394736842106e-05, "loss": 0.4372, "step": 44476 }, { "epoch": 2.490592451562325, "grad_norm": 1.2849644422531128, "learning_rate": 9.356368421052633e-05, "loss": 0.3156, "step": 44477 }, { "epoch": 2.4906484488744542, "grad_norm": 1.3593333959579468, "learning_rate": 9.356342105263159e-05, "loss": 0.4368, "step": 44478 }, { "epoch": 2.490704446186583, "grad_norm": 1.2579338550567627, "learning_rate": 9.356315789473685e-05, "loss": 0.3806, "step": 44479 }, { "epoch": 2.4907604434987123, "grad_norm": 1.108458161354065, "learning_rate": 9.356289473684211e-05, "loss": 0.4546, "step": 44480 }, { "epoch": 2.490816440810841, "grad_norm": 1.0899685621261597, "learning_rate": 9.356263157894737e-05, "loss": 0.3982, "step": 44481 }, { "epoch": 2.4908724381229703, "grad_norm": 1.3837579488754272, "learning_rate": 9.356236842105264e-05, "loss": 
0.4146, "step": 44482 }, { "epoch": 2.490928435435099, "grad_norm": 1.0728747844696045, "learning_rate": 9.35621052631579e-05, "loss": 0.3834, "step": 44483 }, { "epoch": 2.4909844327472284, "grad_norm": 1.3314669132232666, "learning_rate": 9.356184210526316e-05, "loss": 0.4853, "step": 44484 }, { "epoch": 2.491040430059357, "grad_norm": 1.450663447380066, "learning_rate": 9.356157894736842e-05, "loss": 0.3598, "step": 44485 }, { "epoch": 2.4910964273714864, "grad_norm": 1.1993194818496704, "learning_rate": 9.35613157894737e-05, "loss": 0.4042, "step": 44486 }, { "epoch": 2.491152424683615, "grad_norm": 1.5242265462875366, "learning_rate": 9.356105263157895e-05, "loss": 0.5459, "step": 44487 }, { "epoch": 2.4912084219957444, "grad_norm": 1.3309662342071533, "learning_rate": 9.356078947368421e-05, "loss": 0.565, "step": 44488 }, { "epoch": 2.491264419307873, "grad_norm": 1.1130262613296509, "learning_rate": 9.356052631578947e-05, "loss": 0.3753, "step": 44489 }, { "epoch": 2.4913204166200025, "grad_norm": 1.0250605344772339, "learning_rate": 9.356026315789475e-05, "loss": 0.3533, "step": 44490 }, { "epoch": 2.491376413932131, "grad_norm": 1.19575035572052, "learning_rate": 9.356e-05, "loss": 0.302, "step": 44491 }, { "epoch": 2.4914324112442605, "grad_norm": 1.419180989265442, "learning_rate": 9.355973684210528e-05, "loss": 0.4853, "step": 44492 }, { "epoch": 2.491488408556389, "grad_norm": 1.2002583742141724, "learning_rate": 9.355947368421052e-05, "loss": 0.3595, "step": 44493 }, { "epoch": 2.4915444058685186, "grad_norm": 1.1833614110946655, "learning_rate": 9.35592105263158e-05, "loss": 0.3289, "step": 44494 }, { "epoch": 2.491600403180647, "grad_norm": 1.2692651748657227, "learning_rate": 9.355894736842106e-05, "loss": 0.4062, "step": 44495 }, { "epoch": 2.4916564004927766, "grad_norm": 0.9845141172409058, "learning_rate": 9.355868421052632e-05, "loss": 0.3014, "step": 44496 }, { "epoch": 2.491712397804905, "grad_norm": 7.51622200012207, "learning_rate": 
9.355842105263158e-05, "loss": 0.4298, "step": 44497 }, { "epoch": 2.4917683951170346, "grad_norm": 1.1542017459869385, "learning_rate": 9.355815789473684e-05, "loss": 0.3768, "step": 44498 }, { "epoch": 2.491824392429163, "grad_norm": 1.2763515710830688, "learning_rate": 9.355789473684211e-05, "loss": 0.3632, "step": 44499 }, { "epoch": 2.4918803897412927, "grad_norm": 1.0262624025344849, "learning_rate": 9.355763157894737e-05, "loss": 0.3591, "step": 44500 }, { "epoch": 2.4919363870534212, "grad_norm": 1.1824690103530884, "learning_rate": 9.355736842105264e-05, "loss": 0.3901, "step": 44501 }, { "epoch": 2.4919923843655503, "grad_norm": 1.105539321899414, "learning_rate": 9.355710526315789e-05, "loss": 0.355, "step": 44502 }, { "epoch": 2.4920483816776793, "grad_norm": 1.0151619911193848, "learning_rate": 9.355684210526316e-05, "loss": 0.358, "step": 44503 }, { "epoch": 2.4921043789898083, "grad_norm": 1.5544389486312866, "learning_rate": 9.355657894736842e-05, "loss": 0.4465, "step": 44504 }, { "epoch": 2.4921603763019373, "grad_norm": 1.2862645387649536, "learning_rate": 9.35563157894737e-05, "loss": 0.399, "step": 44505 }, { "epoch": 2.4922163736140663, "grad_norm": 1.3210413455963135, "learning_rate": 9.355605263157896e-05, "loss": 0.6167, "step": 44506 }, { "epoch": 2.4922723709261954, "grad_norm": 1.354833960533142, "learning_rate": 9.355578947368422e-05, "loss": 0.5499, "step": 44507 }, { "epoch": 2.4923283682383244, "grad_norm": 1.7716625928878784, "learning_rate": 9.355552631578948e-05, "loss": 0.3527, "step": 44508 }, { "epoch": 2.4923843655504534, "grad_norm": 1.17331063747406, "learning_rate": 9.355526315789475e-05, "loss": 0.5329, "step": 44509 }, { "epoch": 2.4924403628625824, "grad_norm": 1.0916378498077393, "learning_rate": 9.355500000000001e-05, "loss": 0.3668, "step": 44510 }, { "epoch": 2.4924963601747114, "grad_norm": 1.2058136463165283, "learning_rate": 9.355473684210527e-05, "loss": 0.3862, "step": 44511 }, { "epoch": 2.4925523574868405, 
"grad_norm": 0.986126720905304, "learning_rate": 9.355447368421053e-05, "loss": 0.4441, "step": 44512 }, { "epoch": 2.4926083547989695, "grad_norm": 1.314240574836731, "learning_rate": 9.355421052631579e-05, "loss": 0.5099, "step": 44513 }, { "epoch": 2.4926643521110985, "grad_norm": 1.1080907583236694, "learning_rate": 9.355394736842106e-05, "loss": 0.4354, "step": 44514 }, { "epoch": 2.4927203494232275, "grad_norm": 1.100200891494751, "learning_rate": 9.355368421052632e-05, "loss": 0.3223, "step": 44515 }, { "epoch": 2.4927763467353565, "grad_norm": 1.2496306896209717, "learning_rate": 9.355342105263158e-05, "loss": 0.3977, "step": 44516 }, { "epoch": 2.4928323440474855, "grad_norm": 1.0086182355880737, "learning_rate": 9.355315789473684e-05, "loss": 0.3195, "step": 44517 }, { "epoch": 2.4928883413596146, "grad_norm": 1.2935634851455688, "learning_rate": 9.355289473684211e-05, "loss": 0.2798, "step": 44518 }, { "epoch": 2.4929443386717436, "grad_norm": 1.1985141038894653, "learning_rate": 9.355263157894737e-05, "loss": 0.4648, "step": 44519 }, { "epoch": 2.4930003359838726, "grad_norm": 1.1259708404541016, "learning_rate": 9.355236842105263e-05, "loss": 0.4877, "step": 44520 }, { "epoch": 2.4930563332960016, "grad_norm": 1.1808613538742065, "learning_rate": 9.355210526315789e-05, "loss": 0.4061, "step": 44521 }, { "epoch": 2.4931123306081306, "grad_norm": 1.2323607206344604, "learning_rate": 9.355184210526317e-05, "loss": 0.4406, "step": 44522 }, { "epoch": 2.4931683279202597, "grad_norm": 7.535837173461914, "learning_rate": 9.355157894736843e-05, "loss": 0.5043, "step": 44523 }, { "epoch": 2.4932243252323887, "grad_norm": 1.151171088218689, "learning_rate": 9.35513157894737e-05, "loss": 0.3319, "step": 44524 }, { "epoch": 2.4932803225445177, "grad_norm": 1.2301807403564453, "learning_rate": 9.355105263157894e-05, "loss": 0.505, "step": 44525 }, { "epoch": 2.4933363198566467, "grad_norm": 1.2326233386993408, "learning_rate": 9.355078947368422e-05, "loss": 0.3669, 
"step": 44526 }, { "epoch": 2.4933923171687757, "grad_norm": 1.208547830581665, "learning_rate": 9.355052631578948e-05, "loss": 0.3658, "step": 44527 }, { "epoch": 2.4934483144809048, "grad_norm": 1.1443120241165161, "learning_rate": 9.355026315789475e-05, "loss": 0.6528, "step": 44528 }, { "epoch": 2.493504311793034, "grad_norm": 1.2341182231903076, "learning_rate": 9.355000000000001e-05, "loss": 0.3968, "step": 44529 }, { "epoch": 2.493560309105163, "grad_norm": 1.1549293994903564, "learning_rate": 9.354973684210526e-05, "loss": 0.3434, "step": 44530 }, { "epoch": 2.493616306417292, "grad_norm": 1.357999563217163, "learning_rate": 9.354947368421053e-05, "loss": 0.4809, "step": 44531 }, { "epoch": 2.493672303729421, "grad_norm": 1.1309576034545898, "learning_rate": 9.354921052631579e-05, "loss": 0.3897, "step": 44532 }, { "epoch": 2.49372830104155, "grad_norm": 1.0351967811584473, "learning_rate": 9.354894736842106e-05, "loss": 0.4121, "step": 44533 }, { "epoch": 2.493784298353679, "grad_norm": 1.3602430820465088, "learning_rate": 9.354868421052632e-05, "loss": 0.5525, "step": 44534 }, { "epoch": 2.493840295665808, "grad_norm": 1.6055549383163452, "learning_rate": 9.354842105263158e-05, "loss": 0.4374, "step": 44535 }, { "epoch": 2.493896292977937, "grad_norm": 1.559765100479126, "learning_rate": 9.354815789473684e-05, "loss": 0.3399, "step": 44536 }, { "epoch": 2.493952290290066, "grad_norm": 1.170188069343567, "learning_rate": 9.354789473684212e-05, "loss": 0.3473, "step": 44537 }, { "epoch": 2.494008287602195, "grad_norm": 1.37400484085083, "learning_rate": 9.354763157894738e-05, "loss": 0.5714, "step": 44538 }, { "epoch": 2.494064284914324, "grad_norm": 1.215146541595459, "learning_rate": 9.354736842105264e-05, "loss": 0.3237, "step": 44539 }, { "epoch": 2.494120282226453, "grad_norm": 1.2419079542160034, "learning_rate": 9.35471052631579e-05, "loss": 0.5179, "step": 44540 }, { "epoch": 2.494176279538582, "grad_norm": 1.1104885339736938, "learning_rate": 
9.354684210526317e-05, "loss": 0.3554, "step": 44541 }, { "epoch": 2.494232276850711, "grad_norm": 1.4985460042953491, "learning_rate": 9.354657894736843e-05, "loss": 0.512, "step": 44542 }, { "epoch": 2.49428827416284, "grad_norm": 1.1210670471191406, "learning_rate": 9.354631578947369e-05, "loss": 0.3436, "step": 44543 }, { "epoch": 2.494344271474969, "grad_norm": 1.16505765914917, "learning_rate": 9.354605263157895e-05, "loss": 0.3715, "step": 44544 }, { "epoch": 2.494400268787098, "grad_norm": 1.1544876098632812, "learning_rate": 9.354578947368422e-05, "loss": 0.331, "step": 44545 }, { "epoch": 2.494456266099227, "grad_norm": 1.709804892539978, "learning_rate": 9.354552631578948e-05, "loss": 0.544, "step": 44546 }, { "epoch": 2.494512263411356, "grad_norm": 0.9656328558921814, "learning_rate": 9.354526315789474e-05, "loss": 0.3138, "step": 44547 }, { "epoch": 2.494568260723485, "grad_norm": 1.2797094583511353, "learning_rate": 9.3545e-05, "loss": 0.4767, "step": 44548 }, { "epoch": 2.494624258035614, "grad_norm": 1.151874303817749, "learning_rate": 9.354473684210526e-05, "loss": 0.3544, "step": 44549 }, { "epoch": 2.494680255347743, "grad_norm": 1.486685037612915, "learning_rate": 9.354447368421053e-05, "loss": 0.3793, "step": 44550 }, { "epoch": 2.494736252659872, "grad_norm": 1.0361285209655762, "learning_rate": 9.354421052631579e-05, "loss": 0.4764, "step": 44551 }, { "epoch": 2.494792249972001, "grad_norm": 1.218870997428894, "learning_rate": 9.354394736842105e-05, "loss": 0.4632, "step": 44552 }, { "epoch": 2.4948482472841302, "grad_norm": 1.3622161149978638, "learning_rate": 9.354368421052631e-05, "loss": 0.3988, "step": 44553 }, { "epoch": 2.4949042445962593, "grad_norm": 1.3340386152267456, "learning_rate": 9.354342105263159e-05, "loss": 0.4068, "step": 44554 }, { "epoch": 2.4949602419083883, "grad_norm": 1.1575779914855957, "learning_rate": 9.354315789473684e-05, "loss": 0.398, "step": 44555 }, { "epoch": 2.4950162392205173, "grad_norm": 
1.2215622663497925, "learning_rate": 9.354289473684212e-05, "loss": 0.4176, "step": 44556 }, { "epoch": 2.4950722365326463, "grad_norm": 1.2361446619033813, "learning_rate": 9.354263157894736e-05, "loss": 0.4508, "step": 44557 }, { "epoch": 2.4951282338447753, "grad_norm": 1.2982866764068604, "learning_rate": 9.354236842105264e-05, "loss": 0.411, "step": 44558 }, { "epoch": 2.4951842311569044, "grad_norm": 1.1623055934906006, "learning_rate": 9.35421052631579e-05, "loss": 0.3802, "step": 44559 }, { "epoch": 2.4952402284690334, "grad_norm": 1.106780767440796, "learning_rate": 9.354184210526317e-05, "loss": 0.301, "step": 44560 }, { "epoch": 2.4952962257811624, "grad_norm": 1.0043548345565796, "learning_rate": 9.354157894736843e-05, "loss": 0.3523, "step": 44561 }, { "epoch": 2.4953522230932914, "grad_norm": 1.3794631958007812, "learning_rate": 9.354131578947369e-05, "loss": 0.3433, "step": 44562 }, { "epoch": 2.4954082204054204, "grad_norm": 1.2128015756607056, "learning_rate": 9.354105263157895e-05, "loss": 0.477, "step": 44563 }, { "epoch": 2.4954642177175494, "grad_norm": 1.1903910636901855, "learning_rate": 9.354078947368422e-05, "loss": 0.3965, "step": 44564 }, { "epoch": 2.4955202150296785, "grad_norm": 1.2743923664093018, "learning_rate": 9.354052631578948e-05, "loss": 0.5635, "step": 44565 }, { "epoch": 2.4955762123418075, "grad_norm": 1.1189897060394287, "learning_rate": 9.354026315789474e-05, "loss": 0.3201, "step": 44566 }, { "epoch": 2.4956322096539365, "grad_norm": 1.3290870189666748, "learning_rate": 9.354e-05, "loss": 0.4668, "step": 44567 }, { "epoch": 2.4956882069660655, "grad_norm": 0.9568538069725037, "learning_rate": 9.353973684210526e-05, "loss": 0.3379, "step": 44568 }, { "epoch": 2.4957442042781945, "grad_norm": 1.2523893117904663, "learning_rate": 9.353947368421054e-05, "loss": 0.3831, "step": 44569 }, { "epoch": 2.4958002015903236, "grad_norm": 1.2217408418655396, "learning_rate": 9.35392105263158e-05, "loss": 0.4256, "step": 44570 }, { 
"epoch": 2.4958561989024526, "grad_norm": 1.054660439491272, "learning_rate": 9.353894736842105e-05, "loss": 0.4205, "step": 44571 }, { "epoch": 2.4959121962145816, "grad_norm": 1.337009072303772, "learning_rate": 9.353868421052631e-05, "loss": 0.5731, "step": 44572 }, { "epoch": 2.4959681935267106, "grad_norm": 1.045000433921814, "learning_rate": 9.353842105263159e-05, "loss": 0.3476, "step": 44573 }, { "epoch": 2.4960241908388396, "grad_norm": 1.155574917793274, "learning_rate": 9.353815789473685e-05, "loss": 0.3721, "step": 44574 }, { "epoch": 2.4960801881509687, "grad_norm": 1.5082155466079712, "learning_rate": 9.353789473684211e-05, "loss": 0.3882, "step": 44575 }, { "epoch": 2.4961361854630977, "grad_norm": 1.3360319137573242, "learning_rate": 9.353763157894737e-05, "loss": 0.4484, "step": 44576 }, { "epoch": 2.4961921827752267, "grad_norm": 1.323848009109497, "learning_rate": 9.353736842105264e-05, "loss": 0.4051, "step": 44577 }, { "epoch": 2.4962481800873557, "grad_norm": 9.819396018981934, "learning_rate": 9.35371052631579e-05, "loss": 0.4802, "step": 44578 }, { "epoch": 2.4963041773994847, "grad_norm": 1.1856976747512817, "learning_rate": 9.353684210526317e-05, "loss": 0.4157, "step": 44579 }, { "epoch": 2.4963601747116138, "grad_norm": 1.045804500579834, "learning_rate": 9.353657894736842e-05, "loss": 0.3884, "step": 44580 }, { "epoch": 2.4964161720237428, "grad_norm": 1.268300175666809, "learning_rate": 9.353631578947369e-05, "loss": 0.503, "step": 44581 }, { "epoch": 2.496472169335872, "grad_norm": 1.4763926267623901, "learning_rate": 9.353605263157895e-05, "loss": 0.5203, "step": 44582 }, { "epoch": 2.496528166648001, "grad_norm": 1.544094204902649, "learning_rate": 9.353578947368421e-05, "loss": 0.3919, "step": 44583 }, { "epoch": 2.49658416396013, "grad_norm": 1.068217158317566, "learning_rate": 9.353552631578949e-05, "loss": 0.3457, "step": 44584 }, { "epoch": 2.496640161272259, "grad_norm": 1.3168879747390747, "learning_rate": 
9.353526315789473e-05, "loss": 0.3624, "step": 44585 }, { "epoch": 2.496696158584388, "grad_norm": 1.2534552812576294, "learning_rate": 9.3535e-05, "loss": 0.3504, "step": 44586 }, { "epoch": 2.496752155896517, "grad_norm": 1.0770039558410645, "learning_rate": 9.353473684210526e-05, "loss": 0.4354, "step": 44587 }, { "epoch": 2.496808153208646, "grad_norm": 1.4050458669662476, "learning_rate": 9.353447368421054e-05, "loss": 0.4579, "step": 44588 }, { "epoch": 2.496864150520775, "grad_norm": 1.3040722608566284, "learning_rate": 9.35342105263158e-05, "loss": 0.4504, "step": 44589 }, { "epoch": 2.496920147832904, "grad_norm": 1.0375791788101196, "learning_rate": 9.353394736842106e-05, "loss": 0.4131, "step": 44590 }, { "epoch": 2.496976145145033, "grad_norm": 1.1411267518997192, "learning_rate": 9.353368421052632e-05, "loss": 0.38, "step": 44591 }, { "epoch": 2.497032142457162, "grad_norm": 1.0989536046981812, "learning_rate": 9.353342105263159e-05, "loss": 0.4544, "step": 44592 }, { "epoch": 2.497088139769291, "grad_norm": 1.0053738355636597, "learning_rate": 9.353315789473685e-05, "loss": 0.367, "step": 44593 }, { "epoch": 2.49714413708142, "grad_norm": 1.3539074659347534, "learning_rate": 9.353289473684211e-05, "loss": 0.4807, "step": 44594 }, { "epoch": 2.497200134393549, "grad_norm": 1.2240935564041138, "learning_rate": 9.353263157894737e-05, "loss": 0.5392, "step": 44595 }, { "epoch": 2.497256131705678, "grad_norm": 1.0673106908798218, "learning_rate": 9.353236842105264e-05, "loss": 0.3482, "step": 44596 }, { "epoch": 2.497312129017807, "grad_norm": 1.2816349267959595, "learning_rate": 9.35321052631579e-05, "loss": 0.6039, "step": 44597 }, { "epoch": 2.497368126329936, "grad_norm": 1.082727074623108, "learning_rate": 9.353184210526316e-05, "loss": 0.4048, "step": 44598 }, { "epoch": 2.497424123642065, "grad_norm": 1.451478362083435, "learning_rate": 9.353157894736842e-05, "loss": 0.3925, "step": 44599 }, { "epoch": 2.497480120954194, "grad_norm": 
1.020350694656372, "learning_rate": 9.353131578947368e-05, "loss": 0.325, "step": 44600 }, { "epoch": 2.497536118266323, "grad_norm": 1.119593858718872, "learning_rate": 9.353105263157895e-05, "loss": 0.3799, "step": 44601 }, { "epoch": 2.497592115578452, "grad_norm": 1.174675464630127, "learning_rate": 9.353078947368421e-05, "loss": 0.3132, "step": 44602 }, { "epoch": 2.497648112890581, "grad_norm": 1.2800489664077759, "learning_rate": 9.353052631578947e-05, "loss": 0.4283, "step": 44603 }, { "epoch": 2.49770411020271, "grad_norm": 1.4173389673233032, "learning_rate": 9.353026315789473e-05, "loss": 0.4295, "step": 44604 }, { "epoch": 2.4977601075148392, "grad_norm": 1.1316266059875488, "learning_rate": 9.353000000000001e-05, "loss": 0.4051, "step": 44605 }, { "epoch": 2.4978161048269683, "grad_norm": 1.1593562364578247, "learning_rate": 9.352973684210527e-05, "loss": 0.3948, "step": 44606 }, { "epoch": 2.4978721021390973, "grad_norm": 1.2337074279785156, "learning_rate": 9.352947368421053e-05, "loss": 0.4017, "step": 44607 }, { "epoch": 2.4979280994512263, "grad_norm": 0.9460651874542236, "learning_rate": 9.352921052631579e-05, "loss": 0.4544, "step": 44608 }, { "epoch": 2.4979840967633553, "grad_norm": 1.1809152364730835, "learning_rate": 9.352894736842106e-05, "loss": 0.3425, "step": 44609 }, { "epoch": 2.4980400940754843, "grad_norm": 1.2045084238052368, "learning_rate": 9.352868421052632e-05, "loss": 0.3817, "step": 44610 }, { "epoch": 2.4980960913876133, "grad_norm": 1.1365395784378052, "learning_rate": 9.352842105263159e-05, "loss": 0.3534, "step": 44611 }, { "epoch": 2.4981520886997424, "grad_norm": 1.476328730583191, "learning_rate": 9.352815789473684e-05, "loss": 0.4412, "step": 44612 }, { "epoch": 2.4982080860118714, "grad_norm": 1.2751727104187012, "learning_rate": 9.352789473684211e-05, "loss": 0.439, "step": 44613 }, { "epoch": 2.4982640833240004, "grad_norm": 1.4420068264007568, "learning_rate": 9.352763157894737e-05, "loss": 0.6271, "step": 44614 }, 
{ "epoch": 2.4983200806361294, "grad_norm": 1.2181980609893799, "learning_rate": 9.352736842105265e-05, "loss": 0.3874, "step": 44615 }, { "epoch": 2.4983760779482584, "grad_norm": 1.0598855018615723, "learning_rate": 9.35271052631579e-05, "loss": 0.3678, "step": 44616 }, { "epoch": 2.4984320752603875, "grad_norm": 1.1201733350753784, "learning_rate": 9.352684210526315e-05, "loss": 0.4082, "step": 44617 }, { "epoch": 2.4984880725725165, "grad_norm": 1.218275547027588, "learning_rate": 9.352657894736842e-05, "loss": 0.3683, "step": 44618 }, { "epoch": 2.4985440698846455, "grad_norm": 1.186571717262268, "learning_rate": 9.352631578947368e-05, "loss": 0.4404, "step": 44619 }, { "epoch": 2.4986000671967745, "grad_norm": 1.2111152410507202, "learning_rate": 9.352605263157896e-05, "loss": 0.3227, "step": 44620 }, { "epoch": 2.4986560645089035, "grad_norm": 1.0418857336044312, "learning_rate": 9.352578947368422e-05, "loss": 0.3649, "step": 44621 }, { "epoch": 2.4987120618210326, "grad_norm": 1.175389051437378, "learning_rate": 9.352552631578948e-05, "loss": 0.381, "step": 44622 }, { "epoch": 2.4987680591331616, "grad_norm": 1.2156866788864136, "learning_rate": 9.352526315789474e-05, "loss": 0.3367, "step": 44623 }, { "epoch": 2.4988240564452906, "grad_norm": 1.110910415649414, "learning_rate": 9.352500000000001e-05, "loss": 0.427, "step": 44624 }, { "epoch": 2.4988800537574196, "grad_norm": 1.2534229755401611, "learning_rate": 9.352473684210527e-05, "loss": 0.3823, "step": 44625 }, { "epoch": 2.4989360510695486, "grad_norm": 1.2188228368759155, "learning_rate": 9.352447368421053e-05, "loss": 0.4874, "step": 44626 }, { "epoch": 2.4989920483816777, "grad_norm": 1.1332389116287231, "learning_rate": 9.352421052631579e-05, "loss": 0.38, "step": 44627 }, { "epoch": 2.4990480456938067, "grad_norm": 1.098968505859375, "learning_rate": 9.352394736842106e-05, "loss": 0.5178, "step": 44628 }, { "epoch": 2.4991040430059357, "grad_norm": 1.2652230262756348, "learning_rate": 
9.352368421052632e-05, "loss": 0.4355, "step": 44629 }, { "epoch": 2.4991600403180647, "grad_norm": 1.156794786453247, "learning_rate": 9.352342105263158e-05, "loss": 0.5021, "step": 44630 }, { "epoch": 2.4992160376301937, "grad_norm": 1.033754825592041, "learning_rate": 9.352315789473684e-05, "loss": 0.3085, "step": 44631 }, { "epoch": 2.4992720349423228, "grad_norm": 1.0400224924087524, "learning_rate": 9.352289473684211e-05, "loss": 0.313, "step": 44632 }, { "epoch": 2.4993280322544518, "grad_norm": 1.517809510231018, "learning_rate": 9.352263157894737e-05, "loss": 0.3092, "step": 44633 }, { "epoch": 2.499384029566581, "grad_norm": 1.084779977798462, "learning_rate": 9.352236842105263e-05, "loss": 0.3796, "step": 44634 }, { "epoch": 2.49944002687871, "grad_norm": 0.949918270111084, "learning_rate": 9.35221052631579e-05, "loss": 0.3283, "step": 44635 }, { "epoch": 2.499496024190839, "grad_norm": 1.3239248991012573, "learning_rate": 9.352184210526315e-05, "loss": 0.494, "step": 44636 }, { "epoch": 2.499552021502968, "grad_norm": 19.85563850402832, "learning_rate": 9.352157894736843e-05, "loss": 0.4355, "step": 44637 }, { "epoch": 2.499608018815097, "grad_norm": 1.2303223609924316, "learning_rate": 9.352131578947369e-05, "loss": 0.615, "step": 44638 }, { "epoch": 2.499664016127226, "grad_norm": 1.1118488311767578, "learning_rate": 9.352105263157896e-05, "loss": 0.4323, "step": 44639 }, { "epoch": 2.499720013439355, "grad_norm": 1.187358021736145, "learning_rate": 9.35207894736842e-05, "loss": 0.3848, "step": 44640 }, { "epoch": 2.499776010751484, "grad_norm": 1.2090603113174438, "learning_rate": 9.352052631578948e-05, "loss": 0.3726, "step": 44641 }, { "epoch": 2.499832008063613, "grad_norm": 1.228467583656311, "learning_rate": 9.352026315789474e-05, "loss": 0.3517, "step": 44642 }, { "epoch": 2.499888005375742, "grad_norm": 1.5968700647354126, "learning_rate": 9.352000000000001e-05, "loss": 0.379, "step": 44643 }, { "epoch": 2.499944002687871, "grad_norm": 
1.2593424320220947, "learning_rate": 9.351973684210526e-05, "loss": 0.4138, "step": 44644 }, { "epoch": 2.5, "grad_norm": 1.3016573190689087, "learning_rate": 9.351947368421053e-05, "loss": 0.3671, "step": 44645 }, { "epoch": 2.500055997312129, "grad_norm": 1.1840263605117798, "learning_rate": 9.351921052631579e-05, "loss": 0.4773, "step": 44646 }, { "epoch": 2.500111994624258, "grad_norm": 1.2678768634796143, "learning_rate": 9.351894736842107e-05, "loss": 0.3908, "step": 44647 }, { "epoch": 2.500167991936387, "grad_norm": 0.993491530418396, "learning_rate": 9.351868421052632e-05, "loss": 0.3165, "step": 44648 }, { "epoch": 2.500223989248516, "grad_norm": 2.2869937419891357, "learning_rate": 9.351842105263158e-05, "loss": 0.4759, "step": 44649 }, { "epoch": 2.500279986560645, "grad_norm": 0.9921106100082397, "learning_rate": 9.351815789473684e-05, "loss": 0.377, "step": 44650 }, { "epoch": 2.500335983872774, "grad_norm": 1.2296686172485352, "learning_rate": 9.35178947368421e-05, "loss": 0.4371, "step": 44651 }, { "epoch": 2.500391981184903, "grad_norm": 1.1348974704742432, "learning_rate": 9.351763157894738e-05, "loss": 0.3865, "step": 44652 }, { "epoch": 2.500447978497032, "grad_norm": 1.2140257358551025, "learning_rate": 9.351736842105264e-05, "loss": 0.4715, "step": 44653 }, { "epoch": 2.500503975809161, "grad_norm": 1.4428361654281616, "learning_rate": 9.35171052631579e-05, "loss": 0.4581, "step": 44654 }, { "epoch": 2.50055997312129, "grad_norm": 1.1700103282928467, "learning_rate": 9.351684210526316e-05, "loss": 0.3407, "step": 44655 }, { "epoch": 2.500615970433419, "grad_norm": 1.299353003501892, "learning_rate": 9.351657894736843e-05, "loss": 0.4395, "step": 44656 }, { "epoch": 2.5006719677455482, "grad_norm": 1.2602280378341675, "learning_rate": 9.351631578947369e-05, "loss": 0.4957, "step": 44657 }, { "epoch": 2.5007279650576772, "grad_norm": 1.3617912530899048, "learning_rate": 9.351605263157895e-05, "loss": 0.4178, "step": 44658 }, { "epoch": 
2.5007839623698063, "grad_norm": 1.1706573963165283, "learning_rate": 9.351578947368421e-05, "loss": 0.316, "step": 44659 }, { "epoch": 2.5008399596819353, "grad_norm": 1.237616777420044, "learning_rate": 9.351552631578948e-05, "loss": 0.3374, "step": 44660 }, { "epoch": 2.5008959569940643, "grad_norm": 1.6490988731384277, "learning_rate": 9.351526315789474e-05, "loss": 0.4088, "step": 44661 }, { "epoch": 2.5009519543061933, "grad_norm": 1.238945484161377, "learning_rate": 9.3515e-05, "loss": 0.6542, "step": 44662 }, { "epoch": 2.5010079516183223, "grad_norm": 1.5903511047363281, "learning_rate": 9.351473684210526e-05, "loss": 0.4516, "step": 44663 }, { "epoch": 2.5010639489304514, "grad_norm": 1.311757206916809, "learning_rate": 9.351447368421053e-05, "loss": 0.4234, "step": 44664 }, { "epoch": 2.5011199462425804, "grad_norm": 1.1380364894866943, "learning_rate": 9.35142105263158e-05, "loss": 0.3385, "step": 44665 }, { "epoch": 2.5011759435547094, "grad_norm": 1.1873936653137207, "learning_rate": 9.351394736842107e-05, "loss": 0.4796, "step": 44666 }, { "epoch": 2.5012319408668384, "grad_norm": 1.6725373268127441, "learning_rate": 9.351368421052631e-05, "loss": 0.4425, "step": 44667 }, { "epoch": 2.5012879381789674, "grad_norm": 1.152907371520996, "learning_rate": 9.351342105263157e-05, "loss": 0.3858, "step": 44668 }, { "epoch": 2.5013439354910965, "grad_norm": 1.0271967649459839, "learning_rate": 9.351315789473685e-05, "loss": 0.441, "step": 44669 }, { "epoch": 2.5013999328032255, "grad_norm": 1.0780562162399292, "learning_rate": 9.35128947368421e-05, "loss": 0.3655, "step": 44670 }, { "epoch": 2.5014559301153545, "grad_norm": 0.9843218326568604, "learning_rate": 9.351263157894738e-05, "loss": 0.2872, "step": 44671 }, { "epoch": 2.5015119274274835, "grad_norm": 1.1521226167678833, "learning_rate": 9.351236842105263e-05, "loss": 0.3923, "step": 44672 }, { "epoch": 2.5015679247396125, "grad_norm": 1.3495076894760132, "learning_rate": 9.35121052631579e-05, "loss": 
0.4406, "step": 44673 }, { "epoch": 2.5016239220517416, "grad_norm": 1.1498416662216187, "learning_rate": 9.351184210526316e-05, "loss": 0.3942, "step": 44674 }, { "epoch": 2.5016799193638706, "grad_norm": 1.2754565477371216, "learning_rate": 9.351157894736843e-05, "loss": 0.2719, "step": 44675 }, { "epoch": 2.5017359166759996, "grad_norm": 1.2649343013763428, "learning_rate": 9.351131578947369e-05, "loss": 0.476, "step": 44676 }, { "epoch": 2.5017919139881286, "grad_norm": 0.9628691673278809, "learning_rate": 9.351105263157895e-05, "loss": 0.3764, "step": 44677 }, { "epoch": 2.5018479113002576, "grad_norm": 1.0442384481430054, "learning_rate": 9.351078947368421e-05, "loss": 0.3362, "step": 44678 }, { "epoch": 2.5019039086123867, "grad_norm": 1.1974157094955444, "learning_rate": 9.351052631578948e-05, "loss": 0.4438, "step": 44679 }, { "epoch": 2.5019599059245157, "grad_norm": 1.1462780237197876, "learning_rate": 9.351026315789474e-05, "loss": 0.5253, "step": 44680 }, { "epoch": 2.5020159032366447, "grad_norm": 1.182629108428955, "learning_rate": 9.351e-05, "loss": 0.3761, "step": 44681 }, { "epoch": 2.5020719005487737, "grad_norm": 1.0513659715652466, "learning_rate": 9.350973684210526e-05, "loss": 0.3578, "step": 44682 }, { "epoch": 2.5021278978609027, "grad_norm": 1.19379723072052, "learning_rate": 9.350947368421054e-05, "loss": 0.4054, "step": 44683 }, { "epoch": 2.5021838951730317, "grad_norm": 1.318214774131775, "learning_rate": 9.35092105263158e-05, "loss": 0.4571, "step": 44684 }, { "epoch": 2.5022398924851608, "grad_norm": 1.1367806196212769, "learning_rate": 9.350894736842106e-05, "loss": 0.4094, "step": 44685 }, { "epoch": 2.50229588979729, "grad_norm": 1.2601944208145142, "learning_rate": 9.350868421052632e-05, "loss": 0.4854, "step": 44686 }, { "epoch": 2.502351887109419, "grad_norm": 1.2000831365585327, "learning_rate": 9.350842105263158e-05, "loss": 0.389, "step": 44687 }, { "epoch": 2.502407884421548, "grad_norm": 1.3486360311508179, 
"learning_rate": 9.350815789473685e-05, "loss": 0.3434, "step": 44688 }, { "epoch": 2.502463881733677, "grad_norm": 1.184953212738037, "learning_rate": 9.350789473684211e-05, "loss": 0.348, "step": 44689 }, { "epoch": 2.502519879045806, "grad_norm": 1.2773815393447876, "learning_rate": 9.350763157894737e-05, "loss": 0.5166, "step": 44690 }, { "epoch": 2.502575876357935, "grad_norm": 1.2060847282409668, "learning_rate": 9.350736842105263e-05, "loss": 0.4637, "step": 44691 }, { "epoch": 2.502631873670064, "grad_norm": 1.0748361349105835, "learning_rate": 9.35071052631579e-05, "loss": 0.3526, "step": 44692 }, { "epoch": 2.502687870982193, "grad_norm": 1.1506118774414062, "learning_rate": 9.350684210526316e-05, "loss": 0.3724, "step": 44693 }, { "epoch": 2.502743868294322, "grad_norm": 1.3560246229171753, "learning_rate": 9.350657894736843e-05, "loss": 0.4857, "step": 44694 }, { "epoch": 2.502799865606451, "grad_norm": 1.2747424840927124, "learning_rate": 9.350631578947368e-05, "loss": 0.3745, "step": 44695 }, { "epoch": 2.50285586291858, "grad_norm": 1.3685498237609863, "learning_rate": 9.350605263157895e-05, "loss": 0.4849, "step": 44696 }, { "epoch": 2.502911860230709, "grad_norm": 1.009788155555725, "learning_rate": 9.350578947368421e-05, "loss": 0.3425, "step": 44697 }, { "epoch": 2.502967857542838, "grad_norm": 1.2830582857131958, "learning_rate": 9.350552631578949e-05, "loss": 0.4866, "step": 44698 }, { "epoch": 2.503023854854967, "grad_norm": 1.2266055345535278, "learning_rate": 9.350526315789473e-05, "loss": 0.342, "step": 44699 }, { "epoch": 2.503079852167096, "grad_norm": 1.1327704191207886, "learning_rate": 9.350500000000001e-05, "loss": 0.3717, "step": 44700 }, { "epoch": 2.503135849479225, "grad_norm": 1.1795837879180908, "learning_rate": 9.350473684210527e-05, "loss": 0.4293, "step": 44701 }, { "epoch": 2.503191846791354, "grad_norm": 1.298890233039856, "learning_rate": 9.350447368421054e-05, "loss": 0.4682, "step": 44702 }, { "epoch": 2.503247844103483, 
"grad_norm": 1.1279315948486328, "learning_rate": 9.35042105263158e-05, "loss": 0.3738, "step": 44703 }, { "epoch": 2.503303841415612, "grad_norm": 1.2725718021392822, "learning_rate": 9.350394736842105e-05, "loss": 0.3567, "step": 44704 }, { "epoch": 2.503359838727741, "grad_norm": 1.3284419775009155, "learning_rate": 9.350368421052632e-05, "loss": 0.3593, "step": 44705 }, { "epoch": 2.50341583603987, "grad_norm": 1.8396999835968018, "learning_rate": 9.350342105263158e-05, "loss": 0.5817, "step": 44706 }, { "epoch": 2.503471833351999, "grad_norm": 1.316655158996582, "learning_rate": 9.350315789473685e-05, "loss": 0.4341, "step": 44707 }, { "epoch": 2.503527830664128, "grad_norm": 1.2661175727844238, "learning_rate": 9.350289473684211e-05, "loss": 0.3674, "step": 44708 }, { "epoch": 2.5035838279762572, "grad_norm": 1.917382001876831, "learning_rate": 9.350263157894737e-05, "loss": 0.3928, "step": 44709 }, { "epoch": 2.5036398252883862, "grad_norm": 1.1785186529159546, "learning_rate": 9.350236842105263e-05, "loss": 0.4208, "step": 44710 }, { "epoch": 2.5036958226005153, "grad_norm": 1.1549183130264282, "learning_rate": 9.35021052631579e-05, "loss": 0.4218, "step": 44711 }, { "epoch": 2.5037518199126443, "grad_norm": 1.2087838649749756, "learning_rate": 9.350184210526316e-05, "loss": 0.368, "step": 44712 }, { "epoch": 2.5038078172247733, "grad_norm": 1.0444931983947754, "learning_rate": 9.350157894736842e-05, "loss": 0.3661, "step": 44713 }, { "epoch": 2.5038638145369023, "grad_norm": 1.3926641941070557, "learning_rate": 9.350131578947368e-05, "loss": 0.3647, "step": 44714 }, { "epoch": 2.5039198118490313, "grad_norm": 1.0948355197906494, "learning_rate": 9.350105263157896e-05, "loss": 0.2584, "step": 44715 }, { "epoch": 2.5039758091611604, "grad_norm": 1.3165448904037476, "learning_rate": 9.350078947368422e-05, "loss": 0.5379, "step": 44716 }, { "epoch": 2.5040318064732894, "grad_norm": 1.1503769159317017, "learning_rate": 9.350052631578948e-05, "loss": 0.3543, 
"step": 44717 }, { "epoch": 2.5040878037854184, "grad_norm": 1.5424970388412476, "learning_rate": 9.350026315789474e-05, "loss": 0.4101, "step": 44718 }, { "epoch": 2.5041438010975474, "grad_norm": 1.2243847846984863, "learning_rate": 9.350000000000001e-05, "loss": 0.3538, "step": 44719 }, { "epoch": 2.5041997984096764, "grad_norm": 1.2283695936203003, "learning_rate": 9.349973684210527e-05, "loss": 0.3983, "step": 44720 }, { "epoch": 2.5042557957218055, "grad_norm": 1.2716865539550781, "learning_rate": 9.349947368421053e-05, "loss": 0.5418, "step": 44721 }, { "epoch": 2.5043117930339345, "grad_norm": 1.2942513227462769, "learning_rate": 9.349921052631579e-05, "loss": 0.3613, "step": 44722 }, { "epoch": 2.5043677903460635, "grad_norm": 1.0981354713439941, "learning_rate": 9.349894736842105e-05, "loss": 0.3238, "step": 44723 }, { "epoch": 2.5044237876581925, "grad_norm": 1.3400903940200806, "learning_rate": 9.349868421052632e-05, "loss": 0.3353, "step": 44724 }, { "epoch": 2.5044797849703215, "grad_norm": 1.2113410234451294, "learning_rate": 9.349842105263158e-05, "loss": 0.3542, "step": 44725 }, { "epoch": 2.5045357822824506, "grad_norm": 1.2446471452713013, "learning_rate": 9.349815789473685e-05, "loss": 0.3027, "step": 44726 }, { "epoch": 2.5045917795945796, "grad_norm": 1.138442039489746, "learning_rate": 9.34978947368421e-05, "loss": 0.3691, "step": 44727 }, { "epoch": 2.5046477769067086, "grad_norm": 1.3231689929962158, "learning_rate": 9.349763157894737e-05, "loss": 0.3512, "step": 44728 }, { "epoch": 2.5047037742188376, "grad_norm": 1.287824273109436, "learning_rate": 9.349736842105263e-05, "loss": 0.381, "step": 44729 }, { "epoch": 2.5047597715309666, "grad_norm": 1.1315996646881104, "learning_rate": 9.349710526315791e-05, "loss": 0.3756, "step": 44730 }, { "epoch": 2.5048157688430956, "grad_norm": 1.2799986600875854, "learning_rate": 9.349684210526317e-05, "loss": 0.3874, "step": 44731 }, { "epoch": 2.5048717661552247, "grad_norm": 1.2432392835617065, 
"learning_rate": 9.349657894736843e-05, "loss": 0.2736, "step": 44732 }, { "epoch": 2.5049277634673537, "grad_norm": 1.2550952434539795, "learning_rate": 9.349631578947369e-05, "loss": 0.4735, "step": 44733 }, { "epoch": 2.5049837607794827, "grad_norm": 1.1441066265106201, "learning_rate": 9.349605263157896e-05, "loss": 0.372, "step": 44734 }, { "epoch": 2.5050397580916117, "grad_norm": 1.1870216131210327, "learning_rate": 9.349578947368422e-05, "loss": 0.386, "step": 44735 }, { "epoch": 2.5050957554037407, "grad_norm": 1.2146191596984863, "learning_rate": 9.349552631578948e-05, "loss": 0.3392, "step": 44736 }, { "epoch": 2.5051517527158698, "grad_norm": 1.1965885162353516, "learning_rate": 9.349526315789474e-05, "loss": 0.4497, "step": 44737 }, { "epoch": 2.505207750027999, "grad_norm": 1.1423410177230835, "learning_rate": 9.3495e-05, "loss": 0.3101, "step": 44738 }, { "epoch": 2.505263747340128, "grad_norm": 1.023838996887207, "learning_rate": 9.349473684210527e-05, "loss": 0.2957, "step": 44739 }, { "epoch": 2.505319744652257, "grad_norm": 1.3482131958007812, "learning_rate": 9.349447368421053e-05, "loss": 0.5415, "step": 44740 }, { "epoch": 2.505375741964386, "grad_norm": 1.3883073329925537, "learning_rate": 9.349421052631579e-05, "loss": 0.5359, "step": 44741 }, { "epoch": 2.505431739276515, "grad_norm": 1.2522249221801758, "learning_rate": 9.349394736842105e-05, "loss": 0.3965, "step": 44742 }, { "epoch": 2.505487736588644, "grad_norm": 1.0210261344909668, "learning_rate": 9.349368421052632e-05, "loss": 0.2961, "step": 44743 }, { "epoch": 2.505543733900773, "grad_norm": 1.5919770002365112, "learning_rate": 9.349342105263158e-05, "loss": 0.4089, "step": 44744 }, { "epoch": 2.505599731212902, "grad_norm": 1.1701679229736328, "learning_rate": 9.349315789473684e-05, "loss": 0.3503, "step": 44745 }, { "epoch": 2.505655728525031, "grad_norm": 1.1423298120498657, "learning_rate": 9.34928947368421e-05, "loss": 0.4315, "step": 44746 }, { "epoch": 2.50571172583716, 
"grad_norm": 1.3764071464538574, "learning_rate": 9.349263157894738e-05, "loss": 0.3889, "step": 44747 }, { "epoch": 2.505767723149289, "grad_norm": 1.380503535270691, "learning_rate": 9.349236842105264e-05, "loss": 0.4675, "step": 44748 }, { "epoch": 2.505823720461418, "grad_norm": 1.128568410873413, "learning_rate": 9.349210526315791e-05, "loss": 0.4233, "step": 44749 }, { "epoch": 2.505879717773547, "grad_norm": 1.4740972518920898, "learning_rate": 9.349184210526316e-05, "loss": 0.4184, "step": 44750 }, { "epoch": 2.505935715085676, "grad_norm": 1.5354292392730713, "learning_rate": 9.349157894736843e-05, "loss": 0.6251, "step": 44751 }, { "epoch": 2.505991712397805, "grad_norm": 1.0475372076034546, "learning_rate": 9.349131578947369e-05, "loss": 0.3222, "step": 44752 }, { "epoch": 2.506047709709934, "grad_norm": 1.1628245115280151, "learning_rate": 9.349105263157896e-05, "loss": 0.4823, "step": 44753 }, { "epoch": 2.506103707022063, "grad_norm": 2.2967488765716553, "learning_rate": 9.349078947368421e-05, "loss": 0.3765, "step": 44754 }, { "epoch": 2.506159704334192, "grad_norm": 1.2709400653839111, "learning_rate": 9.349052631578947e-05, "loss": 0.4373, "step": 44755 }, { "epoch": 2.506215701646321, "grad_norm": 0.9283302426338196, "learning_rate": 9.349026315789474e-05, "loss": 0.3354, "step": 44756 }, { "epoch": 2.50627169895845, "grad_norm": 1.3800677061080933, "learning_rate": 9.349e-05, "loss": 0.5264, "step": 44757 }, { "epoch": 2.506327696270579, "grad_norm": 1.177882194519043, "learning_rate": 9.348973684210527e-05, "loss": 0.4655, "step": 44758 }, { "epoch": 2.506383693582708, "grad_norm": 1.2900073528289795, "learning_rate": 9.348947368421052e-05, "loss": 0.3857, "step": 44759 }, { "epoch": 2.506439690894837, "grad_norm": 1.0525318384170532, "learning_rate": 9.34892105263158e-05, "loss": 0.3985, "step": 44760 }, { "epoch": 2.506495688206966, "grad_norm": 1.3303323984146118, "learning_rate": 9.348894736842105e-05, "loss": 0.3265, "step": 44761 }, { 
"epoch": 2.5065516855190952, "grad_norm": 1.3088613748550415, "learning_rate": 9.348868421052633e-05, "loss": 0.3647, "step": 44762 }, { "epoch": 2.5066076828312243, "grad_norm": 1.0863131284713745, "learning_rate": 9.348842105263159e-05, "loss": 0.4025, "step": 44763 }, { "epoch": 2.5066636801433533, "grad_norm": 1.4102165699005127, "learning_rate": 9.348815789473685e-05, "loss": 0.3959, "step": 44764 }, { "epoch": 2.5067196774554823, "grad_norm": 1.0793423652648926, "learning_rate": 9.34878947368421e-05, "loss": 0.5526, "step": 44765 }, { "epoch": 2.5067756747676113, "grad_norm": 1.2461094856262207, "learning_rate": 9.348763157894738e-05, "loss": 0.3942, "step": 44766 }, { "epoch": 2.5068316720797403, "grad_norm": 2.523122549057007, "learning_rate": 9.348736842105264e-05, "loss": 0.3373, "step": 44767 }, { "epoch": 2.5068876693918694, "grad_norm": 1.1954014301300049, "learning_rate": 9.34871052631579e-05, "loss": 0.3472, "step": 44768 }, { "epoch": 2.5069436667039984, "grad_norm": 1.023280382156372, "learning_rate": 9.348684210526316e-05, "loss": 0.3267, "step": 44769 }, { "epoch": 2.506999664016127, "grad_norm": 1.2781025171279907, "learning_rate": 9.348657894736843e-05, "loss": 0.3043, "step": 44770 }, { "epoch": 2.5070556613282564, "grad_norm": 1.1376464366912842, "learning_rate": 9.348631578947369e-05, "loss": 0.3542, "step": 44771 }, { "epoch": 2.507111658640385, "grad_norm": 1.0966333150863647, "learning_rate": 9.348605263157895e-05, "loss": 0.3635, "step": 44772 }, { "epoch": 2.5071676559525145, "grad_norm": 1.1147621870040894, "learning_rate": 9.348578947368421e-05, "loss": 0.4341, "step": 44773 }, { "epoch": 2.507223653264643, "grad_norm": 0.9573778510093689, "learning_rate": 9.348552631578947e-05, "loss": 0.2992, "step": 44774 }, { "epoch": 2.5072796505767725, "grad_norm": 1.322076678276062, "learning_rate": 9.348526315789474e-05, "loss": 0.4546, "step": 44775 }, { "epoch": 2.507335647888901, "grad_norm": 1.108537197113037, "learning_rate": 9.3485e-05, 
"loss": 0.3718, "step": 44776 }, { "epoch": 2.5073916452010305, "grad_norm": 1.3583650588989258, "learning_rate": 9.348473684210526e-05, "loss": 0.5452, "step": 44777 }, { "epoch": 2.507447642513159, "grad_norm": 1.4060158729553223, "learning_rate": 9.348447368421052e-05, "loss": 0.3256, "step": 44778 }, { "epoch": 2.5075036398252886, "grad_norm": 0.9820559024810791, "learning_rate": 9.34842105263158e-05, "loss": 0.3332, "step": 44779 }, { "epoch": 2.507559637137417, "grad_norm": 1.1356019973754883, "learning_rate": 9.348394736842106e-05, "loss": 0.4063, "step": 44780 }, { "epoch": 2.5076156344495466, "grad_norm": 0.9587997794151306, "learning_rate": 9.348368421052633e-05, "loss": 0.3454, "step": 44781 }, { "epoch": 2.507671631761675, "grad_norm": 1.2400751113891602, "learning_rate": 9.348342105263158e-05, "loss": 0.429, "step": 44782 }, { "epoch": 2.5077276290738046, "grad_norm": 1.120600938796997, "learning_rate": 9.348315789473685e-05, "loss": 0.5239, "step": 44783 }, { "epoch": 2.507783626385933, "grad_norm": 1.1395894289016724, "learning_rate": 9.348289473684211e-05, "loss": 0.4214, "step": 44784 }, { "epoch": 2.5078396236980627, "grad_norm": 1.3470524549484253, "learning_rate": 9.348263157894738e-05, "loss": 0.3757, "step": 44785 }, { "epoch": 2.5078956210101913, "grad_norm": 1.1209582090377808, "learning_rate": 9.348236842105264e-05, "loss": 0.3477, "step": 44786 }, { "epoch": 2.5079516183223207, "grad_norm": 1.6194506883621216, "learning_rate": 9.34821052631579e-05, "loss": 0.3662, "step": 44787 }, { "epoch": 2.5080076156344493, "grad_norm": 1.4752724170684814, "learning_rate": 9.348184210526316e-05, "loss": 0.4196, "step": 44788 }, { "epoch": 2.5080636129465788, "grad_norm": 1.1000336408615112, "learning_rate": 9.348157894736842e-05, "loss": 0.3127, "step": 44789 }, { "epoch": 2.5081196102587073, "grad_norm": 1.4938992261886597, "learning_rate": 9.34813157894737e-05, "loss": 0.4267, "step": 44790 }, { "epoch": 2.508175607570837, "grad_norm": 
1.1985728740692139, "learning_rate": 9.348105263157895e-05, "loss": 0.3426, "step": 44791 }, { "epoch": 2.5082316048829654, "grad_norm": 1.3682639598846436, "learning_rate": 9.348078947368421e-05, "loss": 0.4814, "step": 44792 }, { "epoch": 2.508287602195095, "grad_norm": 1.1228935718536377, "learning_rate": 9.348052631578947e-05, "loss": 0.3589, "step": 44793 }, { "epoch": 2.5083435995072234, "grad_norm": 1.0209373235702515, "learning_rate": 9.348026315789475e-05, "loss": 0.4057, "step": 44794 }, { "epoch": 2.508399596819353, "grad_norm": 1.5994699001312256, "learning_rate": 9.348e-05, "loss": 0.565, "step": 44795 }, { "epoch": 2.5084555941314814, "grad_norm": 1.1077467203140259, "learning_rate": 9.347973684210527e-05, "loss": 0.4683, "step": 44796 }, { "epoch": 2.508511591443611, "grad_norm": 1.3415753841400146, "learning_rate": 9.347947368421053e-05, "loss": 0.4564, "step": 44797 }, { "epoch": 2.5085675887557395, "grad_norm": 1.2240289449691772, "learning_rate": 9.34792105263158e-05, "loss": 0.4228, "step": 44798 }, { "epoch": 2.508623586067869, "grad_norm": 1.2324711084365845, "learning_rate": 9.347894736842106e-05, "loss": 0.4194, "step": 44799 }, { "epoch": 2.5086795833799975, "grad_norm": 1.3035613298416138, "learning_rate": 9.347868421052632e-05, "loss": 0.4778, "step": 44800 }, { "epoch": 2.508735580692127, "grad_norm": 1.1946133375167847, "learning_rate": 9.347842105263158e-05, "loss": 0.5276, "step": 44801 }, { "epoch": 2.5087915780042556, "grad_norm": 1.1371045112609863, "learning_rate": 9.347815789473685e-05, "loss": 0.3981, "step": 44802 }, { "epoch": 2.508847575316385, "grad_norm": 1.5162835121154785, "learning_rate": 9.347789473684211e-05, "loss": 0.4246, "step": 44803 }, { "epoch": 2.5089035726285136, "grad_norm": 1.1307345628738403, "learning_rate": 9.347763157894738e-05, "loss": 0.3362, "step": 44804 }, { "epoch": 2.508959569940643, "grad_norm": 1.1523785591125488, "learning_rate": 9.347736842105263e-05, "loss": 0.3283, "step": 44805 }, { 
"epoch": 2.5090155672527716, "grad_norm": 0.9287214875221252, "learning_rate": 9.34771052631579e-05, "loss": 0.3207, "step": 44806 }, { "epoch": 2.509071564564901, "grad_norm": 0.9575591683387756, "learning_rate": 9.347684210526316e-05, "loss": 0.2962, "step": 44807 }, { "epoch": 2.5091275618770297, "grad_norm": 1.4418352842330933, "learning_rate": 9.347657894736842e-05, "loss": 0.4925, "step": 44808 }, { "epoch": 2.509183559189159, "grad_norm": 1.2317215204238892, "learning_rate": 9.347631578947368e-05, "loss": 0.345, "step": 44809 }, { "epoch": 2.5092395565012877, "grad_norm": 1.242807388305664, "learning_rate": 9.347605263157894e-05, "loss": 0.3959, "step": 44810 }, { "epoch": 2.509295553813417, "grad_norm": 1.2883909940719604, "learning_rate": 9.347578947368422e-05, "loss": 0.4667, "step": 44811 }, { "epoch": 2.5093515511255458, "grad_norm": 1.6157764196395874, "learning_rate": 9.347552631578948e-05, "loss": 0.6144, "step": 44812 }, { "epoch": 2.509407548437675, "grad_norm": 1.1221263408660889, "learning_rate": 9.347526315789475e-05, "loss": 0.3846, "step": 44813 }, { "epoch": 2.509463545749804, "grad_norm": 1.9636478424072266, "learning_rate": 9.3475e-05, "loss": 0.4537, "step": 44814 }, { "epoch": 2.5095195430619333, "grad_norm": 1.1219887733459473, "learning_rate": 9.347473684210527e-05, "loss": 0.4722, "step": 44815 }, { "epoch": 2.509575540374062, "grad_norm": 1.2659757137298584, "learning_rate": 9.347447368421053e-05, "loss": 0.4594, "step": 44816 }, { "epoch": 2.5096315376861913, "grad_norm": 1.1388801336288452, "learning_rate": 9.34742105263158e-05, "loss": 0.3963, "step": 44817 }, { "epoch": 2.50968753499832, "grad_norm": 1.4837017059326172, "learning_rate": 9.347394736842106e-05, "loss": 0.4176, "step": 44818 }, { "epoch": 2.5097435323104493, "grad_norm": 1.1181418895721436, "learning_rate": 9.347368421052632e-05, "loss": 0.3433, "step": 44819 }, { "epoch": 2.509799529622578, "grad_norm": 1.1035248041152954, "learning_rate": 9.347342105263158e-05, 
"loss": 0.3439, "step": 44820 }, { "epoch": 2.5098555269347074, "grad_norm": 1.2026946544647217, "learning_rate": 9.347315789473685e-05, "loss": 0.398, "step": 44821 }, { "epoch": 2.509911524246836, "grad_norm": 1.1365171670913696, "learning_rate": 9.347289473684211e-05, "loss": 0.3844, "step": 44822 }, { "epoch": 2.5099675215589654, "grad_norm": 1.1773253679275513, "learning_rate": 9.347263157894737e-05, "loss": 0.3002, "step": 44823 }, { "epoch": 2.510023518871094, "grad_norm": 1.6119105815887451, "learning_rate": 9.347236842105263e-05, "loss": 0.383, "step": 44824 }, { "epoch": 2.5100795161832234, "grad_norm": 1.4468108415603638, "learning_rate": 9.347210526315789e-05, "loss": 0.4419, "step": 44825 }, { "epoch": 2.510135513495352, "grad_norm": 1.131170630455017, "learning_rate": 9.347184210526317e-05, "loss": 0.4063, "step": 44826 }, { "epoch": 2.5101915108074815, "grad_norm": 0.9969378113746643, "learning_rate": 9.347157894736843e-05, "loss": 0.3209, "step": 44827 }, { "epoch": 2.51024750811961, "grad_norm": 1.2613555192947388, "learning_rate": 9.347131578947369e-05, "loss": 0.5424, "step": 44828 }, { "epoch": 2.5103035054317395, "grad_norm": 1.34848952293396, "learning_rate": 9.347105263157895e-05, "loss": 0.5583, "step": 44829 }, { "epoch": 2.510359502743868, "grad_norm": 1.1752198934555054, "learning_rate": 9.347078947368422e-05, "loss": 0.4375, "step": 44830 }, { "epoch": 2.5104155000559976, "grad_norm": 1.29131281375885, "learning_rate": 9.347052631578948e-05, "loss": 0.4499, "step": 44831 }, { "epoch": 2.510471497368126, "grad_norm": 1.0799561738967896, "learning_rate": 9.347026315789474e-05, "loss": 0.4558, "step": 44832 }, { "epoch": 2.5105274946802556, "grad_norm": 1.5851231813430786, "learning_rate": 9.347e-05, "loss": 0.3901, "step": 44833 }, { "epoch": 2.510583491992384, "grad_norm": 1.1060611009597778, "learning_rate": 9.346973684210527e-05, "loss": 0.3985, "step": 44834 }, { "epoch": 2.5106394893045136, "grad_norm": 1.2519795894622803, 
"learning_rate": 9.346947368421053e-05, "loss": 0.3983, "step": 44835 }, { "epoch": 2.510695486616642, "grad_norm": 1.2525273561477661, "learning_rate": 9.34692105263158e-05, "loss": 0.4165, "step": 44836 }, { "epoch": 2.5107514839287717, "grad_norm": 0.97767174243927, "learning_rate": 9.346894736842105e-05, "loss": 0.2777, "step": 44837 }, { "epoch": 2.5108074812409003, "grad_norm": 1.2371069192886353, "learning_rate": 9.346868421052632e-05, "loss": 0.4148, "step": 44838 }, { "epoch": 2.5108634785530297, "grad_norm": 1.32941472530365, "learning_rate": 9.346842105263158e-05, "loss": 0.615, "step": 44839 }, { "epoch": 2.5109194758651583, "grad_norm": 0.9924670457839966, "learning_rate": 9.346815789473686e-05, "loss": 0.3319, "step": 44840 }, { "epoch": 2.5109754731772878, "grad_norm": 1.2255840301513672, "learning_rate": 9.346789473684212e-05, "loss": 0.3632, "step": 44841 }, { "epoch": 2.5110314704894163, "grad_norm": 1.4669734239578247, "learning_rate": 9.346763157894736e-05, "loss": 0.5695, "step": 44842 }, { "epoch": 2.511087467801546, "grad_norm": 1.2088528871536255, "learning_rate": 9.346736842105264e-05, "loss": 0.3351, "step": 44843 }, { "epoch": 2.5111434651136744, "grad_norm": 1.3070580959320068, "learning_rate": 9.34671052631579e-05, "loss": 0.5017, "step": 44844 }, { "epoch": 2.511199462425804, "grad_norm": 1.03866708278656, "learning_rate": 9.346684210526317e-05, "loss": 0.3976, "step": 44845 }, { "epoch": 2.5112554597379324, "grad_norm": 1.590061902999878, "learning_rate": 9.346657894736841e-05, "loss": 0.28, "step": 44846 }, { "epoch": 2.511311457050062, "grad_norm": 1.2179813385009766, "learning_rate": 9.346631578947369e-05, "loss": 0.5433, "step": 44847 }, { "epoch": 2.5113674543621904, "grad_norm": 2.9194672107696533, "learning_rate": 9.346605263157895e-05, "loss": 0.4616, "step": 44848 }, { "epoch": 2.51142345167432, "grad_norm": 1.089234709739685, "learning_rate": 9.346578947368422e-05, "loss": 0.3061, "step": 44849 }, { "epoch": 
2.5114794489864485, "grad_norm": 1.0947996377944946, "learning_rate": 9.346552631578948e-05, "loss": 0.3551, "step": 44850 }, { "epoch": 2.511535446298578, "grad_norm": 1.2092132568359375, "learning_rate": 9.346526315789474e-05, "loss": 0.3332, "step": 44851 }, { "epoch": 2.5115914436107065, "grad_norm": 1.2373971939086914, "learning_rate": 9.3465e-05, "loss": 0.3388, "step": 44852 }, { "epoch": 2.5116474409228355, "grad_norm": 8.125524520874023, "learning_rate": 9.346473684210527e-05, "loss": 0.538, "step": 44853 }, { "epoch": 2.5117034382349646, "grad_norm": 1.4691716432571411, "learning_rate": 9.346447368421053e-05, "loss": 0.4294, "step": 44854 }, { "epoch": 2.5117594355470936, "grad_norm": 1.2349040508270264, "learning_rate": 9.346421052631579e-05, "loss": 0.5085, "step": 44855 }, { "epoch": 2.5118154328592226, "grad_norm": 1.2856624126434326, "learning_rate": 9.346394736842105e-05, "loss": 0.3606, "step": 44856 }, { "epoch": 2.5118714301713516, "grad_norm": 1.2632639408111572, "learning_rate": 9.346368421052633e-05, "loss": 0.4337, "step": 44857 }, { "epoch": 2.5119274274834806, "grad_norm": 1.580068588256836, "learning_rate": 9.346342105263159e-05, "loss": 0.4892, "step": 44858 }, { "epoch": 2.5119834247956097, "grad_norm": 1.4663323163986206, "learning_rate": 9.346315789473685e-05, "loss": 0.4151, "step": 44859 }, { "epoch": 2.5120394221077387, "grad_norm": 1.3076223134994507, "learning_rate": 9.34628947368421e-05, "loss": 0.4489, "step": 44860 }, { "epoch": 2.5120954194198677, "grad_norm": 1.6174418926239014, "learning_rate": 9.346263157894737e-05, "loss": 0.4643, "step": 44861 }, { "epoch": 2.5121514167319967, "grad_norm": 1.0677871704101562, "learning_rate": 9.346236842105264e-05, "loss": 0.3172, "step": 44862 }, { "epoch": 2.5122074140441257, "grad_norm": 1.259846568107605, "learning_rate": 9.34621052631579e-05, "loss": 0.3394, "step": 44863 }, { "epoch": 2.5122634113562547, "grad_norm": 1.1495903730392456, "learning_rate": 9.346184210526316e-05, 
"loss": 0.3107, "step": 44864 }, { "epoch": 2.5123194086683838, "grad_norm": 1.021652102470398, "learning_rate": 9.346157894736842e-05, "loss": 0.3296, "step": 44865 }, { "epoch": 2.512375405980513, "grad_norm": 1.2996822595596313, "learning_rate": 9.346131578947369e-05, "loss": 0.4432, "step": 44866 }, { "epoch": 2.512431403292642, "grad_norm": 1.3068476915359497, "learning_rate": 9.346105263157895e-05, "loss": 0.4548, "step": 44867 }, { "epoch": 2.512487400604771, "grad_norm": 1.1853389739990234, "learning_rate": 9.346078947368422e-05, "loss": 0.4578, "step": 44868 }, { "epoch": 2.5125433979169, "grad_norm": 1.3115187883377075, "learning_rate": 9.346052631578947e-05, "loss": 0.4174, "step": 44869 }, { "epoch": 2.512599395229029, "grad_norm": 1.3815999031066895, "learning_rate": 9.346026315789474e-05, "loss": 0.3732, "step": 44870 }, { "epoch": 2.512655392541158, "grad_norm": 1.068616271018982, "learning_rate": 9.346e-05, "loss": 0.346, "step": 44871 }, { "epoch": 2.512711389853287, "grad_norm": 1.1265902519226074, "learning_rate": 9.345973684210528e-05, "loss": 0.3245, "step": 44872 }, { "epoch": 2.512767387165416, "grad_norm": 1.0503796339035034, "learning_rate": 9.345947368421054e-05, "loss": 0.3052, "step": 44873 }, { "epoch": 2.512823384477545, "grad_norm": 1.1018083095550537, "learning_rate": 9.34592105263158e-05, "loss": 0.4371, "step": 44874 }, { "epoch": 2.512879381789674, "grad_norm": 1.4471663236618042, "learning_rate": 9.345894736842106e-05, "loss": 0.4307, "step": 44875 }, { "epoch": 2.512935379101803, "grad_norm": 1.0819631814956665, "learning_rate": 9.345868421052632e-05, "loss": 0.4652, "step": 44876 }, { "epoch": 2.512991376413932, "grad_norm": 1.0855740308761597, "learning_rate": 9.345842105263159e-05, "loss": 0.3975, "step": 44877 }, { "epoch": 2.513047373726061, "grad_norm": 1.1854274272918701, "learning_rate": 9.345815789473685e-05, "loss": 0.3471, "step": 44878 }, { "epoch": 2.51310337103819, "grad_norm": 1.0236424207687378, "learning_rate": 
9.345789473684211e-05, "loss": 0.3814, "step": 44879 }, { "epoch": 2.513159368350319, "grad_norm": 1.3927698135375977, "learning_rate": 9.345763157894737e-05, "loss": 0.4134, "step": 44880 }, { "epoch": 2.513215365662448, "grad_norm": 8.184261322021484, "learning_rate": 9.345736842105264e-05, "loss": 0.4522, "step": 44881 }, { "epoch": 2.513271362974577, "grad_norm": 2.255575180053711, "learning_rate": 9.34571052631579e-05, "loss": 0.5953, "step": 44882 }, { "epoch": 2.513327360286706, "grad_norm": 1.0129317045211792, "learning_rate": 9.345684210526316e-05, "loss": 0.3147, "step": 44883 }, { "epoch": 2.513383357598835, "grad_norm": 1.1146271228790283, "learning_rate": 9.345657894736842e-05, "loss": 0.5345, "step": 44884 }, { "epoch": 2.513439354910964, "grad_norm": 1.20524263381958, "learning_rate": 9.34563157894737e-05, "loss": 0.4027, "step": 44885 }, { "epoch": 2.513495352223093, "grad_norm": 1.0501710176467896, "learning_rate": 9.345605263157895e-05, "loss": 0.3371, "step": 44886 }, { "epoch": 2.513551349535222, "grad_norm": 1.3221904039382935, "learning_rate": 9.345578947368421e-05, "loss": 0.4243, "step": 44887 }, { "epoch": 2.513607346847351, "grad_norm": 1.3419839143753052, "learning_rate": 9.345552631578947e-05, "loss": 0.3276, "step": 44888 }, { "epoch": 2.5136633441594802, "grad_norm": 0.968989908695221, "learning_rate": 9.345526315789475e-05, "loss": 0.3418, "step": 44889 }, { "epoch": 2.5137193414716092, "grad_norm": 1.2165015935897827, "learning_rate": 9.3455e-05, "loss": 0.4474, "step": 44890 }, { "epoch": 2.5137753387837383, "grad_norm": 1.1653826236724854, "learning_rate": 9.345473684210528e-05, "loss": 0.4719, "step": 44891 }, { "epoch": 2.5138313360958673, "grad_norm": 1.0929332971572876, "learning_rate": 9.345447368421053e-05, "loss": 0.4604, "step": 44892 }, { "epoch": 2.5138873334079963, "grad_norm": 1.0278501510620117, "learning_rate": 9.345421052631578e-05, "loss": 0.4443, "step": 44893 }, { "epoch": 2.5139433307201253, "grad_norm": 
1.2402970790863037, "learning_rate": 9.345394736842106e-05, "loss": 0.4572, "step": 44894 }, { "epoch": 2.5139993280322543, "grad_norm": 1.3332836627960205, "learning_rate": 9.345368421052632e-05, "loss": 0.3434, "step": 44895 }, { "epoch": 2.5140553253443834, "grad_norm": 1.216914176940918, "learning_rate": 9.345342105263159e-05, "loss": 0.3555, "step": 44896 }, { "epoch": 2.5141113226565124, "grad_norm": 1.308079481124878, "learning_rate": 9.345315789473684e-05, "loss": 0.4229, "step": 44897 }, { "epoch": 2.5141673199686414, "grad_norm": 1.1437238454818726, "learning_rate": 9.345289473684211e-05, "loss": 0.4043, "step": 44898 }, { "epoch": 2.5142233172807704, "grad_norm": 1.3466852903366089, "learning_rate": 9.345263157894737e-05, "loss": 0.4086, "step": 44899 }, { "epoch": 2.5142793145928994, "grad_norm": 1.2115637063980103, "learning_rate": 9.345236842105264e-05, "loss": 0.567, "step": 44900 }, { "epoch": 2.5143353119050285, "grad_norm": 1.2190706729888916, "learning_rate": 9.345210526315789e-05, "loss": 0.6623, "step": 44901 }, { "epoch": 2.5143913092171575, "grad_norm": 1.0553839206695557, "learning_rate": 9.345184210526316e-05, "loss": 0.4239, "step": 44902 }, { "epoch": 2.5144473065292865, "grad_norm": 1.414591908454895, "learning_rate": 9.345157894736842e-05, "loss": 0.3563, "step": 44903 }, { "epoch": 2.5145033038414155, "grad_norm": 1.1586596965789795, "learning_rate": 9.34513157894737e-05, "loss": 0.4577, "step": 44904 }, { "epoch": 2.5145593011535445, "grad_norm": 1.2129076719284058, "learning_rate": 9.345105263157896e-05, "loss": 0.6084, "step": 44905 }, { "epoch": 2.5146152984656736, "grad_norm": 1.448001503944397, "learning_rate": 9.345078947368422e-05, "loss": 0.4939, "step": 44906 }, { "epoch": 2.5146712957778026, "grad_norm": 1.0205128192901611, "learning_rate": 9.345052631578948e-05, "loss": 0.3671, "step": 44907 }, { "epoch": 2.5147272930899316, "grad_norm": 1.4580501317977905, "learning_rate": 9.345026315789475e-05, "loss": 0.4371, "step": 
44908 }, { "epoch": 2.5147832904020606, "grad_norm": 0.9909213185310364, "learning_rate": 9.345000000000001e-05, "loss": 0.3443, "step": 44909 }, { "epoch": 2.5148392877141896, "grad_norm": 1.4379419088363647, "learning_rate": 9.344973684210527e-05, "loss": 0.3435, "step": 44910 }, { "epoch": 2.5148952850263186, "grad_norm": 1.00141179561615, "learning_rate": 9.344947368421053e-05, "loss": 0.3569, "step": 44911 }, { "epoch": 2.5149512823384477, "grad_norm": 1.019567608833313, "learning_rate": 9.344921052631579e-05, "loss": 0.3208, "step": 44912 }, { "epoch": 2.5150072796505767, "grad_norm": 1.207808494567871, "learning_rate": 9.344894736842106e-05, "loss": 0.4349, "step": 44913 }, { "epoch": 2.5150632769627057, "grad_norm": 1.4679515361785889, "learning_rate": 9.344868421052632e-05, "loss": 0.3923, "step": 44914 }, { "epoch": 2.5151192742748347, "grad_norm": 1.0824780464172363, "learning_rate": 9.344842105263158e-05, "loss": 0.2899, "step": 44915 }, { "epoch": 2.5151752715869637, "grad_norm": 1.0728927850723267, "learning_rate": 9.344815789473684e-05, "loss": 0.3198, "step": 44916 }, { "epoch": 2.5152312688990928, "grad_norm": 1.2754501104354858, "learning_rate": 9.344789473684211e-05, "loss": 0.4084, "step": 44917 }, { "epoch": 2.515287266211222, "grad_norm": 1.4581232070922852, "learning_rate": 9.344763157894737e-05, "loss": 0.487, "step": 44918 }, { "epoch": 2.515343263523351, "grad_norm": 0.9967120289802551, "learning_rate": 9.344736842105263e-05, "loss": 0.4334, "step": 44919 }, { "epoch": 2.51539926083548, "grad_norm": 1.0986638069152832, "learning_rate": 9.344710526315789e-05, "loss": 0.439, "step": 44920 }, { "epoch": 2.515455258147609, "grad_norm": 1.3770689964294434, "learning_rate": 9.344684210526317e-05, "loss": 0.4033, "step": 44921 }, { "epoch": 2.515511255459738, "grad_norm": 0.9774295091629028, "learning_rate": 9.344657894736843e-05, "loss": 0.3056, "step": 44922 }, { "epoch": 2.515567252771867, "grad_norm": 1.3876203298568726, "learning_rate": 
9.34463157894737e-05, "loss": 0.5283, "step": 44923 }, { "epoch": 2.515623250083996, "grad_norm": 1.1342518329620361, "learning_rate": 9.344605263157894e-05, "loss": 0.4154, "step": 44924 }, { "epoch": 2.515679247396125, "grad_norm": 3.111009120941162, "learning_rate": 9.344578947368422e-05, "loss": 0.3657, "step": 44925 }, { "epoch": 2.515735244708254, "grad_norm": 1.144946575164795, "learning_rate": 9.344552631578948e-05, "loss": 0.3353, "step": 44926 }, { "epoch": 2.515791242020383, "grad_norm": 0.9770836234092712, "learning_rate": 9.344526315789475e-05, "loss": 0.323, "step": 44927 }, { "epoch": 2.515847239332512, "grad_norm": 1.0280795097351074, "learning_rate": 9.344500000000001e-05, "loss": 0.334, "step": 44928 }, { "epoch": 2.515903236644641, "grad_norm": 1.0440059900283813, "learning_rate": 9.344473684210526e-05, "loss": 0.3485, "step": 44929 }, { "epoch": 2.51595923395677, "grad_norm": 1.4271368980407715, "learning_rate": 9.344447368421053e-05, "loss": 0.376, "step": 44930 }, { "epoch": 2.516015231268899, "grad_norm": 1.1543761491775513, "learning_rate": 9.344421052631579e-05, "loss": 0.3957, "step": 44931 }, { "epoch": 2.516071228581028, "grad_norm": 1.255721092224121, "learning_rate": 9.344394736842106e-05, "loss": 0.5025, "step": 44932 }, { "epoch": 2.516127225893157, "grad_norm": 1.1022852659225464, "learning_rate": 9.344368421052632e-05, "loss": 0.3901, "step": 44933 }, { "epoch": 2.516183223205286, "grad_norm": 1.430487036705017, "learning_rate": 9.344342105263158e-05, "loss": 0.4536, "step": 44934 }, { "epoch": 2.516239220517415, "grad_norm": 1.1101151704788208, "learning_rate": 9.344315789473684e-05, "loss": 0.3305, "step": 44935 }, { "epoch": 2.516295217829544, "grad_norm": 1.1055845022201538, "learning_rate": 9.344289473684212e-05, "loss": 0.3563, "step": 44936 }, { "epoch": 2.516351215141673, "grad_norm": 1.3700309991836548, "learning_rate": 9.344263157894738e-05, "loss": 0.388, "step": 44937 }, { "epoch": 2.516407212453802, "grad_norm": 
1.1935478448867798, "learning_rate": 9.344236842105264e-05, "loss": 0.5133, "step": 44938 }, { "epoch": 2.516463209765931, "grad_norm": 1.3323646783828735, "learning_rate": 9.34421052631579e-05, "loss": 0.4344, "step": 44939 }, { "epoch": 2.51651920707806, "grad_norm": 1.147402048110962, "learning_rate": 9.344184210526317e-05, "loss": 0.3988, "step": 44940 }, { "epoch": 2.5165752043901892, "grad_norm": 1.0571316480636597, "learning_rate": 9.344157894736843e-05, "loss": 0.2771, "step": 44941 }, { "epoch": 2.5166312017023182, "grad_norm": 1.1549065113067627, "learning_rate": 9.344131578947369e-05, "loss": 0.4805, "step": 44942 }, { "epoch": 2.5166871990144473, "grad_norm": 1.6336346864700317, "learning_rate": 9.344105263157895e-05, "loss": 0.4339, "step": 44943 }, { "epoch": 2.5167431963265763, "grad_norm": 1.232505440711975, "learning_rate": 9.344078947368422e-05, "loss": 0.4188, "step": 44944 }, { "epoch": 2.5167991936387053, "grad_norm": 1.0953078269958496, "learning_rate": 9.344052631578948e-05, "loss": 0.4922, "step": 44945 }, { "epoch": 2.5168551909508343, "grad_norm": 1.0453640222549438, "learning_rate": 9.344026315789474e-05, "loss": 0.3212, "step": 44946 }, { "epoch": 2.5169111882629633, "grad_norm": 1.1152838468551636, "learning_rate": 9.344e-05, "loss": 0.4817, "step": 44947 }, { "epoch": 2.5169671855750924, "grad_norm": 0.9481313824653625, "learning_rate": 9.343973684210526e-05, "loss": 0.3571, "step": 44948 }, { "epoch": 2.5170231828872214, "grad_norm": 1.3109378814697266, "learning_rate": 9.343947368421053e-05, "loss": 0.4554, "step": 44949 }, { "epoch": 2.5170791801993504, "grad_norm": 1.0466681718826294, "learning_rate": 9.343921052631579e-05, "loss": 0.3763, "step": 44950 }, { "epoch": 2.5171351775114794, "grad_norm": 1.1284700632095337, "learning_rate": 9.343894736842107e-05, "loss": 0.351, "step": 44951 }, { "epoch": 2.5171911748236084, "grad_norm": 1.2319542169570923, "learning_rate": 9.343868421052631e-05, "loss": 0.4552, "step": 44952 }, { 
"epoch": 2.5172471721357375, "grad_norm": 1.0170358419418335, "learning_rate": 9.343842105263159e-05, "loss": 0.3283, "step": 44953 }, { "epoch": 2.5173031694478665, "grad_norm": 1.0471830368041992, "learning_rate": 9.343815789473685e-05, "loss": 0.3781, "step": 44954 }, { "epoch": 2.5173591667599955, "grad_norm": 1.6155041456222534, "learning_rate": 9.343789473684212e-05, "loss": 0.4377, "step": 44955 }, { "epoch": 2.5174151640721245, "grad_norm": 1.072693109512329, "learning_rate": 9.343763157894736e-05, "loss": 0.4797, "step": 44956 }, { "epoch": 2.5174711613842535, "grad_norm": 1.1484203338623047, "learning_rate": 9.343736842105264e-05, "loss": 0.3885, "step": 44957 }, { "epoch": 2.5175271586963825, "grad_norm": 1.503772258758545, "learning_rate": 9.34371052631579e-05, "loss": 0.4369, "step": 44958 }, { "epoch": 2.5175831560085116, "grad_norm": 1.1823770999908447, "learning_rate": 9.343684210526317e-05, "loss": 0.3695, "step": 44959 }, { "epoch": 2.5176391533206406, "grad_norm": 1.205325961112976, "learning_rate": 9.343657894736843e-05, "loss": 0.3792, "step": 44960 }, { "epoch": 2.5176951506327696, "grad_norm": 1.4646451473236084, "learning_rate": 9.343631578947369e-05, "loss": 0.491, "step": 44961 }, { "epoch": 2.5177511479448986, "grad_norm": 1.1776790618896484, "learning_rate": 9.343605263157895e-05, "loss": 0.3946, "step": 44962 }, { "epoch": 2.5178071452570276, "grad_norm": 0.9953280687332153, "learning_rate": 9.343578947368421e-05, "loss": 0.4312, "step": 44963 }, { "epoch": 2.5178631425691567, "grad_norm": 1.0994231700897217, "learning_rate": 9.343552631578948e-05, "loss": 0.4054, "step": 44964 }, { "epoch": 2.5179191398812857, "grad_norm": 0.9563860893249512, "learning_rate": 9.343526315789474e-05, "loss": 0.2767, "step": 44965 }, { "epoch": 2.5179751371934147, "grad_norm": 1.2504534721374512, "learning_rate": 9.3435e-05, "loss": 0.3696, "step": 44966 }, { "epoch": 2.5180311345055437, "grad_norm": 0.9428294897079468, "learning_rate": 
9.343473684210526e-05, "loss": 0.3675, "step": 44967 }, { "epoch": 2.5180871318176727, "grad_norm": 1.3348417282104492, "learning_rate": 9.343447368421054e-05, "loss": 0.4198, "step": 44968 }, { "epoch": 2.5181431291298018, "grad_norm": 1.2012553215026855, "learning_rate": 9.34342105263158e-05, "loss": 0.4658, "step": 44969 }, { "epoch": 2.518199126441931, "grad_norm": 1.1559463739395142, "learning_rate": 9.343394736842105e-05, "loss": 0.3906, "step": 44970 }, { "epoch": 2.51825512375406, "grad_norm": 1.1877522468566895, "learning_rate": 9.343368421052631e-05, "loss": 0.4526, "step": 44971 }, { "epoch": 2.518311121066189, "grad_norm": 1.5210577249526978, "learning_rate": 9.343342105263159e-05, "loss": 0.3942, "step": 44972 }, { "epoch": 2.518367118378318, "grad_norm": 1.4321980476379395, "learning_rate": 9.343315789473685e-05, "loss": 0.4002, "step": 44973 }, { "epoch": 2.518423115690447, "grad_norm": 1.023194670677185, "learning_rate": 9.343289473684211e-05, "loss": 0.3982, "step": 44974 }, { "epoch": 2.518479113002576, "grad_norm": 1.208418607711792, "learning_rate": 9.343263157894737e-05, "loss": 0.3328, "step": 44975 }, { "epoch": 2.518535110314705, "grad_norm": 1.31124746799469, "learning_rate": 9.343236842105264e-05, "loss": 0.3582, "step": 44976 }, { "epoch": 2.518591107626834, "grad_norm": 1.5660958290100098, "learning_rate": 9.34321052631579e-05, "loss": 0.3897, "step": 44977 }, { "epoch": 2.518647104938963, "grad_norm": 1.4612643718719482, "learning_rate": 9.343184210526317e-05, "loss": 0.4789, "step": 44978 }, { "epoch": 2.518703102251092, "grad_norm": 1.2671443223953247, "learning_rate": 9.343157894736842e-05, "loss": 0.3856, "step": 44979 }, { "epoch": 2.518759099563221, "grad_norm": 1.078467607498169, "learning_rate": 9.343131578947368e-05, "loss": 0.2855, "step": 44980 }, { "epoch": 2.51881509687535, "grad_norm": 1.160021424293518, "learning_rate": 9.343105263157895e-05, "loss": 0.4486, "step": 44981 }, { "epoch": 2.518871094187479, "grad_norm": 
1.1333954334259033, "learning_rate": 9.343078947368421e-05, "loss": 0.4205, "step": 44982 }, { "epoch": 2.518927091499608, "grad_norm": 1.1214267015457153, "learning_rate": 9.343052631578949e-05, "loss": 0.3243, "step": 44983 }, { "epoch": 2.518983088811737, "grad_norm": 1.0001105070114136, "learning_rate": 9.343026315789473e-05, "loss": 0.3383, "step": 44984 }, { "epoch": 2.519039086123866, "grad_norm": 1.1627229452133179, "learning_rate": 9.343e-05, "loss": 0.4093, "step": 44985 }, { "epoch": 2.519095083435995, "grad_norm": 1.2763195037841797, "learning_rate": 9.342973684210526e-05, "loss": 0.525, "step": 44986 }, { "epoch": 2.519151080748124, "grad_norm": 1.234586477279663, "learning_rate": 9.342947368421054e-05, "loss": 0.4677, "step": 44987 }, { "epoch": 2.519207078060253, "grad_norm": 1.3622779846191406, "learning_rate": 9.34292105263158e-05, "loss": 0.5741, "step": 44988 }, { "epoch": 2.519263075372382, "grad_norm": 1.3100652694702148, "learning_rate": 9.342894736842106e-05, "loss": 0.6675, "step": 44989 }, { "epoch": 2.519319072684511, "grad_norm": 1.146007776260376, "learning_rate": 9.342868421052632e-05, "loss": 0.3324, "step": 44990 }, { "epoch": 2.51937506999664, "grad_norm": 1.2564313411712646, "learning_rate": 9.342842105263159e-05, "loss": 0.4336, "step": 44991 }, { "epoch": 2.519431067308769, "grad_norm": 1.1478283405303955, "learning_rate": 9.342815789473685e-05, "loss": 0.3981, "step": 44992 }, { "epoch": 2.519487064620898, "grad_norm": 1.2597934007644653, "learning_rate": 9.342789473684211e-05, "loss": 0.4504, "step": 44993 }, { "epoch": 2.5195430619330272, "grad_norm": 1.3104852437973022, "learning_rate": 9.342763157894737e-05, "loss": 0.4432, "step": 44994 }, { "epoch": 2.5195990592451563, "grad_norm": 1.2330958843231201, "learning_rate": 9.342736842105264e-05, "loss": 0.3808, "step": 44995 }, { "epoch": 2.5196550565572853, "grad_norm": 1.0490154027938843, "learning_rate": 9.34271052631579e-05, "loss": 0.3291, "step": 44996 }, { "epoch": 
2.5197110538694143, "grad_norm": 1.1007615327835083, "learning_rate": 9.342684210526316e-05, "loss": 0.4098, "step": 44997 }, { "epoch": 2.5197670511815433, "grad_norm": 1.170037865638733, "learning_rate": 9.342657894736842e-05, "loss": 0.3506, "step": 44998 }, { "epoch": 2.5198230484936723, "grad_norm": 2.0011634826660156, "learning_rate": 9.342631578947368e-05, "loss": 0.4867, "step": 44999 }, { "epoch": 2.5198790458058014, "grad_norm": 1.2240971326828003, "learning_rate": 9.342605263157896e-05, "loss": 0.4368, "step": 45000 }, { "epoch": 2.5199350431179304, "grad_norm": 1.1403335332870483, "learning_rate": 9.342578947368421e-05, "loss": 0.4468, "step": 45001 }, { "epoch": 2.5199910404300594, "grad_norm": 1.1571733951568604, "learning_rate": 9.342552631578947e-05, "loss": 0.3807, "step": 45002 }, { "epoch": 2.5200470377421884, "grad_norm": 0.9375881552696228, "learning_rate": 9.342526315789473e-05, "loss": 0.3517, "step": 45003 }, { "epoch": 2.5201030350543174, "grad_norm": 1.1599394083023071, "learning_rate": 9.342500000000001e-05, "loss": 0.436, "step": 45004 }, { "epoch": 2.5201590323664464, "grad_norm": 6.312375545501709, "learning_rate": 9.342473684210527e-05, "loss": 0.3994, "step": 45005 }, { "epoch": 2.5202150296785755, "grad_norm": 1.3332339525222778, "learning_rate": 9.342447368421054e-05, "loss": 0.5155, "step": 45006 }, { "epoch": 2.5202710269907045, "grad_norm": 1.2522649765014648, "learning_rate": 9.342421052631579e-05, "loss": 0.5232, "step": 45007 }, { "epoch": 2.5203270243028335, "grad_norm": 1.0837116241455078, "learning_rate": 9.342394736842106e-05, "loss": 0.3897, "step": 45008 }, { "epoch": 2.5203830216149625, "grad_norm": 1.2225641012191772, "learning_rate": 9.342368421052632e-05, "loss": 0.4053, "step": 45009 }, { "epoch": 2.5204390189270915, "grad_norm": 1.3245056867599487, "learning_rate": 9.342342105263159e-05, "loss": 0.3482, "step": 45010 }, { "epoch": 2.5204950162392206, "grad_norm": 1.0236687660217285, "learning_rate": 
9.342315789473684e-05, "loss": 0.4086, "step": 45011 }, { "epoch": 2.5205510135513496, "grad_norm": 1.4843497276306152, "learning_rate": 9.342289473684211e-05, "loss": 0.5144, "step": 45012 }, { "epoch": 2.5206070108634786, "grad_norm": 1.1718429327011108, "learning_rate": 9.342263157894737e-05, "loss": 0.3906, "step": 45013 }, { "epoch": 2.5206630081756076, "grad_norm": 1.0507522821426392, "learning_rate": 9.342236842105263e-05, "loss": 0.3336, "step": 45014 }, { "epoch": 2.5207190054877366, "grad_norm": 1.1859264373779297, "learning_rate": 9.34221052631579e-05, "loss": 0.3965, "step": 45015 }, { "epoch": 2.5207750027998657, "grad_norm": 0.9355223178863525, "learning_rate": 9.342184210526315e-05, "loss": 0.3265, "step": 45016 }, { "epoch": 2.5208310001119947, "grad_norm": 1.4237927198410034, "learning_rate": 9.342157894736842e-05, "loss": 0.3464, "step": 45017 }, { "epoch": 2.5208869974241237, "grad_norm": 0.9275553226470947, "learning_rate": 9.342131578947368e-05, "loss": 0.3218, "step": 45018 }, { "epoch": 2.5209429947362527, "grad_norm": 1.118915319442749, "learning_rate": 9.342105263157896e-05, "loss": 0.3699, "step": 45019 }, { "epoch": 2.5209989920483817, "grad_norm": 1.1228718757629395, "learning_rate": 9.342078947368422e-05, "loss": 0.3493, "step": 45020 }, { "epoch": 2.5210549893605108, "grad_norm": 1.3951319456100464, "learning_rate": 9.342052631578948e-05, "loss": 0.3988, "step": 45021 }, { "epoch": 2.5211109866726398, "grad_norm": 1.1483542919158936, "learning_rate": 9.342026315789474e-05, "loss": 0.4051, "step": 45022 }, { "epoch": 2.521166983984769, "grad_norm": 1.1666837930679321, "learning_rate": 9.342000000000001e-05, "loss": 0.4742, "step": 45023 }, { "epoch": 2.521222981296898, "grad_norm": 1.1819350719451904, "learning_rate": 9.341973684210527e-05, "loss": 0.2938, "step": 45024 }, { "epoch": 2.521278978609027, "grad_norm": 1.0625483989715576, "learning_rate": 9.341947368421053e-05, "loss": 0.4375, "step": 45025 }, { "epoch": 2.521334975921156, 
"grad_norm": 1.3652863502502441, "learning_rate": 9.341921052631579e-05, "loss": 0.4873, "step": 45026 }, { "epoch": 2.521390973233285, "grad_norm": 0.968160092830658, "learning_rate": 9.341894736842106e-05, "loss": 0.327, "step": 45027 }, { "epoch": 2.521446970545414, "grad_norm": 1.3170801401138306, "learning_rate": 9.341868421052632e-05, "loss": 0.4211, "step": 45028 }, { "epoch": 2.521502967857543, "grad_norm": 0.946506917476654, "learning_rate": 9.341842105263158e-05, "loss": 0.4307, "step": 45029 }, { "epoch": 2.521558965169672, "grad_norm": 1.1729804277420044, "learning_rate": 9.341815789473684e-05, "loss": 0.4079, "step": 45030 }, { "epoch": 2.521614962481801, "grad_norm": 1.6278482675552368, "learning_rate": 9.34178947368421e-05, "loss": 0.493, "step": 45031 }, { "epoch": 2.52167095979393, "grad_norm": 1.1343801021575928, "learning_rate": 9.341763157894737e-05, "loss": 0.3559, "step": 45032 }, { "epoch": 2.521726957106059, "grad_norm": 1.1425701379776, "learning_rate": 9.341736842105263e-05, "loss": 0.34, "step": 45033 }, { "epoch": 2.521782954418188, "grad_norm": 1.3116127252578735, "learning_rate": 9.34171052631579e-05, "loss": 0.3707, "step": 45034 }, { "epoch": 2.521838951730317, "grad_norm": 1.2800461053848267, "learning_rate": 9.341684210526315e-05, "loss": 0.3339, "step": 45035 }, { "epoch": 2.521894949042446, "grad_norm": 1.0552483797073364, "learning_rate": 9.341657894736843e-05, "loss": 0.4507, "step": 45036 }, { "epoch": 2.521950946354575, "grad_norm": 1.2434345483779907, "learning_rate": 9.341631578947369e-05, "loss": 0.4956, "step": 45037 }, { "epoch": 2.522006943666704, "grad_norm": 1.1293774843215942, "learning_rate": 9.341605263157896e-05, "loss": 0.3899, "step": 45038 }, { "epoch": 2.522062940978833, "grad_norm": 1.1378203630447388, "learning_rate": 9.34157894736842e-05, "loss": 0.4032, "step": 45039 }, { "epoch": 2.522118938290962, "grad_norm": 1.2980105876922607, "learning_rate": 9.341552631578948e-05, "loss": 0.3017, "step": 45040 }, { 
"epoch": 2.522174935603091, "grad_norm": 1.2770673036575317, "learning_rate": 9.341526315789474e-05, "loss": 0.4895, "step": 45041 }, { "epoch": 2.52223093291522, "grad_norm": 1.2009093761444092, "learning_rate": 9.341500000000001e-05, "loss": 0.3455, "step": 45042 }, { "epoch": 2.522286930227349, "grad_norm": 0.9977799654006958, "learning_rate": 9.341473684210527e-05, "loss": 0.3911, "step": 45043 }, { "epoch": 2.522342927539478, "grad_norm": 1.513598918914795, "learning_rate": 9.341447368421053e-05, "loss": 0.4044, "step": 45044 }, { "epoch": 2.522398924851607, "grad_norm": 0.9576770067214966, "learning_rate": 9.341421052631579e-05, "loss": 0.358, "step": 45045 }, { "epoch": 2.5224549221637362, "grad_norm": 1.1021870374679565, "learning_rate": 9.341394736842107e-05, "loss": 0.3989, "step": 45046 }, { "epoch": 2.5225109194758653, "grad_norm": 1.2287181615829468, "learning_rate": 9.341368421052632e-05, "loss": 0.3621, "step": 45047 }, { "epoch": 2.5225669167879943, "grad_norm": 1.1992675065994263, "learning_rate": 9.341342105263158e-05, "loss": 0.3832, "step": 45048 }, { "epoch": 2.5226229141001233, "grad_norm": 1.4756604433059692, "learning_rate": 9.341315789473684e-05, "loss": 0.3589, "step": 45049 }, { "epoch": 2.5226789114122523, "grad_norm": 1.2160685062408447, "learning_rate": 9.34128947368421e-05, "loss": 0.4059, "step": 45050 }, { "epoch": 2.5227349087243813, "grad_norm": 1.4132702350616455, "learning_rate": 9.341263157894738e-05, "loss": 0.6246, "step": 45051 }, { "epoch": 2.5227909060365103, "grad_norm": 1.3800989389419556, "learning_rate": 9.341236842105264e-05, "loss": 0.6802, "step": 45052 }, { "epoch": 2.5228469033486394, "grad_norm": 1.4345715045928955, "learning_rate": 9.34121052631579e-05, "loss": 0.4364, "step": 45053 }, { "epoch": 2.5229029006607684, "grad_norm": 0.9079887270927429, "learning_rate": 9.341184210526316e-05, "loss": 0.3043, "step": 45054 }, { "epoch": 2.5229588979728974, "grad_norm": 1.3440006971359253, "learning_rate": 
9.341157894736843e-05, "loss": 0.5654, "step": 45055 }, { "epoch": 2.5230148952850264, "grad_norm": 1.4064229726791382, "learning_rate": 9.341131578947369e-05, "loss": 0.5295, "step": 45056 }, { "epoch": 2.5230708925971554, "grad_norm": 1.1477257013320923, "learning_rate": 9.341105263157895e-05, "loss": 0.4029, "step": 45057 }, { "epoch": 2.5231268899092845, "grad_norm": 1.649611234664917, "learning_rate": 9.341078947368421e-05, "loss": 0.3661, "step": 45058 }, { "epoch": 2.5231828872214135, "grad_norm": 1.430660367012024, "learning_rate": 9.341052631578948e-05, "loss": 0.53, "step": 45059 }, { "epoch": 2.5232388845335425, "grad_norm": 0.9649778604507446, "learning_rate": 9.341026315789474e-05, "loss": 0.3671, "step": 45060 }, { "epoch": 2.5232948818456715, "grad_norm": 1.4579644203186035, "learning_rate": 9.341000000000002e-05, "loss": 0.4924, "step": 45061 }, { "epoch": 2.5233508791578005, "grad_norm": 1.1260203123092651, "learning_rate": 9.340973684210526e-05, "loss": 0.3724, "step": 45062 }, { "epoch": 2.5234068764699296, "grad_norm": 1.1087557077407837, "learning_rate": 9.340947368421053e-05, "loss": 0.3606, "step": 45063 }, { "epoch": 2.5234628737820586, "grad_norm": 1.1141988039016724, "learning_rate": 9.34092105263158e-05, "loss": 0.4391, "step": 45064 }, { "epoch": 2.5235188710941876, "grad_norm": 1.3191722631454468, "learning_rate": 9.340894736842107e-05, "loss": 0.4731, "step": 45065 }, { "epoch": 2.5235748684063166, "grad_norm": 1.41512131690979, "learning_rate": 9.340868421052631e-05, "loss": 0.3902, "step": 45066 }, { "epoch": 2.5236308657184456, "grad_norm": 1.687557339668274, "learning_rate": 9.340842105263157e-05, "loss": 0.3659, "step": 45067 }, { "epoch": 2.5236868630305747, "grad_norm": 1.1788462400436401, "learning_rate": 9.340815789473685e-05, "loss": 0.3276, "step": 45068 }, { "epoch": 2.5237428603427037, "grad_norm": 1.1703671216964722, "learning_rate": 9.34078947368421e-05, "loss": 0.3723, "step": 45069 }, { "epoch": 2.5237988576548327, 
"grad_norm": 1.241195559501648, "learning_rate": 9.340763157894738e-05, "loss": 0.422, "step": 45070 }, { "epoch": 2.5238548549669617, "grad_norm": 0.9738800525665283, "learning_rate": 9.340736842105263e-05, "loss": 0.3217, "step": 45071 }, { "epoch": 2.5239108522790907, "grad_norm": 1.2739169597625732, "learning_rate": 9.34071052631579e-05, "loss": 0.4885, "step": 45072 }, { "epoch": 2.5239668495912198, "grad_norm": 0.9963579177856445, "learning_rate": 9.340684210526316e-05, "loss": 0.3149, "step": 45073 }, { "epoch": 2.5240228469033488, "grad_norm": 0.9164132475852966, "learning_rate": 9.340657894736843e-05, "loss": 0.3975, "step": 45074 }, { "epoch": 2.524078844215478, "grad_norm": 1.0604761838912964, "learning_rate": 9.340631578947369e-05, "loss": 0.424, "step": 45075 }, { "epoch": 2.524134841527607, "grad_norm": 1.1388243436813354, "learning_rate": 9.340605263157895e-05, "loss": 0.4245, "step": 45076 }, { "epoch": 2.524190838839736, "grad_norm": 1.5361894369125366, "learning_rate": 9.340578947368421e-05, "loss": 0.5117, "step": 45077 }, { "epoch": 2.524246836151865, "grad_norm": 1.1749378442764282, "learning_rate": 9.340552631578948e-05, "loss": 0.3992, "step": 45078 }, { "epoch": 2.524302833463994, "grad_norm": 1.165282964706421, "learning_rate": 9.340526315789474e-05, "loss": 0.482, "step": 45079 }, { "epoch": 2.524358830776123, "grad_norm": 2.387977123260498, "learning_rate": 9.3405e-05, "loss": 0.3764, "step": 45080 }, { "epoch": 2.524414828088252, "grad_norm": 1.0958046913146973, "learning_rate": 9.340473684210526e-05, "loss": 0.3359, "step": 45081 }, { "epoch": 2.524470825400381, "grad_norm": 1.565321683883667, "learning_rate": 9.340447368421054e-05, "loss": 0.2915, "step": 45082 }, { "epoch": 2.52452682271251, "grad_norm": 1.3294605016708374, "learning_rate": 9.34042105263158e-05, "loss": 0.4456, "step": 45083 }, { "epoch": 2.524582820024639, "grad_norm": 1.3427128791809082, "learning_rate": 9.340394736842106e-05, "loss": 0.3722, "step": 45084 }, { 
"epoch": 2.524638817336768, "grad_norm": 1.0191469192504883, "learning_rate": 9.340368421052632e-05, "loss": 0.3192, "step": 45085 }, { "epoch": 2.524694814648897, "grad_norm": 1.278325080871582, "learning_rate": 9.340342105263158e-05, "loss": 0.3817, "step": 45086 }, { "epoch": 2.524750811961026, "grad_norm": 1.2492661476135254, "learning_rate": 9.340315789473685e-05, "loss": 0.3923, "step": 45087 }, { "epoch": 2.524806809273155, "grad_norm": 1.042992353439331, "learning_rate": 9.340289473684211e-05, "loss": 0.3939, "step": 45088 }, { "epoch": 2.524862806585284, "grad_norm": 1.5448247194290161, "learning_rate": 9.340263157894737e-05, "loss": 0.3493, "step": 45089 }, { "epoch": 2.524918803897413, "grad_norm": 1.5317858457565308, "learning_rate": 9.340236842105263e-05, "loss": 0.4325, "step": 45090 }, { "epoch": 2.524974801209542, "grad_norm": 1.2117873430252075, "learning_rate": 9.34021052631579e-05, "loss": 0.4355, "step": 45091 }, { "epoch": 2.525030798521671, "grad_norm": 1.4078155755996704, "learning_rate": 9.340184210526316e-05, "loss": 0.3512, "step": 45092 }, { "epoch": 2.5250867958338, "grad_norm": 1.2072077989578247, "learning_rate": 9.340157894736844e-05, "loss": 0.5043, "step": 45093 }, { "epoch": 2.525142793145929, "grad_norm": 1.0573042631149292, "learning_rate": 9.340131578947368e-05, "loss": 0.3367, "step": 45094 }, { "epoch": 2.525198790458058, "grad_norm": 0.9739222526550293, "learning_rate": 9.340105263157895e-05, "loss": 0.3845, "step": 45095 }, { "epoch": 2.525254787770187, "grad_norm": 1.2553620338439941, "learning_rate": 9.340078947368421e-05, "loss": 0.4464, "step": 45096 }, { "epoch": 2.525310785082316, "grad_norm": 1.2545933723449707, "learning_rate": 9.340052631578949e-05, "loss": 0.4025, "step": 45097 }, { "epoch": 2.5253667823944452, "grad_norm": 1.13052499294281, "learning_rate": 9.340026315789475e-05, "loss": 0.453, "step": 45098 }, { "epoch": 2.5254227797065743, "grad_norm": 1.2982596158981323, "learning_rate": 9.340000000000001e-05, 
"loss": 0.5166, "step": 45099 }, { "epoch": 2.5254787770187033, "grad_norm": 1.1480048894882202, "learning_rate": 9.339973684210527e-05, "loss": 0.3518, "step": 45100 }, { "epoch": 2.525534774330832, "grad_norm": 1.0768511295318604, "learning_rate": 9.339947368421053e-05, "loss": 0.3888, "step": 45101 }, { "epoch": 2.5255907716429613, "grad_norm": 1.5143009424209595, "learning_rate": 9.33992105263158e-05, "loss": 0.4153, "step": 45102 }, { "epoch": 2.52564676895509, "grad_norm": 0.9787368774414062, "learning_rate": 9.339894736842105e-05, "loss": 0.3268, "step": 45103 }, { "epoch": 2.5257027662672193, "grad_norm": 1.5540978908538818, "learning_rate": 9.339868421052632e-05, "loss": 0.4002, "step": 45104 }, { "epoch": 2.525758763579348, "grad_norm": 1.0439560413360596, "learning_rate": 9.339842105263158e-05, "loss": 0.408, "step": 45105 }, { "epoch": 2.5258147608914774, "grad_norm": 1.1765691041946411, "learning_rate": 9.339815789473685e-05, "loss": 0.436, "step": 45106 }, { "epoch": 2.525870758203606, "grad_norm": 1.1970053911209106, "learning_rate": 9.339789473684211e-05, "loss": 0.3781, "step": 45107 }, { "epoch": 2.5259267555157354, "grad_norm": 1.1379094123840332, "learning_rate": 9.339763157894737e-05, "loss": 0.4421, "step": 45108 }, { "epoch": 2.525982752827864, "grad_norm": 1.3179931640625, "learning_rate": 9.339736842105263e-05, "loss": 0.3257, "step": 45109 }, { "epoch": 2.5260387501399935, "grad_norm": 1.0589743852615356, "learning_rate": 9.33971052631579e-05, "loss": 0.2979, "step": 45110 }, { "epoch": 2.526094747452122, "grad_norm": 1.413193941116333, "learning_rate": 9.339684210526316e-05, "loss": 0.4256, "step": 45111 }, { "epoch": 2.5261507447642515, "grad_norm": 1.419501543045044, "learning_rate": 9.339657894736842e-05, "loss": 0.4531, "step": 45112 }, { "epoch": 2.52620674207638, "grad_norm": 1.2457424402236938, "learning_rate": 9.339631578947368e-05, "loss": 0.3737, "step": 45113 }, { "epoch": 2.5262627393885095, "grad_norm": 1.2079306840896606, 
"learning_rate": 9.339605263157896e-05, "loss": 0.3401, "step": 45114 }, { "epoch": 2.526318736700638, "grad_norm": 1.0016108751296997, "learning_rate": 9.339578947368422e-05, "loss": 0.3256, "step": 45115 }, { "epoch": 2.5263747340127676, "grad_norm": 1.2053142786026, "learning_rate": 9.339552631578948e-05, "loss": 0.4198, "step": 45116 }, { "epoch": 2.526430731324896, "grad_norm": 1.1074261665344238, "learning_rate": 9.339526315789474e-05, "loss": 0.4297, "step": 45117 }, { "epoch": 2.5264867286370256, "grad_norm": 1.0737534761428833, "learning_rate": 9.3395e-05, "loss": 0.396, "step": 45118 }, { "epoch": 2.526542725949154, "grad_norm": 1.097792148590088, "learning_rate": 9.339473684210527e-05, "loss": 0.4653, "step": 45119 }, { "epoch": 2.5265987232612837, "grad_norm": 1.238970398902893, "learning_rate": 9.339447368421053e-05, "loss": 0.4188, "step": 45120 }, { "epoch": 2.5266547205734122, "grad_norm": 1.3464114665985107, "learning_rate": 9.339421052631579e-05, "loss": 0.5059, "step": 45121 }, { "epoch": 2.5267107178855417, "grad_norm": 1.4598034620285034, "learning_rate": 9.339394736842105e-05, "loss": 0.3773, "step": 45122 }, { "epoch": 2.5267667151976703, "grad_norm": 1.0902695655822754, "learning_rate": 9.339368421052632e-05, "loss": 0.5127, "step": 45123 }, { "epoch": 2.5268227125097997, "grad_norm": 1.230257511138916, "learning_rate": 9.339342105263158e-05, "loss": 0.455, "step": 45124 }, { "epoch": 2.5268787098219283, "grad_norm": 1.2285648584365845, "learning_rate": 9.339315789473685e-05, "loss": 0.3085, "step": 45125 }, { "epoch": 2.5269347071340578, "grad_norm": 1.129361629486084, "learning_rate": 9.33928947368421e-05, "loss": 0.4479, "step": 45126 }, { "epoch": 2.5269907044461863, "grad_norm": 1.2754857540130615, "learning_rate": 9.339263157894737e-05, "loss": 0.3292, "step": 45127 }, { "epoch": 2.527046701758316, "grad_norm": 1.2990093231201172, "learning_rate": 9.339236842105263e-05, "loss": 0.3503, "step": 45128 }, { "epoch": 2.5271026990704444, 
"grad_norm": 1.4103270769119263, "learning_rate": 9.339210526315791e-05, "loss": 0.4367, "step": 45129 }, { "epoch": 2.527158696382574, "grad_norm": 1.131492018699646, "learning_rate": 9.339184210526317e-05, "loss": 0.3644, "step": 45130 }, { "epoch": 2.5272146936947024, "grad_norm": 1.294791579246521, "learning_rate": 9.339157894736843e-05, "loss": 0.4252, "step": 45131 }, { "epoch": 2.527270691006832, "grad_norm": 1.4098106622695923, "learning_rate": 9.339131578947369e-05, "loss": 0.3889, "step": 45132 }, { "epoch": 2.5273266883189605, "grad_norm": 1.0930731296539307, "learning_rate": 9.339105263157896e-05, "loss": 0.4311, "step": 45133 }, { "epoch": 2.52738268563109, "grad_norm": 1.3909887075424194, "learning_rate": 9.339078947368422e-05, "loss": 0.515, "step": 45134 }, { "epoch": 2.5274386829432185, "grad_norm": 1.3683050870895386, "learning_rate": 9.339052631578948e-05, "loss": 0.6142, "step": 45135 }, { "epoch": 2.527494680255348, "grad_norm": 1.382804274559021, "learning_rate": 9.339026315789474e-05, "loss": 0.461, "step": 45136 }, { "epoch": 2.5275506775674765, "grad_norm": 1.2536604404449463, "learning_rate": 9.339e-05, "loss": 0.3702, "step": 45137 }, { "epoch": 2.527606674879606, "grad_norm": 1.1602362394332886, "learning_rate": 9.338973684210527e-05, "loss": 0.3101, "step": 45138 }, { "epoch": 2.5276626721917346, "grad_norm": 0.9738381505012512, "learning_rate": 9.338947368421053e-05, "loss": 0.2954, "step": 45139 }, { "epoch": 2.527718669503864, "grad_norm": 1.1538786888122559, "learning_rate": 9.338921052631579e-05, "loss": 0.4015, "step": 45140 }, { "epoch": 2.5277746668159926, "grad_norm": 1.1494847536087036, "learning_rate": 9.338894736842105e-05, "loss": 0.3916, "step": 45141 }, { "epoch": 2.527830664128122, "grad_norm": 1.0634634494781494, "learning_rate": 9.338868421052632e-05, "loss": 0.3858, "step": 45142 }, { "epoch": 2.5278866614402506, "grad_norm": 1.1732077598571777, "learning_rate": 9.338842105263158e-05, "loss": 0.4547, "step": 45143 }, 
{ "epoch": 2.52794265875238, "grad_norm": 1.3576562404632568, "learning_rate": 9.338815789473684e-05, "loss": 0.4196, "step": 45144 }, { "epoch": 2.5279986560645087, "grad_norm": 1.6606502532958984, "learning_rate": 9.33878947368421e-05, "loss": 0.4791, "step": 45145 }, { "epoch": 2.528054653376638, "grad_norm": 1.2869861125946045, "learning_rate": 9.338763157894738e-05, "loss": 0.3888, "step": 45146 }, { "epoch": 2.5281106506887667, "grad_norm": 0.9239200353622437, "learning_rate": 9.338736842105264e-05, "loss": 0.3941, "step": 45147 }, { "epoch": 2.528166648000896, "grad_norm": 1.1052995920181274, "learning_rate": 9.338710526315791e-05, "loss": 0.3587, "step": 45148 }, { "epoch": 2.5282226453130248, "grad_norm": 1.1069886684417725, "learning_rate": 9.338684210526316e-05, "loss": 0.3702, "step": 45149 }, { "epoch": 2.5282786426251542, "grad_norm": 4.872007369995117, "learning_rate": 9.338657894736843e-05, "loss": 0.4661, "step": 45150 }, { "epoch": 2.528334639937283, "grad_norm": 1.1149044036865234, "learning_rate": 9.338631578947369e-05, "loss": 0.3993, "step": 45151 }, { "epoch": 2.5283906372494123, "grad_norm": 1.0310707092285156, "learning_rate": 9.338605263157895e-05, "loss": 0.3898, "step": 45152 }, { "epoch": 2.528446634561541, "grad_norm": 1.0805010795593262, "learning_rate": 9.338578947368422e-05, "loss": 0.4205, "step": 45153 }, { "epoch": 2.5285026318736703, "grad_norm": 1.1837612390518188, "learning_rate": 9.338552631578947e-05, "loss": 0.4698, "step": 45154 }, { "epoch": 2.528558629185799, "grad_norm": 1.3161191940307617, "learning_rate": 9.338526315789474e-05, "loss": 0.4502, "step": 45155 }, { "epoch": 2.5286146264979283, "grad_norm": 1.1102139949798584, "learning_rate": 9.3385e-05, "loss": 0.359, "step": 45156 }, { "epoch": 2.528670623810057, "grad_norm": 1.18354070186615, "learning_rate": 9.338473684210527e-05, "loss": 0.3612, "step": 45157 }, { "epoch": 2.5287266211221864, "grad_norm": 1.3239307403564453, "learning_rate": 9.338447368421052e-05, 
"loss": 0.474, "step": 45158 }, { "epoch": 2.528782618434315, "grad_norm": 1.0875134468078613, "learning_rate": 9.33842105263158e-05, "loss": 0.3601, "step": 45159 }, { "epoch": 2.5288386157464444, "grad_norm": 1.3045045137405396, "learning_rate": 9.338394736842105e-05, "loss": 0.4849, "step": 45160 }, { "epoch": 2.528894613058573, "grad_norm": 1.6577404737472534, "learning_rate": 9.338368421052633e-05, "loss": 0.6871, "step": 45161 }, { "epoch": 2.5289506103707025, "grad_norm": 1.1601085662841797, "learning_rate": 9.338342105263159e-05, "loss": 0.3436, "step": 45162 }, { "epoch": 2.529006607682831, "grad_norm": 1.2022532224655151, "learning_rate": 9.338315789473685e-05, "loss": 0.4046, "step": 45163 }, { "epoch": 2.5290626049949605, "grad_norm": 1.071622371673584, "learning_rate": 9.33828947368421e-05, "loss": 0.3933, "step": 45164 }, { "epoch": 2.529118602307089, "grad_norm": 1.0860626697540283, "learning_rate": 9.338263157894738e-05, "loss": 0.3671, "step": 45165 }, { "epoch": 2.5291745996192185, "grad_norm": 1.3062572479248047, "learning_rate": 9.338236842105264e-05, "loss": 0.4975, "step": 45166 }, { "epoch": 2.529230596931347, "grad_norm": 1.188965082168579, "learning_rate": 9.33821052631579e-05, "loss": 0.3607, "step": 45167 }, { "epoch": 2.5292865942434766, "grad_norm": 1.1517301797866821, "learning_rate": 9.338184210526316e-05, "loss": 0.3599, "step": 45168 }, { "epoch": 2.529342591555605, "grad_norm": 1.232240080833435, "learning_rate": 9.338157894736843e-05, "loss": 0.4651, "step": 45169 }, { "epoch": 2.5293985888677346, "grad_norm": 1.176153540611267, "learning_rate": 9.338131578947369e-05, "loss": 0.3881, "step": 45170 }, { "epoch": 2.529454586179863, "grad_norm": 1.0519949197769165, "learning_rate": 9.338105263157895e-05, "loss": 0.3461, "step": 45171 }, { "epoch": 2.5295105834919926, "grad_norm": 1.444745421409607, "learning_rate": 9.338078947368421e-05, "loss": 0.5805, "step": 45172 }, { "epoch": 2.5295665808041212, "grad_norm": 1.1891999244689941, 
"learning_rate": 9.338052631578947e-05, "loss": 0.4302, "step": 45173 }, { "epoch": 2.5296225781162507, "grad_norm": 1.153363823890686, "learning_rate": 9.338026315789474e-05, "loss": 0.3532, "step": 45174 }, { "epoch": 2.5296785754283793, "grad_norm": 1.2139067649841309, "learning_rate": 9.338e-05, "loss": 0.4526, "step": 45175 }, { "epoch": 2.5297345727405087, "grad_norm": 1.1376354694366455, "learning_rate": 9.337973684210526e-05, "loss": 0.4158, "step": 45176 }, { "epoch": 2.5297905700526373, "grad_norm": 1.1992076635360718, "learning_rate": 9.337947368421052e-05, "loss": 0.5269, "step": 45177 }, { "epoch": 2.5298465673647668, "grad_norm": 1.1391613483428955, "learning_rate": 9.33792105263158e-05, "loss": 0.4084, "step": 45178 }, { "epoch": 2.5299025646768953, "grad_norm": 1.122583270072937, "learning_rate": 9.337894736842106e-05, "loss": 0.3477, "step": 45179 }, { "epoch": 2.529958561989025, "grad_norm": 1.106078863143921, "learning_rate": 9.337868421052633e-05, "loss": 0.3117, "step": 45180 }, { "epoch": 2.5300145593011534, "grad_norm": 1.2008103132247925, "learning_rate": 9.337842105263158e-05, "loss": 0.3407, "step": 45181 }, { "epoch": 2.530070556613283, "grad_norm": 1.2463667392730713, "learning_rate": 9.337815789473685e-05, "loss": 0.36, "step": 45182 }, { "epoch": 2.5301265539254114, "grad_norm": 1.1172571182250977, "learning_rate": 9.337789473684211e-05, "loss": 0.3473, "step": 45183 }, { "epoch": 2.5301825512375404, "grad_norm": 1.3725645542144775, "learning_rate": 9.337763157894738e-05, "loss": 0.4122, "step": 45184 }, { "epoch": 2.5302385485496695, "grad_norm": 0.9529877305030823, "learning_rate": 9.337736842105264e-05, "loss": 0.4425, "step": 45185 }, { "epoch": 2.5302945458617985, "grad_norm": 1.0737224817276, "learning_rate": 9.33771052631579e-05, "loss": 0.416, "step": 45186 }, { "epoch": 2.5303505431739275, "grad_norm": 1.338065266609192, "learning_rate": 9.337684210526316e-05, "loss": 0.3699, "step": 45187 }, { "epoch": 2.5304065404860565, 
"grad_norm": 1.1875625848770142, "learning_rate": 9.337657894736842e-05, "loss": 0.316, "step": 45188 }, { "epoch": 2.5304625377981855, "grad_norm": 1.0232272148132324, "learning_rate": 9.33763157894737e-05, "loss": 0.4504, "step": 45189 }, { "epoch": 2.5305185351103145, "grad_norm": 1.1358455419540405, "learning_rate": 9.337605263157895e-05, "loss": 0.3543, "step": 45190 }, { "epoch": 2.5305745324224436, "grad_norm": 1.235042929649353, "learning_rate": 9.337578947368421e-05, "loss": 0.3676, "step": 45191 }, { "epoch": 2.5306305297345726, "grad_norm": 1.7193940877914429, "learning_rate": 9.337552631578947e-05, "loss": 0.5011, "step": 45192 }, { "epoch": 2.5306865270467016, "grad_norm": 1.0871737003326416, "learning_rate": 9.337526315789475e-05, "loss": 0.3449, "step": 45193 }, { "epoch": 2.5307425243588306, "grad_norm": 1.1899616718292236, "learning_rate": 9.3375e-05, "loss": 0.4164, "step": 45194 }, { "epoch": 2.5307985216709596, "grad_norm": 1.2257051467895508, "learning_rate": 9.337473684210527e-05, "loss": 0.3094, "step": 45195 }, { "epoch": 2.5308545189830887, "grad_norm": 1.1478500366210938, "learning_rate": 9.337447368421053e-05, "loss": 0.3197, "step": 45196 }, { "epoch": 2.5309105162952177, "grad_norm": 1.124822974205017, "learning_rate": 9.33742105263158e-05, "loss": 0.4217, "step": 45197 }, { "epoch": 2.5309665136073467, "grad_norm": 1.260844111442566, "learning_rate": 9.337394736842106e-05, "loss": 0.3711, "step": 45198 }, { "epoch": 2.5310225109194757, "grad_norm": 1.2106292247772217, "learning_rate": 9.337368421052632e-05, "loss": 0.4464, "step": 45199 }, { "epoch": 2.5310785082316047, "grad_norm": 1.400875449180603, "learning_rate": 9.337342105263158e-05, "loss": 0.28, "step": 45200 }, { "epoch": 2.5311345055437338, "grad_norm": 1.1131926774978638, "learning_rate": 9.337315789473685e-05, "loss": 0.651, "step": 45201 }, { "epoch": 2.531190502855863, "grad_norm": 1.451791524887085, "learning_rate": 9.337289473684211e-05, "loss": 0.297, "step": 45202 }, 
{ "epoch": 2.531246500167992, "grad_norm": 1.2021558284759521, "learning_rate": 9.337263157894738e-05, "loss": 0.4299, "step": 45203 }, { "epoch": 2.531302497480121, "grad_norm": 1.460461139678955, "learning_rate": 9.337236842105263e-05, "loss": 0.3899, "step": 45204 }, { "epoch": 2.53135849479225, "grad_norm": 1.3322944641113281, "learning_rate": 9.337210526315789e-05, "loss": 0.3108, "step": 45205 }, { "epoch": 2.531414492104379, "grad_norm": 1.264677882194519, "learning_rate": 9.337184210526316e-05, "loss": 0.354, "step": 45206 }, { "epoch": 2.531470489416508, "grad_norm": 1.2151342630386353, "learning_rate": 9.337157894736842e-05, "loss": 0.49, "step": 45207 }, { "epoch": 2.531526486728637, "grad_norm": 1.2918343544006348, "learning_rate": 9.33713157894737e-05, "loss": 0.4085, "step": 45208 }, { "epoch": 2.531582484040766, "grad_norm": 1.3364993333816528, "learning_rate": 9.337105263157894e-05, "loss": 0.3112, "step": 45209 }, { "epoch": 2.531638481352895, "grad_norm": 1.2031617164611816, "learning_rate": 9.337078947368422e-05, "loss": 0.4125, "step": 45210 }, { "epoch": 2.531694478665024, "grad_norm": 1.063269853591919, "learning_rate": 9.337052631578948e-05, "loss": 0.3567, "step": 45211 }, { "epoch": 2.531750475977153, "grad_norm": 1.306514024734497, "learning_rate": 9.337026315789475e-05, "loss": 0.4826, "step": 45212 }, { "epoch": 2.531806473289282, "grad_norm": 1.0742863416671753, "learning_rate": 9.337e-05, "loss": 0.2993, "step": 45213 }, { "epoch": 2.531862470601411, "grad_norm": 1.084478497505188, "learning_rate": 9.336973684210527e-05, "loss": 0.4825, "step": 45214 }, { "epoch": 2.53191846791354, "grad_norm": 1.1511906385421753, "learning_rate": 9.336947368421053e-05, "loss": 0.4134, "step": 45215 }, { "epoch": 2.531974465225669, "grad_norm": 1.4297105073928833, "learning_rate": 9.33692105263158e-05, "loss": 0.3668, "step": 45216 }, { "epoch": 2.532030462537798, "grad_norm": 1.131028175354004, "learning_rate": 9.336894736842106e-05, "loss": 0.341, 
"step": 45217 }, { "epoch": 2.532086459849927, "grad_norm": 0.9926726818084717, "learning_rate": 9.336868421052632e-05, "loss": 0.3477, "step": 45218 }, { "epoch": 2.532142457162056, "grad_norm": 1.5053209066390991, "learning_rate": 9.336842105263158e-05, "loss": 0.3904, "step": 45219 }, { "epoch": 2.532198454474185, "grad_norm": 1.4318617582321167, "learning_rate": 9.336815789473685e-05, "loss": 0.5467, "step": 45220 }, { "epoch": 2.532254451786314, "grad_norm": 1.2721027135849, "learning_rate": 9.336789473684211e-05, "loss": 0.3927, "step": 45221 }, { "epoch": 2.532310449098443, "grad_norm": 1.144276738166809, "learning_rate": 9.336763157894737e-05, "loss": 0.4427, "step": 45222 }, { "epoch": 2.532366446410572, "grad_norm": 0.998488187789917, "learning_rate": 9.336736842105263e-05, "loss": 0.4444, "step": 45223 }, { "epoch": 2.532422443722701, "grad_norm": 1.9380106925964355, "learning_rate": 9.336710526315789e-05, "loss": 0.3967, "step": 45224 }, { "epoch": 2.53247844103483, "grad_norm": 1.3981908559799194, "learning_rate": 9.336684210526317e-05, "loss": 0.34, "step": 45225 }, { "epoch": 2.5325344383469592, "grad_norm": 1.2559336423873901, "learning_rate": 9.336657894736843e-05, "loss": 0.3915, "step": 45226 }, { "epoch": 2.5325904356590883, "grad_norm": 1.497085452079773, "learning_rate": 9.336631578947369e-05, "loss": 0.4074, "step": 45227 }, { "epoch": 2.5326464329712173, "grad_norm": 1.298133373260498, "learning_rate": 9.336605263157895e-05, "loss": 0.4729, "step": 45228 }, { "epoch": 2.5327024302833463, "grad_norm": 1.0105904340744019, "learning_rate": 9.336578947368422e-05, "loss": 0.353, "step": 45229 }, { "epoch": 2.5327584275954753, "grad_norm": 1.1941754817962646, "learning_rate": 9.336552631578948e-05, "loss": 0.3754, "step": 45230 }, { "epoch": 2.5328144249076043, "grad_norm": 1.2362611293792725, "learning_rate": 9.336526315789474e-05, "loss": 0.5003, "step": 45231 }, { "epoch": 2.5328704222197334, "grad_norm": 1.0890926122665405, "learning_rate": 
9.3365e-05, "loss": 0.4168, "step": 45232 }, { "epoch": 2.5329264195318624, "grad_norm": 1.344289779663086, "learning_rate": 9.336473684210527e-05, "loss": 0.4641, "step": 45233 }, { "epoch": 2.5329824168439914, "grad_norm": 1.1892579793930054, "learning_rate": 9.336447368421053e-05, "loss": 0.3389, "step": 45234 }, { "epoch": 2.5330384141561204, "grad_norm": 1.1842209100723267, "learning_rate": 9.33642105263158e-05, "loss": 0.3071, "step": 45235 }, { "epoch": 2.5330944114682494, "grad_norm": 1.0685248374938965, "learning_rate": 9.336394736842105e-05, "loss": 0.3465, "step": 45236 }, { "epoch": 2.5331504087803784, "grad_norm": 1.8892714977264404, "learning_rate": 9.336368421052632e-05, "loss": 0.6618, "step": 45237 }, { "epoch": 2.5332064060925075, "grad_norm": 0.9832775592803955, "learning_rate": 9.336342105263158e-05, "loss": 0.4002, "step": 45238 }, { "epoch": 2.5332624034046365, "grad_norm": 1.1432660818099976, "learning_rate": 9.336315789473684e-05, "loss": 0.3566, "step": 45239 }, { "epoch": 2.5333184007167655, "grad_norm": 1.0462316274642944, "learning_rate": 9.336289473684212e-05, "loss": 0.334, "step": 45240 }, { "epoch": 2.5333743980288945, "grad_norm": 1.3578897714614868, "learning_rate": 9.336263157894736e-05, "loss": 0.5442, "step": 45241 }, { "epoch": 2.5334303953410235, "grad_norm": 1.9784818887710571, "learning_rate": 9.336236842105264e-05, "loss": 0.6246, "step": 45242 }, { "epoch": 2.5334863926531526, "grad_norm": 1.3815408945083618, "learning_rate": 9.33621052631579e-05, "loss": 0.49, "step": 45243 }, { "epoch": 2.5335423899652816, "grad_norm": 1.2610872983932495, "learning_rate": 9.336184210526317e-05, "loss": 0.3834, "step": 45244 }, { "epoch": 2.5335983872774106, "grad_norm": 1.2550121545791626, "learning_rate": 9.336157894736843e-05, "loss": 0.4933, "step": 45245 }, { "epoch": 2.5336543845895396, "grad_norm": 1.279241681098938, "learning_rate": 9.336131578947369e-05, "loss": 0.2893, "step": 45246 }, { "epoch": 2.5337103819016686, "grad_norm": 
1.0732061862945557, "learning_rate": 9.336105263157895e-05, "loss": 0.3774, "step": 45247 }, { "epoch": 2.5337663792137977, "grad_norm": 1.111973524093628, "learning_rate": 9.336078947368422e-05, "loss": 0.3549, "step": 45248 }, { "epoch": 2.5338223765259267, "grad_norm": 1.1784390211105347, "learning_rate": 9.336052631578948e-05, "loss": 0.4648, "step": 45249 }, { "epoch": 2.5338783738380557, "grad_norm": 1.2090518474578857, "learning_rate": 9.336026315789474e-05, "loss": 0.4341, "step": 45250 }, { "epoch": 2.5339343711501847, "grad_norm": 2.4343459606170654, "learning_rate": 9.336e-05, "loss": 0.4336, "step": 45251 }, { "epoch": 2.5339903684623137, "grad_norm": 1.2436964511871338, "learning_rate": 9.335973684210527e-05, "loss": 0.4915, "step": 45252 }, { "epoch": 2.5340463657744428, "grad_norm": 0.9614510536193848, "learning_rate": 9.335947368421053e-05, "loss": 0.2904, "step": 45253 }, { "epoch": 2.5341023630865718, "grad_norm": 1.4795252084732056, "learning_rate": 9.33592105263158e-05, "loss": 0.5812, "step": 45254 }, { "epoch": 2.534158360398701, "grad_norm": 1.1069657802581787, "learning_rate": 9.335894736842105e-05, "loss": 0.3125, "step": 45255 }, { "epoch": 2.53421435771083, "grad_norm": 1.0722161531448364, "learning_rate": 9.335868421052631e-05, "loss": 0.3727, "step": 45256 }, { "epoch": 2.534270355022959, "grad_norm": 1.054937720298767, "learning_rate": 9.335842105263159e-05, "loss": 0.2395, "step": 45257 }, { "epoch": 2.534326352335088, "grad_norm": 1.1573792695999146, "learning_rate": 9.335815789473685e-05, "loss": 0.4847, "step": 45258 }, { "epoch": 2.534382349647217, "grad_norm": 1.2153877019882202, "learning_rate": 9.33578947368421e-05, "loss": 0.3613, "step": 45259 }, { "epoch": 2.534438346959346, "grad_norm": 1.262445330619812, "learning_rate": 9.335763157894737e-05, "loss": 0.364, "step": 45260 }, { "epoch": 2.534494344271475, "grad_norm": 1.3474647998809814, "learning_rate": 9.335736842105264e-05, "loss": 0.4819, "step": 45261 }, { "epoch": 
2.534550341583604, "grad_norm": 1.1217551231384277, "learning_rate": 9.33571052631579e-05, "loss": 0.4093, "step": 45262 }, { "epoch": 2.534606338895733, "grad_norm": 1.5300803184509277, "learning_rate": 9.335684210526316e-05, "loss": 0.5184, "step": 45263 }, { "epoch": 2.534662336207862, "grad_norm": 1.216748595237732, "learning_rate": 9.335657894736842e-05, "loss": 0.3831, "step": 45264 }, { "epoch": 2.534718333519991, "grad_norm": 1.0298793315887451, "learning_rate": 9.335631578947369e-05, "loss": 0.354, "step": 45265 }, { "epoch": 2.53477433083212, "grad_norm": 1.0117430686950684, "learning_rate": 9.335605263157895e-05, "loss": 0.424, "step": 45266 }, { "epoch": 2.534830328144249, "grad_norm": 1.319228172302246, "learning_rate": 9.335578947368422e-05, "loss": 0.4444, "step": 45267 }, { "epoch": 2.534886325456378, "grad_norm": 1.2935110330581665, "learning_rate": 9.335552631578947e-05, "loss": 0.6328, "step": 45268 }, { "epoch": 2.534942322768507, "grad_norm": 1.1519806385040283, "learning_rate": 9.335526315789474e-05, "loss": 0.3794, "step": 45269 }, { "epoch": 2.534998320080636, "grad_norm": 1.0636451244354248, "learning_rate": 9.3355e-05, "loss": 0.3567, "step": 45270 }, { "epoch": 2.535054317392765, "grad_norm": 1.390102505683899, "learning_rate": 9.335473684210528e-05, "loss": 0.4264, "step": 45271 }, { "epoch": 2.535110314704894, "grad_norm": 1.598382830619812, "learning_rate": 9.335447368421054e-05, "loss": 0.4391, "step": 45272 }, { "epoch": 2.535166312017023, "grad_norm": 3.8355326652526855, "learning_rate": 9.335421052631578e-05, "loss": 0.6235, "step": 45273 }, { "epoch": 2.535222309329152, "grad_norm": 1.801727294921875, "learning_rate": 9.335394736842106e-05, "loss": 0.6115, "step": 45274 }, { "epoch": 2.535278306641281, "grad_norm": 1.1578021049499512, "learning_rate": 9.335368421052632e-05, "loss": 0.358, "step": 45275 }, { "epoch": 2.53533430395341, "grad_norm": 1.3351460695266724, "learning_rate": 9.335342105263159e-05, "loss": 0.4415, "step": 
45276 }, { "epoch": 2.535390301265539, "grad_norm": 1.5685043334960938, "learning_rate": 9.335315789473685e-05, "loss": 0.3998, "step": 45277 }, { "epoch": 2.5354462985776682, "grad_norm": 1.455068588256836, "learning_rate": 9.335289473684211e-05, "loss": 0.4018, "step": 45278 }, { "epoch": 2.5355022958897973, "grad_norm": 1.0965807437896729, "learning_rate": 9.335263157894737e-05, "loss": 0.3251, "step": 45279 }, { "epoch": 2.5355582932019263, "grad_norm": 1.163636565208435, "learning_rate": 9.335236842105264e-05, "loss": 0.4291, "step": 45280 }, { "epoch": 2.5356142905140553, "grad_norm": 1.3461495637893677, "learning_rate": 9.33521052631579e-05, "loss": 0.5616, "step": 45281 }, { "epoch": 2.5356702878261843, "grad_norm": 1.5996973514556885, "learning_rate": 9.335184210526316e-05, "loss": 0.6035, "step": 45282 }, { "epoch": 2.5357262851383133, "grad_norm": 1.376313328742981, "learning_rate": 9.335157894736842e-05, "loss": 0.4519, "step": 45283 }, { "epoch": 2.5357822824504423, "grad_norm": 1.0798572301864624, "learning_rate": 9.33513157894737e-05, "loss": 0.3787, "step": 45284 }, { "epoch": 2.5358382797625714, "grad_norm": 1.2517855167388916, "learning_rate": 9.335105263157895e-05, "loss": 0.3608, "step": 45285 }, { "epoch": 2.5358942770747004, "grad_norm": 1.2725660800933838, "learning_rate": 9.335078947368421e-05, "loss": 0.3656, "step": 45286 }, { "epoch": 2.5359502743868294, "grad_norm": 1.1492847204208374, "learning_rate": 9.335052631578947e-05, "loss": 0.412, "step": 45287 }, { "epoch": 2.5360062716989584, "grad_norm": 1.1615641117095947, "learning_rate": 9.335026315789475e-05, "loss": 0.4044, "step": 45288 }, { "epoch": 2.5360622690110874, "grad_norm": 1.2334094047546387, "learning_rate": 9.335e-05, "loss": 0.3515, "step": 45289 }, { "epoch": 2.5361182663232165, "grad_norm": 1.7674583196640015, "learning_rate": 9.334973684210528e-05, "loss": 0.4485, "step": 45290 }, { "epoch": 2.5361742636353455, "grad_norm": 1.076705813407898, "learning_rate": 
9.334947368421053e-05, "loss": 0.3818, "step": 45291 }, { "epoch": 2.5362302609474745, "grad_norm": 1.0605013370513916, "learning_rate": 9.334921052631578e-05, "loss": 0.5019, "step": 45292 }, { "epoch": 2.5362862582596035, "grad_norm": 1.206882357597351, "learning_rate": 9.334894736842106e-05, "loss": 0.357, "step": 45293 }, { "epoch": 2.5363422555717325, "grad_norm": 1.3598541021347046, "learning_rate": 9.334868421052632e-05, "loss": 0.4731, "step": 45294 }, { "epoch": 2.5363982528838616, "grad_norm": 1.396519422531128, "learning_rate": 9.334842105263159e-05, "loss": 0.549, "step": 45295 }, { "epoch": 2.5364542501959906, "grad_norm": 0.9747830033302307, "learning_rate": 9.334815789473684e-05, "loss": 0.3111, "step": 45296 }, { "epoch": 2.5365102475081196, "grad_norm": 1.3529704809188843, "learning_rate": 9.334789473684211e-05, "loss": 0.4898, "step": 45297 }, { "epoch": 2.5365662448202486, "grad_norm": 1.397033452987671, "learning_rate": 9.334763157894737e-05, "loss": 0.4368, "step": 45298 }, { "epoch": 2.5366222421323776, "grad_norm": 1.7389013767242432, "learning_rate": 9.334736842105264e-05, "loss": 0.5111, "step": 45299 }, { "epoch": 2.5366782394445067, "grad_norm": 1.1006819009780884, "learning_rate": 9.33471052631579e-05, "loss": 0.432, "step": 45300 }, { "epoch": 2.5367342367566357, "grad_norm": 0.9172592759132385, "learning_rate": 9.334684210526316e-05, "loss": 0.2798, "step": 45301 }, { "epoch": 2.5367902340687647, "grad_norm": 1.7873425483703613, "learning_rate": 9.334657894736842e-05, "loss": 0.506, "step": 45302 }, { "epoch": 2.5368462313808937, "grad_norm": 1.2786563634872437, "learning_rate": 9.33463157894737e-05, "loss": 0.4152, "step": 45303 }, { "epoch": 2.5369022286930227, "grad_norm": 1.6191579103469849, "learning_rate": 9.334605263157896e-05, "loss": 0.3895, "step": 45304 }, { "epoch": 2.5369582260051518, "grad_norm": 1.182167887687683, "learning_rate": 9.334578947368422e-05, "loss": 0.3104, "step": 45305 }, { "epoch": 2.5370142233172808, 
"grad_norm": 1.30289888381958, "learning_rate": 9.334552631578948e-05, "loss": 0.4209, "step": 45306 }, { "epoch": 2.53707022062941, "grad_norm": 1.35088312625885, "learning_rate": 9.334526315789475e-05, "loss": 0.4159, "step": 45307 }, { "epoch": 2.537126217941539, "grad_norm": 1.144099473953247, "learning_rate": 9.334500000000001e-05, "loss": 0.336, "step": 45308 }, { "epoch": 2.537182215253668, "grad_norm": 1.0986301898956299, "learning_rate": 9.334473684210527e-05, "loss": 0.436, "step": 45309 }, { "epoch": 2.537238212565797, "grad_norm": 1.4699419736862183, "learning_rate": 9.334447368421053e-05, "loss": 0.4374, "step": 45310 }, { "epoch": 2.537294209877926, "grad_norm": 1.1298091411590576, "learning_rate": 9.334421052631579e-05, "loss": 0.341, "step": 45311 }, { "epoch": 2.537350207190055, "grad_norm": 1.1286169290542603, "learning_rate": 9.334394736842106e-05, "loss": 0.396, "step": 45312 }, { "epoch": 2.537406204502184, "grad_norm": 1.0614938735961914, "learning_rate": 9.334368421052632e-05, "loss": 0.3653, "step": 45313 }, { "epoch": 2.537462201814313, "grad_norm": 1.449526309967041, "learning_rate": 9.334342105263158e-05, "loss": 0.3266, "step": 45314 }, { "epoch": 2.537518199126442, "grad_norm": 1.019828200340271, "learning_rate": 9.334315789473684e-05, "loss": 0.4084, "step": 45315 }, { "epoch": 2.537574196438571, "grad_norm": 1.0963102579116821, "learning_rate": 9.334289473684211e-05, "loss": 0.294, "step": 45316 }, { "epoch": 2.5376301937507, "grad_norm": 1.0415846109390259, "learning_rate": 9.334263157894737e-05, "loss": 0.4018, "step": 45317 }, { "epoch": 2.537686191062829, "grad_norm": 1.3552651405334473, "learning_rate": 9.334236842105263e-05, "loss": 0.3391, "step": 45318 }, { "epoch": 2.537742188374958, "grad_norm": 1.0553205013275146, "learning_rate": 9.334210526315789e-05, "loss": 0.4012, "step": 45319 }, { "epoch": 2.537798185687087, "grad_norm": 1.4028840065002441, "learning_rate": 9.334184210526317e-05, "loss": 0.5818, "step": 45320 }, { 
"epoch": 2.537854182999216, "grad_norm": 1.035807490348816, "learning_rate": 9.334157894736843e-05, "loss": 0.3006, "step": 45321 }, { "epoch": 2.537910180311345, "grad_norm": 0.9817575216293335, "learning_rate": 9.33413157894737e-05, "loss": 0.3093, "step": 45322 }, { "epoch": 2.537966177623474, "grad_norm": 1.3165676593780518, "learning_rate": 9.334105263157894e-05, "loss": 0.509, "step": 45323 }, { "epoch": 2.538022174935603, "grad_norm": 1.2350512742996216, "learning_rate": 9.334078947368422e-05, "loss": 0.4871, "step": 45324 }, { "epoch": 2.538078172247732, "grad_norm": 1.2546879053115845, "learning_rate": 9.334052631578948e-05, "loss": 0.3917, "step": 45325 }, { "epoch": 2.538134169559861, "grad_norm": 1.2261762619018555, "learning_rate": 9.334026315789474e-05, "loss": 0.3834, "step": 45326 }, { "epoch": 2.53819016687199, "grad_norm": 1.1783288717269897, "learning_rate": 9.334000000000001e-05, "loss": 0.3996, "step": 45327 }, { "epoch": 2.538246164184119, "grad_norm": 1.246935486793518, "learning_rate": 9.333973684210526e-05, "loss": 0.3269, "step": 45328 }, { "epoch": 2.538302161496248, "grad_norm": 1.1102646589279175, "learning_rate": 9.333947368421053e-05, "loss": 0.3952, "step": 45329 }, { "epoch": 2.5383581588083772, "grad_norm": 1.8977389335632324, "learning_rate": 9.333921052631579e-05, "loss": 0.3676, "step": 45330 }, { "epoch": 2.5384141561205062, "grad_norm": 1.0553873777389526, "learning_rate": 9.333894736842106e-05, "loss": 0.3199, "step": 45331 }, { "epoch": 2.5384701534326353, "grad_norm": 1.1308423280715942, "learning_rate": 9.333868421052632e-05, "loss": 0.4141, "step": 45332 }, { "epoch": 2.5385261507447643, "grad_norm": 1.1441901922225952, "learning_rate": 9.333842105263158e-05, "loss": 0.3888, "step": 45333 }, { "epoch": 2.5385821480568933, "grad_norm": 1.1931356191635132, "learning_rate": 9.333815789473684e-05, "loss": 0.3902, "step": 45334 }, { "epoch": 2.5386381453690223, "grad_norm": 1.1282448768615723, "learning_rate": 
9.333789473684212e-05, "loss": 0.3455, "step": 45335 }, { "epoch": 2.5386941426811513, "grad_norm": 1.3573325872421265, "learning_rate": 9.333763157894738e-05, "loss": 0.3766, "step": 45336 }, { "epoch": 2.5387501399932804, "grad_norm": 1.047177791595459, "learning_rate": 9.333736842105264e-05, "loss": 0.4228, "step": 45337 }, { "epoch": 2.5388061373054094, "grad_norm": 1.1990817785263062, "learning_rate": 9.33371052631579e-05, "loss": 0.408, "step": 45338 }, { "epoch": 2.5388621346175384, "grad_norm": 1.1718990802764893, "learning_rate": 9.333684210526317e-05, "loss": 0.4653, "step": 45339 }, { "epoch": 2.5389181319296674, "grad_norm": 1.2509409189224243, "learning_rate": 9.333657894736843e-05, "loss": 0.4103, "step": 45340 }, { "epoch": 2.5389741292417964, "grad_norm": 1.145836353302002, "learning_rate": 9.333631578947369e-05, "loss": 0.3526, "step": 45341 }, { "epoch": 2.5390301265539255, "grad_norm": 1.0795141458511353, "learning_rate": 9.333605263157895e-05, "loss": 0.2988, "step": 45342 }, { "epoch": 2.5390861238660545, "grad_norm": 1.1540536880493164, "learning_rate": 9.333578947368421e-05, "loss": 0.3905, "step": 45343 }, { "epoch": 2.5391421211781835, "grad_norm": 1.049790620803833, "learning_rate": 9.333552631578948e-05, "loss": 0.4103, "step": 45344 }, { "epoch": 2.5391981184903125, "grad_norm": 1.0093870162963867, "learning_rate": 9.333526315789474e-05, "loss": 0.3091, "step": 45345 }, { "epoch": 2.5392541158024415, "grad_norm": 1.8718820810317993, "learning_rate": 9.3335e-05, "loss": 0.4476, "step": 45346 }, { "epoch": 2.5393101131145706, "grad_norm": 0.969196081161499, "learning_rate": 9.333473684210526e-05, "loss": 0.3074, "step": 45347 }, { "epoch": 2.5393661104266996, "grad_norm": 1.3815734386444092, "learning_rate": 9.333447368421053e-05, "loss": 0.3527, "step": 45348 }, { "epoch": 2.5394221077388286, "grad_norm": 1.0754064321517944, "learning_rate": 9.333421052631579e-05, "loss": 0.3193, "step": 45349 }, { "epoch": 2.5394781050509576, 
"grad_norm": 1.1894993782043457, "learning_rate": 9.333394736842107e-05, "loss": 0.4357, "step": 45350 }, { "epoch": 2.5395341023630866, "grad_norm": 1.2097920179367065, "learning_rate": 9.333368421052631e-05, "loss": 0.4072, "step": 45351 }, { "epoch": 2.5395900996752157, "grad_norm": 1.1850552558898926, "learning_rate": 9.333342105263159e-05, "loss": 0.4633, "step": 45352 }, { "epoch": 2.5396460969873447, "grad_norm": 1.4764612913131714, "learning_rate": 9.333315789473685e-05, "loss": 0.4032, "step": 45353 }, { "epoch": 2.5397020942994737, "grad_norm": 1.0935046672821045, "learning_rate": 9.333289473684212e-05, "loss": 0.3736, "step": 45354 }, { "epoch": 2.5397580916116027, "grad_norm": 1.6461457014083862, "learning_rate": 9.333263157894738e-05, "loss": 0.3943, "step": 45355 }, { "epoch": 2.5398140889237317, "grad_norm": 1.2088853120803833, "learning_rate": 9.333236842105264e-05, "loss": 0.3462, "step": 45356 }, { "epoch": 2.5398700862358607, "grad_norm": 1.3382636308670044, "learning_rate": 9.33321052631579e-05, "loss": 0.4208, "step": 45357 }, { "epoch": 2.5399260835479898, "grad_norm": 1.1970593929290771, "learning_rate": 9.333184210526317e-05, "loss": 0.3519, "step": 45358 }, { "epoch": 2.539982080860119, "grad_norm": 1.3002225160598755, "learning_rate": 9.333157894736843e-05, "loss": 0.4825, "step": 45359 }, { "epoch": 2.540038078172248, "grad_norm": 1.05095374584198, "learning_rate": 9.333131578947368e-05, "loss": 0.4767, "step": 45360 }, { "epoch": 2.540094075484377, "grad_norm": 1.097488284111023, "learning_rate": 9.333105263157895e-05, "loss": 0.4171, "step": 45361 }, { "epoch": 2.540150072796506, "grad_norm": 1.2171536684036255, "learning_rate": 9.333078947368421e-05, "loss": 0.4819, "step": 45362 }, { "epoch": 2.540206070108635, "grad_norm": 1.0202728509902954, "learning_rate": 9.333052631578948e-05, "loss": 0.2963, "step": 45363 }, { "epoch": 2.540262067420764, "grad_norm": 1.006606101989746, "learning_rate": 9.333026315789474e-05, "loss": 0.4239, 
"step": 45364 }, { "epoch": 2.540318064732893, "grad_norm": 1.2740468978881836, "learning_rate": 9.333e-05, "loss": 0.4306, "step": 45365 }, { "epoch": 2.540374062045022, "grad_norm": 0.925267219543457, "learning_rate": 9.332973684210526e-05, "loss": 0.2875, "step": 45366 }, { "epoch": 2.540430059357151, "grad_norm": 1.3783094882965088, "learning_rate": 9.332947368421054e-05, "loss": 0.3498, "step": 45367 }, { "epoch": 2.54048605666928, "grad_norm": 1.3869962692260742, "learning_rate": 9.33292105263158e-05, "loss": 0.3389, "step": 45368 }, { "epoch": 2.540542053981409, "grad_norm": 1.250387191772461, "learning_rate": 9.332894736842106e-05, "loss": 0.4545, "step": 45369 }, { "epoch": 2.540598051293538, "grad_norm": 1.0628342628479004, "learning_rate": 9.332868421052631e-05, "loss": 0.3775, "step": 45370 }, { "epoch": 2.540654048605667, "grad_norm": 1.237439751625061, "learning_rate": 9.332842105263159e-05, "loss": 0.3827, "step": 45371 }, { "epoch": 2.540710045917796, "grad_norm": 1.0984333753585815, "learning_rate": 9.332815789473685e-05, "loss": 0.4277, "step": 45372 }, { "epoch": 2.540766043229925, "grad_norm": 1.289589524269104, "learning_rate": 9.332789473684211e-05, "loss": 0.4805, "step": 45373 }, { "epoch": 2.540822040542054, "grad_norm": 1.5857789516448975, "learning_rate": 9.332763157894737e-05, "loss": 0.5066, "step": 45374 }, { "epoch": 2.540878037854183, "grad_norm": 1.4003747701644897, "learning_rate": 9.332736842105264e-05, "loss": 0.4243, "step": 45375 }, { "epoch": 2.540934035166312, "grad_norm": 1.0537551641464233, "learning_rate": 9.33271052631579e-05, "loss": 0.3499, "step": 45376 }, { "epoch": 2.540990032478441, "grad_norm": 1.1186370849609375, "learning_rate": 9.332684210526316e-05, "loss": 0.3504, "step": 45377 }, { "epoch": 2.54104602979057, "grad_norm": 1.3935378789901733, "learning_rate": 9.332657894736842e-05, "loss": 0.5814, "step": 45378 }, { "epoch": 2.541102027102699, "grad_norm": 1.0203542709350586, "learning_rate": 
9.332631578947368e-05, "loss": 0.4926, "step": 45379 }, { "epoch": 2.541158024414828, "grad_norm": 1.0868418216705322, "learning_rate": 9.332605263157895e-05, "loss": 0.2937, "step": 45380 }, { "epoch": 2.541214021726957, "grad_norm": 1.2829493284225464, "learning_rate": 9.332578947368421e-05, "loss": 0.5876, "step": 45381 }, { "epoch": 2.5412700190390862, "grad_norm": 1.0392296314239502, "learning_rate": 9.332552631578949e-05, "loss": 0.3307, "step": 45382 }, { "epoch": 2.5413260163512152, "grad_norm": 1.1340253353118896, "learning_rate": 9.332526315789473e-05, "loss": 0.4648, "step": 45383 }, { "epoch": 2.5413820136633443, "grad_norm": 1.3888187408447266, "learning_rate": 9.3325e-05, "loss": 0.3755, "step": 45384 }, { "epoch": 2.5414380109754733, "grad_norm": 1.1885852813720703, "learning_rate": 9.332473684210526e-05, "loss": 0.5954, "step": 45385 }, { "epoch": 2.5414940082876023, "grad_norm": 0.9921424984931946, "learning_rate": 9.332447368421054e-05, "loss": 0.4178, "step": 45386 }, { "epoch": 2.5415500055997313, "grad_norm": 0.9037467241287231, "learning_rate": 9.33242105263158e-05, "loss": 0.3812, "step": 45387 }, { "epoch": 2.5416060029118603, "grad_norm": 1.3360651731491089, "learning_rate": 9.332394736842106e-05, "loss": 0.3397, "step": 45388 }, { "epoch": 2.5416620002239894, "grad_norm": 1.2790435552597046, "learning_rate": 9.332368421052632e-05, "loss": 0.3841, "step": 45389 }, { "epoch": 2.5417179975361184, "grad_norm": 1.0188674926757812, "learning_rate": 9.332342105263159e-05, "loss": 0.3353, "step": 45390 }, { "epoch": 2.5417739948482474, "grad_norm": 1.1912813186645508, "learning_rate": 9.332315789473685e-05, "loss": 0.3891, "step": 45391 }, { "epoch": 2.5418299921603764, "grad_norm": 1.2114315032958984, "learning_rate": 9.332289473684211e-05, "loss": 0.3174, "step": 45392 }, { "epoch": 2.5418859894725054, "grad_norm": 1.1330804824829102, "learning_rate": 9.332263157894737e-05, "loss": 0.4282, "step": 45393 }, { "epoch": 2.5419419867846345, 
"grad_norm": 1.0338659286499023, "learning_rate": 9.332236842105263e-05, "loss": 0.4529, "step": 45394 }, { "epoch": 2.5419979840967635, "grad_norm": 1.4313313961029053, "learning_rate": 9.33221052631579e-05, "loss": 0.3806, "step": 45395 }, { "epoch": 2.5420539814088925, "grad_norm": 1.3672512769699097, "learning_rate": 9.332184210526316e-05, "loss": 0.3381, "step": 45396 }, { "epoch": 2.5421099787210215, "grad_norm": 1.4325401782989502, "learning_rate": 9.332157894736842e-05, "loss": 0.5508, "step": 45397 }, { "epoch": 2.5421659760331505, "grad_norm": 1.0843567848205566, "learning_rate": 9.332131578947368e-05, "loss": 0.3739, "step": 45398 }, { "epoch": 2.5422219733452796, "grad_norm": 1.3448868989944458, "learning_rate": 9.332105263157896e-05, "loss": 0.4028, "step": 45399 }, { "epoch": 2.5422779706574086, "grad_norm": 0.9155479073524475, "learning_rate": 9.332078947368422e-05, "loss": 0.313, "step": 45400 }, { "epoch": 2.5423339679695376, "grad_norm": 1.1040098667144775, "learning_rate": 9.332052631578947e-05, "loss": 0.6229, "step": 45401 }, { "epoch": 2.5423899652816666, "grad_norm": 1.0508986711502075, "learning_rate": 9.332026315789473e-05, "loss": 0.3536, "step": 45402 }, { "epoch": 2.5424459625937956, "grad_norm": 1.090962529182434, "learning_rate": 9.332000000000001e-05, "loss": 0.3271, "step": 45403 }, { "epoch": 2.5425019599059246, "grad_norm": 1.3420130014419556, "learning_rate": 9.331973684210527e-05, "loss": 0.3727, "step": 45404 }, { "epoch": 2.5425579572180537, "grad_norm": 1.089132308959961, "learning_rate": 9.331947368421054e-05, "loss": 0.346, "step": 45405 }, { "epoch": 2.5426139545301827, "grad_norm": 1.1936652660369873, "learning_rate": 9.331921052631579e-05, "loss": 0.3804, "step": 45406 }, { "epoch": 2.5426699518423117, "grad_norm": 1.0668883323669434, "learning_rate": 9.331894736842106e-05, "loss": 0.3809, "step": 45407 }, { "epoch": 2.5427259491544407, "grad_norm": 1.3738937377929688, "learning_rate": 9.331868421052632e-05, "loss": 
0.4322, "step": 45408 }, { "epoch": 2.5427819464665697, "grad_norm": 1.1574946641921997, "learning_rate": 9.33184210526316e-05, "loss": 0.3818, "step": 45409 }, { "epoch": 2.5428379437786988, "grad_norm": 1.4381228685379028, "learning_rate": 9.331815789473685e-05, "loss": 0.4719, "step": 45410 }, { "epoch": 2.542893941090828, "grad_norm": 1.4009007215499878, "learning_rate": 9.331789473684211e-05, "loss": 0.4715, "step": 45411 }, { "epoch": 2.542949938402957, "grad_norm": 1.1210962533950806, "learning_rate": 9.331763157894737e-05, "loss": 0.309, "step": 45412 }, { "epoch": 2.543005935715086, "grad_norm": 1.0451971292495728, "learning_rate": 9.331736842105263e-05, "loss": 0.3305, "step": 45413 }, { "epoch": 2.543061933027215, "grad_norm": 1.4638595581054688, "learning_rate": 9.33171052631579e-05, "loss": 0.4155, "step": 45414 }, { "epoch": 2.543117930339344, "grad_norm": 1.1173802614212036, "learning_rate": 9.331684210526315e-05, "loss": 0.5106, "step": 45415 }, { "epoch": 2.543173927651473, "grad_norm": 1.135046124458313, "learning_rate": 9.331657894736842e-05, "loss": 0.3677, "step": 45416 }, { "epoch": 2.543229924963602, "grad_norm": 1.26047945022583, "learning_rate": 9.331631578947368e-05, "loss": 0.3525, "step": 45417 }, { "epoch": 2.543285922275731, "grad_norm": 1.547135591506958, "learning_rate": 9.331605263157896e-05, "loss": 0.5662, "step": 45418 }, { "epoch": 2.54334191958786, "grad_norm": 1.1728248596191406, "learning_rate": 9.331578947368422e-05, "loss": 0.3539, "step": 45419 }, { "epoch": 2.543397916899989, "grad_norm": 1.2535096406936646, "learning_rate": 9.331552631578948e-05, "loss": 0.4595, "step": 45420 }, { "epoch": 2.543453914212118, "grad_norm": 1.23871648311615, "learning_rate": 9.331526315789474e-05, "loss": 0.3997, "step": 45421 }, { "epoch": 2.543509911524247, "grad_norm": 1.249550700187683, "learning_rate": 9.331500000000001e-05, "loss": 0.3602, "step": 45422 }, { "epoch": 2.543565908836376, "grad_norm": 1.2181986570358276, "learning_rate": 
9.331473684210527e-05, "loss": 0.3148, "step": 45423 }, { "epoch": 2.543621906148505, "grad_norm": 1.0738900899887085, "learning_rate": 9.331447368421053e-05, "loss": 0.3604, "step": 45424 }, { "epoch": 2.543677903460634, "grad_norm": 1.245262861251831, "learning_rate": 9.331421052631579e-05, "loss": 0.4391, "step": 45425 }, { "epoch": 2.543733900772763, "grad_norm": 1.3473206758499146, "learning_rate": 9.331394736842106e-05, "loss": 0.5701, "step": 45426 }, { "epoch": 2.543789898084892, "grad_norm": 1.2382371425628662, "learning_rate": 9.331368421052632e-05, "loss": 0.3456, "step": 45427 }, { "epoch": 2.543845895397021, "grad_norm": 1.2661610841751099, "learning_rate": 9.331342105263158e-05, "loss": 0.4297, "step": 45428 }, { "epoch": 2.54390189270915, "grad_norm": 1.0677636861801147, "learning_rate": 9.331315789473684e-05, "loss": 0.3873, "step": 45429 }, { "epoch": 2.543957890021279, "grad_norm": 1.1617891788482666, "learning_rate": 9.33128947368421e-05, "loss": 0.3592, "step": 45430 }, { "epoch": 2.544013887333408, "grad_norm": 1.2983496189117432, "learning_rate": 9.331263157894738e-05, "loss": 0.4615, "step": 45431 }, { "epoch": 2.5440698846455367, "grad_norm": 1.3749406337738037, "learning_rate": 9.331236842105263e-05, "loss": 0.5818, "step": 45432 }, { "epoch": 2.544125881957666, "grad_norm": 1.2134195566177368, "learning_rate": 9.33121052631579e-05, "loss": 0.4888, "step": 45433 }, { "epoch": 2.5441818792697948, "grad_norm": 0.9438391923904419, "learning_rate": 9.331184210526315e-05, "loss": 0.3715, "step": 45434 }, { "epoch": 2.5442378765819242, "grad_norm": 1.4753665924072266, "learning_rate": 9.331157894736843e-05, "loss": 0.4589, "step": 45435 }, { "epoch": 2.544293873894053, "grad_norm": 1.2656809091567993, "learning_rate": 9.331131578947369e-05, "loss": 0.4044, "step": 45436 }, { "epoch": 2.5443498712061823, "grad_norm": 1.1619596481323242, "learning_rate": 9.331105263157896e-05, "loss": 0.3703, "step": 45437 }, { "epoch": 2.544405868518311, 
"grad_norm": 1.2375251054763794, "learning_rate": 9.33107894736842e-05, "loss": 0.4455, "step": 45438 }, { "epoch": 2.5444618658304403, "grad_norm": 1.629641056060791, "learning_rate": 9.331052631578948e-05, "loss": 0.548, "step": 45439 }, { "epoch": 2.544517863142569, "grad_norm": 1.3591606616973877, "learning_rate": 9.331026315789474e-05, "loss": 0.4843, "step": 45440 }, { "epoch": 2.5445738604546984, "grad_norm": 1.1018253564834595, "learning_rate": 9.331000000000001e-05, "loss": 0.41, "step": 45441 }, { "epoch": 2.544629857766827, "grad_norm": 1.372522234916687, "learning_rate": 9.330973684210527e-05, "loss": 0.5011, "step": 45442 }, { "epoch": 2.5446858550789564, "grad_norm": 2.0608558654785156, "learning_rate": 9.330947368421053e-05, "loss": 0.4013, "step": 45443 }, { "epoch": 2.544741852391085, "grad_norm": 1.164616584777832, "learning_rate": 9.330921052631579e-05, "loss": 0.5258, "step": 45444 }, { "epoch": 2.5447978497032144, "grad_norm": 1.1393369436264038, "learning_rate": 9.330894736842107e-05, "loss": 0.3221, "step": 45445 }, { "epoch": 2.544853847015343, "grad_norm": 1.0013407468795776, "learning_rate": 9.330868421052633e-05, "loss": 0.4308, "step": 45446 }, { "epoch": 2.5449098443274725, "grad_norm": 1.4113047122955322, "learning_rate": 9.330842105263158e-05, "loss": 0.5197, "step": 45447 }, { "epoch": 2.544965841639601, "grad_norm": 1.1284620761871338, "learning_rate": 9.330815789473684e-05, "loss": 0.4599, "step": 45448 }, { "epoch": 2.5450218389517305, "grad_norm": 1.1629842519760132, "learning_rate": 9.33078947368421e-05, "loss": 0.3811, "step": 45449 }, { "epoch": 2.545077836263859, "grad_norm": 0.9989349246025085, "learning_rate": 9.330763157894738e-05, "loss": 0.2139, "step": 45450 }, { "epoch": 2.5451338335759885, "grad_norm": 1.1623722314834595, "learning_rate": 9.330736842105264e-05, "loss": 0.4573, "step": 45451 }, { "epoch": 2.545189830888117, "grad_norm": 1.3423378467559814, "learning_rate": 9.33071052631579e-05, "loss": 0.39, "step": 
45452 }, { "epoch": 2.5452458282002466, "grad_norm": 1.486692190170288, "learning_rate": 9.330684210526316e-05, "loss": 0.6149, "step": 45453 }, { "epoch": 2.545301825512375, "grad_norm": 1.2401117086410522, "learning_rate": 9.330657894736843e-05, "loss": 0.4917, "step": 45454 }, { "epoch": 2.5453578228245046, "grad_norm": 1.0880720615386963, "learning_rate": 9.330631578947369e-05, "loss": 0.4575, "step": 45455 }, { "epoch": 2.545413820136633, "grad_norm": 1.3601131439208984, "learning_rate": 9.330605263157895e-05, "loss": 0.5534, "step": 45456 }, { "epoch": 2.5454698174487627, "grad_norm": 1.218003749847412, "learning_rate": 9.330578947368421e-05, "loss": 0.372, "step": 45457 }, { "epoch": 2.5455258147608912, "grad_norm": 1.161191701889038, "learning_rate": 9.330552631578948e-05, "loss": 0.352, "step": 45458 }, { "epoch": 2.5455818120730207, "grad_norm": 1.1991173028945923, "learning_rate": 9.330526315789474e-05, "loss": 0.4329, "step": 45459 }, { "epoch": 2.5456378093851493, "grad_norm": 1.2213611602783203, "learning_rate": 9.330500000000002e-05, "loss": 0.3869, "step": 45460 }, { "epoch": 2.5456938066972787, "grad_norm": 1.490718126296997, "learning_rate": 9.330473684210526e-05, "loss": 0.4245, "step": 45461 }, { "epoch": 2.5457498040094073, "grad_norm": 1.0505620241165161, "learning_rate": 9.330447368421053e-05, "loss": 0.3715, "step": 45462 }, { "epoch": 2.5458058013215368, "grad_norm": 1.304104208946228, "learning_rate": 9.33042105263158e-05, "loss": 0.4309, "step": 45463 }, { "epoch": 2.5458617986336654, "grad_norm": 1.1874130964279175, "learning_rate": 9.330394736842105e-05, "loss": 0.6305, "step": 45464 }, { "epoch": 2.545917795945795, "grad_norm": 1.0002580881118774, "learning_rate": 9.330368421052631e-05, "loss": 0.3999, "step": 45465 }, { "epoch": 2.5459737932579234, "grad_norm": 1.0980788469314575, "learning_rate": 9.330342105263157e-05, "loss": 0.3493, "step": 45466 }, { "epoch": 2.546029790570053, "grad_norm": 1.0435987710952759, "learning_rate": 
9.330315789473685e-05, "loss": 0.3904, "step": 45467 }, { "epoch": 2.5460857878821814, "grad_norm": 1.568314552307129, "learning_rate": 9.330289473684211e-05, "loss": 0.383, "step": 45468 }, { "epoch": 2.546141785194311, "grad_norm": 1.3833121061325073, "learning_rate": 9.330263157894738e-05, "loss": 0.4097, "step": 45469 }, { "epoch": 2.5461977825064395, "grad_norm": 1.0114573240280151, "learning_rate": 9.330236842105263e-05, "loss": 0.3791, "step": 45470 }, { "epoch": 2.546253779818569, "grad_norm": 0.9822787046432495, "learning_rate": 9.33021052631579e-05, "loss": 0.3211, "step": 45471 }, { "epoch": 2.5463097771306975, "grad_norm": 1.2400672435760498, "learning_rate": 9.330184210526316e-05, "loss": 0.5172, "step": 45472 }, { "epoch": 2.546365774442827, "grad_norm": 1.0592838525772095, "learning_rate": 9.330157894736843e-05, "loss": 0.3034, "step": 45473 }, { "epoch": 2.5464217717549555, "grad_norm": 1.0443229675292969, "learning_rate": 9.330131578947369e-05, "loss": 0.4256, "step": 45474 }, { "epoch": 2.546477769067085, "grad_norm": 1.2022860050201416, "learning_rate": 9.330105263157895e-05, "loss": 0.3589, "step": 45475 }, { "epoch": 2.5465337663792136, "grad_norm": 1.1963139772415161, "learning_rate": 9.330078947368421e-05, "loss": 0.4322, "step": 45476 }, { "epoch": 2.546589763691343, "grad_norm": 1.0549054145812988, "learning_rate": 9.330052631578949e-05, "loss": 0.3288, "step": 45477 }, { "epoch": 2.5466457610034716, "grad_norm": 1.3584388494491577, "learning_rate": 9.330026315789474e-05, "loss": 0.4263, "step": 45478 }, { "epoch": 2.546701758315601, "grad_norm": 1.3955621719360352, "learning_rate": 9.33e-05, "loss": 0.4291, "step": 45479 }, { "epoch": 2.5467577556277297, "grad_norm": 1.454166293144226, "learning_rate": 9.329973684210526e-05, "loss": 0.5663, "step": 45480 }, { "epoch": 2.546813752939859, "grad_norm": 1.1491954326629639, "learning_rate": 9.329947368421052e-05, "loss": 0.398, "step": 45481 }, { "epoch": 2.5468697502519877, "grad_norm": 
1.2691184282302856, "learning_rate": 9.32992105263158e-05, "loss": 0.3803, "step": 45482 }, { "epoch": 2.546925747564117, "grad_norm": 1.129529595375061, "learning_rate": 9.329894736842106e-05, "loss": 0.3835, "step": 45483 }, { "epoch": 2.5469817448762457, "grad_norm": 1.1957905292510986, "learning_rate": 9.329868421052632e-05, "loss": 0.4623, "step": 45484 }, { "epoch": 2.547037742188375, "grad_norm": 1.1748223304748535, "learning_rate": 9.329842105263158e-05, "loss": 0.4202, "step": 45485 }, { "epoch": 2.5470937395005038, "grad_norm": 1.213054895401001, "learning_rate": 9.329815789473685e-05, "loss": 0.4349, "step": 45486 }, { "epoch": 2.5471497368126332, "grad_norm": 1.1977232694625854, "learning_rate": 9.329789473684211e-05, "loss": 0.316, "step": 45487 }, { "epoch": 2.547205734124762, "grad_norm": 1.4981600046157837, "learning_rate": 9.329763157894737e-05, "loss": 0.3926, "step": 45488 }, { "epoch": 2.5472617314368913, "grad_norm": 1.3002710342407227, "learning_rate": 9.329736842105263e-05, "loss": 0.4907, "step": 45489 }, { "epoch": 2.54731772874902, "grad_norm": 1.3732635974884033, "learning_rate": 9.32971052631579e-05, "loss": 0.5696, "step": 45490 }, { "epoch": 2.5473737260611493, "grad_norm": 1.5697863101959229, "learning_rate": 9.329684210526316e-05, "loss": 0.362, "step": 45491 }, { "epoch": 2.547429723373278, "grad_norm": 1.3127086162567139, "learning_rate": 9.329657894736844e-05, "loss": 0.3902, "step": 45492 }, { "epoch": 2.5474857206854074, "grad_norm": 1.1133718490600586, "learning_rate": 9.329631578947368e-05, "loss": 0.4, "step": 45493 }, { "epoch": 2.547541717997536, "grad_norm": 1.1892738342285156, "learning_rate": 9.329605263157895e-05, "loss": 0.3888, "step": 45494 }, { "epoch": 2.5475977153096654, "grad_norm": 1.3525183200836182, "learning_rate": 9.329578947368421e-05, "loss": 0.4122, "step": 45495 }, { "epoch": 2.547653712621794, "grad_norm": 1.0153789520263672, "learning_rate": 9.329552631578949e-05, "loss": 0.4387, "step": 45496 }, { 
"epoch": 2.5477097099339234, "grad_norm": 1.2033063173294067, "learning_rate": 9.329526315789475e-05, "loss": 0.456, "step": 45497 }, { "epoch": 2.547765707246052, "grad_norm": 1.0247381925582886, "learning_rate": 9.3295e-05, "loss": 0.4086, "step": 45498 }, { "epoch": 2.5478217045581815, "grad_norm": 1.1156585216522217, "learning_rate": 9.329473684210527e-05, "loss": 0.4221, "step": 45499 }, { "epoch": 2.54787770187031, "grad_norm": 1.5074691772460938, "learning_rate": 9.329447368421053e-05, "loss": 0.3805, "step": 45500 }, { "epoch": 2.5479336991824395, "grad_norm": 1.043045997619629, "learning_rate": 9.32942105263158e-05, "loss": 0.3723, "step": 45501 }, { "epoch": 2.547989696494568, "grad_norm": 1.202847957611084, "learning_rate": 9.329394736842106e-05, "loss": 0.3136, "step": 45502 }, { "epoch": 2.5480456938066975, "grad_norm": 1.126823902130127, "learning_rate": 9.329368421052632e-05, "loss": 0.2795, "step": 45503 }, { "epoch": 2.548101691118826, "grad_norm": 1.3956373929977417, "learning_rate": 9.329342105263158e-05, "loss": 0.3798, "step": 45504 }, { "epoch": 2.5481576884309556, "grad_norm": 1.2856290340423584, "learning_rate": 9.329315789473685e-05, "loss": 0.4415, "step": 45505 }, { "epoch": 2.548213685743084, "grad_norm": 1.1042677164077759, "learning_rate": 9.329289473684211e-05, "loss": 0.3779, "step": 45506 }, { "epoch": 2.5482696830552136, "grad_norm": 1.2296535968780518, "learning_rate": 9.329263157894737e-05, "loss": 0.4062, "step": 45507 }, { "epoch": 2.548325680367342, "grad_norm": 1.1364452838897705, "learning_rate": 9.329236842105263e-05, "loss": 0.4145, "step": 45508 }, { "epoch": 2.5483816776794717, "grad_norm": 1.105017066001892, "learning_rate": 9.32921052631579e-05, "loss": 0.374, "step": 45509 }, { "epoch": 2.5484376749916002, "grad_norm": 1.396712064743042, "learning_rate": 9.329184210526316e-05, "loss": 0.4227, "step": 45510 }, { "epoch": 2.5484936723037297, "grad_norm": 1.1925255060195923, "learning_rate": 9.329157894736842e-05, 
"loss": 0.4084, "step": 45511 }, { "epoch": 2.5485496696158583, "grad_norm": 1.0134319067001343, "learning_rate": 9.329131578947368e-05, "loss": 0.2773, "step": 45512 }, { "epoch": 2.5486056669279877, "grad_norm": 1.4131910800933838, "learning_rate": 9.329105263157896e-05, "loss": 0.4967, "step": 45513 }, { "epoch": 2.5486616642401163, "grad_norm": 1.197297215461731, "learning_rate": 9.329078947368422e-05, "loss": 0.4896, "step": 45514 }, { "epoch": 2.5487176615522453, "grad_norm": 1.1274511814117432, "learning_rate": 9.329052631578948e-05, "loss": 0.451, "step": 45515 }, { "epoch": 2.5487736588643743, "grad_norm": 1.1043752431869507, "learning_rate": 9.329026315789474e-05, "loss": 0.3595, "step": 45516 }, { "epoch": 2.5488296561765034, "grad_norm": 1.386168122291565, "learning_rate": 9.329e-05, "loss": 0.4609, "step": 45517 }, { "epoch": 2.5488856534886324, "grad_norm": 1.2402522563934326, "learning_rate": 9.328973684210527e-05, "loss": 0.3086, "step": 45518 }, { "epoch": 2.5489416508007614, "grad_norm": 1.0370854139328003, "learning_rate": 9.328947368421053e-05, "loss": 0.3099, "step": 45519 }, { "epoch": 2.5489976481128904, "grad_norm": 3.557767152786255, "learning_rate": 9.328921052631579e-05, "loss": 0.4006, "step": 45520 }, { "epoch": 2.5490536454250194, "grad_norm": 1.1421376466751099, "learning_rate": 9.328894736842105e-05, "loss": 0.3455, "step": 45521 }, { "epoch": 2.5491096427371485, "grad_norm": 1.2116714715957642, "learning_rate": 9.328868421052632e-05, "loss": 0.4625, "step": 45522 }, { "epoch": 2.5491656400492775, "grad_norm": 1.2807080745697021, "learning_rate": 9.328842105263158e-05, "loss": 0.3261, "step": 45523 }, { "epoch": 2.5492216373614065, "grad_norm": 1.1547012329101562, "learning_rate": 9.328815789473685e-05, "loss": 0.4012, "step": 45524 }, { "epoch": 2.5492776346735355, "grad_norm": 1.322356104850769, "learning_rate": 9.32878947368421e-05, "loss": 0.4321, "step": 45525 }, { "epoch": 2.5493336319856645, "grad_norm": 1.0448459386825562, 
"learning_rate": 9.328763157894737e-05, "loss": 0.3675, "step": 45526 }, { "epoch": 2.5493896292977936, "grad_norm": 1.3963772058486938, "learning_rate": 9.328736842105263e-05, "loss": 0.6169, "step": 45527 }, { "epoch": 2.5494456266099226, "grad_norm": 1.1270365715026855, "learning_rate": 9.328710526315791e-05, "loss": 0.3338, "step": 45528 }, { "epoch": 2.5495016239220516, "grad_norm": 1.1276499032974243, "learning_rate": 9.328684210526317e-05, "loss": 0.3833, "step": 45529 }, { "epoch": 2.5495576212341806, "grad_norm": 1.1946803331375122, "learning_rate": 9.328657894736843e-05, "loss": 0.4669, "step": 45530 }, { "epoch": 2.5496136185463096, "grad_norm": 1.0077214241027832, "learning_rate": 9.328631578947369e-05, "loss": 0.3313, "step": 45531 }, { "epoch": 2.5496696158584387, "grad_norm": 1.118607521057129, "learning_rate": 9.328605263157896e-05, "loss": 0.4058, "step": 45532 }, { "epoch": 2.5497256131705677, "grad_norm": 1.1313105821609497, "learning_rate": 9.328578947368422e-05, "loss": 0.3715, "step": 45533 }, { "epoch": 2.5497816104826967, "grad_norm": 1.1950281858444214, "learning_rate": 9.328552631578948e-05, "loss": 0.3671, "step": 45534 }, { "epoch": 2.5498376077948257, "grad_norm": 0.9588315486907959, "learning_rate": 9.328526315789474e-05, "loss": 0.28, "step": 45535 }, { "epoch": 2.5498936051069547, "grad_norm": 1.04447340965271, "learning_rate": 9.3285e-05, "loss": 0.3435, "step": 45536 }, { "epoch": 2.5499496024190837, "grad_norm": 1.1422456502914429, "learning_rate": 9.328473684210527e-05, "loss": 0.3618, "step": 45537 }, { "epoch": 2.5500055997312128, "grad_norm": 1.268076777458191, "learning_rate": 9.328447368421053e-05, "loss": 0.4105, "step": 45538 }, { "epoch": 2.550061597043342, "grad_norm": 1.2519358396530151, "learning_rate": 9.328421052631579e-05, "loss": 0.382, "step": 45539 }, { "epoch": 2.550117594355471, "grad_norm": 1.3188177347183228, "learning_rate": 9.328394736842105e-05, "loss": 0.2919, "step": 45540 }, { "epoch": 2.5501735916676, 
"grad_norm": 1.4831511974334717, "learning_rate": 9.328368421052632e-05, "loss": 0.4791, "step": 45541 }, { "epoch": 2.550229588979729, "grad_norm": 1.0797375440597534, "learning_rate": 9.328342105263158e-05, "loss": 0.4061, "step": 45542 }, { "epoch": 2.550285586291858, "grad_norm": 1.144763469696045, "learning_rate": 9.328315789473684e-05, "loss": 0.424, "step": 45543 }, { "epoch": 2.550341583603987, "grad_norm": 1.2958213090896606, "learning_rate": 9.32828947368421e-05, "loss": 0.3746, "step": 45544 }, { "epoch": 2.550397580916116, "grad_norm": 1.3364577293395996, "learning_rate": 9.328263157894738e-05, "loss": 0.4446, "step": 45545 }, { "epoch": 2.550453578228245, "grad_norm": 0.9293643832206726, "learning_rate": 9.328236842105264e-05, "loss": 0.4455, "step": 45546 }, { "epoch": 2.550509575540374, "grad_norm": 1.1867369413375854, "learning_rate": 9.328210526315791e-05, "loss": 0.5685, "step": 45547 }, { "epoch": 2.550565572852503, "grad_norm": 1.7388741970062256, "learning_rate": 9.328184210526316e-05, "loss": 0.4748, "step": 45548 }, { "epoch": 2.550621570164632, "grad_norm": 1.1298727989196777, "learning_rate": 9.328157894736843e-05, "loss": 0.369, "step": 45549 }, { "epoch": 2.550677567476761, "grad_norm": 1.265620231628418, "learning_rate": 9.328131578947369e-05, "loss": 0.3635, "step": 45550 }, { "epoch": 2.55073356478889, "grad_norm": 1.1824848651885986, "learning_rate": 9.328105263157895e-05, "loss": 0.5387, "step": 45551 }, { "epoch": 2.550789562101019, "grad_norm": 1.5040555000305176, "learning_rate": 9.328078947368422e-05, "loss": 0.591, "step": 45552 }, { "epoch": 2.550845559413148, "grad_norm": 0.9612807035446167, "learning_rate": 9.328052631578947e-05, "loss": 0.3174, "step": 45553 }, { "epoch": 2.550901556725277, "grad_norm": 1.1754109859466553, "learning_rate": 9.328026315789474e-05, "loss": 0.4166, "step": 45554 }, { "epoch": 2.550957554037406, "grad_norm": 1.1892164945602417, "learning_rate": 9.328e-05, "loss": 0.5019, "step": 45555 }, { 
"epoch": 2.551013551349535, "grad_norm": 0.9745883345603943, "learning_rate": 9.327973684210527e-05, "loss": 0.4013, "step": 45556 }, { "epoch": 2.551069548661664, "grad_norm": 0.9784499406814575, "learning_rate": 9.327947368421053e-05, "loss": 0.3361, "step": 45557 }, { "epoch": 2.551125545973793, "grad_norm": 0.9853188991546631, "learning_rate": 9.32792105263158e-05, "loss": 0.333, "step": 45558 }, { "epoch": 2.551181543285922, "grad_norm": 1.1957056522369385, "learning_rate": 9.327894736842105e-05, "loss": 0.4235, "step": 45559 }, { "epoch": 2.551237540598051, "grad_norm": 1.2127811908721924, "learning_rate": 9.327868421052633e-05, "loss": 0.3969, "step": 45560 }, { "epoch": 2.55129353791018, "grad_norm": 1.0176527500152588, "learning_rate": 9.327842105263159e-05, "loss": 0.3465, "step": 45561 }, { "epoch": 2.5513495352223092, "grad_norm": 1.3686647415161133, "learning_rate": 9.327815789473685e-05, "loss": 0.4607, "step": 45562 }, { "epoch": 2.5514055325344382, "grad_norm": 1.4335973262786865, "learning_rate": 9.32778947368421e-05, "loss": 0.3478, "step": 45563 }, { "epoch": 2.5514615298465673, "grad_norm": 1.0871343612670898, "learning_rate": 9.327763157894738e-05, "loss": 0.3685, "step": 45564 }, { "epoch": 2.5515175271586963, "grad_norm": 1.2457151412963867, "learning_rate": 9.327736842105264e-05, "loss": 0.4463, "step": 45565 }, { "epoch": 2.5515735244708253, "grad_norm": 1.2166354656219482, "learning_rate": 9.32771052631579e-05, "loss": 0.5207, "step": 45566 }, { "epoch": 2.5516295217829543, "grad_norm": 1.0356742143630981, "learning_rate": 9.327684210526316e-05, "loss": 0.3431, "step": 45567 }, { "epoch": 2.5516855190950833, "grad_norm": 1.560911774635315, "learning_rate": 9.327657894736842e-05, "loss": 0.529, "step": 45568 }, { "epoch": 2.5517415164072124, "grad_norm": 1.2377440929412842, "learning_rate": 9.327631578947369e-05, "loss": 0.4086, "step": 45569 }, { "epoch": 2.5517975137193414, "grad_norm": 0.9890557527542114, "learning_rate": 
9.327605263157895e-05, "loss": 0.3385, "step": 45570 }, { "epoch": 2.5518535110314704, "grad_norm": 1.3722487688064575, "learning_rate": 9.327578947368421e-05, "loss": 0.4394, "step": 45571 }, { "epoch": 2.5519095083435994, "grad_norm": 1.1656464338302612, "learning_rate": 9.327552631578947e-05, "loss": 0.4531, "step": 45572 }, { "epoch": 2.5519655056557284, "grad_norm": 1.2346620559692383, "learning_rate": 9.327526315789474e-05, "loss": 0.436, "step": 45573 }, { "epoch": 2.5520215029678575, "grad_norm": 1.304569125175476, "learning_rate": 9.3275e-05, "loss": 0.3939, "step": 45574 }, { "epoch": 2.5520775002799865, "grad_norm": 1.3827192783355713, "learning_rate": 9.327473684210526e-05, "loss": 0.393, "step": 45575 }, { "epoch": 2.5521334975921155, "grad_norm": 1.0160528421401978, "learning_rate": 9.327447368421052e-05, "loss": 0.3062, "step": 45576 }, { "epoch": 2.5521894949042445, "grad_norm": 1.3612617254257202, "learning_rate": 9.32742105263158e-05, "loss": 0.4366, "step": 45577 }, { "epoch": 2.5522454922163735, "grad_norm": 1.078266978263855, "learning_rate": 9.327394736842106e-05, "loss": 0.4817, "step": 45578 }, { "epoch": 2.5523014895285026, "grad_norm": 1.2146620750427246, "learning_rate": 9.327368421052633e-05, "loss": 0.3633, "step": 45579 }, { "epoch": 2.5523574868406316, "grad_norm": 1.4247937202453613, "learning_rate": 9.327342105263158e-05, "loss": 0.5016, "step": 45580 }, { "epoch": 2.5524134841527606, "grad_norm": 1.3972868919372559, "learning_rate": 9.327315789473685e-05, "loss": 0.4021, "step": 45581 }, { "epoch": 2.5524694814648896, "grad_norm": 1.277218222618103, "learning_rate": 9.327289473684211e-05, "loss": 0.4911, "step": 45582 }, { "epoch": 2.5525254787770186, "grad_norm": 1.1201694011688232, "learning_rate": 9.327263157894738e-05, "loss": 0.4254, "step": 45583 }, { "epoch": 2.5525814760891476, "grad_norm": 1.1593981981277466, "learning_rate": 9.327236842105264e-05, "loss": 0.4175, "step": 45584 }, { "epoch": 2.5526374734012767, 
"grad_norm": 1.2248313426971436, "learning_rate": 9.327210526315789e-05, "loss": 0.4873, "step": 45585 }, { "epoch": 2.5526934707134057, "grad_norm": 0.9427081942558289, "learning_rate": 9.327184210526316e-05, "loss": 0.2833, "step": 45586 }, { "epoch": 2.5527494680255347, "grad_norm": 1.0200705528259277, "learning_rate": 9.327157894736842e-05, "loss": 0.3746, "step": 45587 }, { "epoch": 2.5528054653376637, "grad_norm": 1.0756677389144897, "learning_rate": 9.32713157894737e-05, "loss": 0.3722, "step": 45588 }, { "epoch": 2.5528614626497927, "grad_norm": 1.0629841089248657, "learning_rate": 9.327105263157895e-05, "loss": 0.4169, "step": 45589 }, { "epoch": 2.5529174599619218, "grad_norm": 1.1962857246398926, "learning_rate": 9.327078947368421e-05, "loss": 0.4261, "step": 45590 }, { "epoch": 2.552973457274051, "grad_norm": 1.4027111530303955, "learning_rate": 9.327052631578947e-05, "loss": 0.5373, "step": 45591 }, { "epoch": 2.55302945458618, "grad_norm": 1.379607915878296, "learning_rate": 9.327026315789475e-05, "loss": 0.4226, "step": 45592 }, { "epoch": 2.553085451898309, "grad_norm": 1.4633805751800537, "learning_rate": 9.327e-05, "loss": 0.4066, "step": 45593 }, { "epoch": 2.553141449210438, "grad_norm": 1.0058832168579102, "learning_rate": 9.326973684210527e-05, "loss": 0.3139, "step": 45594 }, { "epoch": 2.553197446522567, "grad_norm": 1.4055863618850708, "learning_rate": 9.326947368421053e-05, "loss": 0.4012, "step": 45595 }, { "epoch": 2.553253443834696, "grad_norm": 0.9868448376655579, "learning_rate": 9.32692105263158e-05, "loss": 0.3782, "step": 45596 }, { "epoch": 2.553309441146825, "grad_norm": 1.2650110721588135, "learning_rate": 9.326894736842106e-05, "loss": 0.5607, "step": 45597 }, { "epoch": 2.553365438458954, "grad_norm": 1.39922034740448, "learning_rate": 9.326868421052632e-05, "loss": 0.437, "step": 45598 }, { "epoch": 2.553421435771083, "grad_norm": 1.281574010848999, "learning_rate": 9.326842105263158e-05, "loss": 0.348, "step": 45599 }, { 
"epoch": 2.553477433083212, "grad_norm": 1.137755274772644, "learning_rate": 9.326815789473685e-05, "loss": 0.461, "step": 45600 }, { "epoch": 2.553533430395341, "grad_norm": 1.351910948753357, "learning_rate": 9.326789473684211e-05, "loss": 0.4383, "step": 45601 }, { "epoch": 2.55358942770747, "grad_norm": 1.5636895895004272, "learning_rate": 9.326763157894737e-05, "loss": 0.4454, "step": 45602 }, { "epoch": 2.553645425019599, "grad_norm": 1.0131797790527344, "learning_rate": 9.326736842105263e-05, "loss": 0.3249, "step": 45603 }, { "epoch": 2.553701422331728, "grad_norm": 1.34159255027771, "learning_rate": 9.326710526315789e-05, "loss": 0.5007, "step": 45604 }, { "epoch": 2.553757419643857, "grad_norm": 1.2082847356796265, "learning_rate": 9.326684210526316e-05, "loss": 0.4757, "step": 45605 }, { "epoch": 2.553813416955986, "grad_norm": 1.1693072319030762, "learning_rate": 9.326657894736842e-05, "loss": 0.312, "step": 45606 }, { "epoch": 2.553869414268115, "grad_norm": 1.2204186916351318, "learning_rate": 9.32663157894737e-05, "loss": 0.3884, "step": 45607 }, { "epoch": 2.553925411580244, "grad_norm": 2.8457987308502197, "learning_rate": 9.326605263157894e-05, "loss": 0.3027, "step": 45608 }, { "epoch": 2.553981408892373, "grad_norm": 1.3017693758010864, "learning_rate": 9.326578947368422e-05, "loss": 0.3121, "step": 45609 }, { "epoch": 2.554037406204502, "grad_norm": 1.1814875602722168, "learning_rate": 9.326552631578948e-05, "loss": 0.392, "step": 45610 }, { "epoch": 2.554093403516631, "grad_norm": 0.9689959287643433, "learning_rate": 9.326526315789475e-05, "loss": 0.3155, "step": 45611 }, { "epoch": 2.55414940082876, "grad_norm": 1.1015560626983643, "learning_rate": 9.326500000000001e-05, "loss": 0.5243, "step": 45612 }, { "epoch": 2.554205398140889, "grad_norm": 1.238231897354126, "learning_rate": 9.326473684210527e-05, "loss": 0.4462, "step": 45613 }, { "epoch": 2.5542613954530182, "grad_norm": 1.0320967435836792, "learning_rate": 9.326447368421053e-05, 
"loss": 0.4327, "step": 45614 }, { "epoch": 2.5543173927651472, "grad_norm": 1.3268393278121948, "learning_rate": 9.32642105263158e-05, "loss": 0.3836, "step": 45615 }, { "epoch": 2.5543733900772763, "grad_norm": 1.2125952243804932, "learning_rate": 9.326394736842106e-05, "loss": 0.3865, "step": 45616 }, { "epoch": 2.5544293873894053, "grad_norm": 1.5733534097671509, "learning_rate": 9.326368421052632e-05, "loss": 0.6021, "step": 45617 }, { "epoch": 2.5544853847015343, "grad_norm": 1.2419317960739136, "learning_rate": 9.326342105263158e-05, "loss": 0.4697, "step": 45618 }, { "epoch": 2.5545413820136633, "grad_norm": 1.016759991645813, "learning_rate": 9.326315789473684e-05, "loss": 0.2857, "step": 45619 }, { "epoch": 2.5545973793257923, "grad_norm": 1.1140037775039673, "learning_rate": 9.326289473684211e-05, "loss": 0.3096, "step": 45620 }, { "epoch": 2.5546533766379214, "grad_norm": 1.2843455076217651, "learning_rate": 9.326263157894737e-05, "loss": 0.3755, "step": 45621 }, { "epoch": 2.5547093739500504, "grad_norm": 1.187127947807312, "learning_rate": 9.326236842105263e-05, "loss": 0.4097, "step": 45622 }, { "epoch": 2.5547653712621794, "grad_norm": 1.1217719316482544, "learning_rate": 9.32621052631579e-05, "loss": 0.39, "step": 45623 }, { "epoch": 2.5548213685743084, "grad_norm": 1.2366663217544556, "learning_rate": 9.326184210526317e-05, "loss": 0.4215, "step": 45624 }, { "epoch": 2.5548773658864374, "grad_norm": 1.2192249298095703, "learning_rate": 9.326157894736843e-05, "loss": 0.4296, "step": 45625 }, { "epoch": 2.5549333631985665, "grad_norm": 1.794351577758789, "learning_rate": 9.326131578947369e-05, "loss": 0.6779, "step": 45626 }, { "epoch": 2.5549893605106955, "grad_norm": 1.2011408805847168, "learning_rate": 9.326105263157895e-05, "loss": 0.3865, "step": 45627 }, { "epoch": 2.5550453578228245, "grad_norm": 1.2304136753082275, "learning_rate": 9.326078947368422e-05, "loss": 0.492, "step": 45628 }, { "epoch": 2.5551013551349535, "grad_norm": 
1.0056356191635132, "learning_rate": 9.326052631578948e-05, "loss": 0.3073, "step": 45629 }, { "epoch": 2.5551573524470825, "grad_norm": 1.4691132307052612, "learning_rate": 9.326026315789474e-05, "loss": 0.4013, "step": 45630 }, { "epoch": 2.5552133497592115, "grad_norm": 1.1730116605758667, "learning_rate": 9.326e-05, "loss": 0.589, "step": 45631 }, { "epoch": 2.5552693470713406, "grad_norm": 1.1387131214141846, "learning_rate": 9.325973684210527e-05, "loss": 0.3943, "step": 45632 }, { "epoch": 2.5553253443834696, "grad_norm": 1.13905668258667, "learning_rate": 9.325947368421053e-05, "loss": 0.346, "step": 45633 }, { "epoch": 2.5553813416955986, "grad_norm": 1.4302362203598022, "learning_rate": 9.32592105263158e-05, "loss": 0.4856, "step": 45634 }, { "epoch": 2.5554373390077276, "grad_norm": 1.1664444208145142, "learning_rate": 9.325894736842105e-05, "loss": 0.2877, "step": 45635 }, { "epoch": 2.5554933363198566, "grad_norm": 1.1313159465789795, "learning_rate": 9.325868421052631e-05, "loss": 0.3581, "step": 45636 }, { "epoch": 2.5555493336319857, "grad_norm": 1.4714298248291016, "learning_rate": 9.325842105263158e-05, "loss": 0.4252, "step": 45637 }, { "epoch": 2.5556053309441147, "grad_norm": 1.4931342601776123, "learning_rate": 9.325815789473684e-05, "loss": 0.4858, "step": 45638 }, { "epoch": 2.5556613282562437, "grad_norm": 1.1312881708145142, "learning_rate": 9.325789473684212e-05, "loss": 0.3891, "step": 45639 }, { "epoch": 2.5557173255683727, "grad_norm": 1.3210203647613525, "learning_rate": 9.325763157894736e-05, "loss": 0.4444, "step": 45640 }, { "epoch": 2.5557733228805017, "grad_norm": 1.2131695747375488, "learning_rate": 9.325736842105264e-05, "loss": 0.3467, "step": 45641 }, { "epoch": 2.5558293201926308, "grad_norm": 1.1313854455947876, "learning_rate": 9.32571052631579e-05, "loss": 0.458, "step": 45642 }, { "epoch": 2.55588531750476, "grad_norm": 1.3144233226776123, "learning_rate": 9.325684210526317e-05, "loss": 0.3914, "step": 45643 }, { 
"epoch": 2.555941314816889, "grad_norm": 1.18085515499115, "learning_rate": 9.325657894736843e-05, "loss": 0.3232, "step": 45644 }, { "epoch": 2.555997312129018, "grad_norm": 1.4017281532287598, "learning_rate": 9.325631578947369e-05, "loss": 0.5163, "step": 45645 }, { "epoch": 2.556053309441147, "grad_norm": 1.2561012506484985, "learning_rate": 9.325605263157895e-05, "loss": 0.4668, "step": 45646 }, { "epoch": 2.556109306753276, "grad_norm": 1.210850715637207, "learning_rate": 9.325578947368422e-05, "loss": 0.3906, "step": 45647 }, { "epoch": 2.556165304065405, "grad_norm": 1.1935440301895142, "learning_rate": 9.325552631578948e-05, "loss": 0.3574, "step": 45648 }, { "epoch": 2.556221301377534, "grad_norm": 1.3698830604553223, "learning_rate": 9.325526315789474e-05, "loss": 0.3904, "step": 45649 }, { "epoch": 2.556277298689663, "grad_norm": 1.1001482009887695, "learning_rate": 9.3255e-05, "loss": 0.3224, "step": 45650 }, { "epoch": 2.556333296001792, "grad_norm": 1.2250148057937622, "learning_rate": 9.325473684210527e-05, "loss": 0.3642, "step": 45651 }, { "epoch": 2.556389293313921, "grad_norm": 1.1564087867736816, "learning_rate": 9.325447368421053e-05, "loss": 0.5367, "step": 45652 }, { "epoch": 2.55644529062605, "grad_norm": 1.075554370880127, "learning_rate": 9.32542105263158e-05, "loss": 0.3751, "step": 45653 }, { "epoch": 2.556501287938179, "grad_norm": 1.1332156658172607, "learning_rate": 9.325394736842105e-05, "loss": 0.5071, "step": 45654 }, { "epoch": 2.556557285250308, "grad_norm": 1.383529543876648, "learning_rate": 9.325368421052631e-05, "loss": 0.5627, "step": 45655 }, { "epoch": 2.556613282562437, "grad_norm": 1.28536057472229, "learning_rate": 9.325342105263159e-05, "loss": 0.2972, "step": 45656 }, { "epoch": 2.556669279874566, "grad_norm": 1.1395752429962158, "learning_rate": 9.325315789473685e-05, "loss": 0.4714, "step": 45657 }, { "epoch": 2.556725277186695, "grad_norm": 1.3974889516830444, "learning_rate": 9.32528947368421e-05, "loss": 0.3769, 
"step": 45658 }, { "epoch": 2.556781274498824, "grad_norm": 0.9640372395515442, "learning_rate": 9.325263157894737e-05, "loss": 0.3609, "step": 45659 }, { "epoch": 2.556837271810953, "grad_norm": 1.2757364511489868, "learning_rate": 9.325236842105264e-05, "loss": 0.4384, "step": 45660 }, { "epoch": 2.556893269123082, "grad_norm": 1.1255967617034912, "learning_rate": 9.32521052631579e-05, "loss": 0.3759, "step": 45661 }, { "epoch": 2.556949266435211, "grad_norm": 1.0359216928482056, "learning_rate": 9.325184210526317e-05, "loss": 0.3848, "step": 45662 }, { "epoch": 2.55700526374734, "grad_norm": 1.2480062246322632, "learning_rate": 9.325157894736842e-05, "loss": 0.3645, "step": 45663 }, { "epoch": 2.557061261059469, "grad_norm": 1.1469781398773193, "learning_rate": 9.325131578947369e-05, "loss": 0.4041, "step": 45664 }, { "epoch": 2.557117258371598, "grad_norm": 1.367587685585022, "learning_rate": 9.325105263157895e-05, "loss": 0.3825, "step": 45665 }, { "epoch": 2.557173255683727, "grad_norm": 1.728960394859314, "learning_rate": 9.325078947368422e-05, "loss": 0.4866, "step": 45666 }, { "epoch": 2.5572292529958562, "grad_norm": 1.1842454671859741, "learning_rate": 9.325052631578947e-05, "loss": 0.3705, "step": 45667 }, { "epoch": 2.5572852503079853, "grad_norm": 1.195404052734375, "learning_rate": 9.325026315789474e-05, "loss": 0.3402, "step": 45668 }, { "epoch": 2.5573412476201143, "grad_norm": 1.595546841621399, "learning_rate": 9.325e-05, "loss": 0.3296, "step": 45669 }, { "epoch": 2.5573972449322433, "grad_norm": 1.2033277750015259, "learning_rate": 9.324973684210528e-05, "loss": 0.384, "step": 45670 }, { "epoch": 2.5574532422443723, "grad_norm": 3.279566526412964, "learning_rate": 9.324947368421054e-05, "loss": 0.3414, "step": 45671 }, { "epoch": 2.5575092395565013, "grad_norm": 1.2063119411468506, "learning_rate": 9.324921052631578e-05, "loss": 0.3675, "step": 45672 }, { "epoch": 2.5575652368686304, "grad_norm": 1.162808895111084, "learning_rate": 
9.324894736842106e-05, "loss": 0.4085, "step": 45673 }, { "epoch": 2.5576212341807594, "grad_norm": 1.3233859539031982, "learning_rate": 9.324868421052632e-05, "loss": 0.3672, "step": 45674 }, { "epoch": 2.5576772314928884, "grad_norm": 1.0006930828094482, "learning_rate": 9.324842105263159e-05, "loss": 0.3662, "step": 45675 }, { "epoch": 2.5577332288050174, "grad_norm": 1.1017684936523438, "learning_rate": 9.324815789473685e-05, "loss": 0.3555, "step": 45676 }, { "epoch": 2.5577892261171464, "grad_norm": 1.2516436576843262, "learning_rate": 9.324789473684211e-05, "loss": 0.5026, "step": 45677 }, { "epoch": 2.5578452234292754, "grad_norm": 1.251975655555725, "learning_rate": 9.324763157894737e-05, "loss": 0.4134, "step": 45678 }, { "epoch": 2.5579012207414045, "grad_norm": 0.9582602977752686, "learning_rate": 9.324736842105264e-05, "loss": 0.3548, "step": 45679 }, { "epoch": 2.5579572180535335, "grad_norm": 1.4859552383422852, "learning_rate": 9.32471052631579e-05, "loss": 0.5554, "step": 45680 }, { "epoch": 2.5580132153656625, "grad_norm": 1.015962839126587, "learning_rate": 9.324684210526316e-05, "loss": 0.4235, "step": 45681 }, { "epoch": 2.5580692126777915, "grad_norm": 1.1603929996490479, "learning_rate": 9.324657894736842e-05, "loss": 0.3881, "step": 45682 }, { "epoch": 2.5581252099899205, "grad_norm": 1.2697588205337524, "learning_rate": 9.32463157894737e-05, "loss": 0.5235, "step": 45683 }, { "epoch": 2.5581812073020496, "grad_norm": 1.1522265672683716, "learning_rate": 9.324605263157895e-05, "loss": 0.3555, "step": 45684 }, { "epoch": 2.5582372046141786, "grad_norm": 1.1480109691619873, "learning_rate": 9.324578947368421e-05, "loss": 0.335, "step": 45685 }, { "epoch": 2.5582932019263076, "grad_norm": 1.244274616241455, "learning_rate": 9.324552631578947e-05, "loss": 0.3773, "step": 45686 }, { "epoch": 2.5583491992384366, "grad_norm": 1.1693480014801025, "learning_rate": 9.324526315789475e-05, "loss": 0.6179, "step": 45687 }, { "epoch": 2.5584051965505656, 
"grad_norm": 1.3929036855697632, "learning_rate": 9.3245e-05, "loss": 0.3648, "step": 45688 }, { "epoch": 2.5584611938626947, "grad_norm": 1.0592700242996216, "learning_rate": 9.324473684210527e-05, "loss": 0.4285, "step": 45689 }, { "epoch": 2.5585171911748237, "grad_norm": 1.013587474822998, "learning_rate": 9.324447368421053e-05, "loss": 0.4833, "step": 45690 }, { "epoch": 2.5585731884869527, "grad_norm": 1.4980398416519165, "learning_rate": 9.324421052631579e-05, "loss": 0.3609, "step": 45691 }, { "epoch": 2.5586291857990817, "grad_norm": 1.169128656387329, "learning_rate": 9.324394736842106e-05, "loss": 0.3945, "step": 45692 }, { "epoch": 2.5586851831112107, "grad_norm": 1.200084924697876, "learning_rate": 9.324368421052632e-05, "loss": 0.4536, "step": 45693 }, { "epoch": 2.5587411804233398, "grad_norm": 1.5824177265167236, "learning_rate": 9.324342105263159e-05, "loss": 0.4062, "step": 45694 }, { "epoch": 2.5587971777354688, "grad_norm": 0.9825482964515686, "learning_rate": 9.324315789473684e-05, "loss": 0.2983, "step": 45695 }, { "epoch": 2.558853175047598, "grad_norm": 1.2140988111495972, "learning_rate": 9.324289473684211e-05, "loss": 0.3609, "step": 45696 }, { "epoch": 2.558909172359727, "grad_norm": 0.875757098197937, "learning_rate": 9.324263157894737e-05, "loss": 0.268, "step": 45697 }, { "epoch": 2.558965169671856, "grad_norm": 1.0031931400299072, "learning_rate": 9.324236842105264e-05, "loss": 0.338, "step": 45698 }, { "epoch": 2.559021166983985, "grad_norm": 1.2444010972976685, "learning_rate": 9.32421052631579e-05, "loss": 0.4075, "step": 45699 }, { "epoch": 2.559077164296114, "grad_norm": 1.4407545328140259, "learning_rate": 9.324184210526316e-05, "loss": 0.3802, "step": 45700 }, { "epoch": 2.559133161608243, "grad_norm": 0.972081184387207, "learning_rate": 9.324157894736842e-05, "loss": 0.2922, "step": 45701 }, { "epoch": 2.559189158920372, "grad_norm": 1.0640215873718262, "learning_rate": 9.32413157894737e-05, "loss": 0.3579, "step": 45702 }, { 
"epoch": 2.559245156232501, "grad_norm": 1.2619779109954834, "learning_rate": 9.324105263157896e-05, "loss": 0.4187, "step": 45703 }, { "epoch": 2.55930115354463, "grad_norm": 1.8751230239868164, "learning_rate": 9.324078947368422e-05, "loss": 0.4124, "step": 45704 }, { "epoch": 2.559357150856759, "grad_norm": 1.206122636795044, "learning_rate": 9.324052631578948e-05, "loss": 0.4839, "step": 45705 }, { "epoch": 2.559413148168888, "grad_norm": 1.3412203788757324, "learning_rate": 9.324026315789474e-05, "loss": 0.5816, "step": 45706 }, { "epoch": 2.559469145481017, "grad_norm": 1.2568408250808716, "learning_rate": 9.324000000000001e-05, "loss": 0.506, "step": 45707 }, { "epoch": 2.559525142793146, "grad_norm": 1.2885011434555054, "learning_rate": 9.323973684210527e-05, "loss": 0.4702, "step": 45708 }, { "epoch": 2.559581140105275, "grad_norm": 1.052280306816101, "learning_rate": 9.323947368421053e-05, "loss": 0.4138, "step": 45709 }, { "epoch": 2.559637137417404, "grad_norm": 1.1961079835891724, "learning_rate": 9.323921052631579e-05, "loss": 0.4005, "step": 45710 }, { "epoch": 2.559693134729533, "grad_norm": 1.0916801691055298, "learning_rate": 9.323894736842106e-05, "loss": 0.3366, "step": 45711 }, { "epoch": 2.559749132041662, "grad_norm": 1.1278434991836548, "learning_rate": 9.323868421052632e-05, "loss": 0.4107, "step": 45712 }, { "epoch": 2.559805129353791, "grad_norm": 2.5425703525543213, "learning_rate": 9.323842105263158e-05, "loss": 0.3212, "step": 45713 }, { "epoch": 2.55986112666592, "grad_norm": 1.120042085647583, "learning_rate": 9.323815789473684e-05, "loss": 0.425, "step": 45714 }, { "epoch": 2.559917123978049, "grad_norm": 1.0947351455688477, "learning_rate": 9.323789473684211e-05, "loss": 0.3601, "step": 45715 }, { "epoch": 2.559973121290178, "grad_norm": 1.2361760139465332, "learning_rate": 9.323763157894737e-05, "loss": 0.4042, "step": 45716 }, { "epoch": 2.560029118602307, "grad_norm": 1.1049026250839233, "learning_rate": 9.323736842105265e-05, 
"loss": 0.4318, "step": 45717 }, { "epoch": 2.560085115914436, "grad_norm": 1.195594310760498, "learning_rate": 9.323710526315789e-05, "loss": 0.3597, "step": 45718 }, { "epoch": 2.5601411132265652, "grad_norm": 1.2355653047561646, "learning_rate": 9.323684210526317e-05, "loss": 0.3344, "step": 45719 }, { "epoch": 2.5601971105386943, "grad_norm": 1.0339802503585815, "learning_rate": 9.323657894736843e-05, "loss": 0.3592, "step": 45720 }, { "epoch": 2.5602531078508233, "grad_norm": 1.7107774019241333, "learning_rate": 9.32363157894737e-05, "loss": 0.4588, "step": 45721 }, { "epoch": 2.5603091051629523, "grad_norm": 1.2407349348068237, "learning_rate": 9.323605263157895e-05, "loss": 0.3533, "step": 45722 }, { "epoch": 2.5603651024750813, "grad_norm": 1.1276711225509644, "learning_rate": 9.32357894736842e-05, "loss": 0.3698, "step": 45723 }, { "epoch": 2.5604210997872103, "grad_norm": 1.4450361728668213, "learning_rate": 9.323552631578948e-05, "loss": 0.4524, "step": 45724 }, { "epoch": 2.5604770970993393, "grad_norm": 1.1825019121170044, "learning_rate": 9.323526315789474e-05, "loss": 0.5107, "step": 45725 }, { "epoch": 2.5605330944114684, "grad_norm": 1.134027123451233, "learning_rate": 9.323500000000001e-05, "loss": 0.417, "step": 45726 }, { "epoch": 2.5605890917235974, "grad_norm": 1.032018780708313, "learning_rate": 9.323473684210526e-05, "loss": 0.3408, "step": 45727 }, { "epoch": 2.5606450890357264, "grad_norm": 1.6317919492721558, "learning_rate": 9.323447368421053e-05, "loss": 0.5422, "step": 45728 }, { "epoch": 2.5607010863478554, "grad_norm": 1.0493611097335815, "learning_rate": 9.323421052631579e-05, "loss": 0.3266, "step": 45729 }, { "epoch": 2.5607570836599844, "grad_norm": 1.3089109659194946, "learning_rate": 9.323394736842106e-05, "loss": 0.4636, "step": 45730 }, { "epoch": 2.5608130809721135, "grad_norm": 1.0117363929748535, "learning_rate": 9.323368421052632e-05, "loss": 0.3684, "step": 45731 }, { "epoch": 2.5608690782842425, "grad_norm": 
1.2079764604568481, "learning_rate": 9.323342105263158e-05, "loss": 0.2532, "step": 45732 }, { "epoch": 2.5609250755963715, "grad_norm": 1.0275832414627075, "learning_rate": 9.323315789473684e-05, "loss": 0.343, "step": 45733 }, { "epoch": 2.5609810729085005, "grad_norm": 1.2589458227157593, "learning_rate": 9.323289473684212e-05, "loss": 0.3578, "step": 45734 }, { "epoch": 2.5610370702206295, "grad_norm": 1.2149940729141235, "learning_rate": 9.323263157894738e-05, "loss": 0.3968, "step": 45735 }, { "epoch": 2.5610930675327586, "grad_norm": 1.1274718046188354, "learning_rate": 9.323236842105264e-05, "loss": 0.3794, "step": 45736 }, { "epoch": 2.5611490648448876, "grad_norm": 1.2266300916671753, "learning_rate": 9.32321052631579e-05, "loss": 0.4552, "step": 45737 }, { "epoch": 2.5612050621570166, "grad_norm": 0.9906600713729858, "learning_rate": 9.323184210526317e-05, "loss": 0.3175, "step": 45738 }, { "epoch": 2.5612610594691456, "grad_norm": 1.0905526876449585, "learning_rate": 9.323157894736843e-05, "loss": 0.4285, "step": 45739 }, { "epoch": 2.5613170567812746, "grad_norm": 0.9535433650016785, "learning_rate": 9.323131578947369e-05, "loss": 0.3932, "step": 45740 }, { "epoch": 2.5613730540934037, "grad_norm": 1.311492919921875, "learning_rate": 9.323105263157895e-05, "loss": 0.4295, "step": 45741 }, { "epoch": 2.5614290514055327, "grad_norm": 1.2669093608856201, "learning_rate": 9.323078947368421e-05, "loss": 0.4549, "step": 45742 }, { "epoch": 2.5614850487176617, "grad_norm": 1.3176826238632202, "learning_rate": 9.323052631578948e-05, "loss": 0.528, "step": 45743 }, { "epoch": 2.5615410460297907, "grad_norm": 1.1536734104156494, "learning_rate": 9.323026315789474e-05, "loss": 0.4261, "step": 45744 }, { "epoch": 2.5615970433419197, "grad_norm": 1.254226803779602, "learning_rate": 9.323e-05, "loss": 0.317, "step": 45745 }, { "epoch": 2.5616530406540488, "grad_norm": 1.5932658910751343, "learning_rate": 9.322973684210526e-05, "loss": 0.5025, "step": 45746 }, { 
"epoch": 2.5617090379661778, "grad_norm": 1.348116159439087, "learning_rate": 9.322947368421053e-05, "loss": 0.4817, "step": 45747 }, { "epoch": 2.561765035278307, "grad_norm": 1.0877364873886108, "learning_rate": 9.322921052631579e-05, "loss": 0.3022, "step": 45748 }, { "epoch": 2.561821032590436, "grad_norm": 1.4115625619888306, "learning_rate": 9.322894736842107e-05, "loss": 0.4522, "step": 45749 }, { "epoch": 2.561877029902565, "grad_norm": 1.363369345664978, "learning_rate": 9.322868421052631e-05, "loss": 0.4922, "step": 45750 }, { "epoch": 2.561933027214694, "grad_norm": 1.1387078762054443, "learning_rate": 9.322842105263159e-05, "loss": 0.3572, "step": 45751 }, { "epoch": 2.561989024526823, "grad_norm": 1.1272447109222412, "learning_rate": 9.322815789473685e-05, "loss": 0.3686, "step": 45752 }, { "epoch": 2.562045021838952, "grad_norm": 1.0637720823287964, "learning_rate": 9.322789473684212e-05, "loss": 0.4098, "step": 45753 }, { "epoch": 2.562101019151081, "grad_norm": 1.205729603767395, "learning_rate": 9.322763157894738e-05, "loss": 0.4362, "step": 45754 }, { "epoch": 2.56215701646321, "grad_norm": 1.383683681488037, "learning_rate": 9.322736842105264e-05, "loss": 0.3607, "step": 45755 }, { "epoch": 2.562213013775339, "grad_norm": 0.9735696315765381, "learning_rate": 9.32271052631579e-05, "loss": 0.2919, "step": 45756 }, { "epoch": 2.562269011087468, "grad_norm": 1.0302846431732178, "learning_rate": 9.322684210526316e-05, "loss": 0.3606, "step": 45757 }, { "epoch": 2.562325008399597, "grad_norm": 1.5440552234649658, "learning_rate": 9.322657894736843e-05, "loss": 0.5488, "step": 45758 }, { "epoch": 2.562381005711726, "grad_norm": 1.5616835355758667, "learning_rate": 9.322631578947369e-05, "loss": 0.449, "step": 45759 }, { "epoch": 2.562437003023855, "grad_norm": 1.2558298110961914, "learning_rate": 9.322605263157895e-05, "loss": 0.4412, "step": 45760 }, { "epoch": 2.562493000335984, "grad_norm": 1.0505138635635376, "learning_rate": 9.322578947368421e-05, 
"loss": 0.3367, "step": 45761 }, { "epoch": 2.562548997648113, "grad_norm": 1.183586597442627, "learning_rate": 9.322552631578948e-05, "loss": 0.3887, "step": 45762 }, { "epoch": 2.5626049949602416, "grad_norm": 1.3686237335205078, "learning_rate": 9.322526315789474e-05, "loss": 0.3549, "step": 45763 }, { "epoch": 2.562660992272371, "grad_norm": 1.3428999185562134, "learning_rate": 9.3225e-05, "loss": 0.4532, "step": 45764 }, { "epoch": 2.5627169895844997, "grad_norm": 1.0158320665359497, "learning_rate": 9.322473684210526e-05, "loss": 0.3652, "step": 45765 }, { "epoch": 2.562772986896629, "grad_norm": 1.281711459159851, "learning_rate": 9.322447368421054e-05, "loss": 0.3821, "step": 45766 }, { "epoch": 2.5628289842087577, "grad_norm": 1.1996203660964966, "learning_rate": 9.32242105263158e-05, "loss": 0.4204, "step": 45767 }, { "epoch": 2.562884981520887, "grad_norm": 1.2329365015029907, "learning_rate": 9.322394736842106e-05, "loss": 0.5543, "step": 45768 }, { "epoch": 2.5629409788330157, "grad_norm": 1.2657426595687866, "learning_rate": 9.322368421052631e-05, "loss": 0.4744, "step": 45769 }, { "epoch": 2.562996976145145, "grad_norm": 0.9845755696296692, "learning_rate": 9.322342105263159e-05, "loss": 0.3641, "step": 45770 }, { "epoch": 2.563052973457274, "grad_norm": 1.2035232782363892, "learning_rate": 9.322315789473685e-05, "loss": 0.4131, "step": 45771 }, { "epoch": 2.5631089707694032, "grad_norm": 1.303586483001709, "learning_rate": 9.322289473684212e-05, "loss": 0.3969, "step": 45772 }, { "epoch": 2.563164968081532, "grad_norm": 1.195119023323059, "learning_rate": 9.322263157894737e-05, "loss": 0.459, "step": 45773 }, { "epoch": 2.5632209653936613, "grad_norm": 1.2336194515228271, "learning_rate": 9.322236842105264e-05, "loss": 0.4525, "step": 45774 }, { "epoch": 2.56327696270579, "grad_norm": 1.088029384613037, "learning_rate": 9.32221052631579e-05, "loss": 0.3257, "step": 45775 }, { "epoch": 2.5633329600179193, "grad_norm": 1.2099227905273438, 
"learning_rate": 9.322184210526316e-05, "loss": 0.377, "step": 45776 }, { "epoch": 2.563388957330048, "grad_norm": 1.1198279857635498, "learning_rate": 9.322157894736842e-05, "loss": 0.3777, "step": 45777 }, { "epoch": 2.5634449546421774, "grad_norm": 1.1287174224853516, "learning_rate": 9.322131578947368e-05, "loss": 0.4526, "step": 45778 }, { "epoch": 2.563500951954306, "grad_norm": 1.0839780569076538, "learning_rate": 9.322105263157895e-05, "loss": 0.4133, "step": 45779 }, { "epoch": 2.5635569492664354, "grad_norm": 1.1413198709487915, "learning_rate": 9.322078947368421e-05, "loss": 0.3812, "step": 45780 }, { "epoch": 2.563612946578564, "grad_norm": 1.3592926263809204, "learning_rate": 9.322052631578949e-05, "loss": 0.4621, "step": 45781 }, { "epoch": 2.5636689438906934, "grad_norm": 1.391784429550171, "learning_rate": 9.322026315789473e-05, "loss": 0.503, "step": 45782 }, { "epoch": 2.563724941202822, "grad_norm": 2.1052563190460205, "learning_rate": 9.322e-05, "loss": 0.4295, "step": 45783 }, { "epoch": 2.5637809385149515, "grad_norm": 1.1694837808609009, "learning_rate": 9.321973684210527e-05, "loss": 0.4858, "step": 45784 }, { "epoch": 2.56383693582708, "grad_norm": 1.1886311769485474, "learning_rate": 9.321947368421054e-05, "loss": 0.3549, "step": 45785 }, { "epoch": 2.5638929331392095, "grad_norm": 1.321065902709961, "learning_rate": 9.32192105263158e-05, "loss": 0.422, "step": 45786 }, { "epoch": 2.563948930451338, "grad_norm": 15.169246673583984, "learning_rate": 9.321894736842106e-05, "loss": 0.3438, "step": 45787 }, { "epoch": 2.5640049277634676, "grad_norm": 1.134333610534668, "learning_rate": 9.321868421052632e-05, "loss": 0.4274, "step": 45788 }, { "epoch": 2.564060925075596, "grad_norm": 1.0968347787857056, "learning_rate": 9.321842105263159e-05, "loss": 0.3614, "step": 45789 }, { "epoch": 2.5641169223877256, "grad_norm": 1.3577218055725098, "learning_rate": 9.321815789473685e-05, "loss": 0.2972, "step": 45790 }, { "epoch": 2.564172919699854, 
"grad_norm": 1.853683352470398, "learning_rate": 9.321789473684211e-05, "loss": 0.4007, "step": 45791 }, { "epoch": 2.5642289170119836, "grad_norm": 0.9404269456863403, "learning_rate": 9.321763157894737e-05, "loss": 0.3209, "step": 45792 }, { "epoch": 2.564284914324112, "grad_norm": 1.1967439651489258, "learning_rate": 9.321736842105263e-05, "loss": 0.5329, "step": 45793 }, { "epoch": 2.5643409116362417, "grad_norm": 1.1776896715164185, "learning_rate": 9.32171052631579e-05, "loss": 0.3775, "step": 45794 }, { "epoch": 2.5643969089483702, "grad_norm": 1.7608062028884888, "learning_rate": 9.321684210526316e-05, "loss": 0.369, "step": 45795 }, { "epoch": 2.5644529062604997, "grad_norm": 1.0942223072052002, "learning_rate": 9.321657894736842e-05, "loss": 0.3538, "step": 45796 }, { "epoch": 2.5645089035726283, "grad_norm": 1.1120693683624268, "learning_rate": 9.321631578947368e-05, "loss": 0.4077, "step": 45797 }, { "epoch": 2.5645649008847577, "grad_norm": 1.82485032081604, "learning_rate": 9.321605263157896e-05, "loss": 0.4356, "step": 45798 }, { "epoch": 2.5646208981968863, "grad_norm": 1.3394153118133545, "learning_rate": 9.321578947368422e-05, "loss": 0.4355, "step": 45799 }, { "epoch": 2.564676895509016, "grad_norm": 1.3118377923965454, "learning_rate": 9.321552631578947e-05, "loss": 0.5102, "step": 45800 }, { "epoch": 2.5647328928211444, "grad_norm": 1.3070013523101807, "learning_rate": 9.321526315789473e-05, "loss": 0.3757, "step": 45801 }, { "epoch": 2.564788890133274, "grad_norm": 1.694876790046692, "learning_rate": 9.321500000000001e-05, "loss": 0.4263, "step": 45802 }, { "epoch": 2.5648448874454024, "grad_norm": 1.2137254476547241, "learning_rate": 9.321473684210527e-05, "loss": 0.5445, "step": 45803 }, { "epoch": 2.564900884757532, "grad_norm": 1.4015522003173828, "learning_rate": 9.321447368421054e-05, "loss": 0.4854, "step": 45804 }, { "epoch": 2.5649568820696604, "grad_norm": 1.118233561515808, "learning_rate": 9.321421052631579e-05, "loss": 0.3749, 
"step": 45805 }, { "epoch": 2.56501287938179, "grad_norm": 1.0573805570602417, "learning_rate": 9.321394736842106e-05, "loss": 0.3215, "step": 45806 }, { "epoch": 2.5650688766939185, "grad_norm": 1.412376046180725, "learning_rate": 9.321368421052632e-05, "loss": 0.3604, "step": 45807 }, { "epoch": 2.565124874006048, "grad_norm": 1.00432550907135, "learning_rate": 9.32134210526316e-05, "loss": 0.3239, "step": 45808 }, { "epoch": 2.5651808713181765, "grad_norm": 1.1237467527389526, "learning_rate": 9.321315789473685e-05, "loss": 0.3716, "step": 45809 }, { "epoch": 2.565236868630306, "grad_norm": 1.2478457689285278, "learning_rate": 9.32128947368421e-05, "loss": 0.4387, "step": 45810 }, { "epoch": 2.5652928659424346, "grad_norm": 1.244915246963501, "learning_rate": 9.321263157894737e-05, "loss": 0.356, "step": 45811 }, { "epoch": 2.565348863254564, "grad_norm": 1.1220879554748535, "learning_rate": 9.321236842105263e-05, "loss": 0.413, "step": 45812 }, { "epoch": 2.5654048605666926, "grad_norm": 1.0878968238830566, "learning_rate": 9.32121052631579e-05, "loss": 0.4896, "step": 45813 }, { "epoch": 2.565460857878822, "grad_norm": 1.2028474807739258, "learning_rate": 9.321184210526315e-05, "loss": 0.378, "step": 45814 }, { "epoch": 2.5655168551909506, "grad_norm": 1.1307753324508667, "learning_rate": 9.321157894736843e-05, "loss": 0.3998, "step": 45815 }, { "epoch": 2.56557285250308, "grad_norm": 1.0618451833724976, "learning_rate": 9.321131578947368e-05, "loss": 0.3663, "step": 45816 }, { "epoch": 2.5656288498152087, "grad_norm": 1.0767853260040283, "learning_rate": 9.321105263157896e-05, "loss": 0.3767, "step": 45817 }, { "epoch": 2.565684847127338, "grad_norm": 1.1243631839752197, "learning_rate": 9.321078947368422e-05, "loss": 0.3373, "step": 45818 }, { "epoch": 2.5657408444394667, "grad_norm": 1.1264264583587646, "learning_rate": 9.321052631578948e-05, "loss": 0.3969, "step": 45819 }, { "epoch": 2.565796841751596, "grad_norm": 1.7147566080093384, "learning_rate": 
9.321026315789474e-05, "loss": 0.4581, "step": 45820 }, { "epoch": 2.5658528390637247, "grad_norm": 1.2335530519485474, "learning_rate": 9.321000000000001e-05, "loss": 0.359, "step": 45821 }, { "epoch": 2.565908836375854, "grad_norm": 1.323560118675232, "learning_rate": 9.320973684210527e-05, "loss": 0.3521, "step": 45822 }, { "epoch": 2.565964833687983, "grad_norm": 1.0332026481628418, "learning_rate": 9.320947368421053e-05, "loss": 0.3351, "step": 45823 }, { "epoch": 2.5660208310001122, "grad_norm": 1.0913318395614624, "learning_rate": 9.320921052631579e-05, "loss": 0.3768, "step": 45824 }, { "epoch": 2.566076828312241, "grad_norm": 1.2505122423171997, "learning_rate": 9.320894736842106e-05, "loss": 0.4706, "step": 45825 }, { "epoch": 2.5661328256243703, "grad_norm": 1.257021427154541, "learning_rate": 9.320868421052632e-05, "loss": 0.4098, "step": 45826 }, { "epoch": 2.566188822936499, "grad_norm": 1.1770654916763306, "learning_rate": 9.320842105263158e-05, "loss": 0.3293, "step": 45827 }, { "epoch": 2.5662448202486283, "grad_norm": 1.200415849685669, "learning_rate": 9.320815789473684e-05, "loss": 0.4818, "step": 45828 }, { "epoch": 2.566300817560757, "grad_norm": 1.106445074081421, "learning_rate": 9.32078947368421e-05, "loss": 0.3376, "step": 45829 }, { "epoch": 2.5663568148728864, "grad_norm": 1.2312828302383423, "learning_rate": 9.320763157894738e-05, "loss": 0.3006, "step": 45830 }, { "epoch": 2.566412812185015, "grad_norm": 0.9670117497444153, "learning_rate": 9.320736842105263e-05, "loss": 0.3307, "step": 45831 }, { "epoch": 2.5664688094971444, "grad_norm": 1.3457633256912231, "learning_rate": 9.32071052631579e-05, "loss": 0.3607, "step": 45832 }, { "epoch": 2.566524806809273, "grad_norm": 1.153135895729065, "learning_rate": 9.320684210526315e-05, "loss": 0.4922, "step": 45833 }, { "epoch": 2.5665808041214024, "grad_norm": 1.2689006328582764, "learning_rate": 9.320657894736843e-05, "loss": 0.3541, "step": 45834 }, { "epoch": 2.566636801433531, 
"grad_norm": 1.425445795059204, "learning_rate": 9.320631578947369e-05, "loss": 0.3613, "step": 45835 }, { "epoch": 2.5666927987456605, "grad_norm": 1.100764274597168, "learning_rate": 9.320605263157896e-05, "loss": 0.4537, "step": 45836 }, { "epoch": 2.566748796057789, "grad_norm": 1.0209444761276245, "learning_rate": 9.320578947368421e-05, "loss": 0.3195, "step": 45837 }, { "epoch": 2.5668047933699185, "grad_norm": 1.1242780685424805, "learning_rate": 9.320552631578948e-05, "loss": 0.3592, "step": 45838 }, { "epoch": 2.566860790682047, "grad_norm": 1.2996630668640137, "learning_rate": 9.320526315789474e-05, "loss": 0.5063, "step": 45839 }, { "epoch": 2.5669167879941766, "grad_norm": 1.2525056600570679, "learning_rate": 9.320500000000001e-05, "loss": 0.4167, "step": 45840 }, { "epoch": 2.566972785306305, "grad_norm": 1.1569304466247559, "learning_rate": 9.320473684210527e-05, "loss": 0.3451, "step": 45841 }, { "epoch": 2.5670287826184346, "grad_norm": 1.0828945636749268, "learning_rate": 9.320447368421053e-05, "loss": 0.4607, "step": 45842 }, { "epoch": 2.567084779930563, "grad_norm": 1.0482077598571777, "learning_rate": 9.320421052631579e-05, "loss": 0.3929, "step": 45843 }, { "epoch": 2.567140777242692, "grad_norm": 1.3216757774353027, "learning_rate": 9.320394736842105e-05, "loss": 0.3432, "step": 45844 }, { "epoch": 2.567196774554821, "grad_norm": 1.2227122783660889, "learning_rate": 9.320368421052633e-05, "loss": 0.4154, "step": 45845 }, { "epoch": 2.5672527718669502, "grad_norm": 1.1086390018463135, "learning_rate": 9.320342105263159e-05, "loss": 0.4039, "step": 45846 }, { "epoch": 2.5673087691790792, "grad_norm": 1.3790932893753052, "learning_rate": 9.320315789473684e-05, "loss": 0.3644, "step": 45847 }, { "epoch": 2.5673647664912083, "grad_norm": 1.1006743907928467, "learning_rate": 9.32028947368421e-05, "loss": 0.3973, "step": 45848 }, { "epoch": 2.5674207638033373, "grad_norm": 1.2658334970474243, "learning_rate": 9.320263157894738e-05, "loss": 0.4366, 
"step": 45849 }, { "epoch": 2.5674767611154663, "grad_norm": 1.237228274345398, "learning_rate": 9.320236842105264e-05, "loss": 0.4115, "step": 45850 }, { "epoch": 2.5675327584275953, "grad_norm": 0.971743106842041, "learning_rate": 9.32021052631579e-05, "loss": 0.5167, "step": 45851 }, { "epoch": 2.5675887557397243, "grad_norm": 0.9546078443527222, "learning_rate": 9.320184210526316e-05, "loss": 0.3766, "step": 45852 }, { "epoch": 2.5676447530518534, "grad_norm": 1.215753197669983, "learning_rate": 9.320157894736843e-05, "loss": 0.3179, "step": 45853 }, { "epoch": 2.5677007503639824, "grad_norm": 1.1478941440582275, "learning_rate": 9.320131578947369e-05, "loss": 0.3614, "step": 45854 }, { "epoch": 2.5677567476761114, "grad_norm": 1.1370004415512085, "learning_rate": 9.320105263157895e-05, "loss": 0.3273, "step": 45855 }, { "epoch": 2.5678127449882404, "grad_norm": 1.2187919616699219, "learning_rate": 9.320078947368421e-05, "loss": 0.4245, "step": 45856 }, { "epoch": 2.5678687423003694, "grad_norm": 0.967246413230896, "learning_rate": 9.320052631578948e-05, "loss": 0.3181, "step": 45857 }, { "epoch": 2.5679247396124985, "grad_norm": 1.1272865533828735, "learning_rate": 9.320026315789474e-05, "loss": 0.3703, "step": 45858 }, { "epoch": 2.5679807369246275, "grad_norm": 1.245540738105774, "learning_rate": 9.320000000000002e-05, "loss": 0.3682, "step": 45859 }, { "epoch": 2.5680367342367565, "grad_norm": 1.0926406383514404, "learning_rate": 9.319973684210526e-05, "loss": 0.3463, "step": 45860 }, { "epoch": 2.5680927315488855, "grad_norm": 1.1934795379638672, "learning_rate": 9.319947368421052e-05, "loss": 0.4253, "step": 45861 }, { "epoch": 2.5681487288610145, "grad_norm": 0.8828509449958801, "learning_rate": 9.31992105263158e-05, "loss": 0.3313, "step": 45862 }, { "epoch": 2.5682047261731435, "grad_norm": 1.3817951679229736, "learning_rate": 9.319894736842105e-05, "loss": 0.3679, "step": 45863 }, { "epoch": 2.5682607234852726, "grad_norm": 1.1344029903411865, 
"learning_rate": 9.319868421052633e-05, "loss": 0.3592, "step": 45864 }, { "epoch": 2.5683167207974016, "grad_norm": 1.2899951934814453, "learning_rate": 9.319842105263157e-05, "loss": 0.4868, "step": 45865 }, { "epoch": 2.5683727181095306, "grad_norm": 1.247365117073059, "learning_rate": 9.319815789473685e-05, "loss": 0.3539, "step": 45866 }, { "epoch": 2.5684287154216596, "grad_norm": 1.1153408288955688, "learning_rate": 9.319789473684211e-05, "loss": 0.4292, "step": 45867 }, { "epoch": 2.5684847127337886, "grad_norm": 1.203562617301941, "learning_rate": 9.319763157894738e-05, "loss": 0.3415, "step": 45868 }, { "epoch": 2.5685407100459177, "grad_norm": 1.2654542922973633, "learning_rate": 9.319736842105263e-05, "loss": 0.5575, "step": 45869 }, { "epoch": 2.5685967073580467, "grad_norm": 1.1862399578094482, "learning_rate": 9.31971052631579e-05, "loss": 0.4209, "step": 45870 }, { "epoch": 2.5686527046701757, "grad_norm": 1.116708755493164, "learning_rate": 9.319684210526316e-05, "loss": 0.4045, "step": 45871 }, { "epoch": 2.5687087019823047, "grad_norm": 1.1129900217056274, "learning_rate": 9.319657894736843e-05, "loss": 0.4008, "step": 45872 }, { "epoch": 2.5687646992944337, "grad_norm": 1.2307490110397339, "learning_rate": 9.319631578947369e-05, "loss": 0.4568, "step": 45873 }, { "epoch": 2.5688206966065628, "grad_norm": 1.087794542312622, "learning_rate": 9.319605263157895e-05, "loss": 0.3188, "step": 45874 }, { "epoch": 2.568876693918692, "grad_norm": 1.0588098764419556, "learning_rate": 9.319578947368421e-05, "loss": 0.3026, "step": 45875 }, { "epoch": 2.568932691230821, "grad_norm": 1.0869641304016113, "learning_rate": 9.319552631578949e-05, "loss": 0.4921, "step": 45876 }, { "epoch": 2.56898868854295, "grad_norm": 1.0289782285690308, "learning_rate": 9.319526315789474e-05, "loss": 0.5033, "step": 45877 }, { "epoch": 2.569044685855079, "grad_norm": 1.1767892837524414, "learning_rate": 9.3195e-05, "loss": 0.3714, "step": 45878 }, { "epoch": 2.569100683167208, 
"grad_norm": 1.3125910758972168, "learning_rate": 9.319473684210526e-05, "loss": 0.5378, "step": 45879 }, { "epoch": 2.569156680479337, "grad_norm": 0.9992678165435791, "learning_rate": 9.319447368421052e-05, "loss": 0.3305, "step": 45880 }, { "epoch": 2.569212677791466, "grad_norm": 0.9862795472145081, "learning_rate": 9.31942105263158e-05, "loss": 0.4197, "step": 45881 }, { "epoch": 2.569268675103595, "grad_norm": 1.3451968431472778, "learning_rate": 9.319394736842106e-05, "loss": 0.3356, "step": 45882 }, { "epoch": 2.569324672415724, "grad_norm": 1.1789580583572388, "learning_rate": 9.319368421052632e-05, "loss": 0.5989, "step": 45883 }, { "epoch": 2.569380669727853, "grad_norm": 1.1294902563095093, "learning_rate": 9.319342105263158e-05, "loss": 0.3753, "step": 45884 }, { "epoch": 2.569436667039982, "grad_norm": 1.2693742513656616, "learning_rate": 9.319315789473685e-05, "loss": 0.4586, "step": 45885 }, { "epoch": 2.569492664352111, "grad_norm": 0.9450874924659729, "learning_rate": 9.319289473684211e-05, "loss": 0.313, "step": 45886 }, { "epoch": 2.56954866166424, "grad_norm": 1.0429608821868896, "learning_rate": 9.319263157894737e-05, "loss": 0.3548, "step": 45887 }, { "epoch": 2.569604658976369, "grad_norm": 1.0232843160629272, "learning_rate": 9.319236842105263e-05, "loss": 0.2763, "step": 45888 }, { "epoch": 2.569660656288498, "grad_norm": 1.0586025714874268, "learning_rate": 9.31921052631579e-05, "loss": 0.3469, "step": 45889 }, { "epoch": 2.569716653600627, "grad_norm": 1.1040581464767456, "learning_rate": 9.319184210526316e-05, "loss": 0.4477, "step": 45890 }, { "epoch": 2.569772650912756, "grad_norm": 1.170853853225708, "learning_rate": 9.319157894736844e-05, "loss": 0.3692, "step": 45891 }, { "epoch": 2.569828648224885, "grad_norm": 1.1636574268341064, "learning_rate": 9.319131578947368e-05, "loss": 0.4486, "step": 45892 }, { "epoch": 2.569884645537014, "grad_norm": 1.180637001991272, "learning_rate": 9.319105263157895e-05, "loss": 0.4515, "step": 
45893 }, { "epoch": 2.569940642849143, "grad_norm": 1.3041411638259888, "learning_rate": 9.319078947368421e-05, "loss": 0.3805, "step": 45894 }, { "epoch": 2.569996640161272, "grad_norm": 1.2125961780548096, "learning_rate": 9.319052631578949e-05, "loss": 0.5076, "step": 45895 }, { "epoch": 2.570052637473401, "grad_norm": 1.3724616765975952, "learning_rate": 9.319026315789475e-05, "loss": 0.4547, "step": 45896 }, { "epoch": 2.57010863478553, "grad_norm": 1.2791975736618042, "learning_rate": 9.319e-05, "loss": 0.3991, "step": 45897 }, { "epoch": 2.570164632097659, "grad_norm": 1.160436749458313, "learning_rate": 9.318973684210527e-05, "loss": 0.376, "step": 45898 }, { "epoch": 2.5702206294097882, "grad_norm": 1.408887505531311, "learning_rate": 9.318947368421053e-05, "loss": 0.3605, "step": 45899 }, { "epoch": 2.5702766267219173, "grad_norm": 1.084800362586975, "learning_rate": 9.31892105263158e-05, "loss": 0.3598, "step": 45900 }, { "epoch": 2.5703326240340463, "grad_norm": 1.1411126852035522, "learning_rate": 9.318894736842106e-05, "loss": 0.4537, "step": 45901 }, { "epoch": 2.5703886213461753, "grad_norm": 1.1892144680023193, "learning_rate": 9.318868421052632e-05, "loss": 0.4685, "step": 45902 }, { "epoch": 2.5704446186583043, "grad_norm": 1.0740100145339966, "learning_rate": 9.318842105263158e-05, "loss": 0.3666, "step": 45903 }, { "epoch": 2.5705006159704333, "grad_norm": 0.806837797164917, "learning_rate": 9.318815789473685e-05, "loss": 0.2794, "step": 45904 }, { "epoch": 2.5705566132825624, "grad_norm": 1.1996101140975952, "learning_rate": 9.318789473684211e-05, "loss": 0.3135, "step": 45905 }, { "epoch": 2.5706126105946914, "grad_norm": 1.085951566696167, "learning_rate": 9.318763157894737e-05, "loss": 0.3917, "step": 45906 }, { "epoch": 2.5706686079068204, "grad_norm": 1.1220403909683228, "learning_rate": 9.318736842105263e-05, "loss": 0.4032, "step": 45907 }, { "epoch": 2.5707246052189494, "grad_norm": 1.2868386507034302, "learning_rate": 
9.31871052631579e-05, "loss": 0.3779, "step": 45908 }, { "epoch": 2.5707806025310784, "grad_norm": 0.8794397711753845, "learning_rate": 9.318684210526316e-05, "loss": 0.2525, "step": 45909 }, { "epoch": 2.5708365998432074, "grad_norm": 1.1922544240951538, "learning_rate": 9.318657894736842e-05, "loss": 0.4778, "step": 45910 }, { "epoch": 2.5708925971553365, "grad_norm": 2.926259756088257, "learning_rate": 9.318631578947368e-05, "loss": 0.5772, "step": 45911 }, { "epoch": 2.5709485944674655, "grad_norm": 1.1517329216003418, "learning_rate": 9.318605263157896e-05, "loss": 0.4383, "step": 45912 }, { "epoch": 2.5710045917795945, "grad_norm": 1.367051124572754, "learning_rate": 9.318578947368422e-05, "loss": 0.4252, "step": 45913 }, { "epoch": 2.5710605890917235, "grad_norm": 1.1536588668823242, "learning_rate": 9.318552631578948e-05, "loss": 0.398, "step": 45914 }, { "epoch": 2.5711165864038525, "grad_norm": 1.1822625398635864, "learning_rate": 9.318526315789474e-05, "loss": 0.471, "step": 45915 }, { "epoch": 2.5711725837159816, "grad_norm": 1.2724826335906982, "learning_rate": 9.3185e-05, "loss": 0.3487, "step": 45916 }, { "epoch": 2.5712285810281106, "grad_norm": 1.4357938766479492, "learning_rate": 9.318473684210527e-05, "loss": 0.48, "step": 45917 }, { "epoch": 2.5712845783402396, "grad_norm": 1.1453899145126343, "learning_rate": 9.318447368421053e-05, "loss": 0.3791, "step": 45918 }, { "epoch": 2.5713405756523686, "grad_norm": 1.3468393087387085, "learning_rate": 9.31842105263158e-05, "loss": 0.325, "step": 45919 }, { "epoch": 2.5713965729644976, "grad_norm": 1.0968509912490845, "learning_rate": 9.318394736842105e-05, "loss": 0.3483, "step": 45920 }, { "epoch": 2.5714525702766267, "grad_norm": 1.1521761417388916, "learning_rate": 9.318368421052632e-05, "loss": 0.4365, "step": 45921 }, { "epoch": 2.5715085675887557, "grad_norm": 1.0848356485366821, "learning_rate": 9.318342105263158e-05, "loss": 0.3655, "step": 45922 }, { "epoch": 2.5715645649008847, "grad_norm": 
1.0041016340255737, "learning_rate": 9.318315789473686e-05, "loss": 0.3306, "step": 45923 }, { "epoch": 2.5716205622130137, "grad_norm": 1.3201125860214233, "learning_rate": 9.31828947368421e-05, "loss": 0.4463, "step": 45924 }, { "epoch": 2.5716765595251427, "grad_norm": 1.4077279567718506, "learning_rate": 9.318263157894737e-05, "loss": 0.472, "step": 45925 }, { "epoch": 2.5717325568372718, "grad_norm": 1.3431774377822876, "learning_rate": 9.318236842105263e-05, "loss": 0.3716, "step": 45926 }, { "epoch": 2.5717885541494008, "grad_norm": 1.0158594846725464, "learning_rate": 9.318210526315791e-05, "loss": 0.3127, "step": 45927 }, { "epoch": 2.57184455146153, "grad_norm": 1.1353245973587036, "learning_rate": 9.318184210526317e-05, "loss": 0.4089, "step": 45928 }, { "epoch": 2.571900548773659, "grad_norm": 1.1977567672729492, "learning_rate": 9.318157894736843e-05, "loss": 0.4002, "step": 45929 }, { "epoch": 2.571956546085788, "grad_norm": 1.6051511764526367, "learning_rate": 9.318131578947369e-05, "loss": 0.4603, "step": 45930 }, { "epoch": 2.572012543397917, "grad_norm": 1.0861529111862183, "learning_rate": 9.318105263157895e-05, "loss": 0.266, "step": 45931 }, { "epoch": 2.572068540710046, "grad_norm": 1.0562223196029663, "learning_rate": 9.318078947368422e-05, "loss": 0.359, "step": 45932 }, { "epoch": 2.572124538022175, "grad_norm": 0.9891675114631653, "learning_rate": 9.318052631578948e-05, "loss": 0.3565, "step": 45933 }, { "epoch": 2.572180535334304, "grad_norm": 1.4393901824951172, "learning_rate": 9.318026315789474e-05, "loss": 0.4011, "step": 45934 }, { "epoch": 2.572236532646433, "grad_norm": 1.237891435623169, "learning_rate": 9.318e-05, "loss": 0.4604, "step": 45935 }, { "epoch": 2.572292529958562, "grad_norm": 1.0912176370620728, "learning_rate": 9.317973684210527e-05, "loss": 0.3481, "step": 45936 }, { "epoch": 2.572348527270691, "grad_norm": 1.2254101037979126, "learning_rate": 9.317947368421053e-05, "loss": 0.2802, "step": 45937 }, { "epoch": 
2.57240452458282, "grad_norm": 1.397782325744629, "learning_rate": 9.317921052631579e-05, "loss": 0.4698, "step": 45938 }, { "epoch": 2.572460521894949, "grad_norm": 1.6142774820327759, "learning_rate": 9.317894736842105e-05, "loss": 0.4267, "step": 45939 }, { "epoch": 2.572516519207078, "grad_norm": 1.0425869226455688, "learning_rate": 9.317868421052632e-05, "loss": 0.3316, "step": 45940 }, { "epoch": 2.572572516519207, "grad_norm": 1.012616753578186, "learning_rate": 9.317842105263158e-05, "loss": 0.508, "step": 45941 }, { "epoch": 2.572628513831336, "grad_norm": 1.1643704175949097, "learning_rate": 9.317815789473684e-05, "loss": 0.3385, "step": 45942 }, { "epoch": 2.572684511143465, "grad_norm": 1.028082251548767, "learning_rate": 9.31778947368421e-05, "loss": 0.343, "step": 45943 }, { "epoch": 2.572740508455594, "grad_norm": 1.3522393703460693, "learning_rate": 9.317763157894738e-05, "loss": 0.4234, "step": 45944 }, { "epoch": 2.572796505767723, "grad_norm": 1.1051863431930542, "learning_rate": 9.317736842105264e-05, "loss": 0.4046, "step": 45945 }, { "epoch": 2.572852503079852, "grad_norm": 1.5609067678451538, "learning_rate": 9.317710526315791e-05, "loss": 0.4757, "step": 45946 }, { "epoch": 2.572908500391981, "grad_norm": 1.1490943431854248, "learning_rate": 9.317684210526316e-05, "loss": 0.5026, "step": 45947 }, { "epoch": 2.57296449770411, "grad_norm": 1.4425662755966187, "learning_rate": 9.317657894736842e-05, "loss": 0.5196, "step": 45948 }, { "epoch": 2.573020495016239, "grad_norm": 1.1500816345214844, "learning_rate": 9.317631578947369e-05, "loss": 0.3398, "step": 45949 }, { "epoch": 2.573076492328368, "grad_norm": 0.9664220809936523, "learning_rate": 9.317605263157895e-05, "loss": 0.356, "step": 45950 }, { "epoch": 2.5731324896404972, "grad_norm": 1.1630465984344482, "learning_rate": 9.317578947368422e-05, "loss": 0.3445, "step": 45951 }, { "epoch": 2.5731884869526263, "grad_norm": 1.26423180103302, "learning_rate": 9.317552631578947e-05, "loss": 
0.4185, "step": 45952 }, { "epoch": 2.5732444842647553, "grad_norm": 1.2639659643173218, "learning_rate": 9.317526315789474e-05, "loss": 0.3768, "step": 45953 }, { "epoch": 2.5733004815768843, "grad_norm": 1.079248309135437, "learning_rate": 9.3175e-05, "loss": 0.3106, "step": 45954 }, { "epoch": 2.5733564788890133, "grad_norm": 1.1732382774353027, "learning_rate": 9.317473684210527e-05, "loss": 0.4007, "step": 45955 }, { "epoch": 2.5734124762011423, "grad_norm": 1.1750617027282715, "learning_rate": 9.317447368421053e-05, "loss": 0.3312, "step": 45956 }, { "epoch": 2.5734684735132713, "grad_norm": 0.8681473731994629, "learning_rate": 9.31742105263158e-05, "loss": 0.2851, "step": 45957 }, { "epoch": 2.5735244708254004, "grad_norm": 1.1687676906585693, "learning_rate": 9.317394736842105e-05, "loss": 0.3381, "step": 45958 }, { "epoch": 2.5735804681375294, "grad_norm": 0.9861500263214111, "learning_rate": 9.317368421052633e-05, "loss": 0.3698, "step": 45959 }, { "epoch": 2.5736364654496584, "grad_norm": 1.2362630367279053, "learning_rate": 9.317342105263159e-05, "loss": 0.4299, "step": 45960 }, { "epoch": 2.5736924627617874, "grad_norm": 1.0838663578033447, "learning_rate": 9.317315789473685e-05, "loss": 0.4099, "step": 45961 }, { "epoch": 2.5737484600739164, "grad_norm": 1.8621318340301514, "learning_rate": 9.31728947368421e-05, "loss": 0.3261, "step": 45962 }, { "epoch": 2.5738044573860455, "grad_norm": 1.1353189945220947, "learning_rate": 9.317263157894738e-05, "loss": 0.3837, "step": 45963 }, { "epoch": 2.5738604546981745, "grad_norm": 0.9756107926368713, "learning_rate": 9.317236842105264e-05, "loss": 0.4376, "step": 45964 }, { "epoch": 2.5739164520103035, "grad_norm": 1.469584345817566, "learning_rate": 9.31721052631579e-05, "loss": 0.4687, "step": 45965 }, { "epoch": 2.5739724493224325, "grad_norm": 1.4445240497589111, "learning_rate": 9.317184210526316e-05, "loss": 0.6256, "step": 45966 }, { "epoch": 2.5740284466345615, "grad_norm": 1.159963607788086, 
"learning_rate": 9.317157894736842e-05, "loss": 0.3382, "step": 45967 }, { "epoch": 2.5740844439466906, "grad_norm": 1.350013256072998, "learning_rate": 9.317131578947369e-05, "loss": 0.4519, "step": 45968 }, { "epoch": 2.5741404412588196, "grad_norm": 1.1050174236297607, "learning_rate": 9.317105263157895e-05, "loss": 0.3941, "step": 45969 }, { "epoch": 2.5741964385709486, "grad_norm": 1.1208648681640625, "learning_rate": 9.317078947368421e-05, "loss": 0.4117, "step": 45970 }, { "epoch": 2.5742524358830776, "grad_norm": 1.2012273073196411, "learning_rate": 9.317052631578947e-05, "loss": 0.3426, "step": 45971 }, { "epoch": 2.5743084331952066, "grad_norm": 1.1187759637832642, "learning_rate": 9.317026315789474e-05, "loss": 0.377, "step": 45972 }, { "epoch": 2.5743644305073357, "grad_norm": 1.0412623882293701, "learning_rate": 9.317e-05, "loss": 0.396, "step": 45973 }, { "epoch": 2.5744204278194647, "grad_norm": 1.1677817106246948, "learning_rate": 9.316973684210528e-05, "loss": 0.3081, "step": 45974 }, { "epoch": 2.5744764251315937, "grad_norm": 1.2372729778289795, "learning_rate": 9.316947368421052e-05, "loss": 0.5085, "step": 45975 }, { "epoch": 2.5745324224437227, "grad_norm": 1.1914616823196411, "learning_rate": 9.31692105263158e-05, "loss": 0.4636, "step": 45976 }, { "epoch": 2.5745884197558517, "grad_norm": 1.2914350032806396, "learning_rate": 9.316894736842106e-05, "loss": 0.4887, "step": 45977 }, { "epoch": 2.5746444170679808, "grad_norm": 1.4277184009552002, "learning_rate": 9.316868421052633e-05, "loss": 0.6, "step": 45978 }, { "epoch": 2.5747004143801098, "grad_norm": 1.4837602376937866, "learning_rate": 9.316842105263158e-05, "loss": 0.4269, "step": 45979 }, { "epoch": 2.574756411692239, "grad_norm": 1.2476377487182617, "learning_rate": 9.316815789473685e-05, "loss": 0.4336, "step": 45980 }, { "epoch": 2.574812409004368, "grad_norm": 1.2525088787078857, "learning_rate": 9.316789473684211e-05, "loss": 0.325, "step": 45981 }, { "epoch": 2.574868406316497, 
"grad_norm": 1.3420413732528687, "learning_rate": 9.316763157894737e-05, "loss": 0.3898, "step": 45982 }, { "epoch": 2.574924403628626, "grad_norm": 1.2477458715438843, "learning_rate": 9.316736842105264e-05, "loss": 0.3686, "step": 45983 }, { "epoch": 2.574980400940755, "grad_norm": 1.1343430280685425, "learning_rate": 9.316710526315789e-05, "loss": 0.3928, "step": 45984 }, { "epoch": 2.575036398252884, "grad_norm": 1.3381307125091553, "learning_rate": 9.316684210526316e-05, "loss": 0.3457, "step": 45985 }, { "epoch": 2.575092395565013, "grad_norm": 1.1925050020217896, "learning_rate": 9.316657894736842e-05, "loss": 0.4944, "step": 45986 }, { "epoch": 2.575148392877142, "grad_norm": 2.661663055419922, "learning_rate": 9.31663157894737e-05, "loss": 0.3457, "step": 45987 }, { "epoch": 2.575204390189271, "grad_norm": 1.2135288715362549, "learning_rate": 9.316605263157895e-05, "loss": 0.3453, "step": 45988 }, { "epoch": 2.5752603875014, "grad_norm": 1.183806300163269, "learning_rate": 9.316578947368421e-05, "loss": 0.4, "step": 45989 }, { "epoch": 2.575316384813529, "grad_norm": 1.4379099607467651, "learning_rate": 9.316552631578947e-05, "loss": 0.5303, "step": 45990 }, { "epoch": 2.575372382125658, "grad_norm": 1.1737977266311646, "learning_rate": 9.316526315789475e-05, "loss": 0.3619, "step": 45991 }, { "epoch": 2.575428379437787, "grad_norm": 1.280013918876648, "learning_rate": 9.3165e-05, "loss": 0.4399, "step": 45992 }, { "epoch": 2.575484376749916, "grad_norm": 1.1782958507537842, "learning_rate": 9.316473684210527e-05, "loss": 0.3295, "step": 45993 }, { "epoch": 2.575540374062045, "grad_norm": 1.1879894733428955, "learning_rate": 9.316447368421053e-05, "loss": 0.4469, "step": 45994 }, { "epoch": 2.575596371374174, "grad_norm": 1.0662089586257935, "learning_rate": 9.31642105263158e-05, "loss": 0.3716, "step": 45995 }, { "epoch": 2.575652368686303, "grad_norm": 1.3476603031158447, "learning_rate": 9.316394736842106e-05, "loss": 0.36, "step": 45996 }, { "epoch": 
2.575708365998432, "grad_norm": 1.289571762084961, "learning_rate": 9.316368421052632e-05, "loss": 0.3726, "step": 45997 }, { "epoch": 2.575764363310561, "grad_norm": 1.0155049562454224, "learning_rate": 9.316342105263158e-05, "loss": 0.3355, "step": 45998 }, { "epoch": 2.57582036062269, "grad_norm": 0.9930593967437744, "learning_rate": 9.316315789473684e-05, "loss": 0.3609, "step": 45999 }, { "epoch": 2.575876357934819, "grad_norm": 1.5688420534133911, "learning_rate": 9.316289473684211e-05, "loss": 0.3788, "step": 46000 }, { "epoch": 2.575932355246948, "grad_norm": 1.0519797801971436, "learning_rate": 9.316263157894737e-05, "loss": 0.3114, "step": 46001 }, { "epoch": 2.575988352559077, "grad_norm": 1.3388243913650513, "learning_rate": 9.316236842105263e-05, "loss": 0.3639, "step": 46002 }, { "epoch": 2.5760443498712062, "grad_norm": 1.2444498538970947, "learning_rate": 9.316210526315789e-05, "loss": 0.5302, "step": 46003 }, { "epoch": 2.5761003471833352, "grad_norm": 0.947036862373352, "learning_rate": 9.316184210526316e-05, "loss": 0.3336, "step": 46004 }, { "epoch": 2.5761563444954643, "grad_norm": 1.1413090229034424, "learning_rate": 9.316157894736842e-05, "loss": 0.3762, "step": 46005 }, { "epoch": 2.5762123418075933, "grad_norm": 1.0925520658493042, "learning_rate": 9.31613157894737e-05, "loss": 0.4306, "step": 46006 }, { "epoch": 2.5762683391197223, "grad_norm": 1.22615385055542, "learning_rate": 9.316105263157894e-05, "loss": 0.4755, "step": 46007 }, { "epoch": 2.5763243364318513, "grad_norm": 1.077869176864624, "learning_rate": 9.316078947368422e-05, "loss": 0.3231, "step": 46008 }, { "epoch": 2.5763803337439803, "grad_norm": 1.1102615594863892, "learning_rate": 9.316052631578948e-05, "loss": 0.3705, "step": 46009 }, { "epoch": 2.5764363310561094, "grad_norm": 1.5360053777694702, "learning_rate": 9.316026315789475e-05, "loss": 0.4663, "step": 46010 }, { "epoch": 2.5764923283682384, "grad_norm": 1.1477519273757935, "learning_rate": 9.316000000000001e-05, 
"loss": 0.3577, "step": 46011 }, { "epoch": 2.5765483256803674, "grad_norm": 1.1828967332839966, "learning_rate": 9.315973684210527e-05, "loss": 0.3282, "step": 46012 }, { "epoch": 2.5766043229924964, "grad_norm": 1.071303129196167, "learning_rate": 9.315947368421053e-05, "loss": 0.3449, "step": 46013 }, { "epoch": 2.5766603203046254, "grad_norm": 1.121272325515747, "learning_rate": 9.31592105263158e-05, "loss": 0.4479, "step": 46014 }, { "epoch": 2.5767163176167545, "grad_norm": 1.4459490776062012, "learning_rate": 9.315894736842106e-05, "loss": 0.445, "step": 46015 }, { "epoch": 2.5767723149288835, "grad_norm": 1.2250949144363403, "learning_rate": 9.315868421052632e-05, "loss": 0.5084, "step": 46016 }, { "epoch": 2.5768283122410125, "grad_norm": 0.9811687469482422, "learning_rate": 9.315842105263158e-05, "loss": 0.4029, "step": 46017 }, { "epoch": 2.5768843095531415, "grad_norm": 1.3154581785202026, "learning_rate": 9.315815789473684e-05, "loss": 0.3411, "step": 46018 }, { "epoch": 2.5769403068652705, "grad_norm": 1.0479449033737183, "learning_rate": 9.315789473684211e-05, "loss": 0.3482, "step": 46019 }, { "epoch": 2.5769963041773996, "grad_norm": 1.1492435932159424, "learning_rate": 9.315763157894737e-05, "loss": 0.3607, "step": 46020 }, { "epoch": 2.5770523014895286, "grad_norm": 1.0657223463058472, "learning_rate": 9.315736842105263e-05, "loss": 0.3465, "step": 46021 }, { "epoch": 2.5771082988016576, "grad_norm": 1.4544436931610107, "learning_rate": 9.31571052631579e-05, "loss": 0.6357, "step": 46022 }, { "epoch": 2.5771642961137866, "grad_norm": 1.054081916809082, "learning_rate": 9.315684210526317e-05, "loss": 0.3657, "step": 46023 }, { "epoch": 2.5772202934259156, "grad_norm": 1.0402076244354248, "learning_rate": 9.315657894736843e-05, "loss": 0.3238, "step": 46024 }, { "epoch": 2.5772762907380447, "grad_norm": 1.1456807851791382, "learning_rate": 9.315631578947369e-05, "loss": 0.4309, "step": 46025 }, { "epoch": 2.5773322880501737, "grad_norm": 
1.136291742324829, "learning_rate": 9.315605263157895e-05, "loss": 0.2956, "step": 46026 }, { "epoch": 2.5773882853623027, "grad_norm": 1.1384942531585693, "learning_rate": 9.315578947368422e-05, "loss": 0.3905, "step": 46027 }, { "epoch": 2.5774442826744317, "grad_norm": 1.0591740608215332, "learning_rate": 9.315552631578948e-05, "loss": 0.4322, "step": 46028 }, { "epoch": 2.5775002799865607, "grad_norm": 1.1473219394683838, "learning_rate": 9.315526315789475e-05, "loss": 0.3987, "step": 46029 }, { "epoch": 2.5775562772986897, "grad_norm": 1.072749376296997, "learning_rate": 9.3155e-05, "loss": 0.3893, "step": 46030 }, { "epoch": 2.5776122746108188, "grad_norm": 1.4761978387832642, "learning_rate": 9.315473684210527e-05, "loss": 0.5563, "step": 46031 }, { "epoch": 2.577668271922948, "grad_norm": 1.0590451955795288, "learning_rate": 9.315447368421053e-05, "loss": 0.3499, "step": 46032 }, { "epoch": 2.577724269235077, "grad_norm": 0.8593736886978149, "learning_rate": 9.31542105263158e-05, "loss": 0.2561, "step": 46033 }, { "epoch": 2.577780266547206, "grad_norm": 1.2652361392974854, "learning_rate": 9.315394736842105e-05, "loss": 0.5819, "step": 46034 }, { "epoch": 2.577836263859335, "grad_norm": 1.181604266166687, "learning_rate": 9.315368421052631e-05, "loss": 0.4735, "step": 46035 }, { "epoch": 2.577892261171464, "grad_norm": 1.280901551246643, "learning_rate": 9.315342105263158e-05, "loss": 0.5325, "step": 46036 }, { "epoch": 2.577948258483593, "grad_norm": 1.1786962747573853, "learning_rate": 9.315315789473684e-05, "loss": 0.4014, "step": 46037 }, { "epoch": 2.578004255795722, "grad_norm": 1.397752046585083, "learning_rate": 9.315289473684212e-05, "loss": 0.3402, "step": 46038 }, { "epoch": 2.578060253107851, "grad_norm": 1.1130037307739258, "learning_rate": 9.315263157894736e-05, "loss": 0.4848, "step": 46039 }, { "epoch": 2.57811625041998, "grad_norm": 1.360951542854309, "learning_rate": 9.315236842105264e-05, "loss": 0.449, "step": 46040 }, { "epoch": 
2.578172247732109, "grad_norm": 1.142729640007019, "learning_rate": 9.31521052631579e-05, "loss": 0.3708, "step": 46041 }, { "epoch": 2.578228245044238, "grad_norm": 0.9992817044258118, "learning_rate": 9.315184210526317e-05, "loss": 0.3213, "step": 46042 }, { "epoch": 2.578284242356367, "grad_norm": 1.3108927011489868, "learning_rate": 9.315157894736843e-05, "loss": 0.4167, "step": 46043 }, { "epoch": 2.578340239668496, "grad_norm": 1.5837466716766357, "learning_rate": 9.315131578947369e-05, "loss": 0.5196, "step": 46044 }, { "epoch": 2.578396236980625, "grad_norm": 0.927727222442627, "learning_rate": 9.315105263157895e-05, "loss": 0.3432, "step": 46045 }, { "epoch": 2.578452234292754, "grad_norm": 1.1483770608901978, "learning_rate": 9.315078947368422e-05, "loss": 0.4136, "step": 46046 }, { "epoch": 2.578508231604883, "grad_norm": 1.0409961938858032, "learning_rate": 9.315052631578948e-05, "loss": 0.4282, "step": 46047 }, { "epoch": 2.578564228917012, "grad_norm": 0.9694766402244568, "learning_rate": 9.315026315789474e-05, "loss": 0.4193, "step": 46048 }, { "epoch": 2.578620226229141, "grad_norm": 1.0620146989822388, "learning_rate": 9.315e-05, "loss": 0.3902, "step": 46049 }, { "epoch": 2.57867622354127, "grad_norm": 1.1515355110168457, "learning_rate": 9.314973684210527e-05, "loss": 0.376, "step": 46050 }, { "epoch": 2.578732220853399, "grad_norm": 1.056361436843872, "learning_rate": 9.314947368421053e-05, "loss": 0.3444, "step": 46051 }, { "epoch": 2.578788218165528, "grad_norm": 1.074299931526184, "learning_rate": 9.31492105263158e-05, "loss": 0.4389, "step": 46052 }, { "epoch": 2.578844215477657, "grad_norm": 1.1565958261489868, "learning_rate": 9.314894736842105e-05, "loss": 0.4372, "step": 46053 }, { "epoch": 2.578900212789786, "grad_norm": 1.2462416887283325, "learning_rate": 9.314868421052631e-05, "loss": 0.3845, "step": 46054 }, { "epoch": 2.5789562101019152, "grad_norm": 1.2445703744888306, "learning_rate": 9.314842105263159e-05, "loss": 0.3762, 
"step": 46055 }, { "epoch": 2.5790122074140442, "grad_norm": 1.1718096733093262, "learning_rate": 9.314815789473685e-05, "loss": 0.3526, "step": 46056 }, { "epoch": 2.5790682047261733, "grad_norm": 2.2784805297851562, "learning_rate": 9.31478947368421e-05, "loss": 0.6718, "step": 46057 }, { "epoch": 2.5791242020383023, "grad_norm": 1.2471497058868408, "learning_rate": 9.314763157894737e-05, "loss": 0.3402, "step": 46058 }, { "epoch": 2.5791801993504313, "grad_norm": 1.159052848815918, "learning_rate": 9.314736842105264e-05, "loss": 0.3907, "step": 46059 }, { "epoch": 2.5792361966625603, "grad_norm": 1.467471718788147, "learning_rate": 9.31471052631579e-05, "loss": 0.4823, "step": 46060 }, { "epoch": 2.5792921939746893, "grad_norm": 1.1169527769088745, "learning_rate": 9.314684210526317e-05, "loss": 0.3697, "step": 46061 }, { "epoch": 2.5793481912868184, "grad_norm": 1.257900357246399, "learning_rate": 9.314657894736842e-05, "loss": 0.4065, "step": 46062 }, { "epoch": 2.5794041885989474, "grad_norm": 1.2235114574432373, "learning_rate": 9.314631578947369e-05, "loss": 0.3677, "step": 46063 }, { "epoch": 2.5794601859110764, "grad_norm": 1.4424211978912354, "learning_rate": 9.314605263157895e-05, "loss": 0.4321, "step": 46064 }, { "epoch": 2.5795161832232054, "grad_norm": 1.1728360652923584, "learning_rate": 9.314578947368422e-05, "loss": 0.5346, "step": 46065 }, { "epoch": 2.5795721805353344, "grad_norm": 1.0435181856155396, "learning_rate": 9.314552631578948e-05, "loss": 0.3737, "step": 46066 }, { "epoch": 2.5796281778474635, "grad_norm": 1.06194269657135, "learning_rate": 9.314526315789474e-05, "loss": 0.3952, "step": 46067 }, { "epoch": 2.5796841751595925, "grad_norm": 1.3419098854064941, "learning_rate": 9.3145e-05, "loss": 0.3814, "step": 46068 }, { "epoch": 2.5797401724717215, "grad_norm": 1.0676994323730469, "learning_rate": 9.314473684210526e-05, "loss": 0.4272, "step": 46069 }, { "epoch": 2.5797961697838505, "grad_norm": 1.0949664115905762, "learning_rate": 
9.314447368421054e-05, "loss": 0.3657, "step": 46070 }, { "epoch": 2.5798521670959795, "grad_norm": 1.0061603784561157, "learning_rate": 9.314421052631578e-05, "loss": 0.2596, "step": 46071 }, { "epoch": 2.5799081644081086, "grad_norm": 1.1637463569641113, "learning_rate": 9.314394736842106e-05, "loss": 0.4634, "step": 46072 }, { "epoch": 2.5799641617202376, "grad_norm": 0.9348063468933105, "learning_rate": 9.314368421052632e-05, "loss": 0.3751, "step": 46073 }, { "epoch": 2.5800201590323666, "grad_norm": 1.1514455080032349, "learning_rate": 9.314342105263159e-05, "loss": 0.3176, "step": 46074 }, { "epoch": 2.5800761563444956, "grad_norm": 1.101204752922058, "learning_rate": 9.314315789473685e-05, "loss": 0.3418, "step": 46075 }, { "epoch": 2.5801321536566246, "grad_norm": 0.9068188667297363, "learning_rate": 9.314289473684211e-05, "loss": 0.2782, "step": 46076 }, { "epoch": 2.5801881509687536, "grad_norm": 1.2096669673919678, "learning_rate": 9.314263157894737e-05, "loss": 0.4085, "step": 46077 }, { "epoch": 2.5802441482808827, "grad_norm": 2.3137753009796143, "learning_rate": 9.314236842105264e-05, "loss": 0.3432, "step": 46078 }, { "epoch": 2.5803001455930117, "grad_norm": 1.6223537921905518, "learning_rate": 9.31421052631579e-05, "loss": 0.427, "step": 46079 }, { "epoch": 2.5803561429051407, "grad_norm": 1.9210237264633179, "learning_rate": 9.314184210526316e-05, "loss": 0.5861, "step": 46080 }, { "epoch": 2.5804121402172697, "grad_norm": 1.4032903909683228, "learning_rate": 9.314157894736842e-05, "loss": 0.3324, "step": 46081 }, { "epoch": 2.5804681375293987, "grad_norm": 1.6977425813674927, "learning_rate": 9.31413157894737e-05, "loss": 0.5505, "step": 46082 }, { "epoch": 2.5805241348415278, "grad_norm": 1.3876904249191284, "learning_rate": 9.314105263157895e-05, "loss": 0.3835, "step": 46083 }, { "epoch": 2.580580132153657, "grad_norm": 1.3035751581192017, "learning_rate": 9.314078947368423e-05, "loss": 0.4425, "step": 46084 }, { "epoch": 2.580636129465786, 
"grad_norm": 3.136500597000122, "learning_rate": 9.314052631578947e-05, "loss": 0.345, "step": 46085 }, { "epoch": 2.580692126777915, "grad_norm": 1.2417433261871338, "learning_rate": 9.314026315789473e-05, "loss": 0.4438, "step": 46086 }, { "epoch": 2.580748124090044, "grad_norm": 1.2220112085342407, "learning_rate": 9.314e-05, "loss": 0.3782, "step": 46087 }, { "epoch": 2.580804121402173, "grad_norm": 1.2444084882736206, "learning_rate": 9.313973684210527e-05, "loss": 0.4214, "step": 46088 }, { "epoch": 2.580860118714302, "grad_norm": 1.8742244243621826, "learning_rate": 9.313947368421053e-05, "loss": 0.528, "step": 46089 }, { "epoch": 2.580916116026431, "grad_norm": 1.5566651821136475, "learning_rate": 9.313921052631579e-05, "loss": 0.5614, "step": 46090 }, { "epoch": 2.58097211333856, "grad_norm": 1.311989188194275, "learning_rate": 9.313894736842106e-05, "loss": 0.5929, "step": 46091 }, { "epoch": 2.5810281106506885, "grad_norm": 1.1537903547286987, "learning_rate": 9.313868421052632e-05, "loss": 0.3887, "step": 46092 }, { "epoch": 2.581084107962818, "grad_norm": 1.5106817483901978, "learning_rate": 9.313842105263159e-05, "loss": 0.4065, "step": 46093 }, { "epoch": 2.5811401052749465, "grad_norm": 1.0126575231552124, "learning_rate": 9.313815789473684e-05, "loss": 0.3849, "step": 46094 }, { "epoch": 2.581196102587076, "grad_norm": 1.2632516622543335, "learning_rate": 9.313789473684211e-05, "loss": 0.4506, "step": 46095 }, { "epoch": 2.5812520998992046, "grad_norm": 1.5963244438171387, "learning_rate": 9.313763157894737e-05, "loss": 0.406, "step": 46096 }, { "epoch": 2.581308097211334, "grad_norm": 1.444861888885498, "learning_rate": 9.313736842105264e-05, "loss": 0.6319, "step": 46097 }, { "epoch": 2.5813640945234626, "grad_norm": 1.2981599569320679, "learning_rate": 9.31371052631579e-05, "loss": 0.4995, "step": 46098 }, { "epoch": 2.581420091835592, "grad_norm": 0.8989390134811401, "learning_rate": 9.313684210526316e-05, "loss": 0.3089, "step": 46099 }, { 
"epoch": 2.5814760891477206, "grad_norm": 1.0667085647583008, "learning_rate": 9.313657894736842e-05, "loss": 0.3027, "step": 46100 }, { "epoch": 2.58153208645985, "grad_norm": 1.2525547742843628, "learning_rate": 9.31363157894737e-05, "loss": 0.3871, "step": 46101 }, { "epoch": 2.5815880837719787, "grad_norm": 1.4255220890045166, "learning_rate": 9.313605263157896e-05, "loss": 0.4652, "step": 46102 }, { "epoch": 2.581644081084108, "grad_norm": 1.0932177305221558, "learning_rate": 9.313578947368422e-05, "loss": 0.3531, "step": 46103 }, { "epoch": 2.5817000783962367, "grad_norm": 1.2329206466674805, "learning_rate": 9.313552631578948e-05, "loss": 0.4215, "step": 46104 }, { "epoch": 2.581756075708366, "grad_norm": 1.2955242395401, "learning_rate": 9.313526315789474e-05, "loss": 0.3689, "step": 46105 }, { "epoch": 2.5818120730204948, "grad_norm": 1.2032455205917358, "learning_rate": 9.313500000000001e-05, "loss": 0.4864, "step": 46106 }, { "epoch": 2.581868070332624, "grad_norm": 1.3263942003250122, "learning_rate": 9.313473684210527e-05, "loss": 0.3865, "step": 46107 }, { "epoch": 2.581924067644753, "grad_norm": 1.122671365737915, "learning_rate": 9.313447368421053e-05, "loss": 0.248, "step": 46108 }, { "epoch": 2.5819800649568823, "grad_norm": 1.2542387247085571, "learning_rate": 9.313421052631579e-05, "loss": 0.5328, "step": 46109 }, { "epoch": 2.582036062269011, "grad_norm": 1.110645055770874, "learning_rate": 9.313394736842106e-05, "loss": 0.3752, "step": 46110 }, { "epoch": 2.5820920595811403, "grad_norm": 1.1280505657196045, "learning_rate": 9.313368421052632e-05, "loss": 0.4218, "step": 46111 }, { "epoch": 2.582148056893269, "grad_norm": 1.1789436340332031, "learning_rate": 9.313342105263158e-05, "loss": 0.4076, "step": 46112 }, { "epoch": 2.5822040542053983, "grad_norm": 1.2088063955307007, "learning_rate": 9.313315789473684e-05, "loss": 0.4068, "step": 46113 }, { "epoch": 2.582260051517527, "grad_norm": 1.3133759498596191, "learning_rate": 
9.313289473684211e-05, "loss": 0.363, "step": 46114 }, { "epoch": 2.5823160488296564, "grad_norm": 1.0569279193878174, "learning_rate": 9.313263157894737e-05, "loss": 0.3917, "step": 46115 }, { "epoch": 2.582372046141785, "grad_norm": 1.1340553760528564, "learning_rate": 9.313236842105265e-05, "loss": 0.4839, "step": 46116 }, { "epoch": 2.5824280434539144, "grad_norm": 1.097480058670044, "learning_rate": 9.313210526315789e-05, "loss": 0.4591, "step": 46117 }, { "epoch": 2.582484040766043, "grad_norm": 1.458385944366455, "learning_rate": 9.313184210526317e-05, "loss": 0.5556, "step": 46118 }, { "epoch": 2.5825400380781725, "grad_norm": 1.0876173973083496, "learning_rate": 9.313157894736843e-05, "loss": 0.3459, "step": 46119 }, { "epoch": 2.582596035390301, "grad_norm": 1.0457082986831665, "learning_rate": 9.313131578947369e-05, "loss": 0.3347, "step": 46120 }, { "epoch": 2.5826520327024305, "grad_norm": 1.3583276271820068, "learning_rate": 9.313105263157896e-05, "loss": 0.4201, "step": 46121 }, { "epoch": 2.582708030014559, "grad_norm": 1.4155386686325073, "learning_rate": 9.31307894736842e-05, "loss": 0.4608, "step": 46122 }, { "epoch": 2.5827640273266885, "grad_norm": 0.9848421216011047, "learning_rate": 9.313052631578948e-05, "loss": 0.3612, "step": 46123 }, { "epoch": 2.582820024638817, "grad_norm": 1.5432130098342896, "learning_rate": 9.313026315789474e-05, "loss": 0.5421, "step": 46124 }, { "epoch": 2.5828760219509466, "grad_norm": 1.2114622592926025, "learning_rate": 9.313000000000001e-05, "loss": 0.5826, "step": 46125 }, { "epoch": 2.582932019263075, "grad_norm": 1.0866502523422241, "learning_rate": 9.312973684210526e-05, "loss": 0.3279, "step": 46126 }, { "epoch": 2.5829880165752046, "grad_norm": 1.450933575630188, "learning_rate": 9.312947368421053e-05, "loss": 0.4545, "step": 46127 }, { "epoch": 2.583044013887333, "grad_norm": 1.2252963781356812, "learning_rate": 9.312921052631579e-05, "loss": 0.3875, "step": 46128 }, { "epoch": 2.5831000111994626, 
"grad_norm": 1.4160548448562622, "learning_rate": 9.312894736842106e-05, "loss": 0.4755, "step": 46129 }, { "epoch": 2.583156008511591, "grad_norm": 1.111814022064209, "learning_rate": 9.312868421052632e-05, "loss": 0.4226, "step": 46130 }, { "epoch": 2.5832120058237207, "grad_norm": 1.1783521175384521, "learning_rate": 9.312842105263158e-05, "loss": 0.4091, "step": 46131 }, { "epoch": 2.5832680031358493, "grad_norm": 1.3160145282745361, "learning_rate": 9.312815789473684e-05, "loss": 0.3478, "step": 46132 }, { "epoch": 2.5833240004479787, "grad_norm": 1.1390950679779053, "learning_rate": 9.312789473684212e-05, "loss": 0.3533, "step": 46133 }, { "epoch": 2.5833799977601073, "grad_norm": 1.1132807731628418, "learning_rate": 9.312763157894738e-05, "loss": 0.408, "step": 46134 }, { "epoch": 2.5834359950722368, "grad_norm": 1.3719650506973267, "learning_rate": 9.312736842105264e-05, "loss": 0.392, "step": 46135 }, { "epoch": 2.5834919923843653, "grad_norm": 1.7874808311462402, "learning_rate": 9.31271052631579e-05, "loss": 0.393, "step": 46136 }, { "epoch": 2.583547989696495, "grad_norm": 0.940536618232727, "learning_rate": 9.312684210526317e-05, "loss": 0.3134, "step": 46137 }, { "epoch": 2.5836039870086234, "grad_norm": 1.33277428150177, "learning_rate": 9.312657894736843e-05, "loss": 0.4385, "step": 46138 }, { "epoch": 2.583659984320753, "grad_norm": 1.152375340461731, "learning_rate": 9.312631578947369e-05, "loss": 0.4978, "step": 46139 }, { "epoch": 2.5837159816328814, "grad_norm": 1.1312285661697388, "learning_rate": 9.312605263157895e-05, "loss": 0.446, "step": 46140 }, { "epoch": 2.583771978945011, "grad_norm": 1.226535439491272, "learning_rate": 9.312578947368421e-05, "loss": 0.4768, "step": 46141 }, { "epoch": 2.5838279762571394, "grad_norm": 1.7996746301651, "learning_rate": 9.312552631578948e-05, "loss": 0.4884, "step": 46142 }, { "epoch": 2.583883973569269, "grad_norm": 1.5521812438964844, "learning_rate": 9.312526315789474e-05, "loss": 0.6233, "step": 
46143 }, { "epoch": 2.5839399708813975, "grad_norm": 1.3173505067825317, "learning_rate": 9.3125e-05, "loss": 0.2906, "step": 46144 }, { "epoch": 2.583995968193527, "grad_norm": 1.1461753845214844, "learning_rate": 9.312473684210526e-05, "loss": 0.4117, "step": 46145 }, { "epoch": 2.5840519655056555, "grad_norm": 1.248136281967163, "learning_rate": 9.312447368421053e-05, "loss": 0.4202, "step": 46146 }, { "epoch": 2.584107962817785, "grad_norm": 1.1915944814682007, "learning_rate": 9.312421052631579e-05, "loss": 0.3287, "step": 46147 }, { "epoch": 2.5841639601299136, "grad_norm": 1.2087528705596924, "learning_rate": 9.312394736842107e-05, "loss": 0.4107, "step": 46148 }, { "epoch": 2.584219957442043, "grad_norm": 1.3676587343215942, "learning_rate": 9.312368421052631e-05, "loss": 0.3929, "step": 46149 }, { "epoch": 2.5842759547541716, "grad_norm": 1.058803915977478, "learning_rate": 9.312342105263159e-05, "loss": 0.3817, "step": 46150 }, { "epoch": 2.584331952066301, "grad_norm": 1.0382416248321533, "learning_rate": 9.312315789473685e-05, "loss": 0.3762, "step": 46151 }, { "epoch": 2.5843879493784296, "grad_norm": 1.0956649780273438, "learning_rate": 9.312289473684212e-05, "loss": 0.3801, "step": 46152 }, { "epoch": 2.584443946690559, "grad_norm": 1.1819037199020386, "learning_rate": 9.312263157894738e-05, "loss": 0.349, "step": 46153 }, { "epoch": 2.5844999440026877, "grad_norm": 1.3597872257232666, "learning_rate": 9.312236842105264e-05, "loss": 0.3677, "step": 46154 }, { "epoch": 2.584555941314817, "grad_norm": 1.3350191116333008, "learning_rate": 9.31221052631579e-05, "loss": 0.3295, "step": 46155 }, { "epoch": 2.5846119386269457, "grad_norm": 1.059969425201416, "learning_rate": 9.312184210526316e-05, "loss": 0.361, "step": 46156 }, { "epoch": 2.584667935939075, "grad_norm": 0.9629393815994263, "learning_rate": 9.312157894736843e-05, "loss": 0.3669, "step": 46157 }, { "epoch": 2.5847239332512038, "grad_norm": 1.1319193840026855, "learning_rate": 
9.312131578947369e-05, "loss": 0.3581, "step": 46158 }, { "epoch": 2.584779930563333, "grad_norm": 1.2861007452011108, "learning_rate": 9.312105263157895e-05, "loss": 0.5574, "step": 46159 }, { "epoch": 2.584835927875462, "grad_norm": 1.279954195022583, "learning_rate": 9.312078947368421e-05, "loss": 0.4105, "step": 46160 }, { "epoch": 2.5848919251875913, "grad_norm": 1.2599095106124878, "learning_rate": 9.312052631578948e-05, "loss": 0.5204, "step": 46161 }, { "epoch": 2.58494792249972, "grad_norm": 1.1875824928283691, "learning_rate": 9.312026315789474e-05, "loss": 0.3558, "step": 46162 }, { "epoch": 2.5850039198118493, "grad_norm": 1.1571764945983887, "learning_rate": 9.312e-05, "loss": 0.3278, "step": 46163 }, { "epoch": 2.585059917123978, "grad_norm": 1.2332196235656738, "learning_rate": 9.311973684210526e-05, "loss": 0.4497, "step": 46164 }, { "epoch": 2.5851159144361073, "grad_norm": 1.0362366437911987, "learning_rate": 9.311947368421054e-05, "loss": 0.3395, "step": 46165 }, { "epoch": 2.585171911748236, "grad_norm": 1.149551272392273, "learning_rate": 9.31192105263158e-05, "loss": 0.3847, "step": 46166 }, { "epoch": 2.5852279090603654, "grad_norm": 1.1010514497756958, "learning_rate": 9.311894736842106e-05, "loss": 0.3965, "step": 46167 }, { "epoch": 2.585283906372494, "grad_norm": 1.1411404609680176, "learning_rate": 9.311868421052632e-05, "loss": 0.4159, "step": 46168 }, { "epoch": 2.5853399036846234, "grad_norm": 1.258015513420105, "learning_rate": 9.311842105263159e-05, "loss": 0.4268, "step": 46169 }, { "epoch": 2.585395900996752, "grad_norm": 1.1511561870574951, "learning_rate": 9.311815789473685e-05, "loss": 0.4248, "step": 46170 }, { "epoch": 2.5854518983088814, "grad_norm": 1.1136687994003296, "learning_rate": 9.311789473684212e-05, "loss": 0.3705, "step": 46171 }, { "epoch": 2.58550789562101, "grad_norm": 1.070802927017212, "learning_rate": 9.311763157894737e-05, "loss": 0.3682, "step": 46172 }, { "epoch": 2.5855638929331395, "grad_norm": 
1.627197265625, "learning_rate": 9.311736842105263e-05, "loss": 0.5591, "step": 46173 }, { "epoch": 2.585619890245268, "grad_norm": 1.195107102394104, "learning_rate": 9.31171052631579e-05, "loss": 0.437, "step": 46174 }, { "epoch": 2.585675887557397, "grad_norm": 1.1197826862335205, "learning_rate": 9.311684210526316e-05, "loss": 0.377, "step": 46175 }, { "epoch": 2.585731884869526, "grad_norm": 1.4083714485168457, "learning_rate": 9.311657894736843e-05, "loss": 0.3296, "step": 46176 }, { "epoch": 2.585787882181655, "grad_norm": 1.4512770175933838, "learning_rate": 9.311631578947368e-05, "loss": 0.3438, "step": 46177 }, { "epoch": 2.585843879493784, "grad_norm": 1.2137832641601562, "learning_rate": 9.311605263157895e-05, "loss": 0.3822, "step": 46178 }, { "epoch": 2.585899876805913, "grad_norm": 0.9990248084068298, "learning_rate": 9.311578947368421e-05, "loss": 0.3782, "step": 46179 }, { "epoch": 2.585955874118042, "grad_norm": 1.4267797470092773, "learning_rate": 9.311552631578949e-05, "loss": 0.4335, "step": 46180 }, { "epoch": 2.586011871430171, "grad_norm": 1.2732597589492798, "learning_rate": 9.311526315789473e-05, "loss": 0.5702, "step": 46181 }, { "epoch": 2.5860678687423, "grad_norm": 1.0875052213668823, "learning_rate": 9.3115e-05, "loss": 0.4149, "step": 46182 }, { "epoch": 2.5861238660544292, "grad_norm": 1.1585497856140137, "learning_rate": 9.311473684210527e-05, "loss": 0.4281, "step": 46183 }, { "epoch": 2.5861798633665583, "grad_norm": 1.0399203300476074, "learning_rate": 9.311447368421054e-05, "loss": 0.432, "step": 46184 }, { "epoch": 2.5862358606786873, "grad_norm": 1.38935387134552, "learning_rate": 9.31142105263158e-05, "loss": 0.4513, "step": 46185 }, { "epoch": 2.5862918579908163, "grad_norm": 1.4753776788711548, "learning_rate": 9.311394736842106e-05, "loss": 0.4661, "step": 46186 }, { "epoch": 2.5863478553029453, "grad_norm": 1.1953052282333374, "learning_rate": 9.311368421052632e-05, "loss": 0.4839, "step": 46187 }, { "epoch": 
2.5864038526150743, "grad_norm": 1.0560799837112427, "learning_rate": 9.311342105263159e-05, "loss": 0.4439, "step": 46188 }, { "epoch": 2.5864598499272033, "grad_norm": 1.1305561065673828, "learning_rate": 9.311315789473685e-05, "loss": 0.403, "step": 46189 }, { "epoch": 2.5865158472393324, "grad_norm": 1.1195857524871826, "learning_rate": 9.311289473684211e-05, "loss": 0.3444, "step": 46190 }, { "epoch": 2.5865718445514614, "grad_norm": 1.049867033958435, "learning_rate": 9.311263157894737e-05, "loss": 0.3421, "step": 46191 }, { "epoch": 2.5866278418635904, "grad_norm": 1.3990519046783447, "learning_rate": 9.311236842105263e-05, "loss": 0.4355, "step": 46192 }, { "epoch": 2.5866838391757194, "grad_norm": 1.2467957735061646, "learning_rate": 9.31121052631579e-05, "loss": 0.3774, "step": 46193 }, { "epoch": 2.5867398364878484, "grad_norm": 1.0563324689865112, "learning_rate": 9.311184210526316e-05, "loss": 0.3194, "step": 46194 }, { "epoch": 2.5867958337999775, "grad_norm": 1.231909990310669, "learning_rate": 9.311157894736842e-05, "loss": 0.402, "step": 46195 }, { "epoch": 2.5868518311121065, "grad_norm": 1.0885509252548218, "learning_rate": 9.311131578947368e-05, "loss": 0.4035, "step": 46196 }, { "epoch": 2.5869078284242355, "grad_norm": 1.448405146598816, "learning_rate": 9.311105263157896e-05, "loss": 0.4949, "step": 46197 }, { "epoch": 2.5869638257363645, "grad_norm": 1.551013708114624, "learning_rate": 9.311078947368422e-05, "loss": 0.4266, "step": 46198 }, { "epoch": 2.5870198230484935, "grad_norm": 1.246457576751709, "learning_rate": 9.311052631578948e-05, "loss": 0.4558, "step": 46199 }, { "epoch": 2.5870758203606226, "grad_norm": 1.4119164943695068, "learning_rate": 9.311026315789473e-05, "loss": 0.3869, "step": 46200 }, { "epoch": 2.5871318176727516, "grad_norm": 1.4596666097640991, "learning_rate": 9.311000000000001e-05, "loss": 0.4522, "step": 46201 }, { "epoch": 2.5871878149848806, "grad_norm": 1.3969022035598755, "learning_rate": 
9.310973684210527e-05, "loss": 0.5272, "step": 46202 }, { "epoch": 2.5872438122970096, "grad_norm": 1.2330166101455688, "learning_rate": 9.310947368421054e-05, "loss": 0.4256, "step": 46203 }, { "epoch": 2.5872998096091386, "grad_norm": 1.050488829612732, "learning_rate": 9.310921052631579e-05, "loss": 0.4009, "step": 46204 }, { "epoch": 2.5873558069212677, "grad_norm": 1.5017423629760742, "learning_rate": 9.310894736842106e-05, "loss": 0.4551, "step": 46205 }, { "epoch": 2.5874118042333967, "grad_norm": 1.1235839128494263, "learning_rate": 9.310868421052632e-05, "loss": 0.3989, "step": 46206 }, { "epoch": 2.5874678015455257, "grad_norm": 1.1039791107177734, "learning_rate": 9.310842105263158e-05, "loss": 0.3786, "step": 46207 }, { "epoch": 2.5875237988576547, "grad_norm": 1.1936520338058472, "learning_rate": 9.310815789473685e-05, "loss": 0.3841, "step": 46208 }, { "epoch": 2.5875797961697837, "grad_norm": 1.161730170249939, "learning_rate": 9.31078947368421e-05, "loss": 0.3911, "step": 46209 }, { "epoch": 2.5876357934819127, "grad_norm": 0.9859711527824402, "learning_rate": 9.310763157894737e-05, "loss": 0.3708, "step": 46210 }, { "epoch": 2.5876917907940418, "grad_norm": 1.176864743232727, "learning_rate": 9.310736842105263e-05, "loss": 0.4341, "step": 46211 }, { "epoch": 2.587747788106171, "grad_norm": 1.0070438385009766, "learning_rate": 9.31071052631579e-05, "loss": 0.318, "step": 46212 }, { "epoch": 2.5878037854183, "grad_norm": 1.3358042240142822, "learning_rate": 9.310684210526317e-05, "loss": 0.4245, "step": 46213 }, { "epoch": 2.587859782730429, "grad_norm": 1.1505718231201172, "learning_rate": 9.310657894736843e-05, "loss": 0.4625, "step": 46214 }, { "epoch": 2.587915780042558, "grad_norm": 1.3368526697158813, "learning_rate": 9.310631578947368e-05, "loss": 0.5161, "step": 46215 }, { "epoch": 2.587971777354687, "grad_norm": 1.2531532049179077, "learning_rate": 9.310605263157896e-05, "loss": 0.3739, "step": 46216 }, { "epoch": 2.588027774666816, 
"grad_norm": 1.1150315999984741, "learning_rate": 9.310578947368422e-05, "loss": 0.4028, "step": 46217 }, { "epoch": 2.588083771978945, "grad_norm": 1.269049882888794, "learning_rate": 9.310552631578948e-05, "loss": 0.3915, "step": 46218 }, { "epoch": 2.588139769291074, "grad_norm": 1.1772435903549194, "learning_rate": 9.310526315789474e-05, "loss": 0.3252, "step": 46219 }, { "epoch": 2.588195766603203, "grad_norm": 1.1974480152130127, "learning_rate": 9.310500000000001e-05, "loss": 0.4289, "step": 46220 }, { "epoch": 2.588251763915332, "grad_norm": 1.2302343845367432, "learning_rate": 9.310473684210527e-05, "loss": 0.379, "step": 46221 }, { "epoch": 2.588307761227461, "grad_norm": 1.2236132621765137, "learning_rate": 9.310447368421053e-05, "loss": 0.4447, "step": 46222 }, { "epoch": 2.58836375853959, "grad_norm": 1.1185723543167114, "learning_rate": 9.310421052631579e-05, "loss": 0.3863, "step": 46223 }, { "epoch": 2.588419755851719, "grad_norm": 1.0115447044372559, "learning_rate": 9.310394736842105e-05, "loss": 0.331, "step": 46224 }, { "epoch": 2.588475753163848, "grad_norm": 1.1912463903427124, "learning_rate": 9.310368421052632e-05, "loss": 0.3945, "step": 46225 }, { "epoch": 2.588531750475977, "grad_norm": 1.0816136598587036, "learning_rate": 9.310342105263158e-05, "loss": 0.3278, "step": 46226 }, { "epoch": 2.588587747788106, "grad_norm": 1.2076407670974731, "learning_rate": 9.310315789473684e-05, "loss": 0.403, "step": 46227 }, { "epoch": 2.588643745100235, "grad_norm": 1.2909005880355835, "learning_rate": 9.31028947368421e-05, "loss": 0.3652, "step": 46228 }, { "epoch": 2.588699742412364, "grad_norm": 1.0006271600723267, "learning_rate": 9.310263157894738e-05, "loss": 0.3912, "step": 46229 }, { "epoch": 2.588755739724493, "grad_norm": 1.245719313621521, "learning_rate": 9.310236842105264e-05, "loss": 0.373, "step": 46230 }, { "epoch": 2.588811737036622, "grad_norm": 1.5838624238967896, "learning_rate": 9.310210526315791e-05, "loss": 0.4055, "step": 46231 
}, { "epoch": 2.588867734348751, "grad_norm": 1.228395938873291, "learning_rate": 9.310184210526315e-05, "loss": 0.5576, "step": 46232 }, { "epoch": 2.58892373166088, "grad_norm": 1.313234567642212, "learning_rate": 9.310157894736843e-05, "loss": 0.4007, "step": 46233 }, { "epoch": 2.588979728973009, "grad_norm": 1.3885167837142944, "learning_rate": 9.310131578947369e-05, "loss": 0.5922, "step": 46234 }, { "epoch": 2.5890357262851382, "grad_norm": 1.1877297163009644, "learning_rate": 9.310105263157896e-05, "loss": 0.4966, "step": 46235 }, { "epoch": 2.5890917235972672, "grad_norm": 1.1633297204971313, "learning_rate": 9.310078947368421e-05, "loss": 0.3257, "step": 46236 }, { "epoch": 2.5891477209093963, "grad_norm": 1.0928453207015991, "learning_rate": 9.310052631578948e-05, "loss": 0.2998, "step": 46237 }, { "epoch": 2.5892037182215253, "grad_norm": 1.0469619035720825, "learning_rate": 9.310026315789474e-05, "loss": 0.3869, "step": 46238 }, { "epoch": 2.5892597155336543, "grad_norm": 1.0504008531570435, "learning_rate": 9.310000000000001e-05, "loss": 0.3194, "step": 46239 }, { "epoch": 2.5893157128457833, "grad_norm": 1.1703003644943237, "learning_rate": 9.309973684210527e-05, "loss": 0.3038, "step": 46240 }, { "epoch": 2.5893717101579123, "grad_norm": 1.3680009841918945, "learning_rate": 9.309947368421052e-05, "loss": 0.4307, "step": 46241 }, { "epoch": 2.5894277074700414, "grad_norm": 1.7781453132629395, "learning_rate": 9.309921052631579e-05, "loss": 0.3945, "step": 46242 }, { "epoch": 2.5894837047821704, "grad_norm": 1.120333194732666, "learning_rate": 9.309894736842105e-05, "loss": 0.3481, "step": 46243 }, { "epoch": 2.5895397020942994, "grad_norm": 1.2128700017929077, "learning_rate": 9.309868421052633e-05, "loss": 0.3608, "step": 46244 }, { "epoch": 2.5895956994064284, "grad_norm": 1.1390490531921387, "learning_rate": 9.309842105263159e-05, "loss": 0.497, "step": 46245 }, { "epoch": 2.5896516967185574, "grad_norm": 1.183670997619629, "learning_rate": 
9.309815789473684e-05, "loss": 0.411, "step": 46246 }, { "epoch": 2.5897076940306865, "grad_norm": 1.2987509965896606, "learning_rate": 9.30978947368421e-05, "loss": 0.3711, "step": 46247 }, { "epoch": 2.5897636913428155, "grad_norm": 1.0237271785736084, "learning_rate": 9.309763157894738e-05, "loss": 0.3306, "step": 46248 }, { "epoch": 2.5898196886549445, "grad_norm": 1.253071904182434, "learning_rate": 9.309736842105264e-05, "loss": 0.4109, "step": 46249 }, { "epoch": 2.5898756859670735, "grad_norm": 1.2068551778793335, "learning_rate": 9.30971052631579e-05, "loss": 0.3682, "step": 46250 }, { "epoch": 2.5899316832792025, "grad_norm": 0.9989849925041199, "learning_rate": 9.309684210526316e-05, "loss": 0.3073, "step": 46251 }, { "epoch": 2.5899876805913316, "grad_norm": 1.171868085861206, "learning_rate": 9.309657894736843e-05, "loss": 0.3735, "step": 46252 }, { "epoch": 2.5900436779034606, "grad_norm": 1.1079397201538086, "learning_rate": 9.309631578947369e-05, "loss": 0.4156, "step": 46253 }, { "epoch": 2.5900996752155896, "grad_norm": 1.6420271396636963, "learning_rate": 9.309605263157895e-05, "loss": 0.4815, "step": 46254 }, { "epoch": 2.5901556725277186, "grad_norm": 1.2622206211090088, "learning_rate": 9.309578947368421e-05, "loss": 0.5426, "step": 46255 }, { "epoch": 2.5902116698398476, "grad_norm": 1.3164116144180298, "learning_rate": 9.309552631578948e-05, "loss": 0.3236, "step": 46256 }, { "epoch": 2.5902676671519766, "grad_norm": 1.7366224527359009, "learning_rate": 9.309526315789474e-05, "loss": 0.4633, "step": 46257 }, { "epoch": 2.5903236644641057, "grad_norm": 0.9842000603675842, "learning_rate": 9.309500000000002e-05, "loss": 0.2767, "step": 46258 }, { "epoch": 2.5903796617762347, "grad_norm": 1.1537973880767822, "learning_rate": 9.309473684210526e-05, "loss": 0.3579, "step": 46259 }, { "epoch": 2.5904356590883637, "grad_norm": 1.2546542882919312, "learning_rate": 9.309447368421052e-05, "loss": 0.4915, "step": 46260 }, { "epoch": 2.5904916564004927, 
"grad_norm": 1.2191046476364136, "learning_rate": 9.30942105263158e-05, "loss": 0.4695, "step": 46261 }, { "epoch": 2.5905476537126217, "grad_norm": 1.3442881107330322, "learning_rate": 9.309394736842105e-05, "loss": 0.4343, "step": 46262 }, { "epoch": 2.5906036510247508, "grad_norm": 1.3232883214950562, "learning_rate": 9.309368421052633e-05, "loss": 0.4336, "step": 46263 }, { "epoch": 2.59065964833688, "grad_norm": 0.9567542672157288, "learning_rate": 9.309342105263157e-05, "loss": 0.3133, "step": 46264 }, { "epoch": 2.590715645649009, "grad_norm": 1.2464120388031006, "learning_rate": 9.309315789473685e-05, "loss": 0.4245, "step": 46265 }, { "epoch": 2.590771642961138, "grad_norm": 1.1727780103683472, "learning_rate": 9.309289473684211e-05, "loss": 0.442, "step": 46266 }, { "epoch": 2.590827640273267, "grad_norm": 1.4074660539627075, "learning_rate": 9.309263157894738e-05, "loss": 0.4253, "step": 46267 }, { "epoch": 2.590883637585396, "grad_norm": 1.0359115600585938, "learning_rate": 9.309236842105264e-05, "loss": 0.2886, "step": 46268 }, { "epoch": 2.590939634897525, "grad_norm": 1.2708663940429688, "learning_rate": 9.30921052631579e-05, "loss": 0.4186, "step": 46269 }, { "epoch": 2.590995632209654, "grad_norm": 1.2987852096557617, "learning_rate": 9.309184210526316e-05, "loss": 0.5822, "step": 46270 }, { "epoch": 2.591051629521783, "grad_norm": 1.1190284490585327, "learning_rate": 9.309157894736843e-05, "loss": 0.6175, "step": 46271 }, { "epoch": 2.591107626833912, "grad_norm": 1.1067454814910889, "learning_rate": 9.309131578947369e-05, "loss": 0.3865, "step": 46272 }, { "epoch": 2.591163624146041, "grad_norm": 1.2553926706314087, "learning_rate": 9.309105263157895e-05, "loss": 0.4105, "step": 46273 }, { "epoch": 2.59121962145817, "grad_norm": 1.1765193939208984, "learning_rate": 9.309078947368421e-05, "loss": 0.4001, "step": 46274 }, { "epoch": 2.591275618770299, "grad_norm": 1.1803947687149048, "learning_rate": 9.309052631578949e-05, "loss": 0.3658, "step": 
46275 }, { "epoch": 2.591331616082428, "grad_norm": 1.1997201442718506, "learning_rate": 9.309026315789475e-05, "loss": 0.5434, "step": 46276 }, { "epoch": 2.591387613394557, "grad_norm": 1.0450609922409058, "learning_rate": 9.309e-05, "loss": 0.3593, "step": 46277 }, { "epoch": 2.591443610706686, "grad_norm": 1.0744925737380981, "learning_rate": 9.308973684210526e-05, "loss": 0.4222, "step": 46278 }, { "epoch": 2.591499608018815, "grad_norm": 0.9694228172302246, "learning_rate": 9.308947368421052e-05, "loss": 0.339, "step": 46279 }, { "epoch": 2.591555605330944, "grad_norm": 0.9629693627357483, "learning_rate": 9.30892105263158e-05, "loss": 0.3697, "step": 46280 }, { "epoch": 2.591611602643073, "grad_norm": 1.0900285243988037, "learning_rate": 9.308894736842106e-05, "loss": 0.4263, "step": 46281 }, { "epoch": 2.591667599955202, "grad_norm": 1.033509612083435, "learning_rate": 9.308868421052632e-05, "loss": 0.2765, "step": 46282 }, { "epoch": 2.591723597267331, "grad_norm": 1.081179141998291, "learning_rate": 9.308842105263158e-05, "loss": 0.2802, "step": 46283 }, { "epoch": 2.59177959457946, "grad_norm": 1.3849984407424927, "learning_rate": 9.308815789473685e-05, "loss": 0.4324, "step": 46284 }, { "epoch": 2.591835591891589, "grad_norm": 1.608033299446106, "learning_rate": 9.308789473684211e-05, "loss": 0.335, "step": 46285 }, { "epoch": 2.591891589203718, "grad_norm": 2.0281853675842285, "learning_rate": 9.308763157894737e-05, "loss": 0.3771, "step": 46286 }, { "epoch": 2.5919475865158472, "grad_norm": 1.3969900608062744, "learning_rate": 9.308736842105263e-05, "loss": 0.4417, "step": 46287 }, { "epoch": 2.5920035838279762, "grad_norm": 1.2181472778320312, "learning_rate": 9.30871052631579e-05, "loss": 0.4052, "step": 46288 }, { "epoch": 2.5920595811401053, "grad_norm": 0.9348074197769165, "learning_rate": 9.308684210526316e-05, "loss": 0.2563, "step": 46289 }, { "epoch": 2.5921155784522343, "grad_norm": 1.2464137077331543, "learning_rate": 9.308657894736844e-05, 
"loss": 0.545, "step": 46290 }, { "epoch": 2.5921715757643633, "grad_norm": 0.9846921563148499, "learning_rate": 9.308631578947368e-05, "loss": 0.3108, "step": 46291 }, { "epoch": 2.5922275730764923, "grad_norm": 1.2812780141830444, "learning_rate": 9.308605263157896e-05, "loss": 0.4695, "step": 46292 }, { "epoch": 2.5922835703886213, "grad_norm": 1.041433572769165, "learning_rate": 9.308578947368421e-05, "loss": 0.2697, "step": 46293 }, { "epoch": 2.5923395677007504, "grad_norm": 1.2597488164901733, "learning_rate": 9.308552631578947e-05, "loss": 0.4214, "step": 46294 }, { "epoch": 2.5923955650128794, "grad_norm": 1.0989595651626587, "learning_rate": 9.308526315789475e-05, "loss": 0.5186, "step": 46295 }, { "epoch": 2.5924515623250084, "grad_norm": 1.103491187095642, "learning_rate": 9.3085e-05, "loss": 0.3612, "step": 46296 }, { "epoch": 2.5925075596371374, "grad_norm": 1.0412403345108032, "learning_rate": 9.308473684210527e-05, "loss": 0.3184, "step": 46297 }, { "epoch": 2.5925635569492664, "grad_norm": 1.0847171545028687, "learning_rate": 9.308447368421053e-05, "loss": 0.3547, "step": 46298 }, { "epoch": 2.5926195542613955, "grad_norm": 1.170149803161621, "learning_rate": 9.30842105263158e-05, "loss": 0.4565, "step": 46299 }, { "epoch": 2.5926755515735245, "grad_norm": 1.2003085613250732, "learning_rate": 9.308394736842106e-05, "loss": 0.5156, "step": 46300 }, { "epoch": 2.5927315488856535, "grad_norm": 1.15707528591156, "learning_rate": 9.308368421052632e-05, "loss": 0.4768, "step": 46301 }, { "epoch": 2.5927875461977825, "grad_norm": 1.2563356161117554, "learning_rate": 9.308342105263158e-05, "loss": 0.4962, "step": 46302 }, { "epoch": 2.5928435435099115, "grad_norm": 1.0945630073547363, "learning_rate": 9.308315789473685e-05, "loss": 0.3812, "step": 46303 }, { "epoch": 2.5928995408220405, "grad_norm": 0.9414119720458984, "learning_rate": 9.308289473684211e-05, "loss": 0.2923, "step": 46304 }, { "epoch": 2.5929555381341696, "grad_norm": 1.2058321237564087, 
"learning_rate": 9.308263157894737e-05, "loss": 0.463, "step": 46305 }, { "epoch": 2.5930115354462986, "grad_norm": 1.1503084897994995, "learning_rate": 9.308236842105263e-05, "loss": 0.3851, "step": 46306 }, { "epoch": 2.5930675327584276, "grad_norm": 1.205010175704956, "learning_rate": 9.30821052631579e-05, "loss": 0.4971, "step": 46307 }, { "epoch": 2.5931235300705566, "grad_norm": 1.0483094453811646, "learning_rate": 9.308184210526316e-05, "loss": 0.3803, "step": 46308 }, { "epoch": 2.5931795273826856, "grad_norm": 1.314060926437378, "learning_rate": 9.308157894736842e-05, "loss": 0.4673, "step": 46309 }, { "epoch": 2.5932355246948147, "grad_norm": 1.3096795082092285, "learning_rate": 9.308131578947368e-05, "loss": 0.3963, "step": 46310 }, { "epoch": 2.5932915220069437, "grad_norm": 1.3893641233444214, "learning_rate": 9.308105263157894e-05, "loss": 0.4069, "step": 46311 }, { "epoch": 2.5933475193190727, "grad_norm": 1.2928149700164795, "learning_rate": 9.308078947368422e-05, "loss": 0.4374, "step": 46312 }, { "epoch": 2.5934035166312017, "grad_norm": 1.154284954071045, "learning_rate": 9.308052631578948e-05, "loss": 0.4114, "step": 46313 }, { "epoch": 2.5934595139433307, "grad_norm": 1.2459124326705933, "learning_rate": 9.308026315789474e-05, "loss": 0.3966, "step": 46314 }, { "epoch": 2.5935155112554598, "grad_norm": 1.0978296995162964, "learning_rate": 9.308e-05, "loss": 0.4238, "step": 46315 }, { "epoch": 2.593571508567589, "grad_norm": 1.4941835403442383, "learning_rate": 9.307973684210527e-05, "loss": 0.3848, "step": 46316 }, { "epoch": 2.593627505879718, "grad_norm": 1.096929669380188, "learning_rate": 9.307947368421053e-05, "loss": 0.3566, "step": 46317 }, { "epoch": 2.593683503191847, "grad_norm": 1.2642258405685425, "learning_rate": 9.30792105263158e-05, "loss": 0.5874, "step": 46318 }, { "epoch": 2.593739500503976, "grad_norm": 1.2297660112380981, "learning_rate": 9.307894736842105e-05, "loss": 0.4101, "step": 46319 }, { "epoch": 2.593795497816105, 
"grad_norm": 1.0441217422485352, "learning_rate": 9.307868421052632e-05, "loss": 0.3274, "step": 46320 }, { "epoch": 2.593851495128234, "grad_norm": 1.0500421524047852, "learning_rate": 9.307842105263158e-05, "loss": 0.4838, "step": 46321 }, { "epoch": 2.593907492440363, "grad_norm": 1.1367409229278564, "learning_rate": 9.307815789473686e-05, "loss": 0.3975, "step": 46322 }, { "epoch": 2.593963489752492, "grad_norm": 1.3118699789047241, "learning_rate": 9.307789473684211e-05, "loss": 0.3542, "step": 46323 }, { "epoch": 2.594019487064621, "grad_norm": 1.1071178913116455, "learning_rate": 9.307763157894737e-05, "loss": 0.3284, "step": 46324 }, { "epoch": 2.59407548437675, "grad_norm": 1.0018930435180664, "learning_rate": 9.307736842105263e-05, "loss": 0.2769, "step": 46325 }, { "epoch": 2.594131481688879, "grad_norm": 1.2303153276443481, "learning_rate": 9.307710526315791e-05, "loss": 0.329, "step": 46326 }, { "epoch": 2.594187479001008, "grad_norm": 1.1183476448059082, "learning_rate": 9.307684210526317e-05, "loss": 0.3309, "step": 46327 }, { "epoch": 2.594243476313137, "grad_norm": 1.2909375429153442, "learning_rate": 9.307657894736841e-05, "loss": 0.3618, "step": 46328 }, { "epoch": 2.594299473625266, "grad_norm": 1.3322277069091797, "learning_rate": 9.307631578947369e-05, "loss": 0.4223, "step": 46329 }, { "epoch": 2.594355470937395, "grad_norm": 1.0959221124649048, "learning_rate": 9.307605263157895e-05, "loss": 0.4415, "step": 46330 }, { "epoch": 2.594411468249524, "grad_norm": 1.1785094738006592, "learning_rate": 9.307578947368422e-05, "loss": 0.4333, "step": 46331 }, { "epoch": 2.594467465561653, "grad_norm": 1.228363275527954, "learning_rate": 9.307552631578948e-05, "loss": 0.4117, "step": 46332 }, { "epoch": 2.594523462873782, "grad_norm": 1.4216464757919312, "learning_rate": 9.307526315789474e-05, "loss": 0.3942, "step": 46333 }, { "epoch": 2.594579460185911, "grad_norm": 1.234973669052124, "learning_rate": 9.3075e-05, "loss": 0.3639, "step": 46334 }, { 
"epoch": 2.59463545749804, "grad_norm": 1.4586713314056396, "learning_rate": 9.307473684210527e-05, "loss": 0.3931, "step": 46335 }, { "epoch": 2.594691454810169, "grad_norm": 1.3273762464523315, "learning_rate": 9.307447368421053e-05, "loss": 0.411, "step": 46336 }, { "epoch": 2.594747452122298, "grad_norm": 1.116397500038147, "learning_rate": 9.307421052631579e-05, "loss": 0.4085, "step": 46337 }, { "epoch": 2.594803449434427, "grad_norm": 1.081879734992981, "learning_rate": 9.307394736842105e-05, "loss": 0.3788, "step": 46338 }, { "epoch": 2.594859446746556, "grad_norm": 1.1855541467666626, "learning_rate": 9.307368421052632e-05, "loss": 0.3918, "step": 46339 }, { "epoch": 2.5949154440586852, "grad_norm": 1.2762482166290283, "learning_rate": 9.307342105263158e-05, "loss": 0.4214, "step": 46340 }, { "epoch": 2.5949714413708143, "grad_norm": 1.2305614948272705, "learning_rate": 9.307315789473684e-05, "loss": 0.3271, "step": 46341 }, { "epoch": 2.5950274386829433, "grad_norm": 1.743653655052185, "learning_rate": 9.30728947368421e-05, "loss": 0.7206, "step": 46342 }, { "epoch": 2.5950834359950723, "grad_norm": 1.3808716535568237, "learning_rate": 9.307263157894738e-05, "loss": 0.5562, "step": 46343 }, { "epoch": 2.5951394333072013, "grad_norm": 1.1509453058242798, "learning_rate": 9.307236842105264e-05, "loss": 0.3664, "step": 46344 }, { "epoch": 2.5951954306193303, "grad_norm": 1.0026576519012451, "learning_rate": 9.30721052631579e-05, "loss": 0.3386, "step": 46345 }, { "epoch": 2.5952514279314594, "grad_norm": 1.435781717300415, "learning_rate": 9.307184210526316e-05, "loss": 0.5191, "step": 46346 }, { "epoch": 2.5953074252435884, "grad_norm": 1.3051142692565918, "learning_rate": 9.307157894736842e-05, "loss": 0.6892, "step": 46347 }, { "epoch": 2.5953634225557174, "grad_norm": 1.2356524467468262, "learning_rate": 9.307131578947369e-05, "loss": 0.3988, "step": 46348 }, { "epoch": 2.5954194198678464, "grad_norm": 1.0640677213668823, "learning_rate": 
9.307105263157895e-05, "loss": 0.4973, "step": 46349 }, { "epoch": 2.5954754171799754, "grad_norm": 1.3630106449127197, "learning_rate": 9.307078947368422e-05, "loss": 0.5907, "step": 46350 }, { "epoch": 2.5955314144921044, "grad_norm": 1.2732597589492798, "learning_rate": 9.307052631578947e-05, "loss": 0.4914, "step": 46351 }, { "epoch": 2.5955874118042335, "grad_norm": 1.0415436029434204, "learning_rate": 9.307026315789474e-05, "loss": 0.4629, "step": 46352 }, { "epoch": 2.5956434091163625, "grad_norm": 1.2142295837402344, "learning_rate": 9.307e-05, "loss": 0.4085, "step": 46353 }, { "epoch": 2.5956994064284915, "grad_norm": 1.2304552793502808, "learning_rate": 9.306973684210527e-05, "loss": 0.3546, "step": 46354 }, { "epoch": 2.5957554037406205, "grad_norm": 1.071213722229004, "learning_rate": 9.306947368421053e-05, "loss": 0.3885, "step": 46355 }, { "epoch": 2.5958114010527495, "grad_norm": 1.065073013305664, "learning_rate": 9.30692105263158e-05, "loss": 0.3458, "step": 46356 }, { "epoch": 2.5958673983648786, "grad_norm": 5.032269477844238, "learning_rate": 9.306894736842105e-05, "loss": 0.3571, "step": 46357 }, { "epoch": 2.5959233956770076, "grad_norm": 1.0765151977539062, "learning_rate": 9.306868421052633e-05, "loss": 0.4723, "step": 46358 }, { "epoch": 2.5959793929891366, "grad_norm": 1.0870074033737183, "learning_rate": 9.306842105263159e-05, "loss": 0.504, "step": 46359 }, { "epoch": 2.5960353903012656, "grad_norm": 1.2023396492004395, "learning_rate": 9.306815789473685e-05, "loss": 0.3668, "step": 46360 }, { "epoch": 2.5960913876133946, "grad_norm": 1.0600939989089966, "learning_rate": 9.30678947368421e-05, "loss": 0.4239, "step": 46361 }, { "epoch": 2.5961473849255237, "grad_norm": 3.0582919120788574, "learning_rate": 9.306763157894737e-05, "loss": 0.3892, "step": 46362 }, { "epoch": 2.5962033822376527, "grad_norm": 1.0472710132598877, "learning_rate": 9.306736842105264e-05, "loss": 0.4776, "step": 46363 }, { "epoch": 2.5962593795497817, "grad_norm": 
1.1434522867202759, "learning_rate": 9.30671052631579e-05, "loss": 0.5073, "step": 46364 }, { "epoch": 2.5963153768619107, "grad_norm": 1.149087905883789, "learning_rate": 9.306684210526316e-05, "loss": 0.5352, "step": 46365 }, { "epoch": 2.5963713741740397, "grad_norm": 1.2497172355651855, "learning_rate": 9.306657894736842e-05, "loss": 0.429, "step": 46366 }, { "epoch": 2.5964273714861688, "grad_norm": 1.3501276969909668, "learning_rate": 9.306631578947369e-05, "loss": 0.3541, "step": 46367 }, { "epoch": 2.5964833687982978, "grad_norm": 1.020113229751587, "learning_rate": 9.306605263157895e-05, "loss": 0.3417, "step": 46368 }, { "epoch": 2.596539366110427, "grad_norm": 1.4978958368301392, "learning_rate": 9.306578947368421e-05, "loss": 0.5658, "step": 46369 }, { "epoch": 2.596595363422556, "grad_norm": 1.1411269903182983, "learning_rate": 9.306552631578947e-05, "loss": 0.4128, "step": 46370 }, { "epoch": 2.596651360734685, "grad_norm": 1.3018317222595215, "learning_rate": 9.306526315789474e-05, "loss": 0.4062, "step": 46371 }, { "epoch": 2.596707358046814, "grad_norm": 1.0208609104156494, "learning_rate": 9.3065e-05, "loss": 0.3452, "step": 46372 }, { "epoch": 2.596763355358943, "grad_norm": 1.126666784286499, "learning_rate": 9.306473684210528e-05, "loss": 0.3561, "step": 46373 }, { "epoch": 2.596819352671072, "grad_norm": 1.3486653566360474, "learning_rate": 9.306447368421052e-05, "loss": 0.4562, "step": 46374 }, { "epoch": 2.596875349983201, "grad_norm": 1.3658181428909302, "learning_rate": 9.30642105263158e-05, "loss": 0.3916, "step": 46375 }, { "epoch": 2.59693134729533, "grad_norm": 1.173493504524231, "learning_rate": 9.306394736842106e-05, "loss": 0.363, "step": 46376 }, { "epoch": 2.596987344607459, "grad_norm": 1.268128752708435, "learning_rate": 9.306368421052633e-05, "loss": 0.5235, "step": 46377 }, { "epoch": 2.597043341919588, "grad_norm": 1.022345781326294, "learning_rate": 9.306342105263159e-05, "loss": 0.3314, "step": 46378 }, { "epoch": 
2.597099339231717, "grad_norm": 1.3354579210281372, "learning_rate": 9.306315789473685e-05, "loss": 0.5914, "step": 46379 }, { "epoch": 2.597155336543846, "grad_norm": 1.1834393739700317, "learning_rate": 9.306289473684211e-05, "loss": 0.3164, "step": 46380 }, { "epoch": 2.597211333855975, "grad_norm": 1.0589256286621094, "learning_rate": 9.306263157894737e-05, "loss": 0.3791, "step": 46381 }, { "epoch": 2.597267331168104, "grad_norm": 1.0989223718643188, "learning_rate": 9.306236842105264e-05, "loss": 0.406, "step": 46382 }, { "epoch": 2.597323328480233, "grad_norm": 1.840376615524292, "learning_rate": 9.306210526315789e-05, "loss": 0.4698, "step": 46383 }, { "epoch": 2.597379325792362, "grad_norm": 1.2043107748031616, "learning_rate": 9.306184210526316e-05, "loss": 0.3954, "step": 46384 }, { "epoch": 2.597435323104491, "grad_norm": 1.103837013244629, "learning_rate": 9.306157894736842e-05, "loss": 0.3648, "step": 46385 }, { "epoch": 2.59749132041662, "grad_norm": 1.0096098184585571, "learning_rate": 9.30613157894737e-05, "loss": 0.346, "step": 46386 }, { "epoch": 2.597547317728749, "grad_norm": 1.1829688549041748, "learning_rate": 9.306105263157895e-05, "loss": 0.3753, "step": 46387 }, { "epoch": 2.597603315040878, "grad_norm": 1.4263122081756592, "learning_rate": 9.306078947368421e-05, "loss": 0.3576, "step": 46388 }, { "epoch": 2.597659312353007, "grad_norm": 1.3190265893936157, "learning_rate": 9.306052631578947e-05, "loss": 0.3338, "step": 46389 }, { "epoch": 2.597715309665136, "grad_norm": 0.9917435050010681, "learning_rate": 9.306026315789475e-05, "loss": 0.3164, "step": 46390 }, { "epoch": 2.597771306977265, "grad_norm": 1.0683341026306152, "learning_rate": 9.306000000000001e-05, "loss": 0.444, "step": 46391 }, { "epoch": 2.5978273042893942, "grad_norm": 1.2895150184631348, "learning_rate": 9.305973684210527e-05, "loss": 0.3916, "step": 46392 }, { "epoch": 2.5978833016015233, "grad_norm": 0.968153178691864, "learning_rate": 9.305947368421053e-05, "loss": 
0.3465, "step": 46393 }, { "epoch": 2.5979392989136523, "grad_norm": 1.0791574716567993, "learning_rate": 9.30592105263158e-05, "loss": 0.3661, "step": 46394 }, { "epoch": 2.5979952962257813, "grad_norm": 1.0966107845306396, "learning_rate": 9.305894736842106e-05, "loss": 0.3442, "step": 46395 }, { "epoch": 2.5980512935379103, "grad_norm": 1.0598368644714355, "learning_rate": 9.305868421052632e-05, "loss": 0.2701, "step": 46396 }, { "epoch": 2.5981072908500393, "grad_norm": 1.027726173400879, "learning_rate": 9.305842105263158e-05, "loss": 0.2789, "step": 46397 }, { "epoch": 2.5981632881621683, "grad_norm": 1.2512030601501465, "learning_rate": 9.305815789473684e-05, "loss": 0.4415, "step": 46398 }, { "epoch": 2.5982192854742974, "grad_norm": 1.259636640548706, "learning_rate": 9.305789473684211e-05, "loss": 0.4764, "step": 46399 }, { "epoch": 2.5982752827864264, "grad_norm": 1.3739681243896484, "learning_rate": 9.305763157894737e-05, "loss": 0.4599, "step": 46400 }, { "epoch": 2.5983312800985554, "grad_norm": 1.242702603340149, "learning_rate": 9.305736842105263e-05, "loss": 0.4708, "step": 46401 }, { "epoch": 2.5983872774106844, "grad_norm": 1.1186217069625854, "learning_rate": 9.305710526315789e-05, "loss": 0.4253, "step": 46402 }, { "epoch": 2.5984432747228134, "grad_norm": 1.3703936338424683, "learning_rate": 9.305684210526316e-05, "loss": 0.4427, "step": 46403 }, { "epoch": 2.5984992720349425, "grad_norm": 1.2901157140731812, "learning_rate": 9.305657894736842e-05, "loss": 0.4259, "step": 46404 }, { "epoch": 2.5985552693470715, "grad_norm": 1.2709455490112305, "learning_rate": 9.30563157894737e-05, "loss": 0.4528, "step": 46405 }, { "epoch": 2.5986112666592005, "grad_norm": 1.4229942560195923, "learning_rate": 9.305605263157894e-05, "loss": 0.5051, "step": 46406 }, { "epoch": 2.5986672639713295, "grad_norm": 0.9639158248901367, "learning_rate": 9.305578947368422e-05, "loss": 0.3377, "step": 46407 }, { "epoch": 2.5987232612834585, "grad_norm": 
1.2738722562789917, "learning_rate": 9.305552631578948e-05, "loss": 0.6525, "step": 46408 }, { "epoch": 2.5987792585955876, "grad_norm": 1.1387035846710205, "learning_rate": 9.305526315789475e-05, "loss": 0.4028, "step": 46409 }, { "epoch": 2.5988352559077166, "grad_norm": 1.1342895030975342, "learning_rate": 9.305500000000001e-05, "loss": 0.3728, "step": 46410 }, { "epoch": 2.5988912532198456, "grad_norm": 1.2227965593338013, "learning_rate": 9.305473684210527e-05, "loss": 0.4896, "step": 46411 }, { "epoch": 2.5989472505319746, "grad_norm": 1.304675817489624, "learning_rate": 9.305447368421053e-05, "loss": 0.4238, "step": 46412 }, { "epoch": 2.5990032478441036, "grad_norm": 1.3673406839370728, "learning_rate": 9.30542105263158e-05, "loss": 0.4155, "step": 46413 }, { "epoch": 2.5990592451562327, "grad_norm": 1.2016870975494385, "learning_rate": 9.305394736842106e-05, "loss": 0.4107, "step": 46414 }, { "epoch": 2.5991152424683617, "grad_norm": 1.2177479267120361, "learning_rate": 9.305368421052632e-05, "loss": 0.3277, "step": 46415 }, { "epoch": 2.5991712397804907, "grad_norm": 1.3576984405517578, "learning_rate": 9.305342105263158e-05, "loss": 0.4359, "step": 46416 }, { "epoch": 2.5992272370926197, "grad_norm": 1.2905004024505615, "learning_rate": 9.305315789473684e-05, "loss": 0.335, "step": 46417 }, { "epoch": 2.5992832344047487, "grad_norm": 1.2379571199417114, "learning_rate": 9.305289473684211e-05, "loss": 0.34, "step": 46418 }, { "epoch": 2.5993392317168778, "grad_norm": 1.1430139541625977, "learning_rate": 9.305263157894737e-05, "loss": 0.3974, "step": 46419 }, { "epoch": 2.5993952290290068, "grad_norm": 1.2210012674331665, "learning_rate": 9.305236842105263e-05, "loss": 0.4135, "step": 46420 }, { "epoch": 2.599451226341136, "grad_norm": 1.2502466440200806, "learning_rate": 9.30521052631579e-05, "loss": 0.3892, "step": 46421 }, { "epoch": 2.599507223653265, "grad_norm": 1.5734598636627197, "learning_rate": 9.305184210526317e-05, "loss": 0.4753, "step": 46422 
}, { "epoch": 2.5995632209653934, "grad_norm": 1.3160457611083984, "learning_rate": 9.305157894736843e-05, "loss": 0.4401, "step": 46423 }, { "epoch": 2.599619218277523, "grad_norm": 1.126574158668518, "learning_rate": 9.305131578947369e-05, "loss": 0.4322, "step": 46424 }, { "epoch": 2.5996752155896514, "grad_norm": 0.979223906993866, "learning_rate": 9.305105263157895e-05, "loss": 0.3052, "step": 46425 }, { "epoch": 2.599731212901781, "grad_norm": 1.0468416213989258, "learning_rate": 9.305078947368422e-05, "loss": 0.2731, "step": 46426 }, { "epoch": 2.5997872102139095, "grad_norm": 1.0872056484222412, "learning_rate": 9.305052631578948e-05, "loss": 0.4303, "step": 46427 }, { "epoch": 2.599843207526039, "grad_norm": 1.2613859176635742, "learning_rate": 9.305026315789475e-05, "loss": 0.391, "step": 46428 }, { "epoch": 2.5998992048381675, "grad_norm": 1.1846903562545776, "learning_rate": 9.305e-05, "loss": 0.3151, "step": 46429 }, { "epoch": 2.599955202150297, "grad_norm": 1.0874963998794556, "learning_rate": 9.304973684210527e-05, "loss": 0.3574, "step": 46430 }, { "epoch": 2.6000111994624255, "grad_norm": 1.1916323900222778, "learning_rate": 9.304947368421053e-05, "loss": 0.3601, "step": 46431 }, { "epoch": 2.600067196774555, "grad_norm": 1.1626559495925903, "learning_rate": 9.304921052631579e-05, "loss": 0.3617, "step": 46432 }, { "epoch": 2.6001231940866836, "grad_norm": 1.147560477256775, "learning_rate": 9.304894736842106e-05, "loss": 0.4632, "step": 46433 }, { "epoch": 2.600179191398813, "grad_norm": 1.1132581233978271, "learning_rate": 9.304868421052631e-05, "loss": 0.663, "step": 46434 }, { "epoch": 2.6002351887109416, "grad_norm": 1.254677414894104, "learning_rate": 9.304842105263158e-05, "loss": 0.4212, "step": 46435 }, { "epoch": 2.600291186023071, "grad_norm": 1.2145379781723022, "learning_rate": 9.304815789473684e-05, "loss": 0.368, "step": 46436 }, { "epoch": 2.6003471833351997, "grad_norm": 1.1810592412948608, "learning_rate": 9.304789473684212e-05, 
"loss": 0.3461, "step": 46437 }, { "epoch": 2.600403180647329, "grad_norm": 1.1920262575149536, "learning_rate": 9.304763157894736e-05, "loss": 0.3849, "step": 46438 }, { "epoch": 2.6004591779594577, "grad_norm": 1.1273354291915894, "learning_rate": 9.304736842105264e-05, "loss": 0.3623, "step": 46439 }, { "epoch": 2.600515175271587, "grad_norm": 1.3927035331726074, "learning_rate": 9.30471052631579e-05, "loss": 0.3894, "step": 46440 }, { "epoch": 2.6005711725837157, "grad_norm": 1.291559100151062, "learning_rate": 9.304684210526317e-05, "loss": 0.374, "step": 46441 }, { "epoch": 2.600627169895845, "grad_norm": 1.0641018152236938, "learning_rate": 9.304657894736843e-05, "loss": 0.3254, "step": 46442 }, { "epoch": 2.6006831672079738, "grad_norm": 1.4104937314987183, "learning_rate": 9.304631578947369e-05, "loss": 0.4582, "step": 46443 }, { "epoch": 2.6007391645201032, "grad_norm": 1.227135181427002, "learning_rate": 9.304605263157895e-05, "loss": 0.5633, "step": 46444 }, { "epoch": 2.600795161832232, "grad_norm": 1.0533151626586914, "learning_rate": 9.304578947368422e-05, "loss": 0.3805, "step": 46445 }, { "epoch": 2.6008511591443613, "grad_norm": 1.1004160642623901, "learning_rate": 9.304552631578948e-05, "loss": 0.4832, "step": 46446 }, { "epoch": 2.60090715645649, "grad_norm": 1.0386230945587158, "learning_rate": 9.304526315789474e-05, "loss": 0.3892, "step": 46447 }, { "epoch": 2.6009631537686193, "grad_norm": 1.1947985887527466, "learning_rate": 9.3045e-05, "loss": 0.2936, "step": 46448 }, { "epoch": 2.601019151080748, "grad_norm": 1.3050929307937622, "learning_rate": 9.304473684210526e-05, "loss": 0.3935, "step": 46449 }, { "epoch": 2.6010751483928773, "grad_norm": 1.1932874917984009, "learning_rate": 9.304447368421053e-05, "loss": 0.4036, "step": 46450 }, { "epoch": 2.601131145705006, "grad_norm": 0.9661457538604736, "learning_rate": 9.30442105263158e-05, "loss": 0.3628, "step": 46451 }, { "epoch": 2.6011871430171354, "grad_norm": 1.4228384494781494, 
"learning_rate": 9.304394736842105e-05, "loss": 0.7101, "step": 46452 }, { "epoch": 2.601243140329264, "grad_norm": 1.2521458864212036, "learning_rate": 9.304368421052631e-05, "loss": 0.4048, "step": 46453 }, { "epoch": 2.6012991376413934, "grad_norm": 1.1667956113815308, "learning_rate": 9.304342105263159e-05, "loss": 0.4032, "step": 46454 }, { "epoch": 2.601355134953522, "grad_norm": 1.0820139646530151, "learning_rate": 9.304315789473685e-05, "loss": 0.3675, "step": 46455 }, { "epoch": 2.6014111322656515, "grad_norm": 1.1574163436889648, "learning_rate": 9.30428947368421e-05, "loss": 0.4527, "step": 46456 }, { "epoch": 2.60146712957778, "grad_norm": 1.113939642906189, "learning_rate": 9.304263157894737e-05, "loss": 0.414, "step": 46457 }, { "epoch": 2.6015231268899095, "grad_norm": 1.214937448501587, "learning_rate": 9.304236842105264e-05, "loss": 0.3992, "step": 46458 }, { "epoch": 2.601579124202038, "grad_norm": 1.1163384914398193, "learning_rate": 9.30421052631579e-05, "loss": 0.4927, "step": 46459 }, { "epoch": 2.6016351215141675, "grad_norm": 1.4073883295059204, "learning_rate": 9.304184210526317e-05, "loss": 0.6369, "step": 46460 }, { "epoch": 2.601691118826296, "grad_norm": 1.188210129737854, "learning_rate": 9.304157894736842e-05, "loss": 0.3661, "step": 46461 }, { "epoch": 2.6017471161384256, "grad_norm": 1.5695980787277222, "learning_rate": 9.304131578947369e-05, "loss": 0.3368, "step": 46462 }, { "epoch": 2.601803113450554, "grad_norm": 1.061631202697754, "learning_rate": 9.304105263157895e-05, "loss": 0.2741, "step": 46463 }, { "epoch": 2.6018591107626836, "grad_norm": 1.2739530801773071, "learning_rate": 9.304078947368422e-05, "loss": 0.5952, "step": 46464 }, { "epoch": 2.601915108074812, "grad_norm": 1.2000675201416016, "learning_rate": 9.304052631578948e-05, "loss": 0.3457, "step": 46465 }, { "epoch": 2.6019711053869417, "grad_norm": 0.9580949544906616, "learning_rate": 9.304026315789473e-05, "loss": 0.4259, "step": 46466 }, { "epoch": 
2.6020271026990702, "grad_norm": 1.2437034845352173, "learning_rate": 9.304e-05, "loss": 0.4251, "step": 46467 }, { "epoch": 2.6020831000111997, "grad_norm": 1.0650449991226196, "learning_rate": 9.303973684210526e-05, "loss": 0.4338, "step": 46468 }, { "epoch": 2.6021390973233283, "grad_norm": 1.244703769683838, "learning_rate": 9.303947368421054e-05, "loss": 0.3511, "step": 46469 }, { "epoch": 2.6021950946354577, "grad_norm": 1.3416006565093994, "learning_rate": 9.30392105263158e-05, "loss": 0.4138, "step": 46470 }, { "epoch": 2.6022510919475863, "grad_norm": 1.353318691253662, "learning_rate": 9.303894736842106e-05, "loss": 0.352, "step": 46471 }, { "epoch": 2.6023070892597158, "grad_norm": 1.4492400884628296, "learning_rate": 9.303868421052632e-05, "loss": 0.4157, "step": 46472 }, { "epoch": 2.6023630865718443, "grad_norm": 1.1647073030471802, "learning_rate": 9.303842105263159e-05, "loss": 0.3766, "step": 46473 }, { "epoch": 2.602419083883974, "grad_norm": 1.231884479522705, "learning_rate": 9.303815789473685e-05, "loss": 0.3742, "step": 46474 }, { "epoch": 2.6024750811961024, "grad_norm": 1.00637948513031, "learning_rate": 9.303789473684211e-05, "loss": 0.3254, "step": 46475 }, { "epoch": 2.602531078508232, "grad_norm": 1.198676347732544, "learning_rate": 9.303763157894737e-05, "loss": 0.4207, "step": 46476 }, { "epoch": 2.6025870758203604, "grad_norm": 1.2479243278503418, "learning_rate": 9.303736842105264e-05, "loss": 0.4484, "step": 46477 }, { "epoch": 2.60264307313249, "grad_norm": 1.097208023071289, "learning_rate": 9.30371052631579e-05, "loss": 0.4814, "step": 46478 }, { "epoch": 2.6026990704446185, "grad_norm": 1.277184009552002, "learning_rate": 9.303684210526316e-05, "loss": 0.3587, "step": 46479 }, { "epoch": 2.602755067756748, "grad_norm": 1.0342180728912354, "learning_rate": 9.303657894736842e-05, "loss": 0.3902, "step": 46480 }, { "epoch": 2.6028110650688765, "grad_norm": 1.42534339427948, "learning_rate": 9.30363157894737e-05, "loss": 0.4532, 
"step": 46481 }, { "epoch": 2.602867062381006, "grad_norm": 1.2270914316177368, "learning_rate": 9.303605263157895e-05, "loss": 0.4947, "step": 46482 }, { "epoch": 2.6029230596931345, "grad_norm": 1.6300156116485596, "learning_rate": 9.303578947368421e-05, "loss": 0.377, "step": 46483 }, { "epoch": 2.602979057005264, "grad_norm": 0.9486250877380371, "learning_rate": 9.303552631578947e-05, "loss": 0.2987, "step": 46484 }, { "epoch": 2.6030350543173926, "grad_norm": 1.0906494855880737, "learning_rate": 9.303526315789473e-05, "loss": 0.3443, "step": 46485 }, { "epoch": 2.603091051629522, "grad_norm": 1.5641968250274658, "learning_rate": 9.3035e-05, "loss": 0.5295, "step": 46486 }, { "epoch": 2.6031470489416506, "grad_norm": 1.4293843507766724, "learning_rate": 9.303473684210527e-05, "loss": 0.3326, "step": 46487 }, { "epoch": 2.60320304625378, "grad_norm": 1.3640592098236084, "learning_rate": 9.303447368421053e-05, "loss": 0.4591, "step": 46488 }, { "epoch": 2.6032590435659086, "grad_norm": 1.1550309658050537, "learning_rate": 9.303421052631579e-05, "loss": 0.3343, "step": 46489 }, { "epoch": 2.603315040878038, "grad_norm": 1.0223761796951294, "learning_rate": 9.303394736842106e-05, "loss": 0.2891, "step": 46490 }, { "epoch": 2.6033710381901667, "grad_norm": 1.1564793586730957, "learning_rate": 9.303368421052632e-05, "loss": 0.3646, "step": 46491 }, { "epoch": 2.603427035502296, "grad_norm": 4.680053234100342, "learning_rate": 9.303342105263159e-05, "loss": 0.4865, "step": 46492 }, { "epoch": 2.6034830328144247, "grad_norm": 1.1159172058105469, "learning_rate": 9.303315789473684e-05, "loss": 0.2847, "step": 46493 }, { "epoch": 2.603539030126554, "grad_norm": 1.0027815103530884, "learning_rate": 9.303289473684211e-05, "loss": 0.4123, "step": 46494 }, { "epoch": 2.6035950274386828, "grad_norm": 1.0499452352523804, "learning_rate": 9.303263157894737e-05, "loss": 0.2889, "step": 46495 }, { "epoch": 2.6036510247508122, "grad_norm": 1.4142584800720215, "learning_rate": 
9.303236842105264e-05, "loss": 0.5174, "step": 46496 }, { "epoch": 2.603707022062941, "grad_norm": 0.989594578742981, "learning_rate": 9.30321052631579e-05, "loss": 0.3169, "step": 46497 }, { "epoch": 2.6037630193750703, "grad_norm": 1.1057268381118774, "learning_rate": 9.303184210526316e-05, "loss": 0.2985, "step": 46498 }, { "epoch": 2.603819016687199, "grad_norm": 1.039628028869629, "learning_rate": 9.303157894736842e-05, "loss": 0.4595, "step": 46499 }, { "epoch": 2.6038750139993283, "grad_norm": 1.202059030532837, "learning_rate": 9.30313157894737e-05, "loss": 0.422, "step": 46500 }, { "epoch": 2.603931011311457, "grad_norm": 1.0177092552185059, "learning_rate": 9.303105263157896e-05, "loss": 0.3472, "step": 46501 }, { "epoch": 2.6039870086235863, "grad_norm": 1.3162777423858643, "learning_rate": 9.303078947368422e-05, "loss": 0.3866, "step": 46502 }, { "epoch": 2.604043005935715, "grad_norm": 1.2071207761764526, "learning_rate": 9.303052631578948e-05, "loss": 0.4198, "step": 46503 }, { "epoch": 2.6040990032478444, "grad_norm": 1.9643195867538452, "learning_rate": 9.303026315789474e-05, "loss": 0.5239, "step": 46504 }, { "epoch": 2.604155000559973, "grad_norm": 1.4066940546035767, "learning_rate": 9.303000000000001e-05, "loss": 0.412, "step": 46505 }, { "epoch": 2.604210997872102, "grad_norm": 1.056431770324707, "learning_rate": 9.302973684210527e-05, "loss": 0.2899, "step": 46506 }, { "epoch": 2.604266995184231, "grad_norm": 0.9736373424530029, "learning_rate": 9.302947368421053e-05, "loss": 0.3523, "step": 46507 }, { "epoch": 2.60432299249636, "grad_norm": 1.0358986854553223, "learning_rate": 9.302921052631579e-05, "loss": 0.302, "step": 46508 }, { "epoch": 2.604378989808489, "grad_norm": 1.272763967514038, "learning_rate": 9.302894736842106e-05, "loss": 0.4631, "step": 46509 }, { "epoch": 2.604434987120618, "grad_norm": 1.051343560218811, "learning_rate": 9.302868421052632e-05, "loss": 0.3608, "step": 46510 }, { "epoch": 2.604490984432747, "grad_norm": 
1.0620665550231934, "learning_rate": 9.302842105263158e-05, "loss": 0.3109, "step": 46511 }, { "epoch": 2.604546981744876, "grad_norm": 1.5440928936004639, "learning_rate": 9.302815789473684e-05, "loss": 0.4604, "step": 46512 }, { "epoch": 2.604602979057005, "grad_norm": 1.2319453954696655, "learning_rate": 9.302789473684211e-05, "loss": 0.3864, "step": 46513 }, { "epoch": 2.604658976369134, "grad_norm": 1.358185887336731, "learning_rate": 9.302763157894737e-05, "loss": 0.3582, "step": 46514 }, { "epoch": 2.604714973681263, "grad_norm": 1.1510616540908813, "learning_rate": 9.302736842105265e-05, "loss": 0.3906, "step": 46515 }, { "epoch": 2.604770970993392, "grad_norm": 1.4592418670654297, "learning_rate": 9.302710526315789e-05, "loss": 0.3838, "step": 46516 }, { "epoch": 2.604826968305521, "grad_norm": 1.2418780326843262, "learning_rate": 9.302684210526317e-05, "loss": 0.4018, "step": 46517 }, { "epoch": 2.60488296561765, "grad_norm": 1.6895040273666382, "learning_rate": 9.302657894736843e-05, "loss": 0.3782, "step": 46518 }, { "epoch": 2.6049389629297792, "grad_norm": 1.0250135660171509, "learning_rate": 9.302631578947369e-05, "loss": 0.3604, "step": 46519 }, { "epoch": 2.6049949602419082, "grad_norm": 1.4300905466079712, "learning_rate": 9.302605263157896e-05, "loss": 0.3191, "step": 46520 }, { "epoch": 2.6050509575540373, "grad_norm": 1.3257346153259277, "learning_rate": 9.30257894736842e-05, "loss": 0.5257, "step": 46521 }, { "epoch": 2.6051069548661663, "grad_norm": 1.2085860967636108, "learning_rate": 9.302552631578948e-05, "loss": 0.4289, "step": 46522 }, { "epoch": 2.6051629521782953, "grad_norm": 1.2155348062515259, "learning_rate": 9.302526315789474e-05, "loss": 0.4195, "step": 46523 }, { "epoch": 2.6052189494904243, "grad_norm": 1.0436477661132812, "learning_rate": 9.302500000000001e-05, "loss": 0.3814, "step": 46524 }, { "epoch": 2.6052749468025533, "grad_norm": 1.1576848030090332, "learning_rate": 9.302473684210527e-05, "loss": 0.3968, "step": 46525 
}, { "epoch": 2.6053309441146824, "grad_norm": 6.950581073760986, "learning_rate": 9.302447368421053e-05, "loss": 0.5232, "step": 46526 }, { "epoch": 2.6053869414268114, "grad_norm": 1.756662368774414, "learning_rate": 9.302421052631579e-05, "loss": 0.4306, "step": 46527 }, { "epoch": 2.6054429387389404, "grad_norm": 1.0839502811431885, "learning_rate": 9.302394736842106e-05, "loss": 0.3517, "step": 46528 }, { "epoch": 2.6054989360510694, "grad_norm": 1.1785544157028198, "learning_rate": 9.302368421052632e-05, "loss": 0.3655, "step": 46529 }, { "epoch": 2.6055549333631984, "grad_norm": 1.1357667446136475, "learning_rate": 9.302342105263158e-05, "loss": 0.3231, "step": 46530 }, { "epoch": 2.6056109306753275, "grad_norm": 1.2374035120010376, "learning_rate": 9.302315789473684e-05, "loss": 0.3943, "step": 46531 }, { "epoch": 2.6056669279874565, "grad_norm": 1.360485553741455, "learning_rate": 9.302289473684212e-05, "loss": 0.4535, "step": 46532 }, { "epoch": 2.6057229252995855, "grad_norm": 1.2514512538909912, "learning_rate": 9.302263157894738e-05, "loss": 0.4974, "step": 46533 }, { "epoch": 2.6057789226117145, "grad_norm": 1.1692370176315308, "learning_rate": 9.302236842105264e-05, "loss": 0.3499, "step": 46534 }, { "epoch": 2.6058349199238435, "grad_norm": 0.9827505946159363, "learning_rate": 9.30221052631579e-05, "loss": 0.3726, "step": 46535 }, { "epoch": 2.6058909172359725, "grad_norm": 1.0020781755447388, "learning_rate": 9.302184210526316e-05, "loss": 0.2868, "step": 46536 }, { "epoch": 2.6059469145481016, "grad_norm": 1.3046605587005615, "learning_rate": 9.302157894736843e-05, "loss": 0.3484, "step": 46537 }, { "epoch": 2.6060029118602306, "grad_norm": 1.155605435371399, "learning_rate": 9.302131578947369e-05, "loss": 0.4737, "step": 46538 }, { "epoch": 2.6060589091723596, "grad_norm": 1.1728681325912476, "learning_rate": 9.302105263157895e-05, "loss": 0.4589, "step": 46539 }, { "epoch": 2.6061149064844886, "grad_norm": 1.2775704860687256, "learning_rate": 
9.302078947368421e-05, "loss": 0.4299, "step": 46540 }, { "epoch": 2.6061709037966176, "grad_norm": 1.2015646696090698, "learning_rate": 9.302052631578948e-05, "loss": 0.4057, "step": 46541 }, { "epoch": 2.6062269011087467, "grad_norm": 1.1919351816177368, "learning_rate": 9.302026315789474e-05, "loss": 0.5465, "step": 46542 }, { "epoch": 2.6062828984208757, "grad_norm": 1.0784245729446411, "learning_rate": 9.302e-05, "loss": 0.4397, "step": 46543 }, { "epoch": 2.6063388957330047, "grad_norm": 1.0491892099380493, "learning_rate": 9.301973684210526e-05, "loss": 0.4944, "step": 46544 }, { "epoch": 2.6063948930451337, "grad_norm": 1.2368894815444946, "learning_rate": 9.301947368421053e-05, "loss": 0.5697, "step": 46545 }, { "epoch": 2.6064508903572627, "grad_norm": 0.9837244153022766, "learning_rate": 9.301921052631579e-05, "loss": 0.4152, "step": 46546 }, { "epoch": 2.6065068876693918, "grad_norm": 1.315573811531067, "learning_rate": 9.301894736842107e-05, "loss": 0.3764, "step": 46547 }, { "epoch": 2.6065628849815208, "grad_norm": 1.3167028427124023, "learning_rate": 9.301868421052631e-05, "loss": 0.4615, "step": 46548 }, { "epoch": 2.60661888229365, "grad_norm": 1.3782520294189453, "learning_rate": 9.301842105263159e-05, "loss": 0.3486, "step": 46549 }, { "epoch": 2.606674879605779, "grad_norm": 1.0889307260513306, "learning_rate": 9.301815789473685e-05, "loss": 0.3447, "step": 46550 }, { "epoch": 2.606730876917908, "grad_norm": 1.2331639528274536, "learning_rate": 9.301789473684212e-05, "loss": 0.3692, "step": 46551 }, { "epoch": 2.606786874230037, "grad_norm": 1.1452991962432861, "learning_rate": 9.301763157894738e-05, "loss": 0.4368, "step": 46552 }, { "epoch": 2.606842871542166, "grad_norm": 1.2455683946609497, "learning_rate": 9.301736842105262e-05, "loss": 0.4391, "step": 46553 }, { "epoch": 2.606898868854295, "grad_norm": 1.530213475227356, "learning_rate": 9.30171052631579e-05, "loss": 0.4022, "step": 46554 }, { "epoch": 2.606954866166424, "grad_norm": 
1.0618746280670166, "learning_rate": 9.301684210526316e-05, "loss": 0.44, "step": 46555 }, { "epoch": 2.607010863478553, "grad_norm": 1.0888922214508057, "learning_rate": 9.301657894736843e-05, "loss": 0.4074, "step": 46556 }, { "epoch": 2.607066860790682, "grad_norm": 1.3950855731964111, "learning_rate": 9.301631578947369e-05, "loss": 0.3681, "step": 46557 }, { "epoch": 2.607122858102811, "grad_norm": 1.1784887313842773, "learning_rate": 9.301605263157895e-05, "loss": 0.3617, "step": 46558 }, { "epoch": 2.60717885541494, "grad_norm": 1.236409306526184, "learning_rate": 9.301578947368421e-05, "loss": 0.4029, "step": 46559 }, { "epoch": 2.607234852727069, "grad_norm": 1.3023930788040161, "learning_rate": 9.301552631578948e-05, "loss": 0.477, "step": 46560 }, { "epoch": 2.607290850039198, "grad_norm": 1.027319073677063, "learning_rate": 9.301526315789474e-05, "loss": 0.3757, "step": 46561 }, { "epoch": 2.607346847351327, "grad_norm": 1.0279603004455566, "learning_rate": 9.3015e-05, "loss": 0.3608, "step": 46562 }, { "epoch": 2.607402844663456, "grad_norm": 1.2154029607772827, "learning_rate": 9.301473684210526e-05, "loss": 0.5579, "step": 46563 }, { "epoch": 2.607458841975585, "grad_norm": 2.1505751609802246, "learning_rate": 9.301447368421054e-05, "loss": 0.3735, "step": 46564 }, { "epoch": 2.607514839287714, "grad_norm": 1.3641637563705444, "learning_rate": 9.30142105263158e-05, "loss": 0.5112, "step": 46565 }, { "epoch": 2.607570836599843, "grad_norm": 1.1326463222503662, "learning_rate": 9.301394736842106e-05, "loss": 0.2707, "step": 46566 }, { "epoch": 2.607626833911972, "grad_norm": 1.152876615524292, "learning_rate": 9.301368421052632e-05, "loss": 0.3703, "step": 46567 }, { "epoch": 2.607682831224101, "grad_norm": 1.386849045753479, "learning_rate": 9.301342105263159e-05, "loss": 0.3763, "step": 46568 }, { "epoch": 2.60773882853623, "grad_norm": 1.6253869533538818, "learning_rate": 9.301315789473685e-05, "loss": 0.4848, "step": 46569 }, { "epoch": 
2.607794825848359, "grad_norm": 1.4386910200119019, "learning_rate": 9.301289473684211e-05, "loss": 0.3897, "step": 46570 }, { "epoch": 2.607850823160488, "grad_norm": 1.4965099096298218, "learning_rate": 9.301263157894737e-05, "loss": 0.4671, "step": 46571 }, { "epoch": 2.6079068204726172, "grad_norm": 1.1429165601730347, "learning_rate": 9.301236842105263e-05, "loss": 0.5394, "step": 46572 }, { "epoch": 2.6079628177847463, "grad_norm": 1.2738810777664185, "learning_rate": 9.30121052631579e-05, "loss": 0.4492, "step": 46573 }, { "epoch": 2.6080188150968753, "grad_norm": 1.5004088878631592, "learning_rate": 9.301184210526316e-05, "loss": 0.4748, "step": 46574 }, { "epoch": 2.6080748124090043, "grad_norm": 3.550349235534668, "learning_rate": 9.301157894736843e-05, "loss": 0.3674, "step": 46575 }, { "epoch": 2.6081308097211333, "grad_norm": 1.0376020669937134, "learning_rate": 9.301131578947368e-05, "loss": 0.3438, "step": 46576 }, { "epoch": 2.6081868070332623, "grad_norm": 1.4004347324371338, "learning_rate": 9.301105263157895e-05, "loss": 0.4473, "step": 46577 }, { "epoch": 2.6082428043453914, "grad_norm": 1.0414180755615234, "learning_rate": 9.301078947368421e-05, "loss": 0.3527, "step": 46578 }, { "epoch": 2.6082988016575204, "grad_norm": 1.1772812604904175, "learning_rate": 9.301052631578949e-05, "loss": 0.282, "step": 46579 }, { "epoch": 2.6083547989696494, "grad_norm": 1.2752467393875122, "learning_rate": 9.301026315789475e-05, "loss": 0.476, "step": 46580 }, { "epoch": 2.6084107962817784, "grad_norm": 1.4263345003128052, "learning_rate": 9.301e-05, "loss": 0.5376, "step": 46581 }, { "epoch": 2.6084667935939074, "grad_norm": 1.284131407737732, "learning_rate": 9.300973684210527e-05, "loss": 0.3322, "step": 46582 }, { "epoch": 2.6085227909060364, "grad_norm": 1.3838568925857544, "learning_rate": 9.300947368421054e-05, "loss": 0.4009, "step": 46583 }, { "epoch": 2.6085787882181655, "grad_norm": 1.0283442735671997, "learning_rate": 9.30092105263158e-05, "loss": 
0.3557, "step": 46584 }, { "epoch": 2.6086347855302945, "grad_norm": 1.3276817798614502, "learning_rate": 9.300894736842106e-05, "loss": 0.4271, "step": 46585 }, { "epoch": 2.6086907828424235, "grad_norm": 1.7550311088562012, "learning_rate": 9.300868421052632e-05, "loss": 0.3818, "step": 46586 }, { "epoch": 2.6087467801545525, "grad_norm": 1.219070553779602, "learning_rate": 9.300842105263158e-05, "loss": 0.5494, "step": 46587 }, { "epoch": 2.6088027774666815, "grad_norm": 1.201698660850525, "learning_rate": 9.300815789473685e-05, "loss": 0.4026, "step": 46588 }, { "epoch": 2.6088587747788106, "grad_norm": 1.2091761827468872, "learning_rate": 9.300789473684211e-05, "loss": 0.4615, "step": 46589 }, { "epoch": 2.6089147720909396, "grad_norm": 1.363063097000122, "learning_rate": 9.300763157894737e-05, "loss": 0.4248, "step": 46590 }, { "epoch": 2.6089707694030686, "grad_norm": 1.4333062171936035, "learning_rate": 9.300736842105263e-05, "loss": 0.5165, "step": 46591 }, { "epoch": 2.6090267667151976, "grad_norm": 1.2628583908081055, "learning_rate": 9.30071052631579e-05, "loss": 0.3636, "step": 46592 }, { "epoch": 2.6090827640273266, "grad_norm": 1.2157214879989624, "learning_rate": 9.300684210526316e-05, "loss": 0.4027, "step": 46593 }, { "epoch": 2.6091387613394557, "grad_norm": 1.1543291807174683, "learning_rate": 9.300657894736842e-05, "loss": 0.4073, "step": 46594 }, { "epoch": 2.6091947586515847, "grad_norm": 1.238649606704712, "learning_rate": 9.300631578947368e-05, "loss": 0.3677, "step": 46595 }, { "epoch": 2.6092507559637137, "grad_norm": 1.2070821523666382, "learning_rate": 9.300605263157896e-05, "loss": 0.3631, "step": 46596 }, { "epoch": 2.6093067532758427, "grad_norm": 1.1323351860046387, "learning_rate": 9.300578947368422e-05, "loss": 0.3464, "step": 46597 }, { "epoch": 2.6093627505879717, "grad_norm": 1.1080574989318848, "learning_rate": 9.300552631578948e-05, "loss": 0.5159, "step": 46598 }, { "epoch": 2.6094187479001008, "grad_norm": 
1.1234997510910034, "learning_rate": 9.300526315789473e-05, "loss": 0.3592, "step": 46599 }, { "epoch": 2.6094747452122298, "grad_norm": 1.3733960390090942, "learning_rate": 9.300500000000001e-05, "loss": 0.3532, "step": 46600 }, { "epoch": 2.609530742524359, "grad_norm": 1.1492043733596802, "learning_rate": 9.300473684210527e-05, "loss": 0.4223, "step": 46601 }, { "epoch": 2.609586739836488, "grad_norm": 1.1460093259811401, "learning_rate": 9.300447368421054e-05, "loss": 0.4638, "step": 46602 }, { "epoch": 2.609642737148617, "grad_norm": 1.3171253204345703, "learning_rate": 9.300421052631579e-05, "loss": 0.3983, "step": 46603 }, { "epoch": 2.609698734460746, "grad_norm": 1.28238844871521, "learning_rate": 9.300394736842105e-05, "loss": 0.3827, "step": 46604 }, { "epoch": 2.609754731772875, "grad_norm": 1.1583778858184814, "learning_rate": 9.300368421052632e-05, "loss": 0.4178, "step": 46605 }, { "epoch": 2.609810729085004, "grad_norm": 1.1171513795852661, "learning_rate": 9.300342105263158e-05, "loss": 0.4351, "step": 46606 }, { "epoch": 2.609866726397133, "grad_norm": 1.218218445777893, "learning_rate": 9.300315789473685e-05, "loss": 0.4024, "step": 46607 }, { "epoch": 2.609922723709262, "grad_norm": 1.0216279029846191, "learning_rate": 9.30028947368421e-05, "loss": 0.4262, "step": 46608 }, { "epoch": 2.609978721021391, "grad_norm": 2.543121814727783, "learning_rate": 9.300263157894737e-05, "loss": 0.429, "step": 46609 }, { "epoch": 2.61003471833352, "grad_norm": 1.2610794305801392, "learning_rate": 9.300236842105263e-05, "loss": 0.374, "step": 46610 }, { "epoch": 2.610090715645649, "grad_norm": 1.0212806463241577, "learning_rate": 9.30021052631579e-05, "loss": 0.317, "step": 46611 }, { "epoch": 2.610146712957778, "grad_norm": 1.213247299194336, "learning_rate": 9.300184210526317e-05, "loss": 0.4415, "step": 46612 }, { "epoch": 2.610202710269907, "grad_norm": 1.2385320663452148, "learning_rate": 9.300157894736843e-05, "loss": 0.3539, "step": 46613 }, { "epoch": 
2.610258707582036, "grad_norm": 1.3352187871932983, "learning_rate": 9.300131578947369e-05, "loss": 0.3995, "step": 46614 }, { "epoch": 2.610314704894165, "grad_norm": 3.493345022201538, "learning_rate": 9.300105263157896e-05, "loss": 0.3789, "step": 46615 }, { "epoch": 2.610370702206294, "grad_norm": 1.8478645086288452, "learning_rate": 9.300078947368422e-05, "loss": 0.4978, "step": 46616 }, { "epoch": 2.610426699518423, "grad_norm": 1.2096174955368042, "learning_rate": 9.300052631578948e-05, "loss": 0.286, "step": 46617 }, { "epoch": 2.610482696830552, "grad_norm": 1.294213056564331, "learning_rate": 9.300026315789474e-05, "loss": 0.3751, "step": 46618 }, { "epoch": 2.610538694142681, "grad_norm": 1.1654341220855713, "learning_rate": 9.300000000000001e-05, "loss": 0.4262, "step": 46619 }, { "epoch": 2.61059469145481, "grad_norm": 1.1719697713851929, "learning_rate": 9.299973684210527e-05, "loss": 0.3955, "step": 46620 }, { "epoch": 2.610650688766939, "grad_norm": 1.2508690357208252, "learning_rate": 9.299947368421053e-05, "loss": 0.4435, "step": 46621 }, { "epoch": 2.610706686079068, "grad_norm": 1.1091339588165283, "learning_rate": 9.299921052631579e-05, "loss": 0.4597, "step": 46622 }, { "epoch": 2.610762683391197, "grad_norm": 1.0975714921951294, "learning_rate": 9.299894736842105e-05, "loss": 0.3784, "step": 46623 }, { "epoch": 2.6108186807033262, "grad_norm": 0.9454960823059082, "learning_rate": 9.299868421052632e-05, "loss": 0.2611, "step": 46624 }, { "epoch": 2.6108746780154553, "grad_norm": 1.1642026901245117, "learning_rate": 9.299842105263158e-05, "loss": 0.4509, "step": 46625 }, { "epoch": 2.6109306753275843, "grad_norm": 1.5556083917617798, "learning_rate": 9.299815789473684e-05, "loss": 0.6253, "step": 46626 }, { "epoch": 2.6109866726397133, "grad_norm": 1.224578619003296, "learning_rate": 9.29978947368421e-05, "loss": 0.3381, "step": 46627 }, { "epoch": 2.6110426699518423, "grad_norm": 3.255194902420044, "learning_rate": 9.299763157894738e-05, 
"loss": 0.5234, "step": 46628 }, { "epoch": 2.6110986672639713, "grad_norm": 1.1428229808807373, "learning_rate": 9.299736842105264e-05, "loss": 0.3073, "step": 46629 }, { "epoch": 2.6111546645761003, "grad_norm": 1.4078631401062012, "learning_rate": 9.299710526315791e-05, "loss": 0.2542, "step": 46630 }, { "epoch": 2.6112106618882294, "grad_norm": 1.068902611732483, "learning_rate": 9.299684210526315e-05, "loss": 0.396, "step": 46631 }, { "epoch": 2.6112666592003584, "grad_norm": 1.0733650922775269, "learning_rate": 9.299657894736843e-05, "loss": 0.3553, "step": 46632 }, { "epoch": 2.6113226565124874, "grad_norm": 1.222968578338623, "learning_rate": 9.299631578947369e-05, "loss": 0.334, "step": 46633 }, { "epoch": 2.6113786538246164, "grad_norm": 1.3344064950942993, "learning_rate": 9.299605263157896e-05, "loss": 0.4478, "step": 46634 }, { "epoch": 2.6114346511367454, "grad_norm": 1.1899425983428955, "learning_rate": 9.299578947368422e-05, "loss": 0.3186, "step": 46635 }, { "epoch": 2.6114906484488745, "grad_norm": 1.0654300451278687, "learning_rate": 9.299552631578948e-05, "loss": 0.3854, "step": 46636 }, { "epoch": 2.6115466457610035, "grad_norm": 1.1135302782058716, "learning_rate": 9.299526315789474e-05, "loss": 0.5509, "step": 46637 }, { "epoch": 2.6116026430731325, "grad_norm": 1.1014275550842285, "learning_rate": 9.299500000000001e-05, "loss": 0.4745, "step": 46638 }, { "epoch": 2.6116586403852615, "grad_norm": 1.0376029014587402, "learning_rate": 9.299473684210527e-05, "loss": 0.3105, "step": 46639 }, { "epoch": 2.6117146376973905, "grad_norm": 1.4303823709487915, "learning_rate": 9.299447368421052e-05, "loss": 0.4444, "step": 46640 }, { "epoch": 2.6117706350095196, "grad_norm": 1.406695008277893, "learning_rate": 9.299421052631579e-05, "loss": 0.3542, "step": 46641 }, { "epoch": 2.6118266323216486, "grad_norm": 1.0023406744003296, "learning_rate": 9.299394736842105e-05, "loss": 0.3404, "step": 46642 }, { "epoch": 2.6118826296337776, "grad_norm": 
1.2660740613937378, "learning_rate": 9.299368421052633e-05, "loss": 0.5272, "step": 46643 }, { "epoch": 2.6119386269459066, "grad_norm": 1.1643739938735962, "learning_rate": 9.299342105263159e-05, "loss": 0.4506, "step": 46644 }, { "epoch": 2.6119946242580356, "grad_norm": 1.2932565212249756, "learning_rate": 9.299315789473685e-05, "loss": 0.3818, "step": 46645 }, { "epoch": 2.6120506215701647, "grad_norm": 1.4373708963394165, "learning_rate": 9.29928947368421e-05, "loss": 0.5381, "step": 46646 }, { "epoch": 2.6121066188822937, "grad_norm": 1.4364473819732666, "learning_rate": 9.299263157894738e-05, "loss": 0.5348, "step": 46647 }, { "epoch": 2.6121626161944227, "grad_norm": 1.1308902502059937, "learning_rate": 9.299236842105264e-05, "loss": 0.5657, "step": 46648 }, { "epoch": 2.6122186135065517, "grad_norm": 1.3225475549697876, "learning_rate": 9.29921052631579e-05, "loss": 0.465, "step": 46649 }, { "epoch": 2.6122746108186807, "grad_norm": 1.1010724306106567, "learning_rate": 9.299184210526316e-05, "loss": 0.3833, "step": 46650 }, { "epoch": 2.6123306081308098, "grad_norm": 1.1573151350021362, "learning_rate": 9.299157894736843e-05, "loss": 0.3548, "step": 46651 }, { "epoch": 2.6123866054429388, "grad_norm": 1.2825130224227905, "learning_rate": 9.299131578947369e-05, "loss": 0.4014, "step": 46652 }, { "epoch": 2.612442602755068, "grad_norm": 1.2304491996765137, "learning_rate": 9.299105263157895e-05, "loss": 0.475, "step": 46653 }, { "epoch": 2.612498600067197, "grad_norm": 0.9783585667610168, "learning_rate": 9.299078947368421e-05, "loss": 0.3348, "step": 46654 }, { "epoch": 2.612554597379326, "grad_norm": 1.2408232688903809, "learning_rate": 9.299052631578948e-05, "loss": 0.4249, "step": 46655 }, { "epoch": 2.612610594691455, "grad_norm": 1.1084270477294922, "learning_rate": 9.299026315789474e-05, "loss": 0.408, "step": 46656 }, { "epoch": 2.612666592003584, "grad_norm": 1.1768100261688232, "learning_rate": 9.299e-05, "loss": 0.3857, "step": 46657 }, { "epoch": 
2.612722589315713, "grad_norm": 1.1828205585479736, "learning_rate": 9.298973684210526e-05, "loss": 0.3799, "step": 46658 }, { "epoch": 2.612778586627842, "grad_norm": 1.1990939378738403, "learning_rate": 9.298947368421052e-05, "loss": 0.4627, "step": 46659 }, { "epoch": 2.612834583939971, "grad_norm": 1.1285840272903442, "learning_rate": 9.29892105263158e-05, "loss": 0.3348, "step": 46660 }, { "epoch": 2.6128905812521, "grad_norm": 1.2289525270462036, "learning_rate": 9.298894736842105e-05, "loss": 0.363, "step": 46661 }, { "epoch": 2.612946578564229, "grad_norm": 1.4180353879928589, "learning_rate": 9.298868421052633e-05, "loss": 0.3898, "step": 46662 }, { "epoch": 2.613002575876358, "grad_norm": 1.1110385656356812, "learning_rate": 9.298842105263157e-05, "loss": 0.3456, "step": 46663 }, { "epoch": 2.613058573188487, "grad_norm": 1.4971394538879395, "learning_rate": 9.298815789473685e-05, "loss": 0.4093, "step": 46664 }, { "epoch": 2.613114570500616, "grad_norm": 1.0828325748443604, "learning_rate": 9.298789473684211e-05, "loss": 0.443, "step": 46665 }, { "epoch": 2.613170567812745, "grad_norm": 1.202742338180542, "learning_rate": 9.298763157894738e-05, "loss": 0.4079, "step": 46666 }, { "epoch": 2.613226565124874, "grad_norm": 1.0951130390167236, "learning_rate": 9.298736842105264e-05, "loss": 0.4235, "step": 46667 }, { "epoch": 2.613282562437003, "grad_norm": 1.319145917892456, "learning_rate": 9.29871052631579e-05, "loss": 0.4171, "step": 46668 }, { "epoch": 2.613338559749132, "grad_norm": 1.2181124687194824, "learning_rate": 9.298684210526316e-05, "loss": 0.4251, "step": 46669 }, { "epoch": 2.613394557061261, "grad_norm": 1.2492749691009521, "learning_rate": 9.298657894736843e-05, "loss": 0.4055, "step": 46670 }, { "epoch": 2.61345055437339, "grad_norm": 1.1912407875061035, "learning_rate": 9.298631578947369e-05, "loss": 0.441, "step": 46671 }, { "epoch": 2.613506551685519, "grad_norm": 1.2228444814682007, "learning_rate": 9.298605263157895e-05, "loss": 
0.4807, "step": 46672 }, { "epoch": 2.613562548997648, "grad_norm": 0.9490548372268677, "learning_rate": 9.298578947368421e-05, "loss": 0.2669, "step": 46673 }, { "epoch": 2.613618546309777, "grad_norm": 1.1885175704956055, "learning_rate": 9.298552631578947e-05, "loss": 0.4388, "step": 46674 }, { "epoch": 2.613674543621906, "grad_norm": 1.39473557472229, "learning_rate": 9.298526315789475e-05, "loss": 0.432, "step": 46675 }, { "epoch": 2.6137305409340352, "grad_norm": 1.1211905479431152, "learning_rate": 9.2985e-05, "loss": 0.5124, "step": 46676 }, { "epoch": 2.6137865382461642, "grad_norm": 1.2203218936920166, "learning_rate": 9.298473684210526e-05, "loss": 0.4477, "step": 46677 }, { "epoch": 2.6138425355582933, "grad_norm": 1.369370460510254, "learning_rate": 9.298447368421052e-05, "loss": 0.3663, "step": 46678 }, { "epoch": 2.6138985328704223, "grad_norm": 1.027858853340149, "learning_rate": 9.29842105263158e-05, "loss": 0.4191, "step": 46679 }, { "epoch": 2.6139545301825513, "grad_norm": 1.3899892568588257, "learning_rate": 9.298394736842106e-05, "loss": 0.4292, "step": 46680 }, { "epoch": 2.6140105274946803, "grad_norm": 1.0923852920532227, "learning_rate": 9.298368421052632e-05, "loss": 0.3487, "step": 46681 }, { "epoch": 2.6140665248068093, "grad_norm": 1.1200538873672485, "learning_rate": 9.298342105263158e-05, "loss": 0.3073, "step": 46682 }, { "epoch": 2.6141225221189384, "grad_norm": 1.6595849990844727, "learning_rate": 9.298315789473685e-05, "loss": 0.5936, "step": 46683 }, { "epoch": 2.6141785194310674, "grad_norm": 1.1285338401794434, "learning_rate": 9.298289473684211e-05, "loss": 0.3796, "step": 46684 }, { "epoch": 2.6142345167431964, "grad_norm": 1.2834473848342896, "learning_rate": 9.298263157894738e-05, "loss": 0.3864, "step": 46685 }, { "epoch": 2.6142905140553254, "grad_norm": 1.5202257633209229, "learning_rate": 9.298236842105263e-05, "loss": 0.5937, "step": 46686 }, { "epoch": 2.6143465113674544, "grad_norm": 1.0306605100631714, 
"learning_rate": 9.29821052631579e-05, "loss": 0.3639, "step": 46687 }, { "epoch": 2.6144025086795835, "grad_norm": 1.1619771718978882, "learning_rate": 9.298184210526316e-05, "loss": 0.3971, "step": 46688 }, { "epoch": 2.6144585059917125, "grad_norm": 1.212755560874939, "learning_rate": 9.298157894736844e-05, "loss": 0.4113, "step": 46689 }, { "epoch": 2.6145145033038415, "grad_norm": 1.5457602739334106, "learning_rate": 9.298131578947368e-05, "loss": 0.395, "step": 46690 }, { "epoch": 2.6145705006159705, "grad_norm": 0.9748319387435913, "learning_rate": 9.298105263157894e-05, "loss": 0.371, "step": 46691 }, { "epoch": 2.6146264979280995, "grad_norm": 1.1145292520523071, "learning_rate": 9.298078947368421e-05, "loss": 0.4219, "step": 46692 }, { "epoch": 2.6146824952402286, "grad_norm": 1.1746282577514648, "learning_rate": 9.298052631578947e-05, "loss": 0.3978, "step": 46693 }, { "epoch": 2.6147384925523576, "grad_norm": 1.0057501792907715, "learning_rate": 9.298026315789475e-05, "loss": 0.3702, "step": 46694 }, { "epoch": 2.6147944898644866, "grad_norm": 1.2233495712280273, "learning_rate": 9.298e-05, "loss": 0.4914, "step": 46695 }, { "epoch": 2.6148504871766156, "grad_norm": 2.321775197982788, "learning_rate": 9.297973684210527e-05, "loss": 0.521, "step": 46696 }, { "epoch": 2.6149064844887446, "grad_norm": 1.1047756671905518, "learning_rate": 9.297947368421053e-05, "loss": 0.2826, "step": 46697 }, { "epoch": 2.6149624818008737, "grad_norm": 1.0921597480773926, "learning_rate": 9.29792105263158e-05, "loss": 0.4113, "step": 46698 }, { "epoch": 2.6150184791130027, "grad_norm": 1.0332170724868774, "learning_rate": 9.297894736842106e-05, "loss": 0.3345, "step": 46699 }, { "epoch": 2.6150744764251317, "grad_norm": 0.9269431233406067, "learning_rate": 9.297868421052632e-05, "loss": 0.2807, "step": 46700 }, { "epoch": 2.6151304737372607, "grad_norm": 1.647600531578064, "learning_rate": 9.297842105263158e-05, "loss": 0.3338, "step": 46701 }, { "epoch": 
2.6151864710493897, "grad_norm": 1.2438921928405762, "learning_rate": 9.297815789473685e-05, "loss": 0.3225, "step": 46702 }, { "epoch": 2.6152424683615187, "grad_norm": 0.9726871252059937, "learning_rate": 9.297789473684211e-05, "loss": 0.3034, "step": 46703 }, { "epoch": 2.6152984656736478, "grad_norm": 1.1678202152252197, "learning_rate": 9.297763157894737e-05, "loss": 0.3353, "step": 46704 }, { "epoch": 2.615354462985777, "grad_norm": 1.0944322347640991, "learning_rate": 9.297736842105263e-05, "loss": 0.3435, "step": 46705 }, { "epoch": 2.615410460297906, "grad_norm": 1.160312533378601, "learning_rate": 9.29771052631579e-05, "loss": 0.4908, "step": 46706 }, { "epoch": 2.615466457610035, "grad_norm": 1.211646318435669, "learning_rate": 9.297684210526317e-05, "loss": 0.4731, "step": 46707 }, { "epoch": 2.615522454922164, "grad_norm": 1.1735705137252808, "learning_rate": 9.297657894736842e-05, "loss": 0.3869, "step": 46708 }, { "epoch": 2.615578452234293, "grad_norm": 1.3701469898223877, "learning_rate": 9.297631578947368e-05, "loss": 0.3438, "step": 46709 }, { "epoch": 2.615634449546422, "grad_norm": 2.0871517658233643, "learning_rate": 9.297605263157894e-05, "loss": 0.3194, "step": 46710 }, { "epoch": 2.615690446858551, "grad_norm": 1.24550199508667, "learning_rate": 9.297578947368422e-05, "loss": 0.3119, "step": 46711 }, { "epoch": 2.61574644417068, "grad_norm": 1.0324658155441284, "learning_rate": 9.297552631578948e-05, "loss": 0.3611, "step": 46712 }, { "epoch": 2.615802441482809, "grad_norm": 1.21067476272583, "learning_rate": 9.297526315789474e-05, "loss": 0.3275, "step": 46713 }, { "epoch": 2.615858438794938, "grad_norm": 1.3327478170394897, "learning_rate": 9.2975e-05, "loss": 0.3432, "step": 46714 }, { "epoch": 2.615914436107067, "grad_norm": 1.0227373838424683, "learning_rate": 9.297473684210527e-05, "loss": 0.3751, "step": 46715 }, { "epoch": 2.615970433419196, "grad_norm": 1.2362167835235596, "learning_rate": 9.297447368421053e-05, "loss": 0.4402, 
"step": 46716 }, { "epoch": 2.616026430731325, "grad_norm": 1.2102203369140625, "learning_rate": 9.29742105263158e-05, "loss": 0.4519, "step": 46717 }, { "epoch": 2.616082428043454, "grad_norm": 1.2363210916519165, "learning_rate": 9.297394736842105e-05, "loss": 0.446, "step": 46718 }, { "epoch": 2.616138425355583, "grad_norm": 1.0664886236190796, "learning_rate": 9.297368421052632e-05, "loss": 0.4039, "step": 46719 }, { "epoch": 2.616194422667712, "grad_norm": 1.5525882244110107, "learning_rate": 9.297342105263158e-05, "loss": 0.395, "step": 46720 }, { "epoch": 2.616250419979841, "grad_norm": 1.2278224229812622, "learning_rate": 9.297315789473686e-05, "loss": 0.4857, "step": 46721 }, { "epoch": 2.61630641729197, "grad_norm": 1.246382236480713, "learning_rate": 9.297289473684212e-05, "loss": 0.4867, "step": 46722 }, { "epoch": 2.616362414604099, "grad_norm": 1.1361393928527832, "learning_rate": 9.297263157894737e-05, "loss": 0.5091, "step": 46723 }, { "epoch": 2.616418411916228, "grad_norm": 1.275478720664978, "learning_rate": 9.297236842105263e-05, "loss": 0.542, "step": 46724 }, { "epoch": 2.616474409228357, "grad_norm": 1.2570104598999023, "learning_rate": 9.29721052631579e-05, "loss": 0.4444, "step": 46725 }, { "epoch": 2.616530406540486, "grad_norm": 1.2875245809555054, "learning_rate": 9.297184210526317e-05, "loss": 0.5084, "step": 46726 }, { "epoch": 2.616586403852615, "grad_norm": 1.2645480632781982, "learning_rate": 9.297157894736843e-05, "loss": 0.4481, "step": 46727 }, { "epoch": 2.6166424011647442, "grad_norm": 1.5479607582092285, "learning_rate": 9.297131578947369e-05, "loss": 0.5425, "step": 46728 }, { "epoch": 2.6166983984768732, "grad_norm": 1.2086495161056519, "learning_rate": 9.297105263157895e-05, "loss": 0.4281, "step": 46729 }, { "epoch": 2.6167543957890023, "grad_norm": 1.4190393686294556, "learning_rate": 9.297078947368422e-05, "loss": 0.6183, "step": 46730 }, { "epoch": 2.6168103931011313, "grad_norm": 1.3730733394622803, "learning_rate": 
9.297052631578948e-05, "loss": 0.4142, "step": 46731 }, { "epoch": 2.6168663904132603, "grad_norm": 1.212880253791809, "learning_rate": 9.297026315789474e-05, "loss": 0.365, "step": 46732 }, { "epoch": 2.6169223877253893, "grad_norm": 1.1551216840744019, "learning_rate": 9.297e-05, "loss": 0.4941, "step": 46733 }, { "epoch": 2.6169783850375183, "grad_norm": 1.172528624534607, "learning_rate": 9.296973684210527e-05, "loss": 0.4252, "step": 46734 }, { "epoch": 2.6170343823496474, "grad_norm": 1.14329195022583, "learning_rate": 9.296947368421053e-05, "loss": 0.5225, "step": 46735 }, { "epoch": 2.6170903796617764, "grad_norm": 1.9598976373672485, "learning_rate": 9.296921052631579e-05, "loss": 0.4284, "step": 46736 }, { "epoch": 2.6171463769739054, "grad_norm": 0.9865350127220154, "learning_rate": 9.296894736842105e-05, "loss": 0.3303, "step": 46737 }, { "epoch": 2.6172023742860344, "grad_norm": 1.1210765838623047, "learning_rate": 9.296868421052632e-05, "loss": 0.3316, "step": 46738 }, { "epoch": 2.6172583715981634, "grad_norm": 1.0898633003234863, "learning_rate": 9.296842105263158e-05, "loss": 0.4742, "step": 46739 }, { "epoch": 2.6173143689102925, "grad_norm": 1.0996369123458862, "learning_rate": 9.296815789473686e-05, "loss": 0.2971, "step": 46740 }, { "epoch": 2.6173703662224215, "grad_norm": 1.3314951658248901, "learning_rate": 9.29678947368421e-05, "loss": 0.4739, "step": 46741 }, { "epoch": 2.6174263635345505, "grad_norm": 1.0952012538909912, "learning_rate": 9.296763157894738e-05, "loss": 0.3667, "step": 46742 }, { "epoch": 2.6174823608466795, "grad_norm": 1.3140828609466553, "learning_rate": 9.296736842105264e-05, "loss": 0.3883, "step": 46743 }, { "epoch": 2.6175383581588085, "grad_norm": 1.1674870252609253, "learning_rate": 9.29671052631579e-05, "loss": 0.3587, "step": 46744 }, { "epoch": 2.6175943554709376, "grad_norm": 1.0534330606460571, "learning_rate": 9.296684210526316e-05, "loss": 0.3889, "step": 46745 }, { "epoch": 2.6176503527830666, "grad_norm": 
1.3798774480819702, "learning_rate": 9.296657894736842e-05, "loss": 0.518, "step": 46746 }, { "epoch": 2.6177063500951956, "grad_norm": 1.4897041320800781, "learning_rate": 9.296631578947369e-05, "loss": 0.3668, "step": 46747 }, { "epoch": 2.6177623474073246, "grad_norm": 1.3185303211212158, "learning_rate": 9.296605263157895e-05, "loss": 0.2848, "step": 46748 }, { "epoch": 2.6178183447194536, "grad_norm": 0.9920732378959656, "learning_rate": 9.296578947368422e-05, "loss": 0.3637, "step": 46749 }, { "epoch": 2.6178743420315826, "grad_norm": 1.2335031032562256, "learning_rate": 9.296552631578947e-05, "loss": 0.5493, "step": 46750 }, { "epoch": 2.6179303393437117, "grad_norm": 1.6651972532272339, "learning_rate": 9.296526315789474e-05, "loss": 0.4698, "step": 46751 }, { "epoch": 2.6179863366558407, "grad_norm": 1.4533491134643555, "learning_rate": 9.2965e-05, "loss": 0.4126, "step": 46752 }, { "epoch": 2.6180423339679697, "grad_norm": 1.130131483078003, "learning_rate": 9.296473684210528e-05, "loss": 0.3973, "step": 46753 }, { "epoch": 2.6180983312800983, "grad_norm": 1.3289220333099365, "learning_rate": 9.296447368421053e-05, "loss": 0.478, "step": 46754 }, { "epoch": 2.6181543285922277, "grad_norm": 1.2845627069473267, "learning_rate": 9.29642105263158e-05, "loss": 0.4362, "step": 46755 }, { "epoch": 2.6182103259043563, "grad_norm": 1.0131199359893799, "learning_rate": 9.296394736842105e-05, "loss": 0.414, "step": 46756 }, { "epoch": 2.618266323216486, "grad_norm": 1.1362448930740356, "learning_rate": 9.296368421052633e-05, "loss": 0.3894, "step": 46757 }, { "epoch": 2.6183223205286144, "grad_norm": 1.0010775327682495, "learning_rate": 9.296342105263159e-05, "loss": 0.3246, "step": 46758 }, { "epoch": 2.618378317840744, "grad_norm": 1.145743727684021, "learning_rate": 9.296315789473685e-05, "loss": 0.3619, "step": 46759 }, { "epoch": 2.6184343151528724, "grad_norm": 1.2314954996109009, "learning_rate": 9.29628947368421e-05, "loss": 0.4092, "step": 46760 }, { 
"epoch": 2.618490312465002, "grad_norm": 1.763024926185608, "learning_rate": 9.296263157894737e-05, "loss": 0.4066, "step": 46761 }, { "epoch": 2.6185463097771304, "grad_norm": 1.14560866355896, "learning_rate": 9.296236842105264e-05, "loss": 0.3184, "step": 46762 }, { "epoch": 2.61860230708926, "grad_norm": 1.3619943857192993, "learning_rate": 9.29621052631579e-05, "loss": 0.4676, "step": 46763 }, { "epoch": 2.6186583044013885, "grad_norm": 1.2551202774047852, "learning_rate": 9.296184210526316e-05, "loss": 0.4017, "step": 46764 }, { "epoch": 2.618714301713518, "grad_norm": 1.1253986358642578, "learning_rate": 9.296157894736842e-05, "loss": 0.3926, "step": 46765 }, { "epoch": 2.6187702990256465, "grad_norm": 1.3530446290969849, "learning_rate": 9.296131578947369e-05, "loss": 0.5482, "step": 46766 }, { "epoch": 2.618826296337776, "grad_norm": 1.4351688623428345, "learning_rate": 9.296105263157895e-05, "loss": 0.3681, "step": 46767 }, { "epoch": 2.6188822936499045, "grad_norm": 1.9886863231658936, "learning_rate": 9.296078947368421e-05, "loss": 0.4713, "step": 46768 }, { "epoch": 2.618938290962034, "grad_norm": 1.2003042697906494, "learning_rate": 9.296052631578947e-05, "loss": 0.4211, "step": 46769 }, { "epoch": 2.6189942882741626, "grad_norm": 1.2783092260360718, "learning_rate": 9.296026315789474e-05, "loss": 0.5489, "step": 46770 }, { "epoch": 2.619050285586292, "grad_norm": 1.1137007474899292, "learning_rate": 9.296e-05, "loss": 0.2743, "step": 46771 }, { "epoch": 2.6191062828984206, "grad_norm": 0.9794380068778992, "learning_rate": 9.295973684210528e-05, "loss": 0.3035, "step": 46772 }, { "epoch": 2.61916228021055, "grad_norm": 1.138496994972229, "learning_rate": 9.295947368421052e-05, "loss": 0.4509, "step": 46773 }, { "epoch": 2.6192182775226787, "grad_norm": 1.253280520439148, "learning_rate": 9.29592105263158e-05, "loss": 0.3246, "step": 46774 }, { "epoch": 2.619274274834808, "grad_norm": 1.2355107069015503, "learning_rate": 9.295894736842106e-05, "loss": 
0.442, "step": 46775 }, { "epoch": 2.6193302721469367, "grad_norm": 1.708880066871643, "learning_rate": 9.295868421052633e-05, "loss": 0.4466, "step": 46776 }, { "epoch": 2.619386269459066, "grad_norm": 1.1865224838256836, "learning_rate": 9.295842105263159e-05, "loss": 0.3459, "step": 46777 }, { "epoch": 2.6194422667711947, "grad_norm": 1.3466836214065552, "learning_rate": 9.295815789473684e-05, "loss": 0.4544, "step": 46778 }, { "epoch": 2.619498264083324, "grad_norm": 1.2605745792388916, "learning_rate": 9.295789473684211e-05, "loss": 0.6802, "step": 46779 }, { "epoch": 2.6195542613954528, "grad_norm": 1.003353238105774, "learning_rate": 9.295763157894737e-05, "loss": 0.3653, "step": 46780 }, { "epoch": 2.6196102587075822, "grad_norm": 1.4013078212738037, "learning_rate": 9.295736842105264e-05, "loss": 0.3567, "step": 46781 }, { "epoch": 2.619666256019711, "grad_norm": 1.143011450767517, "learning_rate": 9.29571052631579e-05, "loss": 0.4626, "step": 46782 }, { "epoch": 2.6197222533318403, "grad_norm": 1.1356501579284668, "learning_rate": 9.295684210526316e-05, "loss": 0.3323, "step": 46783 }, { "epoch": 2.619778250643969, "grad_norm": 1.2056152820587158, "learning_rate": 9.295657894736842e-05, "loss": 0.52, "step": 46784 }, { "epoch": 2.6198342479560983, "grad_norm": 1.6221848726272583, "learning_rate": 9.29563157894737e-05, "loss": 0.4295, "step": 46785 }, { "epoch": 2.619890245268227, "grad_norm": 1.4846909046173096, "learning_rate": 9.295605263157895e-05, "loss": 0.6415, "step": 46786 }, { "epoch": 2.6199462425803564, "grad_norm": 1.3210195302963257, "learning_rate": 9.295578947368421e-05, "loss": 0.561, "step": 46787 }, { "epoch": 2.620002239892485, "grad_norm": 1.148607850074768, "learning_rate": 9.295552631578947e-05, "loss": 0.5336, "step": 46788 }, { "epoch": 2.6200582372046144, "grad_norm": 1.1712279319763184, "learning_rate": 9.295526315789475e-05, "loss": 0.5014, "step": 46789 }, { "epoch": 2.620114234516743, "grad_norm": 1.163711667060852, 
"learning_rate": 9.295500000000001e-05, "loss": 0.2981, "step": 46790 }, { "epoch": 2.6201702318288724, "grad_norm": 1.3419890403747559, "learning_rate": 9.295473684210527e-05, "loss": 0.4093, "step": 46791 }, { "epoch": 2.620226229141001, "grad_norm": 1.1486859321594238, "learning_rate": 9.295447368421053e-05, "loss": 0.3793, "step": 46792 }, { "epoch": 2.6202822264531305, "grad_norm": 1.024530291557312, "learning_rate": 9.29542105263158e-05, "loss": 0.4485, "step": 46793 }, { "epoch": 2.620338223765259, "grad_norm": 1.150249719619751, "learning_rate": 9.295394736842106e-05, "loss": 0.3444, "step": 46794 }, { "epoch": 2.6203942210773885, "grad_norm": 1.1997238397598267, "learning_rate": 9.295368421052632e-05, "loss": 0.4375, "step": 46795 }, { "epoch": 2.620450218389517, "grad_norm": 1.2190262079238892, "learning_rate": 9.295342105263158e-05, "loss": 0.3787, "step": 46796 }, { "epoch": 2.6205062157016465, "grad_norm": 1.053983211517334, "learning_rate": 9.295315789473684e-05, "loss": 0.3154, "step": 46797 }, { "epoch": 2.620562213013775, "grad_norm": 1.0090112686157227, "learning_rate": 9.295289473684211e-05, "loss": 0.3573, "step": 46798 }, { "epoch": 2.6206182103259046, "grad_norm": 1.162980556488037, "learning_rate": 9.295263157894737e-05, "loss": 0.3862, "step": 46799 }, { "epoch": 2.620674207638033, "grad_norm": 1.1985747814178467, "learning_rate": 9.295236842105263e-05, "loss": 0.4161, "step": 46800 }, { "epoch": 2.6207302049501626, "grad_norm": 1.1420365571975708, "learning_rate": 9.295210526315789e-05, "loss": 0.4306, "step": 46801 }, { "epoch": 2.620786202262291, "grad_norm": 0.9453822374343872, "learning_rate": 9.295184210526316e-05, "loss": 0.3628, "step": 46802 }, { "epoch": 2.6208421995744207, "grad_norm": 1.2848963737487793, "learning_rate": 9.295157894736842e-05, "loss": 0.3084, "step": 46803 }, { "epoch": 2.6208981968865492, "grad_norm": 1.2864497900009155, "learning_rate": 9.29513157894737e-05, "loss": 0.38, "step": 46804 }, { "epoch": 
2.6209541941986787, "grad_norm": 1.0188215970993042, "learning_rate": 9.295105263157894e-05, "loss": 0.358, "step": 46805 }, { "epoch": 2.6210101915108073, "grad_norm": 1.063781976699829, "learning_rate": 9.295078947368422e-05, "loss": 0.3996, "step": 46806 }, { "epoch": 2.6210661888229367, "grad_norm": 1.3126211166381836, "learning_rate": 9.295052631578948e-05, "loss": 0.4277, "step": 46807 }, { "epoch": 2.6211221861350653, "grad_norm": 1.275341510772705, "learning_rate": 9.295026315789475e-05, "loss": 0.3897, "step": 46808 }, { "epoch": 2.6211781834471948, "grad_norm": 1.46393620967865, "learning_rate": 9.295000000000001e-05, "loss": 0.3813, "step": 46809 }, { "epoch": 2.6212341807593234, "grad_norm": 1.3119186162948608, "learning_rate": 9.294973684210527e-05, "loss": 0.4948, "step": 46810 }, { "epoch": 2.621290178071453, "grad_norm": 1.063950777053833, "learning_rate": 9.294947368421053e-05, "loss": 0.4053, "step": 46811 }, { "epoch": 2.6213461753835814, "grad_norm": 1.1416627168655396, "learning_rate": 9.294921052631579e-05, "loss": 0.4408, "step": 46812 }, { "epoch": 2.621402172695711, "grad_norm": 1.0680553913116455, "learning_rate": 9.294894736842106e-05, "loss": 0.4368, "step": 46813 }, { "epoch": 2.6214581700078394, "grad_norm": 1.3229248523712158, "learning_rate": 9.294868421052632e-05, "loss": 0.3357, "step": 46814 }, { "epoch": 2.621514167319969, "grad_norm": 1.49494206905365, "learning_rate": 9.294842105263158e-05, "loss": 0.3979, "step": 46815 }, { "epoch": 2.6215701646320975, "grad_norm": 1.183313012123108, "learning_rate": 9.294815789473684e-05, "loss": 0.3195, "step": 46816 }, { "epoch": 2.621626161944227, "grad_norm": 1.0830750465393066, "learning_rate": 9.294789473684211e-05, "loss": 0.4149, "step": 46817 }, { "epoch": 2.6216821592563555, "grad_norm": 1.0247219800949097, "learning_rate": 9.294763157894737e-05, "loss": 0.2976, "step": 46818 }, { "epoch": 2.621738156568485, "grad_norm": 1.2096703052520752, "learning_rate": 9.294736842105263e-05, 
"loss": 0.3536, "step": 46819 }, { "epoch": 2.6217941538806135, "grad_norm": 1.2946943044662476, "learning_rate": 9.29471052631579e-05, "loss": 0.5974, "step": 46820 }, { "epoch": 2.621850151192743, "grad_norm": 1.3831393718719482, "learning_rate": 9.294684210526317e-05, "loss": 0.4063, "step": 46821 }, { "epoch": 2.6219061485048716, "grad_norm": 1.3210722208023071, "learning_rate": 9.294657894736843e-05, "loss": 0.4165, "step": 46822 }, { "epoch": 2.621962145817001, "grad_norm": 1.2941522598266602, "learning_rate": 9.294631578947369e-05, "loss": 0.4364, "step": 46823 }, { "epoch": 2.6220181431291296, "grad_norm": 1.3078821897506714, "learning_rate": 9.294605263157895e-05, "loss": 0.4047, "step": 46824 }, { "epoch": 2.622074140441259, "grad_norm": 1.2820583581924438, "learning_rate": 9.294578947368422e-05, "loss": 0.4297, "step": 46825 }, { "epoch": 2.6221301377533877, "grad_norm": 1.212156891822815, "learning_rate": 9.294552631578948e-05, "loss": 0.4591, "step": 46826 }, { "epoch": 2.622186135065517, "grad_norm": 1.0013808012008667, "learning_rate": 9.294526315789475e-05, "loss": 0.3915, "step": 46827 }, { "epoch": 2.6222421323776457, "grad_norm": 1.1047852039337158, "learning_rate": 9.2945e-05, "loss": 0.3849, "step": 46828 }, { "epoch": 2.622298129689775, "grad_norm": 1.251317024230957, "learning_rate": 9.294473684210526e-05, "loss": 0.4171, "step": 46829 }, { "epoch": 2.6223541270019037, "grad_norm": 1.0286364555358887, "learning_rate": 9.294447368421053e-05, "loss": 0.3906, "step": 46830 }, { "epoch": 2.622410124314033, "grad_norm": 1.1840567588806152, "learning_rate": 9.294421052631579e-05, "loss": 0.4472, "step": 46831 }, { "epoch": 2.6224661216261618, "grad_norm": 1.273462176322937, "learning_rate": 9.294394736842106e-05, "loss": 0.6608, "step": 46832 }, { "epoch": 2.6225221189382912, "grad_norm": 1.1367168426513672, "learning_rate": 9.294368421052631e-05, "loss": 0.3169, "step": 46833 }, { "epoch": 2.62257811625042, "grad_norm": 1.3610601425170898, 
"learning_rate": 9.294342105263158e-05, "loss": 0.4391, "step": 46834 }, { "epoch": 2.6226341135625493, "grad_norm": 1.3763405084609985, "learning_rate": 9.294315789473684e-05, "loss": 0.4942, "step": 46835 }, { "epoch": 2.622690110874678, "grad_norm": 1.277869701385498, "learning_rate": 9.294289473684212e-05, "loss": 0.4401, "step": 46836 }, { "epoch": 2.622746108186807, "grad_norm": 1.535495400428772, "learning_rate": 9.294263157894736e-05, "loss": 0.3942, "step": 46837 }, { "epoch": 2.622802105498936, "grad_norm": 1.0069700479507446, "learning_rate": 9.294236842105264e-05, "loss": 0.4211, "step": 46838 }, { "epoch": 2.622858102811065, "grad_norm": 1.3738114833831787, "learning_rate": 9.29421052631579e-05, "loss": 0.444, "step": 46839 }, { "epoch": 2.622914100123194, "grad_norm": 1.6657222509384155, "learning_rate": 9.294184210526317e-05, "loss": 0.5791, "step": 46840 }, { "epoch": 2.622970097435323, "grad_norm": 1.159819483757019, "learning_rate": 9.294157894736843e-05, "loss": 0.3734, "step": 46841 }, { "epoch": 2.623026094747452, "grad_norm": 1.6650866270065308, "learning_rate": 9.294131578947369e-05, "loss": 0.3606, "step": 46842 }, { "epoch": 2.623082092059581, "grad_norm": 1.0191733837127686, "learning_rate": 9.294105263157895e-05, "loss": 0.3686, "step": 46843 }, { "epoch": 2.62313808937171, "grad_norm": 1.2151050567626953, "learning_rate": 9.294078947368422e-05, "loss": 0.421, "step": 46844 }, { "epoch": 2.623194086683839, "grad_norm": 1.4377672672271729, "learning_rate": 9.294052631578948e-05, "loss": 0.3522, "step": 46845 }, { "epoch": 2.623250083995968, "grad_norm": 1.1337298154830933, "learning_rate": 9.294026315789474e-05, "loss": 0.4244, "step": 46846 }, { "epoch": 2.623306081308097, "grad_norm": 0.9996371865272522, "learning_rate": 9.294e-05, "loss": 0.3824, "step": 46847 }, { "epoch": 2.623362078620226, "grad_norm": 1.2200084924697876, "learning_rate": 9.293973684210526e-05, "loss": 0.4108, "step": 46848 }, { "epoch": 2.623418075932355, 
"grad_norm": 1.142985463142395, "learning_rate": 9.293947368421053e-05, "loss": 0.4335, "step": 46849 }, { "epoch": 2.623474073244484, "grad_norm": 1.1654499769210815, "learning_rate": 9.29392105263158e-05, "loss": 0.4249, "step": 46850 }, { "epoch": 2.623530070556613, "grad_norm": 0.9991153478622437, "learning_rate": 9.293894736842105e-05, "loss": 0.2508, "step": 46851 }, { "epoch": 2.623586067868742, "grad_norm": 1.164332628250122, "learning_rate": 9.293868421052631e-05, "loss": 0.3906, "step": 46852 }, { "epoch": 2.623642065180871, "grad_norm": 1.0930718183517456, "learning_rate": 9.293842105263159e-05, "loss": 0.4893, "step": 46853 }, { "epoch": 2.623698062493, "grad_norm": 1.1107914447784424, "learning_rate": 9.293815789473685e-05, "loss": 0.2895, "step": 46854 }, { "epoch": 2.623754059805129, "grad_norm": 1.153120517730713, "learning_rate": 9.29378947368421e-05, "loss": 0.469, "step": 46855 }, { "epoch": 2.6238100571172582, "grad_norm": 1.1557214260101318, "learning_rate": 9.293763157894737e-05, "loss": 0.3707, "step": 46856 }, { "epoch": 2.6238660544293873, "grad_norm": 1.144906759262085, "learning_rate": 9.293736842105264e-05, "loss": 0.3731, "step": 46857 }, { "epoch": 2.6239220517415163, "grad_norm": 1.194730520248413, "learning_rate": 9.29371052631579e-05, "loss": 0.4565, "step": 46858 }, { "epoch": 2.6239780490536453, "grad_norm": 1.2038601636886597, "learning_rate": 9.293684210526317e-05, "loss": 0.4596, "step": 46859 }, { "epoch": 2.6240340463657743, "grad_norm": 0.9323737025260925, "learning_rate": 9.293657894736842e-05, "loss": 0.3498, "step": 46860 }, { "epoch": 2.6240900436779033, "grad_norm": 1.0257657766342163, "learning_rate": 9.293631578947369e-05, "loss": 0.3493, "step": 46861 }, { "epoch": 2.6241460409900323, "grad_norm": 1.3857697248458862, "learning_rate": 9.293605263157895e-05, "loss": 0.3482, "step": 46862 }, { "epoch": 2.6242020383021614, "grad_norm": 1.1450566053390503, "learning_rate": 9.293578947368422e-05, "loss": 0.3862, "step": 
46863 }, { "epoch": 2.6242580356142904, "grad_norm": 0.9813950061798096, "learning_rate": 9.293552631578948e-05, "loss": 0.3308, "step": 46864 }, { "epoch": 2.6243140329264194, "grad_norm": 1.206701397895813, "learning_rate": 9.293526315789473e-05, "loss": 0.384, "step": 46865 }, { "epoch": 2.6243700302385484, "grad_norm": 1.4730498790740967, "learning_rate": 9.2935e-05, "loss": 0.5002, "step": 46866 }, { "epoch": 2.6244260275506774, "grad_norm": 25.71799087524414, "learning_rate": 9.293473684210526e-05, "loss": 0.3861, "step": 46867 }, { "epoch": 2.6244820248628065, "grad_norm": 1.2271591424942017, "learning_rate": 9.293447368421054e-05, "loss": 0.3942, "step": 46868 }, { "epoch": 2.6245380221749355, "grad_norm": 1.3096116781234741, "learning_rate": 9.29342105263158e-05, "loss": 0.4031, "step": 46869 }, { "epoch": 2.6245940194870645, "grad_norm": 1.063765048980713, "learning_rate": 9.293394736842106e-05, "loss": 0.3613, "step": 46870 }, { "epoch": 2.6246500167991935, "grad_norm": 1.2308342456817627, "learning_rate": 9.293368421052632e-05, "loss": 0.4228, "step": 46871 }, { "epoch": 2.6247060141113225, "grad_norm": 1.0634870529174805, "learning_rate": 9.293342105263159e-05, "loss": 0.388, "step": 46872 }, { "epoch": 2.6247620114234516, "grad_norm": 1.1167595386505127, "learning_rate": 9.293315789473685e-05, "loss": 0.4569, "step": 46873 }, { "epoch": 2.6248180087355806, "grad_norm": 1.3785927295684814, "learning_rate": 9.293289473684211e-05, "loss": 0.4373, "step": 46874 }, { "epoch": 2.6248740060477096, "grad_norm": 1.1960278749465942, "learning_rate": 9.293263157894737e-05, "loss": 0.4188, "step": 46875 }, { "epoch": 2.6249300033598386, "grad_norm": 1.3218187093734741, "learning_rate": 9.293236842105264e-05, "loss": 0.3839, "step": 46876 }, { "epoch": 2.6249860006719676, "grad_norm": 0.9911853671073914, "learning_rate": 9.29321052631579e-05, "loss": 0.3853, "step": 46877 }, { "epoch": 2.6250419979840967, "grad_norm": 1.4362967014312744, "learning_rate": 
9.293184210526316e-05, "loss": 0.3796, "step": 46878 }, { "epoch": 2.6250979952962257, "grad_norm": 1.0741127729415894, "learning_rate": 9.293157894736842e-05, "loss": 0.411, "step": 46879 }, { "epoch": 2.6251539926083547, "grad_norm": 1.2929226160049438, "learning_rate": 9.29313157894737e-05, "loss": 0.3197, "step": 46880 }, { "epoch": 2.6252099899204837, "grad_norm": 1.2102035284042358, "learning_rate": 9.293105263157895e-05, "loss": 0.2452, "step": 46881 }, { "epoch": 2.6252659872326127, "grad_norm": 1.6133804321289062, "learning_rate": 9.293078947368421e-05, "loss": 0.376, "step": 46882 }, { "epoch": 2.6253219845447417, "grad_norm": 1.11672842502594, "learning_rate": 9.293052631578947e-05, "loss": 0.4684, "step": 46883 }, { "epoch": 2.6253779818568708, "grad_norm": 1.1760737895965576, "learning_rate": 9.293026315789473e-05, "loss": 0.4643, "step": 46884 }, { "epoch": 2.625433979169, "grad_norm": 1.3039157390594482, "learning_rate": 9.293e-05, "loss": 0.375, "step": 46885 }, { "epoch": 2.625489976481129, "grad_norm": 1.256610631942749, "learning_rate": 9.292973684210527e-05, "loss": 0.3342, "step": 46886 }, { "epoch": 2.625545973793258, "grad_norm": 1.287553071975708, "learning_rate": 9.292947368421054e-05, "loss": 0.3702, "step": 46887 }, { "epoch": 2.625601971105387, "grad_norm": 1.1641291379928589, "learning_rate": 9.292921052631579e-05, "loss": 0.3848, "step": 46888 }, { "epoch": 2.625657968417516, "grad_norm": 1.2423171997070312, "learning_rate": 9.292894736842106e-05, "loss": 0.4469, "step": 46889 }, { "epoch": 2.625713965729645, "grad_norm": 1.170389175415039, "learning_rate": 9.292868421052632e-05, "loss": 0.3731, "step": 46890 }, { "epoch": 2.625769963041774, "grad_norm": 1.1667860746383667, "learning_rate": 9.292842105263159e-05, "loss": 0.4254, "step": 46891 }, { "epoch": 2.625825960353903, "grad_norm": 1.0317459106445312, "learning_rate": 9.292815789473684e-05, "loss": 0.3392, "step": 46892 }, { "epoch": 2.625881957666032, "grad_norm": 
0.99285489320755, "learning_rate": 9.292789473684211e-05, "loss": 0.3249, "step": 46893 }, { "epoch": 2.625937954978161, "grad_norm": 1.5093568563461304, "learning_rate": 9.292763157894737e-05, "loss": 0.3336, "step": 46894 }, { "epoch": 2.62599395229029, "grad_norm": 0.966926097869873, "learning_rate": 9.292736842105264e-05, "loss": 0.3285, "step": 46895 }, { "epoch": 2.626049949602419, "grad_norm": 1.4612423181533813, "learning_rate": 9.29271052631579e-05, "loss": 0.4652, "step": 46896 }, { "epoch": 2.626105946914548, "grad_norm": 1.4244133234024048, "learning_rate": 9.292684210526316e-05, "loss": 0.4029, "step": 46897 }, { "epoch": 2.626161944226677, "grad_norm": 1.1992512941360474, "learning_rate": 9.292657894736842e-05, "loss": 0.3859, "step": 46898 }, { "epoch": 2.626217941538806, "grad_norm": 1.3435248136520386, "learning_rate": 9.292631578947368e-05, "loss": 0.4226, "step": 46899 }, { "epoch": 2.626273938850935, "grad_norm": 1.3188532590866089, "learning_rate": 9.292605263157896e-05, "loss": 0.3834, "step": 46900 }, { "epoch": 2.626329936163064, "grad_norm": 1.232077956199646, "learning_rate": 9.292578947368422e-05, "loss": 0.4165, "step": 46901 }, { "epoch": 2.626385933475193, "grad_norm": 1.9849222898483276, "learning_rate": 9.292552631578948e-05, "loss": 0.4392, "step": 46902 }, { "epoch": 2.626441930787322, "grad_norm": 1.1978445053100586, "learning_rate": 9.292526315789474e-05, "loss": 0.4123, "step": 46903 }, { "epoch": 2.626497928099451, "grad_norm": 1.1746307611465454, "learning_rate": 9.292500000000001e-05, "loss": 0.42, "step": 46904 }, { "epoch": 2.62655392541158, "grad_norm": 0.9686213731765747, "learning_rate": 9.292473684210527e-05, "loss": 0.2971, "step": 46905 }, { "epoch": 2.626609922723709, "grad_norm": 1.1451046466827393, "learning_rate": 9.292447368421053e-05, "loss": 0.4534, "step": 46906 }, { "epoch": 2.626665920035838, "grad_norm": 1.5036507844924927, "learning_rate": 9.292421052631579e-05, "loss": 0.5551, "step": 46907 }, { "epoch": 
2.6267219173479672, "grad_norm": 0.944596529006958, "learning_rate": 9.292394736842106e-05, "loss": 0.258, "step": 46908 }, { "epoch": 2.6267779146600962, "grad_norm": 1.2620248794555664, "learning_rate": 9.292368421052632e-05, "loss": 0.3711, "step": 46909 }, { "epoch": 2.6268339119722253, "grad_norm": 1.1045030355453491, "learning_rate": 9.292342105263158e-05, "loss": 0.395, "step": 46910 }, { "epoch": 2.6268899092843543, "grad_norm": 1.62193763256073, "learning_rate": 9.292315789473684e-05, "loss": 0.394, "step": 46911 }, { "epoch": 2.6269459065964833, "grad_norm": 1.3000679016113281, "learning_rate": 9.292289473684211e-05, "loss": 0.4478, "step": 46912 }, { "epoch": 2.6270019039086123, "grad_norm": 0.984430730342865, "learning_rate": 9.292263157894737e-05, "loss": 0.2648, "step": 46913 }, { "epoch": 2.6270579012207413, "grad_norm": 1.4571577310562134, "learning_rate": 9.292236842105265e-05, "loss": 0.5023, "step": 46914 }, { "epoch": 2.6271138985328704, "grad_norm": 1.1449741125106812, "learning_rate": 9.292210526315789e-05, "loss": 0.3326, "step": 46915 }, { "epoch": 2.6271698958449994, "grad_norm": 1.0675387382507324, "learning_rate": 9.292184210526315e-05, "loss": 0.4177, "step": 46916 }, { "epoch": 2.6272258931571284, "grad_norm": 1.390217661857605, "learning_rate": 9.292157894736843e-05, "loss": 0.377, "step": 46917 }, { "epoch": 2.6272818904692574, "grad_norm": 1.4298555850982666, "learning_rate": 9.292131578947369e-05, "loss": 0.3869, "step": 46918 }, { "epoch": 2.6273378877813864, "grad_norm": 1.584778904914856, "learning_rate": 9.292105263157896e-05, "loss": 0.4475, "step": 46919 }, { "epoch": 2.6273938850935155, "grad_norm": 1.3577972650527954, "learning_rate": 9.29207894736842e-05, "loss": 0.5336, "step": 46920 }, { "epoch": 2.6274498824056445, "grad_norm": 1.4191336631774902, "learning_rate": 9.292052631578948e-05, "loss": 0.3963, "step": 46921 }, { "epoch": 2.6275058797177735, "grad_norm": 1.4518589973449707, "learning_rate": 9.292026315789474e-05, 
"loss": 0.3855, "step": 46922 }, { "epoch": 2.6275618770299025, "grad_norm": 9.198808670043945, "learning_rate": 9.292000000000001e-05, "loss": 0.4031, "step": 46923 }, { "epoch": 2.6276178743420315, "grad_norm": 1.1096291542053223, "learning_rate": 9.291973684210527e-05, "loss": 0.4039, "step": 46924 }, { "epoch": 2.6276738716541606, "grad_norm": 1.6860275268554688, "learning_rate": 9.291947368421053e-05, "loss": 0.4522, "step": 46925 }, { "epoch": 2.6277298689662896, "grad_norm": 1.2366893291473389, "learning_rate": 9.291921052631579e-05, "loss": 0.5135, "step": 46926 }, { "epoch": 2.6277858662784186, "grad_norm": 1.3470619916915894, "learning_rate": 9.291894736842106e-05, "loss": 0.4922, "step": 46927 }, { "epoch": 2.6278418635905476, "grad_norm": 1.3473389148712158, "learning_rate": 9.291868421052632e-05, "loss": 0.3981, "step": 46928 }, { "epoch": 2.6278978609026766, "grad_norm": 1.1883580684661865, "learning_rate": 9.291842105263158e-05, "loss": 0.3534, "step": 46929 }, { "epoch": 2.6279538582148056, "grad_norm": 1.0331083536148071, "learning_rate": 9.291815789473684e-05, "loss": 0.3202, "step": 46930 }, { "epoch": 2.6280098555269347, "grad_norm": 0.9996214509010315, "learning_rate": 9.291789473684212e-05, "loss": 0.3337, "step": 46931 }, { "epoch": 2.6280658528390637, "grad_norm": 1.5348716974258423, "learning_rate": 9.291763157894738e-05, "loss": 0.3376, "step": 46932 }, { "epoch": 2.6281218501511927, "grad_norm": 1.3197225332260132, "learning_rate": 9.291736842105264e-05, "loss": 0.4879, "step": 46933 }, { "epoch": 2.6281778474633217, "grad_norm": 1.050466775894165, "learning_rate": 9.29171052631579e-05, "loss": 0.3923, "step": 46934 }, { "epoch": 2.6282338447754507, "grad_norm": 1.2297800779342651, "learning_rate": 9.291684210526316e-05, "loss": 0.3992, "step": 46935 }, { "epoch": 2.6282898420875798, "grad_norm": 1.0194079875946045, "learning_rate": 9.291657894736843e-05, "loss": 0.3898, "step": 46936 }, { "epoch": 2.628345839399709, "grad_norm": 
0.9917104244232178, "learning_rate": 9.291631578947369e-05, "loss": 0.3579, "step": 46937 }, { "epoch": 2.628401836711838, "grad_norm": 0.9514667987823486, "learning_rate": 9.291605263157895e-05, "loss": 0.3832, "step": 46938 }, { "epoch": 2.628457834023967, "grad_norm": 1.1570740938186646, "learning_rate": 9.291578947368421e-05, "loss": 0.46, "step": 46939 }, { "epoch": 2.628513831336096, "grad_norm": 1.1597615480422974, "learning_rate": 9.291552631578948e-05, "loss": 0.4247, "step": 46940 }, { "epoch": 2.628569828648225, "grad_norm": 1.1936694383621216, "learning_rate": 9.291526315789474e-05, "loss": 0.4533, "step": 46941 }, { "epoch": 2.628625825960354, "grad_norm": 1.2868821620941162, "learning_rate": 9.291500000000001e-05, "loss": 0.359, "step": 46942 }, { "epoch": 2.628681823272483, "grad_norm": 1.1153076887130737, "learning_rate": 9.291473684210526e-05, "loss": 0.3036, "step": 46943 }, { "epoch": 2.628737820584612, "grad_norm": 1.0310460329055786, "learning_rate": 9.291447368421053e-05, "loss": 0.4035, "step": 46944 }, { "epoch": 2.628793817896741, "grad_norm": 1.1082276105880737, "learning_rate": 9.29142105263158e-05, "loss": 0.5377, "step": 46945 }, { "epoch": 2.62884981520887, "grad_norm": 1.0089575052261353, "learning_rate": 9.291394736842107e-05, "loss": 0.306, "step": 46946 }, { "epoch": 2.628905812520999, "grad_norm": 1.1472532749176025, "learning_rate": 9.291368421052631e-05, "loss": 0.3529, "step": 46947 }, { "epoch": 2.628961809833128, "grad_norm": 1.1784005165100098, "learning_rate": 9.291342105263159e-05, "loss": 0.4072, "step": 46948 }, { "epoch": 2.629017807145257, "grad_norm": 1.0315799713134766, "learning_rate": 9.291315789473685e-05, "loss": 0.4009, "step": 46949 }, { "epoch": 2.629073804457386, "grad_norm": 1.257932186126709, "learning_rate": 9.29128947368421e-05, "loss": 0.4242, "step": 46950 }, { "epoch": 2.629129801769515, "grad_norm": 1.4812002182006836, "learning_rate": 9.291263157894738e-05, "loss": 0.4213, "step": 46951 }, { "epoch": 
2.629185799081644, "grad_norm": 1.1574113368988037, "learning_rate": 9.291236842105263e-05, "loss": 0.4464, "step": 46952 }, { "epoch": 2.629241796393773, "grad_norm": 1.3412389755249023, "learning_rate": 9.29121052631579e-05, "loss": 0.4203, "step": 46953 }, { "epoch": 2.629297793705902, "grad_norm": 1.7697086334228516, "learning_rate": 9.291184210526316e-05, "loss": 0.3715, "step": 46954 }, { "epoch": 2.629353791018031, "grad_norm": 1.2484581470489502, "learning_rate": 9.291157894736843e-05, "loss": 0.4023, "step": 46955 }, { "epoch": 2.62940978833016, "grad_norm": 1.1666266918182373, "learning_rate": 9.291131578947369e-05, "loss": 0.408, "step": 46956 }, { "epoch": 2.629465785642289, "grad_norm": 1.3479361534118652, "learning_rate": 9.291105263157895e-05, "loss": 0.4975, "step": 46957 }, { "epoch": 2.629521782954418, "grad_norm": 1.1643496751785278, "learning_rate": 9.291078947368421e-05, "loss": 0.4184, "step": 46958 }, { "epoch": 2.629577780266547, "grad_norm": 1.085070252418518, "learning_rate": 9.291052631578948e-05, "loss": 0.3462, "step": 46959 }, { "epoch": 2.6296337775786762, "grad_norm": 1.0632199048995972, "learning_rate": 9.291026315789474e-05, "loss": 0.4678, "step": 46960 }, { "epoch": 2.6296897748908052, "grad_norm": 1.7379429340362549, "learning_rate": 9.291e-05, "loss": 0.5269, "step": 46961 }, { "epoch": 2.6297457722029343, "grad_norm": 1.1484042406082153, "learning_rate": 9.290973684210526e-05, "loss": 0.3737, "step": 46962 }, { "epoch": 2.6298017695150633, "grad_norm": 1.0907198190689087, "learning_rate": 9.290947368421054e-05, "loss": 0.3051, "step": 46963 }, { "epoch": 2.6298577668271923, "grad_norm": 1.4166656732559204, "learning_rate": 9.29092105263158e-05, "loss": 0.4188, "step": 46964 }, { "epoch": 2.6299137641393213, "grad_norm": 1.0852526426315308, "learning_rate": 9.290894736842106e-05, "loss": 0.4816, "step": 46965 }, { "epoch": 2.6299697614514503, "grad_norm": 1.251293420791626, "learning_rate": 9.290868421052632e-05, "loss": 
0.5106, "step": 46966 }, { "epoch": 2.6300257587635794, "grad_norm": 1.3656463623046875, "learning_rate": 9.290842105263158e-05, "loss": 0.5036, "step": 46967 }, { "epoch": 2.6300817560757084, "grad_norm": 1.1081115007400513, "learning_rate": 9.290815789473685e-05, "loss": 0.433, "step": 46968 }, { "epoch": 2.6301377533878374, "grad_norm": 1.1234749555587769, "learning_rate": 9.290789473684211e-05, "loss": 0.3871, "step": 46969 }, { "epoch": 2.6301937506999664, "grad_norm": 1.28562331199646, "learning_rate": 9.290763157894737e-05, "loss": 0.5969, "step": 46970 }, { "epoch": 2.6302497480120954, "grad_norm": 1.0516748428344727, "learning_rate": 9.290736842105263e-05, "loss": 0.3274, "step": 46971 }, { "epoch": 2.6303057453242245, "grad_norm": 2.0585405826568604, "learning_rate": 9.29071052631579e-05, "loss": 0.4967, "step": 46972 }, { "epoch": 2.6303617426363535, "grad_norm": 1.1156389713287354, "learning_rate": 9.290684210526316e-05, "loss": 0.3613, "step": 46973 }, { "epoch": 2.6304177399484825, "grad_norm": 1.154118299484253, "learning_rate": 9.290657894736843e-05, "loss": 0.3427, "step": 46974 }, { "epoch": 2.6304737372606115, "grad_norm": 1.2301157712936401, "learning_rate": 9.290631578947368e-05, "loss": 0.4436, "step": 46975 }, { "epoch": 2.6305297345727405, "grad_norm": 5.588713645935059, "learning_rate": 9.290605263157895e-05, "loss": 0.3457, "step": 46976 }, { "epoch": 2.6305857318848695, "grad_norm": 1.3277336359024048, "learning_rate": 9.290578947368421e-05, "loss": 0.4595, "step": 46977 }, { "epoch": 2.6306417291969986, "grad_norm": 1.2962141036987305, "learning_rate": 9.290552631578949e-05, "loss": 0.4179, "step": 46978 }, { "epoch": 2.6306977265091276, "grad_norm": 1.1249175071716309, "learning_rate": 9.290526315789475e-05, "loss": 0.5147, "step": 46979 }, { "epoch": 2.6307537238212566, "grad_norm": 1.299116611480713, "learning_rate": 9.2905e-05, "loss": 0.4371, "step": 46980 }, { "epoch": 2.6308097211333856, "grad_norm": 1.0215312242507935, 
"learning_rate": 9.290473684210527e-05, "loss": 0.3774, "step": 46981 }, { "epoch": 2.6308657184455146, "grad_norm": 1.327256679534912, "learning_rate": 9.290447368421054e-05, "loss": 0.3434, "step": 46982 }, { "epoch": 2.6309217157576437, "grad_norm": 1.212263584136963, "learning_rate": 9.29042105263158e-05, "loss": 0.4863, "step": 46983 }, { "epoch": 2.6309777130697727, "grad_norm": 1.2237985134124756, "learning_rate": 9.290394736842106e-05, "loss": 0.3657, "step": 46984 }, { "epoch": 2.6310337103819017, "grad_norm": 1.1181721687316895, "learning_rate": 9.290368421052632e-05, "loss": 0.4082, "step": 46985 }, { "epoch": 2.6310897076940307, "grad_norm": 1.048292636871338, "learning_rate": 9.290342105263158e-05, "loss": 0.3604, "step": 46986 }, { "epoch": 2.6311457050061597, "grad_norm": 1.0829049348831177, "learning_rate": 9.290315789473685e-05, "loss": 0.4133, "step": 46987 }, { "epoch": 2.6312017023182888, "grad_norm": 1.189784049987793, "learning_rate": 9.290289473684211e-05, "loss": 0.3572, "step": 46988 }, { "epoch": 2.631257699630418, "grad_norm": 1.4107248783111572, "learning_rate": 9.290263157894737e-05, "loss": 0.4099, "step": 46989 }, { "epoch": 2.631313696942547, "grad_norm": 1.120826244354248, "learning_rate": 9.290236842105263e-05, "loss": 0.4595, "step": 46990 }, { "epoch": 2.631369694254676, "grad_norm": 1.0824512243270874, "learning_rate": 9.29021052631579e-05, "loss": 0.3438, "step": 46991 }, { "epoch": 2.631425691566805, "grad_norm": 1.2773828506469727, "learning_rate": 9.290184210526316e-05, "loss": 0.4036, "step": 46992 }, { "epoch": 2.631481688878934, "grad_norm": 1.039052128791809, "learning_rate": 9.290157894736842e-05, "loss": 0.4689, "step": 46993 }, { "epoch": 2.631537686191063, "grad_norm": 1.19552743434906, "learning_rate": 9.290131578947368e-05, "loss": 0.3705, "step": 46994 }, { "epoch": 2.631593683503192, "grad_norm": 1.3612709045410156, "learning_rate": 9.290105263157896e-05, "loss": 0.4747, "step": 46995 }, { "epoch": 
2.631649680815321, "grad_norm": 1.4303760528564453, "learning_rate": 9.290078947368422e-05, "loss": 0.4465, "step": 46996 }, { "epoch": 2.63170567812745, "grad_norm": 1.2032630443572998, "learning_rate": 9.290052631578949e-05, "loss": 0.4565, "step": 46997 }, { "epoch": 2.631761675439579, "grad_norm": 1.4642654657363892, "learning_rate": 9.290026315789474e-05, "loss": 0.4018, "step": 46998 }, { "epoch": 2.631817672751708, "grad_norm": 1.0528929233551025, "learning_rate": 9.290000000000001e-05, "loss": 0.3331, "step": 46999 }, { "epoch": 2.631873670063837, "grad_norm": 1.539127230644226, "learning_rate": 9.289973684210527e-05, "loss": 0.4537, "step": 47000 }, { "epoch": 2.631929667375966, "grad_norm": 1.1344317197799683, "learning_rate": 9.289947368421054e-05, "loss": 0.3488, "step": 47001 }, { "epoch": 2.631985664688095, "grad_norm": 1.1277116537094116, "learning_rate": 9.289921052631579e-05, "loss": 0.3822, "step": 47002 }, { "epoch": 2.632041662000224, "grad_norm": 0.9893087148666382, "learning_rate": 9.289894736842105e-05, "loss": 0.3022, "step": 47003 }, { "epoch": 2.632097659312353, "grad_norm": 1.4053288698196411, "learning_rate": 9.289868421052632e-05, "loss": 0.4629, "step": 47004 }, { "epoch": 2.632153656624482, "grad_norm": 1.2331140041351318, "learning_rate": 9.289842105263158e-05, "loss": 0.4773, "step": 47005 }, { "epoch": 2.632209653936611, "grad_norm": 1.2761956453323364, "learning_rate": 9.289815789473685e-05, "loss": 0.4183, "step": 47006 }, { "epoch": 2.63226565124874, "grad_norm": 1.1187763214111328, "learning_rate": 9.28978947368421e-05, "loss": 0.4508, "step": 47007 }, { "epoch": 2.632321648560869, "grad_norm": 0.9472700953483582, "learning_rate": 9.289763157894737e-05, "loss": 0.3241, "step": 47008 }, { "epoch": 2.632377645872998, "grad_norm": 1.1563911437988281, "learning_rate": 9.289736842105263e-05, "loss": 0.4682, "step": 47009 }, { "epoch": 2.632433643185127, "grad_norm": 1.0195175409317017, "learning_rate": 9.28971052631579e-05, "loss": 
0.3863, "step": 47010 }, { "epoch": 2.632489640497256, "grad_norm": 1.56032395362854, "learning_rate": 9.289684210526317e-05, "loss": 0.426, "step": 47011 }, { "epoch": 2.632545637809385, "grad_norm": 1.059169054031372, "learning_rate": 9.289657894736843e-05, "loss": 0.3269, "step": 47012 }, { "epoch": 2.6326016351215142, "grad_norm": 1.08601713180542, "learning_rate": 9.289631578947369e-05, "loss": 0.4132, "step": 47013 }, { "epoch": 2.6326576324336433, "grad_norm": 1.2722054719924927, "learning_rate": 9.289605263157896e-05, "loss": 0.337, "step": 47014 }, { "epoch": 2.6327136297457723, "grad_norm": 0.9643957018852234, "learning_rate": 9.289578947368422e-05, "loss": 0.3015, "step": 47015 }, { "epoch": 2.6327696270579013, "grad_norm": 1.1986796855926514, "learning_rate": 9.289552631578948e-05, "loss": 0.4105, "step": 47016 }, { "epoch": 2.6328256243700303, "grad_norm": 1.449905276298523, "learning_rate": 9.289526315789474e-05, "loss": 0.4174, "step": 47017 }, { "epoch": 2.6328816216821593, "grad_norm": 1.181523323059082, "learning_rate": 9.289500000000001e-05, "loss": 0.326, "step": 47018 }, { "epoch": 2.6329376189942884, "grad_norm": 1.2053459882736206, "learning_rate": 9.289473684210527e-05, "loss": 0.4061, "step": 47019 }, { "epoch": 2.6329936163064174, "grad_norm": 1.5102391242980957, "learning_rate": 9.289447368421053e-05, "loss": 0.6364, "step": 47020 }, { "epoch": 2.6330496136185464, "grad_norm": 1.1346380710601807, "learning_rate": 9.289421052631579e-05, "loss": 0.3587, "step": 47021 }, { "epoch": 2.6331056109306754, "grad_norm": 1.4418588876724243, "learning_rate": 9.289394736842105e-05, "loss": 0.3857, "step": 47022 }, { "epoch": 2.6331616082428044, "grad_norm": 0.9776993989944458, "learning_rate": 9.289368421052632e-05, "loss": 0.3339, "step": 47023 }, { "epoch": 2.6332176055549334, "grad_norm": 1.1112648248672485, "learning_rate": 9.289342105263158e-05, "loss": 0.3305, "step": 47024 }, { "epoch": 2.6332736028670625, "grad_norm": 1.172960877418518, 
"learning_rate": 9.289315789473684e-05, "loss": 0.4012, "step": 47025 }, { "epoch": 2.6333296001791915, "grad_norm": 1.1801588535308838, "learning_rate": 9.28928947368421e-05, "loss": 0.3398, "step": 47026 }, { "epoch": 2.6333855974913205, "grad_norm": 1.0496631860733032, "learning_rate": 9.289263157894738e-05, "loss": 0.3505, "step": 47027 }, { "epoch": 2.6334415948034495, "grad_norm": 1.0360312461853027, "learning_rate": 9.289236842105264e-05, "loss": 0.382, "step": 47028 }, { "epoch": 2.6334975921155785, "grad_norm": 1.087490200996399, "learning_rate": 9.289210526315791e-05, "loss": 0.4375, "step": 47029 }, { "epoch": 2.6335535894277076, "grad_norm": 1.371126413345337, "learning_rate": 9.289184210526315e-05, "loss": 0.4592, "step": 47030 }, { "epoch": 2.6336095867398366, "grad_norm": 1.1268630027770996, "learning_rate": 9.289157894736843e-05, "loss": 0.4559, "step": 47031 }, { "epoch": 2.6336655840519656, "grad_norm": 1.2383745908737183, "learning_rate": 9.289131578947369e-05, "loss": 0.4004, "step": 47032 }, { "epoch": 2.6337215813640946, "grad_norm": 1.1104247570037842, "learning_rate": 9.289105263157896e-05, "loss": 0.4466, "step": 47033 }, { "epoch": 2.6337775786762236, "grad_norm": 1.1377804279327393, "learning_rate": 9.289078947368422e-05, "loss": 0.4069, "step": 47034 }, { "epoch": 2.6338335759883527, "grad_norm": 1.3107728958129883, "learning_rate": 9.289052631578948e-05, "loss": 0.6453, "step": 47035 }, { "epoch": 2.6338895733004817, "grad_norm": 1.147043228149414, "learning_rate": 9.289026315789474e-05, "loss": 0.4879, "step": 47036 }, { "epoch": 2.6339455706126107, "grad_norm": 1.217816948890686, "learning_rate": 9.289e-05, "loss": 0.3928, "step": 47037 }, { "epoch": 2.6340015679247397, "grad_norm": 1.022105097770691, "learning_rate": 9.288973684210527e-05, "loss": 0.4093, "step": 47038 }, { "epoch": 2.6340575652368687, "grad_norm": 1.2954506874084473, "learning_rate": 9.288947368421052e-05, "loss": 0.4198, "step": 47039 }, { "epoch": 
2.6341135625489978, "grad_norm": 1.022426724433899, "learning_rate": 9.288921052631579e-05, "loss": 0.3374, "step": 47040 }, { "epoch": 2.6341695598611268, "grad_norm": 1.114538311958313, "learning_rate": 9.288894736842105e-05, "loss": 0.3343, "step": 47041 }, { "epoch": 2.634225557173256, "grad_norm": 1.1801730394363403, "learning_rate": 9.288868421052633e-05, "loss": 0.3972, "step": 47042 }, { "epoch": 2.634281554485385, "grad_norm": 1.5636847019195557, "learning_rate": 9.288842105263159e-05, "loss": 0.4531, "step": 47043 }, { "epoch": 2.634337551797514, "grad_norm": 1.074082851409912, "learning_rate": 9.288815789473685e-05, "loss": 0.5799, "step": 47044 }, { "epoch": 2.634393549109643, "grad_norm": 1.2119306325912476, "learning_rate": 9.28878947368421e-05, "loss": 0.3589, "step": 47045 }, { "epoch": 2.634449546421772, "grad_norm": 1.1573002338409424, "learning_rate": 9.288763157894738e-05, "loss": 0.3342, "step": 47046 }, { "epoch": 2.634505543733901, "grad_norm": 1.228115439414978, "learning_rate": 9.288736842105264e-05, "loss": 0.3552, "step": 47047 }, { "epoch": 2.63456154104603, "grad_norm": 1.1376709938049316, "learning_rate": 9.28871052631579e-05, "loss": 0.459, "step": 47048 }, { "epoch": 2.634617538358159, "grad_norm": 1.2990806102752686, "learning_rate": 9.288684210526316e-05, "loss": 0.3231, "step": 47049 }, { "epoch": 2.634673535670288, "grad_norm": 1.468453049659729, "learning_rate": 9.288657894736843e-05, "loss": 0.272, "step": 47050 }, { "epoch": 2.634729532982417, "grad_norm": 1.0406954288482666, "learning_rate": 9.288631578947369e-05, "loss": 0.3387, "step": 47051 }, { "epoch": 2.634785530294546, "grad_norm": 1.073775053024292, "learning_rate": 9.288605263157896e-05, "loss": 0.293, "step": 47052 }, { "epoch": 2.634841527606675, "grad_norm": 1.3651704788208008, "learning_rate": 9.288578947368421e-05, "loss": 0.4932, "step": 47053 }, { "epoch": 2.634897524918804, "grad_norm": 1.257690668106079, "learning_rate": 9.288552631578947e-05, "loss": 
0.5209, "step": 47054 }, { "epoch": 2.634953522230933, "grad_norm": 1.138351321220398, "learning_rate": 9.288526315789474e-05, "loss": 0.4218, "step": 47055 }, { "epoch": 2.635009519543062, "grad_norm": 1.2450518608093262, "learning_rate": 9.2885e-05, "loss": 0.5495, "step": 47056 }, { "epoch": 2.635065516855191, "grad_norm": null, "learning_rate": 9.2885e-05, "loss": 0.3253, "step": 47057 }, { "epoch": 2.63512151416732, "grad_norm": 1.440476655960083, "learning_rate": 9.288473684210526e-05, "loss": 0.4147, "step": 47058 }, { "epoch": 2.635177511479449, "grad_norm": 1.3521078824996948, "learning_rate": 9.288447368421052e-05, "loss": 0.5, "step": 47059 }, { "epoch": 2.635233508791578, "grad_norm": 1.2027658224105835, "learning_rate": 9.28842105263158e-05, "loss": 0.4045, "step": 47060 }, { "epoch": 2.635289506103707, "grad_norm": 1.1683554649353027, "learning_rate": 9.288394736842106e-05, "loss": 0.422, "step": 47061 }, { "epoch": 2.635345503415836, "grad_norm": 1.333648920059204, "learning_rate": 9.288368421052633e-05, "loss": 0.5873, "step": 47062 }, { "epoch": 2.635401500727965, "grad_norm": 1.1779141426086426, "learning_rate": 9.288342105263157e-05, "loss": 0.3586, "step": 47063 }, { "epoch": 2.635457498040094, "grad_norm": 1.0689085721969604, "learning_rate": 9.288315789473685e-05, "loss": 0.3111, "step": 47064 }, { "epoch": 2.6355134953522232, "grad_norm": 1.0622735023498535, "learning_rate": 9.288289473684211e-05, "loss": 0.3866, "step": 47065 }, { "epoch": 2.6355694926643523, "grad_norm": 1.306572437286377, "learning_rate": 9.288263157894738e-05, "loss": 0.4758, "step": 47066 }, { "epoch": 2.6356254899764813, "grad_norm": 1.3974851369857788, "learning_rate": 9.288236842105264e-05, "loss": 0.4602, "step": 47067 }, { "epoch": 2.6356814872886103, "grad_norm": 1.0201671123504639, "learning_rate": 9.28821052631579e-05, "loss": 0.3692, "step": 47068 }, { "epoch": 2.6357374846007393, "grad_norm": 1.1542476415634155, "learning_rate": 9.288184210526316e-05,
"loss": 0.3613, "step": 47069 }, { "epoch": 2.6357934819128683, "grad_norm": 1.2347222566604614, "learning_rate": 9.288157894736843e-05, "loss": 0.5051, "step": 47070 }, { "epoch": 2.6358494792249973, "grad_norm": 1.1261557340621948, "learning_rate": 9.288131578947369e-05, "loss": 0.3834, "step": 47071 }, { "epoch": 2.6359054765371264, "grad_norm": 1.3250292539596558, "learning_rate": 9.288105263157895e-05, "loss": 0.4385, "step": 47072 }, { "epoch": 2.6359614738492554, "grad_norm": 1.7122801542282104, "learning_rate": 9.288078947368421e-05, "loss": 0.4768, "step": 47073 }, { "epoch": 2.6360174711613844, "grad_norm": 1.144468069076538, "learning_rate": 9.288052631578947e-05, "loss": 0.3499, "step": 47074 }, { "epoch": 2.6360734684735134, "grad_norm": 1.2208809852600098, "learning_rate": 9.288026315789475e-05, "loss": 0.3005, "step": 47075 }, { "epoch": 2.6361294657856424, "grad_norm": 1.7636222839355469, "learning_rate": 9.288e-05, "loss": 0.435, "step": 47076 }, { "epoch": 2.6361854630977715, "grad_norm": 1.6449847221374512, "learning_rate": 9.287973684210526e-05, "loss": 0.2522, "step": 47077 }, { "epoch": 2.6362414604099005, "grad_norm": 1.0474190711975098, "learning_rate": 9.287947368421052e-05, "loss": 0.3655, "step": 47078 }, { "epoch": 2.6362974577220295, "grad_norm": 1.1536836624145508, "learning_rate": 9.28792105263158e-05, "loss": 0.4376, "step": 47079 }, { "epoch": 2.6363534550341585, "grad_norm": 1.1514642238616943, "learning_rate": 9.287894736842106e-05, "loss": 0.3202, "step": 47080 }, { "epoch": 2.6364094523462875, "grad_norm": 1.098402500152588, "learning_rate": 9.287868421052632e-05, "loss": 0.3741, "step": 47081 }, { "epoch": 2.6364654496584166, "grad_norm": 1.1826136112213135, "learning_rate": 9.287842105263158e-05, "loss": 0.4155, "step": 47082 }, { "epoch": 2.6365214469705456, "grad_norm": 1.149810552597046, "learning_rate": 9.287815789473685e-05, "loss": 0.4187, "step": 47083 }, { "epoch": 2.6365774442826746, "grad_norm": 1.220630168914795, 
"learning_rate": 9.287789473684211e-05, "loss": 0.3424, "step": 47084 }, { "epoch": 2.636633441594803, "grad_norm": 1.2126140594482422, "learning_rate": 9.287763157894738e-05, "loss": 0.4373, "step": 47085 }, { "epoch": 2.6366894389069326, "grad_norm": 1.2777427434921265, "learning_rate": 9.287736842105263e-05, "loss": 0.598, "step": 47086 }, { "epoch": 2.636745436219061, "grad_norm": 1.172796607017517, "learning_rate": 9.28771052631579e-05, "loss": 0.4064, "step": 47087 }, { "epoch": 2.6368014335311907, "grad_norm": 1.424055576324463, "learning_rate": 9.287684210526316e-05, "loss": 0.4675, "step": 47088 }, { "epoch": 2.6368574308433192, "grad_norm": 1.1924517154693604, "learning_rate": 9.287657894736842e-05, "loss": 0.3833, "step": 47089 }, { "epoch": 2.6369134281554487, "grad_norm": 1.1879324913024902, "learning_rate": 9.28763157894737e-05, "loss": 0.34, "step": 47090 }, { "epoch": 2.6369694254675773, "grad_norm": 1.2103761434555054, "learning_rate": 9.287605263157894e-05, "loss": 0.4749, "step": 47091 }, { "epoch": 2.6370254227797068, "grad_norm": 0.9686588048934937, "learning_rate": 9.287578947368422e-05, "loss": 0.3235, "step": 47092 }, { "epoch": 2.6370814200918353, "grad_norm": 1.100850224494934, "learning_rate": 9.287552631578947e-05, "loss": 0.3143, "step": 47093 }, { "epoch": 2.637137417403965, "grad_norm": 1.1561366319656372, "learning_rate": 9.287526315789475e-05, "loss": 0.3784, "step": 47094 }, { "epoch": 2.6371934147160934, "grad_norm": 1.2292882204055786, "learning_rate": 9.2875e-05, "loss": 0.3818, "step": 47095 }, { "epoch": 2.637249412028223, "grad_norm": 1.344846248626709, "learning_rate": 9.287473684210527e-05, "loss": 0.3279, "step": 47096 }, { "epoch": 2.6373054093403514, "grad_norm": 1.0718538761138916, "learning_rate": 9.287447368421053e-05, "loss": 0.3904, "step": 47097 }, { "epoch": 2.637361406652481, "grad_norm": 1.2847731113433838, "learning_rate": 9.28742105263158e-05, "loss": 0.4544, "step": 47098 }, { "epoch": 2.6374174039646094, 
"grad_norm": 1.1354014873504639, "learning_rate": 9.287394736842106e-05, "loss": 0.4271, "step": 47099 }, { "epoch": 2.637473401276739, "grad_norm": 1.6589752435684204, "learning_rate": 9.287368421052632e-05, "loss": 0.4755, "step": 47100 }, { "epoch": 2.6375293985888675, "grad_norm": 1.148756742477417, "learning_rate": 9.287342105263158e-05, "loss": 0.4227, "step": 47101 }, { "epoch": 2.637585395900997, "grad_norm": 1.616754412651062, "learning_rate": 9.287315789473685e-05, "loss": 0.3574, "step": 47102 }, { "epoch": 2.6376413932131255, "grad_norm": 1.1399976015090942, "learning_rate": 9.287289473684211e-05, "loss": 0.4349, "step": 47103 }, { "epoch": 2.637697390525255, "grad_norm": 1.0837591886520386, "learning_rate": 9.287263157894737e-05, "loss": 0.4025, "step": 47104 }, { "epoch": 2.6377533878373836, "grad_norm": 1.160154104232788, "learning_rate": 9.287236842105263e-05, "loss": 0.3851, "step": 47105 }, { "epoch": 2.637809385149513, "grad_norm": 1.4484425783157349, "learning_rate": 9.28721052631579e-05, "loss": 0.5181, "step": 47106 }, { "epoch": 2.6378653824616416, "grad_norm": 1.1300444602966309, "learning_rate": 9.287184210526317e-05, "loss": 0.3668, "step": 47107 }, { "epoch": 2.637921379773771, "grad_norm": 1.387725591659546, "learning_rate": 9.287157894736842e-05, "loss": 0.4759, "step": 47108 }, { "epoch": 2.6379773770858996, "grad_norm": 1.018106460571289, "learning_rate": 9.287131578947368e-05, "loss": 0.3314, "step": 47109 }, { "epoch": 2.638033374398029, "grad_norm": 1.1239522695541382, "learning_rate": 9.287105263157894e-05, "loss": 0.4261, "step": 47110 }, { "epoch": 2.6380893717101577, "grad_norm": 1.288457989692688, "learning_rate": 9.287078947368422e-05, "loss": 0.3915, "step": 47111 }, { "epoch": 2.638145369022287, "grad_norm": 1.1860618591308594, "learning_rate": 9.287052631578948e-05, "loss": 0.3751, "step": 47112 }, { "epoch": 2.6382013663344157, "grad_norm": 1.0909477472305298, "learning_rate": 9.287026315789474e-05, "loss": 0.395, "step": 
47113 }, { "epoch": 2.638257363646545, "grad_norm": 1.2550222873687744, "learning_rate": 9.287e-05, "loss": 0.3579, "step": 47114 }, { "epoch": 2.6383133609586737, "grad_norm": 1.2246642112731934, "learning_rate": 9.286973684210527e-05, "loss": 0.4079, "step": 47115 }, { "epoch": 2.638369358270803, "grad_norm": 1.1546550989151, "learning_rate": 9.286947368421053e-05, "loss": 0.3901, "step": 47116 }, { "epoch": 2.638425355582932, "grad_norm": 1.117661476135254, "learning_rate": 9.28692105263158e-05, "loss": 0.3684, "step": 47117 }, { "epoch": 2.6384813528950612, "grad_norm": 1.0887564420700073, "learning_rate": 9.286894736842105e-05, "loss": 0.3612, "step": 47118 }, { "epoch": 2.63853735020719, "grad_norm": 1.2764697074890137, "learning_rate": 9.286868421052632e-05, "loss": 0.5214, "step": 47119 }, { "epoch": 2.6385933475193193, "grad_norm": 1.1876147985458374, "learning_rate": 9.286842105263158e-05, "loss": 0.3824, "step": 47120 }, { "epoch": 2.638649344831448, "grad_norm": 1.1166566610336304, "learning_rate": 9.286815789473686e-05, "loss": 0.3498, "step": 47121 }, { "epoch": 2.6387053421435773, "grad_norm": 1.0656471252441406, "learning_rate": 9.286789473684212e-05, "loss": 0.3418, "step": 47122 }, { "epoch": 2.638761339455706, "grad_norm": 1.378637671470642, "learning_rate": 9.286763157894738e-05, "loss": 0.3847, "step": 47123 }, { "epoch": 2.6388173367678354, "grad_norm": 1.19815194606781, "learning_rate": 9.286736842105263e-05, "loss": 0.3878, "step": 47124 }, { "epoch": 2.638873334079964, "grad_norm": 1.042303442955017, "learning_rate": 9.28671052631579e-05, "loss": 0.4004, "step": 47125 }, { "epoch": 2.6389293313920934, "grad_norm": 1.1364494562149048, "learning_rate": 9.286684210526317e-05, "loss": 0.4205, "step": 47126 }, { "epoch": 2.638985328704222, "grad_norm": 1.3343371152877808, "learning_rate": 9.286657894736843e-05, "loss": 0.3113, "step": 47127 }, { "epoch": 2.6390413260163514, "grad_norm": 1.4137060642242432, "learning_rate": 9.286631578947369e-05, 
"loss": 0.378, "step": 47128 }, { "epoch": 2.63909732332848, "grad_norm": 1.2229394912719727, "learning_rate": 9.286605263157895e-05, "loss": 0.4349, "step": 47129 }, { "epoch": 2.6391533206406095, "grad_norm": 1.1136841773986816, "learning_rate": 9.286578947368422e-05, "loss": 0.2989, "step": 47130 }, { "epoch": 2.639209317952738, "grad_norm": 1.4584095478057861, "learning_rate": 9.286552631578948e-05, "loss": 0.5938, "step": 47131 }, { "epoch": 2.6392653152648675, "grad_norm": 1.0897856950759888, "learning_rate": 9.286526315789474e-05, "loss": 0.4156, "step": 47132 }, { "epoch": 2.639321312576996, "grad_norm": 1.0507068634033203, "learning_rate": 9.2865e-05, "loss": 0.3409, "step": 47133 }, { "epoch": 2.6393773098891256, "grad_norm": 1.0902528762817383, "learning_rate": 9.286473684210527e-05, "loss": 0.3685, "step": 47134 }, { "epoch": 2.639433307201254, "grad_norm": 1.1191939115524292, "learning_rate": 9.286447368421053e-05, "loss": 0.3469, "step": 47135 }, { "epoch": 2.6394893045133836, "grad_norm": 1.2916542291641235, "learning_rate": 9.286421052631579e-05, "loss": 0.3909, "step": 47136 }, { "epoch": 2.639545301825512, "grad_norm": 1.1472058296203613, "learning_rate": 9.286394736842105e-05, "loss": 0.3312, "step": 47137 }, { "epoch": 2.6396012991376416, "grad_norm": 1.1692471504211426, "learning_rate": 9.286368421052633e-05, "loss": 0.4288, "step": 47138 }, { "epoch": 2.63965729644977, "grad_norm": 1.3263696432113647, "learning_rate": 9.286342105263158e-05, "loss": 0.3914, "step": 47139 }, { "epoch": 2.6397132937618997, "grad_norm": 1.0097308158874512, "learning_rate": 9.286315789473686e-05, "loss": 0.4063, "step": 47140 }, { "epoch": 2.6397692910740282, "grad_norm": 1.333343505859375, "learning_rate": 9.28628947368421e-05, "loss": 0.4624, "step": 47141 }, { "epoch": 2.6398252883861577, "grad_norm": 0.9409430027008057, "learning_rate": 9.286263157894736e-05, "loss": 0.399, "step": 47142 }, { "epoch": 2.6398812856982863, "grad_norm": 1.144340991973877, 
"learning_rate": 9.286236842105264e-05, "loss": 0.371, "step": 47143 }, { "epoch": 2.6399372830104157, "grad_norm": 1.2190780639648438, "learning_rate": 9.28621052631579e-05, "loss": 0.3923, "step": 47144 }, { "epoch": 2.6399932803225443, "grad_norm": 1.3309259414672852, "learning_rate": 9.286184210526317e-05, "loss": 0.4158, "step": 47145 }, { "epoch": 2.640049277634674, "grad_norm": 1.1447874307632446, "learning_rate": 9.286157894736842e-05, "loss": 0.4537, "step": 47146 }, { "epoch": 2.6401052749468024, "grad_norm": 1.3446931838989258, "learning_rate": 9.286131578947369e-05, "loss": 0.4532, "step": 47147 }, { "epoch": 2.640161272258932, "grad_norm": 1.4104149341583252, "learning_rate": 9.286105263157895e-05, "loss": 0.4165, "step": 47148 }, { "epoch": 2.6402172695710604, "grad_norm": 1.412948727607727, "learning_rate": 9.286078947368422e-05, "loss": 0.5424, "step": 47149 }, { "epoch": 2.64027326688319, "grad_norm": 1.2615450620651245, "learning_rate": 9.286052631578947e-05, "loss": 0.4352, "step": 47150 }, { "epoch": 2.6403292641953184, "grad_norm": 1.1721049547195435, "learning_rate": 9.286026315789474e-05, "loss": 0.3884, "step": 47151 }, { "epoch": 2.640385261507448, "grad_norm": 1.5796034336090088, "learning_rate": 9.286e-05, "loss": 0.5045, "step": 47152 }, { "epoch": 2.6404412588195765, "grad_norm": 1.7254384756088257, "learning_rate": 9.285973684210528e-05, "loss": 0.4435, "step": 47153 }, { "epoch": 2.640497256131706, "grad_norm": 1.2196675539016724, "learning_rate": 9.285947368421054e-05, "loss": 0.3874, "step": 47154 }, { "epoch": 2.6405532534438345, "grad_norm": 1.4416377544403076, "learning_rate": 9.28592105263158e-05, "loss": 0.4471, "step": 47155 }, { "epoch": 2.640609250755964, "grad_norm": 1.1695175170898438, "learning_rate": 9.285894736842105e-05, "loss": 0.3903, "step": 47156 }, { "epoch": 2.6406652480680926, "grad_norm": 1.232383131980896, "learning_rate": 9.285868421052633e-05, "loss": 0.4447, "step": 47157 }, { "epoch": 2.640721245380222, 
"grad_norm": 1.3042259216308594, "learning_rate": 9.285842105263159e-05, "loss": 0.4469, "step": 47158 }, { "epoch": 2.6407772426923506, "grad_norm": 1.0352526903152466, "learning_rate": 9.285815789473685e-05, "loss": 0.4154, "step": 47159 }, { "epoch": 2.64083324000448, "grad_norm": 1.2063462734222412, "learning_rate": 9.285789473684211e-05, "loss": 0.3043, "step": 47160 }, { "epoch": 2.6408892373166086, "grad_norm": 2.4879558086395264, "learning_rate": 9.285763157894737e-05, "loss": 0.5878, "step": 47161 }, { "epoch": 2.640945234628738, "grad_norm": 1.1312165260314941, "learning_rate": 9.285736842105264e-05, "loss": 0.3508, "step": 47162 }, { "epoch": 2.6410012319408667, "grad_norm": 1.5276029109954834, "learning_rate": 9.28571052631579e-05, "loss": 0.406, "step": 47163 }, { "epoch": 2.641057229252996, "grad_norm": 1.3302150964736938, "learning_rate": 9.285684210526316e-05, "loss": 0.5139, "step": 47164 }, { "epoch": 2.6411132265651247, "grad_norm": 1.1590670347213745, "learning_rate": 9.285657894736842e-05, "loss": 0.3705, "step": 47165 }, { "epoch": 2.641169223877254, "grad_norm": 1.179772973060608, "learning_rate": 9.285631578947369e-05, "loss": 0.313, "step": 47166 }, { "epoch": 2.6412252211893827, "grad_norm": 1.2565361261367798, "learning_rate": 9.285605263157895e-05, "loss": 0.481, "step": 47167 }, { "epoch": 2.6412812185015118, "grad_norm": 1.1663827896118164, "learning_rate": 9.285578947368421e-05, "loss": 0.3625, "step": 47168 }, { "epoch": 2.641337215813641, "grad_norm": 1.2567182779312134, "learning_rate": 9.285552631578947e-05, "loss": 0.3578, "step": 47169 }, { "epoch": 2.64139321312577, "grad_norm": 1.1425122022628784, "learning_rate": 9.285526315789474e-05, "loss": 0.3073, "step": 47170 }, { "epoch": 2.641449210437899, "grad_norm": 1.2015430927276611, "learning_rate": 9.2855e-05, "loss": 0.3737, "step": 47171 }, { "epoch": 2.641505207750028, "grad_norm": 1.0744110345840454, "learning_rate": 9.285473684210528e-05, "loss": 0.3106, "step": 47172 }, { 
"epoch": 2.641561205062157, "grad_norm": 1.582350254058838, "learning_rate": 9.285447368421052e-05, "loss": 0.4133, "step": 47173 }, { "epoch": 2.641617202374286, "grad_norm": 1.3005602359771729, "learning_rate": 9.28542105263158e-05, "loss": 0.327, "step": 47174 }, { "epoch": 2.641673199686415, "grad_norm": 1.4784561395645142, "learning_rate": 9.285394736842106e-05, "loss": 0.4519, "step": 47175 }, { "epoch": 2.641729196998544, "grad_norm": 0.9423432350158691, "learning_rate": 9.285368421052632e-05, "loss": 0.3176, "step": 47176 }, { "epoch": 2.641785194310673, "grad_norm": 1.397945761680603, "learning_rate": 9.285342105263159e-05, "loss": 0.41, "step": 47177 }, { "epoch": 2.641841191622802, "grad_norm": 1.6130889654159546, "learning_rate": 9.285315789473684e-05, "loss": 0.4107, "step": 47178 }, { "epoch": 2.641897188934931, "grad_norm": 1.1258587837219238, "learning_rate": 9.285289473684211e-05, "loss": 0.3231, "step": 47179 }, { "epoch": 2.64195318624706, "grad_norm": 1.605398416519165, "learning_rate": 9.285263157894737e-05, "loss": 0.3592, "step": 47180 }, { "epoch": 2.642009183559189, "grad_norm": 1.09932541847229, "learning_rate": 9.285236842105264e-05, "loss": 0.3887, "step": 47181 }, { "epoch": 2.642065180871318, "grad_norm": 1.2176731824874878, "learning_rate": 9.28521052631579e-05, "loss": 0.3711, "step": 47182 }, { "epoch": 2.642121178183447, "grad_norm": 1.0824843645095825, "learning_rate": 9.285184210526316e-05, "loss": 0.4474, "step": 47183 }, { "epoch": 2.642177175495576, "grad_norm": 0.9846916794776917, "learning_rate": 9.285157894736842e-05, "loss": 0.327, "step": 47184 }, { "epoch": 2.642233172807705, "grad_norm": 1.218879222869873, "learning_rate": 9.28513157894737e-05, "loss": 0.423, "step": 47185 }, { "epoch": 2.642289170119834, "grad_norm": 1.1151536703109741, "learning_rate": 9.285105263157895e-05, "loss": 0.4367, "step": 47186 }, { "epoch": 2.642345167431963, "grad_norm": 1.048687219619751, "learning_rate": 9.285078947368421e-05, "loss": 
0.3578, "step": 47187 }, { "epoch": 2.642401164744092, "grad_norm": 1.1753637790679932, "learning_rate": 9.285052631578947e-05, "loss": 0.3725, "step": 47188 }, { "epoch": 2.642457162056221, "grad_norm": 1.3802372217178345, "learning_rate": 9.285026315789475e-05, "loss": 0.4557, "step": 47189 }, { "epoch": 2.64251315936835, "grad_norm": 1.2999836206436157, "learning_rate": 9.285000000000001e-05, "loss": 0.2983, "step": 47190 }, { "epoch": 2.642569156680479, "grad_norm": 0.8757368922233582, "learning_rate": 9.284973684210527e-05, "loss": 0.38, "step": 47191 }, { "epoch": 2.6426251539926082, "grad_norm": 0.9733116626739502, "learning_rate": 9.284947368421053e-05, "loss": 0.3875, "step": 47192 }, { "epoch": 2.6426811513047372, "grad_norm": 1.4432021379470825, "learning_rate": 9.284921052631579e-05, "loss": 0.38, "step": 47193 }, { "epoch": 2.6427371486168663, "grad_norm": 1.0729929208755493, "learning_rate": 9.284894736842106e-05, "loss": 0.3593, "step": 47194 }, { "epoch": 2.6427931459289953, "grad_norm": 1.0462443828582764, "learning_rate": 9.284868421052632e-05, "loss": 0.3983, "step": 47195 }, { "epoch": 2.6428491432411243, "grad_norm": 1.0544977188110352, "learning_rate": 9.284842105263158e-05, "loss": 0.4023, "step": 47196 }, { "epoch": 2.6429051405532533, "grad_norm": 1.2211987972259521, "learning_rate": 9.284815789473684e-05, "loss": 0.329, "step": 47197 }, { "epoch": 2.6429611378653823, "grad_norm": 1.1181702613830566, "learning_rate": 9.284789473684211e-05, "loss": 0.4007, "step": 47198 }, { "epoch": 2.6430171351775114, "grad_norm": 1.0558886528015137, "learning_rate": 9.284763157894737e-05, "loss": 0.374, "step": 47199 }, { "epoch": 2.6430731324896404, "grad_norm": 1.1354585886001587, "learning_rate": 9.284736842105265e-05, "loss": 0.385, "step": 47200 }, { "epoch": 2.6431291298017694, "grad_norm": 1.1070374250411987, "learning_rate": 9.284710526315789e-05, "loss": 0.3705, "step": 47201 }, { "epoch": 2.6431851271138984, "grad_norm": 1.0600956678390503, 
"learning_rate": 9.284684210526316e-05, "loss": 0.3822, "step": 47202 }, { "epoch": 2.6432411244260274, "grad_norm": 1.2256706953048706, "learning_rate": 9.284657894736842e-05, "loss": 0.5034, "step": 47203 }, { "epoch": 2.6432971217381565, "grad_norm": 1.125280499458313, "learning_rate": 9.28463157894737e-05, "loss": 0.3256, "step": 47204 }, { "epoch": 2.6433531190502855, "grad_norm": 1.1259568929672241, "learning_rate": 9.284605263157894e-05, "loss": 0.4374, "step": 47205 }, { "epoch": 2.6434091163624145, "grad_norm": 1.5906413793563843, "learning_rate": 9.284578947368422e-05, "loss": 0.3999, "step": 47206 }, { "epoch": 2.6434651136745435, "grad_norm": 1.1292152404785156, "learning_rate": 9.284552631578948e-05, "loss": 0.3563, "step": 47207 }, { "epoch": 2.6435211109866725, "grad_norm": 1.183517575263977, "learning_rate": 9.284526315789475e-05, "loss": 0.3691, "step": 47208 }, { "epoch": 2.6435771082988015, "grad_norm": 1.117695689201355, "learning_rate": 9.284500000000001e-05, "loss": 0.3178, "step": 47209 }, { "epoch": 2.6436331056109306, "grad_norm": 1.180788516998291, "learning_rate": 9.284473684210526e-05, "loss": 0.3938, "step": 47210 }, { "epoch": 2.6436891029230596, "grad_norm": 0.9863051772117615, "learning_rate": 9.284447368421053e-05, "loss": 0.3903, "step": 47211 }, { "epoch": 2.6437451002351886, "grad_norm": 1.2895848751068115, "learning_rate": 9.284421052631579e-05, "loss": 0.3234, "step": 47212 }, { "epoch": 2.6438010975473176, "grad_norm": 1.3952655792236328, "learning_rate": 9.284394736842106e-05, "loss": 0.4951, "step": 47213 }, { "epoch": 2.6438570948594466, "grad_norm": 1.3119497299194336, "learning_rate": 9.284368421052632e-05, "loss": 0.3507, "step": 47214 }, { "epoch": 2.6439130921715757, "grad_norm": 5.615248203277588, "learning_rate": 9.284342105263158e-05, "loss": 0.4002, "step": 47215 }, { "epoch": 2.6439690894837047, "grad_norm": 1.4216052293777466, "learning_rate": 9.284315789473684e-05, "loss": 0.4313, "step": 47216 }, { "epoch": 
2.6440250867958337, "grad_norm": 1.1733524799346924, "learning_rate": 9.284289473684211e-05, "loss": 0.3226, "step": 47217 }, { "epoch": 2.6440810841079627, "grad_norm": 1.182119607925415, "learning_rate": 9.284263157894737e-05, "loss": 0.4195, "step": 47218 }, { "epoch": 2.6441370814200917, "grad_norm": 1.0446248054504395, "learning_rate": 9.284236842105263e-05, "loss": 0.3429, "step": 47219 }, { "epoch": 2.6441930787322208, "grad_norm": 1.0906153917312622, "learning_rate": 9.28421052631579e-05, "loss": 0.3823, "step": 47220 }, { "epoch": 2.6442490760443498, "grad_norm": 1.1202232837677002, "learning_rate": 9.284184210526317e-05, "loss": 0.3345, "step": 47221 }, { "epoch": 2.644305073356479, "grad_norm": 1.2506822347640991, "learning_rate": 9.284157894736843e-05, "loss": 0.4919, "step": 47222 }, { "epoch": 2.644361070668608, "grad_norm": 1.3186287879943848, "learning_rate": 9.284131578947369e-05, "loss": 0.3928, "step": 47223 }, { "epoch": 2.644417067980737, "grad_norm": 1.183869481086731, "learning_rate": 9.284105263157895e-05, "loss": 0.2906, "step": 47224 }, { "epoch": 2.644473065292866, "grad_norm": 1.106387734413147, "learning_rate": 9.284078947368422e-05, "loss": 0.3984, "step": 47225 }, { "epoch": 2.644529062604995, "grad_norm": 1.3914591073989868, "learning_rate": 9.284052631578948e-05, "loss": 0.5154, "step": 47226 }, { "epoch": 2.644585059917124, "grad_norm": 0.9328710436820984, "learning_rate": 9.284026315789475e-05, "loss": 0.4217, "step": 47227 }, { "epoch": 2.644641057229253, "grad_norm": 1.1350332498550415, "learning_rate": 9.284e-05, "loss": 0.4972, "step": 47228 }, { "epoch": 2.644697054541382, "grad_norm": 1.0180718898773193, "learning_rate": 9.283973684210526e-05, "loss": 0.4106, "step": 47229 }, { "epoch": 2.644753051853511, "grad_norm": 2.093587875366211, "learning_rate": 9.283947368421053e-05, "loss": 0.5163, "step": 47230 }, { "epoch": 2.64480904916564, "grad_norm": 1.3041139841079712, "learning_rate": 9.283921052631579e-05, "loss": 0.4097, 
"step": 47231 }, { "epoch": 2.644865046477769, "grad_norm": 1.0746735334396362, "learning_rate": 9.283894736842106e-05, "loss": 0.329, "step": 47232 }, { "epoch": 2.644921043789898, "grad_norm": 1.0773961544036865, "learning_rate": 9.283868421052631e-05, "loss": 0.3002, "step": 47233 }, { "epoch": 2.644977041102027, "grad_norm": 1.2955607175827026, "learning_rate": 9.283842105263158e-05, "loss": 0.413, "step": 47234 }, { "epoch": 2.645033038414156, "grad_norm": 1.051275372505188, "learning_rate": 9.283815789473684e-05, "loss": 0.3119, "step": 47235 }, { "epoch": 2.645089035726285, "grad_norm": 1.5349481105804443, "learning_rate": 9.283789473684212e-05, "loss": 0.5248, "step": 47236 }, { "epoch": 2.645145033038414, "grad_norm": 1.2725114822387695, "learning_rate": 9.283763157894738e-05, "loss": 0.3738, "step": 47237 }, { "epoch": 2.645201030350543, "grad_norm": 1.241368293762207, "learning_rate": 9.283736842105264e-05, "loss": 0.4822, "step": 47238 }, { "epoch": 2.645257027662672, "grad_norm": 1.1658673286437988, "learning_rate": 9.28371052631579e-05, "loss": 0.416, "step": 47239 }, { "epoch": 2.645313024974801, "grad_norm": 1.1237760782241821, "learning_rate": 9.283684210526317e-05, "loss": 0.411, "step": 47240 }, { "epoch": 2.64536902228693, "grad_norm": 1.3881566524505615, "learning_rate": 9.283657894736843e-05, "loss": 0.5757, "step": 47241 }, { "epoch": 2.645425019599059, "grad_norm": 1.1334192752838135, "learning_rate": 9.283631578947369e-05, "loss": 0.301, "step": 47242 }, { "epoch": 2.645481016911188, "grad_norm": 1.0927727222442627, "learning_rate": 9.283605263157895e-05, "loss": 0.3437, "step": 47243 }, { "epoch": 2.645537014223317, "grad_norm": 1.1004325151443481, "learning_rate": 9.283578947368422e-05, "loss": 0.3649, "step": 47244 }, { "epoch": 2.6455930115354462, "grad_norm": 1.276138424873352, "learning_rate": 9.283552631578948e-05, "loss": 0.4153, "step": 47245 }, { "epoch": 2.6456490088475753, "grad_norm": 0.998623251914978, "learning_rate": 
9.283526315789474e-05, "loss": 0.3307, "step": 47246 }, { "epoch": 2.6457050061597043, "grad_norm": 1.175546407699585, "learning_rate": 9.2835e-05, "loss": 0.5867, "step": 47247 }, { "epoch": 2.6457610034718333, "grad_norm": 1.2784581184387207, "learning_rate": 9.283473684210526e-05, "loss": 0.3392, "step": 47248 }, { "epoch": 2.6458170007839623, "grad_norm": 1.1743813753128052, "learning_rate": 9.283447368421053e-05, "loss": 0.3941, "step": 47249 }, { "epoch": 2.6458729980960913, "grad_norm": 1.2884882688522339, "learning_rate": 9.28342105263158e-05, "loss": 0.3728, "step": 47250 }, { "epoch": 2.6459289954082204, "grad_norm": 1.058132290840149, "learning_rate": 9.283394736842105e-05, "loss": 0.4094, "step": 47251 }, { "epoch": 2.6459849927203494, "grad_norm": 1.3293882608413696, "learning_rate": 9.283368421052631e-05, "loss": 0.381, "step": 47252 }, { "epoch": 2.6460409900324784, "grad_norm": 1.1831815242767334, "learning_rate": 9.283342105263159e-05, "loss": 0.3488, "step": 47253 }, { "epoch": 2.6460969873446074, "grad_norm": 6.530035972595215, "learning_rate": 9.283315789473685e-05, "loss": 0.344, "step": 47254 }, { "epoch": 2.6461529846567364, "grad_norm": 1.131874918937683, "learning_rate": 9.283289473684212e-05, "loss": 0.374, "step": 47255 }, { "epoch": 2.6462089819688654, "grad_norm": 1.2201600074768066, "learning_rate": 9.283263157894737e-05, "loss": 0.3764, "step": 47256 }, { "epoch": 2.6462649792809945, "grad_norm": 1.1607797145843506, "learning_rate": 9.283236842105264e-05, "loss": 0.3651, "step": 47257 }, { "epoch": 2.6463209765931235, "grad_norm": 1.3369439840316772, "learning_rate": 9.28321052631579e-05, "loss": 0.4542, "step": 47258 }, { "epoch": 2.6463769739052525, "grad_norm": 1.062454104423523, "learning_rate": 9.283184210526317e-05, "loss": 0.3453, "step": 47259 }, { "epoch": 2.6464329712173815, "grad_norm": 1.051140546798706, "learning_rate": 9.283157894736842e-05, "loss": 0.3722, "step": 47260 }, { "epoch": 2.6464889685295105, "grad_norm": 
1.0293678045272827, "learning_rate": 9.283131578947369e-05, "loss": 0.3157, "step": 47261 }, { "epoch": 2.6465449658416396, "grad_norm": 1.340517520904541, "learning_rate": 9.283105263157895e-05, "loss": 0.4257, "step": 47262 }, { "epoch": 2.6466009631537686, "grad_norm": 1.098403811454773, "learning_rate": 9.283078947368421e-05, "loss": 0.3506, "step": 47263 }, { "epoch": 2.6466569604658976, "grad_norm": 1.0847928524017334, "learning_rate": 9.283052631578948e-05, "loss": 0.4435, "step": 47264 }, { "epoch": 2.6467129577780266, "grad_norm": 1.1143977642059326, "learning_rate": 9.283026315789473e-05, "loss": 0.3855, "step": 47265 }, { "epoch": 2.6467689550901556, "grad_norm": 1.0994781255722046, "learning_rate": 9.283e-05, "loss": 0.4448, "step": 47266 }, { "epoch": 2.6468249524022847, "grad_norm": 1.099743366241455, "learning_rate": 9.282973684210526e-05, "loss": 0.3969, "step": 47267 }, { "epoch": 2.6468809497144137, "grad_norm": 0.950365424156189, "learning_rate": 9.282947368421054e-05, "loss": 0.3622, "step": 47268 }, { "epoch": 2.6469369470265427, "grad_norm": 1.0168462991714478, "learning_rate": 9.28292105263158e-05, "loss": 0.3827, "step": 47269 }, { "epoch": 2.6469929443386717, "grad_norm": 1.253541111946106, "learning_rate": 9.282894736842106e-05, "loss": 0.4529, "step": 47270 }, { "epoch": 2.6470489416508007, "grad_norm": 1.2013014554977417, "learning_rate": 9.282868421052632e-05, "loss": 0.4195, "step": 47271 }, { "epoch": 2.6471049389629298, "grad_norm": 1.0248223543167114, "learning_rate": 9.282842105263159e-05, "loss": 0.3701, "step": 47272 }, { "epoch": 2.6471609362750588, "grad_norm": 1.272473692893982, "learning_rate": 9.282815789473685e-05, "loss": 0.485, "step": 47273 }, { "epoch": 2.647216933587188, "grad_norm": 1.1103805303573608, "learning_rate": 9.282789473684211e-05, "loss": 0.3753, "step": 47274 }, { "epoch": 2.647272930899317, "grad_norm": 1.204677939414978, "learning_rate": 9.282763157894737e-05, "loss": 0.4391, "step": 47275 }, { "epoch": 
2.647328928211446, "grad_norm": 1.1374101638793945, "learning_rate": 9.282736842105264e-05, "loss": 0.3061, "step": 47276 }, { "epoch": 2.647384925523575, "grad_norm": 1.308250904083252, "learning_rate": 9.28271052631579e-05, "loss": 0.3883, "step": 47277 }, { "epoch": 2.647440922835704, "grad_norm": 1.36335289478302, "learning_rate": 9.282684210526316e-05, "loss": 0.4856, "step": 47278 }, { "epoch": 2.647496920147833, "grad_norm": 1.4866420030593872, "learning_rate": 9.282657894736842e-05, "loss": 0.5682, "step": 47279 }, { "epoch": 2.647552917459962, "grad_norm": 1.0454217195510864, "learning_rate": 9.282631578947368e-05, "loss": 0.3042, "step": 47280 }, { "epoch": 2.647608914772091, "grad_norm": 1.0047128200531006, "learning_rate": 9.282605263157895e-05, "loss": 0.4155, "step": 47281 }, { "epoch": 2.64766491208422, "grad_norm": 1.2558155059814453, "learning_rate": 9.282578947368421e-05, "loss": 0.368, "step": 47282 }, { "epoch": 2.647720909396349, "grad_norm": 1.1668663024902344, "learning_rate": 9.282552631578947e-05, "loss": 0.4067, "step": 47283 }, { "epoch": 2.647776906708478, "grad_norm": 1.1425472497940063, "learning_rate": 9.282526315789473e-05, "loss": 0.3402, "step": 47284 }, { "epoch": 2.647832904020607, "grad_norm": 0.9857205152511597, "learning_rate": 9.2825e-05, "loss": 0.2803, "step": 47285 }, { "epoch": 2.647888901332736, "grad_norm": 1.0854580402374268, "learning_rate": 9.282473684210527e-05, "loss": 0.3682, "step": 47286 }, { "epoch": 2.647944898644865, "grad_norm": 1.3884446620941162, "learning_rate": 9.282447368421054e-05, "loss": 0.3748, "step": 47287 }, { "epoch": 2.648000895956994, "grad_norm": 1.1964740753173828, "learning_rate": 9.282421052631579e-05, "loss": 0.3485, "step": 47288 }, { "epoch": 2.648056893269123, "grad_norm": 1.0712134838104248, "learning_rate": 9.282394736842106e-05, "loss": 0.3677, "step": 47289 }, { "epoch": 2.648112890581252, "grad_norm": 1.2058523893356323, "learning_rate": 9.282368421052632e-05, "loss": 0.4677, 
"step": 47290 }, { "epoch": 2.648168887893381, "grad_norm": 1.2572628259658813, "learning_rate": 9.282342105263159e-05, "loss": 0.6057, "step": 47291 }, { "epoch": 2.64822488520551, "grad_norm": 1.2058522701263428, "learning_rate": 9.282315789473685e-05, "loss": 0.3912, "step": 47292 }, { "epoch": 2.648280882517639, "grad_norm": 1.2105376720428467, "learning_rate": 9.282289473684211e-05, "loss": 0.4281, "step": 47293 }, { "epoch": 2.648336879829768, "grad_norm": 1.2736655473709106, "learning_rate": 9.282263157894737e-05, "loss": 0.33, "step": 47294 }, { "epoch": 2.648392877141897, "grad_norm": 1.4279433488845825, "learning_rate": 9.282236842105264e-05, "loss": 0.4527, "step": 47295 }, { "epoch": 2.648448874454026, "grad_norm": 1.18088960647583, "learning_rate": 9.28221052631579e-05, "loss": 0.3482, "step": 47296 }, { "epoch": 2.6485048717661552, "grad_norm": 1.1353497505187988, "learning_rate": 9.282184210526315e-05, "loss": 0.4168, "step": 47297 }, { "epoch": 2.6485608690782843, "grad_norm": 0.9586924910545349, "learning_rate": 9.282157894736842e-05, "loss": 0.3707, "step": 47298 }, { "epoch": 2.6486168663904133, "grad_norm": 1.0255540609359741, "learning_rate": 9.282131578947368e-05, "loss": 0.3196, "step": 47299 }, { "epoch": 2.6486728637025423, "grad_norm": 1.0755798816680908, "learning_rate": 9.282105263157896e-05, "loss": 0.3349, "step": 47300 }, { "epoch": 2.6487288610146713, "grad_norm": 0.945899248123169, "learning_rate": 9.282078947368422e-05, "loss": 0.3058, "step": 47301 }, { "epoch": 2.6487848583268003, "grad_norm": 1.1393210887908936, "learning_rate": 9.282052631578948e-05, "loss": 0.3303, "step": 47302 }, { "epoch": 2.6488408556389293, "grad_norm": 1.3186050653457642, "learning_rate": 9.282026315789474e-05, "loss": 0.3951, "step": 47303 }, { "epoch": 2.6488968529510584, "grad_norm": 1.1286953687667847, "learning_rate": 9.282000000000001e-05, "loss": 0.5325, "step": 47304 }, { "epoch": 2.6489528502631874, "grad_norm": 1.2224489450454712, 
"learning_rate": 9.281973684210527e-05, "loss": 0.4545, "step": 47305 }, { "epoch": 2.6490088475753164, "grad_norm": 1.1597079038619995, "learning_rate": 9.281947368421053e-05, "loss": 0.3806, "step": 47306 }, { "epoch": 2.6490648448874454, "grad_norm": 1.3673837184906006, "learning_rate": 9.281921052631579e-05, "loss": 0.4738, "step": 47307 }, { "epoch": 2.6491208421995744, "grad_norm": 1.3606059551239014, "learning_rate": 9.281894736842106e-05, "loss": 0.5845, "step": 47308 }, { "epoch": 2.6491768395117035, "grad_norm": 1.2028276920318604, "learning_rate": 9.281868421052632e-05, "loss": 0.5625, "step": 47309 }, { "epoch": 2.6492328368238325, "grad_norm": 1.1546616554260254, "learning_rate": 9.281842105263158e-05, "loss": 0.394, "step": 47310 }, { "epoch": 2.6492888341359615, "grad_norm": 1.2212735414505005, "learning_rate": 9.281815789473684e-05, "loss": 0.5187, "step": 47311 }, { "epoch": 2.6493448314480905, "grad_norm": 1.46455979347229, "learning_rate": 9.281789473684211e-05, "loss": 0.4435, "step": 47312 }, { "epoch": 2.6494008287602195, "grad_norm": 1.1484616994857788, "learning_rate": 9.281763157894737e-05, "loss": 0.4464, "step": 47313 }, { "epoch": 2.6494568260723486, "grad_norm": 1.334641695022583, "learning_rate": 9.281736842105263e-05, "loss": 0.5189, "step": 47314 }, { "epoch": 2.6495128233844776, "grad_norm": 1.1570926904678345, "learning_rate": 9.28171052631579e-05, "loss": 0.3425, "step": 47315 }, { "epoch": 2.6495688206966066, "grad_norm": 1.4560905694961548, "learning_rate": 9.281684210526315e-05, "loss": 0.3559, "step": 47316 }, { "epoch": 2.6496248180087356, "grad_norm": 0.9309477210044861, "learning_rate": 9.281657894736843e-05, "loss": 0.369, "step": 47317 }, { "epoch": 2.6496808153208646, "grad_norm": 1.1213810443878174, "learning_rate": 9.281631578947369e-05, "loss": 0.3781, "step": 47318 }, { "epoch": 2.6497368126329937, "grad_norm": 1.2483534812927246, "learning_rate": 9.281605263157896e-05, "loss": 0.5511, "step": 47319 }, { "epoch": 
2.6497928099451227, "grad_norm": 1.2032537460327148, "learning_rate": 9.28157894736842e-05, "loss": 0.5573, "step": 47320 }, { "epoch": 2.6498488072572517, "grad_norm": 1.1514328718185425, "learning_rate": 9.281552631578948e-05, "loss": 0.4671, "step": 47321 }, { "epoch": 2.6499048045693807, "grad_norm": 1.3692694902420044, "learning_rate": 9.281526315789474e-05, "loss": 0.4237, "step": 47322 }, { "epoch": 2.6499608018815097, "grad_norm": 1.2762064933776855, "learning_rate": 9.281500000000001e-05, "loss": 0.4395, "step": 47323 }, { "epoch": 2.6500167991936388, "grad_norm": 1.1670019626617432, "learning_rate": 9.281473684210527e-05, "loss": 0.4248, "step": 47324 }, { "epoch": 2.6500727965057678, "grad_norm": 1.1209040880203247, "learning_rate": 9.281447368421053e-05, "loss": 0.3066, "step": 47325 }, { "epoch": 2.650128793817897, "grad_norm": 1.0088346004486084, "learning_rate": 9.281421052631579e-05, "loss": 0.3912, "step": 47326 }, { "epoch": 2.650184791130026, "grad_norm": 1.4989216327667236, "learning_rate": 9.281394736842106e-05, "loss": 0.6362, "step": 47327 }, { "epoch": 2.650240788442155, "grad_norm": 1.0243817567825317, "learning_rate": 9.281368421052632e-05, "loss": 0.4693, "step": 47328 }, { "epoch": 2.650296785754284, "grad_norm": 1.275313138961792, "learning_rate": 9.281342105263158e-05, "loss": 0.3666, "step": 47329 }, { "epoch": 2.650352783066413, "grad_norm": 1.2796465158462524, "learning_rate": 9.281315789473684e-05, "loss": 0.4374, "step": 47330 }, { "epoch": 2.650408780378542, "grad_norm": 1.2002606391906738, "learning_rate": 9.28128947368421e-05, "loss": 0.4139, "step": 47331 }, { "epoch": 2.650464777690671, "grad_norm": 1.2478233575820923, "learning_rate": 9.281263157894738e-05, "loss": 0.3436, "step": 47332 }, { "epoch": 2.6505207750028, "grad_norm": 1.0271979570388794, "learning_rate": 9.281236842105264e-05, "loss": 0.3169, "step": 47333 }, { "epoch": 2.650576772314929, "grad_norm": 1.110460877418518, "learning_rate": 9.28121052631579e-05, 
"loss": 0.5511, "step": 47334 }, { "epoch": 2.650632769627058, "grad_norm": 1.2558114528656006, "learning_rate": 9.281184210526316e-05, "loss": 0.384, "step": 47335 }, { "epoch": 2.650688766939187, "grad_norm": 1.1179598569869995, "learning_rate": 9.281157894736843e-05, "loss": 0.4063, "step": 47336 }, { "epoch": 2.650744764251316, "grad_norm": 1.2945671081542969, "learning_rate": 9.281131578947369e-05, "loss": 0.5105, "step": 47337 }, { "epoch": 2.650800761563445, "grad_norm": 1.469787836074829, "learning_rate": 9.281105263157895e-05, "loss": 0.4152, "step": 47338 }, { "epoch": 2.650856758875574, "grad_norm": 1.1698426008224487, "learning_rate": 9.281078947368421e-05, "loss": 0.4583, "step": 47339 }, { "epoch": 2.650912756187703, "grad_norm": 1.0619993209838867, "learning_rate": 9.281052631578948e-05, "loss": 0.477, "step": 47340 }, { "epoch": 2.650968753499832, "grad_norm": 1.285013198852539, "learning_rate": 9.281026315789474e-05, "loss": 0.4247, "step": 47341 }, { "epoch": 2.651024750811961, "grad_norm": 1.3380341529846191, "learning_rate": 9.281000000000001e-05, "loss": 0.4429, "step": 47342 }, { "epoch": 2.65108074812409, "grad_norm": 1.215882420539856, "learning_rate": 9.280973684210526e-05, "loss": 0.3162, "step": 47343 }, { "epoch": 2.651136745436219, "grad_norm": 1.1475400924682617, "learning_rate": 9.280947368421053e-05, "loss": 0.3682, "step": 47344 }, { "epoch": 2.651192742748348, "grad_norm": 1.3506118059158325, "learning_rate": 9.28092105263158e-05, "loss": 0.4964, "step": 47345 }, { "epoch": 2.651248740060477, "grad_norm": 1.0748189687728882, "learning_rate": 9.280894736842107e-05, "loss": 0.3636, "step": 47346 }, { "epoch": 2.651304737372606, "grad_norm": 1.2732067108154297, "learning_rate": 9.280868421052633e-05, "loss": 0.5344, "step": 47347 }, { "epoch": 2.651360734684735, "grad_norm": 1.168340802192688, "learning_rate": 9.280842105263159e-05, "loss": 0.3395, "step": 47348 }, { "epoch": 2.6514167319968642, "grad_norm": 0.9707564115524292, 
"learning_rate": 9.280815789473685e-05, "loss": 0.2856, "step": 47349 }, { "epoch": 2.6514727293089932, "grad_norm": 0.9960217475891113, "learning_rate": 9.28078947368421e-05, "loss": 0.4184, "step": 47350 }, { "epoch": 2.6515287266211223, "grad_norm": 1.2831878662109375, "learning_rate": 9.280763157894738e-05, "loss": 0.4136, "step": 47351 }, { "epoch": 2.6515847239332513, "grad_norm": 1.343133568763733, "learning_rate": 9.280736842105263e-05, "loss": 0.4377, "step": 47352 }, { "epoch": 2.6516407212453803, "grad_norm": 1.416107177734375, "learning_rate": 9.28071052631579e-05, "loss": 0.3826, "step": 47353 }, { "epoch": 2.6516967185575093, "grad_norm": 1.010925054550171, "learning_rate": 9.280684210526316e-05, "loss": 0.3297, "step": 47354 }, { "epoch": 2.6517527158696383, "grad_norm": 1.0699340105056763, "learning_rate": 9.280657894736843e-05, "loss": 0.3486, "step": 47355 }, { "epoch": 2.6518087131817674, "grad_norm": 1.038496971130371, "learning_rate": 9.280631578947369e-05, "loss": 0.3042, "step": 47356 }, { "epoch": 2.6518647104938964, "grad_norm": 1.157675862312317, "learning_rate": 9.280605263157895e-05, "loss": 0.4852, "step": 47357 }, { "epoch": 2.6519207078060254, "grad_norm": 1.3081722259521484, "learning_rate": 9.280578947368421e-05, "loss": 0.3504, "step": 47358 }, { "epoch": 2.6519767051181544, "grad_norm": 1.2764074802398682, "learning_rate": 9.280552631578948e-05, "loss": 0.3753, "step": 47359 }, { "epoch": 2.6520327024302834, "grad_norm": 1.314063310623169, "learning_rate": 9.280526315789474e-05, "loss": 0.4127, "step": 47360 }, { "epoch": 2.6520886997424125, "grad_norm": 1.0930436849594116, "learning_rate": 9.2805e-05, "loss": 0.3436, "step": 47361 }, { "epoch": 2.6521446970545415, "grad_norm": 1.1373069286346436, "learning_rate": 9.280473684210526e-05, "loss": 0.3545, "step": 47362 }, { "epoch": 2.6522006943666705, "grad_norm": 1.5980607271194458, "learning_rate": 9.280447368421054e-05, "loss": 0.3955, "step": 47363 }, { "epoch": 
2.6522566916787995, "grad_norm": 1.351307988166809, "learning_rate": 9.28042105263158e-05, "loss": 0.5676, "step": 47364 }, { "epoch": 2.6523126889909285, "grad_norm": 1.281726360321045, "learning_rate": 9.280394736842106e-05, "loss": 0.4463, "step": 47365 }, { "epoch": 2.6523686863030576, "grad_norm": 1.12949538230896, "learning_rate": 9.280368421052632e-05, "loss": 0.3685, "step": 47366 }, { "epoch": 2.6524246836151866, "grad_norm": 1.0199346542358398, "learning_rate": 9.280342105263158e-05, "loss": 0.2803, "step": 47367 }, { "epoch": 2.6524806809273156, "grad_norm": 1.6842765808105469, "learning_rate": 9.280315789473685e-05, "loss": 0.4908, "step": 47368 }, { "epoch": 2.6525366782394446, "grad_norm": 0.9752827286720276, "learning_rate": 9.280289473684211e-05, "loss": 0.3323, "step": 47369 }, { "epoch": 2.6525926755515736, "grad_norm": 1.1426132917404175, "learning_rate": 9.280263157894737e-05, "loss": 0.4139, "step": 47370 }, { "epoch": 2.6526486728637027, "grad_norm": 6.249517917633057, "learning_rate": 9.280236842105263e-05, "loss": 0.4077, "step": 47371 }, { "epoch": 2.6527046701758317, "grad_norm": 1.3404966592788696, "learning_rate": 9.28021052631579e-05, "loss": 0.3328, "step": 47372 }, { "epoch": 2.6527606674879607, "grad_norm": 1.247960090637207, "learning_rate": 9.280184210526316e-05, "loss": 0.4228, "step": 47373 }, { "epoch": 2.6528166648000897, "grad_norm": 0.9732924699783325, "learning_rate": 9.280157894736843e-05, "loss": 0.2848, "step": 47374 }, { "epoch": 2.6528726621122187, "grad_norm": 1.1455532312393188, "learning_rate": 9.280131578947368e-05, "loss": 0.5429, "step": 47375 }, { "epoch": 2.6529286594243477, "grad_norm": 1.296518087387085, "learning_rate": 9.280105263157895e-05, "loss": 0.3566, "step": 47376 }, { "epoch": 2.6529846567364768, "grad_norm": 1.4415994882583618, "learning_rate": 9.280078947368421e-05, "loss": 0.2905, "step": 47377 }, { "epoch": 2.653040654048606, "grad_norm": 1.1553828716278076, "learning_rate": 
9.280052631578949e-05, "loss": 0.4326, "step": 47378 }, { "epoch": 2.653096651360735, "grad_norm": 1.3821310997009277, "learning_rate": 9.280026315789475e-05, "loss": 0.5058, "step": 47379 }, { "epoch": 2.653152648672864, "grad_norm": 1.188806176185608, "learning_rate": 9.28e-05, "loss": 0.5167, "step": 47380 }, { "epoch": 2.653208645984993, "grad_norm": 1.2384036779403687, "learning_rate": 9.279973684210527e-05, "loss": 0.4195, "step": 47381 }, { "epoch": 2.653264643297122, "grad_norm": 1.2527974843978882, "learning_rate": 9.279947368421054e-05, "loss": 0.4913, "step": 47382 }, { "epoch": 2.653320640609251, "grad_norm": 1.0805578231811523, "learning_rate": 9.27992105263158e-05, "loss": 0.315, "step": 47383 }, { "epoch": 2.65337663792138, "grad_norm": 1.1112421751022339, "learning_rate": 9.279894736842106e-05, "loss": 0.4358, "step": 47384 }, { "epoch": 2.653432635233509, "grad_norm": 1.6203702688217163, "learning_rate": 9.279868421052632e-05, "loss": 0.4184, "step": 47385 }, { "epoch": 2.653488632545638, "grad_norm": 1.3104215860366821, "learning_rate": 9.279842105263158e-05, "loss": 0.4823, "step": 47386 }, { "epoch": 2.653544629857767, "grad_norm": 1.146422266960144, "learning_rate": 9.279815789473685e-05, "loss": 0.4654, "step": 47387 }, { "epoch": 2.653600627169896, "grad_norm": 1.1565003395080566, "learning_rate": 9.279789473684211e-05, "loss": 0.3961, "step": 47388 }, { "epoch": 2.653656624482025, "grad_norm": 1.336767315864563, "learning_rate": 9.279763157894737e-05, "loss": 0.4712, "step": 47389 }, { "epoch": 2.653712621794154, "grad_norm": 1.2935867309570312, "learning_rate": 9.279736842105263e-05, "loss": 0.4177, "step": 47390 }, { "epoch": 2.653768619106283, "grad_norm": 1.2471948862075806, "learning_rate": 9.27971052631579e-05, "loss": 0.4183, "step": 47391 }, { "epoch": 2.653824616418412, "grad_norm": 1.2142927646636963, "learning_rate": 9.279684210526316e-05, "loss": 0.3329, "step": 47392 }, { "epoch": 2.653880613730541, "grad_norm": 
1.2951856851577759, "learning_rate": 9.279657894736842e-05, "loss": 0.5949, "step": 47393 }, { "epoch": 2.65393661104267, "grad_norm": 1.2066380977630615, "learning_rate": 9.279631578947368e-05, "loss": 0.3499, "step": 47394 }, { "epoch": 2.653992608354799, "grad_norm": 1.0970025062561035, "learning_rate": 9.279605263157896e-05, "loss": 0.3028, "step": 47395 }, { "epoch": 2.654048605666928, "grad_norm": 1.1269798278808594, "learning_rate": 9.279578947368422e-05, "loss": 0.3506, "step": 47396 }, { "epoch": 2.654104602979057, "grad_norm": 1.2115707397460938, "learning_rate": 9.279552631578949e-05, "loss": 0.3385, "step": 47397 }, { "epoch": 2.654160600291186, "grad_norm": 1.1550346612930298, "learning_rate": 9.279526315789474e-05, "loss": 0.2769, "step": 47398 }, { "epoch": 2.654216597603315, "grad_norm": 1.2555100917816162, "learning_rate": 9.279500000000001e-05, "loss": 0.5388, "step": 47399 }, { "epoch": 2.654272594915444, "grad_norm": 1.220895528793335, "learning_rate": 9.279473684210527e-05, "loss": 0.462, "step": 47400 }, { "epoch": 2.6543285922275732, "grad_norm": 1.2442971467971802, "learning_rate": 9.279447368421053e-05, "loss": 0.3198, "step": 47401 }, { "epoch": 2.6543845895397022, "grad_norm": 1.3912742137908936, "learning_rate": 9.27942105263158e-05, "loss": 0.5326, "step": 47402 }, { "epoch": 2.6544405868518313, "grad_norm": 1.3022576570510864, "learning_rate": 9.279394736842105e-05, "loss": 0.3787, "step": 47403 }, { "epoch": 2.6544965841639603, "grad_norm": 2.773176670074463, "learning_rate": 9.279368421052632e-05, "loss": 0.5761, "step": 47404 }, { "epoch": 2.6545525814760893, "grad_norm": 1.1172339916229248, "learning_rate": 9.279342105263158e-05, "loss": 0.4468, "step": 47405 }, { "epoch": 2.6546085787882183, "grad_norm": 1.4781614542007446, "learning_rate": 9.279315789473685e-05, "loss": 0.3895, "step": 47406 }, { "epoch": 2.6546645761003473, "grad_norm": 1.0744173526763916, "learning_rate": 9.27928947368421e-05, "loss": 0.4567, "step": 47407 }, { 
"epoch": 2.6547205734124764, "grad_norm": 1.2259507179260254, "learning_rate": 9.279263157894737e-05, "loss": 0.3969, "step": 47408 }, { "epoch": 2.6547765707246054, "grad_norm": 1.0328724384307861, "learning_rate": 9.279236842105263e-05, "loss": 0.3437, "step": 47409 }, { "epoch": 2.6548325680367344, "grad_norm": 1.122828722000122, "learning_rate": 9.27921052631579e-05, "loss": 0.4198, "step": 47410 }, { "epoch": 2.6548885653488634, "grad_norm": 1.177795171737671, "learning_rate": 9.279184210526317e-05, "loss": 0.428, "step": 47411 }, { "epoch": 2.6549445626609924, "grad_norm": 0.9679808616638184, "learning_rate": 9.279157894736843e-05, "loss": 0.3814, "step": 47412 }, { "epoch": 2.6550005599731215, "grad_norm": 1.1406450271606445, "learning_rate": 9.279131578947369e-05, "loss": 0.4707, "step": 47413 }, { "epoch": 2.6550565572852505, "grad_norm": 1.7606542110443115, "learning_rate": 9.279105263157896e-05, "loss": 0.3474, "step": 47414 }, { "epoch": 2.6551125545973795, "grad_norm": 1.4737811088562012, "learning_rate": 9.279078947368422e-05, "loss": 0.3827, "step": 47415 }, { "epoch": 2.655168551909508, "grad_norm": 1.1982736587524414, "learning_rate": 9.279052631578948e-05, "loss": 0.3369, "step": 47416 }, { "epoch": 2.6552245492216375, "grad_norm": 1.1504876613616943, "learning_rate": 9.279026315789474e-05, "loss": 0.4056, "step": 47417 }, { "epoch": 2.655280546533766, "grad_norm": 1.2437138557434082, "learning_rate": 9.279e-05, "loss": 0.5052, "step": 47418 }, { "epoch": 2.6553365438458956, "grad_norm": 1.0265378952026367, "learning_rate": 9.278973684210527e-05, "loss": 0.3347, "step": 47419 }, { "epoch": 2.655392541158024, "grad_norm": 4.583118438720703, "learning_rate": 9.278947368421053e-05, "loss": 0.4028, "step": 47420 }, { "epoch": 2.6554485384701536, "grad_norm": 1.1025794744491577, "learning_rate": 9.278921052631579e-05, "loss": 0.3467, "step": 47421 }, { "epoch": 2.655504535782282, "grad_norm": 1.2353644371032715, "learning_rate": 9.278894736842105e-05, 
"loss": 0.3243, "step": 47422 }, { "epoch": 2.6555605330944116, "grad_norm": 1.1974589824676514, "learning_rate": 9.278868421052632e-05, "loss": 0.3577, "step": 47423 }, { "epoch": 2.65561653040654, "grad_norm": 1.1033002138137817, "learning_rate": 9.278842105263158e-05, "loss": 0.3417, "step": 47424 }, { "epoch": 2.6556725277186697, "grad_norm": 1.198957085609436, "learning_rate": 9.278815789473684e-05, "loss": 0.3737, "step": 47425 }, { "epoch": 2.6557285250307983, "grad_norm": 1.387298345565796, "learning_rate": 9.27878947368421e-05, "loss": 0.5178, "step": 47426 }, { "epoch": 2.6557845223429277, "grad_norm": 0.951326310634613, "learning_rate": 9.278763157894738e-05, "loss": 0.3306, "step": 47427 }, { "epoch": 2.6558405196550563, "grad_norm": 1.0141007900238037, "learning_rate": 9.278736842105264e-05, "loss": 0.3155, "step": 47428 }, { "epoch": 2.6558965169671858, "grad_norm": 1.4019529819488525, "learning_rate": 9.278710526315791e-05, "loss": 0.4005, "step": 47429 }, { "epoch": 2.6559525142793143, "grad_norm": 1.391941785812378, "learning_rate": 9.278684210526315e-05, "loss": 0.4454, "step": 47430 }, { "epoch": 2.656008511591444, "grad_norm": 1.25059175491333, "learning_rate": 9.278657894736843e-05, "loss": 0.4378, "step": 47431 }, { "epoch": 2.6560645089035724, "grad_norm": 1.3228929042816162, "learning_rate": 9.278631578947369e-05, "loss": 0.4045, "step": 47432 }, { "epoch": 2.656120506215702, "grad_norm": 1.3107473850250244, "learning_rate": 9.278605263157896e-05, "loss": 0.4424, "step": 47433 }, { "epoch": 2.6561765035278304, "grad_norm": 1.0154026746749878, "learning_rate": 9.278578947368422e-05, "loss": 0.4263, "step": 47434 }, { "epoch": 2.65623250083996, "grad_norm": 1.5786575078964233, "learning_rate": 9.278552631578947e-05, "loss": 0.5746, "step": 47435 }, { "epoch": 2.6562884981520885, "grad_norm": 1.0666754245758057, "learning_rate": 9.278526315789474e-05, "loss": 0.3852, "step": 47436 }, { "epoch": 2.656344495464218, "grad_norm": 
1.1458840370178223, "learning_rate": 9.2785e-05, "loss": 0.3973, "step": 47437 }, { "epoch": 2.6564004927763465, "grad_norm": 1.4595284461975098, "learning_rate": 9.278473684210527e-05, "loss": 0.4544, "step": 47438 }, { "epoch": 2.656456490088476, "grad_norm": 1.2383803129196167, "learning_rate": 9.278447368421053e-05, "loss": 0.4303, "step": 47439 }, { "epoch": 2.6565124874006045, "grad_norm": 1.1134872436523438, "learning_rate": 9.278421052631579e-05, "loss": 0.3184, "step": 47440 }, { "epoch": 2.656568484712734, "grad_norm": 1.103294849395752, "learning_rate": 9.278394736842105e-05, "loss": 0.4878, "step": 47441 }, { "epoch": 2.6566244820248626, "grad_norm": 1.2575212717056274, "learning_rate": 9.278368421052633e-05, "loss": 0.3501, "step": 47442 }, { "epoch": 2.656680479336992, "grad_norm": 1.1125930547714233, "learning_rate": 9.278342105263159e-05, "loss": 0.361, "step": 47443 }, { "epoch": 2.6567364766491206, "grad_norm": 1.5357236862182617, "learning_rate": 9.278315789473685e-05, "loss": 0.53, "step": 47444 }, { "epoch": 2.65679247396125, "grad_norm": 1.138389229774475, "learning_rate": 9.27828947368421e-05, "loss": 0.3939, "step": 47445 }, { "epoch": 2.6568484712733786, "grad_norm": 1.6490459442138672, "learning_rate": 9.278263157894738e-05, "loss": 0.4478, "step": 47446 }, { "epoch": 2.656904468585508, "grad_norm": 1.3572652339935303, "learning_rate": 9.278236842105264e-05, "loss": 0.5394, "step": 47447 }, { "epoch": 2.6569604658976367, "grad_norm": 0.926978349685669, "learning_rate": 9.27821052631579e-05, "loss": 0.2687, "step": 47448 }, { "epoch": 2.657016463209766, "grad_norm": 1.1402649879455566, "learning_rate": 9.278184210526316e-05, "loss": 0.394, "step": 47449 }, { "epoch": 2.6570724605218947, "grad_norm": 1.4772206544876099, "learning_rate": 9.278157894736843e-05, "loss": 0.5178, "step": 47450 }, { "epoch": 2.657128457834024, "grad_norm": 1.31520676612854, "learning_rate": 9.278131578947369e-05, "loss": 0.5268, "step": 47451 }, { "epoch": 
2.6571844551461528, "grad_norm": 1.0168590545654297, "learning_rate": 9.278105263157895e-05, "loss": 0.3349, "step": 47452 }, { "epoch": 2.657240452458282, "grad_norm": 1.129348874092102, "learning_rate": 9.278078947368421e-05, "loss": 0.3473, "step": 47453 }, { "epoch": 2.657296449770411, "grad_norm": 1.2584019899368286, "learning_rate": 9.278052631578947e-05, "loss": 0.3661, "step": 47454 }, { "epoch": 2.6573524470825403, "grad_norm": 1.8298060894012451, "learning_rate": 9.278026315789474e-05, "loss": 0.6074, "step": 47455 }, { "epoch": 2.657408444394669, "grad_norm": 1.0128612518310547, "learning_rate": 9.278e-05, "loss": 0.3593, "step": 47456 }, { "epoch": 2.6574644417067983, "grad_norm": 1.1211118698120117, "learning_rate": 9.277973684210528e-05, "loss": 0.4933, "step": 47457 }, { "epoch": 2.657520439018927, "grad_norm": 1.2811659574508667, "learning_rate": 9.277947368421052e-05, "loss": 0.375, "step": 47458 }, { "epoch": 2.6575764363310563, "grad_norm": 1.6519309282302856, "learning_rate": 9.27792105263158e-05, "loss": 0.4228, "step": 47459 }, { "epoch": 2.657632433643185, "grad_norm": 1.163385272026062, "learning_rate": 9.277894736842106e-05, "loss": 0.3622, "step": 47460 }, { "epoch": 2.6576884309553144, "grad_norm": 1.2200430631637573, "learning_rate": 9.277868421052633e-05, "loss": 0.3449, "step": 47461 }, { "epoch": 2.657744428267443, "grad_norm": 1.2044298648834229, "learning_rate": 9.277842105263157e-05, "loss": 0.4778, "step": 47462 }, { "epoch": 2.6578004255795724, "grad_norm": 1.1120169162750244, "learning_rate": 9.277815789473685e-05, "loss": 0.4296, "step": 47463 }, { "epoch": 2.657856422891701, "grad_norm": 1.1753687858581543, "learning_rate": 9.277789473684211e-05, "loss": 0.434, "step": 47464 }, { "epoch": 2.6579124202038305, "grad_norm": 1.3142558336257935, "learning_rate": 9.277763157894738e-05, "loss": 0.3449, "step": 47465 }, { "epoch": 2.657968417515959, "grad_norm": 1.0876353979110718, "learning_rate": 9.277736842105264e-05, "loss": 
0.3579, "step": 47466 }, { "epoch": 2.6580244148280885, "grad_norm": 1.6637771129608154, "learning_rate": 9.27771052631579e-05, "loss": 0.5025, "step": 47467 }, { "epoch": 2.658080412140217, "grad_norm": 1.139370322227478, "learning_rate": 9.277684210526316e-05, "loss": 0.3388, "step": 47468 }, { "epoch": 2.6581364094523465, "grad_norm": 4.984150409698486, "learning_rate": 9.277657894736843e-05, "loss": 0.4036, "step": 47469 }, { "epoch": 2.658192406764475, "grad_norm": 1.0501899719238281, "learning_rate": 9.277631578947369e-05, "loss": 0.3863, "step": 47470 }, { "epoch": 2.6582484040766046, "grad_norm": 1.1435717344284058, "learning_rate": 9.277605263157895e-05, "loss": 0.564, "step": 47471 }, { "epoch": 2.658304401388733, "grad_norm": 1.5279697179794312, "learning_rate": 9.277578947368421e-05, "loss": 0.416, "step": 47472 }, { "epoch": 2.6583603987008626, "grad_norm": 1.0355006456375122, "learning_rate": 9.277552631578947e-05, "loss": 0.3201, "step": 47473 }, { "epoch": 2.658416396012991, "grad_norm": 1.1237143278121948, "learning_rate": 9.277526315789475e-05, "loss": 0.4628, "step": 47474 }, { "epoch": 2.6584723933251206, "grad_norm": 1.1637531518936157, "learning_rate": 9.2775e-05, "loss": 0.4, "step": 47475 }, { "epoch": 2.658528390637249, "grad_norm": 1.2168872356414795, "learning_rate": 9.277473684210527e-05, "loss": 0.3764, "step": 47476 }, { "epoch": 2.6585843879493787, "grad_norm": 1.3048585653305054, "learning_rate": 9.277447368421052e-05, "loss": 0.4718, "step": 47477 }, { "epoch": 2.6586403852615073, "grad_norm": 1.0684360265731812, "learning_rate": 9.27742105263158e-05, "loss": 0.5087, "step": 47478 }, { "epoch": 2.6586963825736367, "grad_norm": 1.2147250175476074, "learning_rate": 9.277394736842106e-05, "loss": 0.4338, "step": 47479 }, { "epoch": 2.6587523798857653, "grad_norm": 1.0636107921600342, "learning_rate": 9.277368421052632e-05, "loss": 0.2823, "step": 47480 }, { "epoch": 2.6588083771978948, "grad_norm": 1.0709655284881592, "learning_rate": 
9.277342105263158e-05, "loss": 0.3091, "step": 47481 }, { "epoch": 2.6588643745100233, "grad_norm": 1.2647533416748047, "learning_rate": 9.277315789473685e-05, "loss": 0.4359, "step": 47482 }, { "epoch": 2.658920371822153, "grad_norm": 1.1466622352600098, "learning_rate": 9.277289473684211e-05, "loss": 0.4158, "step": 47483 }, { "epoch": 2.6589763691342814, "grad_norm": 1.3347880840301514, "learning_rate": 9.277263157894738e-05, "loss": 0.413, "step": 47484 }, { "epoch": 2.659032366446411, "grad_norm": 2.219022035598755, "learning_rate": 9.277236842105263e-05, "loss": 0.4263, "step": 47485 }, { "epoch": 2.6590883637585394, "grad_norm": 1.1262099742889404, "learning_rate": 9.27721052631579e-05, "loss": 0.3577, "step": 47486 }, { "epoch": 2.659144361070669, "grad_norm": 1.1133038997650146, "learning_rate": 9.277184210526316e-05, "loss": 0.353, "step": 47487 }, { "epoch": 2.6592003583827974, "grad_norm": 1.2435643672943115, "learning_rate": 9.277157894736842e-05, "loss": 0.4069, "step": 47488 }, { "epoch": 2.659256355694927, "grad_norm": 1.1728836297988892, "learning_rate": 9.27713157894737e-05, "loss": 0.3877, "step": 47489 }, { "epoch": 2.6593123530070555, "grad_norm": 1.1074012517929077, "learning_rate": 9.277105263157894e-05, "loss": 0.3398, "step": 47490 }, { "epoch": 2.659368350319185, "grad_norm": 1.1784555912017822, "learning_rate": 9.277078947368422e-05, "loss": 0.3061, "step": 47491 }, { "epoch": 2.6594243476313135, "grad_norm": 1.2618969678878784, "learning_rate": 9.277052631578947e-05, "loss": 0.5589, "step": 47492 }, { "epoch": 2.659480344943443, "grad_norm": 2.4528260231018066, "learning_rate": 9.277026315789475e-05, "loss": 0.4601, "step": 47493 }, { "epoch": 2.6595363422555716, "grad_norm": 1.0737504959106445, "learning_rate": 9.277000000000001e-05, "loss": 0.4018, "step": 47494 }, { "epoch": 2.659592339567701, "grad_norm": 1.3025546073913574, "learning_rate": 9.276973684210527e-05, "loss": 0.4166, "step": 47495 }, { "epoch": 2.6596483368798296, 
"grad_norm": 1.1772878170013428, "learning_rate": 9.276947368421053e-05, "loss": 0.421, "step": 47496 }, { "epoch": 2.659704334191959, "grad_norm": 1.1376831531524658, "learning_rate": 9.27692105263158e-05, "loss": 0.4561, "step": 47497 }, { "epoch": 2.6597603315040876, "grad_norm": 0.9929221272468567, "learning_rate": 9.276894736842106e-05, "loss": 0.3392, "step": 47498 }, { "epoch": 2.6598163288162167, "grad_norm": 1.189436674118042, "learning_rate": 9.276868421052632e-05, "loss": 0.3948, "step": 47499 }, { "epoch": 2.6598723261283457, "grad_norm": 1.2430880069732666, "learning_rate": 9.276842105263158e-05, "loss": 0.4323, "step": 47500 }, { "epoch": 2.6599283234404747, "grad_norm": 1.106285810470581, "learning_rate": 9.276815789473685e-05, "loss": 0.3353, "step": 47501 }, { "epoch": 2.6599843207526037, "grad_norm": 1.01632821559906, "learning_rate": 9.276789473684211e-05, "loss": 0.2796, "step": 47502 }, { "epoch": 2.6600403180647327, "grad_norm": 0.9593456983566284, "learning_rate": 9.276763157894737e-05, "loss": 0.3113, "step": 47503 }, { "epoch": 2.6600963153768618, "grad_norm": 1.1246867179870605, "learning_rate": 9.276736842105263e-05, "loss": 0.3786, "step": 47504 }, { "epoch": 2.6601523126889908, "grad_norm": 1.0139086246490479, "learning_rate": 9.276710526315789e-05, "loss": 0.3511, "step": 47505 }, { "epoch": 2.66020831000112, "grad_norm": 1.1895309686660767, "learning_rate": 9.276684210526317e-05, "loss": 0.3669, "step": 47506 }, { "epoch": 2.660264307313249, "grad_norm": 1.460293173789978, "learning_rate": 9.276657894736843e-05, "loss": 0.4111, "step": 47507 }, { "epoch": 2.660320304625378, "grad_norm": 1.1458388566970825, "learning_rate": 9.276631578947368e-05, "loss": 0.3038, "step": 47508 }, { "epoch": 2.660376301937507, "grad_norm": 1.026963472366333, "learning_rate": 9.276605263157894e-05, "loss": 0.3554, "step": 47509 }, { "epoch": 2.660432299249636, "grad_norm": 1.1522372961044312, "learning_rate": 9.276578947368422e-05, "loss": 0.3597, "step": 
47510 }, { "epoch": 2.660488296561765, "grad_norm": 1.4190419912338257, "learning_rate": 9.276552631578948e-05, "loss": 0.5335, "step": 47511 }, { "epoch": 2.660544293873894, "grad_norm": 1.1921638250350952, "learning_rate": 9.276526315789474e-05, "loss": 0.2936, "step": 47512 }, { "epoch": 2.660600291186023, "grad_norm": 1.2409456968307495, "learning_rate": 9.2765e-05, "loss": 0.3491, "step": 47513 }, { "epoch": 2.660656288498152, "grad_norm": 1.6299793720245361, "learning_rate": 9.276473684210527e-05, "loss": 0.4457, "step": 47514 }, { "epoch": 2.660712285810281, "grad_norm": 1.0544073581695557, "learning_rate": 9.276447368421053e-05, "loss": 0.3843, "step": 47515 }, { "epoch": 2.66076828312241, "grad_norm": 1.4418237209320068, "learning_rate": 9.27642105263158e-05, "loss": 0.4643, "step": 47516 }, { "epoch": 2.660824280434539, "grad_norm": 1.3180662393569946, "learning_rate": 9.276394736842105e-05, "loss": 0.4648, "step": 47517 }, { "epoch": 2.660880277746668, "grad_norm": 1.3610684871673584, "learning_rate": 9.276368421052632e-05, "loss": 0.409, "step": 47518 }, { "epoch": 2.660936275058797, "grad_norm": 1.2219200134277344, "learning_rate": 9.276342105263158e-05, "loss": 0.3288, "step": 47519 }, { "epoch": 2.660992272370926, "grad_norm": 1.127295970916748, "learning_rate": 9.276315789473686e-05, "loss": 0.3613, "step": 47520 }, { "epoch": 2.661048269683055, "grad_norm": 0.9292310476303101, "learning_rate": 9.276289473684212e-05, "loss": 0.3013, "step": 47521 }, { "epoch": 2.661104266995184, "grad_norm": 1.5516446828842163, "learning_rate": 9.276263157894736e-05, "loss": 0.4484, "step": 47522 }, { "epoch": 2.661160264307313, "grad_norm": 1.1948965787887573, "learning_rate": 9.276236842105263e-05, "loss": 0.3744, "step": 47523 }, { "epoch": 2.661216261619442, "grad_norm": 1.0692087411880493, "learning_rate": 9.27621052631579e-05, "loss": 0.4457, "step": 47524 }, { "epoch": 2.661272258931571, "grad_norm": 1.2923316955566406, "learning_rate": 9.276184210526317e-05, 
"loss": 0.4898, "step": 47525 }, { "epoch": 2.6613282562437, "grad_norm": 1.3638238906860352, "learning_rate": 9.276157894736843e-05, "loss": 0.4599, "step": 47526 }, { "epoch": 2.661384253555829, "grad_norm": 1.0682158470153809, "learning_rate": 9.276131578947369e-05, "loss": 0.3927, "step": 47527 }, { "epoch": 2.661440250867958, "grad_norm": 1.2292553186416626, "learning_rate": 9.276105263157895e-05, "loss": 0.4903, "step": 47528 }, { "epoch": 2.6614962481800872, "grad_norm": 1.2993570566177368, "learning_rate": 9.276078947368422e-05, "loss": 0.4181, "step": 47529 }, { "epoch": 2.6615522454922163, "grad_norm": 1.3693057298660278, "learning_rate": 9.276052631578948e-05, "loss": 0.4961, "step": 47530 }, { "epoch": 2.6616082428043453, "grad_norm": 1.1759835481643677, "learning_rate": 9.276026315789474e-05, "loss": 0.3923, "step": 47531 }, { "epoch": 2.6616642401164743, "grad_norm": 1.3165513277053833, "learning_rate": 9.276e-05, "loss": 0.5496, "step": 47532 }, { "epoch": 2.6617202374286033, "grad_norm": 1.412943720817566, "learning_rate": 9.275973684210527e-05, "loss": 0.3417, "step": 47533 }, { "epoch": 2.6617762347407323, "grad_norm": 1.6130561828613281, "learning_rate": 9.275947368421053e-05, "loss": 0.4891, "step": 47534 }, { "epoch": 2.6618322320528613, "grad_norm": 1.1690733432769775, "learning_rate": 9.275921052631579e-05, "loss": 0.5798, "step": 47535 }, { "epoch": 2.6618882293649904, "grad_norm": 1.4799929857254028, "learning_rate": 9.275894736842105e-05, "loss": 0.5649, "step": 47536 }, { "epoch": 2.6619442266771194, "grad_norm": 2.2744240760803223, "learning_rate": 9.275868421052633e-05, "loss": 0.4301, "step": 47537 }, { "epoch": 2.6620002239892484, "grad_norm": 1.0771297216415405, "learning_rate": 9.275842105263159e-05, "loss": 0.4837, "step": 47538 }, { "epoch": 2.6620562213013774, "grad_norm": 1.273617148399353, "learning_rate": 9.275815789473684e-05, "loss": 0.4142, "step": 47539 }, { "epoch": 2.6621122186135064, "grad_norm": 1.191590666770935, 
"learning_rate": 9.27578947368421e-05, "loss": 0.4429, "step": 47540 }, { "epoch": 2.6621682159256355, "grad_norm": 1.0914942026138306, "learning_rate": 9.275763157894736e-05, "loss": 0.329, "step": 47541 }, { "epoch": 2.6622242132377645, "grad_norm": 1.109630823135376, "learning_rate": 9.275736842105264e-05, "loss": 0.3849, "step": 47542 }, { "epoch": 2.6622802105498935, "grad_norm": 1.02004873752594, "learning_rate": 9.27571052631579e-05, "loss": 0.3487, "step": 47543 }, { "epoch": 2.6623362078620225, "grad_norm": 1.0554287433624268, "learning_rate": 9.275684210526317e-05, "loss": 0.5054, "step": 47544 }, { "epoch": 2.6623922051741515, "grad_norm": 1.2499020099639893, "learning_rate": 9.275657894736842e-05, "loss": 0.4873, "step": 47545 }, { "epoch": 2.6624482024862806, "grad_norm": 1.192051649093628, "learning_rate": 9.275631578947369e-05, "loss": 0.5205, "step": 47546 }, { "epoch": 2.6625041997984096, "grad_norm": 1.1780436038970947, "learning_rate": 9.275605263157895e-05, "loss": 0.4158, "step": 47547 }, { "epoch": 2.6625601971105386, "grad_norm": 1.2634131908416748, "learning_rate": 9.275578947368422e-05, "loss": 0.4462, "step": 47548 }, { "epoch": 2.6626161944226676, "grad_norm": 1.068765640258789, "learning_rate": 9.275552631578948e-05, "loss": 0.4764, "step": 47549 }, { "epoch": 2.6626721917347966, "grad_norm": 1.3456987142562866, "learning_rate": 9.275526315789474e-05, "loss": 0.3999, "step": 47550 }, { "epoch": 2.6627281890469257, "grad_norm": 1.2645747661590576, "learning_rate": 9.2755e-05, "loss": 0.5458, "step": 47551 }, { "epoch": 2.6627841863590547, "grad_norm": 1.092883586883545, "learning_rate": 9.275473684210528e-05, "loss": 0.3779, "step": 47552 }, { "epoch": 2.6628401836711837, "grad_norm": 1.0276927947998047, "learning_rate": 9.275447368421054e-05, "loss": 0.3754, "step": 47553 }, { "epoch": 2.6628961809833127, "grad_norm": 0.9178904294967651, "learning_rate": 9.27542105263158e-05, "loss": 0.2767, "step": 47554 }, { "epoch": 
2.6629521782954417, "grad_norm": 1.1357015371322632, "learning_rate": 9.275394736842105e-05, "loss": 0.5254, "step": 47555 }, { "epoch": 2.6630081756075707, "grad_norm": 0.9717265963554382, "learning_rate": 9.275368421052631e-05, "loss": 0.2569, "step": 47556 }, { "epoch": 2.6630641729196998, "grad_norm": 1.4066895246505737, "learning_rate": 9.275342105263159e-05, "loss": 0.4267, "step": 47557 }, { "epoch": 2.663120170231829, "grad_norm": 1.2285960912704468, "learning_rate": 9.275315789473685e-05, "loss": 0.4188, "step": 47558 }, { "epoch": 2.663176167543958, "grad_norm": 1.316941738128662, "learning_rate": 9.275289473684211e-05, "loss": 0.4087, "step": 47559 }, { "epoch": 2.663232164856087, "grad_norm": 1.264624834060669, "learning_rate": 9.275263157894737e-05, "loss": 0.4329, "step": 47560 }, { "epoch": 2.663288162168216, "grad_norm": 1.1164597272872925, "learning_rate": 9.275236842105264e-05, "loss": 0.2901, "step": 47561 }, { "epoch": 2.663344159480345, "grad_norm": 1.1543498039245605, "learning_rate": 9.27521052631579e-05, "loss": 0.363, "step": 47562 }, { "epoch": 2.663400156792474, "grad_norm": 1.302572250366211, "learning_rate": 9.275184210526316e-05, "loss": 0.4103, "step": 47563 }, { "epoch": 2.663456154104603, "grad_norm": 1.5761260986328125, "learning_rate": 9.275157894736842e-05, "loss": 0.5826, "step": 47564 }, { "epoch": 2.663512151416732, "grad_norm": 1.9424961805343628, "learning_rate": 9.275131578947369e-05, "loss": 0.4995, "step": 47565 }, { "epoch": 2.663568148728861, "grad_norm": 1.548863410949707, "learning_rate": 9.275105263157895e-05, "loss": 0.4664, "step": 47566 }, { "epoch": 2.66362414604099, "grad_norm": 1.0366572141647339, "learning_rate": 9.275078947368421e-05, "loss": 0.3418, "step": 47567 }, { "epoch": 2.663680143353119, "grad_norm": 1.2684104442596436, "learning_rate": 9.275052631578947e-05, "loss": 0.6641, "step": 47568 }, { "epoch": 2.663736140665248, "grad_norm": 1.0629364252090454, "learning_rate": 9.275026315789475e-05, "loss": 
0.4015, "step": 47569 }, { "epoch": 2.663792137977377, "grad_norm": 1.089124083518982, "learning_rate": 9.275e-05, "loss": 0.484, "step": 47570 }, { "epoch": 2.663848135289506, "grad_norm": 1.1646091938018799, "learning_rate": 9.274973684210528e-05, "loss": 0.3841, "step": 47571 }, { "epoch": 2.663904132601635, "grad_norm": 1.1608027219772339, "learning_rate": 9.274947368421052e-05, "loss": 0.445, "step": 47572 }, { "epoch": 2.663960129913764, "grad_norm": 1.2543586492538452, "learning_rate": 9.274921052631578e-05, "loss": 0.4621, "step": 47573 }, { "epoch": 2.664016127225893, "grad_norm": 1.136059284210205, "learning_rate": 9.274894736842106e-05, "loss": 0.4294, "step": 47574 }, { "epoch": 2.664072124538022, "grad_norm": 1.2578800916671753, "learning_rate": 9.274868421052632e-05, "loss": 0.3795, "step": 47575 }, { "epoch": 2.664128121850151, "grad_norm": 0.8623762726783752, "learning_rate": 9.274842105263159e-05, "loss": 0.2558, "step": 47576 }, { "epoch": 2.66418411916228, "grad_norm": 1.2714905738830566, "learning_rate": 9.274815789473684e-05, "loss": 0.4006, "step": 47577 }, { "epoch": 2.664240116474409, "grad_norm": 1.0374336242675781, "learning_rate": 9.274789473684211e-05, "loss": 0.3227, "step": 47578 }, { "epoch": 2.664296113786538, "grad_norm": 1.0971360206604004, "learning_rate": 9.274763157894737e-05, "loss": 0.4454, "step": 47579 }, { "epoch": 2.664352111098667, "grad_norm": 1.3018372058868408, "learning_rate": 9.274736842105264e-05, "loss": 0.4517, "step": 47580 }, { "epoch": 2.6644081084107962, "grad_norm": 1.4711925983428955, "learning_rate": 9.27471052631579e-05, "loss": 0.4173, "step": 47581 }, { "epoch": 2.6644641057229252, "grad_norm": 1.0012773275375366, "learning_rate": 9.274684210526316e-05, "loss": 0.3926, "step": 47582 }, { "epoch": 2.6645201030350543, "grad_norm": 1.1273266077041626, "learning_rate": 9.274657894736842e-05, "loss": 0.4498, "step": 47583 }, { "epoch": 2.6645761003471833, "grad_norm": 1.4104565382003784, "learning_rate": 
9.27463157894737e-05, "loss": 0.3628, "step": 47584 }, { "epoch": 2.6646320976593123, "grad_norm": 1.2100521326065063, "learning_rate": 9.274605263157895e-05, "loss": 0.3483, "step": 47585 }, { "epoch": 2.6646880949714413, "grad_norm": 1.0484782457351685, "learning_rate": 9.274578947368421e-05, "loss": 0.479, "step": 47586 }, { "epoch": 2.6647440922835703, "grad_norm": 1.1991688013076782, "learning_rate": 9.274552631578947e-05, "loss": 0.3608, "step": 47587 }, { "epoch": 2.6648000895956994, "grad_norm": 1.3280956745147705, "learning_rate": 9.274526315789475e-05, "loss": 0.3743, "step": 47588 }, { "epoch": 2.6648560869078284, "grad_norm": 1.2627779245376587, "learning_rate": 9.274500000000001e-05, "loss": 0.4256, "step": 47589 }, { "epoch": 2.6649120842199574, "grad_norm": 1.196451187133789, "learning_rate": 9.274473684210527e-05, "loss": 0.3947, "step": 47590 }, { "epoch": 2.6649680815320864, "grad_norm": 1.1108018159866333, "learning_rate": 9.274447368421053e-05, "loss": 0.3601, "step": 47591 }, { "epoch": 2.6650240788442154, "grad_norm": 1.2697044610977173, "learning_rate": 9.274421052631579e-05, "loss": 0.5211, "step": 47592 }, { "epoch": 2.6650800761563445, "grad_norm": 1.1213499307632446, "learning_rate": 9.274394736842106e-05, "loss": 0.2913, "step": 47593 }, { "epoch": 2.6651360734684735, "grad_norm": 1.0762097835540771, "learning_rate": 9.274368421052632e-05, "loss": 0.4065, "step": 47594 }, { "epoch": 2.6651920707806025, "grad_norm": 1.1248291730880737, "learning_rate": 9.274342105263158e-05, "loss": 0.3523, "step": 47595 }, { "epoch": 2.6652480680927315, "grad_norm": 2.2400875091552734, "learning_rate": 9.274315789473684e-05, "loss": 0.5946, "step": 47596 }, { "epoch": 2.6653040654048605, "grad_norm": 0.987905740737915, "learning_rate": 9.274289473684211e-05, "loss": 0.3015, "step": 47597 }, { "epoch": 2.6653600627169896, "grad_norm": 1.1405946016311646, "learning_rate": 9.274263157894737e-05, "loss": 0.3567, "step": 47598 }, { "epoch": 
2.6654160600291186, "grad_norm": 1.245200276374817, "learning_rate": 9.274236842105265e-05, "loss": 0.3658, "step": 47599 }, { "epoch": 2.6654720573412476, "grad_norm": 1.2087043523788452, "learning_rate": 9.274210526315789e-05, "loss": 0.3692, "step": 47600 }, { "epoch": 2.6655280546533766, "grad_norm": 1.0968115329742432, "learning_rate": 9.274184210526316e-05, "loss": 0.3851, "step": 47601 }, { "epoch": 2.6655840519655056, "grad_norm": 1.2881309986114502, "learning_rate": 9.274157894736842e-05, "loss": 0.3776, "step": 47602 }, { "epoch": 2.6656400492776346, "grad_norm": 1.0842046737670898, "learning_rate": 9.27413157894737e-05, "loss": 0.3982, "step": 47603 }, { "epoch": 2.6656960465897637, "grad_norm": 1.14646315574646, "learning_rate": 9.274105263157896e-05, "loss": 0.3668, "step": 47604 }, { "epoch": 2.6657520439018927, "grad_norm": 1.6181786060333252, "learning_rate": 9.274078947368422e-05, "loss": 0.3516, "step": 47605 }, { "epoch": 2.6658080412140217, "grad_norm": 1.264983057975769, "learning_rate": 9.274052631578948e-05, "loss": 0.3816, "step": 47606 }, { "epoch": 2.6658640385261507, "grad_norm": 1.2994030714035034, "learning_rate": 9.274026315789475e-05, "loss": 0.3726, "step": 47607 }, { "epoch": 2.6659200358382797, "grad_norm": 1.1678959131240845, "learning_rate": 9.274000000000001e-05, "loss": 0.414, "step": 47608 }, { "epoch": 2.6659760331504088, "grad_norm": 1.1982603073120117, "learning_rate": 9.273973684210526e-05, "loss": 0.4165, "step": 47609 }, { "epoch": 2.666032030462538, "grad_norm": 1.1993447542190552, "learning_rate": 9.273947368421053e-05, "loss": 0.4076, "step": 47610 }, { "epoch": 2.666088027774667, "grad_norm": 1.4822980165481567, "learning_rate": 9.273921052631579e-05, "loss": 0.5479, "step": 47611 }, { "epoch": 2.666144025086796, "grad_norm": 1.056164026260376, "learning_rate": 9.273894736842106e-05, "loss": 0.3972, "step": 47612 }, { "epoch": 2.666200022398925, "grad_norm": 1.2561849355697632, "learning_rate": 9.273868421052632e-05, 
"loss": 0.3351, "step": 47613 }, { "epoch": 2.666256019711054, "grad_norm": 1.2079919576644897, "learning_rate": 9.273842105263158e-05, "loss": 0.4385, "step": 47614 }, { "epoch": 2.666312017023183, "grad_norm": 0.975650429725647, "learning_rate": 9.273815789473684e-05, "loss": 0.3503, "step": 47615 }, { "epoch": 2.666368014335312, "grad_norm": 1.1701608896255493, "learning_rate": 9.273789473684211e-05, "loss": 0.3421, "step": 47616 }, { "epoch": 2.666424011647441, "grad_norm": 9.973010063171387, "learning_rate": 9.273763157894737e-05, "loss": 0.5261, "step": 47617 }, { "epoch": 2.66648000895957, "grad_norm": 1.176804542541504, "learning_rate": 9.273736842105263e-05, "loss": 0.3659, "step": 47618 }, { "epoch": 2.666536006271699, "grad_norm": 1.129718542098999, "learning_rate": 9.27371052631579e-05, "loss": 0.3772, "step": 47619 }, { "epoch": 2.666592003583828, "grad_norm": 1.212834358215332, "learning_rate": 9.273684210526317e-05, "loss": 0.5393, "step": 47620 }, { "epoch": 2.666648000895957, "grad_norm": 1.045158863067627, "learning_rate": 9.273657894736843e-05, "loss": 0.382, "step": 47621 }, { "epoch": 2.666703998208086, "grad_norm": 1.3724884986877441, "learning_rate": 9.273631578947369e-05, "loss": 0.3346, "step": 47622 }, { "epoch": 2.666759995520215, "grad_norm": 1.428062915802002, "learning_rate": 9.273605263157895e-05, "loss": 0.2812, "step": 47623 }, { "epoch": 2.666815992832344, "grad_norm": 1.3064239025115967, "learning_rate": 9.273578947368422e-05, "loss": 0.3652, "step": 47624 }, { "epoch": 2.666871990144473, "grad_norm": 1.2269973754882812, "learning_rate": 9.273552631578948e-05, "loss": 0.4256, "step": 47625 }, { "epoch": 2.666927987456602, "grad_norm": 1.1800638437271118, "learning_rate": 9.273526315789474e-05, "loss": 0.3497, "step": 47626 }, { "epoch": 2.666983984768731, "grad_norm": 1.008231520652771, "learning_rate": 9.2735e-05, "loss": 0.3556, "step": 47627 }, { "epoch": 2.66703998208086, "grad_norm": 1.3462977409362793, "learning_rate": 
9.273473684210526e-05, "loss": 0.3348, "step": 47628 }, { "epoch": 2.667095979392989, "grad_norm": 1.1017541885375977, "learning_rate": 9.273447368421053e-05, "loss": 0.3892, "step": 47629 }, { "epoch": 2.667151976705118, "grad_norm": 1.2252403497695923, "learning_rate": 9.273421052631579e-05, "loss": 0.3412, "step": 47630 }, { "epoch": 2.667207974017247, "grad_norm": 1.3084675073623657, "learning_rate": 9.273394736842106e-05, "loss": 0.5525, "step": 47631 }, { "epoch": 2.667263971329376, "grad_norm": 1.0266379117965698, "learning_rate": 9.273368421052631e-05, "loss": 0.4568, "step": 47632 }, { "epoch": 2.6673199686415052, "grad_norm": 1.2163810729980469, "learning_rate": 9.273342105263158e-05, "loss": 0.5049, "step": 47633 }, { "epoch": 2.6673759659536342, "grad_norm": 1.2116118669509888, "learning_rate": 9.273315789473684e-05, "loss": 0.3168, "step": 47634 }, { "epoch": 2.6674319632657633, "grad_norm": 1.1918892860412598, "learning_rate": 9.273289473684212e-05, "loss": 0.3678, "step": 47635 }, { "epoch": 2.6674879605778923, "grad_norm": 1.1233115196228027, "learning_rate": 9.273263157894738e-05, "loss": 0.3353, "step": 47636 }, { "epoch": 2.6675439578900213, "grad_norm": 1.2912232875823975, "learning_rate": 9.273236842105264e-05, "loss": 0.3578, "step": 47637 }, { "epoch": 2.6675999552021503, "grad_norm": 1.139631748199463, "learning_rate": 9.27321052631579e-05, "loss": 0.4722, "step": 47638 }, { "epoch": 2.6676559525142793, "grad_norm": 1.1909219026565552, "learning_rate": 9.273184210526317e-05, "loss": 0.368, "step": 47639 }, { "epoch": 2.6677119498264084, "grad_norm": 1.0619488954544067, "learning_rate": 9.273157894736843e-05, "loss": 0.3735, "step": 47640 }, { "epoch": 2.6677679471385374, "grad_norm": 1.5012789964675903, "learning_rate": 9.273131578947369e-05, "loss": 0.4321, "step": 47641 }, { "epoch": 2.6678239444506664, "grad_norm": 0.9738088250160217, "learning_rate": 9.273105263157895e-05, "loss": 0.4287, "step": 47642 }, { "epoch": 2.6678799417627954, 
"grad_norm": 0.9064452648162842, "learning_rate": 9.273078947368421e-05, "loss": 0.2431, "step": 47643 }, { "epoch": 2.6679359390749244, "grad_norm": 1.1912959814071655, "learning_rate": 9.273052631578948e-05, "loss": 0.4859, "step": 47644 }, { "epoch": 2.6679919363870535, "grad_norm": 1.25887930393219, "learning_rate": 9.273026315789474e-05, "loss": 0.4192, "step": 47645 }, { "epoch": 2.6680479336991825, "grad_norm": 1.5504930019378662, "learning_rate": 9.273e-05, "loss": 0.4707, "step": 47646 }, { "epoch": 2.6681039310113115, "grad_norm": 1.2293529510498047, "learning_rate": 9.272973684210526e-05, "loss": 0.4578, "step": 47647 }, { "epoch": 2.6681599283234405, "grad_norm": 1.8161752223968506, "learning_rate": 9.272947368421053e-05, "loss": 0.4477, "step": 47648 }, { "epoch": 2.6682159256355695, "grad_norm": 1.1746166944503784, "learning_rate": 9.27292105263158e-05, "loss": 0.3473, "step": 47649 }, { "epoch": 2.6682719229476985, "grad_norm": 1.2853899002075195, "learning_rate": 9.272894736842105e-05, "loss": 0.5221, "step": 47650 }, { "epoch": 2.6683279202598276, "grad_norm": 0.9979504346847534, "learning_rate": 9.272868421052631e-05, "loss": 0.4125, "step": 47651 }, { "epoch": 2.6683839175719566, "grad_norm": 1.3434330224990845, "learning_rate": 9.272842105263159e-05, "loss": 0.4953, "step": 47652 }, { "epoch": 2.6684399148840856, "grad_norm": 1.3204206228256226, "learning_rate": 9.272815789473685e-05, "loss": 0.4644, "step": 47653 }, { "epoch": 2.6684959121962146, "grad_norm": 1.3389912843704224, "learning_rate": 9.272789473684212e-05, "loss": 0.5843, "step": 47654 }, { "epoch": 2.6685519095083436, "grad_norm": 1.1455268859863281, "learning_rate": 9.272763157894737e-05, "loss": 0.4536, "step": 47655 }, { "epoch": 2.6686079068204727, "grad_norm": 1.0680243968963623, "learning_rate": 9.272736842105264e-05, "loss": 0.3066, "step": 47656 }, { "epoch": 2.6686639041326017, "grad_norm": 1.2559292316436768, "learning_rate": 9.27271052631579e-05, "loss": 0.3111, "step": 
47657 }, { "epoch": 2.6687199014447307, "grad_norm": 1.0821160078048706, "learning_rate": 9.272684210526317e-05, "loss": 0.387, "step": 47658 }, { "epoch": 2.6687758987568597, "grad_norm": 1.4170722961425781, "learning_rate": 9.272657894736842e-05, "loss": 0.4487, "step": 47659 }, { "epoch": 2.6688318960689887, "grad_norm": 1.3915019035339355, "learning_rate": 9.272631578947368e-05, "loss": 0.5105, "step": 47660 }, { "epoch": 2.6688878933811178, "grad_norm": 1.410212755203247, "learning_rate": 9.272605263157895e-05, "loss": 0.4084, "step": 47661 }, { "epoch": 2.668943890693247, "grad_norm": 1.3044352531433105, "learning_rate": 9.272578947368421e-05, "loss": 0.4028, "step": 47662 }, { "epoch": 2.668999888005376, "grad_norm": 1.2218925952911377, "learning_rate": 9.272552631578948e-05, "loss": 0.3886, "step": 47663 }, { "epoch": 2.669055885317505, "grad_norm": 1.5004249811172485, "learning_rate": 9.272526315789473e-05, "loss": 0.3935, "step": 47664 }, { "epoch": 2.669111882629634, "grad_norm": 1.314431071281433, "learning_rate": 9.2725e-05, "loss": 0.3841, "step": 47665 }, { "epoch": 2.669167879941763, "grad_norm": 0.9777299761772156, "learning_rate": 9.272473684210526e-05, "loss": 0.3477, "step": 47666 }, { "epoch": 2.669223877253892, "grad_norm": 1.036908745765686, "learning_rate": 9.272447368421054e-05, "loss": 0.396, "step": 47667 }, { "epoch": 2.669279874566021, "grad_norm": 1.1879122257232666, "learning_rate": 9.27242105263158e-05, "loss": 0.2804, "step": 47668 }, { "epoch": 2.66933587187815, "grad_norm": 1.2770706415176392, "learning_rate": 9.272394736842106e-05, "loss": 0.5406, "step": 47669 }, { "epoch": 2.669391869190279, "grad_norm": 1.174457311630249, "learning_rate": 9.272368421052632e-05, "loss": 0.4791, "step": 47670 }, { "epoch": 2.669447866502408, "grad_norm": 1.2456554174423218, "learning_rate": 9.272342105263159e-05, "loss": 0.3471, "step": 47671 }, { "epoch": 2.669503863814537, "grad_norm": 1.3959336280822754, "learning_rate": 
9.272315789473685e-05, "loss": 0.6184, "step": 47672 }, { "epoch": 2.669559861126666, "grad_norm": 1.3390270471572876, "learning_rate": 9.272289473684211e-05, "loss": 0.3971, "step": 47673 }, { "epoch": 2.669615858438795, "grad_norm": 1.325117588043213, "learning_rate": 9.272263157894737e-05, "loss": 0.39, "step": 47674 }, { "epoch": 2.669671855750924, "grad_norm": 1.154538631439209, "learning_rate": 9.272236842105264e-05, "loss": 0.3239, "step": 47675 }, { "epoch": 2.669727853063053, "grad_norm": 1.6201868057250977, "learning_rate": 9.27221052631579e-05, "loss": 0.6737, "step": 47676 }, { "epoch": 2.669783850375182, "grad_norm": 1.2993992567062378, "learning_rate": 9.272184210526316e-05, "loss": 0.3818, "step": 47677 }, { "epoch": 2.669839847687311, "grad_norm": 1.2092458009719849, "learning_rate": 9.272157894736842e-05, "loss": 0.4103, "step": 47678 }, { "epoch": 2.66989584499944, "grad_norm": 1.2663904428482056, "learning_rate": 9.272131578947368e-05, "loss": 0.4258, "step": 47679 }, { "epoch": 2.669951842311569, "grad_norm": 1.0981320142745972, "learning_rate": 9.272105263157895e-05, "loss": 0.4174, "step": 47680 }, { "epoch": 2.670007839623698, "grad_norm": 1.3481465578079224, "learning_rate": 9.272078947368421e-05, "loss": 0.4323, "step": 47681 }, { "epoch": 2.670063836935827, "grad_norm": 1.2047775983810425, "learning_rate": 9.272052631578947e-05, "loss": 0.4473, "step": 47682 }, { "epoch": 2.670119834247956, "grad_norm": 1.2452057600021362, "learning_rate": 9.272026315789473e-05, "loss": 0.3302, "step": 47683 }, { "epoch": 2.670175831560085, "grad_norm": 1.124908447265625, "learning_rate": 9.272e-05, "loss": 0.347, "step": 47684 }, { "epoch": 2.670231828872214, "grad_norm": 1.1847048997879028, "learning_rate": 9.271973684210527e-05, "loss": 0.451, "step": 47685 }, { "epoch": 2.6702878261843432, "grad_norm": 1.3739248514175415, "learning_rate": 9.271947368421054e-05, "loss": 0.4343, "step": 47686 }, { "epoch": 2.6703438234964723, "grad_norm": 
1.4640392065048218, "learning_rate": 9.271921052631579e-05, "loss": 0.3897, "step": 47687 }, { "epoch": 2.6703998208086013, "grad_norm": 2.0532164573669434, "learning_rate": 9.271894736842106e-05, "loss": 0.4406, "step": 47688 }, { "epoch": 2.6704558181207303, "grad_norm": 1.177236795425415, "learning_rate": 9.271868421052632e-05, "loss": 0.4548, "step": 47689 }, { "epoch": 2.6705118154328593, "grad_norm": 1.1050159931182861, "learning_rate": 9.271842105263159e-05, "loss": 0.3929, "step": 47690 }, { "epoch": 2.6705678127449883, "grad_norm": 1.2016139030456543, "learning_rate": 9.271815789473685e-05, "loss": 0.3876, "step": 47691 }, { "epoch": 2.6706238100571174, "grad_norm": 1.090924859046936, "learning_rate": 9.271789473684211e-05, "loss": 0.3816, "step": 47692 }, { "epoch": 2.6706798073692464, "grad_norm": 1.4255425930023193, "learning_rate": 9.271763157894737e-05, "loss": 0.5017, "step": 47693 }, { "epoch": 2.6707358046813754, "grad_norm": 1.1507585048675537, "learning_rate": 9.271736842105263e-05, "loss": 0.3509, "step": 47694 }, { "epoch": 2.6707918019935044, "grad_norm": 0.9805853962898254, "learning_rate": 9.27171052631579e-05, "loss": 0.2738, "step": 47695 }, { "epoch": 2.6708477993056334, "grad_norm": 1.2460591793060303, "learning_rate": 9.271684210526316e-05, "loss": 0.505, "step": 47696 }, { "epoch": 2.6709037966177624, "grad_norm": 1.3697493076324463, "learning_rate": 9.271657894736842e-05, "loss": 0.5751, "step": 47697 }, { "epoch": 2.6709597939298915, "grad_norm": 1.0968867540359497, "learning_rate": 9.271631578947368e-05, "loss": 0.4676, "step": 47698 }, { "epoch": 2.6710157912420205, "grad_norm": 1.3214744329452515, "learning_rate": 9.271605263157896e-05, "loss": 0.5485, "step": 47699 }, { "epoch": 2.6710717885541495, "grad_norm": 1.2098684310913086, "learning_rate": 9.271578947368422e-05, "loss": 0.5283, "step": 47700 }, { "epoch": 2.6711277858662785, "grad_norm": 1.2724970579147339, "learning_rate": 9.271552631578948e-05, "loss": 0.5644, "step": 
47701 }, { "epoch": 2.6711837831784075, "grad_norm": 1.4909911155700684, "learning_rate": 9.271526315789474e-05, "loss": 0.381, "step": 47702 }, { "epoch": 2.6712397804905366, "grad_norm": 0.9529215097427368, "learning_rate": 9.271500000000001e-05, "loss": 0.3875, "step": 47703 }, { "epoch": 2.6712957778026656, "grad_norm": 1.017738699913025, "learning_rate": 9.271473684210527e-05, "loss": 0.3505, "step": 47704 }, { "epoch": 2.6713517751147946, "grad_norm": 1.2561829090118408, "learning_rate": 9.271447368421053e-05, "loss": 0.5288, "step": 47705 }, { "epoch": 2.6714077724269236, "grad_norm": 0.9466624855995178, "learning_rate": 9.271421052631579e-05, "loss": 0.3294, "step": 47706 }, { "epoch": 2.6714637697390526, "grad_norm": 1.1562076807022095, "learning_rate": 9.271394736842106e-05, "loss": 0.3638, "step": 47707 }, { "epoch": 2.6715197670511817, "grad_norm": 1.2548937797546387, "learning_rate": 9.271368421052632e-05, "loss": 0.4375, "step": 47708 }, { "epoch": 2.6715757643633107, "grad_norm": 1.2531824111938477, "learning_rate": 9.27134210526316e-05, "loss": 0.3676, "step": 47709 }, { "epoch": 2.6716317616754397, "grad_norm": 1.2538422346115112, "learning_rate": 9.271315789473684e-05, "loss": 0.4406, "step": 47710 }, { "epoch": 2.6716877589875687, "grad_norm": 2.7898976802825928, "learning_rate": 9.271289473684211e-05, "loss": 0.3839, "step": 47711 }, { "epoch": 2.6717437562996977, "grad_norm": 1.247151255607605, "learning_rate": 9.271263157894737e-05, "loss": 0.3611, "step": 47712 }, { "epoch": 2.6717997536118268, "grad_norm": 1.3634530305862427, "learning_rate": 9.271236842105263e-05, "loss": 0.6133, "step": 47713 }, { "epoch": 2.6718557509239558, "grad_norm": 1.2633709907531738, "learning_rate": 9.27121052631579e-05, "loss": 0.3984, "step": 47714 }, { "epoch": 2.671911748236085, "grad_norm": 1.0147438049316406, "learning_rate": 9.271184210526315e-05, "loss": 0.3739, "step": 47715 }, { "epoch": 2.671967745548214, "grad_norm": 1.1683586835861206, 
"learning_rate": 9.271157894736843e-05, "loss": 0.3168, "step": 47716 }, { "epoch": 2.672023742860343, "grad_norm": 1.554422378540039, "learning_rate": 9.271131578947369e-05, "loss": 0.4177, "step": 47717 }, { "epoch": 2.672079740172472, "grad_norm": 1.0523382425308228, "learning_rate": 9.271105263157896e-05, "loss": 0.3649, "step": 47718 }, { "epoch": 2.672135737484601, "grad_norm": 1.0413740873336792, "learning_rate": 9.27107894736842e-05, "loss": 0.3302, "step": 47719 }, { "epoch": 2.67219173479673, "grad_norm": 1.0472010374069214, "learning_rate": 9.271052631578948e-05, "loss": 0.4117, "step": 47720 }, { "epoch": 2.672247732108859, "grad_norm": 1.0614008903503418, "learning_rate": 9.271026315789474e-05, "loss": 0.3228, "step": 47721 }, { "epoch": 2.672303729420988, "grad_norm": 1.2798857688903809, "learning_rate": 9.271000000000001e-05, "loss": 0.47, "step": 47722 }, { "epoch": 2.672359726733117, "grad_norm": 1.3511453866958618, "learning_rate": 9.270973684210527e-05, "loss": 0.509, "step": 47723 }, { "epoch": 2.672415724045246, "grad_norm": 1.0842405557632446, "learning_rate": 9.270947368421053e-05, "loss": 0.3972, "step": 47724 }, { "epoch": 2.672471721357375, "grad_norm": 1.1405714750289917, "learning_rate": 9.270921052631579e-05, "loss": 0.4452, "step": 47725 }, { "epoch": 2.672527718669504, "grad_norm": 1.2329267263412476, "learning_rate": 9.270894736842106e-05, "loss": 0.3298, "step": 47726 }, { "epoch": 2.672583715981633, "grad_norm": 1.4733500480651855, "learning_rate": 9.270868421052632e-05, "loss": 0.4643, "step": 47727 }, { "epoch": 2.672639713293762, "grad_norm": 1.2314311265945435, "learning_rate": 9.270842105263158e-05, "loss": 0.4534, "step": 47728 }, { "epoch": 2.672695710605891, "grad_norm": 1.321105718612671, "learning_rate": 9.270815789473684e-05, "loss": 0.3554, "step": 47729 }, { "epoch": 2.67275170791802, "grad_norm": 1.2114466428756714, "learning_rate": 9.27078947368421e-05, "loss": 0.2903, "step": 47730 }, { "epoch": 2.672807705230149, 
"grad_norm": 1.116355538368225, "learning_rate": 9.270763157894738e-05, "loss": 0.3899, "step": 47731 }, { "epoch": 2.672863702542278, "grad_norm": 0.9207002520561218, "learning_rate": 9.270736842105264e-05, "loss": 0.2722, "step": 47732 }, { "epoch": 2.672919699854407, "grad_norm": 2.3024168014526367, "learning_rate": 9.27071052631579e-05, "loss": 0.3817, "step": 47733 }, { "epoch": 2.672975697166536, "grad_norm": 1.370310664176941, "learning_rate": 9.270684210526316e-05, "loss": 0.3584, "step": 47734 }, { "epoch": 2.673031694478665, "grad_norm": 1.244425654411316, "learning_rate": 9.270657894736843e-05, "loss": 0.427, "step": 47735 }, { "epoch": 2.673087691790794, "grad_norm": 1.5430843830108643, "learning_rate": 9.270631578947369e-05, "loss": 0.3776, "step": 47736 }, { "epoch": 2.673143689102923, "grad_norm": 1.0521957874298096, "learning_rate": 9.270605263157895e-05, "loss": 0.3554, "step": 47737 }, { "epoch": 2.6731996864150522, "grad_norm": 1.0691338777542114, "learning_rate": 9.270578947368421e-05, "loss": 0.2817, "step": 47738 }, { "epoch": 2.6732556837271813, "grad_norm": 1.0008784532546997, "learning_rate": 9.270552631578948e-05, "loss": 0.2828, "step": 47739 }, { "epoch": 2.6733116810393103, "grad_norm": 1.1579856872558594, "learning_rate": 9.270526315789474e-05, "loss": 0.4353, "step": 47740 }, { "epoch": 2.6733676783514393, "grad_norm": 0.9530447125434875, "learning_rate": 9.270500000000001e-05, "loss": 0.3462, "step": 47741 }, { "epoch": 2.6734236756635683, "grad_norm": 1.923862338066101, "learning_rate": 9.270473684210526e-05, "loss": 0.4766, "step": 47742 }, { "epoch": 2.6734796729756973, "grad_norm": 1.365543246269226, "learning_rate": 9.270447368421053e-05, "loss": 0.4353, "step": 47743 }, { "epoch": 2.6735356702878263, "grad_norm": 1.2928060293197632, "learning_rate": 9.27042105263158e-05, "loss": 0.5077, "step": 47744 }, { "epoch": 2.6735916675999554, "grad_norm": 1.5102511644363403, "learning_rate": 9.270394736842107e-05, "loss": 0.4113, 
"step": 47745 }, { "epoch": 2.6736476649120844, "grad_norm": 28.981225967407227, "learning_rate": 9.270368421052633e-05, "loss": 0.2987, "step": 47746 }, { "epoch": 2.673703662224213, "grad_norm": 0.9775345325469971, "learning_rate": 9.270342105263157e-05, "loss": 0.3122, "step": 47747 }, { "epoch": 2.6737596595363424, "grad_norm": 1.050163984298706, "learning_rate": 9.270315789473685e-05, "loss": 0.3286, "step": 47748 }, { "epoch": 2.673815656848471, "grad_norm": 1.158350944519043, "learning_rate": 9.27028947368421e-05, "loss": 0.4127, "step": 47749 }, { "epoch": 2.6738716541606005, "grad_norm": 1.257048487663269, "learning_rate": 9.270263157894738e-05, "loss": 0.3862, "step": 47750 }, { "epoch": 2.673927651472729, "grad_norm": 1.4238168001174927, "learning_rate": 9.270236842105264e-05, "loss": 0.4913, "step": 47751 }, { "epoch": 2.6739836487848585, "grad_norm": 1.4065065383911133, "learning_rate": 9.27021052631579e-05, "loss": 0.4144, "step": 47752 }, { "epoch": 2.674039646096987, "grad_norm": 1.1196796894073486, "learning_rate": 9.270184210526316e-05, "loss": 0.4319, "step": 47753 }, { "epoch": 2.6740956434091165, "grad_norm": 1.1342880725860596, "learning_rate": 9.270157894736843e-05, "loss": 0.363, "step": 47754 }, { "epoch": 2.674151640721245, "grad_norm": 1.2477728128433228, "learning_rate": 9.270131578947369e-05, "loss": 0.4862, "step": 47755 }, { "epoch": 2.6742076380333746, "grad_norm": 1.174963116645813, "learning_rate": 9.270105263157895e-05, "loss": 0.524, "step": 47756 }, { "epoch": 2.674263635345503, "grad_norm": 1.1364448070526123, "learning_rate": 9.270078947368421e-05, "loss": 0.3597, "step": 47757 }, { "epoch": 2.6743196326576326, "grad_norm": 1.0814162492752075, "learning_rate": 9.270052631578948e-05, "loss": 0.3927, "step": 47758 }, { "epoch": 2.674375629969761, "grad_norm": 1.194346308708191, "learning_rate": 9.270026315789474e-05, "loss": 0.409, "step": 47759 }, { "epoch": 2.6744316272818907, "grad_norm": 1.2346943616867065, "learning_rate": 
9.27e-05, "loss": 0.3805, "step": 47760 }, { "epoch": 2.6744876245940192, "grad_norm": 1.2514721155166626, "learning_rate": 9.269973684210526e-05, "loss": 0.581, "step": 47761 }, { "epoch": 2.6745436219061487, "grad_norm": 1.1615569591522217, "learning_rate": 9.269947368421054e-05, "loss": 0.3865, "step": 47762 }, { "epoch": 2.6745996192182773, "grad_norm": 1.43671715259552, "learning_rate": 9.26992105263158e-05, "loss": 0.3051, "step": 47763 }, { "epoch": 2.6746556165304067, "grad_norm": 1.1561893224716187, "learning_rate": 9.269894736842106e-05, "loss": 0.433, "step": 47764 }, { "epoch": 2.6747116138425353, "grad_norm": 1.2000234127044678, "learning_rate": 9.269868421052632e-05, "loss": 0.3716, "step": 47765 }, { "epoch": 2.6747676111546648, "grad_norm": 1.0337213277816772, "learning_rate": 9.269842105263158e-05, "loss": 0.3815, "step": 47766 }, { "epoch": 2.6748236084667933, "grad_norm": 1.2272530794143677, "learning_rate": 9.269815789473685e-05, "loss": 0.5235, "step": 47767 }, { "epoch": 2.674879605778923, "grad_norm": 1.3122800588607788, "learning_rate": 9.269789473684211e-05, "loss": 0.3988, "step": 47768 }, { "epoch": 2.6749356030910514, "grad_norm": 1.0575898885726929, "learning_rate": 9.269763157894737e-05, "loss": 0.3546, "step": 47769 }, { "epoch": 2.674991600403181, "grad_norm": 1.240477442741394, "learning_rate": 9.269736842105263e-05, "loss": 0.3454, "step": 47770 }, { "epoch": 2.6750475977153094, "grad_norm": 1.0041407346725464, "learning_rate": 9.26971052631579e-05, "loss": 0.2848, "step": 47771 }, { "epoch": 2.675103595027439, "grad_norm": 1.426337718963623, "learning_rate": 9.269684210526316e-05, "loss": 0.4202, "step": 47772 }, { "epoch": 2.6751595923395675, "grad_norm": 1.2755252122879028, "learning_rate": 9.269657894736843e-05, "loss": 0.3307, "step": 47773 }, { "epoch": 2.675215589651697, "grad_norm": 1.0364798307418823, "learning_rate": 9.269631578947368e-05, "loss": 0.5004, "step": 47774 }, { "epoch": 2.6752715869638255, "grad_norm": 
1.1622111797332764, "learning_rate": 9.269605263157895e-05, "loss": 0.3771, "step": 47775 }, { "epoch": 2.675327584275955, "grad_norm": 1.380347728729248, "learning_rate": 9.269578947368421e-05, "loss": 0.4674, "step": 47776 }, { "epoch": 2.6753835815880835, "grad_norm": 1.1976853609085083, "learning_rate": 9.269552631578949e-05, "loss": 0.3967, "step": 47777 }, { "epoch": 2.675439578900213, "grad_norm": 1.1495815515518188, "learning_rate": 9.269526315789475e-05, "loss": 0.429, "step": 47778 }, { "epoch": 2.6754955762123416, "grad_norm": 1.116594672203064, "learning_rate": 9.2695e-05, "loss": 0.385, "step": 47779 }, { "epoch": 2.675551573524471, "grad_norm": 0.999987781047821, "learning_rate": 9.269473684210527e-05, "loss": 0.3361, "step": 47780 }, { "epoch": 2.6756075708365996, "grad_norm": 1.139424443244934, "learning_rate": 9.269447368421053e-05, "loss": 0.4051, "step": 47781 }, { "epoch": 2.675663568148729, "grad_norm": 1.2612146139144897, "learning_rate": 9.26942105263158e-05, "loss": 0.4113, "step": 47782 }, { "epoch": 2.6757195654608577, "grad_norm": 1.6711788177490234, "learning_rate": 9.269394736842106e-05, "loss": 0.4349, "step": 47783 }, { "epoch": 2.675775562772987, "grad_norm": 1.3835583925247192, "learning_rate": 9.269368421052632e-05, "loss": 0.4294, "step": 47784 }, { "epoch": 2.6758315600851157, "grad_norm": 1.3838880062103271, "learning_rate": 9.269342105263158e-05, "loss": 0.3625, "step": 47785 }, { "epoch": 2.675887557397245, "grad_norm": 1.1828957796096802, "learning_rate": 9.269315789473685e-05, "loss": 0.4543, "step": 47786 }, { "epoch": 2.6759435547093737, "grad_norm": 1.4447481632232666, "learning_rate": 9.269289473684211e-05, "loss": 0.3617, "step": 47787 }, { "epoch": 2.675999552021503, "grad_norm": 1.4834697246551514, "learning_rate": 9.269263157894737e-05, "loss": 0.457, "step": 47788 }, { "epoch": 2.6760555493336318, "grad_norm": 1.3348218202590942, "learning_rate": 9.269236842105263e-05, "loss": 0.363, "step": 47789 }, { "epoch": 
2.6761115466457612, "grad_norm": 1.2873613834381104, "learning_rate": 9.26921052631579e-05, "loss": 0.4467, "step": 47790 }, { "epoch": 2.67616754395789, "grad_norm": 1.587181568145752, "learning_rate": 9.269184210526316e-05, "loss": 0.4814, "step": 47791 }, { "epoch": 2.6762235412700193, "grad_norm": 0.9964888095855713, "learning_rate": 9.269157894736842e-05, "loss": 0.4351, "step": 47792 }, { "epoch": 2.676279538582148, "grad_norm": 1.1419146060943604, "learning_rate": 9.269131578947368e-05, "loss": 0.3941, "step": 47793 }, { "epoch": 2.6763355358942773, "grad_norm": 2.001640796661377, "learning_rate": 9.269105263157896e-05, "loss": 0.3904, "step": 47794 }, { "epoch": 2.676391533206406, "grad_norm": 1.2208929061889648, "learning_rate": 9.269078947368422e-05, "loss": 0.3498, "step": 47795 }, { "epoch": 2.6764475305185353, "grad_norm": 1.1475390195846558, "learning_rate": 9.269052631578949e-05, "loss": 0.3543, "step": 47796 }, { "epoch": 2.676503527830664, "grad_norm": 1.1290451288223267, "learning_rate": 9.269026315789474e-05, "loss": 0.3907, "step": 47797 }, { "epoch": 2.6765595251427934, "grad_norm": 2.4756264686584473, "learning_rate": 9.269e-05, "loss": 0.4052, "step": 47798 }, { "epoch": 2.676615522454922, "grad_norm": 1.2377738952636719, "learning_rate": 9.268973684210527e-05, "loss": 0.3952, "step": 47799 }, { "epoch": 2.6766715197670514, "grad_norm": 1.3301308155059814, "learning_rate": 9.268947368421053e-05, "loss": 0.4608, "step": 47800 }, { "epoch": 2.67672751707918, "grad_norm": 1.5669701099395752, "learning_rate": 9.26892105263158e-05, "loss": 0.383, "step": 47801 }, { "epoch": 2.6767835143913095, "grad_norm": 1.0153595209121704, "learning_rate": 9.268894736842105e-05, "loss": 0.3226, "step": 47802 }, { "epoch": 2.676839511703438, "grad_norm": 1.3522647619247437, "learning_rate": 9.268868421052632e-05, "loss": 0.5446, "step": 47803 }, { "epoch": 2.6768955090155675, "grad_norm": 1.0125373601913452, "learning_rate": 9.268842105263158e-05, "loss": 
0.2999, "step": 47804 }, { "epoch": 2.676951506327696, "grad_norm": 1.5722360610961914, "learning_rate": 9.268815789473685e-05, "loss": 0.5023, "step": 47805 }, { "epoch": 2.6770075036398255, "grad_norm": 1.3264153003692627, "learning_rate": 9.268789473684211e-05, "loss": 0.4459, "step": 47806 }, { "epoch": 2.677063500951954, "grad_norm": 1.2622379064559937, "learning_rate": 9.268763157894737e-05, "loss": 0.4154, "step": 47807 }, { "epoch": 2.6771194982640836, "grad_norm": 1.1937501430511475, "learning_rate": 9.268736842105263e-05, "loss": 0.3245, "step": 47808 }, { "epoch": 2.677175495576212, "grad_norm": 1.1173481941223145, "learning_rate": 9.26871052631579e-05, "loss": 0.5287, "step": 47809 }, { "epoch": 2.6772314928883416, "grad_norm": 1.136512041091919, "learning_rate": 9.268684210526317e-05, "loss": 0.3518, "step": 47810 }, { "epoch": 2.67728749020047, "grad_norm": 1.8841850757598877, "learning_rate": 9.268657894736843e-05, "loss": 0.4851, "step": 47811 }, { "epoch": 2.6773434875125997, "grad_norm": 1.0801796913146973, "learning_rate": 9.268631578947369e-05, "loss": 0.343, "step": 47812 }, { "epoch": 2.6773994848247282, "grad_norm": 1.3997802734375, "learning_rate": 9.268605263157896e-05, "loss": 0.3583, "step": 47813 }, { "epoch": 2.6774554821368577, "grad_norm": 1.2265769243240356, "learning_rate": 9.268578947368422e-05, "loss": 0.3904, "step": 47814 }, { "epoch": 2.6775114794489863, "grad_norm": 1.3216334581375122, "learning_rate": 9.268552631578948e-05, "loss": 0.4882, "step": 47815 }, { "epoch": 2.6775674767611157, "grad_norm": 1.2003467082977295, "learning_rate": 9.268526315789474e-05, "loss": 0.3937, "step": 47816 }, { "epoch": 2.6776234740732443, "grad_norm": 1.140363097190857, "learning_rate": 9.2685e-05, "loss": 0.3342, "step": 47817 }, { "epoch": 2.6776794713853738, "grad_norm": 1.2253869771957397, "learning_rate": 9.268473684210527e-05, "loss": 0.4294, "step": 47818 }, { "epoch": 2.6777354686975023, "grad_norm": 1.280599594116211, "learning_rate": 
9.268447368421053e-05, "loss": 0.4078, "step": 47819 }, { "epoch": 2.677791466009632, "grad_norm": 1.1165099143981934, "learning_rate": 9.268421052631579e-05, "loss": 0.3207, "step": 47820 }, { "epoch": 2.6778474633217604, "grad_norm": 1.1123110055923462, "learning_rate": 9.268394736842105e-05, "loss": 0.4881, "step": 47821 }, { "epoch": 2.67790346063389, "grad_norm": 1.076734185218811, "learning_rate": 9.268368421052632e-05, "loss": 0.4382, "step": 47822 }, { "epoch": 2.6779594579460184, "grad_norm": 1.2299330234527588, "learning_rate": 9.268342105263158e-05, "loss": 0.3107, "step": 47823 }, { "epoch": 2.678015455258148, "grad_norm": 1.223860263824463, "learning_rate": 9.268315789473684e-05, "loss": 0.4295, "step": 47824 }, { "epoch": 2.6780714525702765, "grad_norm": 1.0006104707717896, "learning_rate": 9.26828947368421e-05, "loss": 0.3384, "step": 47825 }, { "epoch": 2.678127449882406, "grad_norm": 1.1087127923965454, "learning_rate": 9.268263157894738e-05, "loss": 0.3521, "step": 47826 }, { "epoch": 2.6781834471945345, "grad_norm": 1.0691715478897095, "learning_rate": 9.268236842105264e-05, "loss": 0.3602, "step": 47827 }, { "epoch": 2.678239444506664, "grad_norm": 0.965553879737854, "learning_rate": 9.268210526315791e-05, "loss": 0.3315, "step": 47828 }, { "epoch": 2.6782954418187925, "grad_norm": 1.1629024744033813, "learning_rate": 9.268184210526316e-05, "loss": 0.3639, "step": 47829 }, { "epoch": 2.6783514391309216, "grad_norm": 1.5215692520141602, "learning_rate": 9.268157894736843e-05, "loss": 0.4933, "step": 47830 }, { "epoch": 2.6784074364430506, "grad_norm": 1.2850102186203003, "learning_rate": 9.268131578947369e-05, "loss": 0.3993, "step": 47831 }, { "epoch": 2.6784634337551796, "grad_norm": 1.3956345319747925, "learning_rate": 9.268105263157896e-05, "loss": 0.4413, "step": 47832 }, { "epoch": 2.6785194310673086, "grad_norm": 1.1221729516983032, "learning_rate": 9.268078947368422e-05, "loss": 0.334, "step": 47833 }, { "epoch": 2.6785754283794376, 
"grad_norm": 1.2323111295700073, "learning_rate": 9.268052631578947e-05, "loss": 0.4356, "step": 47834 }, { "epoch": 2.6786314256915666, "grad_norm": 1.4334977865219116, "learning_rate": 9.268026315789474e-05, "loss": 0.456, "step": 47835 }, { "epoch": 2.6786874230036957, "grad_norm": 1.2918404340744019, "learning_rate": 9.268e-05, "loss": 0.4275, "step": 47836 }, { "epoch": 2.6787434203158247, "grad_norm": 1.4713538885116577, "learning_rate": 9.267973684210527e-05, "loss": 0.5327, "step": 47837 }, { "epoch": 2.6787994176279537, "grad_norm": 1.343296766281128, "learning_rate": 9.267947368421053e-05, "loss": 0.5149, "step": 47838 }, { "epoch": 2.6788554149400827, "grad_norm": 1.1042585372924805, "learning_rate": 9.267921052631579e-05, "loss": 0.4674, "step": 47839 }, { "epoch": 2.6789114122522117, "grad_norm": 1.247188687324524, "learning_rate": 9.267894736842105e-05, "loss": 0.349, "step": 47840 }, { "epoch": 2.6789674095643408, "grad_norm": 1.980407953262329, "learning_rate": 9.267868421052633e-05, "loss": 0.3829, "step": 47841 }, { "epoch": 2.67902340687647, "grad_norm": 1.2237497568130493, "learning_rate": 9.267842105263159e-05, "loss": 0.5391, "step": 47842 }, { "epoch": 2.679079404188599, "grad_norm": 1.0744506120681763, "learning_rate": 9.267815789473685e-05, "loss": 0.458, "step": 47843 }, { "epoch": 2.679135401500728, "grad_norm": 1.7879174947738647, "learning_rate": 9.26778947368421e-05, "loss": 0.4406, "step": 47844 }, { "epoch": 2.679191398812857, "grad_norm": 1.2716103792190552, "learning_rate": 9.267763157894738e-05, "loss": 0.3587, "step": 47845 }, { "epoch": 2.679247396124986, "grad_norm": 1.2254962921142578, "learning_rate": 9.267736842105264e-05, "loss": 0.4714, "step": 47846 }, { "epoch": 2.679303393437115, "grad_norm": 1.1475086212158203, "learning_rate": 9.26771052631579e-05, "loss": 0.498, "step": 47847 }, { "epoch": 2.679359390749244, "grad_norm": 1.4059903621673584, "learning_rate": 9.267684210526316e-05, "loss": 0.3839, "step": 47848 }, { 
"epoch": 2.679415388061373, "grad_norm": 1.29823637008667, "learning_rate": 9.267657894736843e-05, "loss": 0.3926, "step": 47849 }, { "epoch": 2.679471385373502, "grad_norm": 1.236661434173584, "learning_rate": 9.267631578947369e-05, "loss": 0.3623, "step": 47850 }, { "epoch": 2.679527382685631, "grad_norm": 1.1398160457611084, "learning_rate": 9.267605263157895e-05, "loss": 0.2873, "step": 47851 }, { "epoch": 2.67958337999776, "grad_norm": 1.5209718942642212, "learning_rate": 9.267578947368421e-05, "loss": 0.4466, "step": 47852 }, { "epoch": 2.679639377309889, "grad_norm": 1.498822569847107, "learning_rate": 9.267552631578947e-05, "loss": 0.433, "step": 47853 }, { "epoch": 2.679695374622018, "grad_norm": 1.8030474185943604, "learning_rate": 9.267526315789474e-05, "loss": 0.4509, "step": 47854 }, { "epoch": 2.679751371934147, "grad_norm": 1.1488314867019653, "learning_rate": 9.2675e-05, "loss": 0.3625, "step": 47855 }, { "epoch": 2.679807369246276, "grad_norm": 1.2726997137069702, "learning_rate": 9.267473684210528e-05, "loss": 0.4461, "step": 47856 }, { "epoch": 2.679863366558405, "grad_norm": 1.2456368207931519, "learning_rate": 9.267447368421052e-05, "loss": 0.5879, "step": 47857 }, { "epoch": 2.679919363870534, "grad_norm": 1.3350324630737305, "learning_rate": 9.26742105263158e-05, "loss": 0.4616, "step": 47858 }, { "epoch": 2.679975361182663, "grad_norm": 1.104844093322754, "learning_rate": 9.267394736842106e-05, "loss": 0.3642, "step": 47859 }, { "epoch": 2.680031358494792, "grad_norm": 1.3494511842727661, "learning_rate": 9.267368421052633e-05, "loss": 0.4125, "step": 47860 }, { "epoch": 2.680087355806921, "grad_norm": 1.1848931312561035, "learning_rate": 9.267342105263157e-05, "loss": 0.4639, "step": 47861 }, { "epoch": 2.68014335311905, "grad_norm": 1.1642677783966064, "learning_rate": 9.267315789473685e-05, "loss": 0.3487, "step": 47862 }, { "epoch": 2.680199350431179, "grad_norm": 1.3135396242141724, "learning_rate": 9.267289473684211e-05, "loss": 
0.4558, "step": 47863 }, { "epoch": 2.680255347743308, "grad_norm": 1.04069185256958, "learning_rate": 9.267263157894738e-05, "loss": 0.3909, "step": 47864 }, { "epoch": 2.6803113450554372, "grad_norm": 1.0357638597488403, "learning_rate": 9.267236842105264e-05, "loss": 0.3585, "step": 47865 }, { "epoch": 2.6803673423675662, "grad_norm": 1.0681413412094116, "learning_rate": 9.26721052631579e-05, "loss": 0.3252, "step": 47866 }, { "epoch": 2.6804233396796953, "grad_norm": 1.5507864952087402, "learning_rate": 9.267184210526316e-05, "loss": 0.4486, "step": 47867 }, { "epoch": 2.6804793369918243, "grad_norm": 1.301295280456543, "learning_rate": 9.267157894736842e-05, "loss": 0.3784, "step": 47868 }, { "epoch": 2.6805353343039533, "grad_norm": 1.8679041862487793, "learning_rate": 9.26713157894737e-05, "loss": 0.5409, "step": 47869 }, { "epoch": 2.6805913316160823, "grad_norm": 1.0675432682037354, "learning_rate": 9.267105263157895e-05, "loss": 0.3748, "step": 47870 }, { "epoch": 2.6806473289282113, "grad_norm": 1.2178475856781006, "learning_rate": 9.267078947368421e-05, "loss": 0.4539, "step": 47871 }, { "epoch": 2.6807033262403404, "grad_norm": 1.1309034824371338, "learning_rate": 9.267052631578947e-05, "loss": 0.4314, "step": 47872 }, { "epoch": 2.6807593235524694, "grad_norm": 1.1379342079162598, "learning_rate": 9.267026315789475e-05, "loss": 0.3011, "step": 47873 }, { "epoch": 2.6808153208645984, "grad_norm": 1.3592448234558105, "learning_rate": 9.267e-05, "loss": 0.4765, "step": 47874 }, { "epoch": 2.6808713181767274, "grad_norm": 1.4272902011871338, "learning_rate": 9.266973684210527e-05, "loss": 0.4197, "step": 47875 }, { "epoch": 2.6809273154888564, "grad_norm": 1.0565543174743652, "learning_rate": 9.266947368421052e-05, "loss": 0.4647, "step": 47876 }, { "epoch": 2.6809833128009855, "grad_norm": 1.3128750324249268, "learning_rate": 9.26692105263158e-05, "loss": 0.4921, "step": 47877 }, { "epoch": 2.6810393101131145, "grad_norm": 1.1479016542434692, 
"learning_rate": 9.266894736842106e-05, "loss": 0.341, "step": 47878 }, { "epoch": 2.6810953074252435, "grad_norm": 1.0815821886062622, "learning_rate": 9.266868421052632e-05, "loss": 0.4417, "step": 47879 }, { "epoch": 2.6811513047373725, "grad_norm": 1.3256213665008545, "learning_rate": 9.266842105263158e-05, "loss": 0.3821, "step": 47880 }, { "epoch": 2.6812073020495015, "grad_norm": 1.0697332620620728, "learning_rate": 9.266815789473685e-05, "loss": 0.3436, "step": 47881 }, { "epoch": 2.6812632993616305, "grad_norm": 1.280110239982605, "learning_rate": 9.266789473684211e-05, "loss": 0.4328, "step": 47882 }, { "epoch": 2.6813192966737596, "grad_norm": 1.5841667652130127, "learning_rate": 9.266763157894738e-05, "loss": 0.3989, "step": 47883 }, { "epoch": 2.6813752939858886, "grad_norm": 1.0869314670562744, "learning_rate": 9.266736842105263e-05, "loss": 0.3225, "step": 47884 }, { "epoch": 2.6814312912980176, "grad_norm": 1.1324384212493896, "learning_rate": 9.266710526315789e-05, "loss": 0.3942, "step": 47885 }, { "epoch": 2.6814872886101466, "grad_norm": 1.2003108263015747, "learning_rate": 9.266684210526316e-05, "loss": 0.3851, "step": 47886 }, { "epoch": 2.6815432859222756, "grad_norm": 1.059016466140747, "learning_rate": 9.266657894736842e-05, "loss": 0.4677, "step": 47887 }, { "epoch": 2.6815992832344047, "grad_norm": 1.198194146156311, "learning_rate": 9.26663157894737e-05, "loss": 0.4202, "step": 47888 }, { "epoch": 2.6816552805465337, "grad_norm": 1.0208699703216553, "learning_rate": 9.266605263157894e-05, "loss": 0.3789, "step": 47889 }, { "epoch": 2.6817112778586627, "grad_norm": 1.0194646120071411, "learning_rate": 9.266578947368422e-05, "loss": 0.3313, "step": 47890 }, { "epoch": 2.6817672751707917, "grad_norm": 1.2013812065124512, "learning_rate": 9.266552631578948e-05, "loss": 0.3816, "step": 47891 }, { "epoch": 2.6818232724829207, "grad_norm": 1.0947545766830444, "learning_rate": 9.266526315789475e-05, "loss": 0.3801, "step": 47892 }, { "epoch": 
2.6818792697950498, "grad_norm": 1.2629832029342651, "learning_rate": 9.266500000000001e-05, "loss": 0.3579, "step": 47893 }, { "epoch": 2.6819352671071788, "grad_norm": 0.9853864908218384, "learning_rate": 9.266473684210527e-05, "loss": 0.2782, "step": 47894 }, { "epoch": 2.681991264419308, "grad_norm": 1.2657511234283447, "learning_rate": 9.266447368421053e-05, "loss": 0.5076, "step": 47895 }, { "epoch": 2.682047261731437, "grad_norm": 1.0073182582855225, "learning_rate": 9.26642105263158e-05, "loss": 0.3402, "step": 47896 }, { "epoch": 2.682103259043566, "grad_norm": 2.022655963897705, "learning_rate": 9.266394736842106e-05, "loss": 0.5163, "step": 47897 }, { "epoch": 2.682159256355695, "grad_norm": 1.2612284421920776, "learning_rate": 9.266368421052632e-05, "loss": 0.6015, "step": 47898 }, { "epoch": 2.682215253667824, "grad_norm": 1.1210434436798096, "learning_rate": 9.266342105263158e-05, "loss": 0.3964, "step": 47899 }, { "epoch": 2.682271250979953, "grad_norm": 1.2173187732696533, "learning_rate": 9.266315789473685e-05, "loss": 0.44, "step": 47900 }, { "epoch": 2.682327248292082, "grad_norm": 1.2398945093154907, "learning_rate": 9.266289473684211e-05, "loss": 0.3909, "step": 47901 }, { "epoch": 2.682383245604211, "grad_norm": 1.4878675937652588, "learning_rate": 9.266263157894737e-05, "loss": 0.4104, "step": 47902 }, { "epoch": 2.68243924291634, "grad_norm": 1.236393690109253, "learning_rate": 9.266236842105263e-05, "loss": 0.5328, "step": 47903 }, { "epoch": 2.682495240228469, "grad_norm": 1.2498973608016968, "learning_rate": 9.266210526315789e-05, "loss": 0.3093, "step": 47904 }, { "epoch": 2.682551237540598, "grad_norm": 1.0993890762329102, "learning_rate": 9.266184210526317e-05, "loss": 0.3808, "step": 47905 }, { "epoch": 2.682607234852727, "grad_norm": 1.2274836301803589, "learning_rate": 9.266157894736843e-05, "loss": 0.4328, "step": 47906 }, { "epoch": 2.682663232164856, "grad_norm": 1.1571154594421387, "learning_rate": 9.266131578947368e-05, "loss": 
0.4065, "step": 47907 }, { "epoch": 2.682719229476985, "grad_norm": 1.1565924882888794, "learning_rate": 9.266105263157894e-05, "loss": 0.5049, "step": 47908 }, { "epoch": 2.682775226789114, "grad_norm": 1.1699274778366089, "learning_rate": 9.266078947368422e-05, "loss": 0.3033, "step": 47909 }, { "epoch": 2.682831224101243, "grad_norm": 1.2623804807662964, "learning_rate": 9.266052631578948e-05, "loss": 0.3946, "step": 47910 }, { "epoch": 2.682887221413372, "grad_norm": 1.051317811012268, "learning_rate": 9.266026315789475e-05, "loss": 0.3333, "step": 47911 }, { "epoch": 2.682943218725501, "grad_norm": 1.1511017084121704, "learning_rate": 9.266e-05, "loss": 0.3221, "step": 47912 }, { "epoch": 2.68299921603763, "grad_norm": 1.2766140699386597, "learning_rate": 9.265973684210527e-05, "loss": 0.4356, "step": 47913 }, { "epoch": 2.683055213349759, "grad_norm": 1.5672428607940674, "learning_rate": 9.265947368421053e-05, "loss": 0.4113, "step": 47914 }, { "epoch": 2.683111210661888, "grad_norm": 1.0446810722351074, "learning_rate": 9.26592105263158e-05, "loss": 0.3586, "step": 47915 }, { "epoch": 2.683167207974017, "grad_norm": 1.1152465343475342, "learning_rate": 9.265894736842105e-05, "loss": 0.386, "step": 47916 }, { "epoch": 2.683223205286146, "grad_norm": 0.9407862424850464, "learning_rate": 9.265868421052632e-05, "loss": 0.3601, "step": 47917 }, { "epoch": 2.6832792025982752, "grad_norm": 1.156243085861206, "learning_rate": 9.265842105263158e-05, "loss": 0.3488, "step": 47918 }, { "epoch": 2.6833351999104043, "grad_norm": 2.416597604751587, "learning_rate": 9.265815789473684e-05, "loss": 0.3958, "step": 47919 }, { "epoch": 2.6833911972225333, "grad_norm": 1.2758125066757202, "learning_rate": 9.265789473684212e-05, "loss": 0.3963, "step": 47920 }, { "epoch": 2.6834471945346623, "grad_norm": 1.1852244138717651, "learning_rate": 9.265763157894736e-05, "loss": 0.4184, "step": 47921 }, { "epoch": 2.6835031918467913, "grad_norm": 1.0322381258010864, "learning_rate": 
9.265736842105264e-05, "loss": 0.2977, "step": 47922 }, { "epoch": 2.6835591891589203, "grad_norm": 1.0341237783432007, "learning_rate": 9.26571052631579e-05, "loss": 0.3016, "step": 47923 }, { "epoch": 2.6836151864710494, "grad_norm": 0.9271747469902039, "learning_rate": 9.265684210526317e-05, "loss": 0.3435, "step": 47924 }, { "epoch": 2.6836711837831784, "grad_norm": 1.0085557699203491, "learning_rate": 9.265657894736843e-05, "loss": 0.3356, "step": 47925 }, { "epoch": 2.6837271810953074, "grad_norm": 1.129323124885559, "learning_rate": 9.265631578947369e-05, "loss": 0.3355, "step": 47926 }, { "epoch": 2.6837831784074364, "grad_norm": 1.3734979629516602, "learning_rate": 9.265605263157895e-05, "loss": 0.4125, "step": 47927 }, { "epoch": 2.6838391757195654, "grad_norm": 1.2266839742660522, "learning_rate": 9.265578947368422e-05, "loss": 0.41, "step": 47928 }, { "epoch": 2.6838951730316944, "grad_norm": 1.1913975477218628, "learning_rate": 9.265552631578948e-05, "loss": 0.3633, "step": 47929 }, { "epoch": 2.6839511703438235, "grad_norm": 1.0053199529647827, "learning_rate": 9.265526315789474e-05, "loss": 0.3481, "step": 47930 }, { "epoch": 2.6840071676559525, "grad_norm": 1.1489588022232056, "learning_rate": 9.2655e-05, "loss": 0.4388, "step": 47931 }, { "epoch": 2.6840631649680815, "grad_norm": 1.1263055801391602, "learning_rate": 9.265473684210527e-05, "loss": 0.3868, "step": 47932 }, { "epoch": 2.6841191622802105, "grad_norm": 1.1484137773513794, "learning_rate": 9.265447368421053e-05, "loss": 0.4087, "step": 47933 }, { "epoch": 2.6841751595923395, "grad_norm": 1.2428553104400635, "learning_rate": 9.265421052631579e-05, "loss": 0.4271, "step": 47934 }, { "epoch": 2.6842311569044686, "grad_norm": 1.0594239234924316, "learning_rate": 9.265394736842105e-05, "loss": 0.4011, "step": 47935 }, { "epoch": 2.6842871542165976, "grad_norm": 1.0110453367233276, "learning_rate": 9.265368421052631e-05, "loss": 0.3102, "step": 47936 }, { "epoch": 2.6843431515287266, 
"grad_norm": 1.1090874671936035, "learning_rate": 9.265342105263159e-05, "loss": 0.4184, "step": 47937 }, { "epoch": 2.6843991488408556, "grad_norm": 4.073392868041992, "learning_rate": 9.265315789473684e-05, "loss": 0.4728, "step": 47938 }, { "epoch": 2.6844551461529846, "grad_norm": 1.1305532455444336, "learning_rate": 9.26528947368421e-05, "loss": 0.4704, "step": 47939 }, { "epoch": 2.6845111434651137, "grad_norm": 1.3449867963790894, "learning_rate": 9.265263157894736e-05, "loss": 0.3639, "step": 47940 }, { "epoch": 2.6845671407772427, "grad_norm": 1.0622413158416748, "learning_rate": 9.265236842105264e-05, "loss": 0.3348, "step": 47941 }, { "epoch": 2.6846231380893717, "grad_norm": 1.2072728872299194, "learning_rate": 9.26521052631579e-05, "loss": 0.3167, "step": 47942 }, { "epoch": 2.6846791354015007, "grad_norm": 1.2234331369400024, "learning_rate": 9.265184210526317e-05, "loss": 0.397, "step": 47943 }, { "epoch": 2.6847351327136297, "grad_norm": 1.2786176204681396, "learning_rate": 9.265157894736842e-05, "loss": 0.4768, "step": 47944 }, { "epoch": 2.6847911300257588, "grad_norm": 1.0533453226089478, "learning_rate": 9.265131578947369e-05, "loss": 0.4114, "step": 47945 }, { "epoch": 2.6848471273378878, "grad_norm": 1.072516679763794, "learning_rate": 9.265105263157895e-05, "loss": 0.3881, "step": 47946 }, { "epoch": 2.684903124650017, "grad_norm": 1.0557605028152466, "learning_rate": 9.265078947368422e-05, "loss": 0.4408, "step": 47947 }, { "epoch": 2.684959121962146, "grad_norm": 1.082983374595642, "learning_rate": 9.265052631578948e-05, "loss": 0.2921, "step": 47948 }, { "epoch": 2.685015119274275, "grad_norm": 0.924119770526886, "learning_rate": 9.265026315789474e-05, "loss": 0.3537, "step": 47949 }, { "epoch": 2.685071116586404, "grad_norm": 1.0329347848892212, "learning_rate": 9.265e-05, "loss": 0.3673, "step": 47950 }, { "epoch": 2.685127113898533, "grad_norm": 1.097707748413086, "learning_rate": 9.264973684210528e-05, "loss": 0.497, "step": 47951 }, { 
"epoch": 2.685183111210662, "grad_norm": 1.0307432413101196, "learning_rate": 9.264947368421054e-05, "loss": 0.4287, "step": 47952 }, { "epoch": 2.685239108522791, "grad_norm": 1.6949459314346313, "learning_rate": 9.26492105263158e-05, "loss": 0.412, "step": 47953 }, { "epoch": 2.68529510583492, "grad_norm": 1.2851179838180542, "learning_rate": 9.264894736842105e-05, "loss": 0.3064, "step": 47954 }, { "epoch": 2.685351103147049, "grad_norm": 1.037749171257019, "learning_rate": 9.264868421052631e-05, "loss": 0.3579, "step": 47955 }, { "epoch": 2.685407100459178, "grad_norm": 1.116367220878601, "learning_rate": 9.264842105263159e-05, "loss": 0.3778, "step": 47956 }, { "epoch": 2.685463097771307, "grad_norm": 1.2896662950515747, "learning_rate": 9.264815789473685e-05, "loss": 0.3838, "step": 47957 }, { "epoch": 2.685519095083436, "grad_norm": 0.9503831267356873, "learning_rate": 9.264789473684211e-05, "loss": 0.2932, "step": 47958 }, { "epoch": 2.685575092395565, "grad_norm": 1.1508657932281494, "learning_rate": 9.264763157894737e-05, "loss": 0.425, "step": 47959 }, { "epoch": 2.685631089707694, "grad_norm": 1.316573143005371, "learning_rate": 9.264736842105264e-05, "loss": 0.4829, "step": 47960 }, { "epoch": 2.685687087019823, "grad_norm": 0.9538385272026062, "learning_rate": 9.26471052631579e-05, "loss": 0.4012, "step": 47961 }, { "epoch": 2.685743084331952, "grad_norm": 1.1991348266601562, "learning_rate": 9.264684210526316e-05, "loss": 0.4134, "step": 47962 }, { "epoch": 2.685799081644081, "grad_norm": 1.221527099609375, "learning_rate": 9.264657894736842e-05, "loss": 0.4077, "step": 47963 }, { "epoch": 2.68585507895621, "grad_norm": 1.2710380554199219, "learning_rate": 9.264631578947369e-05, "loss": 0.3191, "step": 47964 }, { "epoch": 2.685911076268339, "grad_norm": 1.318284273147583, "learning_rate": 9.264605263157895e-05, "loss": 0.4607, "step": 47965 }, { "epoch": 2.685967073580468, "grad_norm": 1.359891414642334, "learning_rate": 9.264578947368423e-05, 
"loss": 0.5648, "step": 47966 }, { "epoch": 2.686023070892597, "grad_norm": 1.0615447759628296, "learning_rate": 9.264552631578947e-05, "loss": 0.3833, "step": 47967 }, { "epoch": 2.686079068204726, "grad_norm": 1.019260048866272, "learning_rate": 9.264526315789475e-05, "loss": 0.3797, "step": 47968 }, { "epoch": 2.686135065516855, "grad_norm": 1.2075841426849365, "learning_rate": 9.2645e-05, "loss": 0.4394, "step": 47969 }, { "epoch": 2.6861910628289842, "grad_norm": 1.1413755416870117, "learning_rate": 9.264473684210528e-05, "loss": 0.4114, "step": 47970 }, { "epoch": 2.6862470601411133, "grad_norm": 1.404134750366211, "learning_rate": 9.264447368421052e-05, "loss": 0.3621, "step": 47971 }, { "epoch": 2.6863030574532423, "grad_norm": 1.2854489088058472, "learning_rate": 9.264421052631578e-05, "loss": 0.3837, "step": 47972 }, { "epoch": 2.6863590547653713, "grad_norm": 1.9556503295898438, "learning_rate": 9.264394736842106e-05, "loss": 0.4353, "step": 47973 }, { "epoch": 2.6864150520775003, "grad_norm": 1.364250659942627, "learning_rate": 9.264368421052632e-05, "loss": 0.5261, "step": 47974 }, { "epoch": 2.6864710493896293, "grad_norm": 1.2445117235183716, "learning_rate": 9.264342105263159e-05, "loss": 0.2905, "step": 47975 }, { "epoch": 2.6865270467017583, "grad_norm": 1.1323368549346924, "learning_rate": 9.264315789473684e-05, "loss": 0.424, "step": 47976 }, { "epoch": 2.6865830440138874, "grad_norm": 1.2794936895370483, "learning_rate": 9.264289473684211e-05, "loss": 0.5011, "step": 47977 }, { "epoch": 2.6866390413260164, "grad_norm": 1.2332236766815186, "learning_rate": 9.264263157894737e-05, "loss": 0.4695, "step": 47978 }, { "epoch": 2.6866950386381454, "grad_norm": 1.0451505184173584, "learning_rate": 9.264236842105264e-05, "loss": 0.4082, "step": 47979 }, { "epoch": 2.6867510359502744, "grad_norm": 1.5541996955871582, "learning_rate": 9.26421052631579e-05, "loss": 0.6527, "step": 47980 }, { "epoch": 2.6868070332624034, "grad_norm": 1.4219582080841064, 
"learning_rate": 9.264184210526316e-05, "loss": 0.5364, "step": 47981 }, { "epoch": 2.6868630305745325, "grad_norm": 1.116012692451477, "learning_rate": 9.264157894736842e-05, "loss": 0.353, "step": 47982 }, { "epoch": 2.6869190278866615, "grad_norm": 1.1991639137268066, "learning_rate": 9.26413157894737e-05, "loss": 0.3888, "step": 47983 }, { "epoch": 2.6869750251987905, "grad_norm": 1.2383548021316528, "learning_rate": 9.264105263157896e-05, "loss": 0.3745, "step": 47984 }, { "epoch": 2.6870310225109195, "grad_norm": 1.2030359506607056, "learning_rate": 9.264078947368421e-05, "loss": 0.3804, "step": 47985 }, { "epoch": 2.6870870198230485, "grad_norm": 1.30626380443573, "learning_rate": 9.264052631578947e-05, "loss": 0.4919, "step": 47986 }, { "epoch": 2.6871430171351776, "grad_norm": 1.3223153352737427, "learning_rate": 9.264026315789475e-05, "loss": 0.4822, "step": 47987 }, { "epoch": 2.6871990144473066, "grad_norm": 1.3755075931549072, "learning_rate": 9.264000000000001e-05, "loss": 0.4073, "step": 47988 }, { "epoch": 2.6872550117594356, "grad_norm": 1.0453438758850098, "learning_rate": 9.263973684210527e-05, "loss": 0.4705, "step": 47989 }, { "epoch": 2.6873110090715646, "grad_norm": 1.2186061143875122, "learning_rate": 9.263947368421053e-05, "loss": 0.6236, "step": 47990 }, { "epoch": 2.6873670063836936, "grad_norm": 1.1164683103561401, "learning_rate": 9.263921052631579e-05, "loss": 0.3955, "step": 47991 }, { "epoch": 2.6874230036958227, "grad_norm": 1.1177412271499634, "learning_rate": 9.263894736842106e-05, "loss": 0.378, "step": 47992 }, { "epoch": 2.6874790010079517, "grad_norm": 1.1956828832626343, "learning_rate": 9.263868421052632e-05, "loss": 0.367, "step": 47993 }, { "epoch": 2.6875349983200807, "grad_norm": 1.2705851793289185, "learning_rate": 9.263842105263158e-05, "loss": 0.3338, "step": 47994 }, { "epoch": 2.6875909956322097, "grad_norm": 1.599159598350525, "learning_rate": 9.263815789473684e-05, "loss": 0.4086, "step": 47995 }, { "epoch": 
2.6876469929443387, "grad_norm": 1.3865232467651367, "learning_rate": 9.263789473684211e-05, "loss": 0.4835, "step": 47996 }, { "epoch": 2.6877029902564677, "grad_norm": 1.4632568359375, "learning_rate": 9.263763157894737e-05, "loss": 0.4259, "step": 47997 }, { "epoch": 2.6877589875685968, "grad_norm": 1.3144479990005493, "learning_rate": 9.263736842105265e-05, "loss": 0.5303, "step": 47998 }, { "epoch": 2.687814984880726, "grad_norm": 1.648030161857605, "learning_rate": 9.263710526315789e-05, "loss": 0.3871, "step": 47999 }, { "epoch": 2.687870982192855, "grad_norm": 1.5107238292694092, "learning_rate": 9.263684210526316e-05, "loss": 0.5785, "step": 48000 } ], "logging_steps": 1, "max_steps": 400000, "num_input_tokens_seen": 0, "num_train_epochs": 23, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.470653447395082e+17, "train_batch_size": 28, "trial_name": null, "trial_params": null }